diff --git a/src/main/services/team/TeamProvisioningService.ts b/src/main/services/team/TeamProvisioningService.ts index c65a5a48..37ded340 100644 --- a/src/main/services/team/TeamProvisioningService.ts +++ b/src/main/services/team/TeamProvisioningService.ts @@ -166,6 +166,7 @@ import { getOpenCodeTeamRuntimeDirectory, migrateLegacyOpenCodeRuntimeState, readOpenCodeRuntimeLaneIndex, + recoverStaleOpenCodeRuntimeLaneIndexEntry, removeOpenCodeRuntimeLaneIndexEntry, upsertOpenCodeRuntimeLaneIndexEntry, } from './opencode/store/OpenCodeRuntimeManifestEvidenceReader'; @@ -6063,12 +6064,7 @@ export class TeamProvisioningService { ); } if (!this.isCurrentTrackedRun(run)) return; - this.teamChangeEmitter?.({ - type: 'member-spawn', - teamName: run.teamName, - runId: run.runId, - detail: memberName, - }); + this.emitMemberSpawnChange(run, memberName); if (run.isLaunch) { void this.persistLaunchStateSnapshot(run, run.provisioningComplete ? 'finished' : 'active'); } @@ -6115,13 +6111,15 @@ export class TeamProvisioningService { await this.persistLaunchStateSnapshot(run, run.provisioningComplete ? 'finished' : 'active'); const persisted = await this.launchStateStore.read(teamName); - const liveSnapshot = snapshotFromRuntimeMemberStatuses({ - teamName: run.teamName, - expectedMembers: run.expectedMembers, - leadSessionId: run.detectedSessionId ?? undefined, - launchPhase: run.provisioningComplete ? 'finished' : 'active', - statuses: this.buildRuntimeSpawnStatusRecord(run), - }); + const liveSnapshot = + this.buildLiveLaunchSnapshotForRun(run, run.provisioningComplete ? 'finished' : 'active') ?? + snapshotFromRuntimeMemberStatuses({ + teamName: run.teamName, + expectedMembers: run.expectedMembers, + leadSessionId: run.detectedSessionId ?? undefined, + launchPhase: run.provisioningComplete ? 'finished' : 'active', + statuses: this.buildRuntimeSpawnStatusRecord(run), + }); const snapshot = persisted ?? liveSnapshot; const statuses = await this.attachLiveRuntimeMetadataToStatuses( teamName, @@ -9689,6 +9687,9 @@ export class TeamProvisioningService { this.provisioningRunByTeam.set(request.teamName, runId); run.onProgress(run.progress); await this.clearPersistedLaunchState(request.teamName); + for (const lane of run.mixedSecondaryLanes ?? []) { + await this.publishMixedSecondaryLaneStatusChange(run, lane); + } // Read existing tasks to include in teammate prompts for work resumption const taskReader = new TeamTaskReader(); @@ -11813,6 +11814,53 @@ export class TeamProvisioningService { return statuses; } + private buildLiveLaunchSnapshotForRun( + run: ProvisioningRun, + launchPhase: PersistedTeamLaunchPhase = run.provisioningComplete ? 'finished' : 'active' + ): PersistedTeamLaunchSnapshot | null { + const mixedSnapshot = this.buildMixedPersistedLaunchSnapshotForRun(run, launchPhase); + if (mixedSnapshot) { + return mixedSnapshot; + } + + if (!run.isLaunch || !run.expectedMembers || run.expectedMembers.length === 0) { + return null; + } + + return snapshotFromRuntimeMemberStatuses({ + teamName: run.teamName, + expectedMembers: run.expectedMembers, + leadSessionId: run.detectedSessionId ?? undefined, + launchPhase, + statuses: this.buildRuntimeSpawnStatusRecord(run), + }); + } + + private emitMemberSpawnChange( + run: Pick, + memberName: string + ) { + this.teamChangeEmitter?.({ + type: 'member-spawn', + teamName: run.teamName, + runId: run.runId, + detail: memberName, + }); + } + + private async publishMixedSecondaryLaneStatusChange( + run: ProvisioningRun, + lane: MixedSecondaryRuntimeLaneState + ): Promise { + if (run.isLaunch) { + await this.persistLaunchStateSnapshot(run, this.getMixedSecondaryLaunchPhase(run)); + } + if (!this.isCurrentTrackedRun(run)) { + return; + } + this.emitMemberSpawnChange(run, lane.member.name); + } + private buildMixedPersistedLaunchSnapshotForRun( run: ProvisioningRun, launchPhase: PersistedTeamLaunchPhase @@ -11888,27 +11936,14 @@ export class TeamProvisioningService { ? 'finished' : 'active' ): Promise { - const mixedSnapshot = this.buildMixedPersistedLaunchSnapshotForRun(run, launchPhase); - if (mixedSnapshot) { - await this.launchStateStore.write(run.teamName, mixedSnapshot); - return mixedSnapshot; - } - - if (!run.isLaunch || !run.expectedMembers || run.expectedMembers.length === 0) { + const snapshot = this.buildLiveLaunchSnapshotForRun(run, launchPhase); + if (!snapshot) { if (run.isLaunch) { await this.clearPersistedLaunchState(run.teamName); } return null; } - const snapshot = snapshotFromRuntimeMemberStatuses({ - teamName: run.teamName, - expectedMembers: run.expectedMembers, - leadSessionId: run.detectedSessionId ?? undefined, - launchPhase, - statuses: this.buildRuntimeSpawnStatusRecord(run), - }); - if (snapshot.teamLaunchState === 'clean_success' && launchPhase !== 'active') { await this.clearPersistedLaunchState(run.teamName); return null; @@ -11949,6 +11984,7 @@ export class TeamProvisioningService { }; lane.warnings = []; lane.diagnostics = [message]; + await this.publishMixedSecondaryLaneStatusChange(run, lane); return; } @@ -11977,8 +12013,7 @@ export class TeamProvisioningService { memberName: lane.member.name, cwd: run.request.cwd, }); - - await this.persistLaunchStateSnapshot(run, this.getMixedSecondaryLaunchPhase(run)); + await this.publishMixedSecondaryLaneStatusChange(run, lane); const previousLaunchState = await this.launchStateStore.read(run.teamName); try { @@ -12050,7 +12085,7 @@ export class TeamProvisioningService { this.deleteSecondaryRuntimeRun(run.teamName, lane.laneId); } - await this.persistLaunchStateSnapshot(run, this.getMixedSecondaryLaunchPhase(run)); + await this.publishMixedSecondaryLaneStatusChange(run, lane); } private async stopSingleMixedSecondaryRuntimeLane( @@ -12129,6 +12164,7 @@ export class TeamProvisioningService { diagnostics: ['OpenCode runtime adapter is not registered for mixed team launch.'], }; lane.diagnostics = lane.result.diagnostics; + await this.publishMixedSecondaryLaneStatusChange(run, lane); } return this.persistLaunchStateSnapshot(run, 'finished'); } @@ -12140,12 +12176,187 @@ export class TeamProvisioningService { return this.persistLaunchStateSnapshot(run, this.getMixedSecondaryLaunchPhase(run)); } + private async recoverStaleMixedSecondaryLaunchSnapshot( + teamName: string, + bootstrapSnapshot: PersistedTeamLaunchSnapshot | null, + persistedSnapshot: PersistedTeamLaunchSnapshot | null + ): Promise { + if (persistedSnapshot && this.hasMixedLaunchMetadata(persistedSnapshot)) { + return persistedSnapshot; + } + + const teamMeta = await this.teamMetaStore.getMeta(teamName).catch(() => null); + const leadProviderId = normalizeOptionalTeamProviderId(teamMeta?.providerId); + if (!leadProviderId || leadProviderId === 'opencode') { + return null; + } + + const membersMeta = await this.membersMetaStore.getMeta(teamName).catch(() => null); + const activeMembers = (membersMeta?.members ?? []).filter( + (member) => !member.removedAt && !isLeadMember({ name: member.name }) + ); + if (activeMembers.length === 0) { + return null; + } + + const laneIndex = await readOpenCodeRuntimeLaneIndex(getTeamsBasePath(), teamName).catch( + () => ({ + version: 1 as const, + updatedAt: nowIso(), + lanes: {} as Record< + string, + { + laneId: string; + state: 'active' | 'stopped' | 'degraded'; + updatedAt: string; + diagnostics?: string[]; + } + >, + }) + ); + const bootstrapStatuses = snapshotToMemberSpawnStatuses(bootstrapSnapshot); + const leadDefaults = { + providerId: leadProviderId, + providerBackendId: + migrateProviderBackendId( + leadProviderId, + teamMeta?.providerBackendId ?? membersMeta?.providerBackendId + ) ?? null, + selectedFastMode: teamMeta?.fastMode, + resolvedFastMode: + typeof teamMeta?.launchIdentity?.resolvedFastMode === 'boolean' + ? teamMeta.launchIdentity.resolvedFastMode + : null, + launchIdentity: teamMeta?.launchIdentity ?? null, + }; + const primaryMembers: TeamMember[] = []; + const secondaryMembers: Array<{ + laneId: string; + member: TeamMember; + leadDefaults: typeof leadDefaults; + evidence?: { + launchState?: MemberLaunchState; + agentToolAccepted?: boolean; + runtimeAlive?: boolean; + bootstrapConfirmed?: boolean; + hardFailure?: boolean; + hardFailureReason?: string; + diagnostics?: string[]; + }; + pendingReason?: string; + }> = []; + let recoveredAny = false; + + for (const member of activeMembers) { + const laneIdentity = buildPlannedMemberLaneIdentity({ + leadProviderId, + member: { + name: member.name, + providerId: normalizeOptionalTeamProviderId(member.providerId), + }, + }); + + if ( + laneIdentity.laneKind !== 'secondary' || + laneIdentity.laneOwnerProviderId !== 'opencode' + ) { + primaryMembers.push(member); + continue; + } + + let laneEntry = laneIndex.lanes[laneIdentity.laneId]; + if (laneEntry?.state === 'active') { + const recovery = await recoverStaleOpenCodeRuntimeLaneIndexEntry({ + teamsBasePath: getTeamsBasePath(), + teamName, + laneId: laneIdentity.laneId, + }); + if (recovery.stale) { + recoveredAny = true; + laneEntry = { + laneId: laneIdentity.laneId, + state: 'degraded', + updatedAt: nowIso(), + diagnostics: recovery.diagnostics, + }; + } + } + + if (laneEntry?.state === 'degraded') { + recoveredAny = true; + const diagnostics = laneEntry.diagnostics?.length + ? [...laneEntry.diagnostics] + : [`OpenCode lane ${laneIdentity.laneId} is degraded and requires stop + relaunch.`]; + secondaryMembers.push({ + laneId: laneIdentity.laneId, + member, + leadDefaults, + evidence: { + launchState: 'failed_to_start', + agentToolAccepted: false, + runtimeAlive: false, + bootstrapConfirmed: false, + hardFailure: true, + hardFailureReason: diagnostics[0], + diagnostics, + }, + }); + continue; + } + + secondaryMembers.push({ + laneId: laneIdentity.laneId, + member, + leadDefaults, + pendingReason: 'Waiting for OpenCode secondary lane recovery.', + }); + } + + if (!recoveredAny) { + return null; + } + + const primaryStatuses = Object.fromEntries( + primaryMembers.map((member) => [ + member.name, + bootstrapStatuses[member.name] ?? createInitialMemberSpawnStatusEntry(), + ]) + ); + const recoveredSnapshot = this.runtimeLaneCoordinator.buildAggregateLaunchSnapshot({ + teamName, + leadSessionId: persistedSnapshot?.leadSessionId ?? bootstrapSnapshot?.leadSessionId, + launchPhase: + persistedSnapshot?.launchPhase === 'active' + ? 'active' + : bootstrapSnapshot?.launchPhase === 'active' + ? 'active' + : 'reconciled', + leadDefaults, + primaryMembers, + primaryStatuses, + secondaryMembers, + }); + await this.launchStateStore.write(teamName, recoveredSnapshot); + return recoveredSnapshot; + } + private async reconcilePersistedLaunchState(teamName: string): Promise<{ snapshot: ReturnType | null; statuses: Record; }> { const bootstrapSnapshot = await readBootstrapLaunchSnapshot(teamName); const persisted = await this.launchStateStore.read(teamName); + const recoveredMixedSnapshot = await this.recoverStaleMixedSecondaryLaunchSnapshot( + teamName, + bootstrapSnapshot, + persisted + ); + if (recoveredMixedSnapshot) { + return { + snapshot: recoveredMixedSnapshot, + statuses: snapshotToMemberSpawnStatuses(recoveredMixedSnapshot), + }; + } const preferredSnapshot = choosePreferredLaunchSnapshot(bootstrapSnapshot, persisted); if (preferredSnapshot && preferredSnapshot === bootstrapSnapshot) { return { diff --git a/src/main/services/team/opencode/store/OpenCodeRuntimeManifestEvidenceReader.ts b/src/main/services/team/opencode/store/OpenCodeRuntimeManifestEvidenceReader.ts index a2efbc5e..f638b273 100644 --- a/src/main/services/team/opencode/store/OpenCodeRuntimeManifestEvidenceReader.ts +++ b/src/main/services/team/opencode/store/OpenCodeRuntimeManifestEvidenceReader.ts @@ -153,6 +153,37 @@ export function getOpenCodeRuntimeManifestPath( ); } +export async function inspectOpenCodeRuntimeLaneStorage(params: { + teamsBasePath: string; + teamName: string; + laneId: string; +}): Promise<{ + laneDirectoryExists: boolean; + hasStateOnDisk: boolean; + fileNames: string[]; +}> { + const laneDir = getOpenCodeTeamRuntimeLaneDirectory( + params.teamsBasePath, + params.teamName, + params.laneId + ); + const laneDirectoryExists = await fileExists(laneDir); + if (!laneDirectoryExists) { + return { + laneDirectoryExists: false, + hasStateOnDisk: false, + fileNames: [], + }; + } + + const fileNames = (await readdir(laneDir).catch(() => [] as string[])).sort(); + return { + laneDirectoryExists: true, + hasStateOnDisk: fileNames.length > 0, + fileNames, + }; +} + export function getOpenCodeLaneScopedRuntimeFilePath(params: { teamsBasePath: string; teamName: string; @@ -284,6 +315,51 @@ export async function clearOpenCodeRuntimeLaneStorage(params: { await removeOpenCodeRuntimeLaneIndexEntry(params); } +export async function recoverStaleOpenCodeRuntimeLaneIndexEntry(params: { + teamsBasePath: string; + teamName: string; + laneId: string; +}): Promise<{ + stale: boolean; + degraded: boolean; + diagnostics: string[]; +}> { + const index = await readOpenCodeRuntimeLaneIndex(params.teamsBasePath, params.teamName); + const entry = index.lanes[params.laneId]; + if (!entry || entry.state !== 'active') { + return { + stale: false, + degraded: false, + diagnostics: [], + }; + } + + const storage = await inspectOpenCodeRuntimeLaneStorage(params); + if (storage.hasStateOnDisk) { + return { + stale: false, + degraded: false, + diagnostics: [], + }; + } + + const diagnostics = [ + `OpenCode lane ${params.laneId} is marked active in lanes.json, but no lane state exists on disk.`, + ]; + await upsertOpenCodeRuntimeLaneIndexEntry({ + teamsBasePath: params.teamsBasePath, + teamName: params.teamName, + laneId: params.laneId, + state: 'degraded', + diagnostics, + }); + return { + stale: true, + degraded: true, + diagnostics, + }; +} + export async function migrateLegacyOpenCodeRuntimeState(params: { teamsBasePath: string; teamName: string; diff --git a/test/main/services/team/OpenCodeRuntimeManifestEvidenceReader.test.ts b/test/main/services/team/OpenCodeRuntimeManifestEvidenceReader.test.ts index 87b00cc2..99a36fe1 100644 --- a/test/main/services/team/OpenCodeRuntimeManifestEvidenceReader.test.ts +++ b/test/main/services/team/OpenCodeRuntimeManifestEvidenceReader.test.ts @@ -9,8 +9,10 @@ import { getOpenCodeLaneScopedRuntimeFilePath, getOpenCodeRuntimeLaneIndexPath, getOpenCodeTeamRuntimeDirectory, + inspectOpenCodeRuntimeLaneStorage, migrateLegacyOpenCodeRuntimeState, readOpenCodeRuntimeLaneIndex, + recoverStaleOpenCodeRuntimeLaneIndexEntry, upsertOpenCodeRuntimeLaneIndexEntry, } from '../../../../src/main/services/team/opencode/store/OpenCodeRuntimeManifestEvidenceReader'; @@ -238,4 +240,53 @@ describe('OpenCodeRuntimeManifestEvidenceReader migration', () => { capabilitySnapshotId: 'cap-1', }); }); + + it('reports missing lane storage when an active lane index entry has no lane dir or state', async () => { + const teamName = 'team-epsilon'; + const laneId = 'secondary:opencode:alice'; + + await upsertOpenCodeRuntimeLaneIndexEntry({ + teamsBasePath: tempDir, + teamName, + laneId, + state: 'active', + }); + + await expect( + inspectOpenCodeRuntimeLaneStorage({ + teamsBasePath: tempDir, + teamName, + laneId, + }) + ).resolves.toEqual({ + laneDirectoryExists: false, + hasStateOnDisk: false, + fileNames: [], + }); + + const result = await recoverStaleOpenCodeRuntimeLaneIndexEntry({ + teamsBasePath: tempDir, + teamName, + laneId, + }); + + expect(result).toEqual({ + stale: true, + degraded: true, + diagnostics: [ + `OpenCode lane ${laneId} is marked active in lanes.json, but no lane state exists on disk.`, + ], + }); + await expect(readOpenCodeRuntimeLaneIndex(tempDir, teamName)).resolves.toMatchObject({ + lanes: { + [laneId]: { + laneId, + state: 'degraded', + diagnostics: [ + `OpenCode lane ${laneId} is marked active in lanes.json, but no lane state exists on disk.`, + ], + }, + }, + }); + }); }); diff --git a/test/main/services/team/TeamProvisioningService.test.ts b/test/main/services/team/TeamProvisioningService.test.ts index 1e476288..7c5d4a35 100644 --- a/test/main/services/team/TeamProvisioningService.test.ts +++ b/test/main/services/team/TeamProvisioningService.test.ts @@ -268,6 +268,54 @@ function writeBootstrapState( ); } +function writeTeamMeta( + teamName: string, + overrides: Record = {} +): void { + const teamDir = path.join(tempTeamsBase, teamName); + fs.mkdirSync(teamDir, { recursive: true }); + fs.writeFileSync( + path.join(teamDir, 'team.meta.json'), + `${JSON.stringify( + { + version: 1, + cwd: '/Users/test/proj', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.4', + effort: 'medium', + createdAt: Date.now(), + ...overrides, + }, + null, + 2 + )}\n`, + 'utf8' + ); +} + +function writeMembersMeta( + teamName: string, + members: Record[], + providerBackendId = 'codex-native' +): void { + const teamDir = path.join(tempTeamsBase, teamName); + fs.mkdirSync(teamDir, { recursive: true }); + fs.writeFileSync( + path.join(teamDir, 'members.meta.json'), + `${JSON.stringify( + { + version: 1, + providerBackendId, + members, + }, + null, + 2 + )}\n`, + 'utf8' + ); +} + function createMemberSpawnStatusEntry( overrides: Record = {} ): Record { @@ -1733,6 +1781,7 @@ describe('TeamProvisioningService', () => { (svc as any).launchStateStore = { read: vi.fn(async () => null), write: vi.fn(async () => {}), + clear: vi.fn(async () => {}), }; const run = createMemberSpawnRun({ @@ -4932,4 +4981,155 @@ describe('TeamProvisioningService', () => { agentToolAccepted: true, }); }); + + it('recovers stale mixed secondary lanes when lanes.json says active but lane state is missing', async () => { + const teamName = 'signal-ops-6212'; + writeTeamMeta(teamName, { + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.4', + }); + writeMembersMeta(teamName, [ + { + name: 'atlas', + providerId: 'opencode', + model: 'opencode/nemotron-3-super-free', + }, + { + name: 'bob', + providerId: 'codex', + model: 'gpt-5.4', + }, + { + name: 'nova', + providerId: 'codex', + model: 'gpt-5.4', + }, + { + name: 'tom', + providerId: 'opencode', + model: 'opencode/minimax-m2.5-free', + }, + ]); + writeLaunchConfig(teamName, '/Users/test/proj', 'lead-session', ['bob', 'nova']); + writeBootstrapState(teamName, [ + { name: 'bob', status: 'registered' }, + { name: 'nova', status: 'registered' }, + ]); + await upsertOpenCodeRuntimeLaneIndexEntry({ + teamsBasePath: tempTeamsBase, + teamName, + laneId: 'secondary:opencode:atlas', + state: 'active', + }); + await upsertOpenCodeRuntimeLaneIndexEntry({ + teamsBasePath: tempTeamsBase, + teamName, + laneId: 'secondary:opencode:tom', + state: 'active', + }); + + const svc = new TeamProvisioningService(); + const result = await svc.getMemberSpawnStatuses(teamName); + + expect(result.teamLaunchState).toBe('partial_failure'); + expect(result.launchPhase).toBe('reconciled'); + expect(result.expectedMembers).toEqual(expect.arrayContaining(['atlas', 'bob', 'nova', 'tom'])); + expect(result.statuses.atlas).toMatchObject({ + status: 'error', + launchState: 'failed_to_start', + error: expect.stringContaining('no lane state exists on disk'), + }); + expect(result.statuses.tom).toMatchObject({ + status: 'error', + launchState: 'failed_to_start', + error: expect.stringContaining('no lane state exists on disk'), + }); + await expect(readOpenCodeRuntimeLaneIndex(tempTeamsBase, teamName)).resolves.toMatchObject({ + lanes: { + 'secondary:opencode:atlas': { + state: 'degraded', + }, + 'secondary:opencode:tom': { + state: 'degraded', + }, + }, + }); + await expect(fsPromises.readFile(getTeamLaunchStatePath(teamName), 'utf8')).resolves.toContain( + '"secondary:opencode:atlas"' + ); + }); + + it('includes queued OpenCode secondary lanes in live spawn statuses before the final mixed snapshot settles', async () => { + const svc = new TeamProvisioningService(); + vi.spyOn(svc as any, 'refreshMemberSpawnStatusesFromLeadInbox').mockResolvedValue(undefined); + vi.spyOn(svc as any, 'maybeAuditMemberSpawnStatuses').mockResolvedValue(undefined); + + const run = createMemberSpawnRun({ + teamName: 'mixed-live-team', + runId: 'run-mixed-live-1', + expectedMembers: ['bob'], + memberSpawnStatuses: new Map([ + [ + 'bob', + createMemberSpawnStatusEntry({ + status: 'online', + launchState: 'confirmed_alive', + runtimeAlive: true, + bootstrapConfirmed: true, + livenessSource: 'heartbeat', + }), + ], + ]), + }); + run.isLaunch = true; + run.request = { + teamName: 'mixed-live-team', + cwd: '/tmp/mixed-live-team', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.4', + members: [], + }; + run.effectiveMembers = [ + { + name: 'bob', + providerId: 'codex', + model: 'gpt-5.4', + }, + ]; + run.mixedSecondaryLanes = [ + { + laneId: 'secondary:opencode:atlas', + providerId: 'opencode', + member: { + name: 'atlas', + providerId: 'opencode', + model: 'opencode/nemotron-3-super-free', + }, + runId: null, + state: 'queued', + result: null, + warnings: [], + diagnostics: [], + }, + ]; + run.detectedSessionId = 'lead-session'; + + (svc as any).runs.set(run.runId, run); + (svc as any).provisioningRunByTeam.set(run.teamName, run.runId); + + const result = await svc.getMemberSpawnStatuses(run.teamName); + + expect(result.teamLaunchState).toBe('partial_pending'); + expect(result.expectedMembers).toEqual(expect.arrayContaining(['bob', 'atlas'])); + expect(result.statuses.bob).toMatchObject({ + status: 'online', + launchState: 'confirmed_alive', + }); + expect(result.statuses.atlas).toMatchObject({ + status: 'spawning', + launchState: 'starting', + }); + }); });