From 1d3d7e1f1f47f2f4a98cdbe24bad7076088d7738 Mon Sep 17 00:00:00 2001 From: 777genius Date: Sun, 19 Apr 2026 20:17:41 +0300 Subject: [PATCH] fix(team): harden teammate restart lifecycle --- src/main/services/team/TeamDataService.ts | 2 + .../services/team/TeamProvisioningService.ts | 223 +++- src/renderer/store/slices/teamSlice.ts | 13 +- .../services/team/TeamDataService.test.ts | 122 +++ .../team/TeamProvisioningService.test.ts | 986 +++++++++++++++++- test/renderer/store/teamSlice.test.ts | 16 + 6 files changed, 1332 insertions(+), 30 deletions(-) diff --git a/src/main/services/team/TeamDataService.ts b/src/main/services/team/TeamDataService.ts index 0fc2be98..f6c8f63f 100644 --- a/src/main/services/team/TeamDataService.ts +++ b/src/main/services/team/TeamDataService.ts @@ -1311,6 +1311,7 @@ export class TeamDataService { } nextByName.add(name.toLowerCase()); const prev = existingByName.get(name.toLowerCase()); + const isSameActiveMember = Boolean(prev && prev.removedAt == null); return { name, role: member.role?.trim() || undefined, @@ -1322,6 +1323,7 @@ export class TeamDataService { ? member.effort : undefined, agentType: prev?.agentType ?? 'general-purpose', + agentId: isSameActiveMember ? prev?.agentId : undefined, color: prev?.color, joinedAt: prev?.joinedAt ?? joinedAt, removedAt: undefined, diff --git a/src/main/services/team/TeamProvisioningService.ts b/src/main/services/team/TeamProvisioningService.ts index 25654973..6e6a0fd3 100644 --- a/src/main/services/team/TeamProvisioningService.ts +++ b/src/main/services/team/TeamProvisioningService.ts @@ -1065,19 +1065,57 @@ function sleep(ms: number): Promise { async function waitForPidsToExit( pids: readonly number[], opts: { timeoutMs: number; pollMs: number } -): Promise { +): Promise { if (pids.length === 0) { - return; + return []; } const deadline = Date.now() + opts.timeoutMs; + let remainingPids = [...new Set(pids)]; while (Date.now() < deadline) { - const remaining = pids.filter((pid) => isProcessAlive(pid)); - if (remaining.length === 0) { - return; + remainingPids = remainingPids.filter((pid) => isProcessAlive(pid)); + if (remainingPids.length === 0) { + return []; } await sleep(opts.pollMs); } + + return remainingPids; +} + +async function waitForTmuxPanesToExit( + paneIds: readonly string[], + opts: { timeoutMs: number; pollMs: number } +): Promise { + const normalizedPaneIds = [...new Set(paneIds.map((paneId) => paneId.trim()).filter(Boolean))]; + if (normalizedPaneIds.length === 0) { + return []; + } + + const deadline = Date.now() + opts.timeoutMs; + let remainingPaneIds = normalizedPaneIds; + let lastError: unknown = null; + while (Date.now() < deadline) { + let livePanePidById: Map; + try { + livePanePidById = await listTmuxPanePidsForCurrentPlatform(remainingPaneIds); + lastError = null; + } catch (error) { + lastError = error; + await sleep(opts.pollMs); + continue; + } + remainingPaneIds = remainingPaneIds.filter((paneId) => livePanePidById.has(paneId)); + if (remainingPaneIds.length === 0) { + return []; + } + await sleep(opts.pollMs); + } + + if (lastError) { + throw lastError; + } + return remainingPaneIds; } async function waitForChildProcessToExit( @@ -4100,6 +4138,23 @@ export class TeamProvisioningService { } } + private clearMemberSpawnToolTracking(run: ProvisioningRun, memberName: string): void { + let removed = false; + for (const [toolUseId, trackedMemberName] of run.memberSpawnToolUseIds.entries()) { + if (trackedMemberName !== memberName) continue; + run.memberSpawnToolUseIds.delete(toolUseId); + removed = true; + } + + if (removed) { + this.appendMemberBootstrapDiagnostic( + run, + memberName, + 'cleared stale spawn tool tracking before manual restart' + ); + } + } + /** * Update spawn status for a specific team member and emit a change event. */ @@ -4182,6 +4237,11 @@ export class TeamProvisioningService { next.launchState = 'failed_to_start'; } else if (status === 'offline') { Object.assign(next, createInitialMemberSpawnStatusEntry(), { updatedAt }); + next.error = undefined; + next.hardFailureReason = undefined; + next.livenessSource = undefined; + next.firstSpawnAcceptedAt = undefined; + next.lastHeartbeatAt = undefined; } next.launchState = deriveMemberLaunchState(next); @@ -4202,8 +4262,12 @@ export class TeamProvisioningService { } run.memberSpawnStatuses.set(memberName, next); - if ((status === 'online' && next.bootstrapConfirmed) || status === 'offline') { - run.pendingMemberRestarts.delete(memberName); + if ( + (status === 'online' && (next.bootstrapConfirmed || livenessSource === 'process')) || + status === 'offline' || + status === 'error' + ) { + run.pendingMemberRestarts?.delete(memberName); } this.syncMemberLaunchGraceCheck(run, memberName, next); @@ -4441,20 +4505,38 @@ export class TeamProvisioningService { throw new Error(`Team "${teamName}" is not currently running`); } - const config = await this.configReader.getConfig(teamName); - const configuredMembers = config?.members ?? []; - let metaMembers: Awaited> = []; - try { - metaMembers = await this.membersMetaStore.getMembers(teamName); - } catch { - metaMembers = []; - } + const readCurrentConfiguredMember = async (): Promise<{ + config: TeamConfig | null; + configuredMembers: TeamConfig['members']; + metaMembers: Awaited>; + configuredMember: ReturnType; + }> => { + const config = await this.configReader.getConfig(teamName); + const configuredMembers = config?.members ?? []; + let metaMembers: Awaited> = []; + try { + metaMembers = await this.membersMetaStore.getMembers(teamName); + } catch { + metaMembers = []; + } - const configuredMember = this.resolveEffectiveConfiguredMember( - configuredMembers, - metaMembers, - memberName - ); + return { + config, + configuredMembers, + metaMembers, + configuredMember: this.resolveEffectiveConfiguredMember( + configuredMembers, + metaMembers, + memberName + ), + }; + }; + + let { config, configuredMembers, metaMembers, configuredMember } = + await readCurrentConfiguredMember(); + if (!config) { + throw new Error(`Team "${teamName}" configuration is no longer available`); + } if (!configuredMember) { throw new Error(`Member "${memberName}" is not configured in team "${teamName}"`); } @@ -4484,6 +4566,8 @@ export class TeamProvisioningService { ); } + this.agentRuntimeSnapshotCache.delete(teamName); + this.liveTeamAgentRuntimeMetadataCache.delete(teamName); const liveRuntimeByMember = await this.getLiveTeamAgentRuntimeMetadata(teamName); const livePids = new Set(); let hasAliveRuntimeWithoutPid = false; @@ -4506,6 +4590,7 @@ export class TeamProvisioningService { ); } + const tmuxPaneIdsToVerify: string[] = []; for (const persistedRuntimeMember of persistedRuntimeMembers) { const paneId = typeof persistedRuntimeMember.tmuxPaneId === 'string' @@ -4515,6 +4600,7 @@ export class TeamProvisioningService { if (!paneId || backendType !== 'tmux') { continue; } + tmuxPaneIdsToVerify.push(paneId); try { killTmuxPaneForCurrentPlatformSync(paneId); logger.info( @@ -4542,15 +4628,73 @@ export class TeamProvisioningService { } if (livePids.size > 0) { - await waitForPidsToExit(Array.from(livePids), { + const lingeringPids = await waitForPidsToExit(Array.from(livePids), { timeoutMs: 1_500, pollMs: 100, }); + if (lingeringPids.length > 0) { + throw new Error( + `Restart for teammate "${memberName}" is still waiting for the previous process to exit (${lingeringPids.join(', ')}).` + ); + } + } + + if (tmuxPaneIdsToVerify.length > 0) { + let lingeringPaneIds: string[]; + try { + lingeringPaneIds = await waitForTmuxPanesToExit(tmuxPaneIdsToVerify, { + timeoutMs: 1_500, + pollMs: 100, + }); + } catch (error) { + throw new Error( + `Restart for teammate "${memberName}" could not verify that the previous tmux pane exited: ${ + error instanceof Error ? error.message : String(error) + }` + ); + } + if (lingeringPaneIds.length > 0) { + throw new Error( + `Restart for teammate "${memberName}" is still waiting for the previous tmux pane to exit (${lingeringPaneIds.join(', ')}).` + ); + } + } + + this.setMemberSpawnStatus(run, memberName, 'offline'); + + const latestRunId = this.getAliveRunId(teamName); + const currentRun = this.runs.get(runId); + if ( + latestRunId !== runId || + !currentRun || + currentRun !== run || + currentRun.processKilled || + currentRun.cancelRequested + ) { + throw new Error(`Team "${teamName}" is not currently running`); + } + + ({ config, configuredMembers, metaMembers, configuredMember } = + await readCurrentConfiguredMember()); + if (!config) { + throw new Error(`Team "${teamName}" configuration disappeared while restart was in progress`); + } + if (!configuredMember) { + throw new Error( + `Member "${memberName}" is no longer configured in team "${teamName}" after restart preparation` + ); + } + if (configuredMember.removedAt) { + throw new Error(`Member "${memberName}" was removed while restart was in progress`); + } + if (isLeadMember({ name: memberName, agentType: configuredMember.agentType })) { + throw new Error('Lead restart is not supported from member controls'); } this.agentRuntimeSnapshotCache.delete(teamName); this.liveTeamAgentRuntimeMetadataCache.delete(teamName); - this.setMemberSpawnStatus(run, memberName, 'offline'); + this.resetRuntimeToolActivity(run, memberName); + this.clearMemberSpawnToolTracking(run, memberName); this.setMemberSpawnStatus(run, memberName, 'spawning'); this.appendMemberBootstrapDiagnostic(run, memberName, 'manual restart requested from UI'); run.pendingMemberRestarts.set(memberName, { @@ -4613,6 +4757,10 @@ export class TeamProvisioningService { return; } if (!entry.firstSpawnAcceptedAt) { + if (existing) { + clearTimeout(existing); + this.pendingTimeouts.delete(key); + } return; } const remainingMs = @@ -8402,6 +8550,23 @@ export class TeamProvisioningService { }); } + for (const member of metaMembers) { + const memberName = typeof member?.name === 'string' ? member.name.trim() : ''; + if (!memberName || isLeadMember({ name: memberName, agentType: member.agentType })) { + continue; + } + const runtimeModel = + member.model?.trim() || + this.findConfiguredMemberModel(configuredMembers, memberName) || + this.findEffectiveRunMemberModel(run, memberName); + upsertMetadata(memberName, { + ...(runtimeModel ? { model: runtimeModel } : {}), + ...(typeof member.agentId === 'string' && member.agentId.trim() + ? { agentId: member.agentId.trim() } + : {}), + }); + } + for (const member of run?.effectiveMembers ?? []) { const memberName = member.name?.trim() ?? ''; if (!memberName || isLeadMember(member) || memberName.toLowerCase() === 'user') { @@ -8448,12 +8613,14 @@ export class TeamProvisioningService { ? processPid : undefined; const status = this.findTrackedMemberSpawnStatus(run, memberName); + const mayInferAliveFromStatusOnly = status?.launchState !== 'failed_to_start'; const alive = typeof resolvedPid === 'number' && resolvedPid > 0 ? true : backendType === 'tmux' ? false - : Boolean(status?.runtimeAlive || status?.bootstrapConfirmed); + : mayInferAliveFromStatusOnly && + Boolean(status?.runtimeAlive || status?.bootstrapConfirmed); metadataByMember.set(memberName, { ...metadata, alive, @@ -9670,6 +9837,16 @@ export class TeamProvisioningService { } if (outcome === 'already_running') { + if (run.pendingMemberRestarts.has(memberName)) { + run.pendingMemberRestarts.delete(memberName); + this.setMemberSpawnStatus( + run, + memberName, + 'error', + buildRestartStillRunningReason(memberName) + ); + return true; + } this.setMemberSpawnStatus(run, memberName, 'online', undefined, 'process'); return true; } diff --git a/src/renderer/store/slices/teamSlice.ts b/src/renderer/store/slices/teamSlice.ts index 0b249482..d923fd8a 100644 --- a/src/renderer/store/slices/teamSlice.ts +++ b/src/renderer/store/slices/teamSlice.ts @@ -4179,11 +4179,14 @@ export const createTeamSlice: StateCreator = (set, }, restartMember: async (teamName: string, memberName: string) => { - await unwrapIpc('team:restartMember', () => api.teams.restartMember(teamName, memberName)); - await Promise.all([ - get().fetchMemberSpawnStatuses(teamName), - get().fetchTeamAgentRuntime(teamName), - ]); + try { + await unwrapIpc('team:restartMember', () => api.teams.restartMember(teamName, memberName)); + } finally { + await Promise.allSettled([ + get().fetchMemberSpawnStatuses(teamName), + get().fetchTeamAgentRuntime(teamName), + ]); + } }, removeMember: async (teamName: string, memberName: string) => { diff --git a/test/main/services/team/TeamDataService.test.ts b/test/main/services/team/TeamDataService.test.ts index 597d3d55..79ae0f0a 100644 --- a/test/main/services/team/TeamDataService.test.ts +++ b/test/main/services/team/TeamDataService.test.ts @@ -528,6 +528,128 @@ describe('TeamDataService', () => { expect(writeMembers).not.toHaveBeenCalled(); }); + it('preserves agentId for existing members during replaceMembers', async () => { + const writeMembers = vi.fn(async () => {}); + const membersMetaStore = { + getMembers: vi.fn(async () => [ + { + name: 'alice', + role: 'Developer', + providerId: 'codex', + model: 'gpt-5.4-mini', + effort: 'medium', + agentType: 'general-purpose', + agentId: 'alice@runtime-team', + joinedAt: 1710000000000, + }, + ]), + writeMembers, + } as never; + + const service = new TeamDataService( + { getConfig: vi.fn(), listTeams: vi.fn() } as never, + { getTasks: vi.fn(async () => []) } as never, + { listInboxNames: vi.fn(async () => []), getMessages: vi.fn(async () => []) } as never, + {} as never, + {} as never, + { resolveMembers: vi.fn(() => []) } as never, + { + getState: vi.fn(async () => ({ teamName: 'runtime-team', reviewers: [], tasks: {} })), + } as never, + {} as never, + membersMetaStore, + { readMessages: vi.fn(async () => []) } as never + ); + + await service.replaceMembers('runtime-team', { + members: [ + { + name: 'alice', + role: 'Reviewer', + providerId: 'codex', + model: 'gpt-5.2', + effort: 'high', + }, + ], + }); + + expect(writeMembers).toHaveBeenCalledWith( + 'runtime-team', + expect.arrayContaining([ + expect.objectContaining({ + name: 'alice', + role: 'Reviewer', + providerId: 'codex', + model: 'gpt-5.2', + effort: 'high', + agentId: 'alice@runtime-team', + }), + ]) + ); + }); + + it('does not carry over agentId from a previously removed member with the same name', async () => { + const writeMembers = vi.fn(async () => {}); + const membersMetaStore = { + getMembers: vi.fn(async () => [ + { + name: 'alice', + role: 'Developer', + providerId: 'codex', + model: 'gpt-5.4-mini', + effort: 'medium', + agentType: 'general-purpose', + agentId: 'alice@old-runtime-team', + joinedAt: 1710000000000, + removedAt: 1715000000000, + }, + ]), + writeMembers, + } as never; + + const service = new TeamDataService( + { getConfig: vi.fn(), listTeams: vi.fn() } as never, + { getTasks: vi.fn(async () => []) } as never, + { listInboxNames: vi.fn(async () => []), getMessages: vi.fn(async () => []) } as never, + {} as never, + {} as never, + { resolveMembers: vi.fn(() => []) } as never, + { + getState: vi.fn(async () => ({ teamName: 'runtime-team', reviewers: [], tasks: {} })), + } as never, + {} as never, + membersMetaStore, + { readMessages: vi.fn(async () => []) } as never + ); + + await service.replaceMembers('runtime-team', { + members: [ + { + name: 'alice', + role: 'Reviewer', + providerId: 'codex', + model: 'gpt-5.2', + effort: 'high', + }, + ], + }); + + expect(writeMembers).toHaveBeenCalledWith( + 'runtime-team', + expect.arrayContaining([ + expect.objectContaining({ + name: 'alice', + role: 'Reviewer', + providerId: 'codex', + model: 'gpt-5.2', + effort: 'high', + agentId: undefined, + removedAt: undefined, + }), + ]) + ); + }); + it('keeps getTeamData read-only and skips kanban garbage-collect', async () => { const order: string[] = []; const tasks: TeamTask[] = [ diff --git a/test/main/services/team/TeamProvisioningService.test.ts b/test/main/services/team/TeamProvisioningService.test.ts index 5ad91a5c..24a12ce0 100644 --- a/test/main/services/team/TeamProvisioningService.test.ts +++ b/test/main/services/team/TeamProvisioningService.test.ts @@ -50,6 +50,10 @@ vi.mock('@main/utils/childProcess', () => ({ killProcessTree: vi.fn(), })); +vi.mock('@main/utils/processKill', () => ({ + killProcessByPid: vi.fn(), +})); + vi.mock('@main/utils/pathDecoder', async (importOriginal) => { const actual = await importOriginal(); return { @@ -74,9 +78,13 @@ import { createPersistedLaunchSnapshot } from '@main/services/team/TeamLaunchSta import { getTeamLaunchStatePath } from '@main/services/team/TeamLaunchStateStore'; import { ClaudeBinaryResolver } from '@main/services/team/ClaudeBinaryResolver'; import { spawnCli } from '@main/utils/childProcess'; +import { killProcessByPid } from '@main/utils/processKill'; import { encodePath } from '@main/utils/pathDecoder'; import { AGENT_TEAMS_NAMESPACED_TEAMMATE_OPERATIONAL_TOOL_NAMES } from 'agent-teams-controller'; -import { listTmuxPanePidsForCurrentPlatform } from '@features/tmux-installer/main'; +import { + killTmuxPaneForCurrentPlatformSync, + listTmuxPanePidsForCurrentPlatform, +} from '@features/tmux-installer/main'; import pidusage from 'pidusage'; function allowConsoleLogs() { @@ -622,7 +630,21 @@ describe('TeamProvisioningService', () => { const run = createMemberSpawnRun({ teamName: 'edited-team', expectedMembers: ['alice'], - memberSpawnStatuses: new Map(), + memberSpawnStatuses: new Map([ + [ + 'alice', + createMemberSpawnStatusEntry({ + status: 'online', + launchState: 'confirmed_alive', + agentToolAccepted: true, + runtimeAlive: true, + bootstrapConfirmed: true, + livenessSource: 'heartbeat', + firstSpawnAcceptedAt: new Date().toISOString(), + lastHeartbeatAt: new Date().toISOString(), + }), + ], + ]), }); run.child = { pid: 111 }; run.processKilled = false; @@ -668,6 +690,228 @@ describe('TeamProvisioningService', () => { expect(restartMessage).toContain('Their workflow: Use checklist'); }); + it('re-reads teammate runtime settings immediately before respawn so stale edit snapshots are not reused', async () => { + const svc = new TeamProvisioningService(); + const run = createMemberSpawnRun({ + teamName: 'edited-team', + expectedMembers: ['alice'], + memberSpawnStatuses: new Map([ + [ + 'alice', + createMemberSpawnStatusEntry({ + status: 'online', + launchState: 'confirmed_alive', + agentToolAccepted: true, + runtimeAlive: true, + bootstrapConfirmed: true, + livenessSource: 'heartbeat', + firstSpawnAcceptedAt: new Date().toISOString(), + lastHeartbeatAt: new Date().toISOString(), + }), + ], + ]), + }); + run.child = { pid: 111 }; + run.processKilled = false; + run.cancelRequested = false; + + const sendMessageToRun = vi.fn(async () => {}); + const getConfig = vi + .fn() + .mockResolvedValue({ + name: 'Edited Team', + members: [{ name: 'team-lead', agentType: 'team-lead' }], + }); + const getMembers = vi + .fn() + .mockResolvedValueOnce([ + { + name: 'alice', + role: 'Reviewer', + workflow: 'Use checklist', + providerId: 'codex', + model: 'gpt-5.4-mini', + effort: 'high', + agentType: 'general-purpose', + }, + ]) + .mockResolvedValueOnce([ + { + name: 'alice', + role: 'Approver', + workflow: 'Use the updated checklist', + providerId: 'codex', + model: 'gpt-5.4', + effort: 'medium', + agentType: 'general-purpose', + }, + ]); + + (svc as any).sendMessageToRun = sendMessageToRun; + (svc as any).configReader = { getConfig }; + (svc as any).membersMetaStore = { getMembers }; + (svc as any).readPersistedRuntimeMembers = vi.fn(() => []); + (svc as any).getLiveTeamAgentRuntimeMetadata = vi.fn(async () => new Map()); + (svc as any).aliveRunByTeam.set('edited-team', run.runId); + (svc as any).runs.set(run.runId, run); + + await svc.restartMember('edited-team', 'alice'); + + expect(getMembers).toHaveBeenCalledTimes(2); + expect(sendMessageToRun).toHaveBeenCalledTimes(1); + const restartCall = sendMessageToRun.mock.calls[0] as unknown as + | [unknown, string] + | undefined; + const restartMessage = restartCall?.[1] ?? ''; + expect(restartMessage).toContain('provider="codex"'); + expect(restartMessage).toContain('model="gpt-5.4"'); + expect(restartMessage).toContain('effort="medium"'); + expect(restartMessage).toContain('with role "Approver"'); + expect(restartMessage).toContain('Their workflow: Use the updated checklist'); + }); + + it('aborts restart if the teammate is removed before respawn is requested', async () => { + const svc = new TeamProvisioningService(); + const run = createMemberSpawnRun({ + teamName: 'edited-team', + expectedMembers: ['alice'], + memberSpawnStatuses: new Map([ + [ + 'alice', + createMemberSpawnStatusEntry({ + status: 'online', + launchState: 'confirmed_alive', + agentToolAccepted: true, + runtimeAlive: true, + bootstrapConfirmed: true, + livenessSource: 'heartbeat', + firstSpawnAcceptedAt: new Date().toISOString(), + lastHeartbeatAt: new Date().toISOString(), + }), + ], + ]), + }); + run.child = { pid: 111 }; + run.processKilled = false; + run.cancelRequested = false; + + const sendMessageToRun = vi.fn(async () => {}); + const getConfig = vi + .fn() + .mockResolvedValue({ + name: 'Edited Team', + members: [{ name: 'team-lead', agentType: 'team-lead' }], + }); + const getMembers = vi + .fn() + .mockResolvedValueOnce([ + { + name: 'alice', + role: 'Reviewer', + providerId: 'codex', + model: 'gpt-5.4-mini', + effort: 'high', + agentType: 'general-purpose', + }, + ]) + .mockResolvedValueOnce([ + { + name: 'alice', + role: 'Reviewer', + providerId: 'codex', + model: 'gpt-5.4-mini', + effort: 'high', + agentType: 'general-purpose', + removedAt: new Date().toISOString(), + }, + ]); + + (svc as any).sendMessageToRun = sendMessageToRun; + (svc as any).configReader = { getConfig }; + (svc as any).membersMetaStore = { getMembers }; + (svc as any).readPersistedRuntimeMembers = vi.fn(() => []); + (svc as any).getLiveTeamAgentRuntimeMetadata = vi.fn(async () => new Map()); + (svc as any).aliveRunByTeam.set('edited-team', run.runId); + (svc as any).runs.set(run.runId, run); + + await expect(svc.restartMember('edited-team', 'alice')).rejects.toThrow( + 'Member "alice" was removed while restart was in progress' + ); + + expect(sendMessageToRun).not.toHaveBeenCalled(); + expect(run.pendingMemberRestarts.has('alice')).toBe(false); + expect(run.memberSpawnStatuses.get('alice')).toMatchObject({ + status: 'offline', + launchState: 'starting', + runtimeAlive: false, + }); + }); + + it('aborts restart if team config disappears before respawn is requested', async () => { + const svc = new TeamProvisioningService(); + const run = createMemberSpawnRun({ + teamName: 'edited-team', + expectedMembers: ['alice'], + memberSpawnStatuses: new Map([ + [ + 'alice', + createMemberSpawnStatusEntry({ + status: 'online', + launchState: 'confirmed_alive', + agentToolAccepted: true, + runtimeAlive: true, + bootstrapConfirmed: true, + livenessSource: 'heartbeat', + firstSpawnAcceptedAt: new Date().toISOString(), + lastHeartbeatAt: new Date().toISOString(), + }), + ], + ]), + }); + run.child = { pid: 111 }; + run.processKilled = false; + run.cancelRequested = false; + + const sendMessageToRun = vi.fn(async () => {}); + const getConfig = vi + .fn() + .mockResolvedValueOnce({ + name: 'Edited Team', + members: [{ name: 'team-lead', agentType: 'team-lead' }], + }) + .mockResolvedValueOnce(null); + const getMembers = vi.fn(async () => [ + { + name: 'alice', + role: 'Reviewer', + providerId: 'codex', + model: 'gpt-5.4-mini', + effort: 'high', + agentType: 'general-purpose', + }, + ]); + + (svc as any).sendMessageToRun = sendMessageToRun; + (svc as any).configReader = { getConfig }; + (svc as any).membersMetaStore = { getMembers }; + (svc as any).readPersistedRuntimeMembers = vi.fn(() => []); + (svc as any).getLiveTeamAgentRuntimeMetadata = vi.fn(async () => new Map()); + (svc as any).aliveRunByTeam.set('edited-team', run.runId); + (svc as any).runs.set(run.runId, run); + + await expect(svc.restartMember('edited-team', 'alice')).rejects.toThrow( + 'Team "edited-team" configuration disappeared while restart was in progress' + ); + + expect(sendMessageToRun).not.toHaveBeenCalled(); + expect(run.pendingMemberRestarts.has('alice')).toBe(false); + expect(run.memberSpawnStatuses.get('alice')).toMatchObject({ + status: 'offline', + launchState: 'starting', + runtimeAlive: false, + }); + }); + it('treats duplicate_skipped already_running as a failed codex restart because the old runtime is still active', async () => { const svc = new TeamProvisioningService(); const run = createMemberSpawnRun({ @@ -820,6 +1064,424 @@ describe('TeamProvisioningService', () => { expect(run.pendingMemberRestarts.has('bob')).toBe(true); }); + it('waits for a killed tmux pane to disappear before sending a restart request', async () => { + vi.useFakeTimers(); + + const svc = new TeamProvisioningService(); + const run = createMemberSpawnRun({ + teamName: 'tmux-team', + expectedMembers: ['forge'], + memberSpawnStatuses: new Map(), + }); + run.child = { pid: 111 }; + run.processKilled = false; + run.cancelRequested = false; + + const sendMessageToRun = vi.fn(async () => {}); + (svc as any).sendMessageToRun = sendMessageToRun; + (svc as any).configReader = { + getConfig: vi.fn(async () => ({ + name: 'Tmux Team', + members: [{ name: 'team-lead', agentType: 'team-lead' }], + })), + }; + (svc as any).membersMetaStore = { + getMembers: vi.fn(async () => [ + { + name: 'forge', + role: 'Developer', + providerId: 'codex', + model: 'gpt-5.4', + effort: 'medium', + agentType: 'general-purpose', + }, + ]), + }; + (svc as any).readPersistedRuntimeMembers = vi.fn(() => [ + { + name: 'forge', + agentId: 'forge@tmux-team', + backendType: 'tmux', + tmuxPaneId: '%2', + }, + ]); + (svc as any).getLiveTeamAgentRuntimeMetadata = vi.fn(async () => new Map()); + (svc as any).aliveRunByTeam.set('tmux-team', run.runId); + (svc as any).runs.set(run.runId, run); + + vi.mocked(listTmuxPanePidsForCurrentPlatform) + .mockResolvedValueOnce(new Map([['%2', 999]])) + .mockResolvedValueOnce(new Map()); + + const restartPromise = svc.restartMember('tmux-team', 'forge'); + await Promise.resolve(); + + expect(sendMessageToRun).not.toHaveBeenCalled(); + + await vi.advanceTimersByTimeAsync(100); + await restartPromise; + + expect(sendMessageToRun).toHaveBeenCalledTimes(1); + }); + + it('fails early when the previous tmux pane does not exit before restart', async () => { + vi.useFakeTimers(); + + const svc = new TeamProvisioningService(); + const run = createMemberSpawnRun({ + teamName: 'tmux-team', + expectedMembers: ['forge'], + memberSpawnStatuses: new Map(), + }); + run.child = { pid: 111 }; + run.processKilled = false; + run.cancelRequested = false; + + const sendMessageToRun = vi.fn(async () => {}); + (svc as any).sendMessageToRun = sendMessageToRun; + (svc as any).configReader = { + getConfig: vi.fn(async () => ({ + name: 'Tmux Team', + members: [{ name: 'team-lead', agentType: 'team-lead' }], + })), + }; + (svc as any).membersMetaStore = { + getMembers: vi.fn(async () => [ + { + name: 'forge', + role: 'Developer', + providerId: 'codex', + model: 'gpt-5.4', + effort: 'medium', + agentType: 'general-purpose', + }, + ]), + }; + (svc as any).readPersistedRuntimeMembers = vi.fn(() => [ + { + name: 'forge', + agentId: 'forge@tmux-team', + backendType: 'tmux', + tmuxPaneId: '%2', + }, + ]); + (svc as any).getLiveTeamAgentRuntimeMetadata = vi.fn(async () => new Map()); + (svc as any).aliveRunByTeam.set('tmux-team', run.runId); + (svc as any).runs.set(run.runId, run); + + vi.mocked(listTmuxPanePidsForCurrentPlatform).mockImplementation(async () => + new Map([['%2', 999]]) + ); + + const restartPromise = expect(svc.restartMember('tmux-team', 'forge')).rejects.toThrow( + 'Restart for teammate "forge" is still waiting for the previous tmux pane to exit (%2).' + ); + await vi.advanceTimersByTimeAsync(1_500); + await restartPromise; + + expect(sendMessageToRun).not.toHaveBeenCalled(); + }); + + it('still verifies tmux pane exit when pane kill throws, and blocks restart if the pane remains alive', async () => { + vi.useFakeTimers(); + + const svc = new TeamProvisioningService(); + const run = createMemberSpawnRun({ + teamName: 'tmux-team', + expectedMembers: ['forge'], + memberSpawnStatuses: new Map(), + }); + run.child = { pid: 111 }; + run.processKilled = false; + run.cancelRequested = false; + + const sendMessageToRun = vi.fn(async () => {}); + (svc as any).sendMessageToRun = sendMessageToRun; + (svc as any).configReader = { + getConfig: vi.fn(async () => ({ + name: 'Tmux Team', + members: [{ name: 'team-lead', agentType: 'team-lead' }], + })), + }; + (svc as any).membersMetaStore = { + getMembers: vi.fn(async () => [ + { + name: 'forge', + role: 'Developer', + providerId: 'codex', + model: 'gpt-5.4', + effort: 'medium', + agentType: 'general-purpose', + }, + ]), + }; + (svc as any).readPersistedRuntimeMembers = vi.fn(() => [ + { + name: 'forge', + agentId: 'forge@tmux-team', + backendType: 'tmux', + tmuxPaneId: '%2', + }, + ]); + (svc as any).getLiveTeamAgentRuntimeMetadata = vi.fn(async () => new Map()); + (svc as any).aliveRunByTeam.set('tmux-team', run.runId); + (svc as any).runs.set(run.runId, run); + + vi.mocked(killTmuxPaneForCurrentPlatformSync).mockImplementation(() => { + throw new Error('pane kill failed'); + }); + vi.mocked(listTmuxPanePidsForCurrentPlatform).mockImplementation(async () => + new Map([['%2', 999]]) + ); + + const restartPromise = expect(svc.restartMember('tmux-team', 'forge')).rejects.toThrow( + 'Restart for teammate "forge" is still waiting for the previous tmux pane to exit (%2).' + ); + await vi.advanceTimersByTimeAsync(1_500); + await restartPromise; + + expect(sendMessageToRun).not.toHaveBeenCalled(); + }); + + it('does not treat tmux pane lookup failures as a successful restart precondition', async () => { + vi.useFakeTimers(); + + const svc = new TeamProvisioningService(); + const run = createMemberSpawnRun({ + teamName: 'tmux-team', + expectedMembers: ['forge'], + memberSpawnStatuses: new Map(), + }); + run.child = { pid: 111 }; + run.processKilled = false; + run.cancelRequested = false; + + const sendMessageToRun = vi.fn(async () => {}); + (svc as any).sendMessageToRun = sendMessageToRun; + (svc as any).configReader = { + getConfig: vi.fn(async () => ({ + name: 'Tmux Team', + members: [{ name: 'team-lead', agentType: 'team-lead' }], + })), + }; + (svc as any).membersMetaStore = { + getMembers: vi.fn(async () => [ + { + name: 'forge', + role: 'Developer', + providerId: 'codex', + model: 'gpt-5.4', + effort: 'medium', + agentType: 'general-purpose', + }, + ]), + }; + (svc as any).readPersistedRuntimeMembers = vi.fn(() => [ + { + name: 'forge', + agentId: 'forge@tmux-team', + backendType: 'tmux', + tmuxPaneId: '%2', + }, + ]); + (svc as any).getLiveTeamAgentRuntimeMetadata = vi.fn(async () => new Map()); + (svc as any).aliveRunByTeam.set('tmux-team', run.runId); + (svc as any).runs.set(run.runId, run); + + vi.mocked(listTmuxPanePidsForCurrentPlatform).mockRejectedValue( + new Error('tmux list-panes failed') + ); + + const restartPromise = expect(svc.restartMember('tmux-team', 'forge')).rejects.toThrow( + 'Restart for teammate "forge" could not verify that the previous tmux pane exited: tmux list-panes failed' + ); + await vi.advanceTimersByTimeAsync(1_500); + await restartPromise; + + expect(sendMessageToRun).not.toHaveBeenCalled(); + }); + + it('fails early when the previous process backend runtime does not exit before restart', async () => { + vi.useFakeTimers(); + + const svc = new TeamProvisioningService(); + const run = createMemberSpawnRun({ + teamName: 'process-team', + expectedMembers: ['forge'], + memberSpawnStatuses: new Map(), + }); + run.child = { pid: 111 }; + run.processKilled = false; + run.cancelRequested = false; + + const sendMessageToRun = vi.fn(async () => {}); + (svc as any).sendMessageToRun = sendMessageToRun; + (svc as any).configReader = { + getConfig: vi.fn(async () => ({ + name: 'Process Team', + members: [{ name: 'team-lead', agentType: 'team-lead' }], + })), + }; + (svc as any).membersMetaStore = { + getMembers: vi.fn(async () => [ + { + name: 'forge', + role: 'Developer', + providerId: 'codex', + model: 'gpt-5.4', + effort: 'medium', + agentType: 'general-purpose', + }, + ]), + }; + (svc as any).readPersistedRuntimeMembers = vi.fn(() => []); + (svc as any).getLiveTeamAgentRuntimeMetadata = vi.fn( + async () => + new Map([ + [ + 'forge', + { + alive: true, + backendType: 'process', + pid: process.pid, + agentId: 'forge@process-team', + }, + ], + ]) + ); + (svc as any).aliveRunByTeam.set('process-team', run.runId); + (svc as any).runs.set(run.runId, run); + + const restartPromise = expect(svc.restartMember('process-team', 'forge')).rejects.toThrow( + `Restart for teammate "forge" is still waiting for the previous process to exit (${process.pid}).` + ); + await vi.advanceTimersByTimeAsync(1_500); + await restartPromise; + + expect(vi.mocked(killProcessByPid)).toHaveBeenCalledWith(process.pid); + expect(sendMessageToRun).not.toHaveBeenCalled(); + }); + + it('bypasses stale live runtime metadata cache before restarting a process backend teammate', async () => { + vi.useFakeTimers(); + + const svc = new TeamProvisioningService(); + const run = createMemberSpawnRun({ + teamName: 'process-team', + expectedMembers: ['forge'], + memberSpawnStatuses: new Map(), + }); + run.child = { pid: 111 }; + run.processKilled = false; + run.cancelRequested = false; + + const sendMessageToRun = vi.fn(async () => {}); + (svc as any).sendMessageToRun = sendMessageToRun; + (svc as any).configReader = { + getConfig: vi.fn(async () => ({ + name: 'Process Team', + members: [{ name: 'team-lead', agentType: 'team-lead' }], + })), + }; + (svc as any).membersMetaStore = { + getMembers: vi.fn(async () => [ + { + name: 'forge', + role: 'Developer', + providerId: 'codex', + model: 'gpt-5.4', + effort: 'medium', + agentType: 'general-purpose', + }, + ]), + }; + (svc as any).readPersistedRuntimeMembers = vi.fn(() => [ + { + name: 'forge', + agentId: 'forge@process-team', + backendType: 'process', + }, + ]); + (svc as any).findLiveProcessPidByAgentId = vi.fn(() => + new Map([['forge@process-team', process.pid]]) + ); + (svc as any).liveTeamAgentRuntimeMetadataCache.set('process-team', { + expiresAtMs: Date.now() + 60_000, + metadata: new Map([ + [ + 'forge', + { + alive: false, + backendType: 'process', + agentId: 'forge@process-team', + }, + ], + ]), + }); + (svc as any).aliveRunByTeam.set('process-team', run.runId); + (svc as any).runs.set(run.runId, run); + + const restartPromise = expect(svc.restartMember('process-team', 'forge')).rejects.toThrow( + `Restart for teammate "forge" is still waiting for the previous process to exit (${process.pid}).` + ); + await vi.advanceTimersByTimeAsync(1_500); + await restartPromise; + + expect(vi.mocked(killProcessByPid)).toHaveBeenCalledWith(process.pid); + expect(sendMessageToRun).not.toHaveBeenCalled(); + }); + + it('uses members.meta agentId to detect a live process backend teammate when config runtime identity is stale', async () => { + vi.useFakeTimers(); + + const svc = new TeamProvisioningService(); + const run = createMemberSpawnRun({ + teamName: 'process-team', + expectedMembers: ['forge'], + memberSpawnStatuses: new Map(), + }); + run.child = { pid: 111 }; + run.processKilled = false; + run.cancelRequested = false; + + const sendMessageToRun = vi.fn(async () => {}); + (svc as any).sendMessageToRun = sendMessageToRun; + (svc as any).configReader = { + getConfig: vi.fn(async () => ({ + name: 'Process Team', + members: [{ name: 'team-lead', agentType: 'team-lead' }], + })), + }; + (svc as any).membersMetaStore = { + getMembers: vi.fn(async () => [ + { + name: 'forge', + role: 'Developer', + providerId: 'codex', + model: 'gpt-5.4', + effort: 'medium', + agentType: 'general-purpose', + agentId: 'forge@process-team', + }, + ]), + }; + (svc as any).readPersistedRuntimeMembers = vi.fn(() => []); + (svc as any).findLiveProcessPidByAgentId = vi.fn(() => + new Map([['forge@process-team', process.pid]]) + ); + (svc as any).aliveRunByTeam.set('process-team', run.runId); + (svc as any).runs.set(run.runId, run); + + const restartPromise = expect(svc.restartMember('process-team', 'forge')).rejects.toThrow( + `Restart for teammate "forge" is still waiting for the previous process to exit (${process.pid}).` + ); + await vi.advanceTimersByTimeAsync(1_500); + await restartPromise; + + expect(vi.mocked(killProcessByPid)).toHaveBeenCalledWith(process.pid); + expect(sendMessageToRun).not.toHaveBeenCalled(); + }); + it('rejects a second restart request while the first restart is still in flight', async () => { const svc = new TeamProvisioningService(); const run = createMemberSpawnRun({ @@ -866,6 +1528,82 @@ describe('TeamProvisioningService', () => { ); }); + it('clears stale member spawn tool tracking before starting a manual restart', async () => { + const svc = new TeamProvisioningService(); + const run = createMemberSpawnRun({ + teamName: 'codex-team', + expectedMembers: ['bob'], + memberSpawnStatuses: new Map([ + [ + 'bob', + createMemberSpawnStatusEntry({ + status: 'waiting', + launchState: 'runtime_pending_bootstrap', + agentToolAccepted: true, + firstSpawnAcceptedAt: new Date().toISOString(), + }), + ], + ]), + }); + run.child = { pid: 111 }; + run.processKilled = false; + run.cancelRequested = false; + run.activeToolCalls.set('tool-agent-old', { + memberName: 'bob', + toolUseId: 'tool-agent-old', + toolName: 'Agent', + preview: 'Spawn teammate bob', + startedAt: new Date().toISOString(), + state: 'running', + source: 'runtime', + }); + run.memberSpawnToolUseIds.set('tool-agent-old', 'bob'); + + const sendMessageToRun = vi.fn(async () => {}); + (svc as any).sendMessageToRun = sendMessageToRun; + (svc as any).configReader = { + getConfig: vi.fn(async () => ({ + name: 'Codex Team', + members: [{ name: 'team-lead', agentType: 'team-lead' }], + })), + }; + (svc as any).membersMetaStore = { + getMembers: vi.fn(async () => [ + { + name: 'bob', + role: 'Developer', + providerId: 'codex', + model: 'gpt-5.2', + effort: 'medium', + agentType: 'general-purpose', + }, + ]), + }; + (svc as any).readPersistedRuntimeMembers = vi.fn(() => []); + (svc as any).getLiveTeamAgentRuntimeMetadata = vi.fn(async () => new Map()); + (svc as any).aliveRunByTeam.set('codex-team', run.runId); + (svc as any).runs.set(run.runId, run); + + await svc.restartMember('codex-team', 'bob'); + + expect(run.activeToolCalls.has('tool-agent-old')).toBe(false); + expect(run.memberSpawnToolUseIds.has('tool-agent-old')).toBe(false); + expect(sendMessageToRun).toHaveBeenCalledTimes(1); + + (svc as any).finishRuntimeToolActivity( + run, + 'tool-agent-old', + [{ type: 'text', text: 'late stale result' }], + true + ); + + expect(run.memberSpawnStatuses.get('bob')).toMatchObject({ + status: 'spawning', + launchState: 'starting', + }); + expect(run.pendingMemberRestarts.has('bob')).toBe(true); + }); + it('marks a pending restart as failed when the teammate never rejoins within the restart grace window', async () => { const svc = new TeamProvisioningService(); const run = createMemberSpawnRun({ @@ -2113,6 +2851,140 @@ describe('TeamProvisioningService', () => { }); }); + it('clears a pending restart when the teammate is confirmed online via process liveness', () => { + const run = createMemberSpawnRun({ + memberSpawnStatuses: new Map([ + [ + 'alice', + createMemberSpawnStatusEntry({ + status: 'waiting', + launchState: 'runtime_pending_bootstrap', + agentToolAccepted: true, + firstSpawnAcceptedAt: new Date().toISOString(), + }), + ], + ]), + }); + run.pendingMemberRestarts.set('alice', { + requestedAt: new Date().toISOString(), + desired: { + name: 'alice', + providerId: 'codex', + model: 'gpt-5.4-mini', + effort: 'medium', + }, + }); + const svc = new TeamProvisioningService(); + + (svc as any).setMemberSpawnStatus(run, 'alice', 'online', undefined, 'process'); + + expect(run.pendingMemberRestarts.has('alice')).toBe(false); + expect(run.memberSpawnStatuses.get('alice')).toMatchObject({ + status: 'online', + launchState: 'runtime_pending_bootstrap', + runtimeAlive: true, + livenessSource: 'process', + }); + }); + + it('treats deterministic already_running as a failed restart when a restart is pending', () => { + const run = createMemberSpawnRun({ + teamName: 'nice-team', + expectedMembers: ['alice'], + memberSpawnStatuses: new Map([ + [ + 'alice', + createMemberSpawnStatusEntry({ + status: 'waiting', + launchState: 'runtime_pending_bootstrap', + agentToolAccepted: true, + firstSpawnAcceptedAt: new Date().toISOString(), + }), + ], + ]), + }); + run.pendingMemberRestarts.set('alice', { + requestedAt: new Date().toISOString(), + desired: { + name: 'alice', + providerId: 'codex', + model: 'gpt-5.4-mini', + effort: 'medium', + }, + }); + const svc = new TeamProvisioningService(); + + const handled = (svc as any).handleDeterministicBootstrapEvent(run, { + type: 'system', + subtype: 'team_bootstrap', + event: 'member_spawn_result', + member_name: 'alice', + outcome: 'already_running', + run_id: run.runId, + team_name: run.teamName, + seq: 1, + }); + + expect(handled).toBe(true); + expect(run.pendingMemberRestarts.has('alice')).toBe(false); + expect(run.memberSpawnStatuses.get('alice')).toMatchObject({ + status: 'error', + launchState: 'failed_to_start', + hardFailure: true, + hardFailureReason: + 'Restart for teammate "alice" was skipped because the previous runtime still appears to be active. The requested settings may not have been applied.', + }); + }); + + it('clears a pending restart when deterministic spawn reports a hard failure', () => { + const run = createMemberSpawnRun({ + teamName: 'nice-team', + expectedMembers: ['alice'], + memberSpawnStatuses: new Map([ + [ + 'alice', + createMemberSpawnStatusEntry({ + status: 'waiting', + launchState: 'runtime_pending_bootstrap', + agentToolAccepted: true, + firstSpawnAcceptedAt: new Date().toISOString(), + }), + ], + ]), + }); + run.pendingMemberRestarts.set('alice', { + requestedAt: new Date().toISOString(), + desired: { + name: 'alice', + providerId: 'codex', + model: 'gpt-5.4-mini', + effort: 'medium', + }, + }); + const svc = new TeamProvisioningService(); + + const handled = (svc as any).handleDeterministicBootstrapEvent(run, { + type: 'system', + subtype: 'team_bootstrap', + event: 'member_spawn_result', + member_name: 'alice', + outcome: 'failed', + reason: 'spawn failed hard', + run_id: run.runId, + team_name: run.teamName, + seq: 1, + }); + + expect(handled).toBe(true); + expect(run.pendingMemberRestarts.has('alice')).toBe(false); + expect(run.memberSpawnStatuses.get('alice')).toMatchObject({ + status: 'error', + launchState: 'failed_to_start', + hardFailure: true, + hardFailureReason: 'spawn failed hard', + }); + }); + it('clears stale failed_to_start state when live runtime metadata proves the teammate is alive', async () => { const svc = new TeamProvisioningService(); (svc as any).getLiveTeamAgentRuntimeMetadata = vi.fn(async () => @@ -2188,6 +3060,79 @@ describe('TeamProvisioningService', () => { }); }); + it('does not self-clear a failed launch from stale runtimeAlive state when no live pid exists', async () => { + const svc = new TeamProvisioningService(); + const run = createMemberSpawnRun({ + runId: 'run-self-clear-1', + teamName: 'beacon-desk-4', + expectedMembers: ['bob'], + memberSpawnStatuses: new Map([ + [ + 'bob', + createMemberSpawnStatusEntry({ + status: 'error', + launchState: 'failed_to_start', + runtimeAlive: true, + livenessSource: 'process', + bootstrapConfirmed: false, + hardFailure: true, + error: 'Teammate did not join within the launch grace window.', + hardFailureReason: 'Teammate did not join within the launch grace window.', + }), + ], + ]), + }); + + (svc as any).runs.set(run.runId, run); + (svc as any).provisioningRunByTeam.set(run.teamName, run.runId); + (svc as any).configReader = { + getConfig: vi.fn(async () => ({ + name: 'Beacon Desk', + members: [ + { name: 'team-lead', agentType: 'team-lead' }, + { name: 'bob', agentType: 'general-purpose', providerId: 'codex', model: 'gpt-5.3-codex' }, + ], + })), + }; + (svc as any).membersMetaStore = { + getMembers: vi.fn(async () => [ + { + name: 'bob', + role: 'Developer', + providerId: 'codex', + model: 'gpt-5.3-codex', + effort: 'medium', + agentType: 'general-purpose', + }, + ]), + }; + (svc as any).readPersistedRuntimeMembers = vi.fn(() => []); + (svc as any).findLiveProcessPidByAgentId = vi.fn(() => new Map()); + + const result = await (svc as any).attachLiveRuntimeMetadataToStatuses('beacon-desk-4', { + bob: createMemberSpawnStatusEntry({ + status: 'error', + launchState: 'failed_to_start', + runtimeAlive: true, + livenessSource: 'process', + bootstrapConfirmed: false, + hardFailure: true, + error: 'Teammate did not join within the launch grace window.', + hardFailureReason: 'Teammate did not join within the launch grace window.', + }), + }); + + expect(result.bob).toMatchObject({ + status: 'error', + launchState: 'failed_to_start', + runtimeAlive: true, + hardFailure: true, + hardFailureReason: 'Teammate did not join within the launch grace window.', + error: 'Teammate did not join within the launch grace window.', + runtimeModel: 'gpt-5.3-codex', + }); + }); + it('does not downgrade an already-online teammate when waiting is reported later', () => { const run = createMemberSpawnRun({ memberSpawnStatuses: new Map([ @@ -2258,6 +3203,43 @@ describe('TeamProvisioningService', () => { }); }); + it('clears an old member launch grace timer when a new spawn attempt resets acceptance state', () => { + vi.useFakeTimers(); + + const acceptedAt = new Date(Date.now() - 5_000).toISOString(); + const run = createMemberSpawnRun({ + memberSpawnStatuses: new Map([ + [ + 'alice', + createMemberSpawnStatusEntry({ + status: 'waiting', + launchState: 'runtime_pending_bootstrap', + agentToolAccepted: true, + firstSpawnAcceptedAt: acceptedAt, + }), + ], + ]), + }); + const svc = new TeamProvisioningService(); + const timerKey = (svc as any).getMemberLaunchGraceKey(run, 'alice'); + + (svc as any).syncMemberLaunchGraceCheck(run, 'alice', run.memberSpawnStatuses.get('alice')); + expect((svc as any).pendingTimeouts.has(timerKey)).toBe(true); + + (svc as any).setMemberSpawnStatus(run, 'alice', 'offline'); + expect((svc as any).pendingTimeouts.has(timerKey)).toBe(false); + + (svc as any).setMemberSpawnStatus(run, 'alice', 'spawning'); + expect((svc as any).pendingTimeouts.has(timerKey)).toBe(false); + expect(run.memberSpawnStatuses.get('alice')).toMatchObject({ + firstSpawnAcceptedAt: undefined, + lastHeartbeatAt: undefined, + error: undefined, + hardFailureReason: undefined, + livenessSource: undefined, + }); + }); + it('reconciles stale never-spawned failures when bootstrap state proves the teammate was registered', async () => { const teamName = 'registered-bootstrap-team'; const leadSessionId = 'lead-session'; diff --git a/test/renderer/store/teamSlice.test.ts b/test/renderer/store/teamSlice.test.ts index 20f77cc5..71244a95 100644 --- a/test/renderer/store/teamSlice.test.ts +++ b/test/renderer/store/teamSlice.test.ts @@ -2421,6 +2421,22 @@ describe('teamSlice actions', () => { expect(store.getState().teamAgentRuntimeByTeam['my-team']).toEqual(createRuntimeSnapshot()); }); + it('restartMember refreshes spawn statuses and runtime snapshot even when restart fails', async () => { + const store = createSliceStore(); + const refreshSpawnStatuses = vi.fn(async (_teamName: string) => undefined); + const refreshRuntimeSnapshot = vi.fn(async (_teamName: string) => undefined); + store.setState({ + fetchMemberSpawnStatuses: refreshSpawnStatuses, + fetchTeamAgentRuntime: refreshRuntimeSnapshot, + }); + hoisted.restartMember.mockRejectedValueOnce(new Error('restart failed')); + + await expect(store.getState().restartMember('my-team', 'alice')).rejects.toThrow('restart failed'); + + expect(refreshSpawnStatuses).toHaveBeenCalledWith('my-team'); + expect(refreshRuntimeSnapshot).toHaveBeenCalledWith('my-team'); + }); + it('clears stale runtime snapshots on delete', async () => { const store = createSliceStore(); store.setState({