fix(team): harden restart and session refresh flows
This commit is contained in:
parent
4cab3052eb
commit
cb62acb4fb
5 changed files with 178 additions and 9 deletions
|
|
@ -885,6 +885,15 @@ function buildRestartStillRunningReason(memberName: string): string {
|
|||
);
|
||||
}
|
||||
|
||||
function buildRestartDuplicateUnconfirmedReason(memberName: string, rawReason?: string): string {
|
||||
const suffix = rawReason?.trim()
|
||||
? ` Agent returned duplicate_skipped with unrecognized reason "${rawReason.trim()}".`
|
||||
: ' Agent returned duplicate_skipped without a reason.';
|
||||
return (
|
||||
`Restart for teammate "${memberName}" could not be confirmed and may not have applied.` + suffix
|
||||
);
|
||||
}
|
||||
|
||||
function buildRestartGraceTimeoutReason(memberName: string): string {
|
||||
return `Teammate "${memberName}" did not rejoin within the restart grace window.`;
|
||||
}
|
||||
|
|
@ -4048,8 +4057,25 @@ export class TeamProvisioningService {
|
|||
const detail =
|
||||
parsedStatus.reason === 'already_running'
|
||||
? 'duplicate spawn skipped - already running'
|
||||
: 'duplicate spawn skipped - teammate bootstrap still pending';
|
||||
: parsedStatus.reason === 'bootstrap_pending'
|
||||
? 'duplicate spawn skipped - teammate bootstrap still pending'
|
||||
: parsedStatus.rawReason
|
||||
? `duplicate spawn skipped - unrecognized reason: ${parsedStatus.rawReason}`
|
||||
: 'duplicate spawn skipped - reason unavailable';
|
||||
this.appendMemberBootstrapDiagnostic(run, spawnedMemberName, detail);
|
||||
if (pendingRestart && !parsedStatus.reason) {
|
||||
logger.warn(
|
||||
`[${run.teamName}] Restart for teammate "${spawnedMemberName}" returned duplicate_skipped without a recognized reason`
|
||||
);
|
||||
run.pendingMemberRestarts.delete(spawnedMemberName);
|
||||
this.setMemberSpawnStatus(
|
||||
run,
|
||||
spawnedMemberName,
|
||||
'error',
|
||||
buildRestartDuplicateUnconfirmedReason(spawnedMemberName, parsedStatus.rawReason)
|
||||
);
|
||||
return;
|
||||
}
|
||||
if (parsedStatus.reason === 'already_running') {
|
||||
if (pendingRestart) {
|
||||
run.pendingMemberRestarts.delete(spawnedMemberName);
|
||||
|
|
|
|||
|
|
@ -10,6 +10,16 @@ import type { Session, SessionSortMode } from '@renderer/types/data';
|
|||
import type { StateCreator } from 'zustand';
|
||||
|
||||
const logger = createLogger('Store:session');
|
||||
const SESSION_IN_PLACE_RETRY_DELAY_MS = 150;
|
||||
|
||||
function isTransientSessionsPaginatedIpcError(error: unknown): boolean {
|
||||
const message = error instanceof Error ? error.message : String(error ?? '');
|
||||
return (
|
||||
message.includes(
|
||||
"Error invoking remote method 'get-sessions-paginated': reply was never sent"
|
||||
) || message.includes("No handler registered for 'get-sessions-paginated'")
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tracks the latest in-place refresh generation per project.
|
||||
|
|
@ -257,20 +267,21 @@ export const createSessionSlice: StateCreator<AppState, [], [], SessionSlice> =
|
|||
const generation = (projectRefreshGeneration.get(projectId) ?? 0) + 1;
|
||||
projectRefreshGeneration.set(projectId, generation);
|
||||
|
||||
try {
|
||||
const fetchPage = async () => {
|
||||
const { connectionMode } = get();
|
||||
const result = await api.getSessionsPaginated(projectId, null, 20, {
|
||||
return api.getSessionsPaginated(projectId, null, 20, {
|
||||
includeTotalCount: false,
|
||||
prefilterAll: false,
|
||||
metadataLevel: connectionMode === 'ssh' ? 'light' : 'deep',
|
||||
});
|
||||
};
|
||||
|
||||
const applyResult = (result: Awaited<ReturnType<typeof api.getSessionsPaginated>>) => {
|
||||
// Drop stale responses from older in-flight refreshes
|
||||
if (projectRefreshGeneration.get(projectId) !== generation) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Update sessions without loading state
|
||||
set({
|
||||
sessions: result.sessions,
|
||||
sessionsCursor: result.nextCursor,
|
||||
|
|
@ -278,7 +289,27 @@ export const createSessionSlice: StateCreator<AppState, [], [], SessionSlice> =
|
|||
sessionsTotalCount: result.totalCount,
|
||||
// Don't touch sessionsLoading - keep it as-is
|
||||
});
|
||||
};
|
||||
|
||||
try {
|
||||
const result = await fetchPage();
|
||||
applyResult(result);
|
||||
} catch (error) {
|
||||
if (isTransientSessionsPaginatedIpcError(error) && get().selectedProjectId === projectId) {
|
||||
logger.warn('refreshSessionsInPlace transient IPC error - retrying once');
|
||||
try {
|
||||
await new Promise((resolve) => setTimeout(resolve, SESSION_IN_PLACE_RETRY_DELAY_MS));
|
||||
if (get().selectedProjectId !== projectId) {
|
||||
return;
|
||||
}
|
||||
const retried = await fetchPage();
|
||||
applyResult(retried);
|
||||
return;
|
||||
} catch (retryError) {
|
||||
logger.error('refreshSessionsInPlace retry error:', retryError);
|
||||
return;
|
||||
}
|
||||
}
|
||||
logger.error('refreshSessionsInPlace error:', error);
|
||||
// Don't set error state - this is a background refresh
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,7 +9,8 @@ export type AgentToolDuplicateSkipReason = 'already_running' | 'bootstrap_pendin
|
|||
|
||||
export interface ParsedAgentToolResultStatus {
|
||||
status: 'duplicate_skipped';
|
||||
reason: AgentToolDuplicateSkipReason;
|
||||
reason?: AgentToolDuplicateSkipReason;
|
||||
rawReason?: string;
|
||||
name?: string;
|
||||
teamName?: string;
|
||||
}
|
||||
|
|
@ -262,14 +263,14 @@ export function parseAgentToolResultStatus(content: unknown): ParsedAgentToolRes
|
|||
return null;
|
||||
}
|
||||
|
||||
const reason = fields.get('reason');
|
||||
if (reason !== 'already_running' && reason !== 'bootstrap_pending') {
|
||||
return null;
|
||||
}
|
||||
const rawReason = fields.get('reason');
|
||||
const reason =
|
||||
rawReason === 'already_running' || rawReason === 'bootstrap_pending' ? rawReason : undefined;
|
||||
|
||||
return {
|
||||
status: 'duplicate_skipped',
|
||||
reason,
|
||||
...(rawReason ? { rawReason } : {}),
|
||||
name: fields.get('name'),
|
||||
teamName: fields.get('team_name'),
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1064,6 +1064,88 @@ describe('TeamProvisioningService', () => {
|
|||
expect(run.pendingMemberRestarts.has('bob')).toBe(true);
|
||||
});
|
||||
|
||||
it('fails a codex teammate restart immediately when Agent returns duplicate_skipped without a reason', async () => {
|
||||
allowConsoleLogs();
|
||||
const svc = new TeamProvisioningService();
|
||||
const run = createMemberSpawnRun({
|
||||
teamName: 'codex-team',
|
||||
expectedMembers: ['jack'],
|
||||
memberSpawnStatuses: new Map([
|
||||
[
|
||||
'jack',
|
||||
createMemberSpawnStatusEntry({
|
||||
launchState: 'failed_to_start',
|
||||
hardFailure: true,
|
||||
hardFailureReason: 'Teammate was never spawned during launch.',
|
||||
error: 'Teammate was never spawned during launch.',
|
||||
}),
|
||||
],
|
||||
]),
|
||||
});
|
||||
run.child = { pid: 111 };
|
||||
run.processKilled = false;
|
||||
run.cancelRequested = false;
|
||||
|
||||
(svc as any).sendMessageToRun = vi.fn(async () => {});
|
||||
(svc as any).configReader = {
|
||||
getConfig: vi.fn(async () => ({
|
||||
name: 'Codex Team',
|
||||
members: [{ name: 'team-lead', agentType: 'team-lead' }],
|
||||
})),
|
||||
};
|
||||
(svc as any).membersMetaStore = {
|
||||
getMembers: vi.fn(async () => [
|
||||
{
|
||||
name: 'jack',
|
||||
role: 'Developer',
|
||||
providerId: 'codex',
|
||||
model: 'gpt-5.4',
|
||||
effort: 'medium',
|
||||
agentType: 'general-purpose',
|
||||
},
|
||||
]),
|
||||
};
|
||||
(svc as any).readPersistedRuntimeMembers = vi.fn(() => []);
|
||||
(svc as any).getLiveTeamAgentRuntimeMetadata = vi.fn(async () => new Map());
|
||||
(svc as any).aliveRunByTeam.set('codex-team', run.runId);
|
||||
(svc as any).runs.set(run.runId, run);
|
||||
|
||||
await svc.restartMember('codex-team', 'jack');
|
||||
|
||||
run.activeToolCalls.set('tool-agent-1', {
|
||||
memberName: 'jack',
|
||||
toolUseId: 'tool-agent-1',
|
||||
toolName: 'Agent',
|
||||
preview: 'Spawn teammate jack',
|
||||
startedAt: new Date().toISOString(),
|
||||
state: 'running',
|
||||
source: 'runtime',
|
||||
});
|
||||
run.memberSpawnToolUseIds.set('tool-agent-1', 'jack');
|
||||
|
||||
(svc as any).finishRuntimeToolActivity(
|
||||
run,
|
||||
'tool-agent-1',
|
||||
[
|
||||
{
|
||||
type: 'text',
|
||||
text: 'status: duplicate_skipped\nname: jack\nteam_name: codex-team',
|
||||
},
|
||||
],
|
||||
false
|
||||
);
|
||||
|
||||
expect(run.pendingMemberRestarts.has('jack')).toBe(false);
|
||||
expect(run.memberSpawnStatuses.get('jack')).toMatchObject({
|
||||
status: 'error',
|
||||
launchState: 'failed_to_start',
|
||||
runtimeAlive: false,
|
||||
hardFailure: true,
|
||||
hardFailureReason:
|
||||
'Restart for teammate "jack" could not be confirmed and may not have applied. Agent returned duplicate_skipped without a reason.',
|
||||
});
|
||||
});
|
||||
|
||||
it('waits for a killed tmux pane to disappear before sending a restart request', async () => {
|
||||
vi.useFakeTimers();
|
||||
|
||||
|
|
|
|||
|
|
@ -270,6 +270,35 @@ describe('sessionSlice', () => {
|
|||
await Promise.all([first, second]);
|
||||
expect(store.getState().sessions[0]?.id).toBe('newest');
|
||||
});
|
||||
|
||||
it('should retry once on transient invoke lifecycle errors', async () => {
|
||||
vi.useFakeTimers();
|
||||
store.setState({
|
||||
selectedProjectId: 'project-1',
|
||||
sessions: [{ id: 'seed' }] as never[],
|
||||
});
|
||||
|
||||
mockAPI.getSessionsPaginated
|
||||
.mockRejectedValueOnce(
|
||||
new Error(
|
||||
"Error invoking remote method 'get-sessions-paginated': reply was never sent"
|
||||
)
|
||||
)
|
||||
.mockResolvedValueOnce({
|
||||
sessions: [{ id: 'recovered' }] as never[],
|
||||
nextCursor: null,
|
||||
hasMore: false,
|
||||
totalCount: 1,
|
||||
});
|
||||
|
||||
const refreshPromise = store.getState().refreshSessionsInPlace('project-1');
|
||||
await vi.advanceTimersByTimeAsync(150);
|
||||
await refreshPromise;
|
||||
|
||||
expect(mockAPI.getSessionsPaginated).toHaveBeenCalledTimes(2);
|
||||
expect(store.getState().sessions[0]?.id).toBe('recovered');
|
||||
vi.useRealTimers();
|
||||
});
|
||||
});
|
||||
|
||||
describe('fetchSessionDetail', () => {
|
||||
|
|
|
|||
Loading…
Reference in a new issue