fix(team): harden runtime provider recovery

This commit is contained in:
777genius 2026-06-06 21:20:23 +03:00
parent c8a3ad07ac
commit ea56d15712
12 changed files with 242 additions and 64 deletions

View file

@ -233,6 +233,7 @@ import {
import {
deriveMemberLaunchState,
isAutoClearableLaunchFailureReason,
isBootstrapCheckInTimeoutFailureReason,
isCliProvisionedButNotAliveFailureReason,
isNeverSpawnedDuringLaunchReason,
isProvisionedButNotAliveFailureReason,
@ -3468,10 +3469,7 @@ function getMemberInboxRelayPriority(
}
/**
 * Relay priority assigned to a lead inbox message.
 *
 * Work-sync nudges (`messageKind === 'member_work_sync_nudge'`) get priority 30;
 * every other message kind, including a missing kind, gets 0.
 *
 * NOTE(review): whether a larger number is relayed earlier or later is decided by
 * the comparator that consumes this value — confirm there before changing the numbers.
 *
 * @param message - Only `messageKind` is inspected.
 * @returns 30 for a work-sync nudge, otherwise 0.
 */
function getLeadInboxRelayPriority(message: Pick<InboxMessage, 'messageKind'>): number {
  // Single expression: the previous body carried an unreachable second `return`
  // that duplicated the if/else above it.
  return message.messageKind === 'member_work_sync_nudge' ? 30 : 0;
}
function compareInboxRelayMessages(
@ -23980,15 +23978,26 @@ export class TeamProvisioningService {
return 0;
}
const readCommitBatch: (InboxMessage & { messageId: string })[] = [];
for (const m of batch) {
relayedIds.add(m.messageId);
if (m.messageKind !== 'member_work_sync_nudge') {
readCommitBatch.push(m);
relayedIds.add(m.messageId);
continue;
}
if (await this.hasAcceptedMemberWorkSyncReport({ teamName, memberName })) {
readCommitBatch.push(m);
relayedIds.add(m.messageId);
}
}
this.relayedMemberInboxMessageIds.set(relayKey, this.trimRelayedSet(relayedIds));
try {
await this.markInboxMessagesRead(teamName, memberName, batch);
} catch {
// Best-effort: relay succeeded; marking read failed.
if (readCommitBatch.length > 0) {
try {
await this.markInboxMessagesRead(teamName, memberName, readCommitBatch);
} catch {
// Best-effort: relay succeeded; marking read failed.
}
}
return batch.length;
@ -31863,9 +31872,21 @@ export class TeamProvisioningService {
const heartbeatReason = heartbeatMessage
? extractBootstrapFailureReason(heartbeatMessage.text)
: null;
const bootstrapFailureReason =
bootstrapMember?.hardFailure === true &&
!bootstrapMember.bootstrapConfirmed &&
isBootstrapMemberEvidenceCurrentForMember(
currentBootstrapEvidenceBoundary,
bootstrapMember,
'confirmation'
)
? (bootstrapMember.hardFailureReason ?? bootstrapMember.runtimeDiagnostic)
: null;
const acceptedAtMs =
current.firstSpawnAcceptedAt != null ? Date.parse(current.firstSpawnAcceptedAt) : NaN;
const initialFailureReason = current.hardFailureReason ?? current.runtimeDiagnostic;
const hasBootstrapCheckInTimeoutFailure =
isBootstrapCheckInTimeoutFailureReason(initialFailureReason);
const hadAutoClearableFailure = isAutoClearableLaunchFailureReason(initialFailureReason);
const requiresConfirmedBootstrapToClearFailure =
isCliProvisionedButNotAliveFailureReason(initialFailureReason);
@ -31937,6 +31958,8 @@ export class TeamProvisioningService {
const currentProvesSpawnAcceptance =
current.agentToolAccepted === true || typeof current.firstSpawnAcceptedAt === 'string';
if (
!bootstrapFailureReason &&
!hasBootstrapCheckInTimeoutFailure &&
hadAutoClearableFailure &&
!requiresConfirmedBootstrapToClearFailure &&
(bootstrapProvesSpawnAcceptance || currentProvesSpawnAcceptance)
@ -31965,6 +31988,18 @@ export class TeamProvisioningService {
if (heartbeatReason) {
current.hardFailure = true;
current.hardFailureReason = heartbeatReason;
current.runtimeDiagnostic = heartbeatReason;
current.runtimeDiagnosticSeverity = 'error';
current.diagnostics = mergeRuntimeDiagnostics(current.diagnostics, [heartbeatReason]);
current.sources.hardFailureSignal = true;
} else if (bootstrapFailureReason) {
current.hardFailure = true;
current.hardFailureReason = bootstrapFailureReason;
current.runtimeDiagnostic = bootstrapFailureReason;
current.runtimeDiagnosticSeverity = 'error';
current.diagnostics = mergeRuntimeDiagnostics(current.diagnostics, [
bootstrapFailureReason,
]);
current.sources.hardFailureSignal = true;
} else if (heartbeatMessage && !isOpenCodeSecondaryLaneMember) {
current.bootstrapConfirmed = true;

View file

@ -47,7 +47,7 @@ function createRuntimeMember(overrides: Record<string, unknown> = {}): Record<st
bootstrapProofMode: 'native_app_managed_context',
bootstrapContextHash: 'context-hash',
bootstrapBriefingHash: 'briefing-hash',
bootstrapRuntimeEventsPath: '/tmp/tom.runtime.jsonl',
bootstrapRuntimeEventsPath: '/repo/.agent-teams/tom.runtime.jsonl',
...overrides,
};
}

View file

@ -2,6 +2,10 @@ import { extractToolCalls, extractToolResults } from '@main/utils/toolExtraction
import { isLeadMember as isLeadMemberCheck } from '@shared/utils/leadDetection';
import { createLogger } from '@shared/utils/logger';
import { getTaskDisplayId } from '@shared/utils/taskIdentity';
import {
inferTeamProviderIdFromModel,
normalizeOptionalTeamProviderId,
} from '@shared/utils/teamProvider';
import { TeamConfigReader } from '../../TeamConfigReader';
import { TeamMembersMetaStore } from '../../TeamMembersMetaStore';
@ -36,6 +40,8 @@ import type {
BoardTaskLogSegment,
BoardTaskLogStreamResponse,
BoardTaskLogStreamSummary,
TeamMember,
TeamProviderId,
TeamTask,
} from '@shared/types';
@ -104,6 +110,58 @@ function normalizeMemberName(value: string): string {
return value.trim().toLowerCase();
}
/**
 * Reads the provider a member record states explicitly.
 *
 * The current `providerId` field wins; the untyped legacy `provider` field is
 * only consulted when `providerId` does not normalize to a value. Both are
 * passed through `normalizeOptionalTeamProviderId`.
 *
 * @param member - Member record, or `undefined` when no record was found.
 * @returns The normalized provider id, or `undefined` when there is no member
 *   or neither field normalizes to a provider.
 */
function resolveExplicitMemberProviderId(
  member: TeamMember | undefined
): TeamProviderId | undefined {
  if (member) {
    // `provider` is not part of the TeamMember type, so it is read through a
    // widened view of the same object.
    const legacyValue = (member as { provider?: unknown }).provider;
    const fromCurrentField = normalizeOptionalTeamProviderId(member.providerId);
    return fromCurrentField ?? normalizeOptionalTeamProviderId(legacyValue);
  }
  return undefined;
}
/**
 * Infers a provider from the member's `model` value by delegating to
 * `inferTeamProviderIdFromModel`.
 *
 * @param member - Member record, or `undefined`; a missing member forwards
 *   `undefined` as the model.
 * @returns Whatever `inferTeamProviderIdFromModel` yields for that model.
 */
function inferProviderIdFromMemberModel(
  member: TeamMember | undefined
): TeamProviderId | undefined {
  const configuredModel = member?.model;
  return inferTeamProviderIdFromModel(configuredModel);
}
/**
 * Maps a provider backend identifier onto the provider it belongs to.
 *
 * Surrounding whitespace is ignored; matching is case-sensitive.
 *
 * @param providerBackendId - Raw backend id; anything that is not a string is
 *   treated as absent.
 * @returns `'codex'` for `'codex-native'`, `'opencode'` for `'opencode-cli'`,
 *   otherwise `undefined`.
 */
function inferProviderIdFromBackend(providerBackendId: unknown): TeamProviderId | undefined {
  if (typeof providerBackendId !== 'string') {
    return undefined;
  }
  switch (providerBackendId.trim()) {
    case 'codex-native':
      return 'codex';
    case 'opencode-cli':
      return 'opencode';
    default:
      return undefined;
  }
}
/**
 * Decides which provider a member runs on by combining the config record and
 * the members-meta record that share the member's normalized name.
 *
 * Precedence, first hit wins:
 *   1. explicit provider on the meta record
 *   2. explicit provider on the config record
 *   3. provider implied by the config record's `providerBackendId`
 *   4. provider inferred from the config record's model
 *   5. provider implied by the meta record's `providerBackendId`
 *   6. provider inferred from the meta record's model
 *
 * Config-derived inference is tried before meta-derived inference, so a meta
 * record that only carries a model cannot override a config backend id.
 *
 * @param input.configMembers - Members listed in the team config.
 * @param input.metaMembers - Members listed in the members-meta store.
 * @param input.memberName - Name to look up; compared after `normalizeMemberName`.
 * @returns The resolved provider id, or `undefined` when no source yields one.
 */
function resolveProviderFromMemberSources(input: {
  configMembers: readonly TeamMember[];
  metaMembers: readonly TeamMember[];
  memberName: string;
}): TeamProviderId | undefined {
  const targetName = normalizeMemberName(input.memberName);
  const hasTargetName = (candidate: TeamMember): boolean =>
    normalizeMemberName(candidate.name) === targetName;

  const fromConfig = input.configMembers.find(hasTargetName);
  const fromMeta = input.metaMembers.find(hasTargetName);

  // Explicitly stated providers outrank anything inferred.
  const explicitProvider =
    resolveExplicitMemberProviderId(fromMeta) ?? resolveExplicitMemberProviderId(fromConfig);
  if (explicitProvider != null) {
    return explicitProvider;
  }

  // Inference from the config record comes before inference from meta.
  const inferredFromConfig =
    inferProviderIdFromBackend(fromConfig?.providerBackendId) ??
    inferProviderIdFromMemberModel(fromConfig);
  if (inferredFromConfig != null) {
    return inferredFromConfig;
  }

  return (
    inferProviderIdFromBackend(fromMeta?.providerBackendId) ??
    inferProviderIdFromMemberModel(fromMeta)
  );
}
const isBoardMcpToolName = isBoardTaskLogMcpToolName;
const canonicalizeBoardToolName = canonicalizeBoardTaskLogToolName;
@ -2260,10 +2318,13 @@ export class BoardTaskLogStreamService {
return false;
}
const member = [...metaMembers, ...(config?.members ?? [])].find(
(candidate) => normalizeMemberName(candidate.name) === normalizedOwner
return (
resolveProviderFromMemberSources({
configMembers: config?.members ?? [],
metaMembers,
memberName: normalizedOwner,
}) === 'opencode'
);
return member?.providerId === 'opencode';
} catch {
return false;
}

View file

@ -1,4 +1,8 @@
import { getTaskDisplayId } from '@shared/utils/taskIdentity';
import {
inferTeamProviderIdFromModel,
normalizeOptionalTeamProviderId,
} from '@shared/utils/teamProvider';
import { TeamConfigReader } from '../../TeamConfigReader';
import { TeamMembersMetaStore } from '../../TeamMembersMetaStore';
@ -14,6 +18,7 @@ import type {
BoardTaskLogParticipant,
BoardTaskLogSegment,
BoardTaskLogStreamResponse,
TeamProviderId,
TeamTask,
} from '@shared/types';
@ -44,6 +49,31 @@ function buildActor(memberName: string, sessionId: string): BoardTaskLogActor {
};
}
/**
 * Reads the provider a member-like record states explicitly.
 *
 * `providerId` is normalized first; the legacy `provider` field is only
 * normalized when `providerId` produced nothing.
 *
 * @param member - Object that may carry `providerId` and/or `provider`.
 * @returns The result of `normalizeOptionalTeamProviderId` for the first field
 *   that yields a non-nullish value, else its result for `provider`.
 */
function resolveExplicitProviderId(member: {
  providerId?: unknown;
  provider?: unknown;
}): ReturnType<typeof normalizeOptionalTeamProviderId> {
  const fromCurrentField = normalizeOptionalTeamProviderId(member.providerId);
  if (fromCurrentField != null) {
    return fromCurrentField;
  }
  return normalizeOptionalTeamProviderId(member.provider);
}
/**
 * Infers a provider from the record's `model` by delegating to
 * `inferTeamProviderIdFromModel`; a missing record forwards `undefined`.
 */
function inferProviderIdFromMemberModel(member: { model?: string } | undefined) {
  const configuredModel = member?.model;
  return inferTeamProviderIdFromModel(configuredModel);
}
/**
 * Translates a provider backend identifier into its provider id.
 *
 * Non-string input counts as an empty id. The id is trimmed and compared
 * case-sensitively.
 *
 * @param providerBackendId - Raw backend id of unknown shape.
 * @returns `'codex'` for `'codex-native'`, `'opencode'` for `'opencode-cli'`,
 *   `undefined` for everything else.
 */
function inferProviderIdFromBackend(providerBackendId: unknown): TeamProviderId | undefined {
  const backendId = typeof providerBackendId === 'string' ? providerBackendId.trim() : '';
  return backendId === 'codex-native'
    ? 'codex'
    : backendId === 'opencode-cli'
      ? 'opencode'
      : undefined;
}
export class CodexNativeTaskLogStreamSource {
constructor(
private readonly taskReader: TeamTaskReader = new TeamTaskReader(),
@ -171,9 +201,19 @@ export class CodexNativeTaskLogStreamSource {
this.membersMetaStore.getMembers(teamName).catch(() => []),
this.readConfigForObservation(teamName).catch(() => null),
]);
const member = [...metaMembers, ...(config?.members ?? [])].find(
const configMember = (config?.members ?? []).find(
(candidate) => normalizeMemberName(candidate.name) === normalizedOwner
) as { providerId?: string } | undefined;
return member?.providerId === 'codex';
);
const metaMember = metaMembers.find(
(candidate) => normalizeMemberName(candidate.name) === normalizedOwner
);
const providerId =
resolveExplicitProviderId(metaMember ?? {}) ??
resolveExplicitProviderId(configMember ?? {}) ??
inferProviderIdFromBackend(configMember?.providerBackendId) ??
inferProviderIdFromMemberModel(configMember) ??
inferProviderIdFromBackend(metaMember?.providerBackendId) ??
inferProviderIdFromMemberModel(metaMember);
return providerId === 'codex';
}
}

View file

@ -7,8 +7,21 @@ export const OPENCODE_WINDOWS_NODE_MODULES_SYMLINK_PERMISSION_MESSAGE =
const OPENCODE_WINDOWS_ACCESS_DENIED_PATTERN =
/\b(?:EPERM|EACCES)\b|access is denied|permission denied|operation not permitted/i;
const OPENCODE_WINDOWS_NODE_MODULES_SYMLINK_PERMISSION_PATTERN =
/(?=[\s\S]*\bEPERM\b)(?=[\s\S]*operation not permitted)(?=[\s\S]*\bsymlink\b)(?=[\s\S]*opencode)(?=[\s\S]*node_modules)(?=[\s\S]*(?:[A-Z]:\\|AppData\\Local\\claude-multimodel-nodejs))/i;
// Matches the standalone error code "EPERM", ignoring case.
const OPENCODE_WINDOWS_EPERM_CODE_PATTERN = /\bEPERM\b/i;
// Matches a drive-letter path prefix such as "C:\", ignoring case.
const WINDOWS_DRIVE_PATH_PATTERN = /\b[A-Z]:\\/i;

/**
 * Tells whether raw diagnostic text has every marker of the OpenCode
 * node_modules symlink permission failure on Windows.
 *
 * All of the following must be present, in any order and any letter case:
 * the `EPERM` code as a whole word, "operation not permitted", "symlink",
 * "opencode", "node_modules", and a Windows location — either a drive-letter
 * path or the `AppData\Local\claude-multimodel-nodejs` directory.
 *
 * @param value - Diagnostic text to inspect.
 * @returns `true` only when every marker is found.
 */
function isOpenCodeWindowsNodeModulesSymlinkPermissionText(value: string): boolean {
  if (!OPENCODE_WINDOWS_EPERM_CODE_PATTERN.test(value)) {
    return false;
  }

  const lowered = value.toLowerCase();
  const requiredFragments = ['operation not permitted', 'symlink', 'opencode', 'node_modules'];
  if (!requiredFragments.every((fragment) => lowered.includes(fragment))) {
    return false;
  }

  // The failure only counts when the text points at a Windows location.
  return (
    WINDOWS_DRIVE_PATH_PATTERN.test(value) ||
    lowered.includes('appdata\\local\\claude-multimodel-nodejs')
  );
}
export function isOpenCodeWindowsNodeModulesSymlinkPermissionDiagnostic(
value: string | null | undefined
@ -19,7 +32,7 @@ export function isOpenCodeWindowsNodeModulesSymlinkPermissionDiagnostic(
}
return (
trimmed === OPENCODE_WINDOWS_NODE_MODULES_SYMLINK_PERMISSION_MESSAGE ||
OPENCODE_WINDOWS_NODE_MODULES_SYMLINK_PERMISSION_PATTERN.test(trimmed)
isOpenCodeWindowsNodeModulesSymlinkPermissionText(trimmed)
);
}

View file

@ -272,7 +272,7 @@ describe('BoardTaskLogStreamService', () => {
expect(runtimeFallbackSource.getTaskLogStream).toHaveBeenCalledTimes(1);
});
it('merges OpenCode runtime stream when board transcript slices mask member execution', async () => {
it('merges OpenCode runtime stream using config provider when runtime meta has stale model only', async () => {
const lead = {
role: 'lead' as const,
sessionId: 'session-lead',
@ -344,10 +344,13 @@ describe('BoardTaskLogStreamService', () => {
getDeletedTasks: vi.fn(async () => []),
};
const membersMetaStore = {
getMembers: vi.fn(async () => [{ name: 'jack', providerId: 'opencode' }]),
getMembers: vi.fn(async () => [{ name: 'jack', role: 'developer', model: 'gpt-5.5' }]),
};
const configReader = {
getConfig: vi.fn(async () => null),
getConfig: vi.fn(async () => ({
name: 'demo',
members: [{ name: 'jack', providerBackendId: 'opencode-cli', model: 'gpt-5.5' }],
})),
};
const buildBundleChunks = vi.fn((messages: ParsedMessage[]) => [{ id: messages[0]?.uuid }]);

View file

@ -2,8 +2,8 @@ import { describe, expect, it, vi } from 'vitest';
import { CodexNativeTaskLogStreamSource } from '../../../../src/main/services/team/taskLogs/stream/CodexNativeTaskLogStreamSource';
import type { ParsedMessage } from '../../../../src/main/types';
import type { CodexNativeTraceRun } from '../../../../src/main/services/team/taskLogs/stream/CodexNativeTraceReader';
import type { ParsedMessage } from '../../../../src/main/types';
import type { TeamTask } from '../../../../src/shared/types';
function task(overrides: Partial<TeamTask> = {}): TeamTask {
@ -37,16 +37,27 @@ function message(uuid: string, timestamp: string, toolName: string): ParsedMessa
}
describe('CodexNativeTaskLogStreamSource', () => {
it('resolves short task refs, verifies Codex owner, and reads full/display/short trace candidates', async () => {
it('resolves short task refs and keeps config Codex owner when runtime meta has stale model only', async () => {
const taskReader = {
getTasks: vi.fn(async () => [task()]),
getDeletedTasks: vi.fn(async () => []),
};
const membersMetaStore = {
getMembers: vi.fn(async () => [{ name: 'atlas', providerId: 'codex' }]),
getMembers: vi.fn(async () => [
{ name: 'atlas', role: 'developer', model: 'opencode/openai/gpt-oss' },
]),
};
const configReader = {
getConfig: vi.fn(async () => null),
getConfig: vi.fn(async () => ({
name: 'vector-room-131313',
members: [
{
name: 'atlas',
providerBackendId: 'codex-native',
model: 'opencode/openai/gpt-oss',
},
],
})),
};
const traceRuns: CodexNativeTraceRun[] = [
{
@ -122,7 +133,10 @@ describe('CodexNativeTaskLogStreamSource', () => {
getMembers: vi.fn(async () => [{ name: 'alice', providerId: 'anthropic' }]),
} as never,
{
getConfig: vi.fn(async () => null),
getConfig: vi.fn(async () => ({
name: 'vector-room-131313',
members: [{ name: 'alice', providerId: 'codex' }],
})),
} as never,
traceReader as never
);

View file

@ -63,11 +63,7 @@ describe('OpenCode production prompt artifacts safe e2e', () => {
const launchInput = captureAdapter.launchInputs[0];
expect(launchInput).toBeDefined();
expect(launchInput?.prompt ?? '').toContain('production desktop app');
expect(launchInput?.expectedMembers.map((member) => member.name)).toEqual([
'team-lead',
'bob',
'jack',
]);
expect(launchInput?.expectedMembers.map((member) => member.name)).toEqual(['team-lead', 'bob', 'jack']);
expect(launchInput?.prompt?.length ?? 0).toBeGreaterThan(1_500);
const bridgeCapture = createCapturingOpenCodeBridge(selectedModel);
@ -82,11 +78,7 @@ describe('OpenCode production prompt artifacts safe e2e', () => {
expect(launchCommand?.leadPrompt).toContain('OpenCode members bootstrap silently');
expect(launchCommand?.leadPrompt.length ?? 0).toBeGreaterThan(1_500);
expect(launchCommand?.leadPrompt.length ?? 0).toBeLessThan(80_000);
expect(launchCommand?.members.map((member) => member.name)).toEqual([
'team-lead',
'bob',
'jack',
]);
expect(launchCommand?.members.map((member) => member.name)).toEqual(['team-lead', 'bob', 'jack']);
for (const member of launchCommand?.members ?? []) {
expect(member.prompt).toContain(`You are ${member.name}`);

View file

@ -285,7 +285,7 @@ describe('Team agent launch matrix safe e2e', () => {
undefined,
undefined,
undefined,
worktreeManager
worktreeManager as TeamMemberWorktreeManager
);
svc.setRuntimeAdapterRegistry(new TeamRuntimeAdapterRegistry([adapter]));
const progressEvents: TeamProvisioningProgress[] = [];
@ -463,7 +463,7 @@ describe('Team agent launch matrix safe e2e', () => {
expect(runId).toBe(adapter.launchInputs[0]?.runId);
expect(adapter.bootstrapCheckins).toEqual([
{
memberName: 'alice',
memberName: 'team-lead',
runId,
state: 'accepted',
},
@ -535,6 +535,7 @@ describe('Team agent launch matrix safe e2e', () => {
expect(runId).toBe(adapter.launchInputs[0]?.runId);
expect(adapter.launchInputs[0]?.expectedMembers.map((member) => member.name)).toEqual([
'team-lead',
'alice',
'bob',
]);
@ -677,7 +678,7 @@ describe('Team agent launch matrix safe e2e', () => {
const approval = approvalEvents.find(
(event): event is ToolApprovalRequest =>
!('dismissed' in event) && !('autoResolved' in event)
!('dismissed' in event) && !('autoResolved' in event) && event.source === 'alice'
);
expect(approval).toMatchObject({
runId: launch.runId,

View file

@ -20,10 +20,8 @@ import { TeamProvisioningService } from '../../../../src/main/services/team/Team
interface TranscriptIndexHarness {
bootstrapTranscriptOutcomeCache: Map<string, unknown>;
bootstrapTranscriptOutcomeInFlight: Map<string, Promise<unknown>>;
bootstrapTranscriptFileIndexByPath: Map<string, unknown>;
bootstrapTranscriptFileIndexInFlight: Map<string, Promise<unknown>>;
appendBootstrapTranscriptFileIndex: (...args: unknown[]) => Promise<unknown>;
rebuildBootstrapTranscriptFileIndex: (...args: unknown[]) => Promise<unknown>;
parsedBootstrapTranscriptTailCache: Map<string, unknown>;
getParsedBootstrapTranscriptTail: (...args: unknown[]) => Promise<unknown>;
readRecentBootstrapTranscriptOutcome: (
filePath: string,
sinceMs: number | null,
@ -39,8 +37,7 @@ function createTranscriptIndexHarness(): TranscriptIndexHarness {
) as unknown as TranscriptIndexHarness;
service.bootstrapTranscriptOutcomeCache = new Map();
service.bootstrapTranscriptOutcomeInFlight = new Map();
service.bootstrapTranscriptFileIndexByPath = new Map();
service.bootstrapTranscriptFileIndexInFlight = new Map();
service.parsedBootstrapTranscriptTailCache = new Map();
return service;
}
@ -84,17 +81,11 @@ describe('TeamProvisioningService bootstrap transcript index', () => {
);
const service = createTranscriptIndexHarness();
const originalRebuild = service.rebuildBootstrapTranscriptFileIndex.bind(service);
const originalAppend = service.appendBootstrapTranscriptFileIndex.bind(service);
let rebuildCalls = 0;
let appendCalls = 0;
service.rebuildBootstrapTranscriptFileIndex = async (...args: unknown[]) => {
rebuildCalls += 1;
return originalRebuild(...args);
};
service.appendBootstrapTranscriptFileIndex = async (...args: unknown[]) => {
appendCalls += 1;
return originalAppend(...args);
const originalParseTail = service.getParsedBootstrapTranscriptTail.bind(service);
let parseTailCalls = 0;
service.getParsedBootstrapTranscriptTail = async (...args: unknown[]) => {
parseTailCalls += 1;
return originalParseTail(...args);
};
await expect(
@ -110,8 +101,7 @@ describe('TeamProvisioningService bootstrap transcript index', () => {
observedAt: '2026-04-18T10:00:00.000Z',
source: 'member_briefing',
});
expect(rebuildCalls).toBe(1);
expect(appendCalls).toBe(0);
expect(parseTailCalls).toBe(1);
await fs.appendFile(
transcriptPath,
@ -135,7 +125,21 @@ describe('TeamProvisioningService bootstrap transcript index', () => {
observedAt: '2026-04-18T10:01:00.000Z',
reason: 'Bootstrap failed: member_briefing tool is not available',
});
expect(rebuildCalls).toBe(1);
expect(appendCalls).toBe(1);
expect(parseTailCalls).toBe(2);
await expect(
service.readRecentBootstrapTranscriptOutcome(
transcriptPath,
null,
'alice',
'demo-team',
{ contextMemberNames: ['alice'] }
)
).resolves.toEqual({
kind: 'failure',
observedAt: '2026-04-18T10:01:00.000Z',
reason: 'Bootstrap failed: member_briefing tool is not available',
});
expect(parseTailCalls).toBe(2);
});
});

View file

@ -18190,6 +18190,11 @@ describe('TeamProvisioningService', () => {
effort: 'medium',
cwd: tempClaudeRoot,
expectedMembers: [
expect.objectContaining({
name: 'team-lead',
providerId: 'opencode',
model: 'big-pickle',
}),
expect.objectContaining({
name: 'bob',
providerId: 'opencode',

View file

@ -39,7 +39,7 @@ import { ClaudeBinaryResolver } from '@main/services/team/ClaudeBinaryResolver';
import { TeamProvisioningService } from '@main/services/team/TeamProvisioningService';
import { resolveInteractiveShellEnvBestEffort } from '@main/utils/shellEnv';
type CodexProbeHarness = TeamProvisioningService & {
type CodexProbeHarness = {
probeClaudeRuntime: (
claudePath: string,
cwd: string,
@ -89,9 +89,19 @@ describe('TeamProvisioningService Codex create-team preflight', () => {
it('uses refreshed Codex provider env for both runtime probe and deep one-shot preflight', async () => {
const service = new TeamProvisioningService();
const harness = service as unknown as CodexProbeHarness;
const probeClaudeRuntime = vi.spyOn(harness, 'probeClaudeRuntime').mockResolvedValue({});
const probeClaudeRuntime = vi
.spyOn(
harness as unknown as { probeClaudeRuntime: CodexProbeHarness['probeClaudeRuntime'] },
'probeClaudeRuntime'
)
.mockResolvedValue({});
const runProviderOneShotDiagnostic = vi
.spyOn(harness, 'runProviderOneShotDiagnostic')
.spyOn(
harness as unknown as {
runProviderOneShotDiagnostic: CodexProbeHarness['runProviderOneShotDiagnostic'];
},
'runProviderOneShotDiagnostic'
)
.mockResolvedValue({});
const result = await service.prepareForProvisioning(tempRoot, {