feat(team): add bootstrap recovery and app integration

This commit is contained in:
iliya 2026-04-07 00:43:38 +03:00
parent d2cd655c11
commit 8ef89eefce
6 changed files with 1563 additions and 37 deletions

View file

@ -175,7 +175,7 @@ export function registerTeamRoutes(app: FastifyInstance, services: HttpServices)
const teamProvisioningService = getTeamProvisioningService(services);
teamProvisioningService.stopTeam(validatedTeamName.value!);
return reply.send(teamProvisioningService.getRuntimeState(validatedTeamName.value!));
return reply.send(await teamProvisioningService.getRuntimeState(validatedTeamName.value!));
} catch (error) {
if (shouldLogError(error)) {
logger.error(
@ -198,7 +198,7 @@ export function registerTeamRoutes(app: FastifyInstance, services: HttpServices)
}
return reply.send(
getTeamProvisioningService(services).getRuntimeState(validatedTeamName.value!)
await getTeamProvisioningService(services).getRuntimeState(validatedTeamName.value!)
);
} catch (error) {
if (shouldLogError(error)) {
@ -236,9 +236,11 @@ export function registerTeamRoutes(app: FastifyInstance, services: HttpServices)
app.get('/api/teams/runtime/alive', async (_request, reply) => {
try {
const teamProvisioningService = getTeamProvisioningService(services);
const runtimeStates = teamProvisioningService
.getAliveTeams()
.map((teamName) => teamProvisioningService.getRuntimeState(teamName));
const runtimeStates = await Promise.all(
teamProvisioningService
.getAliveTeams()
.map((teamName) => teamProvisioningService.getRuntimeState(teamName))
);
return reply.send(runtimeStates);
} catch (error) {
if (shouldLogError(error)) {

View file

@ -0,0 +1,713 @@
import { getTeamsBasePath } from '@main/utils/pathDecoder';
import { createPersistedLaunchSnapshot } from './TeamLaunchStateEvaluator';
import * as fs from 'fs';
import * as path from 'path';
import type {
PersistedTeamLaunchMemberState,
PersistedTeamLaunchSnapshot,
TeamProvisioningProgress,
TeamRuntimeState,
} from '@shared/types';
// File names, relative to a team's directory, of the persisted bootstrap artifacts read by this module.
const TEAM_BOOTSTRAP_STATE_FILE = 'bootstrap-state.json';
const TEAM_BOOTSTRAP_JOURNAL_FILE = 'bootstrap-journal.jsonl';
// Lock directory inside the team directory and the metadata file within it that identifies the lock owner.
const TEAM_BOOTSTRAP_LOCK_DIR = '.bootstrap.lock';
const TEAM_BOOTSTRAP_LOCK_METADATA_FILE = 'metadata.json';
// Size ceilings compared against lstat().size before reading; a larger file is treated as unreadable.
const MAX_BOOTSTRAP_STATE_BYTES = 256 * 1024;
const MAX_BOOTSTRAP_JOURNAL_BYTES = 256 * 1024;
const MAX_BOOTSTRAP_LOCK_METADATA_BYTES = 64 * 1024;
// Minimum age (3 minutes) of the persisted `updatedAt` for a non-terminal bootstrap with a dead owner
// to be classified as stale rather than merely interrupted.
const ACTIVE_BOOTSTRAP_STUCK_CLASSIFICATION_MS = 3 * 60 * 1000;
// Unvalidated shape of one entry of `members` in bootstrap-state.json.
// Every field is `unknown` because the value comes straight from JSON.parse and is narrowed at use.
type RawBootstrapMemberState = {
name?: unknown;
status?: unknown;
lastAttemptAt?: unknown;
lastObservedAt?: unknown;
failureReason?: unknown;
};
// Unvalidated top-level shape of bootstrap-state.json. Only `version === 1` is accepted by the reader.
type RawBootstrapState = {
version?: unknown;
runId?: unknown;
teamName?: unknown;
startedAt?: unknown;
ownerPid?: unknown;
updatedAt?: unknown;
phase?: unknown;
realTaskSubmissionState?: unknown;
members?: unknown;
terminal?: unknown;
};
// Unvalidated shape of one parsed line of bootstrap-journal.jsonl, discriminated by `type`.
type RawBootstrapJournalRecord =
| { ts?: unknown; type?: 'phase'; phase?: unknown }
| { ts?: unknown; type?: 'lock'; action?: unknown; ownerPid?: unknown; detail?: unknown }
| { ts?: unknown; type?: 'member'; name?: unknown; action?: unknown; detail?: unknown }
| { ts?: unknown; type?: 'terminal'; status?: unknown; reason?: unknown }
| { ts?: unknown; type?: 'real_task'; state?: unknown; detail?: unknown };
// Unvalidated shape of the lock directory's metadata.json.
type RawBootstrapLockMetadata = {
pid?: unknown;
runId?: unknown;
requestHash?: unknown;
ownerStartedAt?: unknown;
createdAt?: unknown;
nonce?: unknown;
};
// Result of inspecting bootstrap-state.json: `raw` is the parsed state or null;
// `issue` is set when the file exists but could not be used (absent when the file is simply missing).
type BootstrapStateInspection = {
raw: RawBootstrapState | null;
issue?: string;
};
// Result of inspecting bootstrap-journal.jsonl: `warnings` summarises recent events,
// `issue` explains why the journal could not be read.
type BootstrapJournalInspection = {
warnings?: string[];
issue?: string;
};
// Validated subset of the lock metadata: a positive finite pid, a non-empty trimmed runId,
// and an optional finite owner start timestamp.
type BootstrapLockMetadata = {
pid: number;
runId: string;
ownerStartedAt?: number;
};
// Phases a bootstrap run can persist in `phase`; the last three are terminal.
type BootstrapRuntimePhase =
| 'validating_spec'
| 'loading_existing_state'
| 'acquiring_bootstrap_lock'
| 'creating_team'
| 'spawning_members'
| 'auditing_truth'
| 'completed'
| 'failed'
| 'canceled';
/**
 * Tells whether a bootstrap phase is an end state.
 *
 * @param phase - Persisted bootstrap phase.
 * @returns `true` for `completed`, `failed` and `canceled`; `false` for every other value.
 */
function isBootstrapPhaseTerminal(phase: BootstrapRuntimePhase): boolean {
  switch (phase) {
    case 'completed':
    case 'failed':
    case 'canceled':
      return true;
    default:
      return false;
  }
}
/**
 * Probes whether a process with the given pid currently exists.
 *
 * Signal 0 performs the existence/permission check without delivering a signal.
 *
 * @param pid - Process id to probe.
 * @returns `false` for non-finite or non-positive pids; `true` when the probe succeeds or is
 *   rejected with `EPERM` (the process exists but may not be signalled); `false` for any other error.
 */
function isProcessAlive(pid: number): boolean {
  const plausiblePid = Number.isFinite(pid) && pid > 0;
  if (!plausiblePid) {
    return false;
  }
  try {
    process.kill(pid, 0);
  } catch (error) {
    const code = (error as NodeJS.ErrnoException | undefined)?.code;
    return code === 'EPERM';
  }
  return true;
}
/**
 * Decides whether the process that owns a persisted bootstrap run has disappeared mid-run.
 *
 * The owner is only considered dead when the persisted phase is a non-empty, non-terminal string,
 * `ownerPid` is a number, and that pid does not answer the liveness probe.
 *
 * @param raw - Parsed bootstrap state.
 * @returns `ownerDead` and, when dead, whether the state is also `stale` (its `updatedAt` is at
 *   least ACTIVE_BOOTSTRAP_STUCK_CLASSIFICATION_MS old) plus a human-readable `failureReason`.
 */
function classifyBootstrapOwnerState(raw: RawBootstrapState): {
  ownerDead: boolean;
  stale: boolean;
  failureReason?: string;
} {
  const phase = typeof raw.phase === 'string' ? (raw.phase as BootstrapRuntimePhase) : null;
  const stillRunning = Boolean(phase) && !isBootstrapPhaseTerminal(phase as BootstrapRuntimePhase);
  if (!stillRunning) {
    return { ownerDead: false, stale: false };
  }

  const ownerPid = typeof raw.ownerPid === 'number' ? raw.ownerPid : null;
  if (ownerPid === null || isProcessAlive(ownerPid)) {
    return { ownerDead: false, stale: false };
  }

  // `updatedAt` may be persisted either as epoch milliseconds or as a date string.
  let updatedAtMs = NaN;
  if (typeof raw.updatedAt === 'number') {
    updatedAtMs = raw.updatedAt;
  } else if (typeof raw.updatedAt === 'string') {
    updatedAtMs = Date.parse(raw.updatedAt);
  }

  const stale =
    Number.isFinite(updatedAtMs) &&
    Date.now() - updatedAtMs >= ACTIVE_BOOTSTRAP_STUCK_CLASSIFICATION_MS;

  let failureReason: string;
  if (stale) {
    failureReason = `bootstrap owner pid ${ownerPid} is gone and persisted bootstrap state is stale`;
  } else {
    failureReason = `bootstrap owner pid ${ownerPid} is gone before bootstrap reached a terminal state`;
  }
  return { ownerDead: true, stale, failureReason };
}
/**
 * Reads and sanity-checks a team's bootstrap-state.json.
 *
 * The file is rejected (with an explanatory `issue`) when it is a symlink, not a regular file,
 * larger than MAX_BOOTSTRAP_STATE_BYTES, has a `version` other than 1, or cannot be read/parsed.
 * A missing file is not an issue: the result is `{ raw: null }`.
 *
 * @param teamName - Team whose state file is inspected.
 * @returns The parsed state, or `raw: null` with an optional `issue`.
 */
async function inspectBootstrapState(teamName: string): Promise<BootstrapStateInspection> {
  const unusable = (issue: string): BootstrapStateInspection => ({ raw: null, issue });
  const statePath = getTeamBootstrapStatePath(teamName);

  try {
    // lstat (not stat) so that a symlink is detected instead of followed.
    const info = await fs.promises.lstat(statePath);
    if (info.isSymbolicLink()) {
      return unusable(
        'Persisted deterministic bootstrap state is unreadable because bootstrap-state.json is a symlink.'
      );
    }
    if (!info.isFile()) {
      return unusable(
        'Persisted deterministic bootstrap state is unreadable because bootstrap-state.json is not a regular file.'
      );
    }
    if (info.size > MAX_BOOTSTRAP_STATE_BYTES) {
      return unusable(
        'Persisted deterministic bootstrap state is unreadable because bootstrap-state.json is oversized.'
      );
    }

    const text = await fs.promises.readFile(statePath, 'utf8');
    const parsed = JSON.parse(text) as RawBootstrapState;
    if (parsed.version !== 1) {
      return unusable(
        'Persisted deterministic bootstrap state is unreadable because bootstrap-state.json has an unsupported schema version.'
      );
    }
    return { raw: parsed };
  } catch (error) {
    const code = (error as NodeJS.ErrnoException | undefined)?.code;
    if (code === 'ENOENT') {
      return { raw: null };
    }
    return unusable(
      'Persisted deterministic bootstrap state is unreadable because bootstrap-state.json is invalid, truncated, or inaccessible.'
    );
  }
}
/**
 * Convenience wrapper around the state inspection that discards the `issue`.
 *
 * @param teamName - Team whose state file is read.
 * @returns The parsed bootstrap state, or `null` when it is missing or unusable.
 */
async function readRawBootstrapState(teamName: string): Promise<RawBootstrapState | null> {
  const { raw } = await inspectBootstrapState(teamName);
  return raw;
}
/**
 * Maps a bootstrap phase onto the provisioning progress state and message shown for it.
 *
 * @param phase - Persisted bootstrap phase.
 * @param memberCount - Number of members; appended to the `spawning_members` message when positive.
 * @returns The projected `{ state, message }`, or `null` for a phase that is not recognised.
 */
function getBootstrapProgressProjection(
  phase: BootstrapRuntimePhase,
  memberCount: number
): { state: Exclude<TeamProvisioningProgress['state'], 'idle'>; message: string } | null {
  const spawningMessage =
    memberCount > 0 ? `Spawning teammate runtimes (${memberCount})` : 'Spawning teammate runtimes';

  // A Map (rather than a plain object) so unknown phase strings never hit inherited properties.
  const projections = new Map<
    string,
    NonNullable<ReturnType<typeof getBootstrapProgressProjection>>
  >([
    ['validating_spec', { state: 'validating', message: 'Validating deterministic bootstrap spec' }],
    ['loading_existing_state', { state: 'configuring', message: 'Loading existing team state' }],
    [
      'acquiring_bootstrap_lock',
      { state: 'configuring', message: 'Acquiring deterministic bootstrap lock' },
    ],
    ['creating_team', { state: 'assembling', message: 'Creating team config' }],
    ['spawning_members', { state: 'assembling', message: spawningMessage }],
    [
      'auditing_truth',
      { state: 'finalizing', message: 'Auditing registered teammates and bootstrap truth' },
    ],
    ['completed', { state: 'ready', message: 'Deterministic bootstrap completed' }],
    ['failed', { state: 'failed', message: 'Deterministic bootstrap failed' }],
    ['canceled', { state: 'cancelled', message: 'Deterministic bootstrap cancelled' }],
  ]);

  return projections.get(phase) ?? null;
}
/**
 * Normalises a persisted timestamp into a string, falling back when it is unusable.
 *
 * @param value - Either a non-blank string (returned unchanged, without validation) or a positive
 *   finite epoch-millisecond number (converted to an ISO-8601 string).
 * @param fallback - Returned for every other input.
 * @returns The normalised timestamp or `fallback`. Never throws.
 */
function toIso(value: unknown, fallback: string): string {
  if (typeof value === 'string' && value.trim().length > 0) {
    return value;
  }
  if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
    const date = new Date(value);
    // A finite number can still exceed the representable Date range (±8.64e15 ms); such a Date is
    // invalid and toISOString() would throw RangeError, so treat it like any other bad value.
    if (!Number.isNaN(date.getTime())) {
      return date.toISOString();
    }
  }
  return fallback;
}
/**
 * Converts one raw bootstrap member entry into the persisted launch-state shape.
 *
 * The raw `status` string (default `pending`) drives everything: `failed` is a hard failure,
 * `bootstrap_confirmed` implies a live runtime, and `runtime_alive`, `registered`,
 * `spawn_started` and `failed` all count as the spawn having been accepted.
 *
 * @param memberName - Name recorded on the resulting entry.
 * @param raw - Unvalidated member entry from the state file.
 * @param updatedAt - Fallback timestamp used when the entry's own timestamps are unusable.
 * @returns The normalised member launch state.
 */
function normalizeBootstrapMemberState(
  memberName: string,
  raw: RawBootstrapMemberState,
  updatedAt: string
): PersistedTeamLaunchMemberState {
  const status = typeof raw.status === 'string' ? raw.status : 'pending';
  const hardFailure = status === 'failed';
  const bootstrapConfirmed = status === 'bootstrap_confirmed';
  const runtimeAlive = bootstrapConfirmed || status === 'runtime_alive';
  const registered = status === 'registered';
  const agentToolAccepted =
    runtimeAlive || registered || status === 'spawn_started' || hardFailure;

  const trimmedReason = typeof raw.failureReason === 'string' ? raw.failureReason.trim() : '';
  const failureReason = trimmedReason.length > 0 ? trimmedReason : undefined;

  let launchState: PersistedTeamLaunchMemberState['launchState'];
  if (hardFailure) {
    launchState = 'failed_to_start';
  } else if (bootstrapConfirmed) {
    launchState = 'confirmed_alive';
  } else if (agentToolAccepted) {
    // Covers both a live runtime and a merely accepted spawn.
    launchState = 'runtime_pending_bootstrap';
  } else {
    launchState = 'starting';
  }

  let diagnostics: string[] | undefined;
  if (hardFailure) {
    diagnostics = [failureReason ?? 'deterministic bootstrap failed'];
  } else if (bootstrapConfirmed) {
    diagnostics = ['late heartbeat received'];
  } else if (runtimeAlive) {
    diagnostics = ['runtime alive', 'waiting for bootstrap'];
  } else if (agentToolAccepted) {
    diagnostics = ['spawn accepted'];
  }

  const firstSpawnAcceptedAt = agentToolAccepted ? toIso(raw.lastAttemptAt, updatedAt) : undefined;
  const observedAt = toIso(raw.lastObservedAt, updatedAt);

  return {
    name: memberName,
    launchState,
    agentToolAccepted,
    runtimeAlive,
    bootstrapConfirmed,
    hardFailure,
    hardFailureReason: failureReason,
    firstSpawnAcceptedAt,
    lastHeartbeatAt: bootstrapConfirmed ? observedAt : undefined,
    lastRuntimeAliveAt: runtimeAlive ? observedAt : undefined,
    lastEvaluatedAt: observedAt,
    sources: {
      configRegistered: registered || runtimeAlive || hardFailure,
      // `true` or absent, never `false`.
      processAlive: runtimeAlive || undefined,
      hardFailureSignal: hardFailure || undefined,
    },
    diagnostics,
  };
}
/**
 * Builds the path of a team's bootstrap state file.
 *
 * @param teamName - Team directory name under the teams base path.
 * @returns `<teams base>/<teamName>/bootstrap-state.json`.
 */
export function getTeamBootstrapStatePath(teamName: string): string {
  const teamDir = path.join(getTeamsBasePath(), teamName);
  return path.join(teamDir, TEAM_BOOTSTRAP_STATE_FILE);
}
/**
 * Builds the path of a team's bootstrap journal file.
 *
 * @param teamName - Team directory name under the teams base path.
 * @returns `<teams base>/<teamName>/bootstrap-journal.jsonl`.
 */
function getTeamBootstrapJournalPath(teamName: string): string {
  const teamDir = path.join(getTeamsBasePath(), teamName);
  return path.join(teamDir, TEAM_BOOTSTRAP_JOURNAL_FILE);
}
/**
 * Builds the path of the metadata file inside a team's bootstrap lock directory.
 *
 * @param teamName - Team directory name under the teams base path.
 * @returns `<teams base>/<teamName>/.bootstrap.lock/metadata.json`.
 */
function getTeamBootstrapLockMetadataPath(teamName: string): string {
  const segments = [
    getTeamsBasePath(),
    teamName,
    TEAM_BOOTSTRAP_LOCK_DIR,
    TEAM_BOOTSTRAP_LOCK_METADATA_FILE,
  ];
  return path.join(...segments);
}
/**
 * Reads and validates the bootstrap lock's metadata file.
 *
 * @param teamName - Team whose lock metadata is read.
 * @returns The validated metadata, or `null` when the file is missing, a symlink, not a regular
 *   file, larger than MAX_BOOTSTRAP_LOCK_METADATA_BYTES, unparsable, or lacks a positive finite
 *   `pid` / non-blank `runId`.
 */
async function readBootstrapLockMetadata(teamName: string): Promise<BootstrapLockMetadata | null> {
  const metadataPath = getTeamBootstrapLockMetadataPath(teamName);
  try {
    const info = await fs.promises.lstat(metadataPath);
    const readable =
      !info.isSymbolicLink() && info.isFile() && info.size <= MAX_BOOTSTRAP_LOCK_METADATA_BYTES;
    if (!readable) {
      return null;
    }

    const text = await fs.promises.readFile(metadataPath, 'utf8');
    const parsed = JSON.parse(text) as RawBootstrapLockMetadata;
    const { pid, runId, ownerStartedAt } = parsed;

    if (typeof pid !== 'number' || !Number.isFinite(pid) || pid <= 0) {
      return null;
    }
    if (typeof runId !== 'string' || runId.trim().length === 0) {
      return null;
    }

    const hasStartTime = typeof ownerStartedAt === 'number' && Number.isFinite(ownerStartedAt);
    return {
      pid,
      runId: runId.trim(),
      ownerStartedAt: hasStartTime ? (ownerStartedAt as number) : undefined,
    };
  } catch {
    // Any read/parse failure means there is no usable lock metadata.
    return null;
  }
}
/**
 * Convenience wrapper around the journal inspection that keeps only the warnings.
 *
 * @param teamName - Team whose journal is read.
 * @returns The journal warnings, or `undefined` when there are none or the journal is unusable.
 */
async function readBootstrapJournalWarnings(teamName: string): Promise<string[] | undefined> {
  const { warnings } = await inspectBootstrapJournal(teamName);
  return warnings;
}
/**
 * Summarises the tail of a team's bootstrap journal.
 *
 * Only the last three non-blank lines are considered; lines that are not valid JSON or do not
 * match a known record shape are dropped. The surviving events are joined into one warning string.
 *
 * @param teamName - Team whose journal is inspected.
 * @returns `warnings` with a single summary entry (or `undefined` when nothing usable remains),
 *   an `issue` when the journal is a symlink, not a regular file, oversized or unreadable,
 *   or an empty object when the journal does not exist.
 */
async function inspectBootstrapJournal(teamName: string): Promise<BootstrapJournalInspection> {
  const hasText = (value: unknown): value is string =>
    typeof value === 'string' && value.trim().length > 0;

  // Renders one journal record as text, or null when its required fields are missing.
  const describeRecord = (record: RawBootstrapJournalRecord): string | null => {
    switch (record.type) {
      case 'phase':
        return typeof record.phase === 'string' ? `bootstrap phase: ${record.phase}` : null;
      case 'lock': {
        if (typeof record.action !== 'string') return null;
        const owner = typeof record.ownerPid === 'number' ? ` (pid ${record.ownerPid})` : '';
        return `bootstrap lock ${record.action}${owner}`;
      }
      case 'member': {
        if (typeof record.name !== 'string' || typeof record.action !== 'string') return null;
        return hasText(record.detail)
          ? `${record.name}: ${record.action} (${record.detail.trim()})`
          : `${record.name}: ${record.action}`;
      }
      case 'terminal': {
        if (typeof record.status !== 'string') return null;
        return hasText(record.reason)
          ? `bootstrap ${record.status}: ${record.reason.trim()}`
          : `bootstrap ${record.status}`;
      }
      case 'real_task': {
        if (typeof record.state !== 'string') return null;
        return hasText(record.detail)
          ? `first task ${record.state}: ${record.detail.trim()}`
          : `first task ${record.state}`;
      }
      default:
        return null;
    }
  };

  const journalPath = getTeamBootstrapJournalPath(teamName);
  try {
    // lstat (not stat) so that a symlink is detected instead of followed.
    const info = await fs.promises.lstat(journalPath);
    if (info.isSymbolicLink()) {
      return {
        issue:
          'Persisted deterministic bootstrap journal is unreadable because bootstrap-journal.jsonl is a symlink.',
      };
    }
    if (!info.isFile()) {
      return {
        issue:
          'Persisted deterministic bootstrap journal is unreadable because bootstrap-journal.jsonl is not a regular file.',
      };
    }
    if (info.size > MAX_BOOTSTRAP_JOURNAL_BYTES) {
      return {
        issue:
          'Persisted deterministic bootstrap journal is unreadable because bootstrap-journal.jsonl is oversized.',
      };
    }

    const journalText = await fs.promises.readFile(journalPath, 'utf8');
    const recentLines = journalText
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line.length > 0)
      .slice(-3);

    const events: string[] = [];
    for (const line of recentLines) {
      let record: RawBootstrapJournalRecord | null;
      try {
        record = JSON.parse(line) as RawBootstrapJournalRecord;
      } catch {
        continue;
      }
      if (!record) continue;
      const text = describeRecord(record);
      if (text) events.push(text);
    }

    if (events.length === 0) {
      return { warnings: undefined };
    }
    return { warnings: [`Recent deterministic bootstrap events: ${events.join(' | ')}`] };
  } catch (error) {
    const code = (error as NodeJS.ErrnoException | undefined)?.code;
    if (code === 'ENOENT') {
      return {};
    }
    return {
      issue:
        'Persisted deterministic bootstrap journal is unreadable because bootstrap-journal.jsonl is invalid, truncated, or inaccessible.',
    };
  }
}
/**
 * Builds a best-effort runtime state when bootstrap-state.json exists but cannot be used.
 *
 * The lock metadata supplies the run id and owner pid; the journal contributes warnings. The
 * state is reported as `configuring` while the lock owner is alive and `failed` otherwise.
 *
 * @param teamName - Team whose degraded state is reconstructed.
 * @param stateIssue - Why the state file was unusable; surfaced as the error and first warning.
 * @returns The reconstructed runtime state, or `null` when no valid lock metadata is available.
 */
async function readDegradedBootstrapRuntimeState(
  teamName: string,
  stateIssue: string
): Promise<TeamRuntimeState | null> {
  const lockMetadata = await readBootstrapLockMetadata(teamName);
  if (!lockMetadata) {
    return null;
  }
  const journalInspection = await inspectBootstrapJournal(teamName);
  const warnings = [
    stateIssue,
    journalInspection.issue,
    ...(journalInspection.warnings ?? []),
  ].filter((item): item is string => typeof item === 'string' && item.trim().length > 0);
  const ownerAlive = isProcessAlive(lockMetadata.pid);
  const now = new Date().toISOString();

  // A finite ownerStartedAt can still lie outside the representable Date range, in which case
  // toISOString() throws RangeError. This function runs outside the caller's try/catch, so fall
  // back to `now` instead of rejecting.
  let startedAt = now;
  if (
    typeof lockMetadata.ownerStartedAt === 'number' &&
    Number.isFinite(lockMetadata.ownerStartedAt)
  ) {
    const ownerStart = new Date(lockMetadata.ownerStartedAt);
    if (!Number.isNaN(ownerStart.getTime())) {
      startedAt = ownerStart.toISOString();
    }
  }

  return {
    teamName,
    isAlive: false,
    runId: lockMetadata.runId,
    progress: {
      runId: lockMetadata.runId,
      teamName,
      state: ownerAlive ? 'configuring' : 'failed',
      message: ownerAlive
        ? 'Deterministic bootstrap recovery is degraded because persisted bootstrap state is unreadable'
        : 'Deterministic bootstrap recovery failed because persisted bootstrap state is unreadable and the bootstrap owner is gone',
      messageSeverity: 'warning',
      error: ownerAlive
        ? stateIssue
        : `${stateIssue} Bootstrap owner pid ${lockMetadata.pid} is not alive.`,
      warnings: warnings.length > 0 ? warnings : undefined,
      startedAt,
      updatedAt: now,
      pid: lockMetadata.pid,
    },
  };
}
/**
 * Derives a persisted launch snapshot from a team's bootstrap state file.
 *
 * Members named `team-lead` or `user`, and entries without a usable name, are ignored. The launch
 * is `finished` once the bootstrap reached a terminal status/phase or its owner process is gone.
 * In the latter case every member that is neither confirmed alive nor already failed is marked as
 * `failed_to_start` with the owner-death reason.
 *
 * @param teamName - Team whose bootstrap state is read; also the fallback snapshot team name.
 * @returns The snapshot, or `null` when there is no usable state or building it throws.
 */
export async function readBootstrapLaunchSnapshot(
  teamName: string
): Promise<PersistedTeamLaunchSnapshot | null> {
  const raw = await readRawBootstrapState(teamName);
  if (!raw) {
    return null;
  }

  try {
    const updatedAt = toIso(raw.updatedAt, new Date().toISOString());
    const members: Record<string, PersistedTeamLaunchMemberState> = {};
    const expectedMembers: string[] = [];

    const candidates: unknown[] = Array.isArray(raw.members) ? raw.members : [];
    for (const candidate of candidates) {
      if (!candidate || typeof candidate !== 'object') continue;
      const rawMember = candidate as RawBootstrapMemberState;
      const memberName = typeof rawMember.name === 'string' ? rawMember.name.trim() : '';
      const reserved = memberName === 'team-lead' || memberName === 'user';
      if (!memberName || reserved) continue;
      expectedMembers.push(memberName);
      members[memberName] = normalizeBootstrapMemberState(memberName, rawMember, updatedAt);
    }

    const terminal =
      raw.terminal && typeof raw.terminal === 'object'
        ? (raw.terminal as Record<string, unknown>)
        : null;
    const terminalStatus = typeof terminal?.status === 'string' ? terminal.status : undefined;
    const phase = typeof raw.phase === 'string' ? raw.phase : undefined;
    const ownerState = classifyBootstrapOwnerState(raw);

    const terminalReached =
      terminalStatus !== undefined &&
      ['completed', 'partial_success', 'failed', 'canceled'].includes(terminalStatus);
    const phaseEnded = phase !== undefined && ['completed', 'failed', 'canceled'].includes(phase);
    const launchPhase =
      terminalReached || ownerState.ownerDead || phaseEnded ? 'finished' : 'active';

    if (ownerState.ownerDead) {
      const reason = ownerState.failureReason;
      for (const memberName of expectedMembers) {
        const current = members[memberName];
        if (!current) continue;
        // Settled members keep their outcome; only in-flight ones are failed.
        const settled =
          current.launchState === 'confirmed_alive' || current.launchState === 'failed_to_start';
        if (settled) continue;
        members[memberName] = {
          ...current,
          launchState: 'failed_to_start',
          hardFailure: true,
          hardFailureReason: reason,
          diagnostics: reason ? [reason] : current.diagnostics,
          sources: {
            ...current.sources,
            hardFailureSignal: true,
          },
        };
      }
    }

    const persistedName = typeof raw.teamName === 'string' ? raw.teamName.trim() : '';
    return createPersistedLaunchSnapshot({
      teamName: persistedName.length > 0 ? persistedName : teamName,
      expectedMembers,
      launchPhase,
      members,
      updatedAt,
    });
  } catch {
    return null;
  }
}
/**
 * Reads the persisted submission state of the first real task.
 *
 * @param teamName - Team whose bootstrap state is read.
 * @returns `not_submitted`, `submitted` or `unknown` as persisted; `null` when the state file is
 *   missing/unusable or holds any other value.
 */
export async function readBootstrapRealTaskSubmissionState(
  teamName: string
): Promise<'not_submitted' | 'submitted' | 'unknown' | null> {
  const raw = await readRawBootstrapState(teamName);
  if (!raw) {
    return null;
  }
  switch (raw.realTaskSubmissionState) {
    case 'not_submitted':
      return 'not_submitted';
    case 'submitted':
      return 'submitted';
    case 'unknown':
      return 'unknown';
    default:
      return null;
  }
}
/**
 * Projects a team's persisted bootstrap state onto a runtime state for display.
 *
 * - Unusable state file: delegates to the degraded reader (or returns `null` if the file is absent).
 * - Owner process gone before a terminal phase: reports a `failed` progress.
 * - Active phase: reports the matching in-progress projection.
 * - Terminal or unrecognised phase: returns `null`.
 *
 * @param teamName - Team whose bootstrap state is read; also the fallback team name.
 * @returns The runtime state, or `null` when nothing should be reported or projection throws.
 */
export async function readBootstrapRuntimeState(
  teamName: string
): Promise<TeamRuntimeState | null> {
  const { raw, issue } = await inspectBootstrapState(teamName);
  if (!raw) {
    return issue ? readDegradedBootstrapRuntimeState(teamName, issue) : null;
  }

  try {
    const journalWarnings = await readBootstrapJournalWarnings(teamName);
    const phase = typeof raw.phase === 'string' ? (raw.phase as BootstrapRuntimePhase) : null;
    if (!phase) {
      return null;
    }

    const persistedTeamName = typeof raw.teamName === 'string' ? raw.teamName.trim() : '';
    const resolvedTeamName = persistedTeamName.length > 0 ? persistedTeamName : teamName;

    const ownerState = classifyBootstrapOwnerState(raw);
    if (ownerState.ownerDead) {
      const startedAt = toIso(raw.startedAt, new Date().toISOString());
      const updatedAt = toIso(raw.updatedAt, startedAt);
      const persistedRunId = typeof raw.runId === 'string' ? raw.runId : null;
      const message = ownerState.stale
        ? 'Deterministic bootstrap became stuck after owner process exited'
        : 'Deterministic bootstrap owner exited before bootstrap completed';
      return {
        teamName: resolvedTeamName,
        isAlive: false,
        runId: persistedRunId,
        progress: {
          runId: persistedRunId ?? teamName,
          teamName: resolvedTeamName,
          state: 'failed',
          message,
          error: ownerState.failureReason,
          warnings: journalWarnings,
          startedAt,
          updatedAt,
          ...(typeof raw.ownerPid === 'number' ? { pid: raw.ownerPid } : {}),
        },
      };
    }

    // Terminal phases are not reported through this projection.
    const inProgressPhases: BootstrapRuntimePhase[] = [
      'validating_spec',
      'loading_existing_state',
      'acquiring_bootstrap_lock',
      'creating_team',
      'spawning_members',
      'auditing_truth',
    ];
    if (!inProgressPhases.includes(phase)) {
      return null;
    }

    const memberCount = Array.isArray(raw.members) ? raw.members.length : 0;
    const projection = getBootstrapProgressProjection(phase, memberCount);
    if (!projection) {
      return null;
    }

    const startedAt = toIso(raw.startedAt, new Date().toISOString());
    const updatedAt = toIso(raw.updatedAt, startedAt);
    const runId = typeof raw.runId === 'string' && raw.runId.trim().length > 0 ? raw.runId : null;
    const ownerPid = raw.ownerPid;
    const pid =
      typeof ownerPid === 'number' && Number.isFinite(ownerPid) && ownerPid > 0
        ? ownerPid
        : undefined;

    const progress: TeamProvisioningProgress = {
      runId: runId ?? `bootstrap:${teamName}`,
      teamName: resolvedTeamName,
      state: projection.state,
      message: projection.message,
      warnings: journalWarnings,
      startedAt,
      updatedAt,
      ...(pid ? { pid } : {}),
    };
    return { teamName: resolvedTeamName, isAlive: false, runId, progress };
  } catch {
    return null;
  }
}
/**
 * Deletes a team's bootstrap state file on a best-effort basis.
 *
 * @param teamName - Team whose bootstrap-state.json is removed.
 * @returns Resolves once the attempt finishes; failures are deliberately ignored.
 */
export async function clearBootstrapState(teamName: string): Promise<void> {
  try {
    const statePath = getTeamBootstrapStatePath(teamName);
    await fs.promises.rm(statePath, { force: true });
  } catch {
    // Best-effort cleanup: a leftover state file is tolerated.
  }
}
/**
 * Picks which of two snapshots to trust.
 *
 * When only one snapshot exists it wins. When both carry parseable `updatedAt` values the newer
 * one wins, with ties going to the bootstrap snapshot. Otherwise the bootstrap snapshot wins.
 *
 * @param bootstrapSnapshot - Snapshot derived from the bootstrap state, if any.
 * @param launchSnapshot - Snapshot from the launch state, if any.
 * @returns The preferred snapshot, or `null` when neither exists.
 */
export function choosePreferredLaunchSnapshot<T extends { updatedAt?: string }>(
  bootstrapSnapshot: T | null,
  launchSnapshot: T | null
): T | null {
  if (!bootstrapSnapshot) return launchSnapshot;
  if (!launchSnapshot) return bootstrapSnapshot;

  const [bootstrapMs, launchMs] = [bootstrapSnapshot, launchSnapshot].map((snapshot) =>
    Date.parse(snapshot.updatedAt ?? '')
  );
  const comparable = Number.isFinite(bootstrapMs) && Number.isFinite(launchMs);
  return comparable && launchMs > bootstrapMs ? launchSnapshot : bootstrapSnapshot;
}

View file

@ -12,6 +12,10 @@ import * as path from 'path';
import { getTeamFsWorkerClient } from './TeamFsWorkerClient';
import { TeamMembersMetaStore } from './TeamMembersMetaStore';
import { TeamMetaStore } from './TeamMetaStore';
import {
choosePreferredLaunchSnapshot,
readBootstrapLaunchSnapshot,
} from './TeamBootstrapStateReader';
import { normalizePersistedLaunchSnapshot } from './TeamLaunchStateEvaluator';
import type { TeamConfig, TeamMember, TeamSummary, TeamSummaryMember } from '@shared/types';
@ -42,17 +46,27 @@ interface LaunchStateSummary {
}
async function readLaunchStateSummary(teamDir: string): Promise<LaunchStateSummary | null> {
const bootstrapSnapshot = await readBootstrapLaunchSnapshot(path.basename(teamDir));
const launchStatePath = path.join(teamDir, TEAM_LAUNCH_STATE_FILE);
let launchSnapshot = null;
try {
const stat = await fs.promises.stat(launchStatePath);
if (!stat.isFile() || stat.size > MAX_LAUNCH_STATE_BYTES) {
return null;
}
const raw = await readFileUtf8WithTimeout(launchStatePath, PER_TEAM_READ_TIMEOUT_MS);
const snapshot = normalizePersistedLaunchSnapshot(path.basename(teamDir), JSON.parse(raw));
if (!snapshot) {
return null;
launchSnapshot = null;
} else {
const raw = await readFileUtf8WithTimeout(launchStatePath, PER_TEAM_READ_TIMEOUT_MS);
launchSnapshot = normalizePersistedLaunchSnapshot(path.basename(teamDir), JSON.parse(raw));
}
} catch {
launchSnapshot = null;
}
const snapshot = choosePreferredLaunchSnapshot(bootstrapSnapshot, launchSnapshot);
if (!snapshot) {
return null;
}
try {
const missingMembers = snapshot.expectedMembers.filter((name) => {
const member = snapshot.members[name];
return member?.launchState === 'failed_to_start';

View file

@ -73,6 +73,13 @@ import { TeamMetaStore } from './TeamMetaStore';
import { TeamSentMessagesStore } from './TeamSentMessagesStore';
import { TeamTaskReader } from './TeamTaskReader';
import { TeamLaunchStateStore } from './TeamLaunchStateStore';
import {
choosePreferredLaunchSnapshot,
clearBootstrapState,
readBootstrapLaunchSnapshot,
readBootstrapRealTaskSubmissionState,
readBootstrapRuntimeState,
} from './TeamBootstrapStateReader';
import { resolveDesktopTeammateModeDecision } from './runtimeTeammateMode';
import {
createPersistedLaunchSnapshot,
@ -131,6 +138,7 @@ import type {
TeamProvisioningState,
TeamRuntimeState,
TeamTask,
EffortLevel,
ToolActivityEventPayload,
ToolApprovalAutoResolved,
ToolApprovalEvent,
@ -501,7 +509,12 @@ interface ProvisioningRun {
provisioningComplete: boolean;
/** Path to the generated MCP config file for later cleanup. */
mcpConfigPath: string | null;
/** Path to the deterministic bootstrap spec file for later cleanup. */
bootstrapSpecPath: string | null;
/** Path to the deferred first-user-task file consumed by runtime after bootstrap. */
bootstrapUserPromptPath: string | null;
isLaunch: boolean;
deterministicBootstrap: boolean;
leadRelayCapture: {
leadName: string;
startedAt: string;
@ -1138,6 +1151,142 @@ export function buildAddMemberSpawnMessage(
);
}
// One teammate entry of the deterministic bootstrap spec that is serialised to a JSON file.
// NOTE(review): buildDeterministicBootstrapSpec also emits optional `role` and `workflow` keys
// that are not declared here — confirm whether they belong in this type.
type RuntimeBootstrapMemberSpec = {
name: string;
prompt: string;
cwd?: string;
model?: string;
provider?: TeamProviderId;
effort?: EffortLevel;
agentType?: string;
description?: string;
useSplitPane?: boolean;
planModeRequired?: boolean;
};
// Full deterministic bootstrap spec (schema version 1) written to disk and passed via
// `--team-bootstrap-spec`: who initiated the run, the team, the lead's settings and the members.
type RuntimeBootstrapSpec = {
version: 1;
runId: string;
mode: 'create';
initiator: {
kind: 'app';
source: 'claude_team_freecode';
};
team: {
name: string;
displayName?: string;
description?: string;
color?: string;
cwd: string;
};
lead: {
providerId?: TeamProviderId;
model?: string;
effort?: EffortLevel;
skipPermissions?: boolean;
worktree?: string | null;
extraCliArgs?: string[];
};
members: RuntimeBootstrapMemberSpec[];
launch?: {
initialUserPrompt?: string | null;
bootstrapTimeoutMs?: number;
continueOnPartialFailure?: boolean;
};
ui?: {
emitStructuredEvents?: boolean;
};
};
/**
 * Assembles the deterministic bootstrap spec for a team-creation request.
 *
 * Optional string fields are trimmed and omitted when blank. The lead name used in member prompts
 * is the first member whose role contains "lead" (case-insensitive), defaulting to `team-lead`.
 *
 * @param runId - Identifier of the provisioning run, copied into the spec.
 * @param request - The team-creation request.
 * @param effectiveMembers - Members to spawn; each gets a generated spawn prompt.
 * @returns The spec, with `continueOnPartialFailure` and `emitStructuredEvents` enabled.
 */
function buildDeterministicBootstrapSpec(
  runId: string,
  request: TeamCreateRequest,
  effectiveMembers: TeamCreateRequest['members']
): RuntimeBootstrapSpec {
  const teamDisplayName = request.displayName?.trim();
  const teamDescription = request.description?.trim();
  const teamColor = request.color?.trim();
  const leadModel = request.model?.trim();

  const promptDisplayName = teamDisplayName || request.teamName;
  const leadMember = effectiveMembers.find((candidate) =>
    candidate.role?.toLowerCase().includes('lead')
  );
  const leadName = leadMember?.name || 'team-lead';

  return {
    version: 1,
    runId,
    mode: 'create',
    initiator: {
      kind: 'app',
      source: 'claude_team_freecode',
    },
    team: {
      name: request.teamName,
      ...(teamDisplayName ? { displayName: teamDisplayName } : {}),
      ...(teamDescription ? { description: teamDescription } : {}),
      ...(teamColor ? { color: teamColor } : {}),
      cwd: request.cwd,
    },
    lead: {
      ...(request.providerId ? { providerId: request.providerId } : {}),
      ...(leadModel ? { model: leadModel } : {}),
      ...(request.effort ? { effort: request.effort } : {}),
      ...(request.skipPermissions !== undefined
        ? { skipPermissions: request.skipPermissions }
        : {}),
      ...(request.worktree ? { worktree: request.worktree } : {}),
      ...(request.extraCliArgs ? { extraCliArgs: parseCliArgs(request.extraCliArgs) } : {}),
    },
    members: effectiveMembers.map((member) => {
      const role = member.role?.trim();
      const workflow = member.workflow?.trim();
      const model = member.model?.trim();
      return {
        name: member.name,
        prompt: buildMemberSpawnPrompt(member, promptDisplayName, request.teamName, leadName),
        ...(role ? { role } : {}),
        ...(workflow ? { workflow } : {}),
        ...(request.cwd ? { cwd: request.cwd } : {}),
        ...(model ? { model } : {}),
        ...(member.providerId ? { provider: member.providerId } : {}),
        ...(member.effort ? { effort: member.effort } : {}),
        // The trimmed role doubles as the member description.
        ...(role ? { description: role } : {}),
      };
    }),
    launch: {
      continueOnPartialFailure: true,
    },
    ui: {
      emitStructuredEvents: true,
    },
  };
}
/**
 * Serialises a bootstrap spec to a new owner-only (0600) JSON file in a fresh temp directory.
 *
 * @param spec - The spec to write; `spec.team.name` becomes part of the file name.
 * @returns Path of the written file. The caller owns the file and its parent directory.
 * @throws Rethrows any write error after removing the temp directory. Without that cleanup the
 *   directory would leak, because no path is returned for the caller to clean up.
 */
async function writeDeterministicBootstrapSpecFile(spec: RuntimeBootstrapSpec): Promise<string> {
  const tempDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'agent-teams-bootstrap-'));
  const filePath = path.join(tempDir, `${spec.team.name}-${randomUUID()}.json`);
  try {
    await fs.promises.writeFile(filePath, JSON.stringify(spec), {
      encoding: 'utf8',
      mode: 0o600,
    });
  } catch (error) {
    // Best-effort removal; the original write error is what the caller needs to see.
    await fs.promises.rm(tempDir, { recursive: true, force: true }).catch(() => {});
    throw error;
  }
  return filePath;
}
/**
 * Deletes a bootstrap spec file and then its parent directory, ignoring all failures.
 *
 * @param filePath - Path of the spec file; nothing happens for `null` or an empty string.
 * @returns Resolves once both removal attempts have finished.
 */
async function removeDeterministicBootstrapSpecFile(filePath: string | null): Promise<void> {
  if (!filePath) {
    return;
  }
  try {
    await fs.promises.rm(filePath, { force: true });
  } catch {
    // Ignored: cleanup is best-effort.
  }
  try {
    // rmdir only succeeds on an empty directory, so unrelated content is never deleted.
    await fs.promises.rmdir(path.dirname(filePath));
  } catch {
    // Ignored: the directory may be non-empty or already gone.
  }
}
/**
 * Writes the deferred first user prompt to a new owner-only (0600) text file in a fresh temp
 * directory.
 *
 * @param prompt - Prompt text to persist.
 * @returns Path of the written file. The caller owns the file and its parent directory.
 * @throws Rethrows any write error after removing the temp directory. Without that cleanup the
 *   directory would leak, because no path is returned for the caller to clean up.
 */
async function writeDeterministicBootstrapUserPromptFile(prompt: string): Promise<string> {
  const tempDir = await fs.promises.mkdtemp(
    path.join(os.tmpdir(), 'agent-teams-bootstrap-prompt-')
  );
  const filePath = path.join(tempDir, `${randomUUID()}.txt`);
  try {
    await fs.promises.writeFile(filePath, prompt, {
      encoding: 'utf8',
      mode: 0o600,
    });
  } catch (error) {
    // Best-effort removal; the original write error is what the caller needs to see.
    await fs.promises.rm(tempDir, { recursive: true, force: true }).catch(() => {});
    throw error;
  }
  return filePath;
}
/**
 * Deletes a deferred user-prompt file and then its parent directory, ignoring all failures.
 *
 * @param filePath - Path of the prompt file; nothing happens for `null` or an empty string.
 * @returns Resolves once both removal attempts have finished.
 */
async function removeDeterministicBootstrapUserPromptFile(filePath: string | null): Promise<void> {
  if (!filePath) {
    return;
  }
  try {
    await fs.promises.rm(filePath, { force: true });
  } catch {
    // Ignored: cleanup is best-effort.
  }
  try {
    // rmdir only succeeds on an empty directory, so unrelated content is never deleted.
    await fs.promises.rmdir(path.dirname(filePath));
  } catch {
    // Ignored: the directory may be non-empty or already gone.
  }
}
function buildTeamCtlOpsInstructions(teamName: string, leadName: string): string {
return wrapInAgentBlock(
[
@ -4030,6 +4179,7 @@ export class TeamProvisioningService {
// Verify --mcp-config still exists; regenerate if deleted (e.g. by stale GC)
const mcpFlagIdx = ctx.args.indexOf('--mcp-config');
const bootstrapPromptFlagIdx = ctx.args.indexOf('--team-bootstrap-user-prompt-file');
if (mcpFlagIdx !== -1 && mcpFlagIdx + 1 < ctx.args.length) {
const existingConfigPath = ctx.args[mcpFlagIdx + 1];
try {
@ -4054,6 +4204,73 @@ export class TeamProvisioningService {
}
}
if (bootstrapPromptFlagIdx !== -1 && bootstrapPromptFlagIdx + 1 < ctx.args.length) {
const existingPromptPath = ctx.args[bootstrapPromptFlagIdx + 1];
try {
await fs.promises.access(existingPromptPath, fs.constants.F_OK);
} catch {
const submissionState = await readBootstrapRealTaskSubmissionState(run.teamName);
if (submissionState === 'submitted') {
ctx.args.splice(bootstrapPromptFlagIdx, 2);
ctx.prompt = '';
run.bootstrapUserPromptPath = null;
} else if (submissionState === 'unknown') {
run.authRetryInProgress = false;
const progress = updateProgress(
run,
'failed',
'Unable to safely retry first task after auth failure',
{
error:
'deterministic bootstrap recorded the first real task as unknown, so retry would risk a duplicate submission',
cliLogsTail: extractCliLogsFromRun(run),
}
);
run.onProgress(progress);
this.cleanupRun(run);
return;
} else if (ctx.prompt.trim().length === 0) {
run.authRetryInProgress = false;
const progress = updateProgress(
run,
'failed',
'Failed to restore deferred first task after auth retry',
{
error:
'deterministic bootstrap user prompt file was missing and no prompt was available to regenerate it',
cliLogsTail: extractCliLogsFromRun(run),
}
);
run.onProgress(progress);
this.cleanupRun(run);
return;
} else {
logger.warn(
`[${run.teamName}] Bootstrap user prompt file ${existingPromptPath} missing, regenerating`
);
try {
const newPromptPath = await writeDeterministicBootstrapUserPromptFile(ctx.prompt);
ctx.args[bootstrapPromptFlagIdx + 1] = newPromptPath;
run.bootstrapUserPromptPath = newPromptPath;
} catch (regenErr) {
run.authRetryInProgress = false;
const progress = updateProgress(
run,
'failed',
'Failed to regenerate deferred first task for auth retry',
{
error: regenErr instanceof Error ? regenErr.message : String(regenErr),
cliLogsTail: extractCliLogsFromRun(run),
}
);
run.onProgress(progress);
this.cleanupRun(run);
return;
}
}
}
}
// Respawn with saved context — CLI handles its own auth refresh.
let child: ReturnType<typeof spawn>;
try {
@ -4091,8 +4308,9 @@ export class TeamProvisioningService {
});
run.onProgress(run.progress);
// Resend prompt
if (child.stdin?.writable) {
// Resend prompt only for legacy direct-stdin flows. Deterministic bootstrap
// owns the first real task via --team-bootstrap-user-prompt-file.
if (bootstrapPromptFlagIdx === -1 && child.stdin?.writable) {
const message = JSON.stringify({
type: 'user',
message: {
@ -4352,7 +4570,10 @@ export class TeamProvisioningService {
waitingTasksSince: null,
provisioningComplete: false,
mcpConfigPath: null,
bootstrapSpecPath: null,
bootstrapUserPromptPath: null,
isLaunch: false,
deterministicBootstrap: true,
fsPhase: 'waiting_config',
leadRelayCapture: null,
activeCrossTeamReplyHints: [],
@ -4404,24 +4625,41 @@ export class TeamProvisioningService {
run.onProgress(run.progress);
await this.clearPersistedLaunchState(request.teamName);
const prompt = buildProvisioningPrompt(request, effectiveMemberSpecs);
const promptSize = getPromptSizeSummary(prompt);
const bootstrapSpec = buildDeterministicBootstrapSpec(runId, request, effectiveMemberSpecs);
const initialUserPrompt = request.prompt?.trim() ?? '';
const promptSize = getPromptSizeSummary(initialUserPrompt);
let child: ReturnType<typeof spawn>;
const { env: shellEnv, geminiRuntimeAuth } = await this.buildProvisioningEnv(
request.providerId
);
shellEnv.CLAUDE_ENABLE_DETERMINISTIC_TEAM_BOOTSTRAP = '1';
const teammateModeDecision = await resolveDesktopTeammateModeDecision(request.extraCliArgs);
if (teammateModeDecision.forceProcessTeammates) {
shellEnv.CLAUDE_TEAM_FORCE_PROCESS_TEAMMATES = '1';
}
let mcpConfigPath: string;
let bootstrapSpecPath: string;
let bootstrapUserPromptPath: string | null = null;
try {
bootstrapSpecPath = await writeDeterministicBootstrapSpecFile(bootstrapSpec);
run.bootstrapSpecPath = bootstrapSpecPath;
if (initialUserPrompt) {
bootstrapUserPromptPath =
await writeDeterministicBootstrapUserPromptFile(initialUserPrompt);
run.bootstrapUserPromptPath = bootstrapUserPromptPath;
}
mcpConfigPath = await this.mcpConfigBuilder.writeConfigFile(request.cwd);
run.mcpConfigPath = mcpConfigPath;
await this.validateAgentTeamsMcpRuntime(claudePath, request.cwd, shellEnv, mcpConfigPath);
} catch (error) {
this.runs.delete(runId);
this.provisioningRunByTeam.delete(request.teamName);
await removeDeterministicBootstrapSpecFile(run.bootstrapSpecPath).catch(() => {});
run.bootstrapSpecPath = null;
await removeDeterministicBootstrapUserPromptFile(run.bootstrapUserPromptPath).catch(
() => {}
);
run.bootstrapUserPromptPath = null;
throw error;
}
const spawnArgs = [
@ -4434,6 +4672,11 @@ export class TeamProvisioningService {
'user,project,local',
'--mcp-config',
mcpConfigPath,
'--team-bootstrap-spec',
bootstrapSpecPath,
...(bootstrapUserPromptPath
? ['--team-bootstrap-user-prompt-file', bootstrapUserPromptPath]
: []),
'--disallowedTools',
APP_TEAM_RUNTIME_DISALLOWED_TOOLS,
// Explicit --permission-mode overrides user's defaultMode in ~/.claude/settings.json
@ -4505,6 +4748,12 @@ export class TeamProvisioningService {
const tasksDir = path.join(getTasksBasePath(), request.teamName);
await fs.promises.rm(teamDir, { recursive: true, force: true }).catch(() => {});
await fs.promises.rm(tasksDir, { recursive: true, force: true }).catch(() => {});
await removeDeterministicBootstrapSpecFile(run.bootstrapSpecPath).catch(() => {});
run.bootstrapSpecPath = null;
await removeDeterministicBootstrapUserPromptFile(run.bootstrapUserPromptPath).catch(
() => {}
);
run.bootstrapUserPromptPath = null;
if (run.mcpConfigPath) {
await this.mcpConfigBuilder.removeConfigFile(run.mcpConfigPath).catch(() => {});
run.mcpConfigPath = null;
@ -4525,21 +4774,9 @@ export class TeamProvisioningService {
args: spawnArgs,
cwd: request.cwd,
env: { ...shellEnv },
prompt,
prompt: initialUserPrompt,
};
// Send provisioning prompt as first stream-json message (SDKUserMessage format)
if (child.stdin?.writable) {
const message = JSON.stringify({
type: 'user',
message: {
role: 'user',
content: [{ type: 'text', text: prompt }],
},
});
child.stdin.write(message + '\n');
}
this.attachStdoutHandler(run);
this.attachStderrHandler(run);
@ -4841,7 +5078,10 @@ export class TeamProvisioningService {
waitingTasksSince: null,
provisioningComplete: false,
mcpConfigPath: null,
bootstrapSpecPath: null,
bootstrapUserPromptPath: null,
isLaunch: true,
deterministicBootstrap: false,
fsPhase: 'waiting_members',
leadRelayCapture: null,
activeCrossTeamReplyHints: [],
@ -5840,10 +6080,17 @@ export class TeamProvisioningService {
return Array.from(this.aliveRunByTeam.keys()).filter((name) => this.isTeamAlive(name));
}
getRuntimeState(teamName: string): TeamRuntimeState {
async getRuntimeState(teamName: string): Promise<TeamRuntimeState> {
const runId = this.getTrackedRunId(teamName);
const run = runId ? (this.runs.get(runId) ?? null) : null;
if (!run) {
const recovered = await readBootstrapRuntimeState(teamName);
if (recovered) {
return recovered;
}
}
return {
teamName,
isAlive: this.isTeamAlive(teamName),
@ -6175,7 +6422,11 @@ export class TeamProvisioningService {
}
const current = run.memberSpawnStatuses.get(expected);
if (current?.launchState === 'failed_to_start') {
if (
current?.launchState === 'failed_to_start' ||
current?.bootstrapConfirmed ||
current?.runtimeAlive
) {
continue;
}
@ -6224,6 +6475,7 @@ export class TeamProvisioningService {
/**
 * Clears the persisted launch artifacts for a team before a new run starts.
 *
 * The launch-state store entry is cleared first, then `clearBootstrapState` is
 * invoked for the same team. The two steps run sequentially, so a rejection from
 * the launch-state store propagates to the caller and the bootstrap-state clear
 * is not attempted.
 *
 * @param teamName - Team whose persisted launch/bootstrap state should be removed.
 */
private async clearPersistedLaunchState(teamName: string): Promise<void> {
await this.launchStateStore.clear(teamName);
// Also drop the bootstrap state so it is cleared together with the launch state.
await clearBootstrapState(teamName);
}
private getFailedSpawnMembers(
@ -6338,7 +6590,15 @@ export class TeamProvisioningService {
snapshot: ReturnType<typeof createPersistedLaunchSnapshot> | null;
statuses: Record<string, MemberSpawnStatusEntry>;
}> {
const bootstrapSnapshot = await readBootstrapLaunchSnapshot(teamName);
const persisted = await this.launchStateStore.read(teamName);
const preferredSnapshot = choosePreferredLaunchSnapshot(bootstrapSnapshot, persisted);
if (preferredSnapshot) {
return {
snapshot: preferredSnapshot,
statuses: snapshotToMemberSpawnStatuses(preferredSnapshot),
};
}
if (!persisted) {
return { snapshot: null, statuses: {} };
}
@ -6998,6 +7258,152 @@ export class TeamProvisioningService {
* Process a parsed stream-json message from stdout.
* Extracts assistant text for progress reporting and detects turn completion.
*/
/**
 * Handles `team_bootstrap` system messages emitted on the CLI's stream-json output
 * and mirrors them into provisioning progress and per-member spawn statuses.
 *
 * Recognized events: `started`, `phase_changed`, `team_created`,
 * `member_spawn_started`, `member_spawn_result`, `completed` and `failed`.
 * Unknown events (or a missing/empty `event` field) are swallowed without side effects.
 *
 * @param run - Provisioning run the message belongs to.
 * @param msg - Parsed stream-json message.
 * @returns `false` when `msg` is not a `system` / `team_bootstrap` message (the caller
 *   should keep processing it); `true` when the message was consumed here.
 */
private handleDeterministicBootstrapEvent(
  run: ProvisioningRun,
  msg: Record<string, unknown>
): boolean {
  const isBootstrapMessage = msg.type === 'system' && msg.subtype === 'team_bootstrap';
  if (!isBootstrapMessage) {
    return false;
  }

  // Trimmed payload string, or '' when the field is missing or not a string.
  const trimmedText = (value: unknown): string => (typeof value === 'string' ? value.trim() : '');
  // Forwards a freshly computed progress snapshot to the run's listener.
  const publish = (snapshot: ReturnType<typeof updateProgress>): void => {
    run.onProgress(snapshot);
  };
  const spawnFailureFallback = 'Deterministic bootstrap failed to spawn teammate.';

  const eventName = typeof msg.event === 'string' ? msg.event : '';
  switch (eventName) {
    case 'started': {
      publish(updateProgress(run, 'configuring', 'Starting deterministic team bootstrap'));
      return true;
    }

    case 'phase_changed': {
      const phaseName = typeof msg.phase === 'string' ? msg.phase : '';
      switch (phaseName) {
        case 'loading_existing_state':
          publish(updateProgress(run, 'configuring', 'Loading existing team state'));
          break;
        case 'acquiring_bootstrap_lock':
          publish(updateProgress(run, 'configuring', 'Acquiring deterministic bootstrap lock'));
          break;
        case 'creating_team':
          publish(updateProgress(run, 'assembling', 'Creating team config'));
          break;
        case 'spawning_members':
          publish(updateProgress(run, 'assembling', 'Spawning teammate runtimes'));
          break;
        case 'auditing_truth':
          publish(
            updateProgress(
              run,
              'finalizing',
              'Auditing registered teammates and bootstrap truth',
              { configReady: true }
            )
          );
          break;
        default:
          // Unrecognized phases are consumed silently.
          break;
      }
      return true;
    }

    case 'team_created': {
      const headline =
        msg.reused_existing_team === true
          ? 'Attached to existing team, starting teammates'
          : 'Team config created, starting teammates';
      publish(updateProgress(run, 'assembling', headline, { configReady: true }));
      return true;
    }

    case 'member_spawn_started': {
      const teammate = trimmedText(msg.member_name);
      if (teammate) {
        this.setMemberSpawnStatus(run, teammate, 'spawning');
      }
      return true;
    }

    case 'member_spawn_result': {
      const teammate = trimmedText(msg.member_name);
      if (!teammate) {
        return true;
      }
      const outcome = typeof msg.outcome === 'string' ? msg.outcome : '';
      switch (outcome) {
        case 'failed':
          this.setMemberSpawnStatus(
            run,
            teammate,
            'error',
            trimmedText(msg.reason) || spawnFailureFallback
          );
          break;
        case 'already_running':
          this.setMemberSpawnStatus(run, teammate, 'online', undefined, 'process');
          break;
        default:
          // Any other outcome means the spawn was issued; wait for the teammate to report in.
          this.setMemberSpawnStatus(run, teammate, 'waiting');
          break;
      }
      return true;
    }

    case 'completed': {
      const failedEntries = Array.isArray(msg.failed_members) ? msg.failed_members : [];
      for (const entry of failedEntries) {
        const teammate = trimmedText(entry?.name);
        if (!teammate) {
          continue;
        }
        this.setMemberSpawnStatus(
          run,
          teammate,
          'error',
          trimmedText(entry?.reason) || spawnFailureFallback
        );
      }
      if (run.provisioningComplete || run.cancelRequested) {
        return true;
      }
      // Fire-and-forget: completion handling is async, failures are only logged.
      void this.handleProvisioningTurnComplete(run).catch((error: unknown) => {
        const detail = error instanceof Error ? error.message : String(error);
        logger.error(
          `[${run.teamName}] deterministic bootstrap completion handler failed: ${detail}`
        );
      });
      return true;
    }

    case 'failed': {
      // Do not overwrite an already-failed run or one the user is cancelling.
      if (run.progress.state === 'failed' || run.cancelRequested) {
        return true;
      }
      const failureReason = trimmedText(msg.reason) || 'Deterministic bootstrap failed.';
      publish(
        updateProgress(run, 'failed', 'Deterministic bootstrap failed', {
          error: failureReason,
          cliLogsTail: extractCliLogsFromRun(run),
        })
      );
      run.processKilled = true;
      killTeamProcess(run.child);
      this.cleanupRun(run);
      return true;
    }

    default:
      // Missing, empty or unknown event: still a bootstrap message, so consume it.
      return true;
  }
}
private handleStreamJsonMessage(run: ProvisioningRun, msg: Record<string, unknown>): void {
// stream-json output has various message types:
// {"type":"assistant","content":[{"type":"text","text":"..."},...]}
@ -7194,6 +7600,10 @@ export class TeamProvisioningService {
}
}
if (this.handleDeterministicBootstrapEvent(run, msg)) {
return;
}
// Handle control_request — tool approval protocol (only when --dangerously-skip-permissions is NOT set)
if (msg.type === 'control_request') {
this.handleControlRequest(run, msg);
@ -9334,6 +9744,14 @@ export class TeamProvisioningService {
void this.mcpConfigBuilder.removeConfigFile(run.mcpConfigPath);
run.mcpConfigPath = null;
}
if (run.bootstrapSpecPath) {
void removeDeterministicBootstrapSpecFile(run.bootstrapSpecPath);
run.bootstrapSpecPath = null;
}
if (run.bootstrapUserPromptPath) {
void removeDeterministicBootstrapUserPromptFile(run.bootstrapUserPromptPath);
run.bootstrapUserPromptPath = null;
}
// Remove from runs Map to free memory (stdoutBuffer, stderrBuffer, claudeLogLines)
this.runs.delete(run.runId);
}
@ -9397,10 +9815,32 @@ export class TeamProvisioningService {
}
if (run.fsPhase === 'waiting_members') {
if (run.deterministicBootstrap) {
const registeredNames = await this.getRegisteredTeamMemberNames(run.teamName);
const registeredMembers = registeredNames
? request.members.filter((member) => registeredNames.has(member.name)).length
: 0;
if (registeredMembers >= request.members.length) {
run.fsPhase = 'all_files_found';
if (!run.provisioningComplete) {
void this.handleProvisioningTurnComplete(run);
}
return;
}
}
if (request.members.length === 0) {
run.fsPhase = 'waiting_tasks';
const progress = updateProgress(run, 'finalizing', 'Solo team, preparing workspace');
run.onProgress(progress);
if (run.deterministicBootstrap) {
run.fsPhase = 'all_files_found';
if (!run.provisioningComplete) {
void this.handleProvisioningTurnComplete(run);
}
} else {
run.fsPhase = 'waiting_tasks';
const progress = updateProgress(run, 'finalizing', 'Solo team, preparing workspace');
run.onProgress(progress);
}
} else {
const teamDir = (await resolveTeamDir()) ?? configuredTeamDir;
const inboxDir = path.join(teamDir, 'inboxes');
@ -9735,6 +10175,10 @@ export class TeamProvisioningService {
members: run.effectiveMembers,
}
);
await this.refreshMemberSpawnStatusesFromLeadInbox(run);
await this.maybeAuditMemberSpawnStatuses(run, { force: true });
await this.finalizeMissingRegisteredMembersAsFailed(run);
await this.persistLaunchStateSnapshot(run, 'finished');
// Process was killed by timeout — mark as disconnected, not ready
const progress = updateProgress(run, 'disconnected', 'Team provisioned but process timed out', {
warnings,

View file

@ -15,7 +15,7 @@ describe('HTTP team runtime routes', () => {
const launchTeam = vi.fn<
(request: TeamLaunchRequest, onProgress: (progress: TeamProvisioningProgress) => void) => Promise<TeamLaunchResponse>
>();
const getRuntimeState = vi.fn<(teamName: string) => TeamRuntimeState>();
const getRuntimeState = vi.fn<(teamName: string) => Promise<TeamRuntimeState>>();
const getProvisioningStatus = vi.fn<(runId: string) => Promise<TeamProvisioningProgress>>();
const stopTeam = vi.fn<(teamName: string) => void>();
const getAliveTeams = vi.fn<() => string[]>();
@ -82,6 +82,7 @@ describe('HTTP team runtime routes', () => {
teamName: 'demo-team',
cwd: '/tmp/project',
prompt: 'Resume work',
providerId: 'anthropic',
skipPermissions: false,
clearContext: true,
},
@ -115,7 +116,7 @@ describe('HTTP team runtime routes', () => {
it('returns runtime state, provisioning status, and stop results', async () => {
const { app, getRuntimeState, getProvisioningStatus, stopTeam, getAliveTeams } = await createApp();
getRuntimeState
.mockReturnValueOnce({
.mockResolvedValueOnce({
teamName: 'demo-team',
isAlive: true,
runId: 'run-2',
@ -128,13 +129,13 @@ describe('HTTP team runtime routes', () => {
updatedAt: '2026-03-12T00:00:01.000Z',
},
})
.mockReturnValueOnce({
.mockResolvedValueOnce({
teamName: 'demo-team',
isAlive: false,
runId: null,
progress: null,
})
.mockReturnValueOnce({
.mockResolvedValueOnce({
teamName: 'demo-team',
isAlive: true,
runId: 'run-2',

View file

@ -0,0 +1,352 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';
// In-memory stand-in for the handful of `fs.promises` calls the reader makes.
// Built inside `vi.hoisted` so the stubs exist before the `vi.mock` factories run.
const hoisted = vi.hoisted(() => {
  const files = new Map<
    string,
    {
      contents: string;
      size?: number;
      symbolicLink?: boolean;
    }
  >();

  // Keys are stored with forward slashes; normalize Windows-style separators.
  const norm = (p: string): string => p.replace(/\\/g, '/');

  // Returns the fake entry for a path, or throws an ENOENT-coded error like fs does.
  const requireEntry = (filePath: string) => {
    const found = files.get(norm(filePath));
    if (found) {
      return found;
    }
    const missing = new Error('ENOENT') as NodeJS.ErrnoException;
    missing.code = 'ENOENT';
    throw missing;
  };

  const lstat = vi.fn(async (filePath: string) => {
    const found = requireEntry(filePath);
    return {
      isFile: () => !found.symbolicLink,
      isSymbolicLink: () => Boolean(found.symbolicLink),
      // Explicit `size` wins; otherwise report the UTF-8 byte length of the contents.
      size: found.size ?? Buffer.byteLength(found.contents, 'utf8'),
    };
  });

  const readFile = vi.fn(async (filePath: string) => requireEntry(filePath).contents);

  const access = vi.fn(async (filePath: string) => {
    requireEntry(filePath);
  });

  const rm = vi.fn(async (filePath: string) => {
    files.delete(norm(filePath));
  });

  return { files, lstat, readFile, access, rm };
});
// Partially mock `fs`: everything is passed through from the real module except
// `promises.lstat`, `promises.readFile`, `promises.access` and `promises.rm`,
// which are replaced by the in-memory stubs created in `hoisted`.
vi.mock('fs', async (importOriginal) => {
const actual = await importOriginal<typeof import('fs')>();
return {
...actual,
promises: {
...actual.promises,
lstat: hoisted.lstat,
readFile: hoisted.readFile,
access: hoisted.access,
rm: hoisted.rm,
},
};
});
// Pin the teams base path so every fixture in this file lives under `/mock/teams`.
vi.mock('../../../../src/main/utils/pathDecoder', () => ({
getTeamsBasePath: () => '/mock/teams',
}));
import {
choosePreferredLaunchSnapshot,
readBootstrapLaunchSnapshot,
readBootstrapRealTaskSubmissionState,
readBootstrapRuntimeState,
} from '../../../../src/main/services/team/TeamBootstrapStateReader';
/**
 * Behavioural tests for the bootstrap-state reader, run against the in-memory
 * `fs.promises` stubs from `hoisted` (team directory: `/mock/teams/demo`).
 *
 * Spies on `process.kill` / `Date.now` are always installed through `withSpies`,
 * which restores them in a `finally` block. Restoring only on the last line of a
 * test would be skipped when an assertion fails, leaking the stub into later tests.
 */
describe('TeamBootstrapStateReader', () => {
  const TEAM_DIR = '/mock/teams/demo';
  const STATE_FILE = 'bootstrap-state.json';
  const JOURNAL_FILE = 'bootstrap-journal.jsonl';
  const LOCK_METADATA_FILE = '.bootstrap.lock/metadata.json';

  interface RestorableSpy {
    mockRestore: () => void;
  }

  /** Stores a regular (non-symlink) fake file inside the demo team directory. */
  const seed = (relativePath: string, contents: string): void => {
    hoisted.files.set(`${TEAM_DIR}/${relativePath}`, { contents });
  };

  /** Freezes `Date.now()` at the given epoch milliseconds. */
  const freezeNow = (epochMs: number) => vi.spyOn(Date, 'now').mockReturnValue(epochMs);

  /** Makes `process.kill` succeed, i.e. the probed owner pid looks alive. */
  const stubOwnerAlive = () =>
    vi.spyOn(process, 'kill').mockImplementation(() => true as never);

  /** Makes `process.kill` throw ESRCH, i.e. the probed owner pid no longer exists. */
  const stubOwnerGone = () =>
    vi.spyOn(process, 'kill').mockImplementation(() => {
      const error = new Error('ESRCH') as NodeJS.ErrnoException;
      error.code = 'ESRCH';
      throw error;
    });

  /**
   * Runs `body` and restores every spy afterwards (most recently installed first),
   * whether the body resolved or threw.
   */
  const withSpies = async (spies: RestorableSpy[], body: () => Promise<void>): Promise<void> => {
    try {
      await body();
    } finally {
      for (const spy of [...spies].reverse()) {
        spy.mockRestore();
      }
    }
  };

  beforeEach(() => {
    hoisted.files.clear();
    hoisted.lstat.mockClear();
    hoisted.readFile.mockClear();
    hoisted.access.mockClear();
    hoisted.rm.mockClear();
  });

  it('rejects symlink bootstrap-state files', async () => {
    hoisted.files.set(`${TEAM_DIR}/${STATE_FILE}`, {
      contents: '{}',
      symbolicLink: true,
    });

    await expect(readBootstrapLaunchSnapshot('demo')).resolves.toBeNull();
    await expect(readBootstrapRuntimeState('demo')).resolves.toBeNull();
  });

  it('projects active bootstrap-state into runtime progress', () =>
    withSpies([freezeNow(1700000001000), stubOwnerAlive()], async () => {
      seed(
        STATE_FILE,
        JSON.stringify({
          version: 1,
          runId: 'run-123',
          teamName: 'demo',
          ownerPid: 4242,
          startedAt: 1700000000000,
          updatedAt: 1700000000500,
          phase: 'acquiring_bootstrap_lock',
          members: [{ name: 'alice', status: 'pending' }],
        })
      );
      seed(
        JOURNAL_FILE,
        [
          JSON.stringify({ ts: 1, type: 'phase', runId: 'run-123', phase: 'loading_existing_state' }),
          JSON.stringify({ ts: 2, type: 'lock', runId: 'run-123', action: 'acquired', ownerPid: 4242 }),
          JSON.stringify({ ts: 3, type: 'member', runId: 'run-123', name: 'alice', action: 'spawn_started' }),
        ].join('\n')
      );

      await expect(readBootstrapRuntimeState('demo')).resolves.toEqual({
        teamName: 'demo',
        isAlive: false,
        runId: 'run-123',
        progress: {
          runId: 'run-123',
          teamName: 'demo',
          state: 'configuring',
          message: 'Acquiring deterministic bootstrap lock',
          warnings: [
            'Recent deterministic bootstrap events: bootstrap phase: loading_existing_state | bootstrap lock acquired (pid 4242) | alice: spawn_started',
          ],
          startedAt: '2023-11-14T22:13:20.000Z',
          updatedAt: '2023-11-14T22:13:20.500Z',
          pid: 4242,
        },
      });
    }));

  it('ignores terminal bootstrap-state for runtime recovery projection', async () => {
    seed(
      STATE_FILE,
      JSON.stringify({
        version: 1,
        runId: 'run-123',
        teamName: 'demo',
        startedAt: 1700000000000,
        updatedAt: 1700000000500,
        phase: 'completed',
        terminal: {
          status: 'completed',
          finishedAt: 1700000000500,
        },
        members: [{ name: 'alice', status: 'registered' }],
      })
    );

    await expect(readBootstrapRuntimeState('demo')).resolves.toBeNull();
  });

  it('reads persisted real-task submission state', async () => {
    seed(
      STATE_FILE,
      JSON.stringify({
        version: 1,
        runId: 'run-123',
        teamName: 'demo',
        startedAt: 1700000000000,
        updatedAt: 1700000000500,
        phase: 'completed',
        realTaskSubmissionState: 'submitted',
        members: [],
      })
    );

    await expect(readBootstrapRealTaskSubmissionState('demo')).resolves.toBe('submitted');
  });

  it('classifies dead bootstrap owner as failed launch snapshot instead of pending', () =>
    withSpies([freezeNow(1700000300000), stubOwnerGone()], async () => {
      seed(
        STATE_FILE,
        JSON.stringify({
          version: 1,
          runId: 'run-dead',
          teamName: 'demo',
          ownerPid: 777,
          startedAt: 1700000000000,
          updatedAt: 1700000000000,
          phase: 'spawning_members',
          members: [{ name: 'alice', status: 'registered' }],
        })
      );

      await expect(readBootstrapLaunchSnapshot('demo')).resolves.toMatchObject({
        launchPhase: 'finished',
        members: {
          alice: {
            launchState: 'failed_to_start',
            hardFailure: true,
            hardFailureReason:
              'bootstrap owner pid 777 is gone and persisted bootstrap state is stale',
          },
        },
      });
    }));

  it('projects dead bootstrap owner into failed runtime progress', () =>
    withSpies([freezeNow(1700000201000), stubOwnerGone()], async () => {
      seed(
        STATE_FILE,
        JSON.stringify({
          version: 1,
          runId: 'run-dead',
          teamName: 'demo',
          ownerPid: 777,
          startedAt: 1700000000000,
          updatedAt: 1700000200000,
          phase: 'spawning_members',
          members: [{ name: 'alice', status: 'registered' }],
        })
      );

      await expect(readBootstrapRuntimeState('demo')).resolves.toMatchObject({
        teamName: 'demo',
        isAlive: false,
        runId: 'run-dead',
        progress: {
          state: 'failed',
          message: 'Deterministic bootstrap owner exited before bootstrap completed',
          error:
            'bootstrap owner pid 777 is gone before bootstrap reached a terminal state',
        },
      });
    }));

  it('projects degraded runtime progress when bootstrap-state is unreadable but lock owner is alive', () =>
    withSpies([stubOwnerAlive()], async () => {
      seed(STATE_FILE, '{invalid-json');
      seed(
        LOCK_METADATA_FILE,
        JSON.stringify({
          pid: 4242,
          runId: 'run-lock',
          requestHash: 'hash-1',
          ownerStartedAt: 1700000000000,
          createdAt: 1700000000100,
          nonce: 'nonce-1',
        })
      );
      seed(
        JOURNAL_FILE,
        JSON.stringify({
          ts: 3,
          type: 'member',
          runId: 'run-lock',
          name: 'alice',
          action: 'spawn_started',
        })
      );

      await expect(readBootstrapRuntimeState('demo')).resolves.toMatchObject({
        teamName: 'demo',
        isAlive: false,
        runId: 'run-lock',
        progress: {
          state: 'configuring',
          message:
            'Deterministic bootstrap recovery is degraded because persisted bootstrap state is unreadable',
          messageSeverity: 'warning',
          pid: 4242,
        },
      });
    }));

  it('projects degraded failed runtime progress when bootstrap-state is unreadable and lock owner is dead', () =>
    withSpies([stubOwnerGone()], async () => {
      seed(STATE_FILE, '{invalid-json');
      seed(
        LOCK_METADATA_FILE,
        JSON.stringify({
          pid: 7331,
          runId: 'run-dead-lock',
          requestHash: 'hash-2',
          ownerStartedAt: 1700000000000,
          createdAt: 1700000000100,
          nonce: 'nonce-2',
        })
      );

      await expect(readBootstrapRuntimeState('demo')).resolves.toMatchObject({
        teamName: 'demo',
        isAlive: false,
        runId: 'run-dead-lock',
        progress: {
          state: 'failed',
          message:
            'Deterministic bootstrap recovery failed because persisted bootstrap state is unreadable and the bootstrap owner is gone',
          messageSeverity: 'warning',
          pid: 7331,
        },
      });
    }));

  it('prefers the newer launch snapshot when bootstrap snapshot is stale', () => {
    const preferred = choosePreferredLaunchSnapshot(
      { updatedAt: '2026-04-06T10:00:00.000Z', kind: 'bootstrap' },
      { updatedAt: '2026-04-06T10:05:00.000Z', kind: 'launch' }
    );

    expect(preferred).toEqual({
      updatedAt: '2026-04-06T10:05:00.000Z',
      kind: 'launch',
    });
  });
});