feat(team): add bootstrap recovery and app integration
This commit is contained in:
parent
d2cd655c11
commit
8ef89eefce
6 changed files with 1563 additions and 37 deletions
|
|
@ -175,7 +175,7 @@ export function registerTeamRoutes(app: FastifyInstance, services: HttpServices)
|
|||
|
||||
const teamProvisioningService = getTeamProvisioningService(services);
|
||||
teamProvisioningService.stopTeam(validatedTeamName.value!);
|
||||
return reply.send(teamProvisioningService.getRuntimeState(validatedTeamName.value!));
|
||||
return reply.send(await teamProvisioningService.getRuntimeState(validatedTeamName.value!));
|
||||
} catch (error) {
|
||||
if (shouldLogError(error)) {
|
||||
logger.error(
|
||||
|
|
@ -198,7 +198,7 @@ export function registerTeamRoutes(app: FastifyInstance, services: HttpServices)
|
|||
}
|
||||
|
||||
return reply.send(
|
||||
getTeamProvisioningService(services).getRuntimeState(validatedTeamName.value!)
|
||||
await getTeamProvisioningService(services).getRuntimeState(validatedTeamName.value!)
|
||||
);
|
||||
} catch (error) {
|
||||
if (shouldLogError(error)) {
|
||||
|
|
@ -236,9 +236,11 @@ export function registerTeamRoutes(app: FastifyInstance, services: HttpServices)
|
|||
app.get('/api/teams/runtime/alive', async (_request, reply) => {
|
||||
try {
|
||||
const teamProvisioningService = getTeamProvisioningService(services);
|
||||
const runtimeStates = teamProvisioningService
|
||||
.getAliveTeams()
|
||||
.map((teamName) => teamProvisioningService.getRuntimeState(teamName));
|
||||
const runtimeStates = await Promise.all(
|
||||
teamProvisioningService
|
||||
.getAliveTeams()
|
||||
.map((teamName) => teamProvisioningService.getRuntimeState(teamName))
|
||||
);
|
||||
return reply.send(runtimeStates);
|
||||
} catch (error) {
|
||||
if (shouldLogError(error)) {
|
||||
|
|
|
|||
713
src/main/services/team/TeamBootstrapStateReader.ts
Normal file
713
src/main/services/team/TeamBootstrapStateReader.ts
Normal file
|
|
@ -0,0 +1,713 @@
|
|||
import { getTeamsBasePath } from '@main/utils/pathDecoder';
|
||||
import { createPersistedLaunchSnapshot } from './TeamLaunchStateEvaluator';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
import type {
|
||||
PersistedTeamLaunchMemberState,
|
||||
PersistedTeamLaunchSnapshot,
|
||||
TeamProvisioningProgress,
|
||||
TeamRuntimeState,
|
||||
} from '@shared/types';
|
||||
|
||||
// Names of the bootstrap artifacts resolved beneath a team's directory.
const TEAM_BOOTSTRAP_STATE_FILE = 'bootstrap-state.json';
const TEAM_BOOTSTRAP_JOURNAL_FILE = 'bootstrap-journal.jsonl';
// Lock directory and the metadata file looked up inside it.
const TEAM_BOOTSTRAP_LOCK_DIR = '.bootstrap.lock';
const TEAM_BOOTSTRAP_LOCK_METADATA_FILE = 'metadata.json';

// Size ceilings compared against the lstat size before a file is read.
const MAX_BOOTSTRAP_STATE_BYTES = 256 * 1024;
const MAX_BOOTSTRAP_JOURNAL_BYTES = 256 * 1024;
const MAX_BOOTSTRAP_LOCK_METADATA_BYTES = 64 * 1024;

// Minimum age of `updatedAt` (3 minutes) for a dead-owner bootstrap to be labelled stale.
const ACTIVE_BOOTSTRAP_STUCK_CLASSIFICATION_MS = 3 * 60 * 1000;
|
||||
|
||||
/**
 * One entry of the persisted `members` array, exactly as parsed from JSON.
 * Every field is `unknown` and must be type-checked before use.
 */
type RawBootstrapMemberState = {
  name?: unknown;
  status?: unknown;
  lastAttemptAt?: unknown;
  lastObservedAt?: unknown;
  failureReason?: unknown;
};
|
||||
|
||||
/**
 * Unvalidated shape of `bootstrap-state.json` as produced by `JSON.parse`.
 * Only `version` is checked when the file is loaded; every other field is
 * type-checked by whichever reader consumes it.
 */
type RawBootstrapState = {
  version?: unknown;
  runId?: unknown;
  teamName?: unknown;
  startedAt?: unknown;
  ownerPid?: unknown;
  updatedAt?: unknown;
  phase?: unknown;
  realTaskSubmissionState?: unknown;
  members?: unknown;
  terminal?: unknown;
};
|
||||
|
||||
/**
 * One parsed line of `bootstrap-journal.jsonl`, discriminated by `type`.
 * Payload fields stay `unknown` until the journal reader type-checks them.
 */
type RawBootstrapJournalRecord =
  | { ts?: unknown; type?: 'phase'; phase?: unknown }
  | { ts?: unknown; type?: 'lock'; action?: unknown; ownerPid?: unknown; detail?: unknown }
  | { ts?: unknown; type?: 'member'; name?: unknown; action?: unknown; detail?: unknown }
  | { ts?: unknown; type?: 'terminal'; status?: unknown; reason?: unknown }
  | { ts?: unknown; type?: 'real_task'; state?: unknown; detail?: unknown };
|
||||
|
||||
/**
 * Unvalidated contents of the lock directory's `metadata.json`.
 * Within this file only `pid`, `runId` and `ownerStartedAt` are read.
 */
type RawBootstrapLockMetadata = {
  pid?: unknown;
  runId?: unknown;
  requestHash?: unknown;
  ownerStartedAt?: unknown;
  createdAt?: unknown;
  nonce?: unknown;
};
|
||||
|
||||
/**
 * Outcome of loading the persisted bootstrap state.
 * `raw` is null when the file is absent or rejected; `issue` carries the
 * human-readable rejection reason and is left unset when the file is simply missing.
 */
type BootstrapStateInspection = {
  raw: RawBootstrapState | null;
  issue?: string;
};
|
||||
|
||||
/**
 * Outcome of reading the bootstrap journal.
 * `warnings` summarises recent journal events; `issue` explains why the
 * journal could not be read. Both are unset when the journal file is missing.
 */
type BootstrapJournalInspection = {
  warnings?: string[];
  issue?: string;
};
|
||||
|
||||
/**
 * Validated subset of the lock metadata: a positive finite `pid`, a
 * non-empty trimmed `runId`, and `ownerStartedAt` when it was a finite number.
 */
type BootstrapLockMetadata = {
  pid: number;
  runId: string;
  ownerStartedAt?: number;
};
|
||||
|
||||
/**
 * Phase names recognised in the persisted bootstrap state.
 * The last three (`completed`, `failed`, `canceled`) are the terminal phases.
 */
type BootstrapRuntimePhase =
  | 'validating_spec'
  | 'loading_existing_state'
  | 'acquiring_bootstrap_lock'
  | 'creating_team'
  | 'spawning_members'
  | 'auditing_truth'
  | 'completed'
  | 'failed'
  | 'canceled';
|
||||
|
||||
/** Reports whether `phase` is one of the end states: completed, failed or canceled. */
function isBootstrapPhaseTerminal(phase: BootstrapRuntimePhase): boolean {
  switch (phase) {
    case 'completed':
    case 'failed':
    case 'canceled':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Probes whether a process with the given pid currently exists.
 *
 * Sends signal 0, which performs the existence/permission check without
 * delivering a signal. An `EPERM` failure still counts as alive: the process
 * exists but this process may not signal it.
 *
 * @param pid Candidate process id; non-finite or non-positive values are treated as dead.
 * @returns true when the process exists, false otherwise.
 */
function isProcessAlive(pid: number): boolean {
  const usablePid = Number.isFinite(pid) && pid > 0;
  if (!usablePid) {
    return false;
  }

  try {
    process.kill(pid, 0);
  } catch (error) {
    const code = (error as NodeJS.ErrnoException | undefined)?.code;
    return code === 'EPERM';
  }
  return true;
}
|
||||
|
||||
/**
 * Decides whether the process that owns an unfinished bootstrap has died.
 *
 * The owner is only ever reported dead when the persisted phase is a
 * non-empty, non-terminal string AND `ownerPid` is a number for which
 * `isProcessAlive` returns false. In that case `stale` additionally reports
 * whether `updatedAt` (epoch ms or a parseable date string) is at least
 * `ACTIVE_BOOTSTRAP_STUCK_CLASSIFICATION_MS` old.
 *
 * @param raw Parsed, unvalidated bootstrap state.
 * @returns `ownerDead`/`stale` flags plus a `failureReason` when the owner is dead.
 */
function classifyBootstrapOwnerState(raw: RawBootstrapState): {
  ownerDead: boolean;
  stale: boolean;
  failureReason?: string;
} {
  // Missing, empty or terminal phase: nothing is in flight, so there is no owner to judge.
  if (
    typeof raw.phase !== 'string' ||
    raw.phase === '' ||
    isBootstrapPhaseTerminal(raw.phase as BootstrapRuntimePhase)
  ) {
    return { ownerDead: false, stale: false };
  }

  // Without a numeric pid the owner cannot be probed; treat it as not dead.
  if (typeof raw.ownerPid !== 'number') {
    return { ownerDead: false, stale: false };
  }
  const ownerPid = raw.ownerPid;
  if (isProcessAlive(ownerPid)) {
    return { ownerDead: false, stale: false };
  }

  let lastUpdateMs = NaN;
  if (typeof raw.updatedAt === 'number') {
    lastUpdateMs = raw.updatedAt;
  } else if (typeof raw.updatedAt === 'string') {
    lastUpdateMs = Date.parse(raw.updatedAt);
  }

  // An unparseable timestamp can never be classified as stale.
  const stale =
    Number.isFinite(lastUpdateMs) &&
    Date.now() - lastUpdateMs >= ACTIVE_BOOTSTRAP_STUCK_CLASSIFICATION_MS;

  const failureReason = stale
    ? `bootstrap owner pid ${ownerPid} is gone and persisted bootstrap state is stale`
    : `bootstrap owner pid ${ownerPid} is gone before bootstrap reached a terminal state`;

  return { ownerDead: true, stale, failureReason };
}
|
||||
|
||||
/**
 * Loads a team's `bootstrap-state.json` and applies the basic safety checks.
 *
 * The file is rejected (with an explanatory `issue`) when it is a symlink,
 * not a regular file, larger than `MAX_BOOTSTRAP_STATE_BYTES`, not schema
 * version 1, or cannot be read/parsed. A missing file is not an issue: the
 * result is `{ raw: null }` with no `issue`.
 *
 * @param teamName Team whose state file should be inspected.
 * @returns The parsed state, or null plus the reason it was rejected.
 */
async function inspectBootstrapState(teamName: string): Promise<BootstrapStateInspection> {
  const statePath = getTeamBootstrapStatePath(teamName);
  const unreadable = (issue: string): BootstrapStateInspection => ({ raw: null, issue });

  try {
    // lstat (not stat) so that a symlink is detected instead of followed.
    const info = await fs.promises.lstat(statePath);
    if (info.isSymbolicLink()) {
      return unreadable(
        'Persisted deterministic bootstrap state is unreadable because bootstrap-state.json is a symlink.'
      );
    }
    if (!info.isFile()) {
      return unreadable(
        'Persisted deterministic bootstrap state is unreadable because bootstrap-state.json is not a regular file.'
      );
    }
    if (info.size > MAX_BOOTSTRAP_STATE_BYTES) {
      return unreadable(
        'Persisted deterministic bootstrap state is unreadable because bootstrap-state.json is oversized.'
      );
    }

    const text = await fs.promises.readFile(statePath, 'utf8');
    const parsed = JSON.parse(text) as RawBootstrapState;
    if (parsed.version !== 1) {
      return unreadable(
        'Persisted deterministic bootstrap state is unreadable because bootstrap-state.json has an unsupported schema version.'
      );
    }
    return { raw: parsed };
  } catch (error) {
    const code = (error as NodeJS.ErrnoException | undefined)?.code;
    if (code === 'ENOENT') {
      return { raw: null };
    }
    return unreadable(
      'Persisted deterministic bootstrap state is unreadable because bootstrap-state.json is invalid, truncated, or inaccessible.'
    );
  }
}
|
||||
|
||||
/** Returns the parsed bootstrap state for `teamName`, or null when it is missing or rejected. */
async function readRawBootstrapState(teamName: string): Promise<RawBootstrapState | null> {
  const { raw } = await inspectBootstrapState(teamName);
  return raw;
}
|
||||
|
||||
/**
 * Maps a bootstrap phase to the provisioning-progress state and message shown for it.
 *
 * @param phase Bootstrap phase read from persisted state.
 * @param memberCount Number of members; appended to the `spawning_members` message when positive.
 * @returns The projected state/message, or null for an unrecognised phase.
 */
function getBootstrapProgressProjection(
  phase: BootstrapRuntimePhase,
  memberCount: number
): { state: Exclude<TeamProvisioningProgress['state'], 'idle'>; message: string } | null {
  type Projection = { state: Exclude<TeamProvisioningProgress['state'], 'idle'>; message: string };

  // The only phase whose message depends on an argument.
  if (phase === 'spawning_members') {
    const message =
      memberCount > 0
        ? `Spawning teammate runtimes (${memberCount})`
        : 'Spawning teammate runtimes';
    return { state: 'assembling', message };
  }

  // A Map (rather than a plain object) so that names such as "toString" never match.
  const fixedProjections = new Map<BootstrapRuntimePhase, Projection>([
    ['validating_spec', { state: 'validating', message: 'Validating deterministic bootstrap spec' }],
    ['loading_existing_state', { state: 'configuring', message: 'Loading existing team state' }],
    [
      'acquiring_bootstrap_lock',
      { state: 'configuring', message: 'Acquiring deterministic bootstrap lock' },
    ],
    ['creating_team', { state: 'assembling', message: 'Creating team config' }],
    [
      'auditing_truth',
      { state: 'finalizing', message: 'Auditing registered teammates and bootstrap truth' },
    ],
    ['completed', { state: 'ready', message: 'Deterministic bootstrap completed' }],
    ['failed', { state: 'failed', message: 'Deterministic bootstrap failed' }],
    ['canceled', { state: 'cancelled', message: 'Deterministic bootstrap cancelled' }],
  ]);

  return fixedProjections.get(phase) ?? null;
}
|
||||
|
||||
/**
 * Coerces a persisted timestamp into a string, falling back when it is unusable.
 *
 * - A non-blank string is returned unchanged (it is not parsed or validated).
 * - A positive finite number is treated as epoch milliseconds and converted
 *   to an ISO-8601 string.
 * - Anything else, including numbers outside the representable Date range,
 *   yields `fallback`.
 *
 * @param value Untrusted value read from persisted JSON.
 * @param fallback String returned when `value` cannot be used.
 * @returns The timestamp string or `fallback`; never throws.
 */
function toIso(value: unknown, fallback: string): string {
  if (typeof value === 'string' && value.trim().length > 0) {
    return value;
  }
  if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
    const date = new Date(value);
    // Finite values beyond the Date range produce an Invalid Date, and
    // toISOString() throws RangeError on those; use the fallback instead.
    if (!Number.isNaN(date.getTime())) {
      return date.toISOString();
    }
  }
  return fallback;
}
|
||||
|
||||
/**
 * Converts one raw bootstrap member entry into a persisted launch member state.
 *
 * The raw `status` string (default `'pending'`) drives everything:
 * `failed` is a hard failure, `bootstrap_confirmed` implies runtime alive,
 * and `registered` / `spawn_started` / any later status count as spawn accepted.
 *
 * @param memberName Already-trimmed member name to store on the result.
 * @param raw Unvalidated member entry from the bootstrap state file.
 * @param updatedAt Fallback timestamp for any missing per-member timestamp.
 * @returns The normalized member state.
 */
function normalizeBootstrapMemberState(
  memberName: string,
  raw: RawBootstrapMemberState,
  updatedAt: string
): PersistedTeamLaunchMemberState {
  const status = typeof raw.status === 'string' ? raw.status : 'pending';

  const hardFailure = status === 'failed';
  const bootstrapConfirmed = status === 'bootstrap_confirmed';
  const runtimeAlive = bootstrapConfirmed || status === 'runtime_alive';
  const agentToolAccepted =
    runtimeAlive || hardFailure || status === 'registered' || status === 'spawn_started';

  const failureReason =
    typeof raw.failureReason === 'string' && raw.failureReason.trim().length > 0
      ? raw.failureReason.trim()
      : undefined;
  const observedAt = toIso(raw.lastObservedAt, updatedAt);

  let launchState: PersistedTeamLaunchMemberState['launchState'];
  if (hardFailure) {
    launchState = 'failed_to_start';
  } else if (bootstrapConfirmed) {
    launchState = 'confirmed_alive';
  } else if (agentToolAccepted) {
    // runtimeAlive implies agentToolAccepted, so this covers both cases.
    launchState = 'runtime_pending_bootstrap';
  } else {
    launchState = 'starting';
  }

  let diagnostics: string[] | undefined;
  if (hardFailure) {
    diagnostics = [failureReason ?? 'deterministic bootstrap failed'];
  } else if (runtimeAlive) {
    diagnostics = bootstrapConfirmed
      ? ['late heartbeat received']
      : ['runtime alive', 'waiting for bootstrap'];
  } else if (agentToolAccepted) {
    diagnostics = ['spawn accepted'];
  }

  return {
    name: memberName,
    launchState,
    agentToolAccepted,
    runtimeAlive,
    bootstrapConfirmed,
    hardFailure,
    hardFailureReason: failureReason,
    firstSpawnAcceptedAt: agentToolAccepted ? toIso(raw.lastAttemptAt, updatedAt) : undefined,
    lastHeartbeatAt: bootstrapConfirmed ? observedAt : undefined,
    lastRuntimeAliveAt: runtimeAlive ? observedAt : undefined,
    lastEvaluatedAt: observedAt,
    sources: {
      configRegistered: status === 'registered' || runtimeAlive || hardFailure,
      processAlive: runtimeAlive || undefined,
      hardFailureSignal: hardFailure || undefined,
    },
    diagnostics,
  };
}
|
||||
|
||||
/** Builds the path of `bootstrap-state.json` inside the given team's directory. */
export function getTeamBootstrapStatePath(teamName: string): string {
  const segments = [getTeamsBasePath(), teamName, TEAM_BOOTSTRAP_STATE_FILE];
  return path.join(...segments);
}
|
||||
|
||||
/** Builds the path of `bootstrap-journal.jsonl` inside the given team's directory. */
function getTeamBootstrapJournalPath(teamName: string): string {
  const segments = [getTeamsBasePath(), teamName, TEAM_BOOTSTRAP_JOURNAL_FILE];
  return path.join(...segments);
}
|
||||
|
||||
/** Builds the path of the lock `metadata.json` inside the team's `.bootstrap.lock` directory. */
function getTeamBootstrapLockMetadataPath(teamName: string): string {
  const segments = [
    getTeamsBasePath(),
    teamName,
    TEAM_BOOTSTRAP_LOCK_DIR,
    TEAM_BOOTSTRAP_LOCK_METADATA_FILE,
  ];
  return path.join(...segments);
}
|
||||
|
||||
/**
 * Reads and validates the bootstrap lock metadata for a team.
 *
 * Returns null when the file is a symlink, not a regular file, larger than
 * `MAX_BOOTSTRAP_LOCK_METADATA_BYTES`, unreadable, not valid JSON, or lacks a
 * positive finite `pid` and a non-blank `runId`.
 *
 * @param teamName Team whose lock metadata should be read.
 * @returns The validated metadata, or null when it is unavailable.
 */
async function readBootstrapLockMetadata(teamName: string): Promise<BootstrapLockMetadata | null> {
  const metadataPath = getTeamBootstrapLockMetadataPath(teamName);

  try {
    // lstat so a symlinked metadata file is rejected rather than followed.
    const info = await fs.promises.lstat(metadataPath);
    const acceptable =
      !info.isSymbolicLink() && info.isFile() && info.size <= MAX_BOOTSTRAP_LOCK_METADATA_BYTES;
    if (!acceptable) {
      return null;
    }

    const text = await fs.promises.readFile(metadataPath, 'utf8');
    const parsed = JSON.parse(text) as RawBootstrapLockMetadata;
    // Destructuring a null parse result throws and is handled by the catch below.
    const { pid, runId, ownerStartedAt } = parsed;

    if (typeof pid !== 'number' || !Number.isFinite(pid) || pid <= 0) {
      return null;
    }
    if (typeof runId !== 'string') {
      return null;
    }
    const trimmedRunId = runId.trim();
    if (trimmedRunId.length === 0) {
      return null;
    }

    const startedAt =
      typeof ownerStartedAt === 'number' && Number.isFinite(ownerStartedAt)
        ? ownerStartedAt
        : undefined;

    return { pid, runId: trimmedRunId, ownerStartedAt: startedAt };
  } catch {
    return null;
  }
}
|
||||
|
||||
/** Returns only the `warnings` part of the journal inspection (undefined when there are none). */
async function readBootstrapJournalWarnings(teamName: string): Promise<string[] | undefined> {
  const { warnings } = await inspectBootstrapJournal(teamName);
  return warnings;
}
|
||||
|
||||
/** Renders one journal record as a short human-readable phrase, or null when it is not recognised. */
function describeBootstrapJournalRecord(record: RawBootstrapJournalRecord): string | null {
  const trimmedText = (value: unknown): string => (typeof value === 'string' ? value.trim() : '');

  switch (record.type) {
    case 'phase':
      return typeof record.phase === 'string' ? `bootstrap phase: ${record.phase}` : null;
    case 'lock': {
      if (typeof record.action !== 'string') return null;
      const owner = typeof record.ownerPid === 'number' ? ` (pid ${record.ownerPid})` : '';
      return `bootstrap lock ${record.action}${owner}`;
    }
    case 'member': {
      if (typeof record.name !== 'string' || typeof record.action !== 'string') return null;
      const detail = trimmedText(record.detail);
      return detail.length > 0
        ? `${record.name}: ${record.action} (${detail})`
        : `${record.name}: ${record.action}`;
    }
    case 'terminal': {
      if (typeof record.status !== 'string') return null;
      const reason = trimmedText(record.reason);
      return reason.length > 0
        ? `bootstrap ${record.status}: ${reason}`
        : `bootstrap ${record.status}`;
    }
    case 'real_task': {
      if (typeof record.state !== 'string') return null;
      const detail = trimmedText(record.detail);
      return detail.length > 0
        ? `first task ${record.state}: ${detail}`
        : `first task ${record.state}`;
    }
    default:
      return null;
  }
}

/**
 * Reads the bootstrap journal and summarises its most recent events.
 *
 * The journal is rejected (with an `issue`) when it is a symlink, not a
 * regular file, larger than `MAX_BOOTSTRAP_JOURNAL_BYTES`, or unreadable.
 * Otherwise the last three non-blank lines are parsed as JSON; lines that do
 * not parse or are not recognised are dropped, and the rest are joined into a
 * single warning string. A missing journal yields an empty result.
 *
 * @param teamName Team whose journal should be inspected.
 * @returns Recent-event warnings and/or the reason the journal was rejected.
 */
async function inspectBootstrapJournal(teamName: string): Promise<BootstrapJournalInspection> {
  const journalPath = getTeamBootstrapJournalPath(teamName);
  const rejected = (issue: string): BootstrapJournalInspection => ({ issue });

  try {
    const info = await fs.promises.lstat(journalPath);
    if (info.isSymbolicLink()) {
      return rejected(
        'Persisted deterministic bootstrap journal is unreadable because bootstrap-journal.jsonl is a symlink.'
      );
    }
    if (!info.isFile()) {
      return rejected(
        'Persisted deterministic bootstrap journal is unreadable because bootstrap-journal.jsonl is not a regular file.'
      );
    }
    if (info.size > MAX_BOOTSTRAP_JOURNAL_BYTES) {
      return rejected(
        'Persisted deterministic bootstrap journal is unreadable because bootstrap-journal.jsonl is oversized.'
      );
    }

    const text = await fs.promises.readFile(journalPath, 'utf8');
    // Only the three most recent non-blank lines are considered.
    const recentLines = text
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line.length > 0)
      .slice(-3);

    const messages: string[] = [];
    for (const line of recentLines) {
      let record: RawBootstrapJournalRecord | null;
      try {
        record = JSON.parse(line) as RawBootstrapJournalRecord;
      } catch {
        record = null;
      }
      if (!record) continue;

      const message = describeBootstrapJournalRecord(record);
      if (message) {
        messages.push(message);
      }
    }

    return {
      warnings:
        messages.length > 0
          ? [`Recent deterministic bootstrap events: ${messages.join(' | ')}`]
          : undefined,
    };
  } catch (error) {
    const code = (error as NodeJS.ErrnoException | undefined)?.code;
    if (code === 'ENOENT') {
      return {};
    }
    return rejected(
      'Persisted deterministic bootstrap journal is unreadable because bootstrap-journal.jsonl is invalid, truncated, or inaccessible.'
    );
  }
}
|
||||
|
||||
/**
 * Builds a runtime state from the lock metadata alone, for use when the
 * persisted bootstrap state file exists but could not be read.
 *
 * The result is `configuring` while the lock owner's process is still alive
 * and `failed` once it is gone. Journal issues and recent journal events are
 * attached as warnings alongside `stateIssue`.
 *
 * @param teamName Team being inspected.
 * @param stateIssue Reason the bootstrap state file was rejected.
 * @returns A degraded runtime state, or null when no valid lock metadata exists.
 */
async function readDegradedBootstrapRuntimeState(
  teamName: string,
  stateIssue: string
): Promise<TeamRuntimeState | null> {
  const lockMetadata = await readBootstrapLockMetadata(teamName);
  if (!lockMetadata) {
    return null;
  }

  const journalInspection = await inspectBootstrapJournal(teamName);
  const warnings = [
    stateIssue,
    journalInspection.issue,
    ...(journalInspection.warnings ?? []),
  ].filter((item): item is string => typeof item === 'string' && item.trim().length > 0);
  const ownerAlive = isProcessAlive(lockMetadata.pid);
  const now = new Date().toISOString();

  // `ownerStartedAt` comes from an untrusted file. A finite number outside the
  // Date range yields an Invalid Date whose toISOString() throws RangeError, and
  // nothing upstream catches it, so validate before formatting.
  const ownerStartedDate =
    typeof lockMetadata.ownerStartedAt === 'number'
      ? new Date(lockMetadata.ownerStartedAt)
      : null;
  const startedAt =
    ownerStartedDate !== null && !Number.isNaN(ownerStartedDate.getTime())
      ? ownerStartedDate.toISOString()
      : now;

  return {
    teamName,
    isAlive: false,
    runId: lockMetadata.runId,
    progress: {
      runId: lockMetadata.runId,
      teamName,
      state: ownerAlive ? 'configuring' : 'failed',
      message: ownerAlive
        ? 'Deterministic bootstrap recovery is degraded because persisted bootstrap state is unreadable'
        : 'Deterministic bootstrap recovery failed because persisted bootstrap state is unreadable and the bootstrap owner is gone',
      messageSeverity: 'warning',
      error: ownerAlive
        ? stateIssue
        : `${stateIssue} Bootstrap owner pid ${lockMetadata.pid} is not alive.`,
      warnings: warnings.length > 0 ? warnings : undefined,
      startedAt,
      updatedAt: now,
      pid: lockMetadata.pid,
    },
  };
}
|
||||
|
||||
/**
 * Derives a persisted launch snapshot from a team's bootstrap state file.
 *
 * Members named `team-lead` or `user`, blank names and non-object entries are
 * skipped. When a name appears more than once, it is listed once in
 * `expectedMembers` and the last entry's state wins. The launch phase is
 * `finished` when the terminal status or phase says so, or when the bootstrap
 * owner process is dead; in the dead-owner case every member that is neither
 * confirmed alive nor already failed is marked `failed_to_start`.
 *
 * @param teamName Team whose bootstrap state should be read; also the
 *   fallback team name when the file does not carry one.
 * @returns The snapshot, or null when no usable state exists or normalization throws.
 */
export async function readBootstrapLaunchSnapshot(
  teamName: string
): Promise<PersistedTeamLaunchSnapshot | null> {
  const raw = await readRawBootstrapState(teamName);
  if (!raw) {
    return null;
  }
  try {
    const updatedAt = toIso(raw.updatedAt, new Date().toISOString());
    const rawMembers = Array.isArray(raw.members) ? raw.members : [];
    const members: Record<string, PersistedTeamLaunchMemberState> = {};
    const expectedMembers: string[] = [];
    // Tracks names already listed so a repeated entry is not expected twice.
    const seenMemberNames = new Set<string>();

    for (const item of rawMembers) {
      if (!item || typeof item !== 'object') continue;
      const rawMember = item as RawBootstrapMemberState;
      const memberName = typeof rawMember.name === 'string' ? rawMember.name.trim() : '';
      if (!memberName || memberName === 'team-lead' || memberName === 'user') continue;
      if (!seenMemberNames.has(memberName)) {
        seenMemberNames.add(memberName);
        expectedMembers.push(memberName);
      }
      members[memberName] = normalizeBootstrapMemberState(memberName, rawMember, updatedAt);
    }

    const terminal =
      raw.terminal && typeof raw.terminal === 'object'
        ? (raw.terminal as Record<string, unknown>)
        : null;
    const terminalStatus = typeof terminal?.status === 'string' ? terminal.status : undefined;
    const phase = typeof raw.phase === 'string' ? raw.phase : undefined;
    const ownerState = classifyBootstrapOwnerState(raw);
    const launchPhase =
      terminalStatus === 'completed' ||
      terminalStatus === 'partial_success' ||
      terminalStatus === 'failed' ||
      terminalStatus === 'canceled' ||
      ownerState.ownerDead ||
      phase === 'completed' ||
      phase === 'failed' ||
      phase === 'canceled'
        ? 'finished'
        : 'active';

    if (ownerState.ownerDead) {
      // With the owner gone, nothing still in flight can finish starting.
      const diagnostics = ownerState.failureReason ? [ownerState.failureReason] : undefined;
      for (const memberName of expectedMembers) {
        const entry = members[memberName];
        if (
          !entry ||
          entry.launchState === 'confirmed_alive' ||
          entry.launchState === 'failed_to_start'
        ) {
          continue;
        }
        members[memberName] = {
          ...entry,
          launchState: 'failed_to_start',
          hardFailure: true,
          hardFailureReason: ownerState.failureReason,
          diagnostics: diagnostics ?? entry.diagnostics,
          sources: {
            ...entry.sources,
            hardFailureSignal: true,
          },
        };
      }
    }

    return createPersistedLaunchSnapshot({
      teamName:
        typeof raw.teamName === 'string' && raw.teamName.trim().length > 0
          ? raw.teamName.trim()
          : teamName,
      expectedMembers,
      launchPhase,
      members,
      updatedAt,
    });
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Reads `realTaskSubmissionState` from the persisted bootstrap state.
 *
 * @param teamName Team whose bootstrap state should be read.
 * @returns The stored value when it is one of the three recognised states;
 *   null when the state is missing, rejected, or holds anything else.
 */
export async function readBootstrapRealTaskSubmissionState(
  teamName: string
): Promise<'not_submitted' | 'submitted' | 'unknown' | null> {
  const raw = await readRawBootstrapState(teamName);
  if (!raw) {
    return null;
  }

  const recognisedStates = ['not_submitted', 'submitted', 'unknown'] as const;
  const persisted = raw.realTaskSubmissionState;
  const match = recognisedStates.find((candidate) => candidate === persisted);
  return match ?? null;
}
|
||||
|
||||
/**
 * Projects a team's persisted bootstrap state into a runtime state.
 *
 * - State file rejected: delegates to the degraded, lock-metadata-based reader.
 * - State file missing, phase missing, or phase not an active one: null.
 * - Owner process dead during an unfinished phase: a `failed` progress entry.
 * - Otherwise: the progress projection for the current active phase.
 *
 * `isAlive` is always false in states produced here.
 *
 * @param teamName Team to inspect; also the fallback team name.
 * @returns The runtime state, or null when nothing should be reported or projection throws.
 */
export async function readBootstrapRuntimeState(
  teamName: string
): Promise<TeamRuntimeState | null> {
  const { raw, issue } = await inspectBootstrapState(teamName);
  if (!raw) {
    if (!issue) {
      return null;
    }
    return readDegradedBootstrapRuntimeState(teamName, issue);
  }

  try {
    const journalWarnings = await readBootstrapJournalWarnings(teamName);
    if (typeof raw.phase !== 'string' || raw.phase.length === 0) {
      return null;
    }
    const phase = raw.phase as BootstrapRuntimePhase;

    const resolvedTeamName =
      typeof raw.teamName === 'string' && raw.teamName.trim().length > 0
        ? raw.teamName.trim()
        : teamName;
    // `updatedAt` falls back to `startedAt`, which itself falls back to "now".
    const readTimestamps = (): { startedAt: string; updatedAt: string } => {
      const startedAt = toIso(raw.startedAt, new Date().toISOString());
      return { startedAt, updatedAt: toIso(raw.updatedAt, startedAt) };
    };

    const ownerState = classifyBootstrapOwnerState(raw);
    if (ownerState.ownerDead) {
      const { startedAt, updatedAt } = readTimestamps();
      const persistedRunId = typeof raw.runId === 'string' ? raw.runId : null;
      return {
        teamName: resolvedTeamName,
        isAlive: false,
        runId: persistedRunId,
        progress: {
          runId: persistedRunId ?? teamName,
          teamName: resolvedTeamName,
          state: 'failed',
          message: ownerState.stale
            ? 'Deterministic bootstrap became stuck after owner process exited'
            : 'Deterministic bootstrap owner exited before bootstrap completed',
          error: ownerState.failureReason,
          warnings: journalWarnings,
          startedAt,
          updatedAt,
          ...(typeof raw.ownerPid === 'number' ? { pid: raw.ownerPid } : {}),
        },
      };
    }

    // Terminal and unrecognised phases are not reported by this reader.
    const activePhases = new Set<BootstrapRuntimePhase>([
      'validating_spec',
      'loading_existing_state',
      'acquiring_bootstrap_lock',
      'creating_team',
      'spawning_members',
      'auditing_truth',
    ]);
    if (!activePhases.has(phase)) {
      return null;
    }

    const memberCount = Array.isArray(raw.members) ? raw.members.length : 0;
    const projection = getBootstrapProgressProjection(phase, memberCount);
    if (!projection) {
      return null;
    }

    const { startedAt, updatedAt } = readTimestamps();
    const runId = typeof raw.runId === 'string' && raw.runId.trim().length > 0 ? raw.runId : null;
    const pid =
      typeof raw.ownerPid === 'number' && Number.isFinite(raw.ownerPid) && raw.ownerPid > 0
        ? raw.ownerPid
        : undefined;

    const progress: TeamProvisioningProgress = {
      runId: runId ?? `bootstrap:${teamName}`,
      teamName: resolvedTeamName,
      state: projection.state,
      message: projection.message,
      warnings: journalWarnings,
      startedAt,
      updatedAt,
      ...(pid ? { pid } : {}),
    };

    return { teamName: resolvedTeamName, isAlive: false, runId, progress };
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Removes the team's `bootstrap-state.json`.
 * Best-effort: any failure is swallowed and a missing file is not an error (`force: true`).
 */
export async function clearBootstrapState(teamName: string): Promise<void> {
  try {
    const statePath = getTeamBootstrapStatePath(teamName);
    await fs.promises.rm(statePath, { force: true });
  } catch {
    // best-effort
  }
}
|
||||
|
||||
/**
 * Picks which of two launch snapshots should be used.
 *
 * If only one is present it wins. If both are present and both `updatedAt`
 * values parse as dates, the newer one wins, with ties going to the bootstrap
 * snapshot. If either timestamp cannot be parsed, the bootstrap snapshot wins.
 *
 * @param bootstrapSnapshot Snapshot derived from the bootstrap state, or null.
 * @param launchSnapshot Snapshot from the launch state store, or null.
 * @returns The preferred snapshot, or null when both are absent.
 */
export function choosePreferredLaunchSnapshot<T extends { updatedAt?: string }>(
  bootstrapSnapshot: T | null,
  launchSnapshot: T | null
): T | null {
  if (!bootstrapSnapshot || !launchSnapshot) {
    return bootstrapSnapshot || launchSnapshot;
  }

  const toEpochMs = (snapshot: T): number => Date.parse(snapshot.updatedAt ?? '');
  const bootstrapMs = toEpochMs(bootstrapSnapshot);
  const launchMs = toEpochMs(launchSnapshot);

  const comparable = Number.isFinite(bootstrapMs) && Number.isFinite(launchMs);
  return comparable && launchMs > bootstrapMs ? launchSnapshot : bootstrapSnapshot;
}
|
||||
|
|
@ -12,6 +12,10 @@ import * as path from 'path';
|
|||
import { getTeamFsWorkerClient } from './TeamFsWorkerClient';
|
||||
import { TeamMembersMetaStore } from './TeamMembersMetaStore';
|
||||
import { TeamMetaStore } from './TeamMetaStore';
|
||||
import {
|
||||
choosePreferredLaunchSnapshot,
|
||||
readBootstrapLaunchSnapshot,
|
||||
} from './TeamBootstrapStateReader';
|
||||
import { normalizePersistedLaunchSnapshot } from './TeamLaunchStateEvaluator';
|
||||
|
||||
import type { TeamConfig, TeamMember, TeamSummary, TeamSummaryMember } from '@shared/types';
|
||||
|
|
@ -42,17 +46,27 @@ interface LaunchStateSummary {
|
|||
}
|
||||
|
||||
async function readLaunchStateSummary(teamDir: string): Promise<LaunchStateSummary | null> {
|
||||
const bootstrapSnapshot = await readBootstrapLaunchSnapshot(path.basename(teamDir));
|
||||
const launchStatePath = path.join(teamDir, TEAM_LAUNCH_STATE_FILE);
|
||||
let launchSnapshot = null;
|
||||
try {
|
||||
const stat = await fs.promises.stat(launchStatePath);
|
||||
if (!stat.isFile() || stat.size > MAX_LAUNCH_STATE_BYTES) {
|
||||
return null;
|
||||
}
|
||||
const raw = await readFileUtf8WithTimeout(launchStatePath, PER_TEAM_READ_TIMEOUT_MS);
|
||||
const snapshot = normalizePersistedLaunchSnapshot(path.basename(teamDir), JSON.parse(raw));
|
||||
if (!snapshot) {
|
||||
return null;
|
||||
launchSnapshot = null;
|
||||
} else {
|
||||
const raw = await readFileUtf8WithTimeout(launchStatePath, PER_TEAM_READ_TIMEOUT_MS);
|
||||
launchSnapshot = normalizePersistedLaunchSnapshot(path.basename(teamDir), JSON.parse(raw));
|
||||
}
|
||||
} catch {
|
||||
launchSnapshot = null;
|
||||
}
|
||||
|
||||
const snapshot = choosePreferredLaunchSnapshot(bootstrapSnapshot, launchSnapshot);
|
||||
if (!snapshot) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
const missingMembers = snapshot.expectedMembers.filter((name) => {
|
||||
const member = snapshot.members[name];
|
||||
return member?.launchState === 'failed_to_start';
|
||||
|
|
|
|||
|
|
@ -73,6 +73,13 @@ import { TeamMetaStore } from './TeamMetaStore';
|
|||
import { TeamSentMessagesStore } from './TeamSentMessagesStore';
|
||||
import { TeamTaskReader } from './TeamTaskReader';
|
||||
import { TeamLaunchStateStore } from './TeamLaunchStateStore';
|
||||
import {
|
||||
choosePreferredLaunchSnapshot,
|
||||
clearBootstrapState,
|
||||
readBootstrapLaunchSnapshot,
|
||||
readBootstrapRealTaskSubmissionState,
|
||||
readBootstrapRuntimeState,
|
||||
} from './TeamBootstrapStateReader';
|
||||
import { resolveDesktopTeammateModeDecision } from './runtimeTeammateMode';
|
||||
import {
|
||||
createPersistedLaunchSnapshot,
|
||||
|
|
@ -131,6 +138,7 @@ import type {
|
|||
TeamProvisioningState,
|
||||
TeamRuntimeState,
|
||||
TeamTask,
|
||||
EffortLevel,
|
||||
ToolActivityEventPayload,
|
||||
ToolApprovalAutoResolved,
|
||||
ToolApprovalEvent,
|
||||
|
|
@ -501,7 +509,12 @@ interface ProvisioningRun {
|
|||
provisioningComplete: boolean;
|
||||
/** Path to the generated MCP config file for later cleanup. */
|
||||
mcpConfigPath: string | null;
|
||||
/** Path to the deterministic bootstrap spec file for later cleanup. */
|
||||
bootstrapSpecPath: string | null;
|
||||
/** Path to the deferred first-user-task file consumed by runtime after bootstrap. */
|
||||
bootstrapUserPromptPath: string | null;
|
||||
isLaunch: boolean;
|
||||
deterministicBootstrap: boolean;
|
||||
leadRelayCapture: {
|
||||
leadName: string;
|
||||
startedAt: string;
|
||||
|
|
@ -1138,6 +1151,142 @@ export function buildAddMemberSpawnMessage(
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * One teammate entry of the deterministic bootstrap spec.
 *
 * `role` and `workflow` are declared because `buildDeterministicBootstrapSpec`
 * writes them into every member entry that has them; they were previously
 * emitted through object spreads, which bypass excess-property checking, so
 * the type did not describe the data actually produced.
 */
type RuntimeBootstrapMemberSpec = {
  name: string;
  prompt: string;
  role?: string;
  workflow?: string;
  cwd?: string;
  model?: string;
  provider?: TeamProviderId;
  effort?: EffortLevel;
  agentType?: string;
  description?: string;
  useSplitPane?: boolean;
  planModeRequired?: boolean;
};
|
||||
|
||||
/**
 * Shape of the deterministic bootstrap spec produced by the app
 * (schema version 1, `create` mode).
 */
type RuntimeBootstrapSpec = {
  version: 1;
  runId: string;
  mode: 'create';
  /** Identifies the producer of the spec. */
  initiator: {
    kind: 'app';
    source: 'claude_team_freecode';
  };
  /** Team identity and working directory. */
  team: {
    name: string;
    displayName?: string;
    description?: string;
    color?: string;
    cwd: string;
  };
  /** Settings for the lead runtime. */
  lead: {
    providerId?: TeamProviderId;
    model?: string;
    effort?: EffortLevel;
    skipPermissions?: boolean;
    worktree?: string | null;
    extraCliArgs?: string[];
  };
  /** Teammates to spawn. */
  members: RuntimeBootstrapMemberSpec[];
  launch?: {
    initialUserPrompt?: string | null;
    bootstrapTimeoutMs?: number;
    continueOnPartialFailure?: boolean;
  };
  ui?: {
    emitStructuredEvents?: boolean;
  };
};
|
||||
|
||||
/**
 * Assembles the deterministic bootstrap spec for a team-create request.
 *
 * Optional string fields are trimmed and omitted entirely when blank. The
 * lead name used in member prompts is the name of the first member whose
 * role contains "lead" (case-insensitive), defaulting to `team-lead`.
 * Property insertion order is kept stable so the serialized spec is unchanged.
 *
 * @param runId Run identifier stored on the spec.
 * @param request The team-create request supplying team and lead settings.
 * @param effectiveMembers Members to include in the spec.
 * @returns The spec, with `continueOnPartialFailure` and `emitStructuredEvents` enabled.
 */
function buildDeterministicBootstrapSpec(
  runId: string,
  request: TeamCreateRequest,
  effectiveMembers: TeamCreateRequest['members']
): RuntimeBootstrapSpec {
  const trimmedDisplayName = request.displayName?.trim();
  const displayName = trimmedDisplayName || request.teamName;

  const leadMember = effectiveMembers.find((candidate) =>
    candidate.role?.toLowerCase().includes('lead')
  );
  const leadName = leadMember?.name || 'team-lead';

  const teamDescription = request.description?.trim();
  const teamColor = request.color?.trim();
  const team = {
    name: request.teamName,
    ...(trimmedDisplayName ? { displayName: trimmedDisplayName } : {}),
    ...(teamDescription ? { description: teamDescription } : {}),
    ...(teamColor ? { color: teamColor } : {}),
    cwd: request.cwd,
  };

  const leadModel = request.model?.trim();
  const lead = {
    ...(request.providerId ? { providerId: request.providerId } : {}),
    ...(leadModel ? { model: leadModel } : {}),
    ...(request.effort ? { effort: request.effort } : {}),
    // `false` is a meaningful value here, so only `undefined` is omitted.
    ...(request.skipPermissions !== undefined
      ? { skipPermissions: request.skipPermissions }
      : {}),
    ...(request.worktree ? { worktree: request.worktree } : {}),
    ...(request.extraCliArgs ? { extraCliArgs: parseCliArgs(request.extraCliArgs) } : {}),
  };

  const members = effectiveMembers.map((member) => {
    const prompt = buildMemberSpawnPrompt(member, displayName, request.teamName, leadName);
    const role = member.role?.trim();
    const workflow = member.workflow?.trim();
    const model = member.model?.trim();
    return {
      name: member.name,
      prompt,
      ...(role ? { role } : {}),
      ...(workflow ? { workflow } : {}),
      ...(request.cwd ? { cwd: request.cwd } : {}),
      ...(model ? { model } : {}),
      ...(member.providerId ? { provider: member.providerId } : {}),
      ...(member.effort ? { effort: member.effort } : {}),
      // The trimmed role doubles as the member description.
      ...(role ? { description: role } : {}),
    };
  });

  return {
    version: 1,
    runId,
    mode: 'create',
    initiator: {
      kind: 'app',
      source: 'claude_team_freecode',
    },
    team,
    lead,
    members,
    launch: {
      continueOnPartialFailure: true,
    },
    ui: {
      emitStructuredEvents: true,
    },
  };
}
|
||||
|
||||
/**
 * Serializes the bootstrap spec to a JSON file inside a freshly created,
 * uniquely named temp directory and returns the file path.
 *
 * The file is written with mode `0o600`. The team name is only used as a
 * readable filename prefix: path separators and NUL bytes in it are replaced
 * with `_` so the file can never land outside the private temp directory.
 * If the write fails, the temp directory is removed again before the error
 * is rethrown, because callers only learn the path on success and could not
 * clean it up themselves.
 *
 * @param spec The spec to persist.
 * @returns Absolute path of the written JSON file.
 * @throws Whatever `mkdtemp` / `writeFile` reject with.
 */
async function writeDeterministicBootstrapSpecFile(spec: RuntimeBootstrapSpec): Promise<string> {
  const tempDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'agent-teams-bootstrap-'));
  const safeTeamName = spec.team.name.replace(/[\\/\0]/g, '_');
  const filePath = path.join(tempDir, `${safeTeamName}-${randomUUID()}.json`);
  try {
    await fs.promises.writeFile(filePath, JSON.stringify(spec), {
      encoding: 'utf8',
      mode: 0o600,
    });
  } catch (error) {
    // Best-effort cleanup; the original write error is what the caller needs.
    await fs.promises.rm(tempDir, { recursive: true, force: true }).catch(() => {});
    throw error;
  }
  return filePath;
}
|
||||
|
||||
/**
 * Best-effort removal of a bootstrap spec file and the directory holding it.
 *
 * Does nothing for a null or empty path. Failures are ignored on purpose:
 * the file may already be gone, and `rmdir` only succeeds when the parent
 * directory is empty.
 *
 * @param filePath Path previously returned by the spec writer, or `null`.
 */
async function removeDeterministicBootstrapSpecFile(filePath: string | null): Promise<void> {
  if (filePath) {
    try {
      await fs.promises.rm(filePath, { force: true });
    } catch {
      // Ignored: cleanup must never fail the caller.
    }

    try {
      await fs.promises.rmdir(path.dirname(filePath));
    } catch {
      // Ignored: directory is missing or still has other entries.
    }
  }
}
|
||||
|
||||
/**
 * Writes the deferred first user prompt to a `.txt` file inside a freshly
 * created, uniquely named temp directory and returns the file path.
 *
 * The file is written with mode `0o600`. If the write fails, the temp
 * directory is removed again before the error is rethrown, because callers
 * only learn the path on success and could not clean it up themselves.
 *
 * @param prompt Prompt text to persist verbatim (UTF-8).
 * @returns Absolute path of the written prompt file.
 * @throws Whatever `mkdtemp` / `writeFile` reject with.
 */
async function writeDeterministicBootstrapUserPromptFile(prompt: string): Promise<string> {
  const tempDir = await fs.promises.mkdtemp(
    path.join(os.tmpdir(), 'agent-teams-bootstrap-prompt-')
  );
  const filePath = path.join(tempDir, `${randomUUID()}.txt`);
  try {
    await fs.promises.writeFile(filePath, prompt, {
      encoding: 'utf8',
      mode: 0o600,
    });
  } catch (error) {
    // Best-effort cleanup; the original write error is what the caller needs.
    await fs.promises.rm(tempDir, { recursive: true, force: true }).catch(() => {});
    throw error;
  }
  return filePath;
}
|
||||
|
||||
/**
 * Best-effort removal of a deferred user-prompt file and its parent directory.
 *
 * A null or empty path is a no-op. Each step swallows its own failure, so a
 * missing file or a non-empty parent directory never surfaces to the caller.
 *
 * @param filePath Path previously returned by the prompt writer, or `null`.
 */
async function removeDeterministicBootstrapUserPromptFile(filePath: string | null): Promise<void> {
  if (!filePath) {
    return;
  }

  const ignoreFailure = (): void => {};
  const cleanupSteps: Array<() => Promise<void>> = [
    () => fs.promises.rm(filePath, { force: true }),
    // Only succeeds when the directory is empty.
    () => fs.promises.rmdir(path.dirname(filePath)),
  ];

  for (const step of cleanupSteps) {
    await step().catch(ignoreFailure);
  }
}
|
||||
|
||||
function buildTeamCtlOpsInstructions(teamName: string, leadName: string): string {
|
||||
return wrapInAgentBlock(
|
||||
[
|
||||
|
|
@ -4030,6 +4179,7 @@ export class TeamProvisioningService {
|
|||
|
||||
// Verify --mcp-config still exists; regenerate if deleted (e.g. by stale GC)
|
||||
const mcpFlagIdx = ctx.args.indexOf('--mcp-config');
|
||||
const bootstrapPromptFlagIdx = ctx.args.indexOf('--team-bootstrap-user-prompt-file');
|
||||
if (mcpFlagIdx !== -1 && mcpFlagIdx + 1 < ctx.args.length) {
|
||||
const existingConfigPath = ctx.args[mcpFlagIdx + 1];
|
||||
try {
|
||||
|
|
@ -4054,6 +4204,73 @@ export class TeamProvisioningService {
|
|||
}
|
||||
}
|
||||
|
||||
if (bootstrapPromptFlagIdx !== -1 && bootstrapPromptFlagIdx + 1 < ctx.args.length) {
|
||||
const existingPromptPath = ctx.args[bootstrapPromptFlagIdx + 1];
|
||||
try {
|
||||
await fs.promises.access(existingPromptPath, fs.constants.F_OK);
|
||||
} catch {
|
||||
const submissionState = await readBootstrapRealTaskSubmissionState(run.teamName);
|
||||
if (submissionState === 'submitted') {
|
||||
ctx.args.splice(bootstrapPromptFlagIdx, 2);
|
||||
ctx.prompt = '';
|
||||
run.bootstrapUserPromptPath = null;
|
||||
} else if (submissionState === 'unknown') {
|
||||
run.authRetryInProgress = false;
|
||||
const progress = updateProgress(
|
||||
run,
|
||||
'failed',
|
||||
'Unable to safely retry first task after auth failure',
|
||||
{
|
||||
error:
|
||||
'deterministic bootstrap recorded the first real task as unknown, so retry would risk a duplicate submission',
|
||||
cliLogsTail: extractCliLogsFromRun(run),
|
||||
}
|
||||
);
|
||||
run.onProgress(progress);
|
||||
this.cleanupRun(run);
|
||||
return;
|
||||
} else if (ctx.prompt.trim().length === 0) {
|
||||
run.authRetryInProgress = false;
|
||||
const progress = updateProgress(
|
||||
run,
|
||||
'failed',
|
||||
'Failed to restore deferred first task after auth retry',
|
||||
{
|
||||
error:
|
||||
'deterministic bootstrap user prompt file was missing and no prompt was available to regenerate it',
|
||||
cliLogsTail: extractCliLogsFromRun(run),
|
||||
}
|
||||
);
|
||||
run.onProgress(progress);
|
||||
this.cleanupRun(run);
|
||||
return;
|
||||
} else {
|
||||
logger.warn(
|
||||
`[${run.teamName}] Bootstrap user prompt file ${existingPromptPath} missing, regenerating`
|
||||
);
|
||||
try {
|
||||
const newPromptPath = await writeDeterministicBootstrapUserPromptFile(ctx.prompt);
|
||||
ctx.args[bootstrapPromptFlagIdx + 1] = newPromptPath;
|
||||
run.bootstrapUserPromptPath = newPromptPath;
|
||||
} catch (regenErr) {
|
||||
run.authRetryInProgress = false;
|
||||
const progress = updateProgress(
|
||||
run,
|
||||
'failed',
|
||||
'Failed to regenerate deferred first task for auth retry',
|
||||
{
|
||||
error: regenErr instanceof Error ? regenErr.message : String(regenErr),
|
||||
cliLogsTail: extractCliLogsFromRun(run),
|
||||
}
|
||||
);
|
||||
run.onProgress(progress);
|
||||
this.cleanupRun(run);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Respawn with saved context — CLI handles its own auth refresh.
|
||||
let child: ReturnType<typeof spawn>;
|
||||
try {
|
||||
|
|
@ -4091,8 +4308,9 @@ export class TeamProvisioningService {
|
|||
});
|
||||
run.onProgress(run.progress);
|
||||
|
||||
// Resend prompt
|
||||
if (child.stdin?.writable) {
|
||||
// Resend prompt only for legacy direct-stdin flows. Deterministic bootstrap
|
||||
// owns the first real task via --team-bootstrap-user-prompt-file.
|
||||
if (bootstrapPromptFlagIdx === -1 && child.stdin?.writable) {
|
||||
const message = JSON.stringify({
|
||||
type: 'user',
|
||||
message: {
|
||||
|
|
@ -4352,7 +4570,10 @@ export class TeamProvisioningService {
|
|||
waitingTasksSince: null,
|
||||
provisioningComplete: false,
|
||||
mcpConfigPath: null,
|
||||
bootstrapSpecPath: null,
|
||||
bootstrapUserPromptPath: null,
|
||||
isLaunch: false,
|
||||
deterministicBootstrap: true,
|
||||
fsPhase: 'waiting_config',
|
||||
leadRelayCapture: null,
|
||||
activeCrossTeamReplyHints: [],
|
||||
|
|
@ -4404,24 +4625,41 @@ export class TeamProvisioningService {
|
|||
run.onProgress(run.progress);
|
||||
await this.clearPersistedLaunchState(request.teamName);
|
||||
|
||||
const prompt = buildProvisioningPrompt(request, effectiveMemberSpecs);
|
||||
const promptSize = getPromptSizeSummary(prompt);
|
||||
const bootstrapSpec = buildDeterministicBootstrapSpec(runId, request, effectiveMemberSpecs);
|
||||
const initialUserPrompt = request.prompt?.trim() ?? '';
|
||||
const promptSize = getPromptSizeSummary(initialUserPrompt);
|
||||
let child: ReturnType<typeof spawn>;
|
||||
const { env: shellEnv, geminiRuntimeAuth } = await this.buildProvisioningEnv(
|
||||
request.providerId
|
||||
);
|
||||
shellEnv.CLAUDE_ENABLE_DETERMINISTIC_TEAM_BOOTSTRAP = '1';
|
||||
const teammateModeDecision = await resolveDesktopTeammateModeDecision(request.extraCliArgs);
|
||||
if (teammateModeDecision.forceProcessTeammates) {
|
||||
shellEnv.CLAUDE_TEAM_FORCE_PROCESS_TEAMMATES = '1';
|
||||
}
|
||||
let mcpConfigPath: string;
|
||||
let bootstrapSpecPath: string;
|
||||
let bootstrapUserPromptPath: string | null = null;
|
||||
try {
|
||||
bootstrapSpecPath = await writeDeterministicBootstrapSpecFile(bootstrapSpec);
|
||||
run.bootstrapSpecPath = bootstrapSpecPath;
|
||||
if (initialUserPrompt) {
|
||||
bootstrapUserPromptPath =
|
||||
await writeDeterministicBootstrapUserPromptFile(initialUserPrompt);
|
||||
run.bootstrapUserPromptPath = bootstrapUserPromptPath;
|
||||
}
|
||||
mcpConfigPath = await this.mcpConfigBuilder.writeConfigFile(request.cwd);
|
||||
run.mcpConfigPath = mcpConfigPath;
|
||||
await this.validateAgentTeamsMcpRuntime(claudePath, request.cwd, shellEnv, mcpConfigPath);
|
||||
} catch (error) {
|
||||
this.runs.delete(runId);
|
||||
this.provisioningRunByTeam.delete(request.teamName);
|
||||
await removeDeterministicBootstrapSpecFile(run.bootstrapSpecPath).catch(() => {});
|
||||
run.bootstrapSpecPath = null;
|
||||
await removeDeterministicBootstrapUserPromptFile(run.bootstrapUserPromptPath).catch(
|
||||
() => {}
|
||||
);
|
||||
run.bootstrapUserPromptPath = null;
|
||||
throw error;
|
||||
}
|
||||
const spawnArgs = [
|
||||
|
|
@ -4434,6 +4672,11 @@ export class TeamProvisioningService {
|
|||
'user,project,local',
|
||||
'--mcp-config',
|
||||
mcpConfigPath,
|
||||
'--team-bootstrap-spec',
|
||||
bootstrapSpecPath,
|
||||
...(bootstrapUserPromptPath
|
||||
? ['--team-bootstrap-user-prompt-file', bootstrapUserPromptPath]
|
||||
: []),
|
||||
'--disallowedTools',
|
||||
APP_TEAM_RUNTIME_DISALLOWED_TOOLS,
|
||||
// Explicit --permission-mode overrides user's defaultMode in ~/.claude/settings.json
|
||||
|
|
@ -4505,6 +4748,12 @@ export class TeamProvisioningService {
|
|||
const tasksDir = path.join(getTasksBasePath(), request.teamName);
|
||||
await fs.promises.rm(teamDir, { recursive: true, force: true }).catch(() => {});
|
||||
await fs.promises.rm(tasksDir, { recursive: true, force: true }).catch(() => {});
|
||||
await removeDeterministicBootstrapSpecFile(run.bootstrapSpecPath).catch(() => {});
|
||||
run.bootstrapSpecPath = null;
|
||||
await removeDeterministicBootstrapUserPromptFile(run.bootstrapUserPromptPath).catch(
|
||||
() => {}
|
||||
);
|
||||
run.bootstrapUserPromptPath = null;
|
||||
if (run.mcpConfigPath) {
|
||||
await this.mcpConfigBuilder.removeConfigFile(run.mcpConfigPath).catch(() => {});
|
||||
run.mcpConfigPath = null;
|
||||
|
|
@ -4525,21 +4774,9 @@ export class TeamProvisioningService {
|
|||
args: spawnArgs,
|
||||
cwd: request.cwd,
|
||||
env: { ...shellEnv },
|
||||
prompt,
|
||||
prompt: initialUserPrompt,
|
||||
};
|
||||
|
||||
// Send provisioning prompt as first stream-json message (SDKUserMessage format)
|
||||
if (child.stdin?.writable) {
|
||||
const message = JSON.stringify({
|
||||
type: 'user',
|
||||
message: {
|
||||
role: 'user',
|
||||
content: [{ type: 'text', text: prompt }],
|
||||
},
|
||||
});
|
||||
child.stdin.write(message + '\n');
|
||||
}
|
||||
|
||||
this.attachStdoutHandler(run);
|
||||
this.attachStderrHandler(run);
|
||||
|
||||
|
|
@ -4841,7 +5078,10 @@ export class TeamProvisioningService {
|
|||
waitingTasksSince: null,
|
||||
provisioningComplete: false,
|
||||
mcpConfigPath: null,
|
||||
bootstrapSpecPath: null,
|
||||
bootstrapUserPromptPath: null,
|
||||
isLaunch: true,
|
||||
deterministicBootstrap: false,
|
||||
fsPhase: 'waiting_members',
|
||||
leadRelayCapture: null,
|
||||
activeCrossTeamReplyHints: [],
|
||||
|
|
@ -5840,10 +6080,17 @@ export class TeamProvisioningService {
|
|||
return Array.from(this.aliveRunByTeam.keys()).filter((name) => this.isTeamAlive(name));
|
||||
}
|
||||
|
||||
getRuntimeState(teamName: string): TeamRuntimeState {
|
||||
async getRuntimeState(teamName: string): Promise<TeamRuntimeState> {
|
||||
const runId = this.getTrackedRunId(teamName);
|
||||
const run = runId ? (this.runs.get(runId) ?? null) : null;
|
||||
|
||||
if (!run) {
|
||||
const recovered = await readBootstrapRuntimeState(teamName);
|
||||
if (recovered) {
|
||||
return recovered;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
teamName,
|
||||
isAlive: this.isTeamAlive(teamName),
|
||||
|
|
@ -6175,7 +6422,11 @@ export class TeamProvisioningService {
|
|||
}
|
||||
|
||||
const current = run.memberSpawnStatuses.get(expected);
|
||||
if (current?.launchState === 'failed_to_start') {
|
||||
if (
|
||||
current?.launchState === 'failed_to_start' ||
|
||||
current?.bootstrapConfirmed ||
|
||||
current?.runtimeAlive
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -6224,6 +6475,7 @@ export class TeamProvisioningService {
|
|||
|
||||
private async clearPersistedLaunchState(teamName: string): Promise<void> {
|
||||
await this.launchStateStore.clear(teamName);
|
||||
await clearBootstrapState(teamName);
|
||||
}
|
||||
|
||||
private getFailedSpawnMembers(
|
||||
|
|
@ -6338,7 +6590,15 @@ export class TeamProvisioningService {
|
|||
snapshot: ReturnType<typeof createPersistedLaunchSnapshot> | null;
|
||||
statuses: Record<string, MemberSpawnStatusEntry>;
|
||||
}> {
|
||||
const bootstrapSnapshot = await readBootstrapLaunchSnapshot(teamName);
|
||||
const persisted = await this.launchStateStore.read(teamName);
|
||||
const preferredSnapshot = choosePreferredLaunchSnapshot(bootstrapSnapshot, persisted);
|
||||
if (preferredSnapshot) {
|
||||
return {
|
||||
snapshot: preferredSnapshot,
|
||||
statuses: snapshotToMemberSpawnStatuses(preferredSnapshot),
|
||||
};
|
||||
}
|
||||
if (!persisted) {
|
||||
return { snapshot: null, statuses: {} };
|
||||
}
|
||||
|
|
@ -6998,6 +7258,152 @@ export class TeamProvisioningService {
|
|||
* Process a parsed stream-json message from stdout.
|
||||
* Extracts assistant text for progress reporting and detects turn completion.
|
||||
*/
|
||||
/**
 * Consumes `system` / `team_bootstrap` stream-json messages and mirrors them
 * into run progress and member spawn statuses.
 *
 * Handled events: `started`, `phase_changed`, `team_created`,
 * `member_spawn_started`, `member_spawn_result`, `completed` and `failed`.
 * Any other (or missing) event is still consumed, just without side effects.
 *
 * @param run The provisioning run the message belongs to.
 * @param msg A parsed stream-json message.
 * @returns `false` when `msg` is not a team-bootstrap system message, `true`
 *   in every other case.
 */
private handleDeterministicBootstrapEvent(
  run: ProvisioningRun,
  msg: Record<string, unknown>
): boolean {
  const isBootstrapMessage = msg.type === 'system' && msg.subtype === 'team_bootstrap';
  if (!isBootstrapMessage) {
    return false;
  }

  // Missing, non-string and empty event names are swallowed silently.
  const event = typeof msg.event === 'string' ? msg.event : '';
  if (event === '') {
    return true;
  }

  const trimmedOrEmpty = (value: unknown): string =>
    typeof value === 'string' ? value.trim() : '';
  const spawnFailureFallback = 'Deterministic bootstrap failed to spawn teammate.';

  switch (event) {
    case 'started': {
      run.onProgress(updateProgress(run, 'configuring', 'Starting deterministic team bootstrap'));
      break;
    }

    case 'phase_changed': {
      // Unknown phases are accepted but produce no progress update.
      switch (typeof msg.phase === 'string' ? msg.phase : '') {
        case 'loading_existing_state':
          run.onProgress(updateProgress(run, 'configuring', 'Loading existing team state'));
          break;
        case 'acquiring_bootstrap_lock':
          run.onProgress(
            updateProgress(run, 'configuring', 'Acquiring deterministic bootstrap lock')
          );
          break;
        case 'creating_team':
          run.onProgress(updateProgress(run, 'assembling', 'Creating team config'));
          break;
        case 'spawning_members':
          run.onProgress(updateProgress(run, 'assembling', 'Spawning teammate runtimes'));
          break;
        case 'auditing_truth':
          run.onProgress(
            updateProgress(
              run,
              'finalizing',
              'Auditing registered teammates and bootstrap truth',
              { configReady: true }
            )
          );
          break;
        default:
          break;
      }
      break;
    }

    case 'team_created': {
      const statusText =
        msg.reused_existing_team === true
          ? 'Attached to existing team, starting teammates'
          : 'Team config created, starting teammates';
      run.onProgress(updateProgress(run, 'assembling', statusText, { configReady: true }));
      break;
    }

    case 'member_spawn_started': {
      const memberName = trimmedOrEmpty(msg.member_name);
      if (memberName) {
        this.setMemberSpawnStatus(run, memberName, 'spawning');
      }
      break;
    }

    case 'member_spawn_result': {
      const memberName = trimmedOrEmpty(msg.member_name);
      if (!memberName) {
        break;
      }

      const outcome = typeof msg.outcome === 'string' ? msg.outcome : '';
      if (outcome === 'failed') {
        this.setMemberSpawnStatus(
          run,
          memberName,
          'error',
          trimmedOrEmpty(msg.reason) || spawnFailureFallback
        );
      } else if (outcome === 'already_running') {
        this.setMemberSpawnStatus(run, memberName, 'online', undefined, 'process');
      } else {
        // Every other outcome means the spawn was issued; wait for the member.
        this.setMemberSpawnStatus(run, memberName, 'waiting');
      }
      break;
    }

    case 'completed': {
      const failedMembers = Array.isArray(msg.failed_members) ? msg.failed_members : [];
      for (const failed of failedMembers) {
        const memberName = trimmedOrEmpty(failed?.name);
        if (!memberName) {
          continue;
        }
        this.setMemberSpawnStatus(
          run,
          memberName,
          'error',
          trimmedOrEmpty(failed?.reason) || spawnFailureFallback
        );
      }

      // Finish provisioning once, and never for a cancelled run.
      if (!(run.provisioningComplete || run.cancelRequested)) {
        void this.handleProvisioningTurnComplete(run).catch((error: unknown) => {
          logger.error(
            `[${run.teamName}] deterministic bootstrap completion handler failed: ${
              error instanceof Error ? error.message : String(error)
            }`
          );
        });
      }
      break;
    }

    case 'failed': {
      // A run that already failed or is being cancelled is not failed twice.
      if (run.progress.state === 'failed' || run.cancelRequested) {
        break;
      }

      const reason = trimmedOrEmpty(msg.reason) || 'Deterministic bootstrap failed.';
      run.onProgress(
        updateProgress(run, 'failed', 'Deterministic bootstrap failed', {
          error: reason,
          cliLogsTail: extractCliLogsFromRun(run),
        })
      );
      run.processKilled = true;
      killTeamProcess(run.child);
      this.cleanupRun(run);
      break;
    }

    default:
      break;
  }

  return true;
}
|
||||
|
||||
private handleStreamJsonMessage(run: ProvisioningRun, msg: Record<string, unknown>): void {
|
||||
// stream-json output has various message types:
|
||||
// {"type":"assistant","content":[{"type":"text","text":"..."},...]}
|
||||
|
|
@ -7194,6 +7600,10 @@ export class TeamProvisioningService {
|
|||
}
|
||||
}
|
||||
|
||||
if (this.handleDeterministicBootstrapEvent(run, msg)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle control_request — tool approval protocol (only when --dangerously-skip-permissions is NOT set)
|
||||
if (msg.type === 'control_request') {
|
||||
this.handleControlRequest(run, msg);
|
||||
|
|
@ -9334,6 +9744,14 @@ export class TeamProvisioningService {
|
|||
void this.mcpConfigBuilder.removeConfigFile(run.mcpConfigPath);
|
||||
run.mcpConfigPath = null;
|
||||
}
|
||||
if (run.bootstrapSpecPath) {
|
||||
void removeDeterministicBootstrapSpecFile(run.bootstrapSpecPath);
|
||||
run.bootstrapSpecPath = null;
|
||||
}
|
||||
if (run.bootstrapUserPromptPath) {
|
||||
void removeDeterministicBootstrapUserPromptFile(run.bootstrapUserPromptPath);
|
||||
run.bootstrapUserPromptPath = null;
|
||||
}
|
||||
// Remove from runs Map to free memory (stdoutBuffer, stderrBuffer, claudeLogLines)
|
||||
this.runs.delete(run.runId);
|
||||
}
|
||||
|
|
@ -9397,10 +9815,32 @@ export class TeamProvisioningService {
|
|||
}
|
||||
|
||||
if (run.fsPhase === 'waiting_members') {
|
||||
if (run.deterministicBootstrap) {
|
||||
const registeredNames = await this.getRegisteredTeamMemberNames(run.teamName);
|
||||
const registeredMembers = registeredNames
|
||||
? request.members.filter((member) => registeredNames.has(member.name)).length
|
||||
: 0;
|
||||
|
||||
if (registeredMembers >= request.members.length) {
|
||||
run.fsPhase = 'all_files_found';
|
||||
if (!run.provisioningComplete) {
|
||||
void this.handleProvisioningTurnComplete(run);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (request.members.length === 0) {
|
||||
run.fsPhase = 'waiting_tasks';
|
||||
const progress = updateProgress(run, 'finalizing', 'Solo team, preparing workspace');
|
||||
run.onProgress(progress);
|
||||
if (run.deterministicBootstrap) {
|
||||
run.fsPhase = 'all_files_found';
|
||||
if (!run.provisioningComplete) {
|
||||
void this.handleProvisioningTurnComplete(run);
|
||||
}
|
||||
} else {
|
||||
run.fsPhase = 'waiting_tasks';
|
||||
const progress = updateProgress(run, 'finalizing', 'Solo team, preparing workspace');
|
||||
run.onProgress(progress);
|
||||
}
|
||||
} else {
|
||||
const teamDir = (await resolveTeamDir()) ?? configuredTeamDir;
|
||||
const inboxDir = path.join(teamDir, 'inboxes');
|
||||
|
|
@ -9735,6 +10175,10 @@ export class TeamProvisioningService {
|
|||
members: run.effectiveMembers,
|
||||
}
|
||||
);
|
||||
await this.refreshMemberSpawnStatusesFromLeadInbox(run);
|
||||
await this.maybeAuditMemberSpawnStatuses(run, { force: true });
|
||||
await this.finalizeMissingRegisteredMembersAsFailed(run);
|
||||
await this.persistLaunchStateSnapshot(run, 'finished');
|
||||
// Process was killed by timeout — mark as disconnected, not ready
|
||||
const progress = updateProgress(run, 'disconnected', 'Team provisioned but process timed out', {
|
||||
warnings,
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ describe('HTTP team runtime routes', () => {
|
|||
const launchTeam = vi.fn<
|
||||
(request: TeamLaunchRequest, onProgress: (progress: TeamProvisioningProgress) => void) => Promise<TeamLaunchResponse>
|
||||
>();
|
||||
const getRuntimeState = vi.fn<(teamName: string) => TeamRuntimeState>();
|
||||
const getRuntimeState = vi.fn<(teamName: string) => Promise<TeamRuntimeState>>();
|
||||
const getProvisioningStatus = vi.fn<(runId: string) => Promise<TeamProvisioningProgress>>();
|
||||
const stopTeam = vi.fn<(teamName: string) => void>();
|
||||
const getAliveTeams = vi.fn<() => string[]>();
|
||||
|
|
@ -82,6 +82,7 @@ describe('HTTP team runtime routes', () => {
|
|||
teamName: 'demo-team',
|
||||
cwd: '/tmp/project',
|
||||
prompt: 'Resume work',
|
||||
providerId: 'anthropic',
|
||||
skipPermissions: false,
|
||||
clearContext: true,
|
||||
},
|
||||
|
|
@ -115,7 +116,7 @@ describe('HTTP team runtime routes', () => {
|
|||
it('returns runtime state, provisioning status, and stop results', async () => {
|
||||
const { app, getRuntimeState, getProvisioningStatus, stopTeam, getAliveTeams } = await createApp();
|
||||
getRuntimeState
|
||||
.mockReturnValueOnce({
|
||||
.mockResolvedValueOnce({
|
||||
teamName: 'demo-team',
|
||||
isAlive: true,
|
||||
runId: 'run-2',
|
||||
|
|
@ -128,13 +129,13 @@ describe('HTTP team runtime routes', () => {
|
|||
updatedAt: '2026-03-12T00:00:01.000Z',
|
||||
},
|
||||
})
|
||||
.mockReturnValueOnce({
|
||||
.mockResolvedValueOnce({
|
||||
teamName: 'demo-team',
|
||||
isAlive: false,
|
||||
runId: null,
|
||||
progress: null,
|
||||
})
|
||||
.mockReturnValueOnce({
|
||||
.mockResolvedValueOnce({
|
||||
teamName: 'demo-team',
|
||||
isAlive: true,
|
||||
runId: 'run-2',
|
||||
|
|
|
|||
352
test/main/services/team/TeamBootstrapStateReader.test.ts
Normal file
352
test/main/services/team/TeamBootstrapStateReader.test.ts
Normal file
|
|
@ -0,0 +1,352 @@
|
|||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
// In-memory stand-in for the handful of `fs.promises` calls the reader uses.
// Built inside `vi.hoisted` so the mocks exist before `vi.mock('fs')` runs.
const hoisted = vi.hoisted(() => {
  type MockFileEntry = {
    contents: string;
    size?: number;
    symbolicLink?: boolean;
  };

  // Keyed by forward-slash path so Windows-style separators hit the same entry.
  const files = new Map<string, MockFileEntry>();

  const toForwardSlashes = (p: string): string => p.split('\\').join('/');

  // Looks up an entry or throws the same ENOENT-shaped error Node would.
  const requireEntry = (filePath: string): MockFileEntry => {
    const entry = files.get(toForwardSlashes(filePath));
    if (entry === undefined) {
      const missing: NodeJS.ErrnoException = new Error('ENOENT');
      missing.code = 'ENOENT';
      throw missing;
    }
    return entry;
  };

  const lstat = vi.fn(async (filePath: string) => {
    const entry = requireEntry(filePath);
    return {
      isFile: () => !entry.symbolicLink,
      isSymbolicLink: () => Boolean(entry.symbolicLink),
      // An explicit `size` wins; otherwise it is derived from the contents.
      size: entry.size ?? Buffer.byteLength(entry.contents, 'utf8'),
    };
  });

  const readFile = vi.fn(async (filePath: string) => requireEntry(filePath).contents);

  const access = vi.fn(async (filePath: string) => {
    requireEntry(filePath);
  });

  const rm = vi.fn(async (filePath: string) => {
    files.delete(toForwardSlashes(filePath));
  });

  return { files, lstat, readFile, access, rm };
});
|
||||
|
||||
vi.mock('fs', async (importOriginal) => {
|
||||
const actual = await importOriginal<typeof import('fs')>();
|
||||
return {
|
||||
...actual,
|
||||
promises: {
|
||||
...actual.promises,
|
||||
lstat: hoisted.lstat,
|
||||
readFile: hoisted.readFile,
|
||||
access: hoisted.access,
|
||||
rm: hoisted.rm,
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
vi.mock('../../../../src/main/utils/pathDecoder', () => ({
|
||||
getTeamsBasePath: () => '/mock/teams',
|
||||
}));
|
||||
|
||||
import {
|
||||
choosePreferredLaunchSnapshot,
|
||||
readBootstrapLaunchSnapshot,
|
||||
readBootstrapRealTaskSubmissionState,
|
||||
readBootstrapRuntimeState,
|
||||
} from '../../../../src/main/services/team/TeamBootstrapStateReader';
|
||||
|
||||
describe('TeamBootstrapStateReader', () => {
|
||||
beforeEach(() => {
|
||||
hoisted.files.clear();
|
||||
hoisted.lstat.mockClear();
|
||||
hoisted.readFile.mockClear();
|
||||
hoisted.access.mockClear();
|
||||
hoisted.rm.mockClear();
|
||||
});
|
||||
|
||||
it('rejects symlink bootstrap-state files', async () => {
|
||||
hoisted.files.set('/mock/teams/demo/bootstrap-state.json', {
|
||||
contents: '{}',
|
||||
symbolicLink: true,
|
||||
});
|
||||
|
||||
await expect(readBootstrapLaunchSnapshot('demo')).resolves.toBeNull();
|
||||
await expect(readBootstrapRuntimeState('demo')).resolves.toBeNull();
|
||||
});
|
||||
|
||||
it('projects active bootstrap-state into runtime progress', async () => {
|
||||
const nowSpy = vi.spyOn(Date, 'now').mockReturnValue(1700000001000);
|
||||
const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true as never);
|
||||
|
||||
hoisted.files.set('/mock/teams/demo/bootstrap-state.json', {
|
||||
contents: JSON.stringify({
|
||||
version: 1,
|
||||
runId: 'run-123',
|
||||
teamName: 'demo',
|
||||
ownerPid: 4242,
|
||||
startedAt: 1700000000000,
|
||||
updatedAt: 1700000000500,
|
||||
phase: 'acquiring_bootstrap_lock',
|
||||
members: [{ name: 'alice', status: 'pending' }],
|
||||
}),
|
||||
});
|
||||
hoisted.files.set('/mock/teams/demo/bootstrap-journal.jsonl', {
|
||||
contents: [
|
||||
JSON.stringify({ ts: 1, type: 'phase', runId: 'run-123', phase: 'loading_existing_state' }),
|
||||
JSON.stringify({ ts: 2, type: 'lock', runId: 'run-123', action: 'acquired', ownerPid: 4242 }),
|
||||
JSON.stringify({ ts: 3, type: 'member', runId: 'run-123', name: 'alice', action: 'spawn_started' }),
|
||||
].join('\n'),
|
||||
});
|
||||
|
||||
await expect(readBootstrapRuntimeState('demo')).resolves.toEqual({
|
||||
teamName: 'demo',
|
||||
isAlive: false,
|
||||
runId: 'run-123',
|
||||
progress: {
|
||||
runId: 'run-123',
|
||||
teamName: 'demo',
|
||||
state: 'configuring',
|
||||
message: 'Acquiring deterministic bootstrap lock',
|
||||
warnings: [
|
||||
'Recent deterministic bootstrap events: bootstrap phase: loading_existing_state | bootstrap lock acquired (pid 4242) | alice: spawn_started',
|
||||
],
|
||||
startedAt: '2023-11-14T22:13:20.000Z',
|
||||
updatedAt: '2023-11-14T22:13:20.500Z',
|
||||
pid: 4242,
|
||||
},
|
||||
});
|
||||
|
||||
killSpy.mockRestore();
|
||||
nowSpy.mockRestore();
|
||||
});
|
||||
|
||||
it('ignores terminal bootstrap-state for runtime recovery projection', async () => {
|
||||
hoisted.files.set('/mock/teams/demo/bootstrap-state.json', {
|
||||
contents: JSON.stringify({
|
||||
version: 1,
|
||||
runId: 'run-123',
|
||||
teamName: 'demo',
|
||||
startedAt: 1700000000000,
|
||||
updatedAt: 1700000000500,
|
||||
phase: 'completed',
|
||||
terminal: {
|
||||
status: 'completed',
|
||||
finishedAt: 1700000000500,
|
||||
},
|
||||
members: [{ name: 'alice', status: 'registered' }],
|
||||
}),
|
||||
});
|
||||
|
||||
await expect(readBootstrapRuntimeState('demo')).resolves.toBeNull();
|
||||
});
|
||||
|
||||
it('reads persisted real-task submission state', async () => {
|
||||
hoisted.files.set('/mock/teams/demo/bootstrap-state.json', {
|
||||
contents: JSON.stringify({
|
||||
version: 1,
|
||||
runId: 'run-123',
|
||||
teamName: 'demo',
|
||||
startedAt: 1700000000000,
|
||||
updatedAt: 1700000000500,
|
||||
phase: 'completed',
|
||||
realTaskSubmissionState: 'submitted',
|
||||
members: [],
|
||||
}),
|
||||
});
|
||||
|
||||
await expect(readBootstrapRealTaskSubmissionState('demo')).resolves.toBe('submitted');
|
||||
});
|
||||
|
||||
it('classifies dead bootstrap owner as failed launch snapshot instead of pending', async () => {
|
||||
const nowSpy = vi.spyOn(Date, 'now').mockReturnValue(1700000300000);
|
||||
const killSpy = vi
|
||||
.spyOn(process, 'kill')
|
||||
.mockImplementation(() => {
|
||||
const error = new Error('ESRCH') as NodeJS.ErrnoException;
|
||||
error.code = 'ESRCH';
|
||||
throw error;
|
||||
});
|
||||
|
||||
hoisted.files.set('/mock/teams/demo/bootstrap-state.json', {
|
||||
contents: JSON.stringify({
|
||||
version: 1,
|
||||
runId: 'run-dead',
|
||||
teamName: 'demo',
|
||||
ownerPid: 777,
|
||||
startedAt: 1700000000000,
|
||||
updatedAt: 1700000000000,
|
||||
phase: 'spawning_members',
|
||||
members: [{ name: 'alice', status: 'registered' }],
|
||||
}),
|
||||
});
|
||||
|
||||
await expect(readBootstrapLaunchSnapshot('demo')).resolves.toMatchObject({
|
||||
launchPhase: 'finished',
|
||||
members: {
|
||||
alice: {
|
||||
launchState: 'failed_to_start',
|
||||
hardFailure: true,
|
||||
hardFailureReason:
|
||||
'bootstrap owner pid 777 is gone and persisted bootstrap state is stale',
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
killSpy.mockRestore();
|
||||
nowSpy.mockRestore();
|
||||
});
|
||||
|
||||
it('projects dead bootstrap owner into failed runtime progress', async () => {
  // Pin the clock 1 s after the persisted `updatedAt` (1700000200000) used below.
  const nowSpy = vi.spyOn(Date, 'now').mockReturnValue(1700000201000);
  // Make every process.kill call throw ESRCH ("no such process") so the owner pid looks dead
  // to any liveness probe that goes through process.kill.
  const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => {
    const error = new Error('ESRCH') as NodeJS.ErrnoException;
    error.code = 'ESRCH';
    throw error;
  });

  // Restore the spies in `finally` so a failing assertion cannot leak the mocked
  // Date.now / process.kill into tests that run afterwards.
  try {
    // Bootstrap state left mid-spawn by owner pid 777, with one registered member.
    hoisted.files.set('/mock/teams/demo/bootstrap-state.json', {
      contents: JSON.stringify({
        version: 1,
        runId: 'run-dead',
        teamName: 'demo',
        ownerPid: 777,
        startedAt: 1700000000000,
        updatedAt: 1700000200000,
        phase: 'spawning_members',
        members: [{ name: 'alice', status: 'registered' }],
      }),
    });

    // The runtime state must report a failed, non-alive run that names the missing owner.
    await expect(readBootstrapRuntimeState('demo')).resolves.toMatchObject({
      teamName: 'demo',
      isAlive: false,
      runId: 'run-dead',
      progress: {
        state: 'failed',
        message: 'Deterministic bootstrap owner exited before bootstrap completed',
        error: 'bootstrap owner pid 777 is gone before bootstrap reached a terminal state',
      },
    });
  } finally {
    killSpy.mockRestore();
    nowSpy.mockRestore();
  }
});
|
||||
|
||||
it('projects degraded runtime progress when bootstrap-state is unreadable but lock owner is alive', async () => {
  // process.kill returns without throwing, so any pid probed through it looks alive.
  const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true as never);

  // Restore the spy in `finally` so a failing assertion cannot leak the mocked
  // process.kill into tests that run afterwards.
  try {
    // The state file holds invalid JSON; only the lock metadata and journal are parseable.
    hoisted.files.set('/mock/teams/demo/bootstrap-state.json', {
      contents: '{invalid-json',
    });
    hoisted.files.set('/mock/teams/demo/.bootstrap.lock/metadata.json', {
      contents: JSON.stringify({
        pid: 4242,
        runId: 'run-lock',
        requestHash: 'hash-1',
        ownerStartedAt: 1700000000000,
        createdAt: 1700000000100,
        nonce: 'nonce-1',
      }),
    });
    hoisted.files.set('/mock/teams/demo/bootstrap-journal.jsonl', {
      contents: JSON.stringify({
        ts: 3,
        type: 'member',
        runId: 'run-lock',
        name: 'alice',
        action: 'spawn_started',
      }),
    });

    // Expect a warning-level "configuring" projection carrying the lock's runId and pid.
    await expect(readBootstrapRuntimeState('demo')).resolves.toMatchObject({
      teamName: 'demo',
      isAlive: false,
      runId: 'run-lock',
      progress: {
        state: 'configuring',
        message:
          'Deterministic bootstrap recovery is degraded because persisted bootstrap state is unreadable',
        messageSeverity: 'warning',
        pid: 4242,
      },
    });
  } finally {
    killSpy.mockRestore();
  }
});
|
||||
|
||||
it('projects degraded failed runtime progress when bootstrap-state is unreadable and lock owner is dead', async () => {
  // Make every process.kill call throw ESRCH ("no such process") so the lock owner pid looks
  // dead to any liveness probe that goes through process.kill.
  const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => {
    const error = new Error('ESRCH') as NodeJS.ErrnoException;
    error.code = 'ESRCH';
    throw error;
  });

  // Restore the spy in `finally` so a failing assertion cannot leak the mocked
  // process.kill into tests that run afterwards.
  try {
    // The state file holds invalid JSON; only the lock metadata is parseable.
    hoisted.files.set('/mock/teams/demo/bootstrap-state.json', {
      contents: '{invalid-json',
    });
    hoisted.files.set('/mock/teams/demo/.bootstrap.lock/metadata.json', {
      contents: JSON.stringify({
        pid: 7331,
        runId: 'run-dead-lock',
        requestHash: 'hash-2',
        ownerStartedAt: 1700000000000,
        createdAt: 1700000000100,
        nonce: 'nonce-2',
      }),
    });

    // Expect a warning-level "failed" projection carrying the lock's runId and pid.
    await expect(readBootstrapRuntimeState('demo')).resolves.toMatchObject({
      teamName: 'demo',
      isAlive: false,
      runId: 'run-dead-lock',
      progress: {
        state: 'failed',
        message:
          'Deterministic bootstrap recovery failed because persisted bootstrap state is unreadable and the bootstrap owner is gone',
        messageSeverity: 'warning',
        pid: 7331,
      },
    });
  } finally {
    killSpy.mockRestore();
  }
});
|
||||
|
||||
it('prefers the newer launch snapshot when bootstrap snapshot is stale', () => {
  // The launch snapshot's updatedAt is five minutes later than the bootstrap snapshot's.
  const expectedSnapshot = {
    updatedAt: '2026-04-06T10:05:00.000Z',
    kind: 'launch',
  };

  expect(
    choosePreferredLaunchSnapshot(
      { updatedAt: '2026-04-06T10:00:00.000Z', kind: 'bootstrap' },
      { updatedAt: '2026-04-06T10:05:00.000Z', kind: 'launch' }
    )
  ).toEqual(expectedSnapshot);
});
|
||||
});
|
||||
Loading…
Reference in a new issue