fix(team): handle partial opencode bootstrap stalls

This commit is contained in:
777genius 2026-05-04 21:12:14 +03:00
parent cde85c0396
commit dad9fefdc7
5 changed files with 348 additions and 10 deletions

View file

@ -88,6 +88,26 @@ function preservesStrongRuntimeAlive(value: {
);
}
/**
 * Tells whether an OpenCode member carries at least one marker that a runtime
 * has been materialized for it.
 *
 * Any single marker is sufficient: an explicit `runtimeAlive === true`, a
 * positive finite `runtimePid`, a non-blank `runtimeSessionId` or `sessionId`,
 * or a `livenessKind` of `runtime_process`, `runtime_process_candidate` or
 * `registered_only`.
 *
 * @param value - State/evidence object to inspect; every field is optional.
 * @returns `true` when at least one marker is present, otherwise `false`.
 */
function hasMaterializedOpenCodeRuntimeMarker(value: {
  runtimeAlive?: boolean;
  runtimePid?: number;
  runtimeSessionId?: string;
  sessionId?: string;
  livenessKind?: TeamAgentRuntimeLivenessKind;
}): boolean {
  if (value.runtimeAlive === true) {
    return true;
  }

  // Only a real, positive process id counts (rejects NaN, Infinity, 0, negatives).
  const pid = value.runtimePid;
  if (typeof pid === 'number' && Number.isFinite(pid) && pid > 0) {
    return true;
  }

  // Whitespace-only identifiers do not count as a session marker.
  const sessionIds = [value.runtimeSessionId, value.sessionId];
  if (sessionIds.some((id) => typeof id === 'string' && id.trim().length > 0)) {
    return true;
  }

  switch (value.livenessKind) {
    case 'runtime_process':
    case 'runtime_process_candidate':
    case 'registered_only':
      return true;
    default:
      return false;
  }
}
function buildDiagnostics(
member: Pick<
PersistedTeamLaunchMemberState,
@ -279,7 +299,7 @@ function createSecondaryLaneMemberState(
providerId === 'opencode' &&
evidence?.bootstrapStalled === true &&
launchState === 'runtime_pending_bootstrap' &&
strongRuntimeAlive &&
hasMaterializedOpenCodeRuntimeMarker(evidence) &&
evidence.bootstrapConfirmed !== true &&
hardFailure !== true
? true

View file

@ -133,12 +133,24 @@ function isPersistedBootstrapStalled(
| 'bootstrapConfirmed'
| 'hardFailure'
| 'bootstrapStalled'
| 'runtimePid'
| 'runtimeSessionId'
| 'livenessKind'
>
): boolean {
const hasMaterializedOpenCodeRuntimeMarker =
member.runtimeAlive === true ||
(typeof member.runtimePid === 'number' &&
Number.isFinite(member.runtimePid) &&
member.runtimePid > 0) ||
(typeof member.runtimeSessionId === 'string' && member.runtimeSessionId.trim().length > 0) ||
member.livenessKind === 'runtime_process' ||
member.livenessKind === 'runtime_process_candidate' ||
member.livenessKind === 'registered_only';
return (
member.bootstrapStalled === true &&
isOpenCodeSecondaryBootstrapPending(member) &&
member.runtimeAlive === true
hasMaterializedOpenCodeRuntimeMarker
);
}

View file

@ -336,10 +336,13 @@ interface LaunchStateWriteResult {
wrote: boolean;
}
/**
 * Identifies which transcript pattern produced a bootstrap "success" match:
 * `member_briefing` for the "member briefing for <member> on team …" text, and
 * `assistant_text` for the free-text bootstrap-completion phrase.
 */
type BootstrapTranscriptSuccessSource = 'member_briefing' | 'assistant_text';
type BootstrapTranscriptOutcome =
| {
kind: 'success';
observedAt: string;
source: BootstrapTranscriptSuccessSource;
}
| {
kind: 'failure';
@ -3228,15 +3231,23 @@ function isBootstrapTranscriptSuccessText(
teamName: string,
memberName: string
): boolean {
return getBootstrapTranscriptSuccessSource(text, teamName, memberName) !== null;
}
function getBootstrapTranscriptSuccessSource(
text: string,
teamName: string,
memberName: string
): BootstrapTranscriptSuccessSource | null {
const normalizedText = text.replace(/\s+/g, ' ').trim().toLowerCase();
if (!normalizedText) {
return false;
return null;
}
const normalizedTeamName = teamName.trim().toLowerCase();
const normalizedMemberName = memberName.trim().toLowerCase();
if (!normalizedTeamName || !normalizedMemberName) {
return false;
return null;
}
if (
@ -3247,13 +3258,13 @@ function isBootstrapTranscriptSuccessText(
`member briefing for ${normalizedMemberName} on team '${normalizedTeamName}' (${normalizedTeamName}).`
)
) {
return true;
return 'member_briefing';
}
return (
normalizedText.includes(`bootstrap выполнен для \`${normalizedMemberName}\``) &&
return normalizedText.includes(`bootstrap выполнен для \`${normalizedMemberName}\``) &&
normalizedText.includes(`команде \`${normalizedTeamName}\``)
);
? 'assistant_text'
: null;
}
function isBootstrapTranscriptContextText(
@ -13030,6 +13041,29 @@ export class TeamProvisioningService {
}
return;
}
if (
this.isOpenCodeSecondaryLaneMemberInRun(run, memberName) &&
refreshed.launchState === 'runtime_pending_bootstrap' &&
refreshed.bootstrapConfirmed !== true &&
refreshed.hardFailure !== true &&
elapsedMs >= MEMBER_BOOTSTRAP_STALL_MS
) {
const enriched = {
...refreshed,
...(metadata?.livenessKind ? { livenessKind: metadata.livenessKind } : {}),
...(runtimeDiagnostic ? { runtimeDiagnostic } : {}),
...(metadata?.runtimeDiagnosticSeverity
? { runtimeDiagnosticSeverity: metadata.runtimeDiagnosticSeverity }
: {}),
};
const diagnostic = await this.buildOpenCodeSecondaryBootstrapStallDiagnostic(
run,
memberName,
enriched
);
this.setOpenCodeSecondaryBootstrapStalledStatus(run, memberName, enriched, diagnostic);
return;
}
const strictReason = restartPending
? buildRestartGraceTimeoutReason(memberName)
: (runtimeDiagnostic ??
@ -13116,6 +13150,76 @@ export class TeamProvisioningService {
}
}
/**
 * Chooses the warning text shown for an OpenCode secondary member whose
 * bootstrap has stalled.
 *
 * Looks up the member's bootstrap transcript outcome (bounded by the parsed
 * `firstSpawnAcceptedAt` when it is a valid date) and reports the more specific
 * "member_briefing completed" message only when that outcome is a success
 * sourced from `member_briefing`; every other case gets the generic message.
 *
 * @param run - Provisioning run whose `teamName` scopes the transcript lookup.
 * @param memberName - Member whose transcript outcome is inspected.
 * @param current - Current spawn status entry; only `firstSpawnAcceptedAt` is read.
 * @returns The diagnostic message for the stalled member.
 */
private async buildOpenCodeSecondaryBootstrapStallDiagnostic(
  run: ProvisioningRun,
  memberName: string,
  current: MemberSpawnStatusEntry
): Promise<string> {
  // A missing or unparseable acceptance timestamp is forwarded as `null`.
  const { firstSpawnAcceptedAt } = current;
  const parsedAcceptedAt =
    firstSpawnAcceptedAt == null ? Number.NaN : Date.parse(firstSpawnAcceptedAt);
  const acceptedSinceMs = Number.isFinite(parsedAcceptedAt) ? parsedAcceptedAt : null;

  const outcome = await this.findBootstrapTranscriptOutcome(
    run.teamName,
    memberName,
    acceptedSinceMs
  );

  const briefingCompleted =
    outcome?.kind === 'success' && outcome.source === 'member_briefing';
  return briefingCompleted
    ? 'OpenCode member_briefing completed, but runtime_bootstrap_checkin did not complete after 5 min.'
    : 'OpenCode bootstrap did not complete runtime_bootstrap_checkin after 5 min.';
}
/**
 * Records `memberName` as bootstrap-stalled on the run while keeping it in the
 * non-failed `waiting` / `runtime_pending_bootstrap` state.
 *
 * In order, this: stores the new entry in `run.memberSpawnStatuses`; refreshes
 * `run.progress.launchDiagnostics` and calls `run.onProgress` when bounded
 * diagnostics are available; appends the diagnostic text only if the member
 * was not already marked stalled; and then, only when the run is still the
 * tracked one, emits a member spawn change and (for launch runs) triggers a
 * launch-state snapshot persist that is not awaited.
 *
 * @param run - Provisioning run to update.
 * @param memberName - Key of the member in `run.memberSpawnStatuses`.
 * @param current - Entry used as the base for the new stalled entry.
 * @param runtimeDiagnostic - Warning text stored on the entry and appended to
 *   the member's bootstrap diagnostics on the first stall transition.
 */
private setOpenCodeSecondaryBootstrapStalledStatus(
run: ProvisioningRun,
memberName: string,
current: MemberSpawnStatusEntry,
runtimeDiagnostic: string
): void {
// One timestamp is reused for every time field written below.
const observedAt = nowIso();
// Captured before the overwrite so the diagnostic is appended only once.
const wasBootstrapStalled = current.bootstrapStalled === true;
// `runtimeAlive` survives only when backed by the 'runtime_process' liveness kind.
const runtimeProcessAlive =
current.runtimeAlive === true && current.livenessKind === 'runtime_process';
const next: MemberSpawnStatusEntry = {
...current,
status: 'waiting',
launchState: 'runtime_pending_bootstrap',
agentToolAccepted: true,
runtimeAlive: runtimeProcessAlive,
bootstrapConfirmed: false,
hardFailure: false,
// Failure fields are cleared: a stall is reported as a warning, not a hard failure.
error: undefined,
hardFailureReason: undefined,
livenessSource: undefined,
// Keep an existing liveness kind; otherwise derive one from the process check above.
livenessKind:
current.livenessKind ?? (runtimeProcessAlive ? 'runtime_process' : 'registered_only'),
runtimeDiagnostic,
runtimeDiagnosticSeverity: 'warning',
bootstrapStalled: true,
livenessLastCheckedAt: observedAt,
// Backfill the acceptance time when the entry never recorded one.
firstSpawnAcceptedAt: current.firstSpawnAcceptedAt ?? observedAt,
updatedAt: observedAt,
};
run.memberSpawnStatuses.set(memberName, next);
// Diagnostics are rebuilt from the run after the stalled entry has been stored.
const launchDiagnostics = boundLaunchDiagnostics(buildLaunchDiagnosticsFromRun(run));
if (launchDiagnostics) {
run.progress = {
...run.progress,
updatedAt: observedAt,
launchDiagnostics,
};
run.onProgress(run.progress);
}
if (!wasBootstrapStalled) {
this.appendMemberBootstrapDiagnostic(run, memberName, runtimeDiagnostic);
}
// Change emission and persistence are skipped for runs that are no longer tracked.
if (!this.isCurrentTrackedRun(run)) return;
this.emitMemberSpawnChange(run, memberName);
if (run.isLaunch) {
// Deliberately not awaited (`void`); the snapshot phase follows `provisioningComplete`.
void this.persistLaunchStateSnapshot(run, run.provisioningComplete ? 'finished' : 'active');
}
}
private scheduleOpenCodeBootstrapStallReevaluation(
run: ProvisioningRun,
memberName: string,
@ -18661,6 +18765,21 @@ export class TeamProvisioningService {
if (matchedRuntimeNames.length > 0) {
if (current?.agentToolAccepted) {
if (
this.isOpenCodeSecondaryLaneMemberInRun(run, expected) &&
current.launchState === 'runtime_pending_bootstrap' &&
current.bootstrapConfirmed !== true &&
current.hardFailure !== true &&
this.isOpenCodeBootstrapStallWindowElapsed(current.firstSpawnAcceptedAt)
) {
const diagnostic = await this.buildOpenCodeSecondaryBootstrapStallDiagnostic(
run,
expected,
current
);
this.setOpenCodeSecondaryBootstrapStalledStatus(run, expected, current, diagnostic);
continue;
}
this.setMemberSpawnStatus(run, expected, 'waiting');
}
continue;
@ -18936,6 +19055,46 @@ export class TeamProvisioningService {
}
nextStatuses[resolvedStatusKey] = nextEntry;
}
for (const [memberName, current] of Object.entries(nextStatuses)) {
const openCodeSecondaryBootstrapPending =
options?.openCodeSecondaryBootstrapPendingMembers?.has(memberName) === true &&
current.launchState === 'runtime_pending_bootstrap' &&
current.bootstrapConfirmed !== true &&
current.hardFailure !== true;
if (
!openCodeSecondaryBootstrapPending ||
current.bootstrapStalled === true ||
!this.isOpenCodeBootstrapStallWindowElapsed(current.firstSpawnAcceptedAt)
) {
continue;
}
const runtimeProcessAlive =
current.runtimeAlive === true && current.livenessKind === 'runtime_process';
const runtimeDiagnostic = runtimeProcessAlive
? 'Runtime process is alive, but no bootstrap check-in after 5 min.'
: 'OpenCode bootstrap did not complete runtime_bootstrap_checkin after 5 min.';
const nextEntry: MemberSpawnStatusEntry = {
...current,
status: 'waiting',
launchState: 'runtime_pending_bootstrap',
agentToolAccepted: true,
runtimeAlive: runtimeProcessAlive,
bootstrapConfirmed: false,
hardFailure: false,
hardFailureReason: undefined,
error: undefined,
livenessSource: undefined,
livenessKind:
current.livenessKind ?? (runtimeProcessAlive ? 'runtime_process' : 'registered_only'),
runtimeDiagnostic,
runtimeDiagnosticSeverity: 'warning',
bootstrapStalled: true,
livenessLastCheckedAt: nowIso(),
updatedAt: nowIso(),
};
nextEntry.launchState = deriveMemberLaunchState(nextEntry);
nextStatuses[memberName] = nextEntry;
}
return nextStatuses;
}
@ -22242,8 +22401,9 @@ export class TeamProvisioningService {
}
return { kind: 'failure', observedAt, reason };
}
if (isBootstrapTranscriptSuccessText(text, teamName, memberName)) {
return { kind: 'success', observedAt };
const successSource = getBootstrapTranscriptSuccessSource(text, teamName, memberName);
if (successSource) {
return { kind: 'success', observedAt, source: successSource };
}
}
} catch {

View file

@ -265,6 +265,49 @@ describe('TeamLaunchStateEvaluator', () => {
});
});
// Scenario: a persisted OpenCode secondary-lane member has no strong runtime
// liveness (`runtimeAlive: false`) but does carry a `registered_only` liveness
// kind and a runtime session id, and is already flagged `bootstrapStalled`.
// Normalizing the snapshot must keep the stalled flag, and the derived spawn
// status must stay `waiting` / `runtime_pending_bootstrap`.
it('keeps bootstrap-stalled OpenCode registered sessions pending even without strong runtime liveness', () => {
const snapshot = normalizePersistedLaunchSnapshot('my-team', {
version: 2,
teamName: 'my-team',
updatedAt: '2026-04-23T00:00:00.000Z',
launchPhase: 'active',
expectedMembers: ['alice'],
members: {
alice: {
name: 'alice',
providerId: 'opencode',
laneKind: 'secondary',
laneOwnerProviderId: 'opencode',
laneId: 'secondary:opencode:alice',
launchState: 'runtime_pending_bootstrap',
agentToolAccepted: true,
runtimeAlive: false,
bootstrapConfirmed: false,
hardFailure: false,
// The only runtime markers present: liveness kind and session id.
livenessKind: 'registered_only',
runtimeSessionId: 'ses_alice_partial_bootstrap',
bootstrapStalled: true,
runtimeDiagnostic:
'OpenCode member_briefing completed, but runtime_bootstrap_checkin did not complete after 5 min.',
runtimeDiagnosticSeverity: 'warning',
lastEvaluatedAt: '2026-04-23T00:00:00.000Z',
},
},
});
// The stalled flag survives normalization of the persisted snapshot.
expect(snapshot?.members.alice.bootstrapStalled).toBe(true);
// The spawn status derived from the snapshot is still pending, not failed or confirmed.
const statuses = snapshotToMemberSpawnStatuses(snapshot);
expect(statuses.alice).toMatchObject({
status: 'waiting',
launchState: 'runtime_pending_bootstrap',
runtimeAlive: false,
livenessSource: undefined,
livenessKind: 'registered_only',
bootstrapStalled: true,
});
});
it('keeps OpenCode secondary runtime processes pending before bootstrap stalls', () => {
const snapshot = normalizePersistedLaunchSnapshot('my-team', {
version: 2,

View file

@ -12843,6 +12843,109 @@ describe('TeamProvisioningService', () => {
);
});
// Scenario: the member transcript on disk contains the bootstrap prompt and a
// successful `member_briefing` tool result, but the member's spawn status is
// still `runtime_pending_bootstrap`. After a forced audit the member must be
// marked bootstrap-stalled with the member_briefing-specific warning, and must
// not be reported as "bootstrap confirmed via transcript".
it('marks OpenCode secondary partial member_briefing bootstrap as stalled instead of confirmed', async () => {
allowConsoleLogs();
const teamName = 'zz-opencode-partial-bootstrap-stalled';
const leadSessionId = 'lead-session';
const memberSessionId = 'alice-opencode-session';
const projectPath = '/Users/test/proj';
const projectId = '-Users-test-proj';
// Spawn accepted six minutes ago; the briefing result is dated one minute later.
const acceptedAt = new Date(Date.now() - 6 * 60_000).toISOString();
const successAt = new Date(Date.now() - 5 * 60_000).toISOString();
writeLaunchConfig(teamName, projectPath, leadSessionId, ['alice']);
// Write the member's JSONL transcript under the temp projects directory.
const projectRoot = path.join(tempProjectsBase, projectId);
fs.mkdirSync(projectRoot, { recursive: true });
fs.writeFileSync(
path.join(projectRoot, `${memberSessionId}.jsonl`),
[
// Entry 1: the bootstrap prompt addressed to alice.
JSON.stringify({
timestamp: acceptedAt,
teamName,
agentName: 'alice',
type: 'user',
message: {
role: 'user',
content: `You are bootstrapping into team "${teamName}" as member "alice".`,
},
}),
// Entry 2: a non-error tool result carrying the member briefing text.
JSON.stringify({
timestamp: successAt,
teamName,
agentName: 'alice',
type: 'user',
message: {
role: 'user',
content: [
{
type: 'tool_result',
tool_use_id: 'item_1',
content: `Member briefing for alice on team "${teamName}" (${teamName}).\nTask briefing for alice:\nNo actionable tasks.`,
is_error: false,
},
],
},
}),
].join('\n') + '\n',
'utf8'
);
const svc = new TeamProvisioningService();
// Run state: alice was accepted but has neither confirmed bootstrap nor a live runtime.
const run = createMemberSpawnRun({
teamName,
expectedMembers: ['alice'],
memberSpawnStatuses: new Map([
[
'alice',
createMemberSpawnStatusEntry({
status: 'waiting',
launchState: 'runtime_pending_bootstrap',
agentToolAccepted: true,
runtimeAlive: false,
bootstrapConfirmed: false,
hardFailure: false,
firstSpawnAcceptedAt: acceptedAt,
livenessKind: 'registered_only',
}),
],
]),
});
// Declare alice as an OpenCode secondary lane on the run.
run.mixedSecondaryLanes = [
{
laneId: 'secondary:opencode:alice',
providerId: 'opencode',
member: {
name: 'alice',
providerId: 'opencode',
model: 'openrouter/qwen/qwen3-coder',
},
runId: 'opencode-run-alice',
state: 'finished',
result: null,
warnings: [],
diagnostics: [],
},
];
// Private method reached through an `any` cast; `force: true` is passed to the audit.
await (svc as any).maybeAuditMemberSpawnStatuses(run, { force: true });
// Still pending and unconfirmed, now flagged stalled with the briefing-specific warning.
expect(run.memberSpawnStatuses.get('alice')).toMatchObject({
status: 'waiting',
launchState: 'runtime_pending_bootstrap',
runtimeAlive: false,
bootstrapConfirmed: false,
hardFailure: false,
bootstrapStalled: true,
runtimeDiagnostic:
'OpenCode member_briefing completed, but runtime_bootstrap_checkin did not complete after 5 min.',
runtimeDiagnosticSeverity: 'warning',
});
// The briefing transcript alone must not produce a confirmation message in the output.
expect(run.provisioningOutputParts.join('\n')).not.toContain(
'bootstrap confirmed via transcript'
);
});
it('does not copy bootstrap-state success into OpenCode secondary runtime evidence', async () => {
const teamName = 'zz-opencode-bootstrap-state-not-evidence';
const leadSessionId = 'lead-session';