fix(team): handle partial opencode bootstrap stalls
This commit is contained in:
parent
cde85c0396
commit
dad9fefdc7
5 changed files with 348 additions and 10 deletions
|
|
@ -88,6 +88,26 @@ function preservesStrongRuntimeAlive(value: {
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * Reports whether an OpenCode member shows any sign that a runtime was materialized.
 *
 * A single marker is sufficient: an explicit `runtimeAlive === true`, a positive finite
 * `runtimePid`, a non-blank `runtimeSessionId` or `sessionId`, or a `livenessKind` of
 * `runtime_process`, `runtime_process_candidate`, or `registered_only`.
 *
 * @param value Runtime evidence fields; every field is optional.
 * @returns `true` when at least one marker is present, otherwise `false`.
 */
function hasMaterializedOpenCodeRuntimeMarker(value: {
  runtimeAlive?: boolean;
  runtimePid?: number;
  runtimeSessionId?: string;
  sessionId?: string;
  livenessKind?: TeamAgentRuntimeLivenessKind;
}): boolean {
  return (
    value.runtimeAlive === true ||
    // Reject NaN, Infinity, zero and negative values: only a real pid counts.
    (typeof value.runtimePid === 'number' &&
      Number.isFinite(value.runtimePid) &&
      value.runtimePid > 0) ||
    // Whitespace-only identifiers are treated as absent.
    (typeof value.runtimeSessionId === 'string' && value.runtimeSessionId.trim().length > 0) ||
    (typeof value.sessionId === 'string' && value.sessionId.trim().length > 0) ||
    value.livenessKind === 'runtime_process' ||
    value.livenessKind === 'runtime_process_candidate' ||
    value.livenessKind === 'registered_only'
  );
}
|
||||
|
||||
function buildDiagnostics(
|
||||
member: Pick<
|
||||
PersistedTeamLaunchMemberState,
|
||||
|
|
@ -279,7 +299,7 @@ function createSecondaryLaneMemberState(
|
|||
providerId === 'opencode' &&
|
||||
evidence?.bootstrapStalled === true &&
|
||||
launchState === 'runtime_pending_bootstrap' &&
|
||||
strongRuntimeAlive &&
|
||||
hasMaterializedOpenCodeRuntimeMarker(evidence) &&
|
||||
evidence.bootstrapConfirmed !== true &&
|
||||
hardFailure !== true
|
||||
? true
|
||||
|
|
|
|||
|
|
@ -133,12 +133,24 @@ function isPersistedBootstrapStalled(
|
|||
| 'bootstrapConfirmed'
|
||||
| 'hardFailure'
|
||||
| 'bootstrapStalled'
|
||||
| 'runtimePid'
|
||||
| 'runtimeSessionId'
|
||||
| 'livenessKind'
|
||||
>
|
||||
): boolean {
|
||||
const hasMaterializedOpenCodeRuntimeMarker =
|
||||
member.runtimeAlive === true ||
|
||||
(typeof member.runtimePid === 'number' &&
|
||||
Number.isFinite(member.runtimePid) &&
|
||||
member.runtimePid > 0) ||
|
||||
(typeof member.runtimeSessionId === 'string' && member.runtimeSessionId.trim().length > 0) ||
|
||||
member.livenessKind === 'runtime_process' ||
|
||||
member.livenessKind === 'runtime_process_candidate' ||
|
||||
member.livenessKind === 'registered_only';
|
||||
return (
|
||||
member.bootstrapStalled === true &&
|
||||
isOpenCodeSecondaryBootstrapPending(member) &&
|
||||
member.runtimeAlive === true
|
||||
hasMaterializedOpenCodeRuntimeMarker
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -336,10 +336,13 @@ interface LaunchStateWriteResult {
|
|||
wrote: boolean;
|
||||
}
|
||||
|
||||
/**
 * Which transcript signal produced a bootstrap success match: `member_briefing` for the
 * "member briefing for …" phrase, `assistant_text` for the "bootstrap выполнен для …" phrase.
 */
type BootstrapTranscriptSuccessSource = 'member_briefing' | 'assistant_text';
|
||||
|
||||
type BootstrapTranscriptOutcome =
|
||||
| {
|
||||
kind: 'success';
|
||||
observedAt: string;
|
||||
source: BootstrapTranscriptSuccessSource;
|
||||
}
|
||||
| {
|
||||
kind: 'failure';
|
||||
|
|
@ -3228,15 +3231,23 @@ function isBootstrapTranscriptSuccessText(
|
|||
teamName: string,
|
||||
memberName: string
|
||||
): boolean {
|
||||
return getBootstrapTranscriptSuccessSource(text, teamName, memberName) !== null;
|
||||
}
|
||||
|
||||
function getBootstrapTranscriptSuccessSource(
|
||||
text: string,
|
||||
teamName: string,
|
||||
memberName: string
|
||||
): BootstrapTranscriptSuccessSource | null {
|
||||
const normalizedText = text.replace(/\s+/g, ' ').trim().toLowerCase();
|
||||
if (!normalizedText) {
|
||||
return false;
|
||||
return null;
|
||||
}
|
||||
|
||||
const normalizedTeamName = teamName.trim().toLowerCase();
|
||||
const normalizedMemberName = memberName.trim().toLowerCase();
|
||||
if (!normalizedTeamName || !normalizedMemberName) {
|
||||
return false;
|
||||
return null;
|
||||
}
|
||||
|
||||
if (
|
||||
|
|
@ -3247,13 +3258,13 @@ function isBootstrapTranscriptSuccessText(
|
|||
`member briefing for ${normalizedMemberName} on team '${normalizedTeamName}' (${normalizedTeamName}).`
|
||||
)
|
||||
) {
|
||||
return true;
|
||||
return 'member_briefing';
|
||||
}
|
||||
|
||||
return (
|
||||
normalizedText.includes(`bootstrap выполнен для \`${normalizedMemberName}\``) &&
|
||||
return normalizedText.includes(`bootstrap выполнен для \`${normalizedMemberName}\``) &&
|
||||
normalizedText.includes(`команде \`${normalizedTeamName}\``)
|
||||
);
|
||||
? 'assistant_text'
|
||||
: null;
|
||||
}
|
||||
|
||||
function isBootstrapTranscriptContextText(
|
||||
|
|
@ -13030,6 +13041,29 @@ export class TeamProvisioningService {
|
|||
}
|
||||
return;
|
||||
}
|
||||
if (
|
||||
this.isOpenCodeSecondaryLaneMemberInRun(run, memberName) &&
|
||||
refreshed.launchState === 'runtime_pending_bootstrap' &&
|
||||
refreshed.bootstrapConfirmed !== true &&
|
||||
refreshed.hardFailure !== true &&
|
||||
elapsedMs >= MEMBER_BOOTSTRAP_STALL_MS
|
||||
) {
|
||||
const enriched = {
|
||||
...refreshed,
|
||||
...(metadata?.livenessKind ? { livenessKind: metadata.livenessKind } : {}),
|
||||
...(runtimeDiagnostic ? { runtimeDiagnostic } : {}),
|
||||
...(metadata?.runtimeDiagnosticSeverity
|
||||
? { runtimeDiagnosticSeverity: metadata.runtimeDiagnosticSeverity }
|
||||
: {}),
|
||||
};
|
||||
const diagnostic = await this.buildOpenCodeSecondaryBootstrapStallDiagnostic(
|
||||
run,
|
||||
memberName,
|
||||
enriched
|
||||
);
|
||||
this.setOpenCodeSecondaryBootstrapStalledStatus(run, memberName, enriched, diagnostic);
|
||||
return;
|
||||
}
|
||||
const strictReason = restartPending
|
||||
? buildRestartGraceTimeoutReason(memberName)
|
||||
: (runtimeDiagnostic ??
|
||||
|
|
@ -13116,6 +13150,76 @@ export class TeamProvisioningService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the diagnostic text for an OpenCode secondary-lane member whose
 * `runtime_bootstrap_checkin` has not completed.
 *
 * Looks up the bootstrap transcript outcome for the member, bounded by
 * `current.firstSpawnAcceptedAt` when that value parses to a finite timestamp
 * (otherwise `null` is passed). If the transcript reports success sourced from
 * `member_briefing`, the message says the briefing completed but the check-in did not;
 * any other outcome yields the generic message.
 *
 * NOTE(review): both messages hard-code "5 min" — confirm this matches the stall window
 * the callers apply before invoking this method.
 *
 * @param run Provisioning run; only `run.teamName` is read here.
 * @param memberName Member whose transcript is inspected.
 * @param current Current spawn status entry; only `firstSpawnAcceptedAt` is read here.
 * @returns The diagnostic message.
 */
private async buildOpenCodeSecondaryBootstrapStallDiagnostic(
  run: ProvisioningRun,
  memberName: string,
  current: MemberSpawnStatusEntry
): Promise<string> {
  // Date.parse yields NaN for unparseable input, which is mapped to null below.
  const acceptedAtMs =
    current.firstSpawnAcceptedAt != null ? Date.parse(current.firstSpawnAcceptedAt) : NaN;
  const transcriptOutcome = await this.findBootstrapTranscriptOutcome(
    run.teamName,
    memberName,
    Number.isFinite(acceptedAtMs) ? acceptedAtMs : null
  );
  if (transcriptOutcome?.kind === 'success' && transcriptOutcome.source === 'member_briefing') {
    return 'OpenCode member_briefing completed, but runtime_bootstrap_checkin did not complete after 5 min.';
  }
  return 'OpenCode bootstrap did not complete runtime_bootstrap_checkin after 5 min.';
}
|
||||
|
||||
/**
 * Records that an OpenCode secondary-lane member is stalled in bootstrap and publishes
 * the change.
 *
 * The entry is kept in `waiting` / `runtime_pending_bootstrap` with `bootstrapStalled: true`
 * and a warning-severity diagnostic; `error`, `hardFailureReason` and `livenessSource`
 * are cleared and `hardFailure` / `bootstrapConfirmed` are forced to `false`.
 *
 * @param run Provisioning run whose `memberSpawnStatuses` and `progress` are updated.
 * @param memberName Key of the member entry to overwrite.
 * @param current Entry the new status is derived from.
 * @param runtimeDiagnostic Text stored as the entry's `runtimeDiagnostic` and, on the first
 *   transition into the stalled state, appended via `appendMemberBootstrapDiagnostic`.
 */
private setOpenCodeSecondaryBootstrapStalledStatus(
  run: ProvisioningRun,
  memberName: string,
  current: MemberSpawnStatusEntry,
  runtimeDiagnostic: string
): void {
  // A single timestamp keeps every time field written below consistent.
  const observedAt = nowIso();
  const wasBootstrapStalled = current.bootstrapStalled === true;
  // runtimeAlive survives only when the liveness kind is exactly 'runtime_process'.
  const runtimeProcessAlive =
    current.runtimeAlive === true && current.livenessKind === 'runtime_process';
  const next: MemberSpawnStatusEntry = {
    ...current,
    status: 'waiting',
    launchState: 'runtime_pending_bootstrap',
    agentToolAccepted: true,
    runtimeAlive: runtimeProcessAlive,
    bootstrapConfirmed: false,
    hardFailure: false,
    error: undefined,
    hardFailureReason: undefined,
    livenessSource: undefined,
    // Keep an existing liveness kind; otherwise derive one from the process check above.
    livenessKind:
      current.livenessKind ?? (runtimeProcessAlive ? 'runtime_process' : 'registered_only'),
    runtimeDiagnostic,
    runtimeDiagnosticSeverity: 'warning',
    bootstrapStalled: true,
    livenessLastCheckedAt: observedAt,
    firstSpawnAcceptedAt: current.firstSpawnAcceptedAt ?? observedAt,
    updatedAt: observedAt,
  };

  run.memberSpawnStatuses.set(memberName, next);
  const launchDiagnostics = boundLaunchDiagnostics(buildLaunchDiagnosticsFromRun(run));
  if (launchDiagnostics) {
    run.progress = {
      ...run.progress,
      updatedAt: observedAt,
      launchDiagnostics,
    };
    run.onProgress(run.progress);
  }

  // Append the diagnostic only on the transition into the stalled state.
  if (!wasBootstrapStalled) {
    this.appendMemberBootstrapDiagnostic(run, memberName, runtimeDiagnostic);
  }
  // The in-memory update above always happens; the change event and snapshot
  // persistence below are skipped when isCurrentTrackedRun(run) is false.
  if (!this.isCurrentTrackedRun(run)) return;
  this.emitMemberSpawnChange(run, memberName);
  if (run.isLaunch) {
    void this.persistLaunchStateSnapshot(run, run.provisioningComplete ? 'finished' : 'active');
  }
}
|
||||
|
||||
private scheduleOpenCodeBootstrapStallReevaluation(
|
||||
run: ProvisioningRun,
|
||||
memberName: string,
|
||||
|
|
@ -18661,6 +18765,21 @@ export class TeamProvisioningService {
|
|||
|
||||
if (matchedRuntimeNames.length > 0) {
|
||||
if (current?.agentToolAccepted) {
|
||||
if (
|
||||
this.isOpenCodeSecondaryLaneMemberInRun(run, expected) &&
|
||||
current.launchState === 'runtime_pending_bootstrap' &&
|
||||
current.bootstrapConfirmed !== true &&
|
||||
current.hardFailure !== true &&
|
||||
this.isOpenCodeBootstrapStallWindowElapsed(current.firstSpawnAcceptedAt)
|
||||
) {
|
||||
const diagnostic = await this.buildOpenCodeSecondaryBootstrapStallDiagnostic(
|
||||
run,
|
||||
expected,
|
||||
current
|
||||
);
|
||||
this.setOpenCodeSecondaryBootstrapStalledStatus(run, expected, current, diagnostic);
|
||||
continue;
|
||||
}
|
||||
this.setMemberSpawnStatus(run, expected, 'waiting');
|
||||
}
|
||||
continue;
|
||||
|
|
@ -18936,6 +19055,46 @@ export class TeamProvisioningService {
|
|||
}
|
||||
nextStatuses[resolvedStatusKey] = nextEntry;
|
||||
}
|
||||
for (const [memberName, current] of Object.entries(nextStatuses)) {
|
||||
const openCodeSecondaryBootstrapPending =
|
||||
options?.openCodeSecondaryBootstrapPendingMembers?.has(memberName) === true &&
|
||||
current.launchState === 'runtime_pending_bootstrap' &&
|
||||
current.bootstrapConfirmed !== true &&
|
||||
current.hardFailure !== true;
|
||||
if (
|
||||
!openCodeSecondaryBootstrapPending ||
|
||||
current.bootstrapStalled === true ||
|
||||
!this.isOpenCodeBootstrapStallWindowElapsed(current.firstSpawnAcceptedAt)
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
const runtimeProcessAlive =
|
||||
current.runtimeAlive === true && current.livenessKind === 'runtime_process';
|
||||
const runtimeDiagnostic = runtimeProcessAlive
|
||||
? 'Runtime process is alive, but no bootstrap check-in after 5 min.'
|
||||
: 'OpenCode bootstrap did not complete runtime_bootstrap_checkin after 5 min.';
|
||||
const nextEntry: MemberSpawnStatusEntry = {
|
||||
...current,
|
||||
status: 'waiting',
|
||||
launchState: 'runtime_pending_bootstrap',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: runtimeProcessAlive,
|
||||
bootstrapConfirmed: false,
|
||||
hardFailure: false,
|
||||
hardFailureReason: undefined,
|
||||
error: undefined,
|
||||
livenessSource: undefined,
|
||||
livenessKind:
|
||||
current.livenessKind ?? (runtimeProcessAlive ? 'runtime_process' : 'registered_only'),
|
||||
runtimeDiagnostic,
|
||||
runtimeDiagnosticSeverity: 'warning',
|
||||
bootstrapStalled: true,
|
||||
livenessLastCheckedAt: nowIso(),
|
||||
updatedAt: nowIso(),
|
||||
};
|
||||
nextEntry.launchState = deriveMemberLaunchState(nextEntry);
|
||||
nextStatuses[memberName] = nextEntry;
|
||||
}
|
||||
return nextStatuses;
|
||||
}
|
||||
|
||||
|
|
@ -22242,8 +22401,9 @@ export class TeamProvisioningService {
|
|||
}
|
||||
return { kind: 'failure', observedAt, reason };
|
||||
}
|
||||
if (isBootstrapTranscriptSuccessText(text, teamName, memberName)) {
|
||||
return { kind: 'success', observedAt };
|
||||
const successSource = getBootstrapTranscriptSuccessSource(text, teamName, memberName);
|
||||
if (successSource) {
|
||||
return { kind: 'success', observedAt, source: successSource };
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
|
|
|
|||
|
|
@ -265,6 +265,49 @@ describe('TeamLaunchStateEvaluator', () => {
|
|||
});
|
||||
});
|
||||
|
||||
// A persisted OpenCode secondary member that is bootstrap-stalled with only a registered
// session (runtimeAlive: false) must survive normalization and still map to a waiting,
// pending-bootstrap spawn status.
it('keeps bootstrap-stalled OpenCode registered sessions pending even without strong runtime liveness', () => {
  const snapshot = normalizePersistedLaunchSnapshot('my-team', {
    version: 2,
    teamName: 'my-team',
    updatedAt: '2026-04-23T00:00:00.000Z',
    launchPhase: 'active',
    expectedMembers: ['alice'],
    members: {
      alice: {
        name: 'alice',
        providerId: 'opencode',
        laneKind: 'secondary',
        laneOwnerProviderId: 'opencode',
        laneId: 'secondary:opencode:alice',
        launchState: 'runtime_pending_bootstrap',
        agentToolAccepted: true,
        runtimeAlive: false,
        bootstrapConfirmed: false,
        hardFailure: false,
        livenessKind: 'registered_only',
        runtimeSessionId: 'ses_alice_partial_bootstrap',
        bootstrapStalled: true,
        runtimeDiagnostic:
          'OpenCode member_briefing completed, but runtime_bootstrap_checkin did not complete after 5 min.',
        runtimeDiagnosticSeverity: 'warning',
        lastEvaluatedAt: '2026-04-23T00:00:00.000Z',
      },
    },
  });

  // The stalled flag must not be dropped during normalization.
  expect(snapshot?.members.alice.bootstrapStalled).toBe(true);

  const statuses = snapshotToMemberSpawnStatuses(snapshot);
  expect(statuses.alice).toMatchObject({
    status: 'waiting',
    launchState: 'runtime_pending_bootstrap',
    runtimeAlive: false,
    livenessSource: undefined,
    livenessKind: 'registered_only',
    bootstrapStalled: true,
  });
});
|
||||
|
||||
it('keeps OpenCode secondary runtime processes pending before bootstrap stalls', () => {
|
||||
const snapshot = normalizePersistedLaunchSnapshot('my-team', {
|
||||
version: 2,
|
||||
|
|
|
|||
|
|
@ -12843,6 +12843,109 @@ describe('TeamProvisioningService', () => {
|
|||
);
|
||||
});
|
||||
|
||||
// The member transcript contains a member_briefing tool result but no runtime check-in, and
// the spawn was accepted six minutes ago. The audit must mark the member as stalled with the
// member_briefing-specific diagnostic rather than confirming bootstrap from the transcript.
it('marks OpenCode secondary partial member_briefing bootstrap as stalled instead of confirmed', async () => {
  allowConsoleLogs();
  const teamName = 'zz-opencode-partial-bootstrap-stalled';
  const leadSessionId = 'lead-session';
  const memberSessionId = 'alice-opencode-session';
  const projectPath = '/Users/test/proj';
  const projectId = '-Users-test-proj';
  const acceptedAt = new Date(Date.now() - 6 * 60_000).toISOString();
  const successAt = new Date(Date.now() - 5 * 60_000).toISOString();

  writeLaunchConfig(teamName, projectPath, leadSessionId, ['alice']);

  // Write the member's transcript: the bootstrap prompt followed by the briefing tool result.
  const projectRoot = path.join(tempProjectsBase, projectId);
  fs.mkdirSync(projectRoot, { recursive: true });
  fs.writeFileSync(
    path.join(projectRoot, `${memberSessionId}.jsonl`),
    [
      JSON.stringify({
        timestamp: acceptedAt,
        teamName,
        agentName: 'alice',
        type: 'user',
        message: {
          role: 'user',
          content: `You are bootstrapping into team "${teamName}" as member "alice".`,
        },
      }),
      JSON.stringify({
        timestamp: successAt,
        teamName,
        agentName: 'alice',
        type: 'user',
        message: {
          role: 'user',
          content: [
            {
              type: 'tool_result',
              tool_use_id: 'item_1',
              content: `Member briefing for alice on team "${teamName}" (${teamName}).\nTask briefing for alice:\nNo actionable tasks.`,
              is_error: false,
            },
          ],
        },
      }),
    ].join('\n') + '\n',
    'utf8'
  );

  const svc = new TeamProvisioningService();
  const run = createMemberSpawnRun({
    teamName,
    expectedMembers: ['alice'],
    memberSpawnStatuses: new Map([
      [
        'alice',
        createMemberSpawnStatusEntry({
          status: 'waiting',
          launchState: 'runtime_pending_bootstrap',
          agentToolAccepted: true,
          runtimeAlive: false,
          bootstrapConfirmed: false,
          hardFailure: false,
          firstSpawnAcceptedAt: acceptedAt,
          livenessKind: 'registered_only',
        }),
      ],
    ]),
  });
  // Register alice as an OpenCode secondary-lane member of this run.
  run.mixedSecondaryLanes = [
    {
      laneId: 'secondary:opencode:alice',
      providerId: 'opencode',
      member: {
        name: 'alice',
        providerId: 'opencode',
        model: 'openrouter/qwen/qwen3-coder',
      },
      runId: 'opencode-run-alice',
      state: 'finished',
      result: null,
      warnings: [],
      diagnostics: [],
    },
  ];

  await (svc as any).maybeAuditMemberSpawnStatuses(run, { force: true });

  expect(run.memberSpawnStatuses.get('alice')).toMatchObject({
    status: 'waiting',
    launchState: 'runtime_pending_bootstrap',
    runtimeAlive: false,
    bootstrapConfirmed: false,
    hardFailure: false,
    bootstrapStalled: true,
    runtimeDiagnostic:
      'OpenCode member_briefing completed, but runtime_bootstrap_checkin did not complete after 5 min.',
    runtimeDiagnosticSeverity: 'warning',
  });
  expect(run.provisioningOutputParts.join('\n')).not.toContain(
    'bootstrap confirmed via transcript'
  );
});
|
||||
|
||||
it('does not copy bootstrap-state success into OpenCode secondary runtime evidence', async () => {
|
||||
const teamName = 'zz-opencode-bootstrap-state-not-evidence';
|
||||
const leadSessionId = 'lead-session';
|
||||
|
|
|
|||
Loading…
Reference in a new issue