fix(team): reconcile bootstrap runtime snapshots

This commit is contained in:
777genius 2026-05-24 17:02:36 +03:00
parent 7e8f4b377d
commit 4a4c67fcb9
2 changed files with 142 additions and 13 deletions

View file

@ -11219,7 +11219,6 @@ export class TeamProvisioningService {
updatedAt: input.observedAt,
});
await this.writeLaunchStateSnapshot(input.teamName, snapshot);
this.invalidateRuntimeSnapshotCaches(input.teamName);
if (shouldEmitMemberSpawnChange) {
this.teamChangeEmitter?.({
type: 'member-spawn',
@ -13161,8 +13160,8 @@ export class TeamProvisioningService {
await this.launchStateStore.read(teamName)
);
const liveRuntimeByMember = await this.getLiveTeamAgentRuntimeMetadata(teamName);
const spawnStatusSnapshot = await this.getMemberSpawnStatuses(teamName).catch(() => null);
const liveRuntimeByMember = await this.getLiveTeamAgentRuntimeMetadata(teamName);
const activeRuntimeRunId =
run?.runId?.trim() || currentRuntimeAdapterRun?.runId?.trim() || runId?.trim() || '';
const spawnStatusRunId = spawnStatusSnapshot?.runId?.trim() ?? '';
@ -13476,6 +13475,9 @@ export class TeamProvisioningService {
launchMember?.launchState === 'confirmed_alive' ||
spawnStatusMember?.bootstrapConfirmed === true ||
spawnStatusMember?.launchState === 'confirmed_alive';
const spawnStatusConfirmsBootstrap =
spawnStatusMember?.bootstrapConfirmed === true ||
spawnStatusMember?.launchState === 'confirmed_alive';
const hasOpenCodeRuntimeHandle =
isOpenCodeMember &&
(typeof liveRuntimeMember?.pid === 'number' ||
@ -13489,22 +13491,54 @@ export class TeamProvisioningService {
spawnStatusMember?.hardFailure !== true &&
spawnStatusMember?.launchState !== 'failed_to_start' &&
spawnStatusMember?.launchState !== 'runtime_pending_permission';
const effectiveAlive = liveRuntimeMember?.alive === true || confirmedOpenCodeRuntimeAlive;
const confirmedSpawnRuntimeFallback =
!isOpenCodeMember &&
spawnStatusConfirmsBootstrap &&
spawnStatusMember?.hardFailure !== true &&
spawnStatusMember?.launchState !== 'failed_to_start' &&
!isStrongRuntimeEvidence(liveRuntimeMember);
const confirmedSpawnRuntimeDiagnostic =
spawnStatusMember?.runtimeDiagnostic ?? liveRuntimeMember?.runtimeDiagnostic;
const shouldKeepConfirmedSpawnRuntimeDiagnostic =
!!confirmedSpawnRuntimeDiagnostic &&
!shouldClearRuntimeDiagnosticAfterBootstrapConfirmation(confirmedSpawnRuntimeDiagnostic);
const effectiveAlive =
liveRuntimeMember?.alive === true ||
confirmedOpenCodeRuntimeAlive ||
confirmedSpawnRuntimeFallback;
const effectiveLivenessKind =
confirmedOpenCodeRuntimeAlive &&
liveRuntimeMember?.livenessKind === 'runtime_process_candidate'
? 'confirmed_bootstrap'
: liveRuntimeMember?.livenessKind;
: confirmedSpawnRuntimeFallback
? 'confirmed_bootstrap'
: liveRuntimeMember?.livenessKind;
const effectivePidSource =
confirmedSpawnRuntimeFallback &&
(liveRuntimeMember?.pidSource === 'persisted_metadata' ||
liveRuntimeMember?.pidSource == null)
? 'runtime_bootstrap'
: liveRuntimeMember?.pidSource;
const effectiveRuntimeDiagnostic =
confirmedOpenCodeRuntimeAlive &&
liveRuntimeMember?.livenessKind === 'runtime_process_candidate'
? 'OpenCode bootstrap confirmed; runtime host/session evidence present.'
: liveRuntimeMember?.runtimeDiagnostic;
: confirmedSpawnRuntimeFallback
? shouldKeepConfirmedSpawnRuntimeDiagnostic
? confirmedSpawnRuntimeDiagnostic
: 'bootstrap confirmed'
: liveRuntimeMember?.runtimeDiagnostic;
const effectiveRuntimeDiagnosticSeverity =
confirmedOpenCodeRuntimeAlive &&
liveRuntimeMember?.livenessKind === 'runtime_process_candidate'
? 'info'
: liveRuntimeMember?.runtimeDiagnosticSeverity;
: confirmedSpawnRuntimeFallback
? shouldKeepConfirmedSpawnRuntimeDiagnostic
? (spawnStatusMember?.runtimeDiagnosticSeverity ??
liveRuntimeMember?.runtimeDiagnosticSeverity ??
'info')
: 'info'
: liveRuntimeMember?.runtimeDiagnosticSeverity;
if (
rssPid &&
!usageStatsByPid.has(rssPid) &&
@ -13584,7 +13618,7 @@ export class TeamProvisioningService {
...(usageStats?.runtimeLoadTruncated ? { runtimeLoadTruncated: true } : {}),
...(resourceHistory && resourceHistory.length > 0 ? { resourceHistory } : {}),
...(effectiveLivenessKind ? { livenessKind: effectiveLivenessKind } : {}),
...(liveRuntimeMember?.pidSource ? { pidSource: liveRuntimeMember.pidSource } : {}),
...(effectivePidSource ? { pidSource: effectivePidSource } : {}),
...(liveRuntimeMember?.processCommand
? { processCommand: liveRuntimeMember.processCommand }
: {}),
@ -15486,7 +15520,6 @@ export class TeamProvisioningService {
updatedAt,
});
await this.writeLaunchStateSnapshot(teamName, nextSnapshot);
this.invalidateRuntimeSnapshotCaches(teamName);
}
private getMutableAliveRunOrThrow(teamName: string): ProvisioningRun {
@ -25166,6 +25199,7 @@ export class TeamProvisioningService {
await this.launchStateStore.clear(teamName);
this.launchStateWrittenRunIdByTeam.delete(teamName);
await clearBootstrapState(teamName);
this.invalidateRuntimeSnapshotCaches(teamName);
}
private async applyOpenCodeSecondaryEvidenceOverlay(params: {
@ -25429,9 +25463,13 @@ export class TeamProvisioningService {
teamName: string,
snapshot: PersistedTeamLaunchSnapshot
): Promise<PersistedTeamLaunchSnapshot> {
const result = await this.enqueueLaunchStateStoreOperation(teamName, () =>
this.writeLaunchStateSnapshotNow(teamName, snapshot)
);
const result = await this.enqueueLaunchStateStoreOperation(teamName, async () => {
const writeResult = await this.writeLaunchStateSnapshotNow(teamName, snapshot);
if (writeResult.wrote) {
this.invalidateRuntimeSnapshotCaches(teamName);
}
return writeResult;
});
return result.snapshot;
}
@ -26851,7 +26889,6 @@ export class TeamProvisioningService {
if (filteredSnapshot.teamLaunchState === 'clean_success' && launchPhase !== 'active') {
await this.clearPersistedLaunchStateNow(run.teamName, { expectedRunId: run.runId });
this.invalidateRuntimeSnapshotCaches(run.teamName);
return null;
}

View file

@ -2993,6 +2993,22 @@ describe('TeamProvisioningService', () => {
).toBe(false);
});
// Verifies that clearing the persisted launch state also invalidates the
// runtime snapshot caches for that team, exactly once.
it('invalidates runtime cache when launch-state is cleared', async () => {
const svc = new TeamProvisioningService();
const teamName = 'launch-state-clear-invalidates-runtime-cache';
// Swap the private launch-state store for in-memory stubs so the test does
// not go through the real store and can observe the `clear` call.
(svc as any).launchStateStore = {
read: vi.fn(async () => null),
write: vi.fn(async () => {}),
clear: vi.fn(async () => {}),
};
// Spy on the private cache invalidation hook to count how often it fires.
const invalidateRuntime = vi.spyOn(svc as any, 'invalidateRuntimeSnapshotCaches');
await (svc as any).clearPersistedLaunchState(teamName);
// The store must be cleared for this team ...
expect((svc as any).launchStateStore.clear).toHaveBeenCalledWith(teamName);
// ... and the invalidation must happen once, not zero times and not twice.
expect(invalidateRuntime).toHaveBeenCalledTimes(1);
});
it('does not rewrite launch-state or invalidate runtime cache for a recent semantic no-op', async () => {
vi.useFakeTimers();
vi.setSystemTime(new Date('2026-05-02T10:00:05.000Z'));
@ -4779,6 +4795,82 @@ describe('TeamProvisioningService', () => {
});
});
// Regression test: the persisted launch state marks member `tom` as a hard
// failure with a stale diagnostic, while the bootstrap state for the same run
// id says bootstrap was confirmed. The runtime snapshot must report the member
// as alive via confirmed bootstrap, and the launch-state file on disk must be
// reconciled to `confirmed_alive`.
it('reconciles persisted launch state before building runtime snapshot metadata', async () => {
const teamName = 'zz-runtime-snapshot-reconciles-before-live-metadata';
const leadSessionId = 'lead-session';
const projectPath = '/Users/test/proj';
// Timeline used by the fixture: bootstrap attempt -> bootstrap confirmed ->
// spawn accepted, all around 09:25Z; the stale failure refresh is stamped
// roughly two hours later (11:36Z).
const bootstrapAttemptAt = '2026-05-24T09:25:33.388Z';
const bootstrapConfirmedAt = '2026-05-24T09:25:42.904Z';
const appAcceptedAt = '2026-05-24T09:25:45.178Z';
const staleRefreshAt = '2026-05-24T11:36:58.278Z';
const runtimePid = 97_255;
const bootstrapRunId = 'run-runtime-snapshot-reconcile-first';
const staleDiagnostic = 'persisted runtime pid is not alive';
writeLaunchConfig(teamName, projectPath, leadSessionId, ['tom']);
writeMemberBootstrapRunId(teamName, 'tom', bootstrapRunId);
// Persisted launch state: `tom` is recorded as failed_to_start with a hard
// failure, not alive, bootstrap not confirmed, and carrying the stale
// warning diagnostic.
writeLaunchState(
teamName,
leadSessionId,
{
tom: {
providerId: 'anthropic',
model: 'haiku',
laneId: 'primary',
laneKind: 'primary',
laneOwnerProviderId: 'codex',
launchState: 'failed_to_start',
agentToolAccepted: true,
runtimeAlive: false,
runtimePid,
bootstrapConfirmed: false,
hardFailure: true,
hardFailureReason:
'runtime pid could not be verified because process table is unavailable',
livenessKind: 'stale_metadata',
runtimeDiagnostic: staleDiagnostic,
runtimeDiagnosticSeverity: 'warning',
firstSpawnAcceptedAt: appAcceptedAt,
runtimeLastSeenAt: staleRefreshAt,
lastEvaluatedAt: staleRefreshAt,
},
},
{ launchPhase: 'finished', updatedAt: staleRefreshAt }
);
// Bootstrap state for the same run id contradicts the launch state above:
// it records `tom` as bootstrap_confirmed.
writeBootstrapState(
teamName,
[
{
name: 'tom',
status: 'bootstrap_confirmed',
lastAttemptAt: Date.parse(bootstrapAttemptAt),
lastObservedAt: Date.parse(bootstrapConfirmedAt),
},
],
'2026-05-24T09:26:08.090Z',
{ runId: bootstrapRunId }
);
const svc = new TeamProvisioningService();
const snapshot = await svc.getTeamAgentRuntimeSnapshot(teamName);
// Read the launch-state file back from disk after the snapshot call to check
// what was persisted, independently of the returned snapshot.
const persisted = JSON.parse(fs.readFileSync(getTeamLaunchStatePath(teamName), 'utf8'));
// Returned snapshot: the member is alive by confirmed bootstrap and the
// diagnostic is the informational 'bootstrap confirmed', not the stale one.
expect(snapshot.members.tom).toMatchObject({
alive: true,
livenessKind: 'confirmed_bootstrap',
runtimeDiagnostic: 'bootstrap confirmed',
runtimeDiagnosticSeverity: 'info',
});
expect(snapshot.members.tom?.runtimeDiagnostic).not.toBe(staleDiagnostic);
// Persisted state: the failure flags are cleared and the stale diagnostic is
// no longer stored.
expect(persisted.members.tom).toMatchObject({
launchState: 'confirmed_alive',
bootstrapConfirmed: true,
hardFailure: false,
});
expect(persisted.members.tom?.runtimeDiagnostic).not.toBe(staleDiagnostic);
});
it('does not treat a reused OpenCode runtime pid as live', async () => {
const teamName = 'pure-opencode-reused-pid-team';
const projectPath = '/Users/test/project';
@ -4797,7 +4889,7 @@ describe('TeamProvisioningService', () => {
runtimeSessionId: 'session-alice',
},
});
vi.mocked(listRuntimeProcessTableForCurrentPlatform).mockResolvedValueOnce([
vi.mocked(listRuntimeProcessTableForCurrentPlatform).mockResolvedValue([
{ pid: 333, ppid: 1, command: 'node unrelated-worker.js' },
]);
vi.mocked(pidusage).mockResolvedValueOnce({