fix(team): reconcile bootstrap runtime snapshots
This commit is contained in:
parent
7e8f4b377d
commit
4a4c67fcb9
2 changed files with 142 additions and 13 deletions
|
|
@@ -11219,7 +11219,6 @@ export class TeamProvisioningService {
|
|||
updatedAt: input.observedAt,
|
||||
});
|
||||
await this.writeLaunchStateSnapshot(input.teamName, snapshot);
|
||||
this.invalidateRuntimeSnapshotCaches(input.teamName);
|
||||
if (shouldEmitMemberSpawnChange) {
|
||||
this.teamChangeEmitter?.({
|
||||
type: 'member-spawn',
|
||||
|
|
@@ -13161,8 +13160,8 @@ export class TeamProvisioningService {
|
|||
await this.launchStateStore.read(teamName)
|
||||
);
|
||||
|
||||
const liveRuntimeByMember = await this.getLiveTeamAgentRuntimeMetadata(teamName);
|
||||
const spawnStatusSnapshot = await this.getMemberSpawnStatuses(teamName).catch(() => null);
|
||||
const liveRuntimeByMember = await this.getLiveTeamAgentRuntimeMetadata(teamName);
|
||||
const activeRuntimeRunId =
|
||||
run?.runId?.trim() || currentRuntimeAdapterRun?.runId?.trim() || runId?.trim() || '';
|
||||
const spawnStatusRunId = spawnStatusSnapshot?.runId?.trim() ?? '';
|
||||
|
|
@@ -13476,6 +13475,9 @@ export class TeamProvisioningService {
|
|||
launchMember?.launchState === 'confirmed_alive' ||
|
||||
spawnStatusMember?.bootstrapConfirmed === true ||
|
||||
spawnStatusMember?.launchState === 'confirmed_alive';
|
||||
const spawnStatusConfirmsBootstrap =
|
||||
spawnStatusMember?.bootstrapConfirmed === true ||
|
||||
spawnStatusMember?.launchState === 'confirmed_alive';
|
||||
const hasOpenCodeRuntimeHandle =
|
||||
isOpenCodeMember &&
|
||||
(typeof liveRuntimeMember?.pid === 'number' ||
|
||||
|
|
@@ -13489,22 +13491,54 @@ export class TeamProvisioningService {
|
|||
spawnStatusMember?.hardFailure !== true &&
|
||||
spawnStatusMember?.launchState !== 'failed_to_start' &&
|
||||
spawnStatusMember?.launchState !== 'runtime_pending_permission';
|
||||
const effectiveAlive = liveRuntimeMember?.alive === true || confirmedOpenCodeRuntimeAlive;
|
||||
const confirmedSpawnRuntimeFallback =
|
||||
!isOpenCodeMember &&
|
||||
spawnStatusConfirmsBootstrap &&
|
||||
spawnStatusMember?.hardFailure !== true &&
|
||||
spawnStatusMember?.launchState !== 'failed_to_start' &&
|
||||
!isStrongRuntimeEvidence(liveRuntimeMember);
|
||||
const confirmedSpawnRuntimeDiagnostic =
|
||||
spawnStatusMember?.runtimeDiagnostic ?? liveRuntimeMember?.runtimeDiagnostic;
|
||||
const shouldKeepConfirmedSpawnRuntimeDiagnostic =
|
||||
!!confirmedSpawnRuntimeDiagnostic &&
|
||||
!shouldClearRuntimeDiagnosticAfterBootstrapConfirmation(confirmedSpawnRuntimeDiagnostic);
|
||||
const effectiveAlive =
|
||||
liveRuntimeMember?.alive === true ||
|
||||
confirmedOpenCodeRuntimeAlive ||
|
||||
confirmedSpawnRuntimeFallback;
|
||||
const effectiveLivenessKind =
|
||||
confirmedOpenCodeRuntimeAlive &&
|
||||
liveRuntimeMember?.livenessKind === 'runtime_process_candidate'
|
||||
? 'confirmed_bootstrap'
|
||||
: liveRuntimeMember?.livenessKind;
|
||||
: confirmedSpawnRuntimeFallback
|
||||
? 'confirmed_bootstrap'
|
||||
: liveRuntimeMember?.livenessKind;
|
||||
const effectivePidSource =
|
||||
confirmedSpawnRuntimeFallback &&
|
||||
(liveRuntimeMember?.pidSource === 'persisted_metadata' ||
|
||||
liveRuntimeMember?.pidSource == null)
|
||||
? 'runtime_bootstrap'
|
||||
: liveRuntimeMember?.pidSource;
|
||||
const effectiveRuntimeDiagnostic =
|
||||
confirmedOpenCodeRuntimeAlive &&
|
||||
liveRuntimeMember?.livenessKind === 'runtime_process_candidate'
|
||||
? 'OpenCode bootstrap confirmed; runtime host/session evidence present.'
|
||||
: liveRuntimeMember?.runtimeDiagnostic;
|
||||
: confirmedSpawnRuntimeFallback
|
||||
? shouldKeepConfirmedSpawnRuntimeDiagnostic
|
||||
? confirmedSpawnRuntimeDiagnostic
|
||||
: 'bootstrap confirmed'
|
||||
: liveRuntimeMember?.runtimeDiagnostic;
|
||||
const effectiveRuntimeDiagnosticSeverity =
|
||||
confirmedOpenCodeRuntimeAlive &&
|
||||
liveRuntimeMember?.livenessKind === 'runtime_process_candidate'
|
||||
? 'info'
|
||||
: liveRuntimeMember?.runtimeDiagnosticSeverity;
|
||||
: confirmedSpawnRuntimeFallback
|
||||
? shouldKeepConfirmedSpawnRuntimeDiagnostic
|
||||
? (spawnStatusMember?.runtimeDiagnosticSeverity ??
|
||||
liveRuntimeMember?.runtimeDiagnosticSeverity ??
|
||||
'info')
|
||||
: 'info'
|
||||
: liveRuntimeMember?.runtimeDiagnosticSeverity;
|
||||
if (
|
||||
rssPid &&
|
||||
!usageStatsByPid.has(rssPid) &&
|
||||
|
|
@@ -13584,7 +13618,7 @@ export class TeamProvisioningService {
|
|||
...(usageStats?.runtimeLoadTruncated ? { runtimeLoadTruncated: true } : {}),
|
||||
...(resourceHistory && resourceHistory.length > 0 ? { resourceHistory } : {}),
|
||||
...(effectiveLivenessKind ? { livenessKind: effectiveLivenessKind } : {}),
|
||||
...(liveRuntimeMember?.pidSource ? { pidSource: liveRuntimeMember.pidSource } : {}),
|
||||
...(effectivePidSource ? { pidSource: effectivePidSource } : {}),
|
||||
...(liveRuntimeMember?.processCommand
|
||||
? { processCommand: liveRuntimeMember.processCommand }
|
||||
: {}),
|
||||
|
|
@@ -15486,7 +15520,6 @@ export class TeamProvisioningService {
|
|||
updatedAt,
|
||||
});
|
||||
await this.writeLaunchStateSnapshot(teamName, nextSnapshot);
|
||||
this.invalidateRuntimeSnapshotCaches(teamName);
|
||||
}
|
||||
|
||||
private getMutableAliveRunOrThrow(teamName: string): ProvisioningRun {
|
||||
|
|
@@ -25166,6 +25199,7 @@ export class TeamProvisioningService {
|
|||
await this.launchStateStore.clear(teamName);
|
||||
this.launchStateWrittenRunIdByTeam.delete(teamName);
|
||||
await clearBootstrapState(teamName);
|
||||
this.invalidateRuntimeSnapshotCaches(teamName);
|
||||
}
|
||||
|
||||
private async applyOpenCodeSecondaryEvidenceOverlay(params: {
|
||||
|
|
@@ -25429,9 +25463,13 @@ export class TeamProvisioningService {
|
|||
teamName: string,
|
||||
snapshot: PersistedTeamLaunchSnapshot
|
||||
): Promise<PersistedTeamLaunchSnapshot> {
|
||||
const result = await this.enqueueLaunchStateStoreOperation(teamName, () =>
|
||||
this.writeLaunchStateSnapshotNow(teamName, snapshot)
|
||||
);
|
||||
const result = await this.enqueueLaunchStateStoreOperation(teamName, async () => {
|
||||
const writeResult = await this.writeLaunchStateSnapshotNow(teamName, snapshot);
|
||||
if (writeResult.wrote) {
|
||||
this.invalidateRuntimeSnapshotCaches(teamName);
|
||||
}
|
||||
return writeResult;
|
||||
});
|
||||
return result.snapshot;
|
||||
}
|
||||
|
||||
|
|
@@ -26851,7 +26889,6 @@ export class TeamProvisioningService {
|
|||
|
||||
if (filteredSnapshot.teamLaunchState === 'clean_success' && launchPhase !== 'active') {
|
||||
await this.clearPersistedLaunchStateNow(run.teamName, { expectedRunId: run.runId });
|
||||
this.invalidateRuntimeSnapshotCaches(run.teamName);
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -2993,6 +2993,22 @@ describe('TeamProvisioningService', () => {
|
|||
).toBe(false);
|
||||
});
|
||||
|
||||
// Test: clearing the persisted launch state must also invalidate the runtime
// snapshot caches. The store is replaced with an in-memory stub, the cache
// invalidation method is spied on, and `clearPersistedLaunchState` is invoked
// directly through an `any` cast.
it('invalidates runtime cache when launch-state is cleared', async () => {
|
||||
const svc = new TeamProvisioningService();
|
||||
const teamName = 'launch-state-clear-invalidates-runtime-cache';
|
||||
// Stub store: `read` resolves to null, `write` and `clear` resolve to nothing.
(svc as any).launchStateStore = {
|
||||
read: vi.fn(async () => null),
|
||||
write: vi.fn(async () => {}),
|
||||
clear: vi.fn(async () => {}),
|
||||
};
|
||||
// Spy is installed after the stub so only calls made during the clear are counted.
const invalidateRuntime = vi.spyOn(svc as any, 'invalidateRuntimeSnapshotCaches');
|
||||
|
||||
await (svc as any).clearPersistedLaunchState(teamName);
|
||||
|
||||
// The store's `clear` must receive the team name ...
expect((svc as any).launchStateStore.clear).toHaveBeenCalledWith(teamName);
|
||||
// ... and the cache invalidation must run exactly once (no double invalidation).
expect(invalidateRuntime).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('does not rewrite launch-state or invalidate runtime cache for a recent semantic no-op', async () => {
|
||||
vi.useFakeTimers();
|
||||
vi.setSystemTime(new Date('2026-05-02T10:00:05.000Z'));
|
||||
|
|
@@ -4779,6 +4795,82 @@ describe('TeamProvisioningService', () => {
|
|||
});
|
||||
});
|
||||
|
||||
// Test: `getTeamAgentRuntimeSnapshot` is expected to reconcile a stale persisted
// launch state against a confirmed bootstrap record. The seeded launch state
// marks member `tom` as a hard failure with a stale diagnostic, while the
// seeded bootstrap state marks the same member `bootstrap_confirmed` under the
// same run id. Both the returned snapshot and the launch-state file re-read
// from disk must then report the member as confirmed.
// NOTE(review): writeLaunchConfig / writeMemberBootstrapRunId / writeLaunchState /
// writeBootstrapState / getTeamLaunchStatePath are fixtures defined outside this
// view; presumably they write under a per-test team directory — confirm.
it('reconciles persisted launch state before building runtime snapshot metadata', async () => {
|
||||
const teamName = 'zz-runtime-snapshot-reconciles-before-live-metadata';
|
||||
const leadSessionId = 'lead-session';
|
||||
const projectPath = '/Users/test/proj';
|
||||
// Timeline used below: bootstrap attempt -> bootstrap confirmed -> spawn accepted
// (all around 09:25), followed by a much later refresh (11:36) that recorded the
// stale failure.
const bootstrapAttemptAt = '2026-05-24T09:25:33.388Z';
|
||||
const bootstrapConfirmedAt = '2026-05-24T09:25:42.904Z';
|
||||
const appAcceptedAt = '2026-05-24T09:25:45.178Z';
|
||||
const staleRefreshAt = '2026-05-24T11:36:58.278Z';
|
||||
const runtimePid = 97_255;
|
||||
const bootstrapRunId = 'run-runtime-snapshot-reconcile-first';
|
||||
const staleDiagnostic = 'persisted runtime pid is not alive';
|
||||
|
||||
writeLaunchConfig(teamName, projectPath, leadSessionId, ['tom']);
|
||||
writeMemberBootstrapRunId(teamName, 'tom', bootstrapRunId);
|
||||
// Seed the stale launch state: failed_to_start + hardFailure, not bootstrap
// confirmed, carrying the stale diagnostic at 'warning' severity.
writeLaunchState(
|
||||
teamName,
|
||||
leadSessionId,
|
||||
{
|
||||
tom: {
|
||||
providerId: 'anthropic',
|
||||
model: 'haiku',
|
||||
laneId: 'primary',
|
||||
laneKind: 'primary',
|
||||
laneOwnerProviderId: 'codex',
|
||||
launchState: 'failed_to_start',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: false,
|
||||
runtimePid,
|
||||
bootstrapConfirmed: false,
|
||||
hardFailure: true,
|
||||
hardFailureReason:
|
||||
'runtime pid could not be verified because process table is unavailable',
|
||||
livenessKind: 'stale_metadata',
|
||||
runtimeDiagnostic: staleDiagnostic,
|
||||
runtimeDiagnosticSeverity: 'warning',
|
||||
firstSpawnAcceptedAt: appAcceptedAt,
|
||||
runtimeLastSeenAt: staleRefreshAt,
|
||||
lastEvaluatedAt: staleRefreshAt,
|
||||
},
|
||||
},
|
||||
{ launchPhase: 'finished', updatedAt: staleRefreshAt }
|
||||
);
|
||||
// Seed the contradicting bootstrap record for the same member and run id.
writeBootstrapState(
|
||||
teamName,
|
||||
[
|
||||
{
|
||||
name: 'tom',
|
||||
status: 'bootstrap_confirmed',
|
||||
lastAttemptAt: Date.parse(bootstrapAttemptAt),
|
||||
lastObservedAt: Date.parse(bootstrapConfirmedAt),
|
||||
},
|
||||
],
|
||||
'2026-05-24T09:26:08.090Z',
|
||||
{ runId: bootstrapRunId }
|
||||
);
|
||||
|
||||
const svc = new TeamProvisioningService();
|
||||
|
||||
const snapshot = await svc.getTeamAgentRuntimeSnapshot(teamName);
|
||||
// Re-read the launch-state file after the snapshot call to check what was persisted.
const persisted = JSON.parse(fs.readFileSync(getTeamLaunchStatePath(teamName), 'utf8'));
|
||||
|
||||
// In-memory snapshot: member reported alive via confirmed bootstrap, with the
// generic 'bootstrap confirmed' diagnostic at 'info' severity.
expect(snapshot.members.tom).toMatchObject({
|
||||
alive: true,
|
||||
livenessKind: 'confirmed_bootstrap',
|
||||
runtimeDiagnostic: 'bootstrap confirmed',
|
||||
runtimeDiagnosticSeverity: 'info',
|
||||
});
|
||||
expect(snapshot.members.tom?.runtimeDiagnostic).not.toBe(staleDiagnostic);
|
||||
// On-disk state: the failure flags are replaced by the confirmed state and the
// stale diagnostic is no longer stored.
expect(persisted.members.tom).toMatchObject({
|
||||
launchState: 'confirmed_alive',
|
||||
bootstrapConfirmed: true,
|
||||
hardFailure: false,
|
||||
});
|
||||
expect(persisted.members.tom?.runtimeDiagnostic).not.toBe(staleDiagnostic);
|
||||
});
|
||||
|
||||
it('does not treat a reused OpenCode runtime pid as live', async () => {
|
||||
const teamName = 'pure-opencode-reused-pid-team';
|
||||
const projectPath = '/Users/test/project';
|
||||
|
|
@@ -4797,7 +4889,7 @@ describe('TeamProvisioningService', () => {
|
|||
runtimeSessionId: 'session-alice',
|
||||
},
|
||||
});
|
||||
vi.mocked(listRuntimeProcessTableForCurrentPlatform).mockResolvedValueOnce([
|
||||
vi.mocked(listRuntimeProcessTableForCurrentPlatform).mockResolvedValue([
|
||||
{ pid: 333, ppid: 1, command: 'node unrelated-worker.js' },
|
||||
]);
|
||||
vi.mocked(pidusage).mockResolvedValueOnce({
|
||||
|
|
|
|||
Loading…
Reference in a new issue