fix(team-runtime): stabilize transient offline snapshots

This commit is contained in:
777genius 2026-05-31 20:51:07 +03:00
parent 4548a01c97
commit d6b6d3609d
3 changed files with 253 additions and 2 deletions

View file

@ -18,6 +18,7 @@ import { createLogger } from '@shared/utils/logger';
import { buildTeamGraphDefaultLayoutSeed } from '@shared/utils/teamGraphDefaultLayout';
import { areTeamAgentRuntimeSnapshotsEqual } from '../team/teamAgentRuntimeSnapshotEquality';
import { stabilizeTeamAgentRuntimeSnapshot } from '../team/teamAgentRuntimeSnapshotStabilizer';
import {
clearAllLastResolvedTeamDataRefreshes,
clearLastResolvedTeamDataRefreshAt,
@ -1479,13 +1480,14 @@ export const createTeamSlice: StateCreator<AppState, [], [], TeamSlice> = (set,
return {};
}
const previousSnapshot = prev.teamAgentRuntimeByTeam[teamName];
if (areTeamAgentRuntimeSnapshotsEqual(previousSnapshot, snapshot)) {
const stabilizedSnapshot = stabilizeTeamAgentRuntimeSnapshot(previousSnapshot, snapshot);
if (areTeamAgentRuntimeSnapshotsEqual(previousSnapshot, stabilizedSnapshot)) {
return {};
}
return {
teamAgentRuntimeByTeam: {
...prev.teamAgentRuntimeByTeam,
[teamName]: snapshot,
[teamName]: stabilizedSnapshot,
},
};
});

View file

@ -0,0 +1,116 @@
import type {
TeamAgentRuntimeEntry,
TeamAgentRuntimeLivenessKind,
TeamAgentRuntimeSnapshot,
} from '@shared/types';
/** Maximum age (ms) of the last live observation for which a transient offline report is ignored. */
const TRANSIENT_RUNTIME_OFFLINE_GRACE_MS = 15 * 1_000;

/** Liveness kinds that count as strong evidence of a live runtime when the entry is alive. */
const STRONG_LIVE_RUNTIME_KINDS: Set<TeamAgentRuntimeLivenessKind> = new Set([
  'runtime_process',
  'confirmed_bootstrap',
]);

/** Liveness kinds of a not-alive entry that are treated as possibly transient. */
const TRANSIENT_RUNTIME_OFFLINE_KINDS: Set<TeamAgentRuntimeLivenessKind> = new Set([
  'registered_only',
  'stale_metadata',
  'not_found',
]);
/**
 * Converts an optional timestamp string into epoch milliseconds.
 *
 * @param value - Timestamp text handed to `Date.parse`; may be absent or empty.
 * @returns The parsed epoch milliseconds, or `null` when the value is missing,
 *   empty, or does not parse to a finite number.
 */
function parseTimestampMs(value: string | undefined): number | null {
  if (value) {
    const epochMs = Date.parse(value);
    if (Number.isFinite(epochMs)) {
      return epochMs;
    }
  }
  return null;
}
/**
 * Resolves the time (epoch ms) at which a runtime entry was observed.
 *
 * Candidates are tried in order and the first one that parses wins:
 * the entry's `updatedAt`, the entry's `runtimeLastSeenAt`, then the
 * enclosing snapshot's `updatedAt`.
 *
 * @param snapshot - Snapshot that contains the entry; supplies the last-resort timestamp.
 * @param entry - Runtime entry whose observation time is wanted.
 * @returns Epoch milliseconds, or `null` when none of the candidates parses.
 */
function getEntryObservedAtMs(
  snapshot: TeamAgentRuntimeSnapshot,
  entry: TeamAgentRuntimeEntry
): number | null {
  const candidates = [entry.updatedAt, entry.runtimeLastSeenAt, snapshot.updatedAt];
  for (const candidate of candidates) {
    const observedAtMs = parseTimestampMs(candidate);
    if (observedAtMs != null) {
      return observedAtMs;
    }
  }
  return null;
}
/**
 * Tells whether an entry is strong evidence that the runtime is live.
 *
 * The entry must report `alive === true`, and its `livenessKind` must either
 * be unset or be one of `STRONG_LIVE_RUNTIME_KINDS`.
 *
 * @param entry - Runtime entry to inspect; `undefined` is never strong evidence.
 * @returns `true` only when both conditions above hold.
 */
function hasStrongLiveRuntimeEvidence(entry: TeamAgentRuntimeEntry | undefined): boolean {
  if (!entry || entry.alive !== true) {
    return false;
  }
  const kind = entry.livenessKind;
  if (kind == null) {
    return true;
  }
  return STRONG_LIVE_RUNTIME_KINDS.has(kind);
}
/**
 * Tells whether an entry is an offline report that may be transient.
 *
 * All of the following must hold: `alive === false`, the diagnostic severity
 * is not `'error'`, and `livenessKind` is set to one of
 * `TRANSIENT_RUNTIME_OFFLINE_KINDS`.
 *
 * @param entry - Runtime entry to inspect; `undefined` never qualifies.
 * @returns `true` only when every condition above holds.
 */
function isTransientRuntimeOfflineEntry(entry: TeamAgentRuntimeEntry | undefined): boolean {
  if (!entry || entry.alive !== false) {
    return false;
  }
  // Reports carrying an error-severity diagnostic never qualify as transient.
  if (entry.runtimeDiagnosticSeverity === 'error') {
    return false;
  }
  const kind = entry.livenessKind;
  return kind != null && TRANSIENT_RUNTIME_OFFLINE_KINDS.has(kind);
}
/**
 * Decides whether the previous entry should be kept in place of the next one.
 *
 * The previous entry is kept only when it is strong live evidence, the next
 * entry is a transient offline report, the previous entry's observation time
 * can be resolved, and that observation is no older than
 * `TRANSIENT_RUNTIME_OFFLINE_GRACE_MS` relative to `nowMs`.
 *
 * @param args.previousSnapshot - Snapshot containing `previousEntry`; used as a timestamp fallback.
 * @param args.previousEntry - Entry from the previous snapshot, if any.
 * @param args.nextEntry - Entry from the incoming snapshot, if any.
 * @param args.nowMs - Current time in epoch milliseconds.
 * @returns `true` when the previous entry should be kept.
 */
function shouldKeepPreviousLiveRuntimeEntry(args: {
  previousSnapshot: TeamAgentRuntimeSnapshot;
  previousEntry: TeamAgentRuntimeEntry | undefined;
  nextEntry: TeamAgentRuntimeEntry | undefined;
  nowMs: number;
}): boolean {
  const { previousSnapshot, previousEntry, nextEntry, nowMs } = args;

  const wasLive = previousEntry !== undefined && hasStrongLiveRuntimeEvidence(previousEntry);
  if (!previousEntry || !wasLive) {
    return false;
  }
  if (!isTransientRuntimeOfflineEntry(nextEntry)) {
    return false;
  }

  const observedAtMs = getEntryObservedAtMs(previousSnapshot, previousEntry);
  if (observedAtMs == null) {
    // Without a usable timestamp the age of the live observation is unknown.
    return false;
  }
  const ageMs = nowMs - observedAtMs;
  return ageMs <= TRANSIENT_RUNTIME_OFFLINE_GRACE_MS;
}
/**
 * Smooths over short-lived "offline" reports in an incoming runtime snapshot.
 *
 * For every member of `nextSnapshot` that also exists in `previousSnapshot`,
 * the previous entry is carried forward in place of the incoming one when
 * `shouldKeepPreviousLiveRuntimeEntry` approves it. Snapshots for a different
 * team or run are never merged.
 *
 * A carried entry that has no parseable timestamp of its own (`updatedAt` /
 * `runtimeLastSeenAt`) is aged using the previous snapshot's `updatedAt`.
 * Because the returned snapshot takes `nextSnapshot.updatedAt`, feeding the
 * result back in as `previousSnapshot` would otherwise restart that entry's
 * grace window on every call. To prevent this, such an entry is copied with
 * the original observation time pinned to its `updatedAt`. Entries that
 * already carry a timestamp are reused by reference.
 * NOTE(review): if every entry is guaranteed to carry `updatedAt`, the pinning
 * path is never taken — confirm against the `TeamAgentRuntimeEntry` type.
 *
 * @param previousSnapshot - Last accepted snapshot, if any.
 * @param nextSnapshot - Newly received snapshot.
 * @param nowMs - Current time in epoch milliseconds; defaults to `Date.now()`.
 * @returns `nextSnapshot` itself when nothing was carried forward, otherwise a
 *   shallow copy of it whose `members` contains the carried entries.
 */
export function stabilizeTeamAgentRuntimeSnapshot(
  previousSnapshot: TeamAgentRuntimeSnapshot | undefined,
  nextSnapshot: TeamAgentRuntimeSnapshot,
  nowMs = Date.now()
): TeamAgentRuntimeSnapshot {
  if (
    !previousSnapshot ||
    previousSnapshot.teamName !== nextSnapshot.teamName ||
    previousSnapshot.runId !== nextSnapshot.runId
  ) {
    return nextSnapshot;
  }

  // Allocated lazily so the common "nothing to stabilize" path returns nextSnapshot as-is.
  let stabilizedMembers: Record<string, TeamAgentRuntimeEntry> | null = null;

  for (const [memberName, nextEntry] of Object.entries(nextSnapshot.members)) {
    const previousEntry = previousSnapshot.members[memberName];
    if (!previousEntry) continue;
    if (
      !shouldKeepPreviousLiveRuntimeEntry({
        previousSnapshot,
        previousEntry,
        nextEntry,
        nowMs,
      })
    ) {
      continue;
    }

    let carriedEntry: TeamAgentRuntimeEntry = previousEntry;
    const hasOwnTimestamp =
      parseTimestampMs(previousEntry.updatedAt) != null ||
      parseTimestampMs(previousEntry.runtimeLastSeenAt) != null;
    if (!hasOwnTimestamp) {
      // The entry was aged via the snapshot-level timestamp. Pin that time onto
      // the entry so later calls keep measuring from the original observation
      // rather than from the newer snapshot it is now embedded in.
      const observedAtMs = parseTimestampMs(previousSnapshot.updatedAt);
      if (observedAtMs != null) {
        carriedEntry = {
          ...previousEntry,
          updatedAt: new Date(observedAtMs).toISOString(),
        };
      }
    }

    stabilizedMembers ??= { ...nextSnapshot.members };
    stabilizedMembers[memberName] = carriedEntry;
  }

  if (!stabilizedMembers) {
    return nextSnapshot;
  }
  return {
    ...nextSnapshot,
    members: stabilizedMembers,
  };
}

View file

@ -0,0 +1,133 @@
import { describe, expect, it } from 'vitest';
import { stabilizeTeamAgentRuntimeSnapshot } from '../../../src/renderer/store/team/teamAgentRuntimeSnapshotStabilizer';
import type { TeamAgentRuntimeEntry, TeamAgentRuntimeSnapshot } from '../../../src/shared/types';
/** Reference instant used by the fixtures as their default `updatedAt`. */
const BASE_TIME = '2026-05-31T10:00:00.000Z';
/** `BASE_TIME` expressed as epoch milliseconds. */
const BASE_TIME_MS = new Date(BASE_TIME).getTime();
/**
 * Builds a runtime entry fixture for member `alice`.
 *
 * The defaults describe an alive entry with `livenessKind: 'runtime_process'`
 * stamped at `BASE_TIME`; any field can be replaced via `overrides`.
 *
 * @param overrides - Fields that replace the defaults.
 * @returns A new entry object on every call.
 */
function createRuntimeEntry(overrides: Partial<TeamAgentRuntimeEntry> = {}): TeamAgentRuntimeEntry {
  const defaults: TeamAgentRuntimeEntry = {
    memberName: 'alice',
    alive: true,
    restartable: true,
    backendType: 'process',
    providerId: 'codex',
    providerBackendId: 'codex-native',
    pid: 12345,
    runtimeModel: 'gpt-5.5-codex',
    livenessKind: 'runtime_process',
    pidSource: 'agent_process_table',
    runtimeDiagnostic: 'verified runtime process detected',
    runtimeDiagnosticSeverity: 'info',
    diagnostics: ['matched process table by team-name and agent-id'],
    updatedAt: BASE_TIME,
  };
  return { ...defaults, ...overrides };
}
/**
 * Builds a runtime snapshot fixture for team `beacon-desk-14`, run `run-1`.
 *
 * By default the snapshot is stamped at `BASE_TIME` and holds one member,
 * `alice`, created by `createRuntimeEntry()`. Any top-level field, including
 * the whole `members` map, can be replaced via `overrides`.
 *
 * @param overrides - Top-level fields that replace the defaults.
 * @returns A new snapshot object on every call.
 */
function createRuntimeSnapshot(
  overrides: Partial<TeamAgentRuntimeSnapshot> = {}
): TeamAgentRuntimeSnapshot {
  const defaults: TeamAgentRuntimeSnapshot = {
    teamName: 'beacon-desk-14',
    updatedAt: BASE_TIME,
    runId: 'run-1',
    providerBackendId: 'codex-native',
    members: {
      alice: createRuntimeEntry(),
    },
  };
  return { ...defaults, ...overrides };
}
// Behavioural tests for stabilizeTeamAgentRuntimeSnapshot. Each case starts from
// the default live snapshot and feeds in one follow-up snapshot for `alice`.
describe('teamAgentRuntimeSnapshotStabilizer', () => {
  // Offline report 10 s after the live observation: the live entry is kept.
  it('keeps a recent live runtime entry through a transient registered-only snapshot', () => {
    const liveSnapshot = createRuntimeSnapshot();
    const registeredOnlyEntry = createRuntimeEntry({
      alive: false,
      livenessKind: 'registered_only',
      pidSource: 'persisted_metadata',
      runtimeDiagnostic: 'registered runtime metadata without live process',
      runtimeDiagnosticSeverity: 'warning',
      updatedAt: '2026-05-31T10:00:10.000Z',
    });
    const offlineSnapshot = createRuntimeSnapshot({
      updatedAt: '2026-05-31T10:00:10.000Z',
      members: { alice: registeredOnlyEntry },
    });

    const result = stabilizeTeamAgentRuntimeSnapshot(
      liveSnapshot,
      offlineSnapshot,
      BASE_TIME_MS + 10_000
    );

    expect(result.members.alice).toBe(liveSnapshot.members.alice);
    expect(result.members.alice.alive).toBe(true);
    expect(result.members.alice.livenessKind).toBe('runtime_process');
  });

  // Offline report 20 s after the live observation: the offline entry wins.
  it('accepts the offline snapshot after the short stability grace expires', () => {
    const liveSnapshot = createRuntimeSnapshot();
    const staleEntry = createRuntimeEntry({
      alive: false,
      livenessKind: 'stale_metadata',
      pidSource: 'persisted_metadata',
      runtimeDiagnostic: 'persisted runtime pid is not alive',
      runtimeDiagnosticSeverity: 'warning',
      updatedAt: '2026-05-31T10:00:20.000Z',
    });
    const offlineSnapshot = createRuntimeSnapshot({
      updatedAt: '2026-05-31T10:00:20.000Z',
      members: { alice: staleEntry },
    });

    const result = stabilizeTeamAgentRuntimeSnapshot(
      liveSnapshot,
      offlineSnapshot,
      BASE_TIME_MS + 20_000
    );

    expect(result.members.alice).toBe(staleEntry);
    expect(result.members.alice.alive).toBe(false);
    expect(result.members.alice.livenessKind).toBe('stale_metadata');
  });

  // An error-severity diagnostic is passed through even 5 s after the live observation.
  it('does not mask explicit runtime errors', () => {
    const liveSnapshot = createRuntimeSnapshot();
    const failedEntry = createRuntimeEntry({
      alive: false,
      livenessKind: 'registered_only',
      runtimeDiagnostic: 'runtime failed',
      runtimeDiagnosticSeverity: 'error',
      updatedAt: '2026-05-31T10:00:05.000Z',
    });
    const failedSnapshot = createRuntimeSnapshot({
      updatedAt: '2026-05-31T10:00:05.000Z',
      members: { alice: failedEntry },
    });

    const result = stabilizeTeamAgentRuntimeSnapshot(
      liveSnapshot,
      failedSnapshot,
      BASE_TIME_MS + 5_000
    );

    expect(result.members.alice).toBe(failedEntry);
    expect(result.members.alice.runtimeDiagnosticSeverity).toBe('error');
  });

  // A different runId means the incoming snapshot is returned untouched.
  it('does not carry live state across different runtime runs', () => {
    const firstRunSnapshot = createRuntimeSnapshot({ runId: 'run-1' });
    const secondRunSnapshot = createRuntimeSnapshot({
      runId: 'run-2',
      members: {
        alice: createRuntimeEntry({
          alive: false,
          livenessKind: 'registered_only',
          updatedAt: '2026-05-31T10:00:05.000Z',
        }),
      },
    });

    const result = stabilizeTeamAgentRuntimeSnapshot(
      firstRunSnapshot,
      secondRunSnapshot,
      BASE_TIME_MS + 5_000
    );

    expect(result).toBe(secondRunSnapshot);
    expect(result.members.alice.alive).toBe(false);
  });
});