fix(team-runtime): stabilize transient offline snapshots
This commit is contained in:
parent
4548a01c97
commit
d6b6d3609d
3 changed files with 253 additions and 2 deletions
|
|
@ -18,6 +18,7 @@ import { createLogger } from '@shared/utils/logger';
|
|||
import { buildTeamGraphDefaultLayoutSeed } from '@shared/utils/teamGraphDefaultLayout';
|
||||
|
||||
import { areTeamAgentRuntimeSnapshotsEqual } from '../team/teamAgentRuntimeSnapshotEquality';
|
||||
import { stabilizeTeamAgentRuntimeSnapshot } from '../team/teamAgentRuntimeSnapshotStabilizer';
|
||||
import {
|
||||
clearAllLastResolvedTeamDataRefreshes,
|
||||
clearLastResolvedTeamDataRefreshAt,
|
||||
|
|
@ -1479,13 +1480,14 @@ export const createTeamSlice: StateCreator<AppState, [], [], TeamSlice> = (set,
|
|||
return {};
|
||||
}
|
||||
const previousSnapshot = prev.teamAgentRuntimeByTeam[teamName];
|
||||
if (areTeamAgentRuntimeSnapshotsEqual(previousSnapshot, snapshot)) {
|
||||
const stabilizedSnapshot = stabilizeTeamAgentRuntimeSnapshot(previousSnapshot, snapshot);
|
||||
if (areTeamAgentRuntimeSnapshotsEqual(previousSnapshot, stabilizedSnapshot)) {
|
||||
return {};
|
||||
}
|
||||
return {
|
||||
teamAgentRuntimeByTeam: {
|
||||
...prev.teamAgentRuntimeByTeam,
|
||||
[teamName]: snapshot,
|
||||
[teamName]: stabilizedSnapshot,
|
||||
},
|
||||
};
|
||||
});
|
||||
|
|
|
|||
116
src/renderer/store/team/teamAgentRuntimeSnapshotStabilizer.ts
Normal file
116
src/renderer/store/team/teamAgentRuntimeSnapshotStabilizer.ts
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
import type {
|
||||
TeamAgentRuntimeEntry,
|
||||
TeamAgentRuntimeLivenessKind,
|
||||
TeamAgentRuntimeSnapshot,
|
||||
} from '@shared/types';
|
||||
|
||||
/**
 * Longest gap, in milliseconds, between a member's previous live observation
 * and "now" for which `shouldKeepPreviousLiveRuntimeEntry` still keeps the
 * previous entry.
 */
const TRANSIENT_RUNTIME_OFFLINE_GRACE_MS = 15 * 1_000;

/**
 * Liveness kinds that `isTransientRuntimeOfflineEntry` accepts as transient for
 * an entry reporting `alive: false`.
 */
const TRANSIENT_RUNTIME_OFFLINE_KINDS = new Set<TeamAgentRuntimeLivenessKind>()
  .add('registered_only')
  .add('stale_metadata')
  .add('not_found');

/**
 * Liveness kinds that `hasStrongLiveRuntimeEvidence` accepts for an entry
 * reporting `alive: true`.
 */
const STRONG_LIVE_RUNTIME_KINDS = new Set<TeamAgentRuntimeLivenessKind>()
  .add('runtime_process')
  .add('confirmed_bootstrap');
|
||||
|
||||
/**
 * Converts a timestamp string into epoch milliseconds.
 *
 * @param value - Timestamp text handed to `Date.parse`; may be absent.
 * @returns The parsed epoch milliseconds, or `null` when `value` is missing,
 *   empty, or cannot be parsed.
 */
function parseTimestampMs(value: string | undefined): number | null {
  // Falsy input (undefined or '') is routed through the same "not a number" path
  // as an unparseable string.
  const epochMs = value ? Date.parse(value) : Number.NaN;
  return Number.isFinite(epochMs) ? epochMs : null;
}
|
||||
|
||||
/**
 * Resolves the observation time of a runtime entry as epoch milliseconds.
 *
 * Candidates are tried in order and the first one that parses wins:
 * the entry's `updatedAt`, the entry's `runtimeLastSeenAt`, then the enclosing
 * snapshot's `updatedAt`.
 *
 * @param snapshot - Snapshot the entry belongs to; supplies the last fallback.
 * @param entry - Runtime entry whose observation time is wanted.
 * @returns Epoch milliseconds, or `null` when none of the candidates parses.
 */
function getEntryObservedAtMs(
  snapshot: TeamAgentRuntimeSnapshot,
  entry: TeamAgentRuntimeEntry
): number | null {
  const candidates = [entry.updatedAt, entry.runtimeLastSeenAt, snapshot.updatedAt];
  for (const candidate of candidates) {
    const observedAtMs = parseTimestampMs(candidate);
    if (observedAtMs !== null) return observedAtMs;
  }
  return null;
}
|
||||
|
||||
/**
 * Tells whether an entry counts as strong evidence that the runtime is live.
 *
 * An entry qualifies when it reports `alive === true` and its `livenessKind` is
 * either absent (`null`/`undefined`) or one of `STRONG_LIVE_RUNTIME_KINDS`.
 *
 * @param entry - Entry to inspect; a missing entry never qualifies.
 */
function hasStrongLiveRuntimeEvidence(entry: TeamAgentRuntimeEntry | undefined): boolean {
  if (!entry || entry.alive !== true) return false;

  const kind = entry.livenessKind;
  return kind == null || STRONG_LIVE_RUNTIME_KINDS.has(kind);
}
|
||||
|
||||
/**
 * Tells whether an entry is an offline report of a kind treated as transient.
 *
 * An entry qualifies when it reports `alive === false`, its
 * `runtimeDiagnosticSeverity` is not `'error'`, and its `livenessKind` is
 * present and one of `TRANSIENT_RUNTIME_OFFLINE_KINDS`.
 *
 * @param entry - Entry to inspect; a missing entry never qualifies.
 */
function isTransientRuntimeOfflineEntry(entry: TeamAgentRuntimeEntry | undefined): boolean {
  if (!entry || entry.alive !== false) return false;

  // Error-severity diagnostics are never classified as transient.
  if (entry.runtimeDiagnosticSeverity === 'error') return false;

  const kind = entry.livenessKind;
  return kind != null && TRANSIENT_RUNTIME_OFFLINE_KINDS.has(kind);
}
|
||||
|
||||
/**
 * Decides whether a member's previous live entry should be kept in place of a
 * newly reported offline entry.
 *
 * All of the following must hold:
 * - the previous entry exists and passes `hasStrongLiveRuntimeEvidence`;
 * - the next entry passes `isTransientRuntimeOfflineEntry`;
 * - the previous entry has a parseable observation time
 *   (see `getEntryObservedAtMs`);
 * - that observation time lies within `TRANSIENT_RUNTIME_OFFLINE_GRACE_MS` of
 *   `nowMs`, in either direction.
 *
 * @param previousSnapshot - Snapshot that `previousEntry` came from; used as a
 *   timestamp fallback.
 * @param previousEntry - The member's entry in the previous snapshot, if any.
 * @param nextEntry - The member's entry in the incoming snapshot, if any.
 * @param nowMs - Current time in epoch milliseconds.
 * @returns `true` when the previous entry should be kept.
 */
function shouldKeepPreviousLiveRuntimeEntry({
  previousSnapshot,
  previousEntry,
  nextEntry,
  nowMs,
}: {
  previousSnapshot: TeamAgentRuntimeSnapshot;
  previousEntry: TeamAgentRuntimeEntry | undefined;
  nextEntry: TeamAgentRuntimeEntry | undefined;
  nowMs: number;
}): boolean {
  if (!previousEntry || !hasStrongLiveRuntimeEvidence(previousEntry)) return false;
  if (!isTransientRuntimeOfflineEntry(nextEntry)) return false;

  const previousObservedAtMs = getEntryObservedAtMs(previousSnapshot, previousEntry);
  if (previousObservedAtMs == null) return false;

  // Bound the window on both sides. With a one-sided `elapsed <= grace` check, a
  // previous timestamp far in the future yields a negative elapsed time that
  // always passes, so the stale live entry would keep masking offline reports
  // until the clock catches up. Skew smaller than the grace window is tolerated.
  const elapsedMs = nowMs - previousObservedAtMs;
  return Math.abs(elapsedMs) <= TRANSIENT_RUNTIME_OFFLINE_GRACE_MS;
}
|
||||
|
||||
/**
 * Produces the snapshot to store when a new runtime snapshot arrives for a team.
 *
 * For every member present in `nextSnapshot`, the member's previous entry is
 * kept instead of the incoming one whenever
 * `shouldKeepPreviousLiveRuntimeEntry` says so. Members that exist only in
 * `previousSnapshot` are not added.
 *
 * `nextSnapshot` itself (same reference) is returned when there is no previous
 * snapshot, when the team name or run id differs between the two snapshots, or
 * when no member qualifies. Otherwise a shallow copy of `nextSnapshot` is
 * returned whose `members` is a copy with the kept previous entry objects
 * substituted in.
 *
 * @param previousSnapshot - Snapshot currently held for the team, if any.
 * @param nextSnapshot - Newly received snapshot.
 * @param nowMs - Current time in epoch milliseconds; defaults to `Date.now()`.
 * @returns The snapshot to store.
 */
export function stabilizeTeamAgentRuntimeSnapshot(
  previousSnapshot: TeamAgentRuntimeSnapshot | undefined,
  nextSnapshot: TeamAgentRuntimeSnapshot,
  nowMs = Date.now()
): TeamAgentRuntimeSnapshot {
  if (!previousSnapshot) return nextSnapshot;

  const sameTeam = previousSnapshot.teamName === nextSnapshot.teamName;
  const sameRun = previousSnapshot.runId === nextSnapshot.runId;
  if (!sameTeam || !sameRun) return nextSnapshot;

  // Allocated lazily so the untouched case can hand back `nextSnapshot` as-is.
  let mergedMembers: Record<string, TeamAgentRuntimeEntry> | undefined;

  for (const memberName of Object.keys(nextSnapshot.members)) {
    const previousEntry = previousSnapshot.members[memberName];
    if (
      !previousEntry ||
      !shouldKeepPreviousLiveRuntimeEntry({
        previousSnapshot,
        previousEntry,
        nextEntry: nextSnapshot.members[memberName],
        nowMs,
      })
    ) {
      continue;
    }

    if (mergedMembers === undefined) {
      mergedMembers = { ...nextSnapshot.members };
    }
    mergedMembers[memberName] = previousEntry;
  }

  return mergedMembers === undefined
    ? nextSnapshot
    : { ...nextSnapshot, members: mergedMembers };
}
|
||||
133
test/renderer/store/teamAgentRuntimeSnapshotStabilizer.test.ts
Normal file
133
test/renderer/store/teamAgentRuntimeSnapshotStabilizer.test.ts
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import { stabilizeTeamAgentRuntimeSnapshot } from '../../../src/renderer/store/team/teamAgentRuntimeSnapshotStabilizer';
|
||||
|
||||
import type { TeamAgentRuntimeEntry, TeamAgentRuntimeSnapshot } from '../../../src/shared/types';
|
||||
|
||||
/** Reference instant shared by the fixtures, as an ISO-8601 UTC string. */
const BASE_TIME = '2026-05-31T10:00:00.000Z';

/** The same reference instant expressed in epoch milliseconds. */
const BASE_TIME_MS = new Date(BASE_TIME).getTime();
|
||||
|
||||
/**
 * Builds a runtime entry fixture for member `alice`.
 *
 * The defaults describe a live entry (`alive: true`, `livenessKind:
 * 'runtime_process'`) stamped with `BASE_TIME`; any field in `overrides`
 * replaces the corresponding default.
 */
function createRuntimeEntry(overrides: Partial<TeamAgentRuntimeEntry> = {}): TeamAgentRuntimeEntry {
  const liveEntryDefaults: TeamAgentRuntimeEntry = {
    memberName: 'alice',
    alive: true,
    restartable: true,
    backendType: 'process',
    providerId: 'codex',
    providerBackendId: 'codex-native',
    pid: 12345,
    runtimeModel: 'gpt-5.5-codex',
    livenessKind: 'runtime_process',
    pidSource: 'agent_process_table',
    runtimeDiagnostic: 'verified runtime process detected',
    runtimeDiagnosticSeverity: 'info',
    diagnostics: ['matched process table by team-name and agent-id'],
    updatedAt: BASE_TIME,
  };

  return { ...liveEntryDefaults, ...overrides };
}
|
||||
|
||||
/**
 * Builds a runtime snapshot fixture for team `beacon-desk-14`, run `run-1`.
 *
 * By default the snapshot is stamped with `BASE_TIME` and holds a single member,
 * `alice`, created by `createRuntimeEntry()`. Any field in `overrides` replaces
 * the corresponding default wholesale (including `members`).
 */
function createRuntimeSnapshot(
  overrides: Partial<TeamAgentRuntimeSnapshot> = {}
): TeamAgentRuntimeSnapshot {
  const snapshotDefaults: TeamAgentRuntimeSnapshot = {
    teamName: 'beacon-desk-14',
    updatedAt: BASE_TIME,
    runId: 'run-1',
    providerBackendId: 'codex-native',
    members: {
      alice: createRuntimeEntry(),
    },
  };

  return { ...snapshotDefaults, ...overrides };
}
|
||||
|
||||
// Behavioural tests for `stabilizeTeamAgentRuntimeSnapshot`. Every case passes an
// explicit `nowMs` so the outcome does not depend on the wall clock.
describe('teamAgentRuntimeSnapshotStabilizer', () => {
  it('keeps a recent live runtime entry through a transient registered-only snapshot', () => {
    const previous = createRuntimeSnapshot();
    const next = createRuntimeSnapshot({
      updatedAt: '2026-05-31T10:00:10.000Z',
      members: {
        alice: createRuntimeEntry({
          alive: false,
          livenessKind: 'registered_only',
          pidSource: 'persisted_metadata',
          runtimeDiagnostic: 'registered runtime metadata without live process',
          runtimeDiagnosticSeverity: 'warning',
          updatedAt: '2026-05-31T10:00:10.000Z',
        }),
      },
    });

    // 10s after the live observation: inside the grace window.
    const stabilized = stabilizeTeamAgentRuntimeSnapshot(previous, next, BASE_TIME_MS + 10_000);

    expect(stabilized.members.alice).toBe(previous.members.alice);
    expect(stabilized.members.alice.alive).toBe(true);
    expect(stabilized.members.alice.livenessKind).toBe('runtime_process');
  });

  it('accepts the offline snapshot after the short stability grace expires', () => {
    const previous = createRuntimeSnapshot();
    const offlineEntry = createRuntimeEntry({
      alive: false,
      livenessKind: 'stale_metadata',
      pidSource: 'persisted_metadata',
      runtimeDiagnostic: 'persisted runtime pid is not alive',
      runtimeDiagnosticSeverity: 'warning',
      updatedAt: '2026-05-31T10:00:20.000Z',
    });
    const next = createRuntimeSnapshot({
      updatedAt: '2026-05-31T10:00:20.000Z',
      members: {
        alice: offlineEntry,
      },
    });

    // 20s after the live observation: outside the grace window.
    const stabilized = stabilizeTeamAgentRuntimeSnapshot(previous, next, BASE_TIME_MS + 20_000);

    expect(stabilized.members.alice).toBe(offlineEntry);
    expect(stabilized.members.alice.alive).toBe(false);
    expect(stabilized.members.alice.livenessKind).toBe('stale_metadata');
  });

  it('does not mask explicit runtime errors', () => {
    const previous = createRuntimeSnapshot();
    const errorEntry = createRuntimeEntry({
      alive: false,
      livenessKind: 'registered_only',
      runtimeDiagnostic: 'runtime failed',
      runtimeDiagnosticSeverity: 'error',
      updatedAt: '2026-05-31T10:00:05.000Z',
    });
    const next = createRuntimeSnapshot({
      updatedAt: '2026-05-31T10:00:05.000Z',
      members: {
        alice: errorEntry,
      },
    });

    const stabilized = stabilizeTeamAgentRuntimeSnapshot(previous, next, BASE_TIME_MS + 5_000);

    expect(stabilized.members.alice).toBe(errorEntry);
    expect(stabilized.members.alice.runtimeDiagnosticSeverity).toBe('error');
  });

  it('does not carry live state across different runtime runs', () => {
    const previous = createRuntimeSnapshot({ runId: 'run-1' });
    const next = createRuntimeSnapshot({
      runId: 'run-2',
      members: {
        alice: createRuntimeEntry({
          alive: false,
          livenessKind: 'registered_only',
          updatedAt: '2026-05-31T10:00:05.000Z',
        }),
      },
    });

    const stabilized = stabilizeTeamAgentRuntimeSnapshot(previous, next, BASE_TIME_MS + 5_000);

    expect(stabilized).toBe(next);
    expect(stabilized.members.alice.alive).toBe(false);
  });

  it('returns the next snapshot untouched when there is no previous snapshot', () => {
    const next = createRuntimeSnapshot({
      members: {
        alice: createRuntimeEntry({
          alive: false,
          livenessKind: 'registered_only',
          runtimeDiagnosticSeverity: 'warning',
        }),
      },
    });

    const stabilized = stabilizeTeamAgentRuntimeSnapshot(undefined, next, BASE_TIME_MS);

    expect(stabilized).toBe(next);
    expect(stabilized.members.alice.alive).toBe(false);
  });

  it('returns the next snapshot by reference when no member needs stabilizing', () => {
    const previous = createRuntimeSnapshot();
    const next = createRuntimeSnapshot({ updatedAt: '2026-05-31T10:00:05.000Z' });

    const stabilized = stabilizeTeamAgentRuntimeSnapshot(previous, next, BASE_TIME_MS + 5_000);

    // Identity matters: an unchanged result must not allocate a new snapshot.
    expect(stabilized).toBe(next);
    expect(stabilized.members.alice).toBe(next.members.alice);
  });
});
|
||||
Loading…
Reference in a new issue