fix(team): recover stale mixed opencode lane state

This commit is contained in:
777genius 2026-04-22 20:34:42 +03:00
parent 7cdee429ec
commit 6211fd95ab
4 changed files with 569 additions and 31 deletions

View file

@ -166,6 +166,7 @@ import {
getOpenCodeTeamRuntimeDirectory,
migrateLegacyOpenCodeRuntimeState,
readOpenCodeRuntimeLaneIndex,
recoverStaleOpenCodeRuntimeLaneIndexEntry,
removeOpenCodeRuntimeLaneIndexEntry,
upsertOpenCodeRuntimeLaneIndexEntry,
} from './opencode/store/OpenCodeRuntimeManifestEvidenceReader';
@ -6063,12 +6064,7 @@ export class TeamProvisioningService {
);
}
if (!this.isCurrentTrackedRun(run)) return;
this.teamChangeEmitter?.({
type: 'member-spawn',
teamName: run.teamName,
runId: run.runId,
detail: memberName,
});
this.emitMemberSpawnChange(run, memberName);
if (run.isLaunch) {
void this.persistLaunchStateSnapshot(run, run.provisioningComplete ? 'finished' : 'active');
}
@ -6115,13 +6111,15 @@ export class TeamProvisioningService {
await this.persistLaunchStateSnapshot(run, run.provisioningComplete ? 'finished' : 'active');
const persisted = await this.launchStateStore.read(teamName);
const liveSnapshot = snapshotFromRuntimeMemberStatuses({
teamName: run.teamName,
expectedMembers: run.expectedMembers,
leadSessionId: run.detectedSessionId ?? undefined,
launchPhase: run.provisioningComplete ? 'finished' : 'active',
statuses: this.buildRuntimeSpawnStatusRecord(run),
});
const liveSnapshot =
this.buildLiveLaunchSnapshotForRun(run, run.provisioningComplete ? 'finished' : 'active') ??
snapshotFromRuntimeMemberStatuses({
teamName: run.teamName,
expectedMembers: run.expectedMembers,
leadSessionId: run.detectedSessionId ?? undefined,
launchPhase: run.provisioningComplete ? 'finished' : 'active',
statuses: this.buildRuntimeSpawnStatusRecord(run),
});
const snapshot = persisted ?? liveSnapshot;
const statuses = await this.attachLiveRuntimeMetadataToStatuses(
teamName,
@ -9689,6 +9687,9 @@ export class TeamProvisioningService {
this.provisioningRunByTeam.set(request.teamName, runId);
run.onProgress(run.progress);
await this.clearPersistedLaunchState(request.teamName);
for (const lane of run.mixedSecondaryLanes ?? []) {
await this.publishMixedSecondaryLaneStatusChange(run, lane);
}
// Read existing tasks to include in teammate prompts for work resumption
const taskReader = new TeamTaskReader();
@ -11813,6 +11814,53 @@ export class TeamProvisioningService {
return statuses;
}
/**
 * Builds an in-memory launch snapshot for a run without touching the launch state store.
 *
 * The mixed (aggregate) snapshot wins whenever one can be built for the run. Otherwise a
 * snapshot is derived from the run's runtime spawn statuses, but only for launch runs that
 * have at least one expected member.
 *
 * @param run - The provisioning run to snapshot.
 * @param launchPhase - Phase recorded in the snapshot; defaults to `'finished'` when
 *   provisioning is complete and `'active'` otherwise.
 * @returns The snapshot, or `null` when the run is not a launch or has no expected members.
 */
private buildLiveLaunchSnapshotForRun(
  run: ProvisioningRun,
  launchPhase: PersistedTeamLaunchPhase = run.provisioningComplete ? 'finished' : 'active'
): PersistedTeamLaunchSnapshot | null {
  const aggregateSnapshot = this.buildMixedPersistedLaunchSnapshotForRun(run, launchPhase);
  if (aggregateSnapshot) {
    return aggregateSnapshot;
  }

  const expectedMembers = run.expectedMembers;
  if (run.isLaunch && expectedMembers && expectedMembers.length > 0) {
    const teamName = run.teamName;
    const leadSessionId = run.detectedSessionId ?? undefined;
    const statuses = this.buildRuntimeSpawnStatusRecord(run);
    return snapshotFromRuntimeMemberStatuses({
      teamName,
      expectedMembers,
      leadSessionId,
      launchPhase,
      statuses,
    });
  }

  return null;
}
/**
 * Notifies the team change emitter, when one is configured, that a member's spawn
 * status changed.
 *
 * @param run - Supplies the team name and run id attached to the event.
 * @param memberName - Sent as the event's `detail`.
 */
private emitMemberSpawnChange(
  run: Pick<ProvisioningRun, 'teamName' | 'runId'>,
  memberName: string
) {
  const { teamName, runId } = run;
  this.teamChangeEmitter?.({ type: 'member-spawn', teamName, runId, detail: memberName });
}
/**
 * Publishes a status change for one mixed secondary lane.
 *
 * For launch runs the launch state snapshot is persisted first, using the phase
 * reported by `getMixedSecondaryLaunchPhase`. The member-spawn event for the lane's
 * member is then emitted only if the run is still the current tracked run.
 *
 * @param run - The provisioning run that owns the lane.
 * @param lane - The secondary lane whose status changed.
 */
private async publishMixedSecondaryLaneStatusChange(
  run: ProvisioningRun,
  lane: MixedSecondaryRuntimeLaneState
): Promise<void> {
  if (run.isLaunch) {
    const launchPhase = this.getMixedSecondaryLaunchPhase(run);
    await this.persistLaunchStateSnapshot(run, launchPhase);
  }

  // Checked after the persist await: the tracked run may have changed in the meantime.
  if (this.isCurrentTrackedRun(run)) {
    this.emitMemberSpawnChange(run, lane.member.name);
  }
}
private buildMixedPersistedLaunchSnapshotForRun(
run: ProvisioningRun,
launchPhase: PersistedTeamLaunchPhase
@ -11888,27 +11936,14 @@ export class TeamProvisioningService {
? 'finished'
: 'active'
): Promise<PersistedTeamLaunchSnapshot | null> {
const mixedSnapshot = this.buildMixedPersistedLaunchSnapshotForRun(run, launchPhase);
if (mixedSnapshot) {
await this.launchStateStore.write(run.teamName, mixedSnapshot);
return mixedSnapshot;
}
if (!run.isLaunch || !run.expectedMembers || run.expectedMembers.length === 0) {
const snapshot = this.buildLiveLaunchSnapshotForRun(run, launchPhase);
if (!snapshot) {
if (run.isLaunch) {
await this.clearPersistedLaunchState(run.teamName);
}
return null;
}
const snapshot = snapshotFromRuntimeMemberStatuses({
teamName: run.teamName,
expectedMembers: run.expectedMembers,
leadSessionId: run.detectedSessionId ?? undefined,
launchPhase,
statuses: this.buildRuntimeSpawnStatusRecord(run),
});
if (snapshot.teamLaunchState === 'clean_success' && launchPhase !== 'active') {
await this.clearPersistedLaunchState(run.teamName);
return null;
@ -11949,6 +11984,7 @@ export class TeamProvisioningService {
};
lane.warnings = [];
lane.diagnostics = [message];
await this.publishMixedSecondaryLaneStatusChange(run, lane);
return;
}
@ -11977,8 +12013,7 @@ export class TeamProvisioningService {
memberName: lane.member.name,
cwd: run.request.cwd,
});
await this.persistLaunchStateSnapshot(run, this.getMixedSecondaryLaunchPhase(run));
await this.publishMixedSecondaryLaneStatusChange(run, lane);
const previousLaunchState = await this.launchStateStore.read(run.teamName);
try {
@ -12050,7 +12085,7 @@ export class TeamProvisioningService {
this.deleteSecondaryRuntimeRun(run.teamName, lane.laneId);
}
await this.persistLaunchStateSnapshot(run, this.getMixedSecondaryLaunchPhase(run));
await this.publishMixedSecondaryLaneStatusChange(run, lane);
}
private async stopSingleMixedSecondaryRuntimeLane(
@ -12129,6 +12164,7 @@ export class TeamProvisioningService {
diagnostics: ['OpenCode runtime adapter is not registered for mixed team launch.'],
};
lane.diagnostics = lane.result.diagnostics;
await this.publishMixedSecondaryLaneStatusChange(run, lane);
}
return this.persistLaunchStateSnapshot(run, 'finished');
}
@ -12140,12 +12176,187 @@ export class TeamProvisioningService {
return this.persistLaunchStateSnapshot(run, this.getMixedSecondaryLaunchPhase(run));
}
/**
 * Rebuilds and persists an aggregate launch snapshot for a mixed team whose OpenCode
 * secondary lanes are degraded, or are indexed as active with no lane state on disk.
 *
 * Resolution order:
 * 1. A persisted snapshot that already carries mixed launch metadata is returned as-is.
 * 2. `null` is returned when the team has no lead provider, the lead itself is OpenCode,
 *    or there are no active non-lead members.
 * 3. Every active member is classified as primary or OpenCode secondary. Lanes indexed
 *    as `active` are checked for staleness; degraded lanes are reported as hard
 *    `failed_to_start` failures, all other secondary lanes as pending.
 * 4. `null` is returned (and nothing is written) unless at least one lane is degraded.
 *
 * Metadata read failures are treated as missing metadata and a lane index read failure
 * as an empty index. Errors from the stale-lane recovery and from the final snapshot
 * write are not caught here.
 *
 * @param teamName - Team whose launch state is reconciled.
 * @param bootstrapSnapshot - Bootstrap snapshot; supplies primary member statuses and the
 *   fallback lead session id / launch phase.
 * @param persistedSnapshot - Snapshot currently held by the launch state store, if any.
 * @returns The persisted mixed snapshot, the newly written recovered snapshot, or `null`.
 */
private async recoverStaleMixedSecondaryLaunchSnapshot(
  teamName: string,
  bootstrapSnapshot: PersistedTeamLaunchSnapshot | null,
  persistedSnapshot: PersistedTeamLaunchSnapshot | null
): Promise<PersistedTeamLaunchSnapshot | null> {
  if (persistedSnapshot && this.hasMixedLaunchMetadata(persistedSnapshot)) {
    return persistedSnapshot;
  }

  const leadMeta = await this.teamMetaStore.getMeta(teamName).catch(() => null);
  const leadProviderId = normalizeOptionalTeamProviderId(leadMeta?.providerId);
  if (!leadProviderId || leadProviderId === 'opencode') {
    return null;
  }

  const rosterMeta = await this.membersMetaStore.getMeta(teamName).catch(() => null);
  const teammates = (rosterMeta?.members ?? []).filter(
    (candidate) => !(candidate.removedAt || isLeadMember({ name: candidate.name }))
  );
  if (teammates.length === 0) {
    return null;
  }

  type LaneIndexEntry = {
    laneId: string;
    state: 'active' | 'stopped' | 'degraded';
    updatedAt: string;
    diagnostics?: string[];
  };
  // An unreadable lane index is treated as "no lanes recorded".
  const laneIndex = await readOpenCodeRuntimeLaneIndex(getTeamsBasePath(), teamName).catch(
    () => ({
      version: 1 as const,
      updatedAt: nowIso(),
      lanes: {} as Record<string, LaneIndexEntry>,
    })
  );

  const bootstrapStatuses = snapshotToMemberSpawnStatuses(bootstrapSnapshot);
  const leadResolvedFastMode = leadMeta?.launchIdentity?.resolvedFastMode;
  const leadDefaults = {
    providerId: leadProviderId,
    providerBackendId:
      migrateProviderBackendId(
        leadProviderId,
        leadMeta?.providerBackendId ?? rosterMeta?.providerBackendId
      ) ?? null,
    selectedFastMode: leadMeta?.fastMode,
    resolvedFastMode: typeof leadResolvedFastMode === 'boolean' ? leadResolvedFastMode : null,
    launchIdentity: leadMeta?.launchIdentity ?? null,
  };

  type SecondaryLanePlan = {
    laneId: string;
    member: TeamMember;
    leadDefaults: typeof leadDefaults;
    evidence?: {
      launchState?: MemberLaunchState;
      agentToolAccepted?: boolean;
      runtimeAlive?: boolean;
      bootstrapConfirmed?: boolean;
      hardFailure?: boolean;
      hardFailureReason?: string;
      diagnostics?: string[];
    };
    pendingReason?: string;
  };
  const primaryMembers: TeamMember[] = [];
  const secondaryMembers: SecondaryLanePlan[] = [];

  // Sequential on purpose: each stale-lane recovery may rewrite the shared lane index.
  for (const member of teammates) {
    const identity = buildPlannedMemberLaneIdentity({
      leadProviderId,
      member: {
        name: member.name,
        providerId: normalizeOptionalTeamProviderId(member.providerId),
      },
    });
    if (identity.laneKind !== 'secondary' || identity.laneOwnerProviderId !== 'opencode') {
      primaryMembers.push(member);
      continue;
    }

    const laneId = identity.laneId;
    const indexedLane = laneIndex.lanes[laneId];
    let degraded = indexedLane?.state === 'degraded';
    let recordedDiagnostics = indexedLane?.diagnostics;

    if (indexedLane?.state === 'active') {
      const recovery = await recoverStaleOpenCodeRuntimeLaneIndexEntry({
        teamsBasePath: getTeamsBasePath(),
        teamName,
        laneId,
      });
      if (recovery.stale) {
        // The index entry was just downgraded; treat the lane as degraded from here on.
        degraded = true;
        recordedDiagnostics = recovery.diagnostics;
      }
    }

    if (!degraded) {
      secondaryMembers.push({
        laneId,
        member,
        leadDefaults,
        pendingReason: 'Waiting for OpenCode secondary lane recovery.',
      });
      continue;
    }

    const diagnostics = recordedDiagnostics?.length
      ? [...recordedDiagnostics]
      : [`OpenCode lane ${laneId} is degraded and requires stop + relaunch.`];
    secondaryMembers.push({
      laneId,
      member,
      leadDefaults,
      evidence: {
        launchState: 'failed_to_start',
        agentToolAccepted: false,
        runtimeAlive: false,
        bootstrapConfirmed: false,
        hardFailure: true,
        hardFailureReason: diagnostics[0],
        diagnostics,
      },
    });
  }

  // Only degraded lanes carry evidence; with none there is nothing to recover.
  const hasDegradedLane = secondaryMembers.some((plan) => plan.evidence !== undefined);
  if (!hasDegradedLane) {
    return null;
  }

  const primaryStatuses = Object.fromEntries(
    primaryMembers.map((member) => [
      member.name,
      bootstrapStatuses[member.name] ?? createInitialMemberSpawnStatusEntry(),
    ])
  );
  const launchStillActive =
    persistedSnapshot?.launchPhase === 'active' || bootstrapSnapshot?.launchPhase === 'active';

  const recoveredSnapshot = this.runtimeLaneCoordinator.buildAggregateLaunchSnapshot({
    teamName,
    leadSessionId: persistedSnapshot?.leadSessionId ?? bootstrapSnapshot?.leadSessionId,
    launchPhase: launchStillActive ? 'active' : 'reconciled',
    leadDefaults,
    primaryMembers,
    primaryStatuses,
    secondaryMembers,
  });
  await this.launchStateStore.write(teamName, recoveredSnapshot);
  return recoveredSnapshot;
}
private async reconcilePersistedLaunchState(teamName: string): Promise<{
snapshot: ReturnType<typeof createPersistedLaunchSnapshot> | null;
statuses: Record<string, MemberSpawnStatusEntry>;
}> {
const bootstrapSnapshot = await readBootstrapLaunchSnapshot(teamName);
const persisted = await this.launchStateStore.read(teamName);
const recoveredMixedSnapshot = await this.recoverStaleMixedSecondaryLaunchSnapshot(
teamName,
bootstrapSnapshot,
persisted
);
if (recoveredMixedSnapshot) {
return {
snapshot: recoveredMixedSnapshot,
statuses: snapshotToMemberSpawnStatuses(recoveredMixedSnapshot),
};
}
const preferredSnapshot = choosePreferredLaunchSnapshot(bootstrapSnapshot, persisted);
if (preferredSnapshot && preferredSnapshot === bootstrapSnapshot) {
return {

View file

@ -153,6 +153,37 @@ export function getOpenCodeRuntimeManifestPath(
);
}
/**
 * Reports what exists on disk for a single OpenCode runtime lane.
 *
 * @param params - Teams base path, team name and lane id identifying the lane directory.
 * @returns Whether the lane directory exists, whether it contains any entries, and the
 *   sorted entry names. A failure to list an existing directory is reported as an empty
 *   listing rather than thrown.
 */
export async function inspectOpenCodeRuntimeLaneStorage(params: {
  teamsBasePath: string;
  teamName: string;
  laneId: string;
}): Promise<{
  laneDirectoryExists: boolean;
  hasStateOnDisk: boolean;
  fileNames: string[];
}> {
  const { teamsBasePath, teamName, laneId } = params;
  const laneDir = getOpenCodeTeamRuntimeLaneDirectory(teamsBasePath, teamName, laneId);

  if (!(await fileExists(laneDir))) {
    return { laneDirectoryExists: false, hasStateOnDisk: false, fileNames: [] };
  }

  let fileNames: string[];
  try {
    fileNames = await readdir(laneDir);
  } catch {
    // Best effort: an unreadable lane directory is reported as having no entries.
    fileNames = [];
  }
  fileNames.sort();

  return {
    laneDirectoryExists: true,
    hasStateOnDisk: fileNames.length > 0,
    fileNames,
  };
}
export function getOpenCodeLaneScopedRuntimeFilePath(params: {
teamsBasePath: string;
teamName: string;
@ -284,6 +315,51 @@ export async function clearOpenCodeRuntimeLaneStorage(params: {
await removeOpenCodeRuntimeLaneIndexEntry(params);
}
/**
 * Downgrades a lane index entry from `active` to `degraded` when the lane has no state
 * on disk.
 *
 * Nothing is changed when the lane is missing from the index, is not `active`, or its
 * lane directory contains at least one entry.
 *
 * @param params - Teams base path, team name and lane id of the entry to check.
 * @returns `stale` and `degraded` are both `true` only when the entry was downgraded, in
 *   which case `diagnostics` holds the message written to the index; otherwise both are
 *   `false` and `diagnostics` is empty.
 */
export async function recoverStaleOpenCodeRuntimeLaneIndexEntry(params: {
  teamsBasePath: string;
  teamName: string;
  laneId: string;
}): Promise<{
  stale: boolean;
  degraded: boolean;
  diagnostics: string[];
}> {
  const { teamsBasePath, teamName, laneId } = params;
  // Factory so every caller receives its own diagnostics array.
  const notStale = () => ({ stale: false, degraded: false, diagnostics: [] as string[] });

  const laneIndex = await readOpenCodeRuntimeLaneIndex(teamsBasePath, teamName);
  if (laneIndex.lanes[laneId]?.state !== 'active') {
    return notStale();
  }

  const { hasStateOnDisk } = await inspectOpenCodeRuntimeLaneStorage(params);
  if (hasStateOnDisk) {
    return notStale();
  }

  const diagnostics = [
    `OpenCode lane ${laneId} is marked active in lanes.json, but no lane state exists on disk.`,
  ];
  await upsertOpenCodeRuntimeLaneIndexEntry({
    teamsBasePath,
    teamName,
    laneId,
    state: 'degraded',
    diagnostics,
  });

  return { stale: true, degraded: true, diagnostics };
}
export async function migrateLegacyOpenCodeRuntimeState(params: {
teamsBasePath: string;
teamName: string;

View file

@ -9,8 +9,10 @@ import {
getOpenCodeLaneScopedRuntimeFilePath,
getOpenCodeRuntimeLaneIndexPath,
getOpenCodeTeamRuntimeDirectory,
inspectOpenCodeRuntimeLaneStorage,
migrateLegacyOpenCodeRuntimeState,
readOpenCodeRuntimeLaneIndex,
recoverStaleOpenCodeRuntimeLaneIndexEntry,
upsertOpenCodeRuntimeLaneIndexEntry,
} from '../../../../src/main/services/team/opencode/store/OpenCodeRuntimeManifestEvidenceReader';
@ -238,4 +240,53 @@ describe('OpenCodeRuntimeManifestEvidenceReader migration', () => {
capabilitySnapshotId: 'cap-1',
});
});
it('reports missing lane storage when an active lane index entry has no lane dir or state', async () => {
  const teamName = 'team-epsilon';
  const laneId = 'secondary:opencode:alice';
  const laneRef = { teamsBasePath: tempDir, teamName, laneId };
  const staleDiagnostic = `OpenCode lane ${laneId} is marked active in lanes.json, but no lane state exists on disk.`;

  // Arrange: the index claims the lane is active, but no lane directory is ever created.
  await upsertOpenCodeRuntimeLaneIndexEntry({ ...laneRef, state: 'active' });

  // The storage probe sees nothing on disk.
  const storage = await inspectOpenCodeRuntimeLaneStorage(laneRef);
  expect(storage).toEqual({
    laneDirectoryExists: false,
    hasStateOnDisk: false,
    fileNames: [],
  });

  // Recovery flags the entry as stale and reports why.
  const recovery = await recoverStaleOpenCodeRuntimeLaneIndexEntry(laneRef);
  expect(recovery).toEqual({
    stale: true,
    degraded: true,
    diagnostics: [staleDiagnostic],
  });

  // The downgrade is written back to the lane index.
  const indexAfterRecovery = await readOpenCodeRuntimeLaneIndex(tempDir, teamName);
  expect(indexAfterRecovery).toMatchObject({
    lanes: {
      [laneId]: {
        laneId,
        state: 'degraded',
        diagnostics: [staleDiagnostic],
      },
    },
  });
});
});

View file

@ -268,6 +268,54 @@ function writeBootstrapState(
);
}
/**
 * Test fixture: writes `team.meta.json` for a team under the temporary teams base,
 * creating the team directory if needed.
 *
 * @param teamName - Directory name of the team.
 * @param overrides - Fields merged over the default codex-led metadata.
 */
function writeTeamMeta(teamName: string, overrides: Record<string, unknown> = {}): void {
  const teamDir = path.join(tempTeamsBase, teamName);
  fs.mkdirSync(teamDir, { recursive: true });

  const meta = {
    version: 1,
    cwd: '/Users/test/proj',
    providerId: 'codex',
    providerBackendId: 'codex-native',
    model: 'gpt-5.4',
    effort: 'medium',
    createdAt: Date.now(),
    ...overrides,
  };
  const serialized = JSON.stringify(meta, null, 2);
  fs.writeFileSync(path.join(teamDir, 'team.meta.json'), `${serialized}\n`, 'utf8');
}
/**
 * Test fixture: writes `members.meta.json` for a team under the temporary teams base,
 * creating the team directory if needed.
 *
 * @param teamName - Directory name of the team.
 * @param members - Member records stored verbatim in the file.
 * @param providerBackendId - Backend id recorded at the top level of the file.
 */
function writeMembersMeta(
  teamName: string,
  members: Record<string, unknown>[],
  providerBackendId = 'codex-native'
): void {
  const teamDir = path.join(tempTeamsBase, teamName);
  fs.mkdirSync(teamDir, { recursive: true });

  const serialized = JSON.stringify({ version: 1, providerBackendId, members }, null, 2);
  fs.writeFileSync(path.join(teamDir, 'members.meta.json'), `${serialized}\n`, 'utf8');
}
function createMemberSpawnStatusEntry(
overrides: Record<string, unknown> = {}
): Record<string, unknown> {
@ -1733,6 +1781,7 @@ describe('TeamProvisioningService', () => {
(svc as any).launchStateStore = {
read: vi.fn(async () => null),
write: vi.fn(async () => {}),
clear: vi.fn(async () => {}),
};
const run = createMemberSpawnRun({
@ -4932,4 +4981,155 @@ describe('TeamProvisioningService', () => {
agentToolAccepted: true,
});
});
it('recovers stale mixed secondary lanes when lanes.json says active but lane state is missing', async () => {
  const teamName = 'signal-ops-6212';
  const staleLaneIds = ['secondary:opencode:atlas', 'secondary:opencode:tom'];

  // Arrange: codex lead with two codex teammates and two OpenCode teammates.
  writeTeamMeta(teamName, {
    providerId: 'codex',
    providerBackendId: 'codex-native',
    model: 'gpt-5.4',
  });
  writeMembersMeta(teamName, [
    { name: 'atlas', providerId: 'opencode', model: 'opencode/nemotron-3-super-free' },
    { name: 'bob', providerId: 'codex', model: 'gpt-5.4' },
    { name: 'nova', providerId: 'codex', model: 'gpt-5.4' },
    { name: 'tom', providerId: 'opencode', model: 'opencode/minimax-m2.5-free' },
  ]);
  writeLaunchConfig(teamName, '/Users/test/proj', 'lead-session', ['bob', 'nova']);
  writeBootstrapState(teamName, [
    { name: 'bob', status: 'registered' },
    { name: 'nova', status: 'registered' },
  ]);

  // Both OpenCode lanes are indexed as active, but no lane state is written to disk.
  for (const laneId of staleLaneIds) {
    await upsertOpenCodeRuntimeLaneIndexEntry({
      teamsBasePath: tempTeamsBase,
      teamName,
      laneId,
      state: 'active',
    });
  }

  // Act
  const svc = new TeamProvisioningService();
  const result = await svc.getMemberSpawnStatuses(teamName);

  // Assert: the aggregate state reflects the failed secondary lanes.
  expect(result.teamLaunchState).toBe('partial_failure');
  expect(result.launchPhase).toBe('reconciled');
  expect(result.expectedMembers).toEqual(expect.arrayContaining(['atlas', 'bob', 'nova', 'tom']));
  for (const memberName of ['atlas', 'tom']) {
    expect(result.statuses[memberName]).toMatchObject({
      status: 'error',
      launchState: 'failed_to_start',
      error: expect.stringContaining('no lane state exists on disk'),
    });
  }

  // The lane index entries were downgraded...
  const laneIndex = await readOpenCodeRuntimeLaneIndex(tempTeamsBase, teamName);
  expect(laneIndex).toMatchObject({
    lanes: Object.fromEntries(staleLaneIds.map((laneId) => [laneId, { state: 'degraded' }])),
  });

  // ...and the recovered snapshot was written to the launch state file.
  const persistedLaunchState = await fsPromises.readFile(getTeamLaunchStatePath(teamName), 'utf8');
  expect(persistedLaunchState).toContain('"secondary:opencode:atlas"');
});
it('includes queued OpenCode secondary lanes in live spawn statuses before the final mixed snapshot settles', async () => {
  const teamName = 'mixed-live-team';
  const leadModel = 'gpt-5.4';

  const svc = new TeamProvisioningService();
  // Silence the background refresh/audit paths so only the live run state is observed.
  vi.spyOn(svc as any, 'refreshMemberSpawnStatusesFromLeadInbox').mockResolvedValue(undefined);
  vi.spyOn(svc as any, 'maybeAuditMemberSpawnStatuses').mockResolvedValue(undefined);

  // Arrange: a launch run with one confirmed primary member (bob)...
  const bobStatus = createMemberSpawnStatusEntry({
    status: 'online',
    launchState: 'confirmed_alive',
    runtimeAlive: true,
    bootstrapConfirmed: true,
    livenessSource: 'heartbeat',
  });
  const run = createMemberSpawnRun({
    teamName,
    runId: 'run-mixed-live-1',
    expectedMembers: ['bob'],
    memberSpawnStatuses: new Map([['bob', bobStatus]]),
  });
  run.isLaunch = true;
  run.request = {
    teamName,
    cwd: '/tmp/mixed-live-team',
    providerId: 'codex',
    providerBackendId: 'codex-native',
    model: leadModel,
    members: [],
  };
  run.effectiveMembers = [{ name: 'bob', providerId: 'codex', model: leadModel }];
  // ...and one OpenCode secondary lane (atlas) that is still queued.
  run.mixedSecondaryLanes = [
    {
      laneId: 'secondary:opencode:atlas',
      providerId: 'opencode',
      member: {
        name: 'atlas',
        providerId: 'opencode',
        model: 'opencode/nemotron-3-super-free',
      },
      runId: null,
      state: 'queued',
      result: null,
      warnings: [],
      diagnostics: [],
    },
  ];
  run.detectedSessionId = 'lead-session';
  (svc as any).runs.set(run.runId, run);
  (svc as any).provisioningRunByTeam.set(run.teamName, run.runId);

  // Act
  const result = await svc.getMemberSpawnStatuses(run.teamName);

  // Assert: the queued secondary member appears alongside the primary one.
  expect(result.teamLaunchState).toBe('partial_pending');
  expect(result.expectedMembers).toEqual(expect.arrayContaining(['bob', 'atlas']));
  expect(result.statuses.bob).toMatchObject({
    status: 'online',
    launchState: 'confirmed_alive',
  });
  expect(result.statuses.atlas).toMatchObject({
    status: 'spawning',
    launchState: 'starting',
  });
});
});