fix(team): recover opencode runtime lane indexes
This commit is contained in:
parent
1f7f31f3ee
commit
bef34983f5
6 changed files with 1216 additions and 43 deletions
|
|
@ -1595,6 +1595,84 @@ function isDefinitiveOpenCodePreLaunchFailure(
|
|||
);
|
||||
}
|
||||
|
||||
function hasOpenCodeRuntimeHandle(
|
||||
value:
|
||||
| Pick<PersistedTeamLaunchMemberState, 'runtimePid' | 'runtimeSessionId' | 'livenessKind'>
|
||||
| Pick<TeamRuntimeMemberLaunchEvidence, 'runtimePid' | 'sessionId' | 'livenessKind'>
|
||||
| undefined
|
||||
): boolean {
|
||||
if (!value) {
|
||||
return false;
|
||||
}
|
||||
const runtimePid =
|
||||
typeof value.runtimePid === 'number' &&
|
||||
Number.isFinite(value.runtimePid) &&
|
||||
value.runtimePid > 0;
|
||||
const runtimeSessionId = (value as { runtimeSessionId?: unknown }).runtimeSessionId;
|
||||
const runtimeEvidenceSessionId = (value as { sessionId?: unknown }).sessionId;
|
||||
const sessionId =
|
||||
(typeof runtimeSessionId === 'string' && runtimeSessionId.trim().length > 0) ||
|
||||
(typeof runtimeEvidenceSessionId === 'string' && runtimeEvidenceSessionId.trim().length > 0);
|
||||
return runtimePid || sessionId;
|
||||
}
|
||||
|
||||
function hasOpenCodeRuntimeLivenessMarker(
|
||||
value: Pick<TeamRuntimeMemberLaunchEvidence, 'livenessKind'> | undefined
|
||||
): boolean {
|
||||
return (
|
||||
value?.livenessKind === 'runtime_process' ||
|
||||
value?.livenessKind === 'runtime_process_candidate' ||
|
||||
value?.livenessKind === 'permission_blocked'
|
||||
);
|
||||
}
|
||||
|
||||
function isRecoverablePersistedOpenCodeRuntimeCandidate(
|
||||
member: PersistedTeamLaunchMemberState | undefined | null
|
||||
): boolean {
|
||||
if (!member || member.skippedForLaunch) {
|
||||
return false;
|
||||
}
|
||||
if (
|
||||
member.providerId !== 'opencode' ||
|
||||
member.laneKind !== 'secondary' ||
|
||||
member.laneOwnerProviderId !== 'opencode' ||
|
||||
typeof member.laneId !== 'string' ||
|
||||
member.laneId.trim().length === 0
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
const hasPendingPermission = (member.pendingPermissionRequestIds?.length ?? 0) > 0;
|
||||
return (
|
||||
member.agentToolAccepted === true && (hasOpenCodeRuntimeHandle(member) || hasPendingPermission)
|
||||
);
|
||||
}
|
||||
|
||||
function isRecoverablePersistedOpenCodeTerminalRuntimeCandidate(
|
||||
member: PersistedTeamLaunchMemberState | undefined | null
|
||||
): boolean {
|
||||
return (
|
||||
isRecoverablePersistedOpenCodeRuntimeCandidate(member) &&
|
||||
member?.launchState === 'failed_to_start' &&
|
||||
member.hardFailure === true &&
|
||||
hasOpenCodeRuntimeHandle(member)
|
||||
);
|
||||
}
|
||||
|
||||
function isRecoverableOpenCodeRuntimeEvidence(
|
||||
evidence: TeamRuntimeMemberLaunchEvidence | undefined | null
|
||||
): evidence is TeamRuntimeMemberLaunchEvidence {
|
||||
if (!evidence) {
|
||||
return false;
|
||||
}
|
||||
return (
|
||||
evidence.runtimeAlive === true ||
|
||||
evidence.bootstrapConfirmed === true ||
|
||||
(evidence.pendingPermissionRequestIds?.length ?? 0) > 0 ||
|
||||
hasOpenCodeRuntimeHandle(evidence) ||
|
||||
(evidence.agentToolAccepted === true && hasOpenCodeRuntimeLivenessMarker(evidence))
|
||||
);
|
||||
}
|
||||
|
||||
function isLaunchGraceWindowFailureReason(reason?: string): boolean {
|
||||
return reason?.trim() === 'Teammate did not join within the launch grace window.';
|
||||
}
|
||||
|
|
@ -5161,12 +5239,16 @@ export class TeamProvisioningService {
|
|||
if (!laneIndex) {
|
||||
return 0;
|
||||
}
|
||||
return await this.scanOpenCodePromptDeliveryWatchdogForActiveLanes(
|
||||
teamName,
|
||||
Object.values(laneIndex.lanes)
|
||||
.filter((lane) => lane.state === 'active')
|
||||
.map((lane) => lane.laneId)
|
||||
);
|
||||
let activeLaneIds = Object.values(laneIndex.lanes)
|
||||
.filter((lane) => lane.state === 'active')
|
||||
.map((lane) => lane.laneId);
|
||||
activeLaneIds = [
|
||||
...new Set([
|
||||
...activeLaneIds,
|
||||
...(await this.tryRecoverOpenCodeRuntimeLanesForDeliveryWatchdog(teamName)),
|
||||
]),
|
||||
];
|
||||
return await this.scanOpenCodePromptDeliveryWatchdogForActiveLanes(teamName, activeLaneIds);
|
||||
}
|
||||
|
||||
private async scanOpenCodePromptDeliveryWatchdogForActiveLanes(
|
||||
|
|
@ -5392,7 +5474,7 @@ export class TeamProvisioningService {
|
|||
return { delivered: false, reason: 'opencode_runtime_not_active' };
|
||||
}
|
||||
}
|
||||
const runtimeRunId =
|
||||
let runtimeRunId =
|
||||
laneIdentity.laneKind === 'secondary' && laneIdentity.laneOwnerProviderId === 'opencode'
|
||||
? (liveSecondaryLaneRunId ??
|
||||
(await this.resolveCurrentOpenCodeRuntimeRunId(teamName, laneIdentity.laneId)))
|
||||
|
|
@ -5411,6 +5493,33 @@ export class TeamProvisioningService {
|
|||
}
|
||||
runtimeActive = await this.isOpenCodeRuntimeLaneIndexActive(teamName, laneIdentity.laneId);
|
||||
}
|
||||
if (
|
||||
!runtimeActive &&
|
||||
laneIdentity.laneKind === 'secondary' &&
|
||||
laneIdentity.laneOwnerProviderId === 'opencode'
|
||||
) {
|
||||
const recovered = await this.tryRecoverOpenCodeRuntimeLaneBeforeDelivery({
|
||||
teamName,
|
||||
laneId: laneIdentity.laneId,
|
||||
member: {
|
||||
...(configMember ?? {}),
|
||||
...(metaMember ?? {}),
|
||||
name: canonicalMemberName,
|
||||
providerId: 'opencode',
|
||||
model: metaMember?.model ?? configMember?.model,
|
||||
role: metaMember?.role ?? configMember?.role,
|
||||
workflow: metaMember?.workflow ?? configMember?.workflow,
|
||||
effort: metaMember?.effort ?? configMember?.effort,
|
||||
cwd: memberRuntimeCwd || undefined,
|
||||
isolation: metaMember?.isolation ?? configMember?.isolation,
|
||||
},
|
||||
projectPath: config?.projectPath?.trim() || this.readPersistedTeamProjectPath(teamName),
|
||||
});
|
||||
if (recovered) {
|
||||
runtimeRunId = await this.resolveCurrentOpenCodeRuntimeRunId(teamName, laneIdentity.laneId);
|
||||
runtimeActive = true;
|
||||
}
|
||||
}
|
||||
if (!runtimeActive) {
|
||||
return { delivered: false, reason: 'opencode_runtime_not_active' };
|
||||
}
|
||||
|
|
@ -6258,6 +6367,108 @@ export class TeamProvisioningService {
|
|||
return laneIndex?.lanes[laneId]?.state === 'active';
|
||||
}
|
||||
|
||||
private async tryRecoverOpenCodeRuntimeLaneBeforeDelivery(input: {
|
||||
teamName: string;
|
||||
laneId: string;
|
||||
member: TeamMember;
|
||||
projectPath: string | null;
|
||||
}): Promise<boolean> {
|
||||
const snapshot = await this.launchStateStore.read(input.teamName).catch(() => null);
|
||||
const persistedMember =
|
||||
snapshot?.members?.[input.member.name] ??
|
||||
Object.values(snapshot?.members ?? {}).find((member) => member.laneId === input.laneId);
|
||||
if (!persistedMember || !isRecoverablePersistedOpenCodeRuntimeCandidate(persistedMember)) {
|
||||
return false;
|
||||
}
|
||||
const runtimeEvidence = await this.tryRecoverMissingOpenCodeSecondaryLaneFromRuntime({
|
||||
teamName: input.teamName,
|
||||
laneId: input.laneId,
|
||||
member: input.member,
|
||||
projectPath: input.projectPath,
|
||||
previousLaunchState: snapshot,
|
||||
persistedMember,
|
||||
});
|
||||
if (!runtimeEvidence) {
|
||||
return false;
|
||||
}
|
||||
logger.info(
|
||||
`[${input.teamName}] Recovered OpenCode lane ${input.laneId} before message delivery.`
|
||||
);
|
||||
return true;
|
||||
}
|
||||
|
||||
private async tryRecoverOpenCodeRuntimeLanesForDeliveryWatchdog(
|
||||
teamName: string
|
||||
): Promise<string[]> {
|
||||
const snapshot = await this.launchStateStore.read(teamName).catch(() => null);
|
||||
const candidates = Object.values(snapshot?.members ?? {}).filter(
|
||||
isRecoverablePersistedOpenCodeRuntimeCandidate
|
||||
);
|
||||
if (candidates.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const [config, teamMeta, metaMembers, currentLaneIndex] = await Promise.all([
|
||||
this.configReader.getConfig(teamName).catch(() => null),
|
||||
this.teamMetaStore.getMeta(teamName).catch(() => null),
|
||||
this.membersMetaStore.getMembers(teamName).catch(() => []),
|
||||
readOpenCodeRuntimeLaneIndex(getTeamsBasePath(), teamName).catch(() => null),
|
||||
]);
|
||||
const projectPath = config?.projectPath?.trim() || this.readPersistedTeamProjectPath(teamName);
|
||||
const leadMember = config?.members?.find((member) => isLeadMember(member));
|
||||
const leadProviderId =
|
||||
normalizeOptionalTeamProviderId(teamMeta?.launchIdentity?.providerId) ??
|
||||
normalizeOptionalTeamProviderId(teamMeta?.providerId) ??
|
||||
normalizeOptionalTeamProviderId(leadMember?.providerId);
|
||||
const recoveredLaneIds: string[] = [];
|
||||
for (const persistedMember of candidates) {
|
||||
const memberName = persistedMember.name.trim();
|
||||
const configMember = config?.members?.find(
|
||||
(member) => member.name?.trim().toLowerCase() === memberName.toLowerCase()
|
||||
);
|
||||
const metaMember = metaMembers.find(
|
||||
(member) => member.name?.trim().toLowerCase() === memberName.toLowerCase()
|
||||
);
|
||||
if (metaMember?.removedAt != null || configMember?.removedAt != null) {
|
||||
continue;
|
||||
}
|
||||
const laneIdentity = buildPlannedMemberLaneIdentity({
|
||||
leadProviderId,
|
||||
member: {
|
||||
name: memberName,
|
||||
providerId: 'opencode',
|
||||
},
|
||||
});
|
||||
if (laneIdentity.laneId !== persistedMember.laneId) {
|
||||
continue;
|
||||
}
|
||||
if (currentLaneIndex?.lanes[laneIdentity.laneId]) {
|
||||
continue;
|
||||
}
|
||||
const recovered = await this.tryRecoverOpenCodeRuntimeLaneBeforeDelivery({
|
||||
teamName,
|
||||
laneId: laneIdentity.laneId,
|
||||
member: {
|
||||
...(configMember ?? {}),
|
||||
...(metaMember ?? {}),
|
||||
name: memberName,
|
||||
providerId: 'opencode',
|
||||
model: metaMember?.model ?? configMember?.model ?? persistedMember.model,
|
||||
role: metaMember?.role ?? configMember?.role,
|
||||
workflow: metaMember?.workflow ?? configMember?.workflow,
|
||||
effort: metaMember?.effort ?? configMember?.effort ?? persistedMember.effort,
|
||||
cwd: metaMember?.cwd ?? configMember?.cwd ?? persistedMember.cwd,
|
||||
isolation: metaMember?.isolation ?? configMember?.isolation,
|
||||
},
|
||||
projectPath,
|
||||
});
|
||||
if (recovered) {
|
||||
recoveredLaneIds.push(laneIdentity.laneId);
|
||||
}
|
||||
}
|
||||
return [...new Set(recoveredLaneIds)];
|
||||
}
|
||||
|
||||
private async resolveOpenCodeRuntimeLaneId(params: {
|
||||
teamName: string;
|
||||
runId: string;
|
||||
|
|
@ -16240,6 +16451,13 @@ export class TeamProvisioningService {
|
|||
private shouldRecoverStalePersistedMixedLaunchSnapshot(
|
||||
snapshot: PersistedTeamLaunchSnapshot
|
||||
): boolean {
|
||||
const hasRecoverableOpenCodeRuntimeCandidate = Object.values(snapshot.members).some((member) =>
|
||||
isRecoverablePersistedOpenCodeTerminalRuntimeCandidate(member)
|
||||
);
|
||||
if (hasRecoverableOpenCodeRuntimeCandidate) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (snapshot.teamLaunchState !== 'partial_pending') {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -16692,6 +16910,49 @@ export class TeamProvisioningService {
|
|||
}
|
||||
|
||||
let laneEntry = laneIndex.lanes[laneIdentity.laneId];
|
||||
const persistedMember =
|
||||
persistedSnapshot?.members?.[member.name] ?? bootstrapSnapshot?.members?.[member.name];
|
||||
if (
|
||||
!laneEntry &&
|
||||
persistedMember &&
|
||||
isRecoverablePersistedOpenCodeRuntimeCandidate(persistedMember) &&
|
||||
persistedMember.laneId === laneIdentity.laneId
|
||||
) {
|
||||
const runtimeEvidence = await this.tryRecoverMissingOpenCodeSecondaryLaneFromRuntime({
|
||||
teamName,
|
||||
laneId: laneIdentity.laneId,
|
||||
member,
|
||||
projectPath,
|
||||
previousLaunchState: persistedSnapshot ?? bootstrapSnapshot,
|
||||
persistedMember,
|
||||
});
|
||||
if (runtimeEvidence) {
|
||||
recoveredAny = true;
|
||||
secondaryMembers.push({
|
||||
laneId: laneIdentity.laneId,
|
||||
member,
|
||||
leadDefaults,
|
||||
evidence: {
|
||||
launchState: runtimeEvidence.launchState,
|
||||
agentToolAccepted: runtimeEvidence.agentToolAccepted,
|
||||
runtimeAlive: runtimeEvidence.runtimeAlive,
|
||||
bootstrapConfirmed: runtimeEvidence.bootstrapConfirmed,
|
||||
hardFailure: runtimeEvidence.hardFailure,
|
||||
hardFailureReason: runtimeEvidence.hardFailureReason,
|
||||
pendingPermissionRequestIds: runtimeEvidence.pendingPermissionRequestIds,
|
||||
runtimePid: runtimeEvidence.runtimePid,
|
||||
sessionId: runtimeEvidence.sessionId,
|
||||
runtimeSessionId: runtimeEvidence.sessionId,
|
||||
livenessKind: runtimeEvidence.livenessKind,
|
||||
pidSource: runtimeEvidence.pidSource,
|
||||
runtimeDiagnostic: runtimeEvidence.runtimeDiagnostic,
|
||||
runtimeDiagnosticSeverity: runtimeEvidence.runtimeDiagnosticSeverity,
|
||||
diagnostics: runtimeEvidence.diagnostics,
|
||||
},
|
||||
});
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (laneEntry?.state === 'active') {
|
||||
const runtimeEvidence = await this.tryRecoverActiveOpenCodeSecondaryLaneFromRuntime({
|
||||
teamName,
|
||||
|
|
@ -16844,6 +17105,61 @@ export class TeamProvisioningService {
|
|||
}
|
||||
}
|
||||
|
||||
private async tryRecoverMissingOpenCodeSecondaryLaneFromRuntime(params: {
|
||||
teamName: string;
|
||||
laneId: string;
|
||||
member: TeamMember;
|
||||
projectPath: string | null;
|
||||
previousLaunchState: PersistedTeamLaunchSnapshot | null;
|
||||
persistedMember: PersistedTeamLaunchMemberState;
|
||||
}): Promise<TeamRuntimeMemberLaunchEvidence | null> {
|
||||
const currentLaneIndex = await readOpenCodeRuntimeLaneIndex(
|
||||
getTeamsBasePath(),
|
||||
params.teamName
|
||||
).catch(() => null);
|
||||
const currentEntry = currentLaneIndex?.lanes[params.laneId];
|
||||
if (currentEntry?.state === 'degraded' || currentEntry?.state === 'stopped') {
|
||||
return null;
|
||||
}
|
||||
if (!isRecoverablePersistedOpenCodeRuntimeCandidate(params.persistedMember)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const runtimeEvidence = await this.tryRecoverActiveOpenCodeSecondaryLaneFromRuntime({
|
||||
teamName: params.teamName,
|
||||
laneId: params.laneId,
|
||||
member: params.member,
|
||||
projectPath: params.projectPath,
|
||||
previousLaunchState: params.previousLaunchState,
|
||||
});
|
||||
if (!isRecoverableOpenCodeRuntimeEvidence(runtimeEvidence)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const diagnostics = Array.from(
|
||||
new Set([
|
||||
'Recovered missing OpenCode runtime lane index from persisted runtime evidence.',
|
||||
...(runtimeEvidence.diagnostics ?? []),
|
||||
])
|
||||
);
|
||||
await upsertOpenCodeRuntimeLaneIndexEntry({
|
||||
teamsBasePath: getTeamsBasePath(),
|
||||
teamName: params.teamName,
|
||||
laneId: params.laneId,
|
||||
state: 'active',
|
||||
diagnostics,
|
||||
}).catch((error: unknown) => {
|
||||
logger.warn(
|
||||
`[${params.teamName}] Failed to recover missing OpenCode lane index ${params.laneId}: ${getErrorMessage(error)}`
|
||||
);
|
||||
});
|
||||
|
||||
return {
|
||||
...runtimeEvidence,
|
||||
diagnostics,
|
||||
};
|
||||
}
|
||||
|
||||
private async readLeadInboxMessagesForLaunchReconcile(
|
||||
teamName: string,
|
||||
leadName: string
|
||||
|
|
|
|||
|
|
@ -5,18 +5,63 @@ const STALE_TIMEOUT_MS = 30_000;
|
|||
const ACQUIRE_TIMEOUT_MS = 5_000;
|
||||
const RETRY_INTERVAL_MS = 20;
|
||||
|
||||
function readLockAge(lockPath: string): number | null {
|
||||
export interface FileLockOptions {
|
||||
acquireTimeoutMs?: number;
|
||||
staleTimeoutMs?: number;
|
||||
retryIntervalMs?: number;
|
||||
}
|
||||
|
||||
interface LockInfo {
|
||||
pid: number | null;
|
||||
ageMs: number | null;
|
||||
}
|
||||
|
||||
function readLockInfo(lockPath: string): LockInfo {
|
||||
let pid: number | null = null;
|
||||
let ageMs: number | null = null;
|
||||
try {
|
||||
const content = fs.readFileSync(lockPath, 'utf8');
|
||||
const ts = parseInt(content.split('\n')[1] ?? '', 10);
|
||||
if (Number.isFinite(ts)) return Date.now() - ts;
|
||||
const lines = content.split('\n');
|
||||
const parsedPid = parseInt(lines[0] ?? '', 10);
|
||||
if (Number.isFinite(parsedPid) && parsedPid > 0) {
|
||||
pid = parsedPid;
|
||||
}
|
||||
const ts = parseInt(lines[1] ?? '', 10);
|
||||
if (Number.isFinite(ts)) {
|
||||
ageMs = Date.now() - ts;
|
||||
}
|
||||
} catch {
|
||||
/* lock may have been released concurrently */
|
||||
}
|
||||
return null;
|
||||
if (ageMs === null) {
|
||||
try {
|
||||
ageMs = Date.now() - fs.statSync(lockPath).mtimeMs;
|
||||
} catch {
|
||||
/* lock may have been released concurrently */
|
||||
}
|
||||
}
|
||||
return { pid, ageMs };
|
||||
}
|
||||
|
||||
function tryAcquire(lockPath: string): boolean {
|
||||
function isProcessAlive(pid: number): boolean {
|
||||
try {
|
||||
process.kill(pid, 0);
|
||||
return true;
|
||||
} catch (error) {
|
||||
const code = (error as NodeJS.ErrnoException).code;
|
||||
return code !== 'ESRCH';
|
||||
}
|
||||
}
|
||||
|
||||
function shouldBreakExistingLock(lockPath: string, staleTimeoutMs: number): boolean {
|
||||
const info = readLockInfo(lockPath);
|
||||
if (info.pid !== null && !isProcessAlive(info.pid)) {
|
||||
return true;
|
||||
}
|
||||
return info.ageMs !== null && info.ageMs > staleTimeoutMs;
|
||||
}
|
||||
|
||||
function tryAcquire(lockPath: string, options: Required<FileLockOptions>): boolean {
|
||||
try {
|
||||
const dir = path.dirname(lockPath);
|
||||
if (!fs.existsSync(dir)) {
|
||||
|
|
@ -28,8 +73,7 @@ function tryAcquire(lockPath: string): boolean {
|
|||
return true;
|
||||
} catch (err) {
|
||||
if ((err as NodeJS.ErrnoException).code === 'EEXIST') {
|
||||
const age = readLockAge(lockPath);
|
||||
if (age !== null && age > STALE_TIMEOUT_MS) {
|
||||
if (shouldBreakExistingLock(lockPath, options.staleTimeoutMs)) {
|
||||
try {
|
||||
fs.unlinkSync(lockPath);
|
||||
} catch {
|
||||
|
|
@ -50,15 +94,24 @@ function releaseLock(lockPath: string): void {
|
|||
}
|
||||
}
|
||||
|
||||
export async function withFileLock<T>(filePath: string, fn: () => Promise<T>): Promise<T> {
|
||||
export async function withFileLock<T>(
|
||||
filePath: string,
|
||||
fn: () => Promise<T>,
|
||||
options: FileLockOptions = {}
|
||||
): Promise<T> {
|
||||
const resolvedOptions = {
|
||||
acquireTimeoutMs: options.acquireTimeoutMs ?? ACQUIRE_TIMEOUT_MS,
|
||||
staleTimeoutMs: options.staleTimeoutMs ?? STALE_TIMEOUT_MS,
|
||||
retryIntervalMs: options.retryIntervalMs ?? RETRY_INTERVAL_MS,
|
||||
};
|
||||
const lockPath = `${filePath}.lock`;
|
||||
const deadline = Date.now() + ACQUIRE_TIMEOUT_MS;
|
||||
const deadline = Date.now() + resolvedOptions.acquireTimeoutMs;
|
||||
|
||||
while (!tryAcquire(lockPath)) {
|
||||
while (!tryAcquire(lockPath, resolvedOptions)) {
|
||||
if (Date.now() >= deadline) {
|
||||
throw new Error(`File lock timeout: ${filePath}`);
|
||||
}
|
||||
await new Promise((resolve) => setTimeout(resolve, RETRY_INTERVAL_MS));
|
||||
await new Promise((resolve) => setTimeout(resolve, resolvedOptions.retryIntervalMs));
|
||||
}
|
||||
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -28,6 +28,11 @@ const OPENCODE_TEAM_RUNTIME_LANES_DIR = 'lanes';
|
|||
const OPENCODE_TEAM_RUNTIME_LANES_INDEX_FILE = 'lanes.json';
|
||||
const OPENCODE_RUNTIME_MANIFEST_FILE = 'manifest.json';
|
||||
const OPENCODE_RUNTIME_RUN_TOMBSTONES_FILE = 'opencode-run-tombstones.json';
|
||||
const OPENCODE_LANE_INDEX_LOCK_OPTIONS = {
|
||||
acquireTimeoutMs: 30_000,
|
||||
staleTimeoutMs: 25_000,
|
||||
retryIntervalMs: 25,
|
||||
} as const;
|
||||
|
||||
export interface OpenCodeRuntimeLaneIndexEntry {
|
||||
laneId: string;
|
||||
|
|
@ -362,9 +367,13 @@ export async function writeOpenCodeRuntimeLaneIndex(
|
|||
index: OpenCodeRuntimeLaneIndex
|
||||
): Promise<void> {
|
||||
const filePath = getOpenCodeRuntimeLaneIndexPath(teamsBasePath, teamName);
|
||||
await withFileLock(filePath, async () => {
|
||||
await writeOpenCodeRuntimeLaneIndexUnlocked(teamsBasePath, teamName, index);
|
||||
});
|
||||
await withFileLock(
|
||||
filePath,
|
||||
async () => {
|
||||
await writeOpenCodeRuntimeLaneIndexUnlocked(teamsBasePath, teamName, index);
|
||||
},
|
||||
OPENCODE_LANE_INDEX_LOCK_OPTIONS
|
||||
);
|
||||
}
|
||||
|
||||
export async function upsertOpenCodeRuntimeLaneIndexEntry(params: {
|
||||
|
|
@ -375,17 +384,24 @@ export async function upsertOpenCodeRuntimeLaneIndexEntry(params: {
|
|||
diagnostics?: string[];
|
||||
}): Promise<void> {
|
||||
const filePath = getOpenCodeRuntimeLaneIndexPath(params.teamsBasePath, params.teamName);
|
||||
await withFileLock(filePath, async () => {
|
||||
const index = await readOpenCodeRuntimeLaneIndexUnlocked(params.teamsBasePath, params.teamName);
|
||||
index.updatedAt = new Date().toISOString();
|
||||
index.lanes[params.laneId] = {
|
||||
laneId: params.laneId,
|
||||
state: params.state,
|
||||
updatedAt: index.updatedAt,
|
||||
diagnostics: params.diagnostics?.length ? [...params.diagnostics] : undefined,
|
||||
};
|
||||
await writeOpenCodeRuntimeLaneIndexUnlocked(params.teamsBasePath, params.teamName, index);
|
||||
});
|
||||
await withFileLock(
|
||||
filePath,
|
||||
async () => {
|
||||
const index = await readOpenCodeRuntimeLaneIndexUnlocked(
|
||||
params.teamsBasePath,
|
||||
params.teamName
|
||||
);
|
||||
index.updatedAt = new Date().toISOString();
|
||||
index.lanes[params.laneId] = {
|
||||
laneId: params.laneId,
|
||||
state: params.state,
|
||||
updatedAt: index.updatedAt,
|
||||
diagnostics: params.diagnostics?.length ? [...params.diagnostics] : undefined,
|
||||
};
|
||||
await writeOpenCodeRuntimeLaneIndexUnlocked(params.teamsBasePath, params.teamName, index);
|
||||
},
|
||||
OPENCODE_LANE_INDEX_LOCK_OPTIONS
|
||||
);
|
||||
}
|
||||
|
||||
export async function setOpenCodeRuntimeActiveRunManifest(params: {
|
||||
|
|
@ -463,15 +479,22 @@ export async function removeOpenCodeRuntimeLaneIndexEntry(params: {
|
|||
laneId: string;
|
||||
}): Promise<void> {
|
||||
const filePath = getOpenCodeRuntimeLaneIndexPath(params.teamsBasePath, params.teamName);
|
||||
await withFileLock(filePath, async () => {
|
||||
const index = await readOpenCodeRuntimeLaneIndexUnlocked(params.teamsBasePath, params.teamName);
|
||||
if (!index.lanes[params.laneId]) {
|
||||
return;
|
||||
}
|
||||
delete index.lanes[params.laneId];
|
||||
index.updatedAt = new Date().toISOString();
|
||||
await writeOpenCodeRuntimeLaneIndexUnlocked(params.teamsBasePath, params.teamName, index);
|
||||
});
|
||||
await withFileLock(
|
||||
filePath,
|
||||
async () => {
|
||||
const index = await readOpenCodeRuntimeLaneIndexUnlocked(
|
||||
params.teamsBasePath,
|
||||
params.teamName
|
||||
);
|
||||
if (!index.lanes[params.laneId]) {
|
||||
return;
|
||||
}
|
||||
delete index.lanes[params.laneId];
|
||||
index.updatedAt = new Date().toISOString();
|
||||
await writeOpenCodeRuntimeLaneIndexUnlocked(params.teamsBasePath, params.teamName, index);
|
||||
},
|
||||
OPENCODE_LANE_INDEX_LOCK_OPTIONS
|
||||
);
|
||||
}
|
||||
|
||||
export async function clearOpenCodeRuntimeLaneStorage(params: {
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import { promises as fs } from 'fs';
|
|||
import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
import { TeamProvisioningService } from '../../../../src/main/services/team/TeamProvisioningService';
|
||||
import type {
|
||||
|
|
@ -31,6 +31,7 @@ import {
|
|||
} from '../../../../src/main/utils/pathDecoder';
|
||||
import { createPersistedLaunchSnapshot } from '../../../../src/main/services/team/TeamLaunchStateEvaluator';
|
||||
import {
|
||||
getOpenCodeRuntimeLaneIndexPath,
|
||||
readOpenCodeRuntimeLaneIndex,
|
||||
upsertOpenCodeRuntimeLaneIndexEntry,
|
||||
} from '../../../../src/main/services/team/opencode/store/OpenCodeRuntimeManifestEvidenceReader';
|
||||
|
|
@ -2004,6 +2005,57 @@ describe('Team agent launch matrix safe e2e', () => {
|
|||
]);
|
||||
});
|
||||
|
||||
it('launches mixed OpenCode lanes after a fresh abandoned lane index lock', async () => {
|
||||
const teamName = 'mixed-opencode-abandoned-lane-lock-safe-e2e';
|
||||
await writeMixedTeamConfig({ teamName, projectPath });
|
||||
await writeTeamMeta(teamName, projectPath);
|
||||
await writeMembersMeta(teamName);
|
||||
const lockPath = `${getOpenCodeRuntimeLaneIndexPath(getTeamsBasePath(), teamName)}.lock`;
|
||||
const abandonedPid = 424_242;
|
||||
await fs.mkdir(path.dirname(lockPath), { recursive: true });
|
||||
await fs.writeFile(lockPath, `${abandonedPid}\n${Date.now()}\n`, 'utf8');
|
||||
const killSpy = vi.spyOn(process, 'kill').mockImplementation(((pid: number | string) => {
|
||||
if (pid === abandonedPid) {
|
||||
const error = new Error('process is gone') as NodeJS.ErrnoException;
|
||||
error.code = 'ESRCH';
|
||||
throw error;
|
||||
}
|
||||
return true;
|
||||
}) as typeof process.kill);
|
||||
const adapter = new FakeOpenCodeRuntimeAdapter('clean_success', {
|
||||
bob: 'confirmed',
|
||||
tom: 'confirmed',
|
||||
});
|
||||
const svc = new TeamProvisioningService();
|
||||
svc.setRuntimeAdapterRegistry(new TeamRuntimeAdapterRegistry([adapter]));
|
||||
const run = createMixedLiveRun({ teamName, projectPath });
|
||||
run.child = { kill: () => undefined };
|
||||
trackLiveRun(svc, run);
|
||||
|
||||
try {
|
||||
await (svc as any).launchMixedSecondaryLaneIfNeeded(run);
|
||||
} finally {
|
||||
killSpy.mockRestore();
|
||||
}
|
||||
|
||||
await waitForCondition(() => adapter.launchInputs.length === 2);
|
||||
await expect(readOpenCodeRuntimeLaneIndex(getTeamsBasePath(), teamName)).resolves.toMatchObject(
|
||||
{
|
||||
lanes: {
|
||||
'secondary:opencode:bob': { state: 'active' },
|
||||
'secondary:opencode:tom': { state: 'active' },
|
||||
},
|
||||
}
|
||||
);
|
||||
await waitForCondition(() =>
|
||||
run.mixedSecondaryLanes.every((lane: { state: string }) => lane.state === 'finished')
|
||||
);
|
||||
expect(run.mixedSecondaryLanes.map((lane: { state: string }) => lane.state)).toEqual([
|
||||
'finished',
|
||||
'finished',
|
||||
]);
|
||||
});
|
||||
|
||||
it('stopAllTeams stops in-flight mixed OpenCode secondary lanes without late failure degrading launch state', async () => {
|
||||
const teamName = 'mixed-opencode-stop-all-inflight-safe-e2e';
|
||||
await writeMixedTeamConfig({ teamName, projectPath });
|
||||
|
|
@ -10434,6 +10486,7 @@ describe('Team agent launch matrix safe e2e', () => {
|
|||
text: 'message recovered pure opencode lane',
|
||||
messageId: 'msg-recovered-pure-opencode',
|
||||
});
|
||||
expect(messageAdapter.messageInputs[0]?.runId).toBe(launchAdapter.launchInputs[0]?.runId);
|
||||
});
|
||||
|
||||
it('delivers direct OpenCode member messages to recovered pure OpenCode lanes despite stale terminal provisioning state', async () => {
|
||||
|
|
@ -10967,6 +11020,565 @@ describe('Team agent launch matrix safe e2e', () => {
|
|||
expect(adapter.messageInputs).toEqual([]);
|
||||
});
|
||||
|
||||
it('recovers a missing mixed OpenCode lane index from materialized persisted runtime evidence before direct delivery', async () => {
|
||||
const teamName = 'mixed-opencode-direct-message-recovers-missing-lane-safe-e2e';
|
||||
await writeMixedTeamConfig({ teamName, projectPath });
|
||||
await writeTeamMeta(teamName, projectPath);
|
||||
await writeMembersMeta(teamName);
|
||||
await fs.writeFile(
|
||||
path.join(getTeamsBasePath(), teamName, 'launch-state.json'),
|
||||
`${JSON.stringify(
|
||||
createPersistedLaunchSnapshot({
|
||||
teamName,
|
||||
expectedMembers: ['alice', 'bob', 'tom'],
|
||||
leadSessionId: 'lead-session',
|
||||
launchPhase: 'reconciled',
|
||||
members: {
|
||||
alice: {
|
||||
name: 'alice',
|
||||
providerId: 'codex',
|
||||
laneId: 'primary',
|
||||
laneKind: 'primary',
|
||||
laneOwnerProviderId: 'codex',
|
||||
launchState: 'confirmed_alive',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: true,
|
||||
bootstrapConfirmed: true,
|
||||
hardFailure: false,
|
||||
lastEvaluatedAt: '2026-04-23T10:00:00.000Z',
|
||||
},
|
||||
bob: {
|
||||
name: 'bob',
|
||||
providerId: 'opencode',
|
||||
model: 'opencode/minimax-m2.5-free',
|
||||
laneId: 'secondary:opencode:bob',
|
||||
laneKind: 'secondary',
|
||||
laneOwnerProviderId: 'opencode',
|
||||
launchState: 'failed_to_start',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: false,
|
||||
bootstrapConfirmed: false,
|
||||
hardFailure: true,
|
||||
hardFailureReason: 'OpenCode bridge reported member launch failure',
|
||||
runtimePid: 7743,
|
||||
runtimeSessionId: 'ses_bob_materialized',
|
||||
livenessKind: 'runtime_process_candidate',
|
||||
pidSource: 'opencode_bridge',
|
||||
lastEvaluatedAt: '2026-04-23T10:00:00.000Z',
|
||||
},
|
||||
tom: {
|
||||
name: 'tom',
|
||||
providerId: 'opencode',
|
||||
model: 'opencode/nemotron-3-super-free',
|
||||
laneId: 'secondary:opencode:tom',
|
||||
laneKind: 'secondary',
|
||||
laneOwnerProviderId: 'opencode',
|
||||
launchState: 'failed_to_start',
|
||||
agentToolAccepted: false,
|
||||
runtimeAlive: false,
|
||||
bootstrapConfirmed: false,
|
||||
hardFailure: true,
|
||||
hardFailureReason: 'OpenCode bridge reported member launch failure',
|
||||
lastEvaluatedAt: '2026-04-23T10:00:00.000Z',
|
||||
},
|
||||
},
|
||||
updatedAt: '2026-04-23T10:00:00.000Z',
|
||||
}),
|
||||
null,
|
||||
2
|
||||
)}\n`,
|
||||
'utf8'
|
||||
);
|
||||
const adapter = new FakeOpenCodeRuntimeAdapter('partial_pending', {
|
||||
bob: 'launching',
|
||||
tom: 'failed',
|
||||
});
|
||||
const restartedService = new TeamProvisioningService();
|
||||
restartedService.setRuntimeAdapterRegistry(new TeamRuntimeAdapterRegistry([adapter]));
|
||||
|
||||
await expect(
|
||||
readOpenCodeRuntimeLaneIndex(getTeamsBasePath(), teamName)
|
||||
).resolves.toMatchObject({ lanes: {} });
|
||||
await expect(
|
||||
restartedService.deliverOpenCodeMemberMessage(teamName, {
|
||||
memberName: 'bob',
|
||||
text: 'recovered bob receives direct message',
|
||||
messageId: 'msg-recovered-missing-lane-bob',
|
||||
})
|
||||
).resolves.toEqual({
|
||||
delivered: true,
|
||||
diagnostics: [],
|
||||
});
|
||||
|
||||
expect(adapter.reconcileInputs).toHaveLength(1);
|
||||
expect(adapter.reconcileInputs[0]).toMatchObject({
|
||||
teamName,
|
||||
laneId: 'secondary:opencode:bob',
|
||||
reason: 'startup_recovery',
|
||||
});
|
||||
await expect(readOpenCodeRuntimeLaneIndex(getTeamsBasePath(), teamName)).resolves.toMatchObject(
|
||||
{
|
||||
lanes: {
|
||||
'secondary:opencode:bob': {
|
||||
state: 'active',
|
||||
},
|
||||
},
|
||||
}
|
||||
);
|
||||
expect(adapter.messageInputs).toHaveLength(1);
|
||||
expect(adapter.messageInputs[0]).toMatchObject({
|
||||
teamName,
|
||||
laneId: 'secondary:opencode:bob',
|
||||
memberName: 'bob',
|
||||
cwd: projectPath,
|
||||
text: 'recovered bob receives direct message',
|
||||
messageId: 'msg-recovered-missing-lane-bob',
|
||||
});
|
||||
});
|
||||
|
||||
it('recovers a missing mixed OpenCode lane index from confirmed-alive persisted runtime evidence before direct delivery', async () => {
|
||||
const teamName = 'mixed-opencode-direct-message-recovers-confirmed-missing-lane-safe-e2e';
|
||||
await writeMixedTeamConfig({ teamName, projectPath });
|
||||
await writeTeamMeta(teamName, projectPath);
|
||||
await writeMembersMeta(teamName);
|
||||
await fs.writeFile(
|
||||
path.join(getTeamsBasePath(), teamName, 'launch-state.json'),
|
||||
`${JSON.stringify(
|
||||
createPersistedLaunchSnapshot({
|
||||
teamName,
|
||||
expectedMembers: ['alice', 'bob'],
|
||||
leadSessionId: 'lead-session',
|
||||
launchPhase: 'reconciled',
|
||||
members: {
|
||||
alice: {
|
||||
name: 'alice',
|
||||
providerId: 'codex',
|
||||
laneId: 'primary',
|
||||
laneKind: 'primary',
|
||||
laneOwnerProviderId: 'codex',
|
||||
launchState: 'confirmed_alive',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: true,
|
||||
bootstrapConfirmed: true,
|
||||
hardFailure: false,
|
||||
lastEvaluatedAt: '2026-04-23T10:00:00.000Z',
|
||||
},
|
||||
bob: {
|
||||
name: 'bob',
|
||||
providerId: 'opencode',
|
||||
model: 'opencode/minimax-m2.5-free',
|
||||
laneId: 'secondary:opencode:bob',
|
||||
laneKind: 'secondary',
|
||||
laneOwnerProviderId: 'opencode',
|
||||
launchState: 'confirmed_alive',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: true,
|
||||
bootstrapConfirmed: true,
|
||||
hardFailure: false,
|
||||
runtimePid: 7743,
|
||||
runtimeSessionId: 'ses_bob_confirmed_materialized',
|
||||
livenessKind: 'runtime_process',
|
||||
pidSource: 'opencode_bridge',
|
||||
lastEvaluatedAt: '2026-04-23T10:00:00.000Z',
|
||||
},
|
||||
},
|
||||
updatedAt: '2026-04-23T10:00:00.000Z',
|
||||
}),
|
||||
null,
|
||||
2
|
||||
)}\n`,
|
||||
'utf8'
|
||||
);
|
||||
const adapter = new FakeOpenCodeRuntimeAdapter('clean_success', {
|
||||
bob: 'confirmed',
|
||||
});
|
||||
const restartedService = new TeamProvisioningService();
|
||||
restartedService.setRuntimeAdapterRegistry(new TeamRuntimeAdapterRegistry([adapter]));
|
||||
|
||||
await expect(
|
||||
readOpenCodeRuntimeLaneIndex(getTeamsBasePath(), teamName)
|
||||
).resolves.toMatchObject({ lanes: {} });
|
||||
await expect(
|
||||
restartedService.deliverOpenCodeMemberMessage(teamName, {
|
||||
memberName: 'bob',
|
||||
text: 'confirmed alive missing lane recovers',
|
||||
messageId: 'msg-recovered-confirmed-missing-lane-bob',
|
||||
})
|
||||
).resolves.toEqual({
|
||||
delivered: true,
|
||||
diagnostics: [],
|
||||
});
|
||||
|
||||
expect(adapter.reconcileInputs).toHaveLength(1);
|
||||
expect(adapter.reconcileInputs[0]).toMatchObject({
|
||||
teamName,
|
||||
laneId: 'secondary:opencode:bob',
|
||||
reason: 'startup_recovery',
|
||||
});
|
||||
await expect(readOpenCodeRuntimeLaneIndex(getTeamsBasePath(), teamName)).resolves.toMatchObject(
|
||||
{
|
||||
lanes: {
|
||||
'secondary:opencode:bob': {
|
||||
state: 'active',
|
||||
},
|
||||
},
|
||||
}
|
||||
);
|
||||
expect(adapter.messageInputs[0]).toMatchObject({
|
||||
teamName,
|
||||
laneId: 'secondary:opencode:bob',
|
||||
memberName: 'bob',
|
||||
text: 'confirmed alive missing lane recovers',
|
||||
messageId: 'msg-recovered-confirmed-missing-lane-bob',
|
||||
});
|
||||
});
|
||||
|
||||
it('recovers a missing mixed OpenCode lane index before watchdog scans unread OpenCode inbox', async () => {
|
||||
const teamName = 'mixed-opencode-watchdog-recovers-missing-lane-safe-e2e';
|
||||
await writeMixedTeamConfig({ teamName, projectPath });
|
||||
await writeTeamMeta(teamName, projectPath);
|
||||
await writeMembersMeta(teamName);
|
||||
await fs.writeFile(
|
||||
path.join(getTeamsBasePath(), teamName, 'launch-state.json'),
|
||||
`${JSON.stringify(
|
||||
createPersistedLaunchSnapshot({
|
||||
teamName,
|
||||
expectedMembers: ['alice', 'bob', 'tom'],
|
||||
leadSessionId: 'lead-session',
|
||||
launchPhase: 'reconciled',
|
||||
members: {
|
||||
alice: {
|
||||
name: 'alice',
|
||||
providerId: 'codex',
|
||||
laneId: 'primary',
|
||||
laneKind: 'primary',
|
||||
laneOwnerProviderId: 'codex',
|
||||
launchState: 'confirmed_alive',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: true,
|
||||
bootstrapConfirmed: true,
|
||||
hardFailure: false,
|
||||
lastEvaluatedAt: '2026-04-23T10:00:00.000Z',
|
||||
},
|
||||
bob: {
|
||||
name: 'bob',
|
||||
providerId: 'opencode',
|
||||
model: 'opencode/minimax-m2.5-free',
|
||||
laneId: 'secondary:opencode:bob',
|
||||
laneKind: 'secondary',
|
||||
laneOwnerProviderId: 'opencode',
|
||||
launchState: 'failed_to_start',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: false,
|
||||
bootstrapConfirmed: false,
|
||||
hardFailure: true,
|
||||
hardFailureReason: 'OpenCode bridge reported member launch failure',
|
||||
runtimePid: 7743,
|
||||
runtimeSessionId: 'ses_bob_materialized',
|
||||
livenessKind: 'runtime_process_candidate',
|
||||
pidSource: 'opencode_bridge',
|
||||
lastEvaluatedAt: '2026-04-23T10:00:00.000Z',
|
||||
},
|
||||
tom: {
|
||||
name: 'tom',
|
||||
providerId: 'opencode',
|
||||
model: 'opencode/nemotron-3-super-free',
|
||||
laneId: 'secondary:opencode:tom',
|
||||
laneKind: 'secondary',
|
||||
laneOwnerProviderId: 'opencode',
|
||||
launchState: 'failed_to_start',
|
||||
agentToolAccepted: false,
|
||||
runtimeAlive: false,
|
||||
bootstrapConfirmed: false,
|
||||
hardFailure: true,
|
||||
hardFailureReason: 'OpenCode bridge reported member launch failure',
|
||||
lastEvaluatedAt: '2026-04-23T10:00:00.000Z',
|
||||
},
|
||||
},
|
||||
updatedAt: '2026-04-23T10:00:00.000Z',
|
||||
}),
|
||||
null,
|
||||
2
|
||||
)}\n`,
|
||||
'utf8'
|
||||
);
|
||||
const inboxDir = path.join(getTeamsBasePath(), teamName, 'inboxes');
|
||||
await fs.mkdir(inboxDir, { recursive: true });
|
||||
await fs.writeFile(
|
||||
path.join(inboxDir, 'bob.json'),
|
||||
`${JSON.stringify(
|
||||
[
|
||||
{
|
||||
from: 'user',
|
||||
to: 'bob',
|
||||
text: 'recover this unread OpenCode message',
|
||||
timestamp: '2026-04-23T10:01:00.000Z',
|
||||
read: false,
|
||||
messageId: 'msg-watchdog-recovers-missing-lane-bob',
|
||||
},
|
||||
],
|
||||
null,
|
||||
2
|
||||
)}\n`,
|
||||
'utf8'
|
||||
);
|
||||
const adapter = new FakeOpenCodeRuntimeAdapter('partial_pending', {
|
||||
bob: 'launching',
|
||||
tom: 'failed',
|
||||
});
|
||||
const restartedService = new TeamProvisioningService();
|
||||
restartedService.setRuntimeAdapterRegistry(new TeamRuntimeAdapterRegistry([adapter]));
|
||||
const scheduledWatchdogJobs: unknown[] = [];
|
||||
(restartedService as any).scheduleOpenCodePromptDeliveryWatchdog = (input: unknown): void => {
|
||||
scheduledWatchdogJobs.push(input);
|
||||
};
|
||||
|
||||
await expect(
|
||||
readOpenCodeRuntimeLaneIndex(getTeamsBasePath(), teamName)
|
||||
).resolves.toMatchObject({ lanes: {} });
|
||||
|
||||
await expect(restartedService.scanOpenCodePromptDeliveryWatchdog(teamName)).resolves.toBe(1);
|
||||
|
||||
expect(adapter.reconcileInputs).toHaveLength(1);
|
||||
expect(adapter.reconcileInputs[0]).toMatchObject({
|
||||
teamName,
|
||||
laneId: 'secondary:opencode:bob',
|
||||
reason: 'startup_recovery',
|
||||
});
|
||||
await expect(readOpenCodeRuntimeLaneIndex(getTeamsBasePath(), teamName)).resolves.toMatchObject(
|
||||
{
|
||||
lanes: {
|
||||
'secondary:opencode:bob': {
|
||||
state: 'active',
|
||||
},
|
||||
},
|
||||
}
|
||||
);
|
||||
expect(scheduledWatchdogJobs).toEqual([
|
||||
expect.objectContaining({
|
||||
teamName,
|
||||
memberName: 'bob',
|
||||
messageId: 'msg-watchdog-recovers-missing-lane-bob',
|
||||
delayMs: 500,
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
it('recovers one missing mixed OpenCode lane before watchdog scans while sibling lane is active', async () => {
|
||||
const teamName = 'mixed-opencode-watchdog-recovers-one-missing-lane-safe-e2e';
|
||||
await writeMixedTeamConfig({ teamName, projectPath });
|
||||
await writeTeamMeta(teamName, projectPath);
|
||||
await writeMembersMeta(teamName);
|
||||
await fs.writeFile(
|
||||
path.join(getTeamsBasePath(), teamName, 'launch-state.json'),
|
||||
`${JSON.stringify(
|
||||
createPersistedLaunchSnapshot({
|
||||
teamName,
|
||||
expectedMembers: ['alice', 'bob', 'tom'],
|
||||
leadSessionId: 'lead-session',
|
||||
launchPhase: 'reconciled',
|
||||
members: {
|
||||
alice: {
|
||||
name: 'alice',
|
||||
providerId: 'codex',
|
||||
laneId: 'primary',
|
||||
laneKind: 'primary',
|
||||
laneOwnerProviderId: 'codex',
|
||||
launchState: 'confirmed_alive',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: true,
|
||||
bootstrapConfirmed: true,
|
||||
hardFailure: false,
|
||||
lastEvaluatedAt: '2026-04-23T10:00:00.000Z',
|
||||
},
|
||||
bob: {
|
||||
name: 'bob',
|
||||
providerId: 'opencode',
|
||||
model: 'opencode/minimax-m2.5-free',
|
||||
laneId: 'secondary:opencode:bob',
|
||||
laneKind: 'secondary',
|
||||
laneOwnerProviderId: 'opencode',
|
||||
launchState: 'failed_to_start',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: false,
|
||||
bootstrapConfirmed: false,
|
||||
hardFailure: true,
|
||||
hardFailureReason: 'OpenCode bridge reported member launch failure',
|
||||
runtimePid: 7743,
|
||||
runtimeSessionId: 'ses_bob_materialized',
|
||||
livenessKind: 'runtime_process_candidate',
|
||||
pidSource: 'opencode_bridge',
|
||||
lastEvaluatedAt: '2026-04-23T10:00:00.000Z',
|
||||
},
|
||||
tom: {
|
||||
name: 'tom',
|
||||
providerId: 'opencode',
|
||||
model: 'opencode/nemotron-3-super-free',
|
||||
laneId: 'secondary:opencode:tom',
|
||||
laneKind: 'secondary',
|
||||
laneOwnerProviderId: 'opencode',
|
||||
launchState: 'confirmed_alive',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: true,
|
||||
bootstrapConfirmed: true,
|
||||
hardFailure: false,
|
||||
runtimePid: 7750,
|
||||
runtimeSessionId: 'ses_tom_active',
|
||||
livenessKind: 'runtime_process',
|
||||
pidSource: 'opencode_bridge',
|
||||
lastEvaluatedAt: '2026-04-23T10:00:00.000Z',
|
||||
},
|
||||
},
|
||||
updatedAt: '2026-04-23T10:00:00.000Z',
|
||||
}),
|
||||
null,
|
||||
2
|
||||
)}\n`,
|
||||
'utf8'
|
||||
);
|
||||
await upsertOpenCodeRuntimeLaneIndexEntry({
|
||||
teamsBasePath: getTeamsBasePath(),
|
||||
teamName,
|
||||
laneId: 'secondary:opencode:tom',
|
||||
state: 'active',
|
||||
});
|
||||
const inboxDir = path.join(getTeamsBasePath(), teamName, 'inboxes');
|
||||
await fs.mkdir(inboxDir, { recursive: true });
|
||||
await fs.writeFile(
|
||||
path.join(inboxDir, 'bob.json'),
|
||||
`${JSON.stringify(
|
||||
[
|
||||
{
|
||||
from: 'user',
|
||||
to: 'bob',
|
||||
text: 'recover only bob while tom stays active',
|
||||
timestamp: '2026-04-23T10:01:00.000Z',
|
||||
read: false,
|
||||
messageId: 'msg-watchdog-recovers-one-missing-lane-bob',
|
||||
},
|
||||
],
|
||||
null,
|
||||
2
|
||||
)}\n`,
|
||||
'utf8'
|
||||
);
|
||||
const adapter = new FakeOpenCodeRuntimeAdapter('partial_pending', {
|
||||
bob: 'launching',
|
||||
tom: 'confirmed',
|
||||
});
|
||||
const restartedService = new TeamProvisioningService();
|
||||
restartedService.setRuntimeAdapterRegistry(new TeamRuntimeAdapterRegistry([adapter]));
|
||||
const scheduledWatchdogJobs: unknown[] = [];
|
||||
(restartedService as any).scheduleOpenCodePromptDeliveryWatchdog = (input: unknown): void => {
|
||||
scheduledWatchdogJobs.push(input);
|
||||
};
|
||||
|
||||
await expect(restartedService.scanOpenCodePromptDeliveryWatchdog(teamName)).resolves.toBe(1);
|
||||
|
||||
expect(adapter.reconcileInputs).toHaveLength(1);
|
||||
expect(adapter.reconcileInputs[0]).toMatchObject({
|
||||
teamName,
|
||||
laneId: 'secondary:opencode:bob',
|
||||
reason: 'startup_recovery',
|
||||
});
|
||||
await expect(readOpenCodeRuntimeLaneIndex(getTeamsBasePath(), teamName)).resolves.toMatchObject(
|
||||
{
|
||||
lanes: {
|
||||
'secondary:opencode:bob': {
|
||||
state: 'active',
|
||||
},
|
||||
'secondary:opencode:tom': {
|
||||
state: 'active',
|
||||
},
|
||||
},
|
||||
}
|
||||
);
|
||||
expect(scheduledWatchdogJobs).toEqual([
|
||||
expect.objectContaining({
|
||||
teamName,
|
||||
memberName: 'bob',
|
||||
messageId: 'msg-watchdog-recovers-one-missing-lane-bob',
|
||||
delayMs: 500,
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
it('does not recover a missing mixed OpenCode lane index from liveness-only persisted metadata', async () => {
|
||||
const teamName = 'mixed-opencode-direct-message-liveness-only-missing-lane-safe-e2e';
|
||||
await writeMixedTeamConfig({ teamName, projectPath });
|
||||
await writeTeamMeta(teamName, projectPath);
|
||||
await writeMembersMeta(teamName);
|
||||
await fs.writeFile(
|
||||
path.join(getTeamsBasePath(), teamName, 'launch-state.json'),
|
||||
`${JSON.stringify(
|
||||
createPersistedLaunchSnapshot({
|
||||
teamName,
|
||||
expectedMembers: ['alice', 'bob'],
|
||||
leadSessionId: 'lead-session',
|
||||
launchPhase: 'reconciled',
|
||||
members: {
|
||||
alice: {
|
||||
name: 'alice',
|
||||
providerId: 'codex',
|
||||
laneId: 'primary',
|
||||
laneKind: 'primary',
|
||||
laneOwnerProviderId: 'codex',
|
||||
launchState: 'confirmed_alive',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: true,
|
||||
bootstrapConfirmed: true,
|
||||
hardFailure: false,
|
||||
lastEvaluatedAt: '2026-04-23T10:00:00.000Z',
|
||||
},
|
||||
bob: {
|
||||
name: 'bob',
|
||||
providerId: 'opencode',
|
||||
model: 'opencode/minimax-m2.5-free',
|
||||
laneId: 'secondary:opencode:bob',
|
||||
laneKind: 'secondary',
|
||||
laneOwnerProviderId: 'opencode',
|
||||
launchState: 'failed_to_start',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: false,
|
||||
bootstrapConfirmed: false,
|
||||
hardFailure: true,
|
||||
hardFailureReason: 'OpenCode bridge reported member launch failure',
|
||||
livenessKind: 'runtime_process_candidate',
|
||||
pidSource: 'opencode_bridge',
|
||||
lastEvaluatedAt: '2026-04-23T10:00:00.000Z',
|
||||
},
|
||||
},
|
||||
updatedAt: '2026-04-23T10:00:00.000Z',
|
||||
}),
|
||||
null,
|
||||
2
|
||||
)}\n`,
|
||||
'utf8'
|
||||
);
|
||||
const adapter = new FakeOpenCodeRuntimeAdapter('partial_pending', {
|
||||
bob: 'launching',
|
||||
});
|
||||
const restartedService = new TeamProvisioningService();
|
||||
restartedService.setRuntimeAdapterRegistry(new TeamRuntimeAdapterRegistry([adapter]));
|
||||
|
||||
await expect(
|
||||
restartedService.deliverOpenCodeMemberMessage(teamName, {
|
||||
memberName: 'bob',
|
||||
text: 'must not recover from liveness-only stale metadata',
|
||||
messageId: 'msg-liveness-only-missing-lane-bob',
|
||||
})
|
||||
).resolves.toEqual({
|
||||
delivered: false,
|
||||
reason: 'opencode_runtime_not_active',
|
||||
});
|
||||
|
||||
expect(adapter.reconcileInputs).toEqual([]);
|
||||
expect(adapter.messageInputs).toEqual([]);
|
||||
await expect(readOpenCodeRuntimeLaneIndex(getTeamsBasePath(), teamName)).resolves.toMatchObject(
|
||||
{ lanes: {} }
|
||||
);
|
||||
});
|
||||
|
||||
it('does not deliver direct OpenCode member messages to one detached mixed lane while its sibling lane stays live', async () => {
|
||||
const teamName = 'mixed-opencode-direct-message-one-detached-lane-safe-e2e';
|
||||
await writeMixedTeamConfig({ teamName, projectPath });
|
||||
|
|
|
|||
|
|
@ -10955,6 +10955,152 @@ describe('TeamProvisioningService', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('recovers missing mixed secondary lane index from materialized OpenCode runtime evidence', async () => {
|
||||
const teamName = 'relay-works-missing-lane-recovery';
|
||||
writeTeamMeta(teamName, {
|
||||
providerId: 'codex',
|
||||
providerBackendId: 'codex-native',
|
||||
model: 'gpt-5.4',
|
||||
});
|
||||
writeMembersMeta(teamName, [
|
||||
{
|
||||
name: 'atlas',
|
||||
providerId: 'opencode',
|
||||
model: 'opencode/nemotron-3-super-free',
|
||||
},
|
||||
{
|
||||
name: 'bob',
|
||||
providerId: 'codex',
|
||||
model: 'gpt-5.4',
|
||||
},
|
||||
]);
|
||||
writeLaunchConfig(teamName, '/Users/test/proj', 'lead-session', ['bob']);
|
||||
writeBootstrapState(teamName, [{ name: 'bob', status: 'registered' }]);
|
||||
fs.writeFileSync(
|
||||
getTeamLaunchStatePath(teamName),
|
||||
`${JSON.stringify(
|
||||
{
|
||||
version: 2,
|
||||
teamName,
|
||||
updatedAt: '2026-04-23T10:00:00.000Z',
|
||||
expectedMembers: ['atlas', 'bob'],
|
||||
bootstrapExpectedMembers: ['bob'],
|
||||
leadSessionId: 'lead-session',
|
||||
launchPhase: 'reconciled',
|
||||
members: {
|
||||
atlas: {
|
||||
name: 'atlas',
|
||||
providerId: 'opencode',
|
||||
model: 'opencode/nemotron-3-super-free',
|
||||
laneId: 'secondary:opencode:atlas',
|
||||
laneKind: 'secondary',
|
||||
laneOwnerProviderId: 'opencode',
|
||||
launchState: 'failed_to_start',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: false,
|
||||
bootstrapConfirmed: false,
|
||||
hardFailure: true,
|
||||
hardFailureReason: 'OpenCode bridge reported member launch failure',
|
||||
runtimePid: 44123,
|
||||
runtimeSessionId: 'ses_atlas_materialized',
|
||||
livenessKind: 'runtime_process_candidate',
|
||||
pidSource: 'opencode_bridge',
|
||||
lastEvaluatedAt: '2026-04-23T10:00:00.000Z',
|
||||
},
|
||||
bob: {
|
||||
name: 'bob',
|
||||
providerId: 'codex',
|
||||
laneId: 'primary',
|
||||
laneKind: 'primary',
|
||||
laneOwnerProviderId: 'codex',
|
||||
launchState: 'confirmed_alive',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: true,
|
||||
bootstrapConfirmed: true,
|
||||
hardFailure: false,
|
||||
lastEvaluatedAt: '2026-04-23T10:00:00.000Z',
|
||||
},
|
||||
},
|
||||
summary: {
|
||||
confirmedCount: 1,
|
||||
pendingCount: 0,
|
||||
failedCount: 1,
|
||||
runtimeAlivePendingCount: 0,
|
||||
},
|
||||
teamLaunchState: 'partial_failure',
|
||||
},
|
||||
null,
|
||||
2
|
||||
)}\n`,
|
||||
'utf8'
|
||||
);
|
||||
|
||||
const adapterReconcile = vi.fn(async (input: Record<string, unknown>) => {
|
||||
const member = (input.expectedMembers as Array<{ name: string }>)[0]?.name;
|
||||
return {
|
||||
runId: String(input.runId),
|
||||
teamName,
|
||||
launchPhase: 'reconciled',
|
||||
teamLaunchState: 'partial_pending',
|
||||
members: member
|
||||
? {
|
||||
[member]: {
|
||||
memberName: member,
|
||||
providerId: 'opencode',
|
||||
launchState: 'runtime_pending_bootstrap',
|
||||
agentToolAccepted: true,
|
||||
runtimeAlive: false,
|
||||
bootstrapConfirmed: false,
|
||||
hardFailure: false,
|
||||
runtimePid: 44123,
|
||||
sessionId: 'ses_atlas_materialized',
|
||||
livenessKind: 'runtime_process_candidate',
|
||||
diagnostics: ['runtime process candidate recovered'],
|
||||
},
|
||||
}
|
||||
: {},
|
||||
snapshot: null,
|
||||
warnings: [],
|
||||
diagnostics: ['fake reconcile recovered materialized runtime'],
|
||||
};
|
||||
});
|
||||
const svc = new TeamProvisioningService();
|
||||
svc.setRuntimeAdapterRegistry(
|
||||
new TeamRuntimeAdapterRegistry([
|
||||
{
|
||||
providerId: 'opencode',
|
||||
prepare: vi.fn(),
|
||||
launch: vi.fn(),
|
||||
reconcile: adapterReconcile,
|
||||
stop: vi.fn(),
|
||||
} as any,
|
||||
])
|
||||
);
|
||||
|
||||
const result = await svc.getMemberSpawnStatuses(teamName);
|
||||
|
||||
expect(adapterReconcile).toHaveBeenCalledTimes(1);
|
||||
expect(adapterReconcile).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
teamName,
|
||||
laneId: 'secondary:opencode:atlas',
|
||||
reason: 'startup_recovery',
|
||||
})
|
||||
);
|
||||
expect(result.expectedMembers).toEqual(expect.arrayContaining(['atlas', 'bob']));
|
||||
expect(result.statuses.atlas).toMatchObject({
|
||||
status: 'waiting',
|
||||
launchState: 'runtime_pending_bootstrap',
|
||||
});
|
||||
await expect(readOpenCodeRuntimeLaneIndex(tempTeamsBase, teamName)).resolves.toMatchObject({
|
||||
lanes: {
|
||||
'secondary:opencode:atlas': {
|
||||
state: 'active',
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('reconciles stale persisted mixed pending OpenCode lanes instead of keeping them pending forever', async () => {
|
||||
const teamName = 'signal-ops-7';
|
||||
writeTeamMeta(teamName, {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
import { withFileLock } from '@main/services/team/fileLock';
|
||||
|
||||
|
|
@ -67,6 +67,29 @@ describe('withFileLock', () => {
|
|||
expect(result).toBe('ok');
|
||||
});
|
||||
|
||||
it('removes a fresh abandoned lock when the owner process is gone', async () => {
|
||||
const lockPath = `${testFile}.lock`;
|
||||
const abandonedPid = 424_242;
|
||||
fs.writeFileSync(lockPath, `${abandonedPid}\n${Date.now()}\n`, 'utf8');
|
||||
const killSpy = vi.spyOn(process, 'kill').mockImplementation(((pid: number | string) => {
|
||||
if (pid === abandonedPid) {
|
||||
const error = new Error('process is gone') as NodeJS.ErrnoException;
|
||||
error.code = 'ESRCH';
|
||||
throw error;
|
||||
}
|
||||
return true;
|
||||
}) as typeof process.kill);
|
||||
|
||||
try {
|
||||
const result = await withFileLock(testFile, async () => 'ok');
|
||||
|
||||
expect(result).toBe('ok');
|
||||
expect(fs.existsSync(lockPath)).toBe(false);
|
||||
} finally {
|
||||
killSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
|
||||
it('creates parent directories for lock file', async () => {
|
||||
const nested = path.join(tmpDir, 'a', 'b', 'deep.json');
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue