diff --git a/runtime.lock.json b/runtime.lock.json index 4b46f212..481af2ff 100644 --- a/runtime.lock.json +++ b/runtime.lock.json @@ -1,27 +1,27 @@ { - "version": "0.0.16", - "sourceRef": "v0.0.16", + "version": "0.0.17", + "sourceRef": "v0.0.17", "sourceRepository": "777genius/agent_teams_orchestrator", "releaseRepository": "777genius/claude_agent_teams_ui", "releaseTag": "v1.2.0", "assets": { "darwin-arm64": { - "file": "agent-teams-runtime-darwin-arm64-v0.0.16.tar.gz", + "file": "agent-teams-runtime-darwin-arm64-v0.0.17.tar.gz", "archiveKind": "tar.gz", "binaryName": "claude-multimodel" }, "darwin-x64": { - "file": "agent-teams-runtime-darwin-x64-v0.0.16.tar.gz", + "file": "agent-teams-runtime-darwin-x64-v0.0.17.tar.gz", "archiveKind": "tar.gz", "binaryName": "claude-multimodel" }, "linux-x64": { - "file": "agent-teams-runtime-linux-x64-v0.0.16.tar.gz", + "file": "agent-teams-runtime-linux-x64-v0.0.17.tar.gz", "archiveKind": "tar.gz", "binaryName": "claude-multimodel" }, "win32-x64": { - "file": "agent-teams-runtime-win32-x64-v0.0.16.zip", + "file": "agent-teams-runtime-win32-x64-v0.0.17.zip", "archiveKind": "zip", "binaryName": "claude-multimodel.exe" } diff --git a/src/features/team-runtime-lanes/core/domain/buildMixedPersistedLaunchSnapshot.ts b/src/features/team-runtime-lanes/core/domain/buildMixedPersistedLaunchSnapshot.ts index 1b58bcca..d5715e62 100644 --- a/src/features/team-runtime-lanes/core/domain/buildMixedPersistedLaunchSnapshot.ts +++ b/src/features/team-runtime-lanes/core/domain/buildMixedPersistedLaunchSnapshot.ts @@ -48,6 +48,7 @@ export interface MixedSecondaryLaneMemberStateInput { runtimeDiagnostic?: string; runtimeDiagnosticSeverity?: TeamAgentRuntimeDiagnosticSeverity; bootstrapStalled?: boolean; + firstSpawnAcceptedAt?: string; diagnostics?: string[]; } | null; pendingReason?: string; @@ -108,6 +109,59 @@ function hasMaterializedOpenCodeRuntimeMarker(value: { ); } +const OPENCODE_MEMBER_SESSION_RECORDED_AT_PATTERN = + /\bmember_session_recorded\s+at\s+([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9:.+-]+Z?)\b/i; + +function normalizeIsoTimestamp(value: unknown): string | null { + if (typeof value !== 'string') { + return null; + } + const trimmed = value.trim(); + if (!trimmed) { + return null; + } + const parsed = Date.parse(trimmed); + return Number.isFinite(parsed) ? new Date(parsed).toISOString() : null; +} + +function selectEarliestIsoTimestamp(values: readonly unknown[]): string | undefined { + let selected: { value: string; timeMs: number } | null = null; + for (const value of values) { + const normalized = normalizeIsoTimestamp(value); + if (!normalized) { + continue; + } + const timeMs = Date.parse(normalized); + if (!selected || timeMs < selected.timeMs) { + selected = { value: normalized, timeMs }; + } + } + return selected?.value; +} + +function extractOpenCodeMemberSessionRecordedAt( + diagnostics: readonly string[] | undefined +): string[] { + return (diagnostics ?? []).flatMap((diagnostic) => { + const match = diagnostic.match(OPENCODE_MEMBER_SESSION_RECORDED_AT_PATTERN); + return match?.[1] ? [match[1]] : []; + }); +} + +function resolveOpenCodeSecondaryFirstSpawnAcceptedAt( + evidence: NonNullable, + fallbackUpdatedAt: string +): string | undefined { + if (evidence.agentToolAccepted !== true) { + return undefined; + } + return selectEarliestIsoTimestamp([ + evidence.firstSpawnAcceptedAt, + ...extractOpenCodeMemberSessionRecordedAt(evidence.diagnostics), + fallbackUpdatedAt, + ]); +} + function buildDiagnostics( member: Pick< PersistedTeamLaunchMemberState, @@ -252,6 +306,9 @@ function createSecondaryLaneMemberState( }); const hardFailure = evidence?.hardFailure === true || launchState === 'failed_to_start'; const hardFailureReason = hardFailure ? evidence?.hardFailureReason : undefined; + const firstSpawnAcceptedAt = evidence + ? resolveOpenCodeSecondaryFirstSpawnAcceptedAt(evidence, params.updatedAt) + : undefined; const base: PersistedTeamLaunchMemberState = { name: params.member.name.trim(), providerId, @@ -304,7 +361,7 @@ function createSecondaryLaneMemberState( hardFailure !== true ? true : undefined, - firstSpawnAcceptedAt: evidence?.agentToolAccepted ? params.updatedAt : undefined, + firstSpawnAcceptedAt, lastHeartbeatAt: evidence?.bootstrapConfirmed ? params.updatedAt : undefined, runtimeLastSeenAt: strongRuntimeAlive ? params.updatedAt : undefined, lastRuntimeAliveAt: strongRuntimeAlive ? params.updatedAt : undefined, diff --git a/src/main/services/team/TeamProvisioningService.ts b/src/main/services/team/TeamProvisioningService.ts index e76d0832..9e92316f 100644 --- a/src/main/services/team/TeamProvisioningService.ts +++ b/src/main/services/team/TeamProvisioningService.ts @@ -2220,6 +2220,148 @@ function isPersistedOpenCodeSecondaryLaneMember( ); } +const OPENCODE_BOOTSTRAP_CHECKIN_RETRY_SENT_PREFIX = 'opencode_bootstrap_checkin_retry_prompt_sent'; + +function getOpenCodeBootstrapCheckinRetryMarker(runId: string, runtimeSessionId: string): string { + return `${OPENCODE_BOOTSTRAP_CHECKIN_RETRY_SENT_PREFIX}:${runId}:${runtimeSessionId}`; +} + +const OPENCODE_MEMBER_SESSION_RECORDED_AT_PATTERN = + /\bmember_session_recorded\s+at\s+([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9:.+-]+Z?)\b/i; + +function normalizeIsoTimestamp(value: unknown): string | null { + if (typeof value !== 'string') { + return null; + } + const trimmed = value.trim(); + if (!trimmed) { + return null; + } + const parsed = Date.parse(trimmed); + return Number.isFinite(parsed) ? new Date(parsed).toISOString() : null; +} + +function selectEarliestIsoTimestamp(values: readonly unknown[]): string | undefined { + let selected: { value: string; timeMs: number } | null = null; + for (const value of values) { + const normalized = normalizeIsoTimestamp(value); + if (!normalized) { + continue; + } + const timeMs = Date.parse(normalized); + if (!selected || timeMs < selected.timeMs) { + selected = { value: normalized, timeMs }; + } + } + return selected?.value; +} + +function extractOpenCodeMemberSessionRecordedAt( + diagnostics: readonly string[] | undefined +): string[] { + return (diagnostics ?? []).flatMap((diagnostic) => { + const match = diagnostic.match(OPENCODE_MEMBER_SESSION_RECORDED_AT_PATTERN); + return match?.[1] ? [match[1]] : []; + }); +} + +function resolveOpenCodeBootstrapAcceptedAt( + member: Pick +): string | undefined { + return selectEarliestIsoTimestamp([ + member.firstSpawnAcceptedAt, + ...extractOpenCodeMemberSessionRecordedAt(member.diagnostics), + ]); +} + +function hasOpenCodeSecondaryFatalBootstrapDiagnostic( + member: Pick< + PersistedTeamLaunchMemberState, + 'diagnostics' | 'runtimeDiagnostic' | 'hardFailureReason' + > +): boolean { + const text = [member.runtimeDiagnostic, member.hardFailureReason, ...(member.diagnostics ?? [])] + .filter((value): value is string => typeof value === 'string' && value.trim().length > 0) + .join('\n') + .toLowerCase(); + return text.length > 0 && hasRealOpenCodeFailureDiagnostic(text); +} + +function selectOpenCodeSecondaryBootstrapStallDiagnostic( + values: readonly unknown[] +): string | null { + const normalizedValues = values + .filter((value): value is string => typeof value === 'string') + .map((value) => normalizeOpenCodePersistedFailureReason(value)) + .filter((value): value is string => typeof value === 'string' && value.length > 0); + + const runtimeCheckinDiagnostic = normalizedValues.find((value) => + value.toLowerCase().includes('runtime_bootstrap_checkin') + ); + if (runtimeCheckinDiagnostic) { + return runtimeCheckinDiagnostic; + } + + const memberBriefingDiagnostic = normalizedValues.find((value) => + value.toLowerCase().includes('member_briefing') + ); + if (memberBriefingDiagnostic) { + return `${memberBriefingDiagnostic}; runtime_bootstrap_checkin did not complete after 5 min.`; + } + + return null; +} + +function getOpenCodeSecondaryBootstrapStallDiagnosticFromPersisted( + member: PersistedTeamLaunchMemberState +): string { + const selected = selectOpenCodeSecondaryBootstrapStallDiagnostic([ + member.runtimeDiagnostic, + ...(member.diagnostics ?? []), + member.hardFailureReason, + ]); + if (selected) { + return selected; + } + + return 'OpenCode bootstrap did not complete runtime_bootstrap_checkin after 5 min.'; +} + +function shouldMarkPersistedOpenCodeBootstrapStalled( + member: PersistedTeamLaunchMemberState, + nowMs: number +): boolean { + if (!isPersistedOpenCodeSecondaryLaneMember(member)) { + return false; + } + if ( + member.launchState !== 'runtime_pending_bootstrap' || + member.bootstrapConfirmed === true || + member.hardFailure === true || + member.skippedForLaunch === true || + (member.pendingPermissionRequestIds?.length ?? 0) > 0 + ) { + return false; + } + if (hasOpenCodeSecondaryFatalBootstrapDiagnostic(member)) { + return false; + } + const acceptedAt = resolveOpenCodeBootstrapAcceptedAt(member); + const acceptedAtMs = acceptedAt ? Date.parse(acceptedAt) : NaN; + if (!Number.isFinite(acceptedAtMs) || nowMs - acceptedAtMs < MEMBER_BOOTSTRAP_STALL_MS) { + return false; + } + return ( + hasOpenCodeRuntimeHandle(member) || + hasOpenCodeRuntimeLivenessMarker(member) || + hasRecoverableOpenCodeBootstrapDiagnostic( + [member.runtimeDiagnostic, ...(member.diagnostics ?? [])].filter( + (value): value is string => typeof value === 'string' + ) + ) + ); +} + function namesMatchCaseInsensitive(left: string, right: string): boolean { return left.trim().toLowerCase() === right.trim().toLowerCase(); } @@ -2506,6 +2648,15 @@ function isRegisteredRuntimeMetadataFailureReason(reason?: string): boolean { return reason?.trim() === 'registered runtime metadata without live process'; } +function isBootstrapMcpResourceReadFailureReason(reason?: string): boolean { + const text = reason?.trim().toLowerCase() ?? ''; + return ( + text.includes('resources/read failed') && + text.includes('member_briefing') && + (text.includes('method not found') || text.includes('mcp error')) + ); +} + function isTmuxNoServerRunningError(error: unknown): boolean { const text = error instanceof Error ? error.message : String(error ?? ''); return ( @@ -2520,7 +2671,8 @@ function isAutoClearableLaunchFailureReason(reason?: string): boolean { isLaunchGraceWindowFailureReason(reason) || isConfigRegistrationFailureReason(reason) || isRegisteredRuntimeMetadataFailureReason(reason) || - isOpenCodeBridgeLaunchFailureReason(reason) + isOpenCodeBridgeLaunchFailureReason(reason) || + isBootstrapMcpResourceReadFailureReason(reason) ); } @@ -6384,6 +6536,12 @@ export class TeamProvisioningService { taskRefs?: TaskRef[]; ledgerRecord?: OpenCodePromptDeliveryLedgerRecord | null; }): boolean { + if (this.isOpenCodeDirectUserPromptDelivery(input.ledgerRecord)) { + return Boolean( + input.ledgerRecord?.visibleReplyInbox?.trim() && + input.ledgerRecord?.visibleReplyMessageId?.trim() + ); + } const preview = input.ledgerRecord?.observedAssistantPreview?.trim(); if (!preview) { return true; @@ -6419,6 +6577,13 @@ export class TeamProvisioningService { ) { return 'plain_text_ack_only_still_requires_answer'; } + if ( + this.isOpenCodeDirectUserPromptDelivery(record) && + !record?.visibleReplyMessageId && + !record?.inboxReadCommittedAt + ) { + return 'plain_text_visible_reply_not_materialized_yet'; + } } if (state === 'responded_visible_message' && !input.visibleReply) { return 'visible_reply_destination_not_found_yet'; @@ -6508,6 +6673,12 @@ export class TeamProvisioningService { return diagnostics.some((diagnostic) => isProbeTimeoutMessage(diagnostic)); } + private isOpenCodeDirectUserPromptDelivery( + ledgerRecord?: OpenCodePromptDeliveryLedgerRecord | null + ): boolean { + return ledgerRecord?.replyRecipient?.trim().toLowerCase() === 'user'; + } + private isOpenCodePromptDeliveryWatchdogEnabled(): boolean { const enabled = process.env.CLAUDE_TEAM_OPENCODE_PROMPT_DELIVERY_WATCHDOG !== '0'; if (!enabled && !this.openCodePromptDeliveryWatchdogDisabledLogged) { @@ -6678,6 +6849,128 @@ export class TeamProvisioningService { return { ledgerRecord, visibleReply }; } + private buildOpenCodePlainTextVisibleReplyMessageId( + record: OpenCodePromptDeliveryLedgerRecord + ): string { + const safeId = record.id.replace(/[^a-zA-Z0-9_-]/g, '-').slice(0, 96); + return `opencode-plain-reply-${safeId}`; + } + + private buildOpenCodePlainTextVisibleReplySummary(text: string): string { + const normalized = text.replace(/\s+/g, ' ').trim(); + return normalized.length > 120 ? `${normalized.slice(0, 117).trimEnd()}...` : normalized; + } + + private async materializeOpenCodePlainTextReplyIfNeeded(input: { + ledger: OpenCodePromptDeliveryLedgerStore; + ledgerRecord: OpenCodePromptDeliveryLedgerRecord; + teamName: string; + memberName: string; + visibleReply?: OpenCodeVisibleReplyProof | null; + }): Promise<{ + ledgerRecord: OpenCodePromptDeliveryLedgerRecord; + visibleReply: OpenCodeVisibleReplyProof | null; + }> { + if (input.visibleReply) { + return { ledgerRecord: input.ledgerRecord, visibleReply: input.visibleReply }; + } + if ( + input.ledgerRecord.responseState !== 'responded_plain_text' || + !this.isOpenCodeDirectUserPromptDelivery(input.ledgerRecord) || + input.ledgerRecord.visibleReplyMessageId || + input.ledgerRecord.visibleReplyInbox + ) { + return { ledgerRecord: input.ledgerRecord, visibleReply: null }; + } + + const text = input.ledgerRecord.observedAssistantPreview?.trim(); + if (!text) { + return { ledgerRecord: input.ledgerRecord, visibleReply: null }; + } + const semantic = isOpenCodeVisibleReplySemanticallySufficient({ + actionMode: input.ledgerRecord.actionMode, + taskRefs: input.ledgerRecord.taskRefs, + text, + }); + if (!semantic.sufficient) { + return { ledgerRecord: input.ledgerRecord, visibleReply: null }; + } + + const messageId = this.buildOpenCodePlainTextVisibleReplyMessageId(input.ledgerRecord); + const existing = await this.findOpenCodeVisibleReplyByRelayOfMessageId({ + teamName: input.teamName, + replyRecipient: 'user', + from: input.memberName, + relayOfMessageId: input.ledgerRecord.inboxMessageId, + expectedMessageId: messageId, + }); + + if (existing) { + const ledgerRecord = await input.ledger.applyDestinationProof({ + id: input.ledgerRecord.id, + visibleReplyInbox: existing.inboxName, + visibleReplyMessageId: existing.message.messageId, + visibleReplyCorrelation: 'plain_assistant_text', + semanticallySufficient: true, + diagnostics: existing.missingRuntimeDeliverySource + ? ['plain_text_visible_reply_missing_runtime_delivery_source'] + : [], + observedAt: nowIso(), + }); + return { ledgerRecord, visibleReply: existing }; + } + + const timestamp = + input.ledgerRecord.respondedAt ?? + input.ledgerRecord.lastObservedAt ?? + input.ledgerRecord.updatedAt ?? + nowIso(); + try { + const written = await this.inboxWriter.sendMessage(input.teamName, { + member: 'user', + from: input.memberName, + to: 'user', + text, + summary: this.buildOpenCodePlainTextVisibleReplySummary(text), + timestamp, + messageId, + relayOfMessageId: input.ledgerRecord.inboxMessageId, + source: 'runtime_delivery', + }); + const visibleReply: OpenCodeVisibleReplyProof = { + inboxName: 'user', + message: { + from: input.memberName, + to: 'user', + text, + timestamp, + read: false, + summary: this.buildOpenCodePlainTextVisibleReplySummary(text), + messageId: written.messageId, + relayOfMessageId: input.ledgerRecord.inboxMessageId, + source: 'runtime_delivery', + }, + }; + const ledgerRecord = await input.ledger.applyDestinationProof({ + id: input.ledgerRecord.id, + visibleReplyInbox: 'user', + visibleReplyMessageId: written.messageId, + visibleReplyCorrelation: 'plain_assistant_text', + semanticallySufficient: true, + diagnostics: written.deduplicated + ? ['opencode_plain_text_reply_materialized_deduplicated'] + : ['opencode_plain_text_reply_materialized_to_user_inbox'], + observedAt: nowIso(), + }); + return { ledgerRecord, visibleReply }; + } catch (error) { + logger.warn( + `[${input.teamName}] Failed to materialize OpenCode plain-text reply for ${input.memberName}/${input.ledgerRecord.inboxMessageId}: ${getErrorMessage(error)}` + ); + return { ledgerRecord: input.ledgerRecord, visibleReply: null }; + } + } + private getOpenCodeDeliveryWatchdogKey(input: { teamName: string; memberName: string; @@ -6787,6 +7080,22 @@ export class TeamProvisioningService { return OPENCODE_PROMPT_DELIVERY_RETRY_DELAY_MS; } + private isOpenCodePromptDeliveryWatchdogRecordTerminal( + record: OpenCodePromptDeliveryLedgerRecord + ): boolean { + if (record.status === 'failed_terminal') { + return true; + } + if (record.status !== 'responded') { + return false; + } + return !( + record.responseState === 'responded_plain_text' && + !record.visibleReplyMessageId && + !record.inboxReadCommittedAt + ); + } + private async scheduleOpenCodePromptLedgerFollowUp(input: { ledger: OpenCodePromptDeliveryLedgerStore; ledgerRecord: OpenCodePromptDeliveryLedgerRecord; @@ -7006,7 +7315,7 @@ export class TeamProvisioningService { }); const records = await ledger.list().catch(() => []); for (const record of records) { - if (record.status === 'failed_terminal' || record.status === 'responded') { + if (this.isOpenCodePromptDeliveryWatchdogRecordTerminal(record)) { continue; } const nextAttemptMs = record.nextAttemptAt ? Date.parse(record.nextAttemptAt) : NaN; @@ -7351,7 +7660,7 @@ export class TeamProvisioningService { }) : null; if (active && active.inboxMessageId !== messageId && ledger) { - const proof = await this.applyOpenCodeVisibleDestinationProof({ + let proof = await this.applyOpenCodeVisibleDestinationProof({ ledger, ledgerRecord: active, teamName, @@ -7359,6 +7668,14 @@ export class TeamProvisioningService { memberName: canonicalMemberName, }); active = proof.ledgerRecord; + proof = await this.materializeOpenCodePlainTextReplyIfNeeded({ + ledger, + ledgerRecord: active, + teamName, + memberName: canonicalMemberName, + visibleReply: proof.visibleReply, + }); + active = proof.ledgerRecord; const activeReadAllowed = this.isOpenCodeDeliveryResponseReadCommitAllowed({ responseState: active.responseState, actionMode: active.actionMode ?? undefined, @@ -7433,6 +7750,14 @@ export class TeamProvisioningService { memberName: canonicalMemberName, }); ledgerRecord = proof.ledgerRecord; + proof = await this.materializeOpenCodePlainTextReplyIfNeeded({ + ledger, + ledgerRecord, + teamName, + memberName: canonicalMemberName, + visibleReply: proof.visibleReply, + }); + ledgerRecord = proof.ledgerRecord; let readAllowed = this.isOpenCodeDeliveryResponseReadCommitAllowed({ responseState: ledgerRecord.responseState, actionMode: ledgerRecord.actionMode ?? undefined, @@ -7574,6 +7899,14 @@ export class TeamProvisioningService { memberName: canonicalMemberName, }); ledgerRecord = proof.ledgerRecord; + proof = await this.materializeOpenCodePlainTextReplyIfNeeded({ + ledger, + ledgerRecord, + teamName, + memberName: canonicalMemberName, + visibleReply: proof.visibleReply, + }); + ledgerRecord = proof.ledgerRecord; readAllowed = this.isOpenCodeDeliveryResponseReadCommitAllowed({ responseState: ledgerRecord.responseState, actionMode: ledgerRecord.actionMode ?? undefined, @@ -7678,7 +8011,7 @@ export class TeamProvisioningService { reason: result.ok ? result.responseObservation?.reason : result.diagnostics[0], now: nowIso(), }); - const proof = await this.applyOpenCodeVisibleDestinationProof({ + let proof = await this.applyOpenCodeVisibleDestinationProof({ ledger, ledgerRecord, teamName, @@ -7686,6 +8019,14 @@ export class TeamProvisioningService { memberName: canonicalMemberName, }); ledgerRecord = proof.ledgerRecord; + proof = await this.materializeOpenCodePlainTextReplyIfNeeded({ + ledger, + ledgerRecord, + teamName, + memberName: canonicalMemberName, + visibleReply: proof.visibleReply, + }); + ledgerRecord = proof.ledgerRecord; this.logOpenCodePromptDeliveryEvent( result.ok ? ledgerRecord.status === 'unanswered' @@ -12975,18 +13316,37 @@ export class TeamProvisioningService { const runtimeDiagnostic = metadata?.runtimeDiagnostic; if (metadata?.livenessKind === 'runtime_process') { if (this.isOpenCodeSecondaryLaneMemberInRun(run, memberName)) { + const bootstrapStalled = elapsedMs >= MEMBER_BOOTSTRAP_STALL_MS; + const stalledDiagnostic = bootstrapStalled + ? await this.buildOpenCodeSecondaryBootstrapStallDiagnostic(run, memberName, refreshed) + : null; + const runtimeProcessStallDiagnostic = + stalledDiagnostic === + 'OpenCode bootstrap did not complete runtime_bootstrap_checkin after 5 min.' + ? 'Runtime process is alive, but no bootstrap check-in after 5 min.' + : stalledDiagnostic; this.setOpenCodeRuntimePendingBootstrapStatus(run, memberName, refreshed, { - bootstrapStalled: elapsedMs >= MEMBER_BOOTSTRAP_STALL_MS, - runtimeDiagnostic: - elapsedMs >= MEMBER_BOOTSTRAP_STALL_MS - ? 'Runtime process is alive, but no bootstrap check-in after 5 min.' - : (runtimeDiagnostic ?? - 'OpenCode runtime process is alive, waiting for bootstrap check-in.'), - runtimeDiagnosticSeverity: - elapsedMs >= MEMBER_BOOTSTRAP_STALL_MS - ? 'warning' - : (metadata.runtimeDiagnosticSeverity ?? 'info'), + bootstrapStalled, + runtimeDiagnostic: bootstrapStalled + ? (runtimeProcessStallDiagnostic ?? + 'Runtime process is alive, but no bootstrap check-in after 5 min.') + : (runtimeDiagnostic ?? + 'OpenCode runtime process is alive, waiting for bootstrap check-in.'), + runtimeDiagnosticSeverity: bootstrapStalled + ? 'warning' + : (metadata.runtimeDiagnosticSeverity ?? 'info'), }); + if (bootstrapStalled) { + await this.maybeSendOpenCodeSecondaryBootstrapCheckinRetryPrompt({ + run, + memberName, + current: refreshed, + runtimeDiagnostic: + runtimeProcessStallDiagnostic ?? + 'Runtime process is alive, but no bootstrap check-in after 5 min.', + runtimeSessionId: metadata.runtimeSessionId, + }); + } if (elapsedMs < MEMBER_BOOTSTRAP_STALL_MS) { this.scheduleOpenCodeBootstrapStallReevaluation( run, @@ -13062,6 +13422,13 @@ export class TeamProvisioningService { enriched ); this.setOpenCodeSecondaryBootstrapStalledStatus(run, memberName, enriched, diagnostic); + await this.maybeSendOpenCodeSecondaryBootstrapCheckinRetryPrompt({ + run, + memberName, + current: enriched, + runtimeDiagnostic: diagnostic, + runtimeSessionId: metadata?.runtimeSessionId, + }); return; } const strictReason = restartPending @@ -13155,6 +13522,24 @@ export class TeamProvisioningService { memberName: string, current: MemberSpawnStatusEntry ): Promise { + const lane = (run.mixedSecondaryLanes ?? []).find( + (candidate) => + candidate.providerId === 'opencode' && + matchesTeamMemberIdentity(candidate.member.name, memberName) + ); + const selectedDiagnostic = selectOpenCodeSecondaryBootstrapStallDiagnostic([ + current.runtimeDiagnostic, + ...(lane?.diagnostics ?? []), + ...(lane?.result?.diagnostics ?? []), + ...(lane?.result?.members[memberName]?.diagnostics ?? []), + ...Object.values(lane?.result?.members ?? {}) + .filter((member) => matchesTeamMemberIdentity(member.memberName ?? '', memberName)) + .flatMap((member) => member.diagnostics ?? []), + ]); + if (selectedDiagnostic) { + return selectedDiagnostic; + } + const acceptedAtMs = current.firstSpawnAcceptedAt != null ? Date.parse(current.firstSpawnAcceptedAt) : NaN; const transcriptOutcome = await this.findBootstrapTranscriptOutcome( @@ -13220,6 +13605,103 @@ export class TeamProvisioningService { } } + private async maybeSendOpenCodeSecondaryBootstrapCheckinRetryPrompt(input: { + run: ProvisioningRun; + memberName: string; + current: MemberSpawnStatusEntry; + runtimeDiagnostic: string; + runtimeSessionId?: string; + }): Promise { + const { run, memberName, current, runtimeDiagnostic } = input; + if ( + !this.isCurrentTrackedRun(run) || + run.processKilled || + run.cancelRequested || + current.launchState !== 'runtime_pending_bootstrap' || + current.bootstrapConfirmed === true || + current.hardFailure === true || + current.skippedForLaunch === true || + (current.pendingPermissionRequestIds?.length ?? 0) > 0 + ) { + return; + } + + const lane = (run.mixedSecondaryLanes ?? []).find( + (candidate) => + candidate.providerId === 'opencode' && + matchesTeamMemberIdentity(candidate.member.name, memberName) + ); + const laneRunId = lane?.runId?.trim(); + const runtimeSessionId = + input.runtimeSessionId?.trim() || + lane?.result?.members[memberName]?.sessionId?.trim() || + Object.values(lane?.result?.members ?? {}) + .find((member) => matchesTeamMemberIdentity(member.memberName ?? '', memberName)) + ?.sessionId?.trim() || + ''; + if (!lane || !laneRunId || !isMaterializedOpenCodeSessionId(runtimeSessionId)) { + return; + } + + const diagnostics = [ + runtimeDiagnostic, + current.runtimeDiagnostic, + ...(lane.diagnostics ?? []), + ...(lane.result?.diagnostics ?? []), + ...(lane.result?.members[memberName]?.diagnostics ?? []), + ].filter((value): value is string => typeof value === 'string' && value.trim().length > 0); + if (hasRealOpenCodeFailureDiagnostic(diagnostics.join('\n').toLowerCase())) { + return; + } + + const marker = getOpenCodeBootstrapCheckinRetryMarker(laneRunId, runtimeSessionId); + if ( + run.provisioningOutputParts.some((line) => line.includes(marker)) || + diagnostics.some((line) => line.includes(marker)) + ) { + return; + } + + const adapter = this.getOpenCodeRuntimeMessageAdapter(); + if (!adapter) { + return; + } + + lane.diagnostics = [...new Set([...(lane.diagnostics ?? []), marker])]; + this.appendMemberBootstrapDiagnostic(run, memberName, marker); + + try { + const result = await adapter.sendMessageToMember({ + runId: laneRunId, + teamName: run.teamName, + laneId: lane.laneId, + memberName, + cwd: lane.member.cwd?.trim() || run.request.cwd, + text: '', + messageId: `bootstrap-checkin-retry-${run.runId}-${memberName}-${runtimeSessionId}`, + bootstrapCheckinRetry: { + runtimeSessionId, + reason: runtimeDiagnostic, + }, + }); + if (!result.ok) { + this.appendMemberBootstrapDiagnostic( + run, + memberName, + `opencode_bootstrap_checkin_retry_prompt_failed: ${ + result.diagnostics.join('; ') || 'OpenCode bridge did not accept retry prompt' + }` + ); + } + } catch (error) { + this.appendMemberBootstrapDiagnostic( + run, + memberName, + `opencode_bootstrap_checkin_retry_prompt_failed: ${getErrorMessage(error)}` + ); + } + } + private scheduleOpenCodeBootstrapStallReevaluation( run: ProvisioningRun, memberName: string, @@ -18747,16 +19229,35 @@ export class TeamProvisioningService { const bootstrapStalled = base.bootstrapStalled === true || this.isOpenCodeBootstrapStallWindowElapsed(base.firstSpawnAcceptedAt); + const stalledDiagnostic = bootstrapStalled + ? await this.buildOpenCodeSecondaryBootstrapStallDiagnostic(run, expected, base) + : null; + const runtimeProcessStallDiagnostic = + stalledDiagnostic === + 'OpenCode bootstrap did not complete runtime_bootstrap_checkin after 5 min.' + ? 'Runtime process is alive, but no bootstrap check-in after 5 min.' + : stalledDiagnostic; this.setOpenCodeRuntimePendingBootstrapStatus(run, expected, base, { bootstrapStalled, runtimeDiagnostic: bootstrapStalled - ? 'Runtime process is alive, but no bootstrap check-in after 5 min.' + ? (runtimeProcessStallDiagnostic ?? + 'Runtime process is alive, but no bootstrap check-in after 5 min.') : (base.runtimeDiagnostic ?? 'OpenCode runtime process is alive, waiting for bootstrap check-in.'), runtimeDiagnosticSeverity: bootstrapStalled ? 'warning' : (base.runtimeDiagnosticSeverity ?? 'info'), }); + if (bootstrapStalled) { + await this.maybeSendOpenCodeSecondaryBootstrapCheckinRetryPrompt({ + run, + memberName: expected, + current: base, + runtimeDiagnostic: + runtimeProcessStallDiagnostic ?? + 'Runtime process is alive, but no bootstrap check-in after 5 min.', + }); + } continue; } this.setMemberSpawnStatus(run, expected, 'online', undefined, 'process'); @@ -18778,6 +19279,12 @@ export class TeamProvisioningService { current ); this.setOpenCodeSecondaryBootstrapStalledStatus(run, expected, current, diagnostic); + await this.maybeSendOpenCodeSecondaryBootstrapCheckinRetryPrompt({ + run, + memberName: expected, + current, + runtimeDiagnostic: diagnostic, + }); continue; } this.setMemberSpawnStatus(run, expected, 'waiting'); @@ -19119,6 +19626,81 @@ export class TeamProvisioningService { return new Set(names); } + private applyOpenCodeSecondaryBootstrapStallOverlay( + snapshot: PersistedTeamLaunchSnapshot | null + ): PersistedTeamLaunchSnapshot | null { + if (!snapshot) { + return null; + } + + const nowMs = Date.now(); + const updatedAt = nowIso(); + let changed = false; + const members: Record = { ...snapshot.members }; + + for (const memberName of this.getPersistedLaunchMemberNames(snapshot)) { + let current = members[memberName]; + if (!current) { + continue; + } + + const stableFirstSpawnAcceptedAt = isPersistedOpenCodeSecondaryLaneMember(current) + ? resolveOpenCodeBootstrapAcceptedAt(current) + : undefined; + if ( + stableFirstSpawnAcceptedAt && + stableFirstSpawnAcceptedAt !== current.firstSpawnAcceptedAt + ) { + current = { + ...current, + firstSpawnAcceptedAt: stableFirstSpawnAcceptedAt, + }; + members[memberName] = current; + changed = true; + } + + if (!shouldMarkPersistedOpenCodeBootstrapStalled(current, nowMs)) { + continue; + } + + const runtimeDiagnostic = getOpenCodeSecondaryBootstrapStallDiagnosticFromPersisted(current); + members[memberName] = { + ...current, + launchState: 'runtime_pending_bootstrap', + agentToolAccepted: true, + runtimeAlive: current.runtimeAlive === true && current.livenessKind === 'runtime_process', + bootstrapConfirmed: false, + hardFailure: false, + hardFailureReason: undefined, + livenessKind: current.livenessKind ?? 'registered_only', + runtimeDiagnostic, + runtimeDiagnosticSeverity: 'warning', + bootstrapStalled: true, + firstSpawnAcceptedAt: stableFirstSpawnAcceptedAt ?? current.firstSpawnAcceptedAt, + lastEvaluatedAt: updatedAt, + diagnostics: mergeRuntimeDiagnostics(current.diagnostics, [ + runtimeDiagnostic, + 'opencode_bootstrap_stalled', + ]), + }; + changed = true; + } + + if (!changed) { + return snapshot; + } + + return createPersistedLaunchSnapshot({ + teamName: snapshot.teamName, + expectedMembers: snapshot.expectedMembers, + bootstrapExpectedMembers: snapshot.bootstrapExpectedMembers, + leadSessionId: snapshot.leadSessionId, + launchPhase: snapshot.launchPhase, + members, + updatedAt, + }); + } + private async getLiveTeamAgentNames(teamName: string): Promise> { const runtimeByMember = await this.getLiveTeamAgentRuntimeMetadata(teamName); return new Set( @@ -20093,21 +20675,23 @@ export class TeamProvisioningService { previousSnapshot, metaMembers, }); + const normalizedSnapshot = + this.applyOpenCodeSecondaryBootstrapStallOverlay(overlaidSnapshot) ?? overlaidSnapshot; if ( options?.allowNoopSkip === true && typeof options.runId === 'string' && this.launchStateWrittenRunIdByTeam.get(teamName) === options.runId && previousSnapshot && - this.areLaunchStateSnapshotsSemanticallyEqual(previousSnapshot, overlaidSnapshot) && + this.areLaunchStateSnapshotsSemanticallyEqual(previousSnapshot, normalizedSnapshot) && !this.isLaunchStateNoopRefreshDue(previousSnapshot) ) { return { snapshot: previousSnapshot, wrote: false }; } - await this.launchStateStore.write(teamName, overlaidSnapshot); + await this.launchStateStore.write(teamName, normalizedSnapshot); if (typeof options?.runId === 'string') { this.launchStateWrittenRunIdByTeam.set(teamName, options.runId); } - return { snapshot: overlaidSnapshot, wrote: true }; + return { snapshot: normalizedSnapshot, wrote: true }; } private isLaunchStateNoopRefreshDue(snapshot: PersistedTeamLaunchSnapshot): boolean { @@ -20504,6 +21088,154 @@ export class TeamProvisioningService { } } + private async applyPrimaryBootstrapTruthToLaunchReportingSnapshot( + run: ProvisioningRun, + snapshot: PersistedTeamLaunchSnapshot | null + ): Promise { + if (!run.isLaunch || !snapshot) { + return snapshot; + } + + let bootstrapSnapshot: PersistedTeamLaunchSnapshot | null = null; + try { + bootstrapSnapshot = await readBootstrapLaunchSnapshot(run.teamName); + } catch { + return snapshot; + } + if (!bootstrapSnapshot) { + return snapshot; + } + + const runStartedAtMs = Date.parse(run.startedAt); + const bootstrapUpdatedAtMs = Date.parse(bootstrapSnapshot.updatedAt); + if ( + !Number.isFinite(runStartedAtMs) || + !Number.isFinite(bootstrapUpdatedAtMs) || + bootstrapUpdatedAtMs < runStartedAtMs + ) { + return snapshot; + } + + const primaryMemberNames = new Set( + [ + ...(run.effectiveMembers ?? []).map((member) => member.name?.trim() ?? ''), + ...(snapshot.bootstrapExpectedMembers ?? []), + ].filter((name): name is string => name.length > 0) + ); + if (primaryMemberNames.size === 0) { + return snapshot; + } + + let changed = false; + const updatedAt = nowIso(); + const nextMembers: Record = { ...snapshot.members }; + for (const memberName of primaryMemberNames) { + const current = nextMembers[memberName]; + const bootstrapMember = bootstrapSnapshot.members[memberName]; + if (!current || bootstrapMember?.bootstrapConfirmed !== true) { + continue; + } + if ( + current.providerId === 'opencode' || + isPersistedOpenCodeSecondaryLaneMember(current) || + this.isOpenCodeSecondaryLaneMemberInRun(run, memberName) + ) { + continue; + } + if (current.launchState === 'skipped_for_launch' || current.skippedForLaunch === true) { + continue; + } + + const persistedError = + typeof (current as { error?: unknown }).error === 'string' + ? (current as { error?: string }).error + : undefined; + const failureReason = + current.hardFailureReason ?? persistedError ?? current.runtimeDiagnostic; + const hasFailure = + current.launchState === 'failed_to_start' || + current.hardFailure === true || + typeof current.hardFailureReason === 'string' || + typeof persistedError === 'string'; + if (hasFailure && !isAutoClearableLaunchFailureReason(failureReason)) { + continue; + } + + const observedAt = + bootstrapMember.lastHeartbeatAt ?? + bootstrapMember.lastEvaluatedAt ?? + bootstrapSnapshot.updatedAt ?? + updatedAt; + nextMembers[memberName] = { + ...current, + launchState: 'confirmed_alive', + agentToolAccepted: true, + runtimeAlive: current.runtimeAlive === true || bootstrapMember.runtimeAlive === true, + bootstrapConfirmed: true, + hardFailure: false, + hardFailureReason: undefined, + runtimeDiagnostic: isAutoClearableLaunchFailureReason(current.runtimeDiagnostic) + ? undefined + : current.runtimeDiagnostic, + runtimeDiagnosticSeverity: isAutoClearableLaunchFailureReason(current.runtimeDiagnostic) + ? undefined + : current.runtimeDiagnosticSeverity, + bootstrapStalled: undefined, + firstSpawnAcceptedAt: + current.firstSpawnAcceptedAt ?? bootstrapMember.firstSpawnAcceptedAt ?? observedAt, + lastHeartbeatAt: current.lastHeartbeatAt ?? bootstrapMember.lastHeartbeatAt ?? observedAt, + lastRuntimeAliveAt: + current.lastRuntimeAliveAt ?? bootstrapMember.lastRuntimeAliveAt ?? observedAt, + lastEvaluatedAt: updatedAt, + sources: { + ...(current.sources ?? {}), + nativeHeartbeat: true, + hardFailureSignal: undefined, + }, + diagnostics: undefined, + }; + changed = true; + } + + if (!changed) { + return snapshot; + } + + return createPersistedLaunchSnapshot({ + teamName: snapshot.teamName, + expectedMembers: snapshot.expectedMembers, + bootstrapExpectedMembers: snapshot.bootstrapExpectedMembers, + leadSessionId: snapshot.leadSessionId, + launchPhase: snapshot.launchPhase, + members: nextMembers, + updatedAt, + }); + } + + private async reconcileFinalLaunchReportingSnapshot( + run: ProvisioningRun, + snapshot: PersistedTeamLaunchSnapshot | null + ): Promise { + const reconciled = await this.applyPrimaryBootstrapTruthToLaunchReportingSnapshot( + run, + snapshot + ); + if (!reconciled || reconciled === snapshot) { + return reconciled; + } + this.syncRunMemberSpawnStatusesFromSnapshot(run, reconciled); + try { + return await this.writeLaunchStateSnapshot(run.teamName, reconciled); + } catch (error) { + logger.warn( + `[${run.teamName}] Failed to persist reconciled launch reporting snapshot: ${getErrorMessage( + error + )}` + ); + return reconciled; + } + } + private syncRunMemberSpawnStatusesFromSnapshot( run: ProvisioningRun, snapshot: PersistedTeamLaunchSnapshot @@ -20708,6 +21440,12 @@ export class TeamProvisioningService { secondaryMembers: mixedSecondaryLanes.map((secondaryLane) => { const evidenceEntry = secondaryLane.result?.members[secondaryLane.member.name]; const currentSpawnStatus = run.memberSpawnStatuses.get(secondaryLane.member.name); + const laneFirstSpawnAcceptedAt = + currentSpawnStatus?.firstSpawnAcceptedAt ?? + (typeof secondaryLane.launchFinishedAtMs === 'number' && + Number.isFinite(secondaryLane.launchFinishedAtMs) + ? new Date(secondaryLane.launchFinishedAtMs).toISOString() + : undefined); const finishedWithoutRuntimeEvidence = secondaryLane.state === 'finished' && !secondaryLane.result; return { @@ -20741,6 +21479,7 @@ export class TeamProvisioningService { runtimeDiagnostic: evidenceEntry.runtimeDiagnostic, runtimeDiagnosticSeverity: evidenceEntry.runtimeDiagnosticSeverity, bootstrapStalled: currentSpawnStatus?.bootstrapStalled === true ? true : undefined, + firstSpawnAcceptedAt: laneFirstSpawnAcceptedAt, diagnostics: evidenceEntry.diagnostics, } : finishedWithoutRuntimeEvidence @@ -21481,6 +22220,7 @@ export class TeamProvisioningService { pidSource?: TeamAgentRuntimePidSource; runtimeDiagnostic?: string; runtimeDiagnosticSeverity?: TeamAgentRuntimeDiagnosticSeverity; + firstSpawnAcceptedAt?: string; diagnostics?: string[]; }; pendingReason?: string; @@ -21542,6 +22282,7 @@ export class TeamProvisioningService { pidSource: runtimeEvidence.pidSource, runtimeDiagnostic: runtimeEvidence.runtimeDiagnostic, runtimeDiagnosticSeverity: runtimeEvidence.runtimeDiagnosticSeverity, + firstSpawnAcceptedAt: persistedMember.firstSpawnAcceptedAt, diagnostics: runtimeEvidence.diagnostics, }, }); @@ -21576,6 +22317,7 @@ export class TeamProvisioningService { pidSource: runtimeEvidence.pidSource, runtimeDiagnostic: runtimeEvidence.runtimeDiagnostic, runtimeDiagnosticSeverity: runtimeEvidence.runtimeDiagnosticSeverity, + firstSpawnAcceptedAt: persistedMember?.firstSpawnAcceptedAt, diagnostics: runtimeEvidence.diagnostics, }, }); @@ -21948,14 +22690,17 @@ export class TeamProvisioningService { metaMembers, }) : null; + const recoveredMixedSnapshotWithBootstrapStall = + this.applyOpenCodeSecondaryBootstrapStallOverlay(overlaidRecoveredMixedSnapshot); const stableRecoveredMixedSnapshotWithCommittedEvidence = - overlaidRecoveredMixedSnapshot && - this.hasCommittedOpenCodeSecondaryEvidenceOverlayDelta( - overlaidRecoveredMixedSnapshot, + recoveredMixedSnapshotWithBootstrapStall && + (this.hasCommittedOpenCodeSecondaryEvidenceOverlayDelta( + recoveredMixedSnapshotWithBootstrapStall, persisted - ) - ? await this.writeLaunchStateSnapshot(teamName, overlaidRecoveredMixedSnapshot) - : overlaidRecoveredMixedSnapshot; + ) || + recoveredMixedSnapshotWithBootstrapStall !== overlaidRecoveredMixedSnapshot) + ? await this.writeLaunchStateSnapshot(teamName, recoveredMixedSnapshotWithBootstrapStall) + : recoveredMixedSnapshotWithBootstrapStall; const promotedRecoveredMixedSnapshot = promoteOpenCodePersistedFailureReasonsFromDiagnostics( stableRecoveredMixedSnapshotWithCommittedEvidence ); @@ -21993,14 +22738,25 @@ export class TeamProvisioningService { metaMembers, }) : null; + const filteredPersistedWithBootstrapStall = + this.applyOpenCodeSecondaryBootstrapStallOverlay(filteredPersisted); const shouldPersistCommittedEvidenceOverlay = - this.hasCommittedOpenCodeSecondaryEvidenceOverlayDelta(filteredPersisted, persisted); - const promotedPersisted = - promoteOpenCodePersistedFailureReasonsFromDiagnostics(filteredPersisted); - const shouldPersistFailureReasonPromotion = promotedPersisted !== filteredPersisted; + this.hasCommittedOpenCodeSecondaryEvidenceOverlayDelta( + filteredPersistedWithBootstrapStall, + persisted + ); + const promotedPersisted = promoteOpenCodePersistedFailureReasonsFromDiagnostics( + filteredPersistedWithBootstrapStall + ); + const shouldPersistFailureReasonPromotion = + promotedPersisted !== filteredPersistedWithBootstrapStall; + const shouldPersistBootstrapStallOverlay = + filteredPersistedWithBootstrapStall !== filteredPersisted; const persistedWithCommittedEvidence = promotedPersisted && - (shouldPersistCommittedEvidenceOverlay || shouldPersistFailureReasonPromotion) + (shouldPersistCommittedEvidenceOverlay || + shouldPersistFailureReasonPromotion || + shouldPersistBootstrapStallOverlay) ? await this.writeLaunchStateSnapshot(teamName, promotedPersisted) : promotedPersisted; const preferredSnapshot = choosePreferredLaunchSnapshot( @@ -22162,7 +22918,7 @@ export class TeamProvisioningService { const currentProvesSpawnAcceptance = current.agentToolAccepted === true || typeof current.firstSpawnAcceptedAt === 'string'; if ( - isNeverSpawnedDuringLaunchReason(current.hardFailureReason) && + isAutoClearableLaunchFailureReason(current.hardFailureReason) && (bootstrapProvesSpawnAcceptance || currentProvesSpawnAcceptance) ) { current.hardFailure = false; @@ -22171,6 +22927,17 @@ export class TeamProvisioningService { current.sources.hardFailureSignal = undefined; } } + if ( + current.bootstrapConfirmed && + !isOpenCodeSecondaryLaneMember && + isAutoClearableLaunchFailureReason(current.hardFailureReason) + ) { + current.hardFailure = false; + current.hardFailureReason = undefined; + if (current.sources) { + current.sources.hardFailureSignal = undefined; + } + } if (heartbeatReason) { current.hardFailure = true; current.hardFailureReason = heartbeatReason; @@ -22205,10 +22972,33 @@ export class TeamProvisioningService { current.agentToolAccepted === true && Number.isFinite(acceptedAtMs) && Date.now() - acceptedAtMs >= MEMBER_LAUNCH_GRACE_MS; + if ( + isOpenCodeSecondaryLaneMember && + shouldMarkPersistedOpenCodeBootstrapStalled(current, Date.now()) + ) { + const runtimeDiagnostic = + getOpenCodeSecondaryBootstrapStallDiagnosticFromPersisted(current); + current.launchState = 'runtime_pending_bootstrap'; + current.agentToolAccepted = true; + current.runtimeAlive = + current.runtimeAlive === true && current.livenessKind === 'runtime_process'; + current.bootstrapConfirmed = false; + current.hardFailure = false; + current.hardFailureReason = undefined; + current.livenessKind = current.livenessKind ?? 'registered_only'; + current.runtimeDiagnostic = runtimeDiagnostic; + current.runtimeDiagnosticSeverity = 'warning'; + current.bootstrapStalled = true; + current.diagnostics = mergeRuntimeDiagnostics(current.diagnostics, [ + runtimeDiagnostic, + 'opencode_bootstrap_stalled', + ]); + } if ( !current.bootstrapConfirmed && !current.runtimeAlive && !current.hardFailure && + current.bootstrapStalled !== true && graceExpired ) { current.hardFailure = true; @@ -25387,7 +26177,10 @@ export class TeamProvisioningService { await this.refreshMemberSpawnStatusesFromLeadInbox(run); await this.maybeAuditMemberSpawnStatuses(run, { force: true }); await this.finalizeMissingRegisteredMembersAsFailed(run); - const persistedLaunchSnapshot = await this.launchMixedSecondaryLaneIfNeeded(run); + const persistedLaunchSnapshot = await this.reconcileFinalLaunchReportingSnapshot( + run, + await this.launchMixedSecondaryLaneIfNeeded(run) + ); const failedSpawnMembers = persistedLaunchSnapshot ? persistedLaunchSnapshot.expectedMembers .filter( @@ -25568,7 +26361,10 @@ export class TeamProvisioningService { await this.refreshMemberSpawnStatusesFromLeadInbox(run); await this.maybeAuditMemberSpawnStatuses(run, { force: true }); await this.finalizeMissingRegisteredMembersAsFailed(run); - const persistedLaunchSnapshot = await this.launchMixedSecondaryLaneIfNeeded(run); + const persistedLaunchSnapshot = await this.reconcileFinalLaunchReportingSnapshot( + run, + await this.launchMixedSecondaryLaneIfNeeded(run) + ); const failedSpawnMembers = persistedLaunchSnapshot ? persistedLaunchSnapshot.expectedMembers .filter( @@ -25820,7 +26616,12 @@ export class TeamProvisioningService { live?.bootstrapConfirmed === true || live?.launchState === 'skipped_for_launch' || live?.skippedForLaunch === true; - if (liveResolved) { + const persistedResolved = + persisted?.launchState === 'confirmed_alive' || + persisted?.bootstrapConfirmed === true || + persisted?.launchState === 'skipped_for_launch' || + persisted?.skippedForLaunch === true; + if (liveResolved || persistedResolved) { failedNames.delete(memberName); continue; } diff --git a/src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts b/src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts index b87140d0..98180867 100644 --- a/src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts +++ b/src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts @@ -141,7 +141,7 @@ export interface ApplyOpenCodePromptDestinationProofInput { id: string; visibleReplyInbox: string; visibleReplyMessageId: string; - visibleReplyCorrelation: 'relayOfMessageId'; + visibleReplyCorrelation: OpenCodeDeliveryVisibleReplyCorrelation; semanticallySufficient: boolean; diagnostics?: string[]; observedAt: string; @@ -357,10 +357,14 @@ export class OpenCodePromptDeliveryLedgerStore { async applyDestinationProof( input: ApplyOpenCodePromptDestinationProofInput ): Promise { + const responseState = + input.visibleReplyCorrelation === 'plain_assistant_text' + ? 'responded_plain_text' + : 'responded_visible_message'; return await this.updateExisting(input.id, (record) => ({ ...record, status: input.semanticallySufficient ? 'responded' : record.status, - responseState: 'responded_visible_message', + responseState, lastObservedAt: input.observedAt, respondedAt: input.semanticallySufficient ? (record.respondedAt ?? input.observedAt) @@ -786,6 +790,14 @@ function isTaskRefArray(value: unknown): value is TaskRef[] { } function isTerminalForAutomaticSelection(record: OpenCodePromptDeliveryLedgerRecord): boolean { + if ( + record.status === 'responded' && + record.responseState === 'responded_plain_text' && + !record.visibleReplyMessageId && + !record.inboxReadCommittedAt + ) { + return false; + } return record.status === 'failed_terminal' || record.status === 'responded'; } diff --git a/src/main/services/team/runtime/OpenCodeTeamRuntimeAdapter.ts b/src/main/services/team/runtime/OpenCodeTeamRuntimeAdapter.ts index 85eaf06d..0a725514 100644 --- a/src/main/services/team/runtime/OpenCodeTeamRuntimeAdapter.ts +++ b/src/main/services/team/runtime/OpenCodeTeamRuntimeAdapter.ts @@ -59,6 +59,10 @@ export interface OpenCodeTeamRuntimeMessageInput { replyRecipient?: string; actionMode?: AgentActionMode; taskRefs?: TaskRef[]; + bootstrapCheckinRetry?: { + runtimeSessionId: string; + reason?: string; + }; } export interface OpenCodeTeamRuntimeMessageResult { @@ -743,6 +747,30 @@ function buildMemberBootstrapPrompt( } function buildOpenCodeRuntimeMessageText(input: OpenCodeTeamRuntimeMessageInput): string { + if (input.bootstrapCheckinRetry) { + const runtimeSessionId = input.bootstrapCheckinRetry.runtimeSessionId.trim(); + return [ + '', + 'The desktop app detected that this OpenCode session exists, but runtime_bootstrap_checkin has not committed durable runtime evidence yet.', + input.bootstrapCheckinRetry.reason + ? `Reason: ${input.bootstrapCheckinRetry.reason.trim()}` + : null, + 'Before any other tool or message, call MCP tool agent-teams_runtime_bootstrap_checkin or mcp__agent-teams__runtime_bootstrap_checkin with exactly:', + JSON.stringify({ + runId: input.runId, + teamName: input.teamName, + memberName: input.memberName, + runtimeSessionId, + }), + 'Do not call member_briefing, task tools, message_send, or cross_team_send before runtime_bootstrap_checkin completes.', + 'After runtime_bootstrap_checkin succeeds, stop this turn immediately and wait silently.', + 'If runtime_bootstrap_checkin is unavailable or fails, reply with one short sentence containing the exact error text, then stop.', + '', + ] + .filter((line): line is string => line !== null) + .join('\n'); + } + const replyRecipient = input.replyRecipient?.trim() || 'user'; const deliveryContext = input.messageId && input.taskRefs?.length diff --git a/src/renderer/utils/openCodeRuntimeDeliveryDiagnostics.ts b/src/renderer/utils/openCodeRuntimeDeliveryDiagnostics.ts index a7317841..5cce4646 100644 --- a/src/renderer/utils/openCodeRuntimeDeliveryDiagnostics.ts +++ b/src/renderer/utils/openCodeRuntimeDeliveryDiagnostics.ts @@ -22,6 +22,17 @@ const PENDING_WARNING = const FAILED_WARNING = 'OpenCode runtime delivery failed. Message was saved to inbox, but live delivery did not complete.'; +function formatOpenCodeRuntimeDeliveryFailureReason(reason: string | null | undefined): string { + const normalized = reason?.trim(); + if (!normalized) { + return ''; + } + if (normalized === 'empty_assistant_turn') { + return 'OpenCode returned an empty assistant turn.'; + } + return ''; +} + export function buildOpenCodeRuntimeDeliveryDiagnostics( result: SendMessageResult ): OpenCodeRuntimeDeliveryDiagnostics { @@ -36,8 +47,19 @@ export function buildOpenCodeRuntimeDeliveryDiagnostics( return { warning: null, debugDetails: null }; } + const failureReason = isFailed + ? formatOpenCodeRuntimeDeliveryFailureReason( + runtimeDelivery.reason ?? runtimeDelivery.diagnostics?.[0] + ) + : ''; + return { - warning: isFailed ? FAILED_WARNING : PENDING_WARNING, + warning: + isFailed && failureReason + ? `${FAILED_WARNING} Reason: ${failureReason}` + : isFailed + ? FAILED_WARNING + : PENDING_WARNING, debugDetails: { messageId: result.messageId, providerId: runtimeDelivery.providerId, diff --git a/test/main/services/team/OpenCodePromptDeliveryLedger.test.ts b/test/main/services/team/OpenCodePromptDeliveryLedger.test.ts index 4d1d2d2f..c1f5f2aa 100644 --- a/test/main/services/team/OpenCodePromptDeliveryLedger.test.ts +++ b/test/main/services/team/OpenCodePromptDeliveryLedger.test.ts @@ -286,6 +286,71 @@ describe('OpenCodePromptDeliveryLedger', () => { expect(observed.observedAssistantPreview).toBe('Понял'); }); + it('keeps plain-text responses active until their visible inbox reply is materialized', async () => { + const store = createStore(); + const record = await store.ensurePending({ + teamName: 'team-a', + memberName: 'jack', + laneId: 'secondary:opencode:jack', + inboxMessageId: 'msg-plain-visible', + inboxTimestamp: '2026-04-25T09:59:00.000Z', + source: 'watcher', + replyRecipient: 'user', + actionMode: 'ask', + taskRefs: [], + payloadHash: 'sha256:plain-visible', + now: '2026-04-25T10:00:00.000Z', + }); + + const responded = await store.applyDeliveryResult({ + id: record.id, + accepted: true, + attempted: true, + responseObservation: { + state: 'responded_plain_text', + deliveredUserMessageId: 'oc-user-plain', + assistantMessageId: 'oc-assistant-plain', + toolCallNames: [], + visibleMessageToolCallId: null, + visibleReplyMessageId: null, + visibleReplyCorrelation: null, + latestAssistantPreview: 'Concrete visible answer.', + reason: null, + }, + now: '2026-04-25T10:00:05.000Z', + }); + expect(responded.status).toBe('responded'); + + await expect(store.getActiveForMember({ + teamName: 'team-a', + memberName: 'jack', + laneId: 'secondary:opencode:jack', + })).resolves.toMatchObject({ + id: record.id, + responseState: 'responded_plain_text', + }); + + const materialized = await store.applyDestinationProof({ + id: record.id, + visibleReplyInbox: 'user', + visibleReplyMessageId: 'opencode-plain-reply-1', + visibleReplyCorrelation: 'plain_assistant_text', + semanticallySufficient: true, + observedAt: '2026-04-25T10:00:06.000Z', + }); + expect(materialized).toMatchObject({ + status: 'responded', + responseState: 'responded_plain_text', + visibleReplyCorrelation: 'plain_assistant_text', + }); + + await expect(store.getActiveForMember({ + teamName: 'team-a', + memberName: 'jack', + laneId: 'secondary:opencode:jack', + })).resolves.toBeNull(); + }); + it('does not keep responded live deliveries active when no inbox commit is needed', async () => { const store = createStore(); const direct = await store.ensurePending({ diff --git a/test/main/services/team/TeamProvisioningService.test.ts b/test/main/services/team/TeamProvisioningService.test.ts index 31664715..69dc98d1 100644 --- a/test/main/services/team/TeamProvisioningService.test.ts +++ b/test/main/services/team/TeamProvisioningService.test.ts @@ -895,6 +895,133 @@ describe('TeamProvisioningService', () => { expect(payload.body).not.toContain('0/4'); expect(payload.body).not.toContain('did not join'); }); + + it('does not report persisted bootstrap-confirmed primary members as failed from a stale failed list', async () => { + const { NotificationManager } = + await import('@main/services/infrastructure/NotificationManager'); + const addTeamNotification = vi.fn(async (_payload: unknown) => undefined); + NotificationManager.setInstance({ addTeamNotification } as never); + + try { + const svc = new TeamProvisioningService(); + const run = { + runId: 'run-forge-labs-15', + teamName: 'forge-labs-15', + isLaunch: true, + request: { + cwd: tempClaudeRoot, + displayName: 'forge-labs-15', + }, + expectedMembers: ['bob', 'jack', 'alice', 'tom'], + allEffectiveMembers: [ + { name: 'bob' }, + { name: 'jack' }, + { name: 'alice' }, + { name: 'tom' }, + ], + memberSpawnStatuses: new Map([ + [ + 'bob', + createMemberSpawnStatusEntry({ + status: 'error', + launchState: 'failed_to_start', + runtimeAlive: false, + bootstrapConfirmed: false, + hardFailure: true, + hardFailureReason: 'Teammate was never spawned during launch.', + }), + ], + [ + 'jack', + createMemberSpawnStatusEntry({ + status: 'error', + launchState: 'failed_to_start', + runtimeAlive: false, + bootstrapConfirmed: false, + hardFailure: true, + hardFailureReason: 'Teammate was never spawned during launch.', + }), + ], + [ + 'alice', + createMemberSpawnStatusEntry({ + status: 'waiting', + launchState: 'runtime_pending_bootstrap', + runtimeAlive: false, + bootstrapConfirmed: false, + }), + ], + [ + 'tom', + createMemberSpawnStatusEntry({ + status: 'online', + launchState: 'confirmed_alive', + runtimeAlive: true, + bootstrapConfirmed: true, + }), + ], + ]), + }; + const reconciledSnapshot = { + expectedMembers: ['bob', 'jack', 'alice', 'tom'], + members: { + bob: { + name: 'bob', + launchState: 'confirmed_alive', + agentToolAccepted: true, + runtimeAlive: true, + bootstrapConfirmed: true, + hardFailure: false, + lastEvaluatedAt: '2026-05-04T19:32:37.000Z', + }, + jack: { + name: 'jack', + launchState: 'confirmed_alive', + agentToolAccepted: true, + runtimeAlive: true, + bootstrapConfirmed: true, + hardFailure: false, + lastEvaluatedAt: '2026-05-04T19:32:30.000Z', + }, + alice: { + name: 'alice', + launchState: 'runtime_pending_bootstrap', + agentToolAccepted: true, + runtimeAlive: false, + bootstrapConfirmed: false, + hardFailure: false, + lastEvaluatedAt: '2026-05-04T19:35:49.000Z', + }, + tom: { + name: 'tom', + launchState: 'confirmed_alive', + agentToolAccepted: true, + runtimeAlive: true, + bootstrapConfirmed: true, + hardFailure: false, + lastEvaluatedAt: '2026-05-04T19:35:49.000Z', + }, + }, + summary: { + confirmedCount: 3, + pendingCount: 1, + failedCount: 0, + runtimeAlivePendingCount: 0, + }, + }; + + await (svc as any).fireTeamLaunchIncompleteNotification( + run, + [{ name: 'bob' }, { name: 'jack' }], + reconciledSnapshot.summary, + reconciledSnapshot + ); + } finally { + NotificationManager.resetInstance(); + } + + expect(addTeamNotification).not.toHaveBeenCalled(); + }); }); describe('getClaudeLogs', () => { @@ -4882,7 +5009,7 @@ describe('TeamProvisioningService', () => { visibleMessageToolCallId: null, visibleReplyMessageId: null, visibleReplyCorrelation: null, - latestAssistantPreview: null, + latestAssistantPreview: 'Answer after observe.', reason: null, }, diagnostics: [], @@ -4971,8 +5098,21 @@ describe('TeamProvisioningService', () => { }) ).resolves.toMatchObject({ delivered: true, - responsePending: false, - responseState: 'responded_plain_text', + responsePending: false, + responseState: 'responded_plain_text', + visibleReplyCorrelation: 'plain_assistant_text', + }); + + const userInbox = JSON.parse( + await fsPromises.readFile(path.join(tempTeamsBase, 'team-a', 'inboxes', 'user.json'), 'utf8') + ) as Array>; + expect(userInbox).toHaveLength(1); + expect(userInbox[0]).toMatchObject({ + from: 'bob', + to: 'user', + text: 'Answer after observe.', + relayOfMessageId: 'msg-ledger-1', + source: 'runtime_delivery', }); expect(sendMessageToMember).toHaveBeenCalledTimes(1); @@ -14319,6 +14459,205 @@ describe('TeamProvisioningService', () => { }); }); + it('self-heals stale persisted OpenCode secondary bootstrap without live metadata', async () => { + const teamName = 'zz-opencode-persisted-bootstrap-stall-no-live'; + const leadSessionId = 'lead-session'; + const acceptedAt = new Date(Date.now() - 6 * 60_000).toISOString(); + + writeTeamMeta(teamName, { + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.4', + }); + writeMembersMeta(teamName, [ + { + name: 'tom', + providerId: 'opencode', + model: 'openrouter/minimax/minimax-m2.5', + }, + ]); + writeLaunchConfig(teamName, '/Users/test/proj', leadSessionId, ['tom']); + writeLaunchState(teamName, leadSessionId, { + tom: { + providerId: 'opencode', + model: 'openrouter/minimax/minimax-m2.5', + laneId: 'secondary:opencode:tom', + laneKind: 'secondary', + laneOwnerProviderId: 'opencode', + launchState: 'runtime_pending_bootstrap', + agentToolAccepted: true, + runtimeAlive: false, + bootstrapConfirmed: false, + hardFailure: false, + runtimeSessionId: 'ses_tom_partial', + runtimePid: 55947, + livenessKind: 'registered_only', + firstSpawnAcceptedAt: acceptedAt, + diagnostics: [ + 'OpenCode bootstrap MCP tool failed before required attach completed: runtime_bootstrap_checkin', + 'member_briefing at 2026-05-04T18:25:43.091Z', + ], + lastEvaluatedAt: acceptedAt, + }, + }); + + const svc = new TeamProvisioningService(); + vi.spyOn(svc as any, 'getLiveTeamAgentRuntimeMetadata').mockResolvedValue(new Map()); + + const result = await svc.getMemberSpawnStatuses(teamName); + + expect(result.statuses.tom).toMatchObject({ + launchState: 'runtime_pending_bootstrap', + bootstrapConfirmed: false, + hardFailure: false, + bootstrapStalled: true, + runtimeDiagnostic: + 'OpenCode bootstrap MCP tool failed before required attach completed: runtime_bootstrap_checkin', + runtimeDiagnosticSeverity: 'warning', + }); + const persisted = JSON.parse( + await fsPromises.readFile(getTeamLaunchStatePath(teamName), 'utf8') + ); + expect(persisted.members.tom).toMatchObject({ + launchState: 'runtime_pending_bootstrap', + bootstrapConfirmed: false, + hardFailure: false, + bootstrapStalled: true, + }); + }); + + it('sends one targeted OpenCode bootstrap check-in retry when a partial bootstrap stalls', async () => { + const teamName = 'zz-opencode-bootstrap-checkin-retry'; + const acceptedAt = new Date(Date.now() - 6 * 60_000).toISOString(); + const sendMessageToMember = vi.fn(async (input: Record) => ({ + ok: true, + providerId: 'opencode', + memberName: String(input.memberName), + diagnostics: [], + })); + const svc = new TeamProvisioningService(); + svc.setRuntimeAdapterRegistry( + new TeamRuntimeAdapterRegistry([ + { + providerId: 'opencode', + prepare: vi.fn(), + launch: vi.fn(), + reconcile: vi.fn(), + stop: vi.fn(), + sendMessageToMember, + } as any, + ]) + ); + vi.spyOn(svc as any, 'refreshMemberSpawnStatusesFromLeadInbox').mockResolvedValue(undefined); + vi.spyOn(svc as any, 'maybeAuditMemberSpawnStatuses').mockResolvedValue(undefined); + vi.spyOn(svc as any, 'getLiveTeamAgentRuntimeMetadata').mockResolvedValue( + new Map([ + [ + 'tom', + { + alive: true, + providerId: 'opencode', + livenessKind: 'runtime_process', + runtimeSessionId: 'ses_tom_partial', + runtimeDiagnostic: 'OpenCode runtime process detected', + runtimeDiagnosticSeverity: 'info', + }, + ], + ]) + ); + + const run = createMemberSpawnRun({ + teamName, + runId: 'run-bootstrap-checkin-retry', + expectedMembers: ['tom'], + memberSpawnStatuses: new Map([ + [ + 'tom', + createMemberSpawnStatusEntry({ + status: 'waiting', + launchState: 'runtime_pending_bootstrap', + agentToolAccepted: true, + runtimeAlive: true, + bootstrapConfirmed: false, + hardFailure: false, + firstSpawnAcceptedAt: acceptedAt, + livenessKind: 'runtime_process', + }), + ], + ]), + }); + run.onProgress = vi.fn(); + run.isLaunch = false; + run.request = { + teamName, + cwd: '/Users/test/proj', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.4', + members: [], + }; + run.mixedSecondaryLanes = [ + { + laneId: 'secondary:opencode:tom', + providerId: 'opencode', + member: { + name: 'tom', + providerId: 'opencode', + model: 'openrouter/minimax/minimax-m2.5', + cwd: '/Users/test/proj', + }, + runId: 'opencode-run-tom', + state: 'finished', + result: { + runId: 'opencode-run-tom', + teamName, + launchPhase: 'active', + teamLaunchState: 'partial_pending', + members: { + tom: { + memberName: 'tom', + providerId: 'opencode', + launchState: 'runtime_pending_bootstrap', + agentToolAccepted: true, + runtimeAlive: false, + bootstrapConfirmed: false, + hardFailure: false, + sessionId: 'ses_tom_partial', + diagnostics: [ + 'runtime_bootstrap_checkin failed: Not connected', + 'member_briefing at 2026-05-04T18:25:43.091Z', + ], + }, + }, + warnings: [], + diagnostics: [], + }, + warnings: [], + diagnostics: [], + }, + ]; + (svc as any).runs.set(run.runId, run); + (svc as any).aliveRunByTeam.set(teamName, run.runId); + + await (svc as any).reevaluateMemberLaunchStatus(run, 'tom'); + await (svc as any).reevaluateMemberLaunchStatus(run, 'tom'); + + expect(sendMessageToMember).toHaveBeenCalledTimes(1); + expect(sendMessageToMember).toHaveBeenCalledWith( + expect.objectContaining({ + runId: 'opencode-run-tom', + teamName, + laneId: 'secondary:opencode:tom', + memberName: 'tom', + cwd: '/Users/test/proj', + bootstrapCheckinRetry: { + runtimeSessionId: 'ses_tom_partial', + reason: 'runtime_bootstrap_checkin failed: Not connected', + }, + }) + ); + }); + it('keeps process table diagnostics visible when live metadata has no primary diagnostic', async () => { const svc = new TeamProvisioningService(); (svc as any).getLiveTeamAgentRuntimeMetadata = vi.fn( @@ -16288,6 +16627,8 @@ describe('TeamProvisioningService', () => { const teamName = 'atlas-hq-source-aware-persisted'; const exactOpenCodeReason = 'Latest assistant message msg_alice failed with APIError - Insufficient credits.'; + const transientBobMcpFailure = + 'resources/read failed: resources/read failed for `agent-teams` (member_briefing?teamName=atlas-hq-source-aware-persisted&memberName=bob): Mcp error: -32601: Method not found'; writeTeamMeta(teamName, { providerId: 'codex', providerBackendId: 'codex-native', @@ -16337,7 +16678,7 @@ describe('TeamProvisioningService', () => { runtimeAlive: false, bootstrapConfirmed: false, hardFailure: true, - hardFailureReason: 'Teammate was never spawned during launch.', + hardFailureReason: transientBobMcpFailure, lastEvaluatedAt: '2026-04-23T10:02:00.000Z', }, jack: { diff --git a/test/renderer/components/team/messages/OpenCodeDeliveryWarning.test.tsx b/test/renderer/components/team/messages/OpenCodeDeliveryWarning.test.tsx index e363a0dd..ac8086df 100644 --- a/test/renderer/components/team/messages/OpenCodeDeliveryWarning.test.tsx +++ b/test/renderer/components/team/messages/OpenCodeDeliveryWarning.test.tsx @@ -173,6 +173,29 @@ describe('OpenCodeDeliveryWarning', () => { }); }); + it('shows terminal empty assistant turn reason in the compact failed warning', async () => { + const failedWarning = + 'OpenCode runtime delivery failed. Message was saved to inbox, but live delivery did not complete. Reason: OpenCode returned an empty assistant turn.'; + const { host, root } = renderWarning({ + warning: failedWarning, + debugDetails: { + ...debugDetails, + delivered: false, + responsePending: false, + responseState: 'empty_assistant_turn', + ledgerStatus: 'failed_terminal', + reason: 'empty_assistant_turn', + diagnostics: ['empty_assistant_turn'], + }, + }); + + expect(host.textContent).toContain(failedWarning); + + await act(async () => { + root.unmount(); + }); + }); + it('hides details again when a different runtime delivery payload arrives', async () => { const { host, root } = renderWarning(); diff --git a/test/renderer/utils/openCodeRuntimeDeliveryDiagnostics.test.ts b/test/renderer/utils/openCodeRuntimeDeliveryDiagnostics.test.ts new file mode 100644 index 00000000..84e91000 --- /dev/null +++ b/test/renderer/utils/openCodeRuntimeDeliveryDiagnostics.test.ts @@ -0,0 +1,30 @@ +import { describe, expect, it } from 'vitest'; + +import { buildOpenCodeRuntimeDeliveryDiagnostics } from '../../../src/renderer/utils/openCodeRuntimeDeliveryDiagnostics'; + +describe('openCodeRuntimeDeliveryDiagnostics', () => { + it('surfaces terminal empty assistant turn in the compact failed warning', () => { + const diagnostics = buildOpenCodeRuntimeDeliveryDiagnostics({ + deliveredToInbox: true, + messageId: 'msg-empty', + runtimeDelivery: { + providerId: 'opencode', + attempted: true, + delivered: false, + responsePending: false, + responseState: 'empty_assistant_turn', + ledgerStatus: 'failed_terminal', + reason: 'empty_assistant_turn', + diagnostics: ['empty_assistant_turn'], + }, + }); + + expect(diagnostics.warning).toBe( + 'OpenCode runtime delivery failed. Message was saved to inbox, but live delivery did not complete. Reason: OpenCode returned an empty assistant turn.' + ); + expect(diagnostics.debugDetails).toMatchObject({ + responseState: 'empty_assistant_turn', + reason: 'empty_assistant_turn', + }); + }); +});