import { promises as fs } from 'fs';
import * as os from 'os';
import * as path from 'path';
import { describe, expect, it } from 'vitest';
import { setClaudeBasePathOverride } from '../../../../src/main/utils/pathDecoder';
import {
  buildOpenCodeScenarioTeamRequest,
  loadOpenCodeSemanticScenario,
  materializeOpenCodeScenarioProject,
  materializeOpenCodeScenarioTasks,
  parseOpenCodeE2EModelList,
  taskRefForScenario,
  type OpenCodeSemanticScenario,
} from './openCodeSemanticScenarioHarness';
import {
  createOpenCodeLiveHarness,
  getRuntimeTranscript,
  type InboxMessage,
  waitForMemberInboxMessage,
  waitForOpenCodeLanesStopped,
  waitForOpenCodePeerRelay,
  waitForUserInboxReply,
} from './openCodeLiveTestHarness';
import type { TaskRef, TeamProvisioningProgress } from '../../../../src/shared/types';

// The suite only runs when both opt-in environment flags are set to '1';
// otherwise it is registered as skipped.
const liveDescribe =
  process.env.OPENCODE_E2E === '1' && process.env.OPENCODE_E2E_SEMANTIC_MODEL_MATRIX === '1'
    ? describe
    : describe.skip;

/** Shape of the JSON report written by {@link writeModelMatrixReport}. */
interface ModelMatrixReport {
  generatedAt: string;
  models: ModelResult[];
}

/** Outcome of running the scenario against a single model. */
interface ModelResult {
  model: string;
  /** True only when every stage succeeded (score of exactly 100). */
  passed: boolean;
  /** Weighted sum of the stage flags; see {@link scoreModel}. */
  score: number;
  durationMs: number;
  stages: {
    launchBootstrap: boolean;
    directReply: boolean;
    peerRelay: boolean;
    taskRefs: boolean;
    longPrompt: boolean;
    latencyStable: boolean;
  };
  /** Free-form notes: the run id, model mismatches, and the failure message if any. */
  diagnostics: string[];
}

liveDescribe('OpenCode semantic model matrix live e2e', () => {
  it(
    'launches realistic OpenCode teams and scores model behavior sequentially',
    async () => {
      const scenario = await loadOpenCodeSemanticScenario();
      const models = parseOpenCodeE2EModelList();
      const results: ModelResult[] = [];

      // Deliberately sequential: each run installs a global base-path override
      // (setClaudeBasePathOverride) and resets it when it finishes.
      for (const model of models) {
        results.push(await runModelScenario({ scenario, model }));
      }

      // Write the report before asserting so it exists even when models fail.
      await writeModelMatrixReport({
        generatedAt: new Date().toISOString(),
        models: results,
      });

      const failures = results.filter((result) => !result.passed);
      expect(failures, JSON.stringify(results, null, 2)).toEqual([]);
    },
    // Timeout budget: 420 s per model, with a floor of 420 s.
    Math.max(420_000, parseOpenCodeE2EModelList().length * 420_000)
  );
});

/**
 * Runs the full scenario (launch, direct delivery, peer relay) for one model
 * inside a fresh temporary directory and returns its scored result.
 *
 * Failures inside the scenario never reject: the error message is appended to
 * `diagnostics` and the result reports whichever stages completed before the
 * failure. The team is stopped, the harness disposed, the base-path override
 * reset and the temporary directory removed on every path.
 *
 * @param input.scenario - Scenario definition shared by all models.
 * @param input.model - Model identifier passed to the live harness.
 * @returns The per-model result with stage flags, score and diagnostics.
 */
async function runModelScenario(input: {
  scenario: OpenCodeSemanticScenario;
  model: string;
}): Promise<ModelResult> {
  const startedAt = Date.now();
  const stages: ModelResult['stages'] = {
    launchBootstrap: false,
    directReply: false,
    peerRelay: false,
    taskRefs: false,
    longPrompt: false,
    latencyStable: false,
  };
  const diagnostics: string[] = [];
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'opencode-semantic-model-matrix-'));
  const tempClaudeRoot = path.join(tempDir, '.claude');
  const projectPath = path.join(tempDir, 'project');
  const teamName = `${input.scenario.teamNamePrefix}-${sanitizeModelForTeamName(input.model)}-${Date.now()}`;
  let harness: Awaited<ReturnType<typeof createOpenCodeLiveHarness>> | null = null;

  try {
    await fs.mkdir(tempClaudeRoot, { recursive: true });
    await fs.mkdir(projectPath, { recursive: true });
    setClaudeBasePathOverride(tempClaudeRoot);
    await materializeOpenCodeScenarioProject(input.scenario, projectPath);

    harness = await createOpenCodeLiveHarness({
      tempDir,
      selectedModel: input.model,
    });

    // --- Stage: launch / bootstrap ---------------------------------------
    const progressEvents: TeamProvisioningProgress[] = [];
    const createStartedAt = Date.now();
    const { runId } = await harness.svc.createTeam(
      buildOpenCodeScenarioTeamRequest({
        scenario: input.scenario,
        teamName,
        projectPath,
        model: harness.selectedModel,
      }),
      (progress) => progressEvents.push(progress)
    );
    diagnostics.push(`runId=${runId}`);
    await materializeOpenCodeScenarioTasks({ scenario: input.scenario, teamName, projectPath });

    const progressDump = formatProgressDump(progressEvents);
    if (
      !progressEvents.some((progress) => progress.message.includes('OpenCode team launch is ready'))
    ) {
      throw new Error(`OpenCode launch did not reach ready state.\n${progressDump}`);
    }

    const runtimeSnapshot = await harness.svc.getTeamAgentRuntimeSnapshot(teamName);
    for (const member of input.scenario.members) {
      const snapshot = runtimeSnapshot.members[member.name];
      if (!snapshot?.alive) {
        throw new Error(
          `OpenCode member ${member.name} is not alive. Snapshot: ${JSON.stringify(
            runtimeSnapshot,
            null,
            2
          )}`
        );
      }
      // A model mismatch is recorded as a diagnostic only; it does not fail the run.
      if (snapshot.runtimeModel !== harness.selectedModel) {
        diagnostics.push(
          `${member.name} runtime model ${snapshot.runtimeModel ?? 'unknown'} differs from ${harness.selectedModel}`
        );
      }
    }

    stages.launchBootstrap = true;
    // longPrompt: the joined team prompt is longer than 1500 characters.
    stages.longPrompt = input.scenario.teamPromptLines.join('\n').length > 1_500;
    // latencyStable: createTeam through the liveness check took under 240 s.
    stages.latencyStable = Date.now() - createStartedAt < 240_000;

    // --- Stage: direct delivery and reply --------------------------------
    const directTaskRef = taskRefForScenario(
      input.scenario,
      input.scenario.directDelivery.taskIndex,
      teamName
    );
    const directDelivery = await harness.svc.deliverOpenCodeMemberMessage(teamName, {
      memberName: input.scenario.directDelivery.memberName,
      messageId: `ui-direct-${Date.now()}`,
      replyRecipient: input.scenario.directDelivery.replyRecipient,
      actionMode: input.scenario.directDelivery.actionMode,
      taskRefs: [directTaskRef],
      source: 'manual',
      text: input.scenario.directDelivery.textLines.join('\n'),
    });
    if (!directDelivery.delivered) {
      throw new Error(`Direct OpenCode delivery failed: ${JSON.stringify(directDelivery, null, 2)}`);
    }

    const directReply = await waitForReplyWithTranscript({
      bridgeClient: harness.bridgeClient,
      teamName,
      memberName: input.scenario.directDelivery.memberName,
      projectPath,
      expectedToken: input.scenario.directDelivery.expectedReplyToken,
      timeoutMs: 180_000,
    });
    assertVisibleReplyContract(directReply, {
      expectedFrom: input.scenario.directDelivery.memberName,
      expectedTo: 'user',
      expectedTaskRef: directTaskRef,
    });
    stages.directReply = true;
    stages.taskRefs = hasTaskRef(directReply, directTaskRef);

    // --- Stage: peer relay -----------------------------------------------
    const peerTaskRef = taskRefForScenario(
      input.scenario,
      input.scenario.peerDelivery.taskIndex,
      teamName
    );
    const peerDelivery = await harness.svc.deliverOpenCodeMemberMessage(teamName, {
      memberName: input.scenario.peerDelivery.senderName,
      messageId: `ui-peer-${Date.now()}`,
      replyRecipient: input.scenario.peerDelivery.replyRecipient,
      actionMode: input.scenario.peerDelivery.actionMode,
      taskRefs: [peerTaskRef],
      source: 'manual',
      text: input.scenario.peerDelivery.textLines.join('\n'),
    });
    if (!peerDelivery.delivered) {
      throw new Error(`Peer OpenCode delivery failed: ${JSON.stringify(peerDelivery, null, 2)}`);
    }

    const peerMessage = await waitForMemberInboxMessage(
      teamName,
      input.scenario.peerDelivery.recipientName,
      input.scenario.peerDelivery.senderName,
      input.scenario.peerDelivery.peerToken,
      180_000
    );
    assertVisibleReplyContract(peerMessage, {
      expectedFrom: input.scenario.peerDelivery.senderName,
      expectedTo: input.scenario.peerDelivery.recipientName,
      expectedTaskRef: peerTaskRef,
    });

    await waitForOpenCodePeerRelay(
      harness.svc,
      teamName,
      input.scenario.peerDelivery.recipientName,
      peerMessage.messageId,
      180_000
    );

    const peerReply = await waitForReplyWithTranscript({
      bridgeClient: harness.bridgeClient,
      teamName,
      memberName: input.scenario.peerDelivery.recipientName,
      projectPath,
      expectedToken: input.scenario.peerDelivery.expectedReplyToken,
      timeoutMs: 180_000,
    });
    assertVisibleReplyContract(peerReply, {
      expectedFrom: input.scenario.peerDelivery.recipientName,
      expectedTo: 'user',
    });
    stages.peerRelay = true;

    const score = scoreModel(stages);
    return {
      model: input.model,
      passed: score === 100,
      score,
      durationMs: Date.now() - startedAt,
      stages,
      diagnostics,
    };
  } catch (error) {
    // Convert the failure into a scored result so the remaining models still run.
    diagnostics.push(describeError(error));
    return {
      model: input.model,
      passed: false,
      score: scoreModel(stages),
      durationMs: Date.now() - startedAt,
      stages,
      diagnostics,
    };
  } finally {
    // Best-effort teardown: cleanup errors are intentionally ignored so they
    // cannot replace the result computed above.
    if (harness) {
      await harness.svc.stopTeam(teamName).catch(() => undefined);
      await harness.dispose().catch(() => undefined);
      await waitForOpenCodeLanesStopped(teamName).catch(() => undefined);
    }
    setClaudeBasePathOverride(null);
    await fs.rm(tempDir, { recursive: true, force: true }).catch(() => undefined);
  }
}

/**
 * Waits for a member's reply in the user inbox; on failure, rethrows with the
 * member's runtime transcript appended to the message for debugging.
 *
 * Transcript collection is best-effort: if fetching or serializing it fails,
 * the original wait error is still reported and the transcript section
 * describes why it is unavailable.
 *
 * @throws Error carrying the original failure message plus the transcript dump.
 */
async function waitForReplyWithTranscript(input: {
  bridgeClient: Parameters<typeof getRuntimeTranscript>[0]['bridgeClient'];
  teamName: string;
  memberName: string;
  projectPath: string;
  expectedToken: string;
  timeoutMs: number;
}): Promise<InboxMessage> {
  try {
    return await waitForUserInboxReply(
      input.teamName,
      input.memberName,
      input.expectedToken,
      input.timeoutMs
    );
  } catch (error) {
    let transcriptDump: string;
    try {
      const transcript = await getRuntimeTranscript({
        bridgeClient: input.bridgeClient,
        teamName: input.teamName,
        memberName: input.memberName,
        projectPath: input.projectPath,
      });
      transcriptDump = JSON.stringify(transcript, null, 2);
    } catch (transcriptError) {
      // Never let a secondary failure hide the primary wait error.
      transcriptDump = `<unavailable: ${describeError(transcriptError)}>`;
    }
    throw new Error(`${describeError(error)}\nTranscript: ${transcriptDump}`);
  }
}

/**
 * Asserts that an inbox message has the expected sender/recipient, does not
 * contain a set of forbidden strings, and (optionally) carries a task ref.
 *
 * NOTE(review): the forbidden strings look like internal tool names and a
 * placeholder id that should not leak into user-visible text - confirm.
 */
function assertVisibleReplyContract(
  message: InboxMessage,
  input: {
    expectedFrom: string;
    expectedTo: string;
    expectedTaskRef?: TaskRef;
  }
): void {
  expect(message).toMatchObject({
    from: input.expectedFrom,
    to: input.expectedTo,
  });

  const text = message.text ?? '';
  expect(text).not.toContain('SendMessage');
  expect(text).not.toContain('runtime_deliver_message');
  expect(text).not.toContain('#00000000');
  // The escaped literal is the Cyrillic word "Ponyal"; presumably rejects a
  // reply that consists solely of that bare acknowledgement - confirm.
  expect(text.trim()).not.toBe('\u041f\u043e\u043d\u044f\u043b');

  if (input.expectedTaskRef) {
    expect(hasTaskRef(message, input.expectedTaskRef)).toBe(true);
  }
}

/**
 * Returns true when the message carries a task ref whose team name, task id
 * and display id all equal those of `expected`.
 */
function hasTaskRef(message: InboxMessage, expected: TaskRef): boolean {
  return (
    message.taskRefs?.some(
      (taskRef) =>
        taskRef.teamName === expected.teamName &&
        taskRef.taskId === expected.taskId &&
        taskRef.displayId === expected.displayId
    ) ?? false
  );
}

/**
 * Converts stage flags into a 0-100 score. Weights: launchBootstrap 25,
 * directReply 25, peerRelay 20, taskRefs 15, longPrompt 10, latencyStable 5.
 */
function scoreModel(stages: ModelResult['stages']): number {
  return (
    (stages.launchBootstrap ? 25 : 0) +
    (stages.directReply ? 25 : 0) +
    (stages.peerRelay ? 20 : 0) +
    (stages.taskRefs ? 15 : 0) +
    (stages.longPrompt ? 10 : 0) +
    (stages.latencyStable ? 5 : 0)
  );
}

/**
 * Writes the report as pretty-printed JSON to `report-<timestamp>.json`.
 *
 * The directory comes from `OPENCODE_E2E_REPORT_DIR` when that variable is set
 * to a non-blank value; otherwise it defaults to
 * `<cwd>/test-results/opencode-semantic-model-matrix`. The directory is
 * created if missing.
 */
async function writeModelMatrixReport(report: ModelMatrixReport): Promise<void> {
  const configuredDir = process.env.OPENCODE_E2E_REPORT_DIR?.trim();
  const outputDir = configuredDir
    ? path.resolve(configuredDir)
    : path.join(process.cwd(), 'test-results', 'opencode-semantic-model-matrix');

  await fs.mkdir(outputDir, { recursive: true });
  await fs.writeFile(
    path.join(outputDir, `report-${Date.now()}.json`),
    `${JSON.stringify(report, null, 2)}\n`,
    'utf8'
  );
}

/**
 * Renders provisioning progress events as one line per event, joining the
 * truthy fields (state, message, severity, error, CLI log tail) with ' | '.
 */
function formatProgressDump(progressEvents: TeamProvisioningProgress[]): string {
  return progressEvents
    .map((progress) =>
      [
        progress.state,
        progress.message,
        progress.messageSeverity,
        progress.error,
        progress.cliLogsTail,
      ]
        .filter(Boolean)
        .join(' | ')
    )
    .join('\n');
}

/**
 * Turns a model identifier into a team-name fragment: lowercased, with every
 * run of characters outside [a-z0-9] collapsed to a single '-', no leading or
 * trailing '-', and at most 48 characters long.
 */
function sanitizeModelForTeamName(model: string): string {
  return (
    model
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+/, '')
      .slice(0, 48)
      // Trim trailing dashes after truncation: cutting at 48 characters can
      // leave a dash at the end even when the full string had none.
      .replace(/-+$/, '')
  );
}

/** Extracts a printable message from an arbitrary thrown value. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}