From 9ad4269ebc19c69538c8c406d39daa2968ea7236 Mon Sep 17 00:00:00 2001 From: 777genius Date: Thu, 21 May 2026 19:03:47 +0300 Subject: [PATCH] feat(runtime): harden MCP launch orchestration --- .../test/atomicFile.test.js | 20 +- .../test/controller.test.js | 9 + agent-teams-controller/test/crossTeam.test.js | 6 + mcp-server/src/index.ts | 73 +- mcp-server/test/http.e2e.test.ts | 133 +++ mcp-server/test/startOptions.test.ts | 40 +- mcp-server/test/tools.test.ts | 11 +- runtime.lock.json | 14 +- .../composition/createCodexAccountFeature.ts | 74 +- .../services/team/AgentTeamsMcpHttpServer.ts | 1021 ++++++++++++++++- .../components/dashboard/CliStatusBanner.tsx | 5 - .../main/createCodexAccountFeature.test.ts | 87 ++ ...gentTeamsMcpHttpServer.integration.test.ts | 119 +- .../team/AgentTeamsMcpHttpServer.test.ts | 484 +++++++- test/setup.ts | 53 +- 15 files changed, 2029 insertions(+), 120 deletions(-) create mode 100644 mcp-server/test/http.e2e.test.ts diff --git a/agent-teams-controller/test/atomicFile.test.js b/agent-teams-controller/test/atomicFile.test.js index 9395d97c..1ab3687e 100644 --- a/agent-teams-controller/test/atomicFile.test.js +++ b/agent-teams-controller/test/atomicFile.test.js @@ -19,9 +19,23 @@ function withMockedRenameSync(mockRenameSync, callback) { } describe('atomic file writes', () => { + const tempDirs = []; + + function makeTempDir() { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-teams-atomic-')); + tempDirs.push(dir); + return dir; + } + + afterEach(() => { + for (const dir of tempDirs.splice(0)) { + fs.rmSync(dir, { recursive: true, force: true }); + } + }); + ['EPERM', 'EACCES', 'EBUSY'].forEach((code) => { it(`retries transient ${code} rename failures before publishing JSON`, () => { - const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-teams-atomic-')); + const dir = makeTempDir(); const filePath = path.join(dir, 'state.json'); let attempts = 0; @@ -47,7 +61,7 @@ describe('atomic file writes', () => { }); it('does not retry ENOENT rename failures and removes the temp file', () => { - const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-teams-atomic-')); + const dir = makeTempDir(); const filePath = path.join(dir, 'state.json'); let attempts = 0; @@ -69,7 +83,7 @@ describe('atomic file writes', () => { }); it('removes the temp file after retryable rename failures are exhausted', () => { - const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-teams-atomic-')); + const dir = makeTempDir(); const filePath = path.join(dir, 'state.json'); let attempts = 0; diff --git a/agent-teams-controller/test/controller.test.js b/agent-teams-controller/test/controller.test.js index c7a93130..ed42338a 100644 --- a/agent-teams-controller/test/controller.test.js +++ b/agent-teams-controller/test/controller.test.js @@ -6,8 +6,17 @@ const path = require('path'); const { createController } = require('../src/index.js'); describe('agent-teams-controller API', () => { + const tempDirs = []; + + afterEach(() => { + for (const dir of tempDirs.splice(0)) { + fs.rmSync(dir, { recursive: true, force: true }); + } + }); + function makeClaudeDir() { const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-teams-controller-')); + tempDirs.push(dir); fs.mkdirSync(path.join(dir, 'teams', 'my-team'), { recursive: true }); fs.mkdirSync(path.join(dir, 'tasks', 'my-team'), { recursive: true }); fs.writeFileSync( diff --git a/agent-teams-controller/test/crossTeam.test.js b/agent-teams-controller/test/crossTeam.test.js index 9499f71e..60251f1c 100644 --- a/agent-teams-controller/test/crossTeam.test.js +++ b/agent-teams-controller/test/crossTeam.test.js @@ -6,8 +6,11 @@ const { createController } = require('../src/index.js'); const { CROSS_TEAM_SOURCE, CROSS_TEAM_TAG_NAME } = require('../src/internal/crossTeamProtocol.js'); describe('crossTeam module', () => { + const tempDirs = []; + function makeClaudeDir(teams = {}) { const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'crossteam-test-')); + tempDirs.push(dir); for (const [teamName, config] of Object.entries(teams)) { const teamDir = path.join(dir, 'teams', teamName); @@ -28,6 +31,9 @@ describe('crossTeam module', () => { // Reset cascade guard between tests const cascadeGuard = require('../src/internal/cascadeGuard.js'); cascadeGuard.reset(); + for (const dir of tempDirs.splice(0)) { + fs.rmSync(dir, { recursive: true, force: true }); + } }); describe('sendCrossTeamMessage', () => { diff --git a/mcp-server/src/index.ts b/mcp-server/src/index.ts index 5445e709..13d8ba49 100644 --- a/mcp-server/src/index.ts +++ b/mcp-server/src/index.ts @@ -9,6 +9,11 @@ const HTTP_TRANSPORT = 'httpStream'; const STDIO_TRANSPORT = 'stdio'; const DEFAULT_HTTP_HOST = '127.0.0.1'; const DEFAULT_HTTP_ENDPOINT = '/mcp'; +const MCP_HTTP_IDENTITY_SERVICE = 'agent-teams-mcp-http'; +const MCP_HTTP_IDENTITY_SERVICE_ENV = 'AGENT_TEAMS_MCP_HTTP_IDENTITY_SERVICE'; +const MCP_HTTP_CLAUDE_DIR_HASH_ENV = 'AGENT_TEAMS_MCP_HTTP_CLAUDE_DIR_HASH'; +const MCP_HTTP_LAUNCH_SPEC_HASH_ENV = 'AGENT_TEAMS_MCP_HTTP_LAUNCH_SPEC_HASH'; +const MCP_HTTP_OWNER_INSTANCE_ID_ENV = 'AGENT_TEAMS_MCP_HTTP_OWNER_INSTANCE_ID'; export type AgentTeamsMcpStartOptions = | { @@ -23,10 +28,32 @@ export type AgentTeamsMcpStartOptions = }; }; -export function createServer() { +export interface AgentTeamsMcpHttpHealthIdentity { + schemaVersion: 1; + service: typeof MCP_HTTP_IDENTITY_SERVICE; + transport: typeof HTTP_TRANSPORT; + host: string; + port: number; + endpoint: `/${string}`; + claudeDirHash: string; + launchSpecHash: string; + ownerInstanceId: string; +} + +export function createServer(input: { healthIdentity?: AgentTeamsMcpHttpHealthIdentity | null } = {}) { const server = new FastMCP({ name: 'agent-teams-mcp', version: '1.0.0', + ...(input.healthIdentity + ? { + health: { + enabled: true, + path: '/health', + status: 200, + message: JSON.stringify(input.healthIdentity), + }, + } + : {}), }); registerTools(server); @@ -64,6 +91,45 @@ function parsePort(value: string | null | undefined): number { return parsed; } +function readIdentityValue(env: NodeJS.ProcessEnv, name: string): string | null { + const value = env[name]?.trim(); + return value && value.length > 0 ? value : null; +} + +export function buildHttpHealthIdentity( + options: AgentTeamsMcpStartOptions, + env: NodeJS.ProcessEnv = process.env +): AgentTeamsMcpHttpHealthIdentity | null { + if (options.transportType !== HTTP_TRANSPORT) { + return null; + } + + const service = readIdentityValue(env, MCP_HTTP_IDENTITY_SERVICE_ENV); + const claudeDirHash = readIdentityValue(env, MCP_HTTP_CLAUDE_DIR_HASH_ENV); + const launchSpecHash = readIdentityValue(env, MCP_HTTP_LAUNCH_SPEC_HASH_ENV); + const ownerInstanceId = readIdentityValue(env, MCP_HTTP_OWNER_INSTANCE_ID_ENV); + if ( + service !== MCP_HTTP_IDENTITY_SERVICE || + !claudeDirHash || + !launchSpecHash || + !ownerInstanceId + ) { + return null; + } + + return { + schemaVersion: 1, + service: MCP_HTTP_IDENTITY_SERVICE, + transport: HTTP_TRANSPORT, + host: options.httpStream.host, + port: options.httpStream.port, + endpoint: options.httpStream.endpoint, + claudeDirHash, + launchSpecHash, + ownerInstanceId, + }; +} + export function resolveStartOptions( argv: string[] = process.argv, env: NodeJS.ProcessEnv = process.env @@ -92,6 +158,7 @@ export function resolveStartOptions( } if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) { - const server = createServer(); - void server.start(resolveStartOptions()); + const startOptions = resolveStartOptions(); + const server = createServer({ healthIdentity: buildHttpHealthIdentity(startOptions) }); + void server.start(startOptions); } diff --git a/mcp-server/test/http.e2e.test.ts b/mcp-server/test/http.e2e.test.ts new file mode 100644 index 00000000..7972bd4d --- /dev/null +++ b/mcp-server/test/http.e2e.test.ts @@ -0,0 +1,133 @@ +import { spawn, type ChildProcess } from 'node:child_process'; +import http from 'node:http'; +import net from 'node:net'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +import { afterEach, describe, expect, it } from 'vitest'; + +const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..'); +const serverEntry = path.join(repoRoot, 'dist', 'index.js'); + +const children: ChildProcess[] = []; + +async function allocateLoopbackPort(): Promise { + return new Promise((resolve, reject) => { + const server = net.createServer(); + server.once('error', reject); + server.listen(0, '127.0.0.1', () => { + const address = server.address(); + if (!address || typeof address === 'string') { + server.close(() => reject(new Error('Failed to allocate HTTP e2e port'))); + return; + } + server.close(() => resolve(address.port)); + }); + }); +} + +async function readHealthBody(port: number): Promise<{ statusCode: number | null; body: string }> { + return new Promise((resolve) => { + let body = ''; + const request = http.get( + { + host: '127.0.0.1', + port, + path: '/health', + timeout: 1_000, + }, + (response) => { + response.setEncoding('utf8'); + response.on('data', (chunk: string) => { + body += chunk; + }); + response.on('end', () => resolve({ statusCode: response.statusCode ?? null, body })); + } + ); + request.once('timeout', () => { + request.destroy(); + resolve({ statusCode: null, body: '' }); + }); + request.once('error', () => resolve({ statusCode: null, body: '' })); + }); +} + +async function waitForHealthBody(port: number): Promise<{ statusCode: number | null; body: string }> { + const startedAt = Date.now(); + while (Date.now() - startedAt < 10_000) { + const result = await readHealthBody(port); + if (result.statusCode === 200) { + return result; + } + await new Promise((resolve) => setTimeout(resolve, 100)); + } + throw new Error(`HTTP MCP server did not become healthy on port ${port}`); +} + +afterEach(async () => { + await Promise.all( + children.splice(0).map( + (child) => + new Promise((resolve) => { + if (child.exitCode !== null || child.signalCode !== null) { + resolve(); + return; + } + child.once('exit', () => resolve()); + child.kill('SIGTERM'); + setTimeout(() => { + if (child.exitCode === null && child.signalCode === null) { + child.kill('SIGKILL'); + } + }, 500).unref(); + }) + ) + ); +}); + +describe('agent-teams-mcp HTTP e2e', () => { + it('returns app-managed JSON identity from /health when identity env is present', async () => { + const port = await allocateLoopbackPort(); + const child = spawn( + process.execPath, + [ + serverEntry, + '--transport', + 'httpStream', + '--host', + '127.0.0.1', + '--port', + String(port), + '--endpoint', + 'mcp', + ], + { + env: { + ...process.env, + AGENT_TEAMS_MCP_HTTP_IDENTITY_SERVICE: 'agent-teams-mcp-http', + AGENT_TEAMS_MCP_HTTP_CLAUDE_DIR_HASH: 'claude-dir-hash-e2e', + AGENT_TEAMS_MCP_HTTP_LAUNCH_SPEC_HASH: 'launch-spec-hash-e2e', + AGENT_TEAMS_MCP_HTTP_OWNER_INSTANCE_ID: 'owner-e2e', + }, + stdio: ['ignore', 'ignore', 'pipe'], + } + ); + children.push(child); + + const health = await waitForHealthBody(port); + const parsed = JSON.parse(health.body) as Record; + + expect(health.statusCode).toBe(200); + expect(parsed).toEqual({ + schemaVersion: 1, + service: 'agent-teams-mcp-http', + transport: 'httpStream', + host: '127.0.0.1', + port, + endpoint: '/mcp', + claudeDirHash: 'claude-dir-hash-e2e', + launchSpecHash: 'launch-spec-hash-e2e', + ownerInstanceId: 'owner-e2e', + }); + }); +}); diff --git a/mcp-server/test/startOptions.test.ts b/mcp-server/test/startOptions.test.ts index 867a1918..416e80e9 100644 --- a/mcp-server/test/startOptions.test.ts +++ b/mcp-server/test/startOptions.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from 'vitest'; -import { resolveStartOptions } from '../src/index'; +import { buildHttpHealthIdentity, resolveStartOptions } from '../src/index'; describe('agent-teams MCP start options', () => { it('defaults to stdio transport', () => { @@ -51,4 +51,42 @@ describe('agent-teams MCP start options', () => { }, }); }); + + it('builds HTTP health identity only when app identity env is present', () => { + const options = resolveStartOptions( + ['node', 'index.js', '--transport', 'httpStream', '--port', '43125'], + {} + ); + + expect(buildHttpHealthIdentity(options, {})).toBeNull(); + expect( + buildHttpHealthIdentity(options, { + AGENT_TEAMS_MCP_HTTP_IDENTITY_SERVICE: 'agent-teams-mcp-http', + AGENT_TEAMS_MCP_HTTP_CLAUDE_DIR_HASH: 'claude-hash', + AGENT_TEAMS_MCP_HTTP_LAUNCH_SPEC_HASH: 'launch-hash', + AGENT_TEAMS_MCP_HTTP_OWNER_INSTANCE_ID: 'owner-id', + }) + ).toEqual({ + schemaVersion: 1, + service: 'agent-teams-mcp-http', + transport: 'httpStream', + host: '127.0.0.1', + port: 43125, + endpoint: '/mcp', + claudeDirHash: 'claude-hash', + launchSpecHash: 'launch-hash', + ownerInstanceId: 'owner-id', + }); + }); + + it('does not build HTTP health identity for stdio transport', () => { + const options = resolveStartOptions(['node', 'index.js'], { + AGENT_TEAMS_MCP_HTTP_IDENTITY_SERVICE: 'agent-teams-mcp-http', + AGENT_TEAMS_MCP_HTTP_CLAUDE_DIR_HASH: 'claude-hash', + AGENT_TEAMS_MCP_HTTP_LAUNCH_SPEC_HASH: 'launch-hash', + AGENT_TEAMS_MCP_HTTP_OWNER_INSTANCE_ID: 'owner-id', + }); + + expect(buildHttpHealthIdentity(options)).toBeNull(); + }); }); diff --git a/mcp-server/test/tools.test.ts b/mcp-server/test/tools.test.ts index ee0fb476..637106ff 100644 --- a/mcp-server/test/tools.test.ts +++ b/mcp-server/test/tools.test.ts @@ -31,6 +31,13 @@ function parseJsonToolResult(result: unknown) { describe('agent-teams-mcp tools', () => { const tools = collectTools(); + const tempDirs: string[] = []; + + afterEach(() => { + for (const dir of tempDirs.splice(0)) { + fs.rmSync(dir, { recursive: true, force: true }); + } + }); function getTool(name: string) { const tool = tools.get(name); @@ -39,7 +46,9 @@ describe('agent-teams-mcp tools', () => { } function makeClaudeDir() { - return fs.mkdtempSync(path.join(os.tmpdir(), 'agent-teams-mcp-')); + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-teams-mcp-')); + tempDirs.push(dir); + return dir; } function writeTeamConfig( diff --git a/runtime.lock.json b/runtime.lock.json index 55271663..11a2ff0b 100644 --- a/runtime.lock.json +++ b/runtime.lock.json @@ -1,27 +1,27 @@ { - "version": "0.0.44", - "sourceRef": "v0.0.44", + "version": "0.0.45", + "sourceRef": "v0.0.45", "sourceRepository": "777genius/agent_teams_orchestrator", "releaseRepository": "777genius/agent-teams-ai", - "releaseTag": "v2.0.0", + "releaseTag": "v2.1.0", "assets": { "darwin-arm64": { - "file": "agent-teams-runtime-darwin-arm64-v0.0.44.tar.gz", + "file": "agent-teams-runtime-darwin-arm64-v0.0.45.tar.gz", "archiveKind": "tar.gz", "binaryName": "claude-multimodel" }, "darwin-x64": { - "file": "agent-teams-runtime-darwin-x64-v0.0.44.tar.gz", + "file": "agent-teams-runtime-darwin-x64-v0.0.45.tar.gz", "archiveKind": "tar.gz", "binaryName": "claude-multimodel" }, "linux-x64": { - "file": "agent-teams-runtime-linux-x64-v0.0.44.tar.gz", + "file": "agent-teams-runtime-linux-x64-v0.0.45.tar.gz", "archiveKind": "tar.gz", "binaryName": "claude-multimodel" }, "win32-x64": { - "file": "agent-teams-runtime-win32-x64-v0.0.44.zip", + "file": "agent-teams-runtime-win32-x64-v0.0.45.zip", "archiveKind": "zip", "binaryName": "claude-multimodel.exe" } diff --git a/src/features/codex-account/main/composition/createCodexAccountFeature.ts b/src/features/codex-account/main/composition/createCodexAccountFeature.ts index a248c49f..4810bf96 100644 --- a/src/features/codex-account/main/composition/createCodexAccountFeature.ts +++ b/src/features/codex-account/main/composition/createCodexAccountFeature.ts @@ -42,6 +42,8 @@ const SNAPSHOT_CACHE_TTL_MS = 5_000; const RATE_LIMITS_CACHE_TTL_MS = 45_000; const LAST_KNOWN_GOOD_MANAGED_ACCOUNT_TTL_MS = 60_000; const CODEX_BINARY_COLD_RETRY_TIMEOUT_MS = 12_000; +const CODEX_CLI_NOT_FOUND_MESSAGE = + 'Codex CLI not found. Install Codex to use native account management.'; interface CodexLastKnownAccount { payload: CodexAppServerGetAccountResponse; @@ -487,15 +489,48 @@ class CodexAccountFeatureFacadeImpl implements CodexAccountFeatureFacade { const now = Date.now(); if (!binaryPath) { + const freshRuntimeContext = this.getFreshLastKnownRuntimeContext(now); + if (freshRuntimeContext) { + const freshAccountPayload = this.getFreshLastKnownAccount(now); + const accountPayload = freshAccountPayload ?? null; + const managedAccount = asCodexManagedAccount(accountPayload?.account ?? null); + const readiness = evaluateCodexLaunchReadiness({ + preferredAuthMode, + managedAccount, + apiKey, + appServerState: 'healthy', + appServerStatusMessage: null, + localActiveChatgptAccountPresent, + }); + const snapshot = this.setSnapshot({ + preferredAuthMode, + effectiveAuthMode: readiness.effectiveAuthMode, + launchAllowed: readiness.launchAllowed, + launchIssueMessage: readiness.issueMessage, + launchReadinessState: readiness.state, + appServerState: 'healthy', + appServerStatusMessage: null, + managedAccount, + apiKey, + requiresOpenaiAuth: accountPayload?.requiresOpenaiAuth ?? null, + localAccountArtifactsPresent, + localActiveChatgptAccountPresent, + runtimeContext: freshRuntimeContext, + login, + rateLimits: this.snapshotCache?.rateLimits ?? null, + updatedAt: new Date(now).toISOString(), + }); + return snapshot; + } + const snapshot = this.setSnapshot({ preferredAuthMode, effectiveAuthMode: null, launchAllowed: false, - launchIssueMessage: 'Codex CLI not found. Install Codex to use native account management.', + launchIssueMessage: CODEX_CLI_NOT_FOUND_MESSAGE, launchReadinessState: 'runtime_missing', appServerState: 'runtime-missing', - appServerStatusMessage: - 'Codex CLI not found. Install Codex to use native account management.', + appServerStatusMessage: CODEX_CLI_NOT_FOUND_MESSAGE, managedAccount: null, apiKey, requiresOpenaiAuth: null, @@ -521,7 +556,15 @@ class CodexAccountFeatureFacadeImpl implements CodexAccountFeatureFacade { let appServerStatusMessage: string | null = null; let accountPayload = this.lastKnownAccount?.payload ?? null; let requiresOpenaiAuth: boolean | null = accountPayload?.requiresOpenaiAuth ?? null; - let runtimeContext = createRuntimeContext(binaryPath, null); + const previousRuntimeContext = this.getFreshLastKnownRuntimeContext(now); + let runtimeContext = createRuntimeContext( + binaryPath, + previousRuntimeContext?.binaryPath === binaryPath ? previousRuntimeContext.codexHome : null + ); + this.lastKnownRuntimeContext = { + payload: runtimeContext, + observedAt: now, + }; const cachedRateLimitsAreFresh = this.hasFreshRateLimits(now); const shouldRequestRateLimits = options?.includeRateLimits === true && !cachedRateLimitsAreFresh; @@ -706,6 +749,29 @@ class CodexAccountFeatureFacadeImpl implements CodexAccountFeatureFacade { ); } + private getFreshLastKnownRuntimeContext(now: number): CodexRuntimeContext | null { + if ( + !this.lastKnownRuntimeContext || + now - this.lastKnownRuntimeContext.observedAt > LAST_KNOWN_GOOD_MANAGED_ACCOUNT_TTL_MS || + !this.lastKnownRuntimeContext.payload.binaryPath + ) { + return null; + } + + return this.lastKnownRuntimeContext.payload; + } + + private getFreshLastKnownAccount(now: number): CodexAppServerGetAccountResponse | null { + if ( + !this.lastKnownAccount || + now - this.lastKnownAccount.observedAt > LAST_KNOWN_GOOD_MANAGED_ACCOUNT_TTL_MS + ) { + return null; + } + + return this.lastKnownAccount.payload; + } + private async emitCurrentSnapshot(): Promise { if (!this.snapshotCache) { return this.refreshSnapshot(); diff --git a/src/main/services/team/AgentTeamsMcpHttpServer.ts b/src/main/services/team/AgentTeamsMcpHttpServer.ts index 48299425..bf9f01e7 100644 --- a/src/main/services/team/AgentTeamsMcpHttpServer.ts +++ b/src/main/services/team/AgentTeamsMcpHttpServer.ts @@ -1,18 +1,25 @@ -import { applyAgentTeamsIdentityEnv } from '@main/services/identity/AgentTeamsIdentityStore'; -import { killProcessTree, spawnCli } from '@main/utils/childProcess'; -import { getClaudeBasePath } from '@main/utils/pathDecoder'; -import { createLogger } from '@shared/utils/logger'; -import { type ChildProcess } from 'child_process'; -import { createHash } from 'crypto'; -import http from 'http'; -import net from 'net'; +import { type ChildProcess, execFile, type ExecFileException } from 'node:child_process'; +import { createHash, randomUUID } from 'node:crypto'; +import * as fs from 'node:fs'; +import http from 'node:http'; +import net from 'node:net'; +import * as path from 'node:path'; +import { type RuntimeProcessTableRow } from '@features/tmux-installer/main'; +import { applyAgentTeamsIdentityEnv } from '@main/services/identity/AgentTeamsIdentityStore'; +import { atomicWriteAsync } from '@main/utils/atomicWrite'; +import { killProcessTree, spawnCli } from '@main/utils/childProcess'; +import { getAppDataPath, getClaudeBasePath } from '@main/utils/pathDecoder'; +import { killProcessByPid } from '@main/utils/processKill'; +import { createLogger } from '@shared/utils/logger'; + +import { type FileLockOptions, withFileLock } from './fileLock'; import { type McpLaunchSpec, resolveAgentTeamsMcpLaunchSpec } from './TeamMcpConfigBuilder'; const logger = createLogger('Service:AgentTeamsMcpHttpServer'); const MCP_HTTP_HOST = '127.0.0.1'; const MCP_HTTP_ENDPOINT = '/mcp'; -const MCP_HTTP_READY_TIMEOUT_MS = 5_000; +const MCP_HTTP_READY_TIMEOUT_MS = 10_000; const MCP_HTTP_EXISTING_HANDLE_READY_TIMEOUT_MS = 3_000; const MCP_HTTP_READY_POLL_MS = 100; const MCP_HTTP_PORT_RELEASE_TIMEOUT_MS = 3_000; @@ -20,6 +27,21 @@ const MCP_HTTP_STABLE_PORT_BASE = 43_100; const MCP_HTTP_STABLE_PORT_SPAN = 700; const MCP_HTTP_STABLE_PORT_SCAN_LIMIT = 20; const MCP_HTTP_PORT_ENV = 'CLAUDE_TEAM_OPENCODE_MCP_HTTP_PORT'; +const MCP_HTTP_HEALTH_BODY_MAX_BYTES = 8 * 1024; +const MCP_HTTP_IDENTITY_SERVICE = 'agent-teams-mcp-http'; +const MCP_HTTP_IDENTITY_SERVICE_ENV = 'AGENT_TEAMS_MCP_HTTP_IDENTITY_SERVICE'; +const MCP_HTTP_CLAUDE_DIR_HASH_ENV = 'AGENT_TEAMS_MCP_HTTP_CLAUDE_DIR_HASH'; +const MCP_HTTP_LAUNCH_SPEC_HASH_ENV = 'AGENT_TEAMS_MCP_HTTP_LAUNCH_SPEC_HASH'; +const MCP_HTTP_OWNER_INSTANCE_ID_ENV = 'AGENT_TEAMS_MCP_HTTP_OWNER_INSTANCE_ID'; +const MCP_HTTP_STATE_DIR = 'mcp-http-server'; +const MCP_HTTP_STATE_FILE = 'state.json'; +const MCP_HTTP_STATE_LOCK_OPTIONS: FileLockOptions = { + acquireTimeoutMs: 5_000, + staleTimeoutMs: 30_000, + retryIntervalMs: 25, +}; +const MCP_HTTP_CLEANUP_DISABLED_ENV = 'CLAUDE_TEAM_DISABLE_MCP_ORPHAN_CLEANUP'; +const MCP_HTTP_ORPHAN_TERMINATE_GRACE_MS = 250; export interface AgentTeamsMcpHttpTransportEvidence { schemaVersion: 1; @@ -33,6 +55,18 @@ export interface AgentTeamsMcpHttpTransportEvidence { observedAt: string; } +export interface AgentTeamsMcpHttpIdentity { + schemaVersion: 1; + service: typeof MCP_HTTP_IDENTITY_SERVICE; + transport: 'httpStream'; + host: string; + port: number; + endpoint: string; + claudeDirHash: string; + launchSpecHash: string; + ownerInstanceId: string; +} + export interface AgentTeamsMcpHttpServerHandle { url: string; port: number; @@ -43,13 +77,67 @@ export interface AgentTeamsMcpHttpServerHandle { diagnostics: string[]; } +export interface AgentTeamsMcpHttpHealthProbe { + healthy: boolean; + statusCode: number | null; + identity: AgentTeamsMcpHttpIdentity | null; +} + export interface AgentTeamsMcpHttpServerDeps { resolveLaunchSpec?: () => Promise; allocatePort?: () => Promise; spawnProcess?: (command: string, args: string[], env: NodeJS.ProcessEnv) => ChildProcess; waitForPort?: (host: string, port: number, timeoutMs: number) => Promise; + probeHealth?: (host: string, port: number) => Promise; + canListenOnPort?: (host: string, port: number) => Promise; + statePath?: string | null; + withStateLock?: ( + filePath: string, + fn: () => Promise, + options?: FileLockOptions + ) => Promise; + disableOrphanCleanup?: boolean; + listProcessRows?: () => Promise; + readProcessDetails?: (pid: number) => Promise; + readProcessStartTimeMs?: (pid: number) => Promise; + killProcess?: (pid: number) => void; + forceKillProcess?: (pid: number) => void; + isProcessAlive?: (pid: number) => boolean; + sleepMs?: (ms: number) => Promise; } +interface AgentTeamsMcpExpectedHttpIdentity { + service: typeof MCP_HTTP_IDENTITY_SERVICE; + transport: 'httpStream'; + host: string; + endpoint: string; + claudeDirHash: string; + launchSpecHash: string; + ownerInstanceId: string; +} + +interface AgentTeamsMcpHttpState { + schemaVersion: 1; + service: typeof MCP_HTTP_IDENTITY_SERVICE; + transport: 'httpStream'; + host: string; + port: number; + endpoint: string; + url: string; + urlHash: string; + pid: number | null; + claudeDirHash: string; + launchSpecHash: string; + ownerInstanceId: string; + startedAt: string; + updatedAt: string; +} + +type PortClassification = + | { kind: 'available' } + | { kind: 'owned'; identity: AgentTeamsMcpHttpIdentity } + | { kind: 'occupied_unknown'; healthy: boolean }; + async function allocateLoopbackPort(): Promise { return new Promise((resolve, reject) => { const server = net.createServer(); @@ -76,7 +164,11 @@ async function canListenOnLoopbackPort(host: string, port: number): Promise { const server = net.createServer(); server.once('error', () => { - resolve(false); + try { + server.close(() => resolve(false)); + } catch { + resolve(false); + } }); server.listen(port, host, () => { server.close(() => resolve(true)); @@ -88,8 +180,89 @@ async function sleep(ms: number): Promise { await new Promise((resolve) => setTimeout(resolve, ms)); } -async function isHealthReady(host: string, port: number): Promise { +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function asString(value: unknown): string | null { + return typeof value === 'string' && value.trim().length > 0 ? value : null; +} + +function asPort(value: unknown): number | null { + return Number.isInteger(value) && Number(value) > 0 && Number(value) <= 65_535 + ? Number(value) + : null; +} + +function asPositiveInteger(value: unknown): number | null { + return Number.isInteger(value) && Number(value) > 0 ? Number(value) : null; +} + +function parseHealthIdentity(raw: string): AgentTeamsMcpHttpIdentity | null { + const trimmed = raw.trim(); + if (!trimmed.startsWith('{')) { + return null; + } + + try { + const parsed = JSON.parse(trimmed) as unknown; + if (!isRecord(parsed)) { + return null; + } + + const service = parsed.service; + const transport = parsed.transport; + const host = asString(parsed.host); + const port = asPort(parsed.port); + const endpoint = asString(parsed.endpoint); + const claudeDirHash = asString(parsed.claudeDirHash); + const launchSpecHash = asString(parsed.launchSpecHash); + const ownerInstanceId = asString(parsed.ownerInstanceId); + if ( + parsed.schemaVersion !== 1 || + service !== MCP_HTTP_IDENTITY_SERVICE || + transport !== 'httpStream' || + !host || + port === null || + !endpoint || + !claudeDirHash || + !launchSpecHash || + !ownerInstanceId + ) { + return null; + } + + return { + schemaVersion: 1, + service: MCP_HTTP_IDENTITY_SERVICE, + transport: 'httpStream', + host, + port, + endpoint, + claudeDirHash, + launchSpecHash, + ownerInstanceId, + }; + } catch { + return null; + } +} + +async function probeLoopbackHealth( + host: string, + port: number +): Promise { return new Promise((resolve) => { + let settled = false; + let body = ''; + const finish = (probe: AgentTeamsMcpHttpHealthProbe): void => { + if (settled) { + return; + } + settled = true; + resolve(probe); + }; + const request = http.get( { host, @@ -98,16 +271,30 @@ async function isHealthReady(host: string, port: number): Promise { timeout: MCP_HTTP_READY_POLL_MS, }, (response) => { - response.resume(); - resolve((response.statusCode ?? 500) >= 200 && (response.statusCode ?? 500) < 300); + response.setEncoding('utf8'); + response.on('data', (chunk: string) => { + if (body.length >= MCP_HTTP_HEALTH_BODY_MAX_BYTES) { + return; + } + body += chunk.slice(0, MCP_HTTP_HEALTH_BODY_MAX_BYTES - body.length); + }); + response.on('end', () => { + const statusCode = response.statusCode ?? null; + const healthy = statusCode !== null && statusCode >= 200 && statusCode < 300; + finish({ + healthy, + statusCode, + identity: healthy ? parseHealthIdentity(body) : null, + }); + }); } ); request.once('timeout', () => { request.destroy(); - resolve(false); + finish({ healthy: false, statusCode: null, identity: null }); }); request.once('error', () => { - resolve(false); + finish({ healthy: false, statusCode: null, identity: null }); }); }); } @@ -115,7 +302,7 @@ async function isHealthReady(host: string, port: number): Promise { async function waitForLoopbackPort(host: string, port: number, timeoutMs: number): Promise { const startedAt = Date.now(); while (Date.now() - startedAt < timeoutMs) { - if (await isHealthReady(host, port)) { + if ((await probeLoopbackHealth(host, port)).healthy) { return; } await sleep(MCP_HTTP_READY_POLL_MS); @@ -211,12 +398,289 @@ function buildTransportEvidence( }; } +function buildStatePath(): string { + return path.join(getAppDataPath(), MCP_HTTP_STATE_DIR, MCP_HTTP_STATE_FILE); +} + +function buildLaunchSpecHash(launchSpec: McpLaunchSpec): string { + return sha256Hex(JSON.stringify({ command: launchSpec.command, args: launchSpec.args })); +} + +function buildExpectedIdentity( + launchSpec: McpLaunchSpec, + ownerInstanceId: string +): AgentTeamsMcpExpectedHttpIdentity { + return { + service: MCP_HTTP_IDENTITY_SERVICE, + transport: 'httpStream', + host: MCP_HTTP_HOST, + endpoint: MCP_HTTP_ENDPOINT, + claudeDirHash: sha256Hex(getClaudeBasePath()), + launchSpecHash: buildLaunchSpecHash(launchSpec), + ownerInstanceId, + }; +} + +function identityMatchesExpected( + identity: AgentTeamsMcpHttpIdentity, + expected: AgentTeamsMcpExpectedHttpIdentity, + port?: number +): boolean { + return ( + identity.service === expected.service && + identity.transport === expected.transport && + identity.host === expected.host && + identity.endpoint === expected.endpoint && + identity.claudeDirHash === expected.claudeDirHash && + identity.launchSpecHash === expected.launchSpecHash && + (port === undefined || identity.port === port) + ); +} + +function buildState( + handle: AgentTeamsMcpHttpServerHandle, + identity: AgentTeamsMcpHttpIdentity, + pid: number | null, + startedAt: string +): AgentTeamsMcpHttpState { + return { + schemaVersion: 1, + service: MCP_HTTP_IDENTITY_SERVICE, + transport: 'httpStream', + host: MCP_HTTP_HOST, + port: handle.port, + endpoint: MCP_HTTP_ENDPOINT, + url: handle.url, + urlHash: handle.urlHash, + pid, + claudeDirHash: identity.claudeDirHash, + launchSpecHash: identity.launchSpecHash, + ownerInstanceId: identity.ownerInstanceId, + startedAt, + updatedAt: new Date().toISOString(), + }; +} + +function parseState(raw: string): AgentTeamsMcpHttpState | null { + try { + const parsed = JSON.parse(raw) as unknown; + if (!isRecord(parsed)) { + return null; + } + + const host = asString(parsed.host); + const port = asPort(parsed.port); + const endpoint = asString(parsed.endpoint); + const url = asString(parsed.url); + const urlHash = asString(parsed.urlHash); + const pid = parsed.pid === null ? null : asPositiveInteger(parsed.pid); + const claudeDirHash = asString(parsed.claudeDirHash); + const launchSpecHash = asString(parsed.launchSpecHash); + const ownerInstanceId = asString(parsed.ownerInstanceId); + const startedAt = asString(parsed.startedAt); + const updatedAt = asString(parsed.updatedAt); + if ( + parsed.schemaVersion !== 1 || + parsed.service !== MCP_HTTP_IDENTITY_SERVICE || + parsed.transport !== 'httpStream' || + !host || + port === null || + !endpoint || + !url || + !urlHash || + (pid === null && parsed.pid !== null) || + !claudeDirHash || + !launchSpecHash || + !ownerInstanceId || + !startedAt || + !updatedAt + ) { + return null; + } + + return { + schemaVersion: 1, + service: MCP_HTTP_IDENTITY_SERVICE, + transport: 'httpStream', + host, + port, + endpoint, + url, + urlHash, + pid, + claudeDirHash, + launchSpecHash, + ownerInstanceId, + startedAt, + updatedAt, + }; + } catch { + return null; + } +} + +function stateMatchesExpected( + state: AgentTeamsMcpHttpState, + expected: AgentTeamsMcpExpectedHttpIdentity +): boolean { + return ( + state.service === expected.service && + state.transport === expected.transport && + state.host === expected.host && + state.endpoint === expected.endpoint && + state.url === `http://${MCP_HTTP_HOST}:${state.port}${MCP_HTTP_ENDPOINT}` && + state.urlHash === sha256Hex(state.url) && + state.claudeDirHash === expected.claudeDirHash && + state.launchSpecHash === expected.launchSpecHash + ); +} + +function isFileLockTimeoutError(error: unknown): boolean { + return error instanceof Error && error.message.startsWith('File lock timeout:'); +} + +function diagnostic(message: string, diagnostics: string[]): void { + diagnostics.push(message); +} + +function emitDiagnostics(diagnostics: readonly string[]): void { + for (const item of diagnostics) { + logger.warn(`Agent Teams MCP HTTP diagnostic: ${item}`); + } +} + +function parseCommandArg(command: string, flag: string): string | null { + const escaped = flag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + const match = new RegExp(`(?:^|\\s)${escaped}(?:=|\\s+)(?:"([^"]+)"|'([^']+)'|(\\S+))`).exec( + command + ); + return match?.[1] ?? match?.[2] ?? match?.[3] ?? null; +} + +function parseCommandPort(command: string): number | null { + const raw = parseCommandArg(command, '--port'); + const parsed = raw ? Number.parseInt(raw, 10) : NaN; + return Number.isInteger(parsed) && parsed > 0 && parsed <= 65_535 ? parsed : null; +} + +function commandArgEquals(command: string, flag: string, expected: string): boolean { + return parseCommandArg(command, flag) === expected; +} + +function isMcpHttpServerCommand(command: string): boolean { + const normalized = command.trim(); + return ( + /mcp-server[/\\](?:src[/\\]index\.ts|dist[/\\]index\.js|index\.js)(?=\s|$)/.test(normalized) && + commandArgEquals(normalized, '--transport', 'httpStream') && + commandArgEquals(normalized, '--host', MCP_HTTP_HOST) && + commandArgEquals(normalized, '--endpoint', MCP_HTTP_ENDPOINT) && + parseCommandPort(normalized) !== null + ); +} + +function processDetailsIncludeMarker(details: string, marker: string): boolean { + return new RegExp(`(^|\\s)${marker.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}(?=\\s|$)`).test( + details + ); +} + +function hasManagedMcpDetails(details: string, port: number): boolean { + return ( + processDetailsIncludeMarker(details, 'AGENT_TEAMS_MCP_TRANSPORT=httpStream') && + processDetailsIncludeMarker(details, `AGENT_TEAMS_MCP_HTTP_HOST=${MCP_HTTP_HOST}`) && + processDetailsIncludeMarker(details, `AGENT_TEAMS_MCP_HTTP_PORT=${port}`) && + processDetailsIncludeMarker(details, `AGENT_TEAMS_MCP_HTTP_ENDPOINT=${MCP_HTTP_ENDPOINT}`) && + processDetailsIncludeMarker(details, `AGENT_TEAMS_MCP_CLAUDE_DIR=${getClaudeBasePath()}`) + ); +} + +function isNativeProcessAlive(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch (error) { + return (error as NodeJS.ErrnoException).code === 'EPERM'; + } +} + +async function readNativeProcessCommandWithEnv(pid: number): Promise { + return execFileText('ps', ['eww', '-p', String(pid), '-o', 'command='], 2_000, 2 * 1024 * 1024); +} + +async function readNativeProcessStartTimeMs(pid: number): Promise { + const output = await execFileText('ps', ['-p', String(pid), '-o', 'lstart='], 2_000, 64 * 1024); + if (!output) { + return null; + } + const parsed = Date.parse(output.trim()); + return Number.isFinite(parsed) ? parsed : null; +} + +function parseNativeProcessRows(output: string): RuntimeProcessTableRow[] { + const rows: RuntimeProcessTableRow[] = []; + for (const line of output.split('\n')) { + const match = /^\s*(\d+)\s+(\d+)\s+(.*)$/.exec(line); + if (!match) { + continue; + } + const pid = Number.parseInt(match[1], 10); + const ppid = Number.parseInt(match[2], 10); + const command = match[3]?.trim() ?? ''; + if (pid > 0 && ppid >= 0 && command.length > 0) { + rows.push({ pid, ppid, command }); + } + } + return rows; +} + +async function listNativeProcessRows(): Promise { + if (process.platform === 'win32') { + return []; + } + const output = await execFileText( + 'ps', + ['-ax', '-o', 'pid=,ppid=,command='], + 2_000, + 4 * 1024 * 1024 + ); + return output ? parseNativeProcessRows(output) : []; +} + +function execFileText( + command: string, + args: string[], + timeout: number, + maxBuffer: number +): Promise { + return new Promise((resolve) => { + execFile( + command, + args, + { + encoding: 'utf8', + timeout, + maxBuffer, + windowsHide: true, + }, + (error: ExecFileException | null, stdout: string | Buffer) => { + if (error) { + resolve(null); + return; + } + resolve(String(stdout)); + } + ); + }); +} + export class AgentTeamsMcpHttpServer { private startPromise: Promise | null = null; private child: ChildProcess | null = null; private handle: AgentTeamsMcpHttpServerHandle | null = null; private generation = 0; private readonly expectedStopChildren = new WeakSet(); + private readonly ownerInstanceId = randomUUID(); + private readonly startedAtMs = Date.now(); constructor(private readonly deps: AgentTeamsMcpHttpServerDeps = {}) {} @@ -235,12 +699,16 @@ export class AgentTeamsMcpHttpServer { async stop(): Promise { const child = this.child; - const releasePort = this.handle?.port ?? null; + const handle = this.handle; + const releasePort = child ? (handle?.port ?? null) : null; this.child = null; this.handle = null; if (child) { this.expectedStopChildren.add(child); killProcessTree(child, 'SIGKILL'); + if (handle) { + await this.clearStateForOwnedHandle(handle); + } } if (releasePort) { await waitForLoopbackPortAvailable( @@ -255,6 +723,13 @@ export class AgentTeamsMcpHttpServer { return this.handle; } + private resolveStatePath(): string | null { + if (this.deps.statePath === null) { + return null; + } + return this.deps.statePath ?? buildStatePath(); + } + private async reuseOrRestartExistingHandle( handle: AgentTeamsMcpHttpServerHandle ): Promise { @@ -285,23 +760,210 @@ export class AgentTeamsMcpHttpServer { return this.startOnce(); } - private async resolveStartPort(preferredPort?: number | null): Promise<{ - port: number; - diagnostics: string[]; - }> { - const diagnostics: string[] = []; - if (preferredPort && (await canListenOnLoopbackPort(MCP_HTTP_HOST, preferredPort))) { - return { port: preferredPort, diagnostics }; + private async readStateSafe( + statePath: string, + diagnostics: string[] + ): Promise { + try { + const raw = await fs.promises.readFile(statePath, 'utf8'); + const parsed = parseState(raw); + if (!parsed) { + diagnostic('opencode_app_mcp_state_ignored:parse_failed', diagnostics); + } + return parsed; + } catch (error) { + if ((error as NodeJS.ErrnoException).code !== 'ENOENT') { + diagnostic('opencode_app_mcp_state_ignored:read_failed', diagnostics); + } + return null; } + } + + private async writeStateSafe( + statePath: string | null, + state: AgentTeamsMcpHttpState, + diagnostics: string[] + ): Promise { + if (!statePath) { + return; + } + try { + await atomicWriteAsync(statePath, `${JSON.stringify(state, null, 2)}\n`); + } catch { + diagnostic('opencode_app_mcp_state_ignored:write_failed', diagnostics); + } + } + + private async clearStateForOwnedHandle(handle: AgentTeamsMcpHttpServerHandle): Promise { + const statePath = this.resolveStatePath(); + if (!statePath) { + return; + } + const lock = this.deps.withStateLock ?? withFileLock; + try { + await lock( + statePath, + async () => { + const diagnostics: string[] = []; + const state = await this.readStateSafe(statePath, diagnostics); + if ( + state && + state.port === handle.port && + state.urlHash === handle.urlHash && + state.ownerInstanceId === this.ownerInstanceId + ) { + await fs.promises.rm(statePath, { force: true }); + } + }, + MCP_HTTP_STATE_LOCK_OPTIONS + ); + } catch { + logger.warn('Agent Teams MCP HTTP diagnostic: opencode_app_mcp_state_ignored:clear_failed'); + } + } + + private async classifyPort( + port: number, + expectedIdentity: AgentTeamsMcpExpectedHttpIdentity + ): Promise { + const canListen = this.deps.canListenOnPort ?? canListenOnLoopbackPort; + if (await canListen(MCP_HTTP_HOST, port)) { + return { kind: 'available' }; + } + + const probeHealth = this.deps.probeHealth ?? probeLoopbackHealth; + const probe = await probeHealth(MCP_HTTP_HOST, port); + if ( + probe.healthy && + probe.identity && + identityMatchesExpected(probe.identity, expectedIdentity, port) + ) { + return { kind: 'owned', identity: probe.identity }; + } + + return { kind: 'occupied_unknown', healthy: probe.healthy }; + } + + private async tryAdoptStateHandle( + statePath: string, + expectedIdentity: AgentTeamsMcpExpectedHttpIdentity, + diagnostics: string[] + ): Promise { + const state = await this.readStateSafe(statePath, diagnostics); + if (!state) { + return null; + } + if (!stateMatchesExpected(state, expectedIdentity)) { + diagnostic('opencode_app_mcp_state_ignored:identity_mismatch', diagnostics); + return null; + } + + const probeHealth = this.deps.probeHealth ?? probeLoopbackHealth; + const probe = await probeHealth(MCP_HTTP_HOST, state.port); + if (!probe.healthy) { + diagnostic('opencode_app_mcp_state_ignored:unhealthy', diagnostics); + return null; + } + if (!probe.identity || !identityMatchesExpected(probe.identity, expectedIdentity, state.port)) { + diagnostic('opencode_app_mcp_state_ignored:identity_mismatch', diagnostics); + return null; + } + + return this.adoptHandle({ + identity: probe.identity, + pid: state.pid, + diagnostics, + diagnosticMessage: `opencode_app_mcp_adopted_state_server:${state.port}`, + statePath, + }); + } + + private async tryAdoptPortHandle( + port: number, + expectedIdentity: AgentTeamsMcpExpectedHttpIdentity, + statePath: string | null, + diagnostics: string[] + ): Promise { + const classification = await this.classifyPort(port, expectedIdentity); + if (classification.kind === 'available') { + return null; + } + if (classification.kind === 'owned') { + return this.adoptHandle({ + identity: classification.identity, + pid: null, + diagnostics, + diagnosticMessage: `opencode_app_mcp_adopted_port_server:${port}`, + statePath, + }); + } + diagnostic(`opencode_app_mcp_port_occupied_unknown:${port}`, diagnostics); + return null; + } + + private async adoptHandle(input: { + identity: AgentTeamsMcpHttpIdentity; + pid: number | null; + diagnostics: string[]; + diagnosticMessage: string; + statePath: string | null; + }): Promise { + diagnostic(input.diagnosticMessage, input.diagnostics); + const generation = this.generation + 1; + const transportEvidence = buildTransportEvidence(input.identity.port, generation); + this.generation = generation; + this.child = null; + this.handle = { + url: transportEvidence.url, + port: input.identity.port, + pid: input.pid, + generation, + urlHash: transportEvidence.urlHash, + transportEvidence, + diagnostics: input.diagnostics, + }; + await this.writeStateSafe( + input.statePath, + buildState(this.handle, input.identity, input.pid, new Date().toISOString()), + input.diagnostics + ); + logger.info(`Agent Teams MCP HTTP server adopted at ${this.handle.url}`); + emitDiagnostics(input.diagnostics); + this.scheduleOrphanCleanup(input.identity, this.handle); + return this.handle; + } + + private async resolveStartTarget( + preferredPort: number | null | undefined, + expectedIdentity: AgentTeamsMcpExpectedHttpIdentity, + statePath: string | null, + diagnostics: string[] + ): Promise< + { kind: 'port'; port: number } | { kind: 'handle'; handle: AgentTeamsMcpHttpServerHandle } + > { + const canListen = this.deps.canListenOnPort ?? canListenOnLoopbackPort; if (preferredPort) { - diagnostics.push(`opencode_app_mcp_preferred_port_unavailable:${preferredPort}`); + if (await canListen(MCP_HTTP_HOST, preferredPort)) { + return { kind: 'port', port: preferredPort }; + } + const adopted = await this.tryAdoptPortHandle( + preferredPort, + expectedIdentity, + statePath, + diagnostics + ); + if (adopted) { + return { kind: 'handle', handle: adopted }; + } + diagnostic(`opencode_app_mcp_preferred_port_unavailable:${preferredPort}`, diagnostics); } if (this.deps.allocatePort && (!preferredPort || diagnostics.length > 0)) { - return { port: await this.deps.allocatePort(), diagnostics }; + return { kind: 'port', port: await this.deps.allocatePort() }; } const stablePort = resolveDefaultStablePort(); + let stablePortUnavailable = false; for (let offset = 0; offset < MCP_HTTP_STABLE_PORT_SCAN_LIMIT; offset += 1) { const candidate = stablePort + offset; if (candidate > 65_535) { @@ -310,18 +972,33 @@ export class AgentTeamsMcpHttpServer { if (preferredPort === candidate) { continue; } - if (await canListenOnLoopbackPort(MCP_HTTP_HOST, candidate)) { - if (candidate !== stablePort) { - diagnostics.push(`opencode_app_mcp_preferred_port_unavailable:${stablePort}`); + const classification = await this.classifyPort(candidate, expectedIdentity); + if (classification.kind === 'available') { + if (candidate !== stablePort || stablePortUnavailable) { + diagnostic(`opencode_app_mcp_preferred_port_unavailable:${stablePort}`, diagnostics); } - return { port: candidate, diagnostics }; + return { kind: 'port', port: candidate }; } + if (classification.kind === 'owned') { + return { + kind: 'handle', + handle: await this.adoptHandle({ + identity: classification.identity, + pid: null, + diagnostics, + diagnosticMessage: `opencode_app_mcp_adopted_port_server:${candidate}`, + statePath, + }), + }; + } + stablePortUnavailable = stablePortUnavailable || candidate === stablePort; + diagnostic(`opencode_app_mcp_port_occupied_unknown:${candidate}`, diagnostics); } const allocatePort = this.deps.allocatePort ?? allocateLoopbackPort; const port = await allocatePort(); - diagnostics.push('opencode_app_mcp_stable_port_range_unavailable'); - return { port, diagnostics }; + diagnostic('opencode_app_mcp_stable_port_range_unavailable', diagnostics); + return { kind: 'port', port }; } private async startOnce( @@ -332,12 +1009,73 @@ export class AgentTeamsMcpHttpServer { } = {} ): Promise { const resolveLaunchSpec = this.deps.resolveLaunchSpec ?? resolveAgentTeamsMcpLaunchSpec; + const launchSpec = await resolveLaunchSpec(); + const expectedIdentity = buildExpectedIdentity(launchSpec, this.ownerInstanceId); + const statePath = this.resolveStatePath(); + const startUnlocked = async (effectiveStatePath: string | null, diagnostics: string[]) => + this.startOnceUnlocked(input, launchSpec, expectedIdentity, effectiveStatePath, diagnostics); + + if (!statePath) { + return startUnlocked(null, []); + } + + const lock = this.deps.withStateLock ?? withFileLock; + try { + return await lock(statePath, () => startUnlocked(statePath, []), MCP_HTTP_STATE_LOCK_OPTIONS); + } catch (error) { + if (!isFileLockTimeoutError(error)) { + throw error; + } + const diagnostics = ['opencode_app_mcp_state_ignored:lock_failed']; + return startUnlocked(null, diagnostics); + } + } + + private async startOnceUnlocked( + input: { + preferredPort?: number | null; + previousUrlHash?: string | null; + reason?: string; + }, + launchSpec: McpLaunchSpec, + expectedIdentity: AgentTeamsMcpExpectedHttpIdentity, + statePath: string | null, + initialDiagnostics: string[] + ): Promise { + const diagnostics = [...initialDiagnostics]; const spawnProcess = this.deps.spawnProcess ?? defaultSpawnProcess; const waitForPort = this.deps.waitForPort ?? waitForLoopbackPort; - const launchSpec = await resolveLaunchSpec(); - const selectedPort = await this.resolveStartPort(input.preferredPort ?? null); - const port = selectedPort.port; + + if (statePath) { + const adopted = await this.tryAdoptStateHandle(statePath, expectedIdentity, diagnostics); + if (adopted) { + return adopted; + } + } + + const selectedTarget = await this.resolveStartTarget( + input.preferredPort ?? null, + expectedIdentity, + statePath, + diagnostics + ); + if (selectedTarget.kind === 'handle') { + return selectedTarget.handle; + } + + const port = selectedTarget.port; const args = buildHttpServerArgs(launchSpec, port); + const childIdentity: AgentTeamsMcpHttpIdentity = { + schemaVersion: 1, + service: MCP_HTTP_IDENTITY_SERVICE, + transport: 'httpStream', + host: MCP_HTTP_HOST, + port, + endpoint: MCP_HTTP_ENDPOINT, + claudeDirHash: expectedIdentity.claudeDirHash, + launchSpecHash: expectedIdentity.launchSpecHash, + ownerInstanceId: expectedIdentity.ownerInstanceId, + }; const childEnv = applyAgentTeamsIdentityEnv({ ...process.env, AGENT_TEAMS_MCP_CLAUDE_DIR: getClaudeBasePath(), @@ -345,6 +1083,10 @@ export class AgentTeamsMcpHttpServer { AGENT_TEAMS_MCP_HTTP_HOST: MCP_HTTP_HOST, AGENT_TEAMS_MCP_HTTP_PORT: String(port), AGENT_TEAMS_MCP_HTTP_ENDPOINT: MCP_HTTP_ENDPOINT, + [MCP_HTTP_IDENTITY_SERVICE_ENV]: MCP_HTTP_IDENTITY_SERVICE, + [MCP_HTTP_CLAUDE_DIR_HASH_ENV]: expectedIdentity.claudeDirHash, + [MCP_HTTP_LAUNCH_SPEC_HASH_ENV]: expectedIdentity.launchSpecHash, + [MCP_HTTP_OWNER_INSTANCE_ID_ENV]: expectedIdentity.ownerInstanceId, }); const child = spawnProcess(launchSpec.command, args, childEnv); @@ -416,12 +1158,11 @@ export class AgentTeamsMcpHttpServer { const generation = this.generation + 1; const transportEvidence = buildTransportEvidence(port, generation); this.generation = generation; - const diagnostics = [...selectedPort.diagnostics]; if (input.previousUrlHash && input.previousUrlHash !== transportEvidence.urlHash) { - diagnostics.push('opencode_app_mcp_public_url_changed'); + diagnostic('opencode_app_mcp_public_url_changed', diagnostics); } if (input.reason) { - diagnostics.push(`opencode_app_mcp_restart_reason:${input.reason}`); + diagnostic(`opencode_app_mcp_restart_reason:${input.reason}`, diagnostics); } this.handle = { url: transportEvidence.url, @@ -432,12 +1173,210 @@ export class AgentTeamsMcpHttpServer { transportEvidence, diagnostics, }; + await this.writeStateSafe( + statePath, + buildState(this.handle, childIdentity, child.pid ?? null, new Date().toISOString()), + diagnostics + ); logger.info(`Agent Teams MCP HTTP server running at ${this.handle.url}`); - for (const diagnostic of diagnostics) { - logger.warn(`Agent Teams MCP HTTP diagnostic: ${diagnostic}`); - } + emitDiagnostics(diagnostics); + this.scheduleOrphanCleanup(childIdentity, this.handle); return this.handle; } + + private scheduleOrphanCleanup( + expectedIdentity: AgentTeamsMcpHttpIdentity, + currentHandle: AgentTeamsMcpHttpServerHandle + ): void { + if ( + this.deps.disableOrphanCleanup || + this.resolveStatePath() === null || + process.env[MCP_HTTP_CLEANUP_DISABLED_ENV] === '1' + ) { + return; + } + + void this.tryCleanupOwnedOrphans(expectedIdentity, currentHandle).catch(() => { + logger.warn('Agent Teams MCP HTTP diagnostic: opencode_app_mcp_orphan_cleanup_failed'); + }); + } + + private async tryCleanupOwnedOrphans( + expectedIdentity: AgentTeamsMcpHttpIdentity, + currentHandle: AgentTeamsMcpHttpServerHandle + ): Promise { + const listRows = this.deps.listProcessRows ?? listNativeProcessRows; + const readDetails = + this.deps.readProcessDetails ?? + (process.platform === 'win32' ? async () => null : readNativeProcessCommandWithEnv); + const readStartTimeMs = + this.deps.readProcessStartTimeMs ?? + (process.platform === 'win32' ? async () => null : readNativeProcessStartTimeMs); + const killProcess = this.deps.killProcess ?? killProcessByPid; + const forceKillProcess = + this.deps.forceKillProcess ?? ((pid: number) => process.kill(pid, 'SIGKILL')); + const isProcessAlive = this.deps.isProcessAlive ?? isNativeProcessAlive; + const sleepMs = this.deps.sleepMs ?? sleep; + const probeHealth = this.deps.probeHealth ?? probeLoopbackHealth; + + const rows = await listRows(); + for (const row of rows) { + if (row.pid === currentHandle.pid || row.pid === process.pid) { + continue; + } + if (!isMcpHttpServerCommand(row.command)) { + continue; + } + const port = parseCommandPort(row.command); + if (!port || port === currentHandle.port) { + continue; + } + + const parentMayStillOwnProcess = + process.platform === 'win32' ? row.ppid > 0 && isProcessAlive(row.ppid) : row.ppid !== 1; + if (parentMayStillOwnProcess) { + continue; + } + + const startedAtMs = await readStartTimeMs(row.pid); + if ( + !Number.isFinite(startedAtMs) || + startedAtMs === null || + startedAtMs >= this.startedAtMs + ) { + continue; + } + + const details = await readDetails(row.pid); + if (!details || !hasManagedMcpDetails(details, port)) { + continue; + } + + const probe = await probeHealth(MCP_HTTP_HOST, port); + const hasMatchingIdentity = + probe.identity !== null && identityMatchesExpected(probe.identity, expectedIdentity, port); + if (probe.identity && !hasMatchingIdentity) { + continue; + } + + const ownedPids = await this.collectOwnedMcpProcessTreePids(rows, row.pid, port, readDetails); + const ownedPidSet = new Set(ownedPids); + + if (await this.hasLiveMcpConsumers(rows, ownedPidSet, port, readDetails)) { + this.recordCleanupDiagnostic( + currentHandle, + `opencode_app_mcp_legacy_orphan_kept_live_consumers:${port}` + ); + continue; + } + + try { + let cleanupFailed = false; + for (const pid of [...ownedPids].reverse()) { + if (!isProcessAlive(pid)) { + continue; + } + try { + killProcess(pid); + } catch { + cleanupFailed = cleanupFailed || isProcessAlive(pid); + } + } + await sleepMs(MCP_HTTP_ORPHAN_TERMINATE_GRACE_MS); + for (const pid of [...ownedPids].reverse()) { + if (!isProcessAlive(pid)) { + continue; + } + try { + forceKillProcess(pid); + } catch { + cleanupFailed = true; + } + } + if (ownedPids.some((pid) => isProcessAlive(pid))) { + cleanupFailed = true; + } + if (cleanupFailed) { + this.recordCleanupDiagnostic( + currentHandle, + `opencode_app_mcp_state_ignored:cleanup_failed` + ); + continue; + } + this.recordCleanupDiagnostic( + currentHandle, + `opencode_app_mcp_legacy_orphan_cleaned:${port}` + ); + } catch { + this.recordCleanupDiagnostic( + currentHandle, + `opencode_app_mcp_state_ignored:cleanup_failed` + ); + } + } + } + + private async collectOwnedMcpProcessTreePids( + rows: readonly RuntimeProcessTableRow[], + rootPid: number, + port: number, + readDetails: (pid: number) => Promise + ): Promise { + const ownedPids = [rootPid]; + const visited = new Set(ownedPids); + for (let index = 0; index < ownedPids.length; index += 1) { + const parentPid = ownedPids[index]; + for (const row of rows) { + if (row.ppid !== parentPid || visited.has(row.pid)) { + continue; + } + if (!isMcpHttpServerCommand(row.command) || parseCommandPort(row.command) !== port) { + continue; + } + const details = await readDetails(row.pid); + if (!details || !hasManagedMcpDetails(details, port)) { + continue; + } + visited.add(row.pid); + ownedPids.push(row.pid); + } + } + return ownedPids; + } + + private async hasLiveMcpConsumers( + rows: readonly RuntimeProcessTableRow[], + candidatePids: ReadonlySet, + port: number, + readDetails: (pid: number) => Promise + ): Promise { + const url = `http://${MCP_HTTP_HOST}:${port}${MCP_HTTP_ENDPOINT}`; + const urlHash = sha256Hex(url); + for (const row of rows) { + if (candidatePids.has(row.pid)) { + continue; + } + const details = (await readDetails(row.pid)) ?? row.command; + if ( + processDetailsIncludeMarker(details, `CLAUDE_MULTIMODEL_AGENT_TEAMS_MCP_URL=${url}`) || + processDetailsIncludeMarker( + details, + `CLAUDE_MULTIMODEL_AGENT_TEAMS_MCP_URL_HASH=${urlHash}` + ) + ) { + return true; + } + } + return false; + } + + private recordCleanupDiagnostic( + handle: AgentTeamsMcpHttpServerHandle, + diagnosticMessage: string + ): void { + handle.diagnostics.push(diagnosticMessage); + logger.warn(`Agent Teams MCP HTTP diagnostic: ${diagnosticMessage}`); + } } export const agentTeamsMcpHttpServer = new AgentTeamsMcpHttpServer(); diff --git a/src/renderer/components/dashboard/CliStatusBanner.tsx b/src/renderer/components/dashboard/CliStatusBanner.tsx index 9007878e..d1670d7c 100644 --- a/src/renderer/components/dashboard/CliStatusBanner.tsx +++ b/src/renderer/components/dashboard/CliStatusBanner.tsx @@ -900,11 +900,6 @@ const InstalledBanner = ({
-
- - Multimodel - -
{/* Extensions button — available whenever the runtime is installed */} {canOpenExtensions && (