From 50b2c715e7a8fb10e38c05a2c04cecbe8014efbf Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 20:51:05 +0300 Subject: [PATCH 01/25] feat(changes): use default opencode evidence path --- src/main/services/team/ChangeExtractorService.ts | 5 ----- test/main/services/team/ChangeExtractorService.test.ts | 2 -- 2 files changed, 7 deletions(-) diff --git a/src/main/services/team/ChangeExtractorService.ts b/src/main/services/team/ChangeExtractorService.ts index 6a75053d..fe46e6b0 100644 --- a/src/main/services/team/ChangeExtractorService.ts +++ b/src/main/services/team/ChangeExtractorService.ts @@ -46,7 +46,6 @@ import type { AgentChangeSet, ChangeStats, TaskChangeSetV2 } from '@shared/types const logger = createLogger('Service:ChangeExtractorService'); const OPEN_CODE_AUTO_BACKFILL_ATTRIBUTION_MODE = 'strict-delivery' as const; -const OPEN_CODE_AUTO_BACKFILL_EVIDENCE_MODE = 'chain-only' as const; const OPEN_CODE_MAX_DISCOVERED_LANES = 500; /** Кеш-запись: данные + mtime файла + время протухания */ @@ -426,7 +425,6 @@ export class ChangeExtractorService { sourceGeneration, deliveryContextFingerprint, attributionMode: OPEN_CODE_AUTO_BACKFILL_ATTRIBUTION_MODE, - evidenceMode: OPEN_CODE_AUTO_BACKFILL_EVIDENCE_MODE, }); const now = Date.now(); const cached = this.openCodeBackfillCache.get(cacheKey); @@ -501,7 +499,6 @@ export class ChangeExtractorService { projectDir, workspaceRoot, attributionMode: OPEN_CODE_AUTO_BACKFILL_ATTRIBUTION_MODE, - evidenceMode: OPEN_CODE_AUTO_BACKFILL_EVIDENCE_MODE, ...(deliveryContext.filePath ? 
{ deliveryContextPath: deliveryContext.filePath } : {}), }); void appendOpenCodeTaskChangeDiag({ @@ -841,7 +838,6 @@ export class ChangeExtractorService { sourceGeneration?: string | null; deliveryContextFingerprint: string; attributionMode: typeof OPEN_CODE_AUTO_BACKFILL_ATTRIBUTION_MODE; - evidenceMode: typeof OPEN_CODE_AUTO_BACKFILL_EVIDENCE_MODE; }): string { return JSON.stringify({ teamName: input.teamName, @@ -852,7 +848,6 @@ export class ChangeExtractorService { sourceGeneration: input.sourceGeneration ?? '', deliveryContextFingerprint: input.deliveryContextFingerprint, attributionMode: input.attributionMode, - evidenceMode: input.evidenceMode, }); } diff --git a/test/main/services/team/ChangeExtractorService.test.ts b/test/main/services/team/ChangeExtractorService.test.ts index 302c35b1..fdd32b5b 100644 --- a/test/main/services/team/ChangeExtractorService.test.ts +++ b/test/main/services/team/ChangeExtractorService.test.ts @@ -1058,7 +1058,6 @@ describe('ChangeExtractorService', () => { projectDir, workspaceRoot: projectPath, attributionMode: 'strict-delivery', - evidenceMode: 'chain-only', }) ); expect(workerClient.computeTaskChanges).not.toHaveBeenCalled(); @@ -1183,7 +1182,6 @@ describe('ChangeExtractorService', () => { workspaceRoot: projectPath, deliveryContextPath: expect.stringContaining('delivery-context.json'), attributionMode: 'strict-delivery', - evidenceMode: 'chain-only', }) ); }); From ff506d0d96d3bedd6e28f1edaa7263d1443e5d46 Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 21:08:10 +0300 Subject: [PATCH 02/25] fix(changes): fingerprint projected ledger summaries --- .../services/team/TaskChangeLedgerReader.ts | 92 +++++++++-- .../team/TaskChangeLedgerReader.test.ts | 153 ++++++++++++++++++ test/renderer/store/changeReviewSlice.test.ts | 80 +++++++++ 3 files changed, 315 insertions(+), 10 deletions(-) diff --git a/src/main/services/team/TaskChangeLedgerReader.ts b/src/main/services/team/TaskChangeLedgerReader.ts index 
f36c7595..f07e523e 100644 --- a/src/main/services/team/TaskChangeLedgerReader.ts +++ b/src/main/services/team/TaskChangeLedgerReader.ts @@ -428,11 +428,7 @@ export class TaskChangeLedgerReader { return null; } - const provenance = this.buildLedgerProvenance( - bundle.journalStamp, - bundle.integrity, - bundle.schemaVersion - ); + const provenance = this.buildLedgerProvenanceFromSummaryBundle(bundle); if ( freshness && @@ -450,11 +446,7 @@ export class TaskChangeLedgerReader { ) { return { bundle, - provenance: this.buildLedgerProvenance( - journalStamp, - bundle.integrity, - bundle.schemaVersion - ), + provenance: this.buildLedgerProvenanceFromSummaryBundle(bundle, journalStamp), mode: 'validated', }; } @@ -694,6 +686,86 @@ export class TaskChangeLedgerReader { return this.buildLedgerProvenance(journalStamp, integrity, bundleSchemaVersion); } + private buildLedgerProvenanceFromSummaryBundle( + bundle: LedgerSummaryBundleV2, + journalStamp: TaskChangeJournalStamp = bundle.journalStamp + ): TaskChangeProvenance { + return { + sourceKind: 'ledger', + sourceFingerprint: this.hashFingerprintPayload(this.buildProjectedSummaryIdentity(bundle)), + journalStamp, + bundleSchemaVersion: bundle.schemaVersion, + integrity: bundle.integrity, + }; + } + + private buildProjectedSummaryIdentity(bundle: LedgerSummaryBundleV2): unknown { + return { + kind: 'ledger-summary-v2-projected-identity', + schemaVersion: bundle.schemaVersion, + bundleKind: bundle.bundleKind, + taskId: bundle.taskId, + integrity: bundle.integrity, + totalFiles: bundle.totalFiles, + totalLinesAdded: bundle.totalLinesAdded, + totalLinesRemoved: bundle.totalLinesRemoved, + diffStatCompleteness: bundle.diffStatCompleteness, + confidence: bundle.confidence, + files: [...bundle.files] + .map((file) => ({ + changeKey: this.normalizeSummaryChangeKey(file), + filePath: normalizePathForComparison(file.filePath), + relativePath: normalizePathForComparison(file.relativePath), + displayPath: file.displayPath ? 
normalizePathForComparison(file.displayPath) : undefined, + linesAdded: file.linesAdded, + linesRemoved: file.linesRemoved, + diffStatKnown: file.diffStatKnown, + latestOperation: file.latestOperation, + createdInTask: file.createdInTask, + deletedInTask: file.deletedInTask, + baselineExists: file.baselineExists, + finalExists: file.finalExists, + latestBeforeHash: file.latestBeforeHash, + latestAfterHash: file.latestAfterHash, + latestBeforeState: this.contentStateFingerprint(file.latestBeforeState), + latestAfterState: this.contentStateFingerprint(file.latestAfterState), + contentAvailability: file.contentAvailability, + reviewability: file.reviewability, + relation: file.relation + ? { + kind: file.relation.kind, + oldPath: normalizePathForComparison(file.relation.oldPath), + newPath: normalizePathForComparison(file.relation.newPath), + } + : undefined, + worktreePath: file.worktreePath + ? normalizePathForComparison(file.worktreePath) + : undefined, + worktreeBranch: file.worktreeBranch, + baseWorkspaceRoot: file.baseWorkspaceRoot + ? 
normalizePathForComparison(file.baseWorkspaceRoot) + : undefined, + })) + .sort( + (left, right) => + left.changeKey.localeCompare(right.changeKey) || + left.filePath.localeCompare(right.filePath) + ), + }; + } + + private contentStateFingerprint(state: LedgerContentState | undefined): unknown { + if (!state) { + return undefined; + } + return { + exists: state.exists, + sha256: state.sha256, + sizeBytes: state.sizeBytes, + unavailableReason: state.unavailableReason, + }; + } + private hashFingerprintPayload(payload: unknown): string { return createHash('sha256').update(JSON.stringify(payload)).digest('hex'); } diff --git a/test/main/services/team/TaskChangeLedgerReader.test.ts b/test/main/services/team/TaskChangeLedgerReader.test.ts index a60807d0..969fac6c 100644 --- a/test/main/services/team/TaskChangeLedgerReader.test.ts +++ b/test/main/services/team/TaskChangeLedgerReader.test.ts @@ -614,6 +614,91 @@ describe('TaskChangeLedgerReader', () => { ); }); + it('keeps v2 provenance fingerprint stable when only raw journal metadata changes', async () => { + tmpDir = await makeSummaryLedgerBundleV2({ + bundle: { + journalStamp: { events: { bytes: 10, mtimeMs: 1, tailSha256: 'raw-a' } }, + eventCount: 1, + noticeCount: 0, + warningCount: 0, + warnings: [], + }, + file: { + eventCount: 1, + firstTimestamp: '2026-03-01T10:00:00.000Z', + lastTimestamp: '2026-03-01T10:00:00.000Z', + agentIds: ['alice@team'], + }, + }); + const reader = new TaskChangeLedgerReader(); + const first = await reader.readTaskChanges({ + teamName: 'team', + taskId: TASK_ID, + projectDir: tmpDir, + projectPath: '/repo', + includeDetails: false, + }); + + tmpDir = await makeSummaryLedgerBundleV2({ + bundle: { + generatedAt: '2026-03-01T11:00:00.000Z', + journalStamp: { events: { bytes: 999, mtimeMs: 99, tailSha256: 'raw-b' } }, + eventCount: 7, + noticeCount: 3, + warningCount: 1, + warnings: ['raw journal had a recovered warning'], + }, + file: { + eventCount: 7, + firstTimestamp: 
'2026-03-01T09:00:00.000Z', + lastTimestamp: '2026-03-01T11:00:00.000Z', + agentIds: ['alice@team', 'bob@team'], + }, + }); + const second = await reader.readTaskChanges({ + teamName: 'team', + taskId: TASK_ID, + projectDir: tmpDir, + projectPath: '/repo', + includeDetails: false, + }); + + expect(first?.provenance?.sourceFingerprint).toBe(second?.provenance?.sourceFingerprint); + }); + + it('changes v2 provenance fingerprint when projected file evidence changes', async () => { + tmpDir = await makeSummaryLedgerBundleV2({ + file: { + latestAfterHash: sha('after-v1'), + latestAfterState: { exists: true, sha256: sha('after-v1'), sizeBytes: 8 }, + }, + }); + const reader = new TaskChangeLedgerReader(); + const first = await reader.readTaskChanges({ + teamName: 'team', + taskId: TASK_ID, + projectDir: tmpDir, + projectPath: '/repo', + includeDetails: false, + }); + + tmpDir = await makeSummaryLedgerBundleV2({ + file: { + latestAfterHash: sha('after-v2'), + latestAfterState: { exists: true, sha256: sha('after-v2'), sizeBytes: 8 }, + }, + }); + const second = await reader.readTaskChanges({ + teamName: 'team', + taskId: TASK_ID, + projectDir: tmpDir, + projectPath: '/repo', + includeDetails: false, + }); + + expect(first?.provenance?.sourceFingerprint).not.toBe(second?.provenance?.sourceFingerprint); + }); + it('keeps identical relative rename relations isolated by worktree path', async () => { tmpDir = await fsTempDir(); const bundleDir = path.join(tmpDir, '.board-task-changes', 'bundles'); @@ -969,6 +1054,74 @@ async function makeLedgerBundle(params: { return dir; } +async function makeSummaryLedgerBundleV2(params: { + bundle?: Record; + file?: Record; +} = {}): Promise { + const dir = await fsTempDir(); + const bundleDir = path.join(dir, '.board-task-changes', 'bundles'); + await mkdir(bundleDir, { recursive: true }); + const file = { + changeKey: 'path:/repo/src/file.ts', + filePath: '/repo/src/file.ts', + relativePath: 'src/file.ts', + linesAdded: 1, + linesRemoved: 
1, + diffStatKnown: true, + eventCount: 1, + firstTimestamp: '2026-03-01T10:00:00.000Z', + lastTimestamp: '2026-03-01T10:00:00.000Z', + latestOperation: 'modify', + createdInTask: false, + deletedInTask: false, + latestBeforeHash: sha('before'), + latestAfterHash: sha('after'), + latestBeforeState: { exists: true, sha256: sha('before'), sizeBytes: 6 }, + latestAfterState: { exists: true, sha256: sha('after'), sizeBytes: 5 }, + contentAvailability: 'full-text', + reviewability: 'full-text', + agentIds: ['alice@team'], + ...params.file, + }; + await writeFile( + path.join(bundleDir, `${encodeURIComponent(TASK_ID)}.json`), + JSON.stringify({ + schemaVersion: 2, + source: 'task-change-ledger', + bundleKind: 'summary', + taskId: TASK_ID, + generatedAt: '2026-03-01T10:00:00.000Z', + journalStamp: { events: { bytes: 10, mtimeMs: 1, tailSha256: 'raw' } }, + integrity: 'ok', + eventCount: 1, + noticeCount: 0, + scope: { + confidence: { tier: 1, label: 'high', reason: 'bundle' }, + memberName: 'alice', + agentIds: ['alice@team'], + startTimestamp: '2026-03-01T10:00:00.000Z', + endTimestamp: '2026-03-01T10:00:00.000Z', + toolUseIds: ['tool-1'], + toolUseCount: 1, + phaseSet: ['work'], + visibleFileCount: 1, + contributors: [], + }, + files: [file], + totalLinesAdded: 1, + totalLinesRemoved: 1, + diffStatCompleteness: 'complete', + totalFiles: 1, + confidence: 'high', + warningCount: 0, + warnings: [], + ...params.bundle, + }), + 'utf8' + ); + return dir; +} + async function fsTempDir(): Promise { return mkdtemp(path.join(os.tmpdir(), 'ledger-reader-')); } diff --git a/test/renderer/store/changeReviewSlice.test.ts b/test/renderer/store/changeReviewSlice.test.ts index da924758..19b8f388 100644 --- a/test/renderer/store/changeReviewSlice.test.ts +++ b/test/renderer/store/changeReviewSlice.test.ts @@ -1651,4 +1651,84 @@ describe('changeReviewSlice task changes', () => { }); expect(store.getState().activeChangeSet).toEqual(current); }); + + it('does not force re-review when ledger 
provenance stays stable despite warning changes', async () => { + const store = createSliceStore(); + const current = { + ...makeTaskChangeSet('task-ledger', '/repo/file.ts'), + provenance: { + sourceKind: 'ledger', + sourceFingerprint: 'projected-fp-stable', + }, + warnings: [], + }; + const fresh = { + ...current, + computedAt: '2026-03-01T13:00:00.000Z', + warnings: ['raw journal warning changed'], + }; + hoisted.getTaskChanges.mockResolvedValueOnce(fresh); + hoisted.applyDecisions.mockResolvedValueOnce({ + applied: 1, + skipped: 0, + conflicts: 0, + errors: [], + }); + + store.setState({ + activeChangeSet: current, + hunkDecisions: { '/repo/file.ts:0': 'rejected' }, + fileDecisions: { '/repo/file.ts': 'rejected' }, + fileChunkCounts: { '/repo/file.ts': 1 }, + changeSetEpoch: 0, + fileContentVersionByPath: {}, + }); + + await store.getState().applyReview('team-a', 'task-ledger'); + + expect(store.getState().applyError).toBeNull(); + expect(hoisted.applyDecisions).toHaveBeenCalledTimes(1); + expect(store.getState().activeChangeSet).toEqual(current); + }); + + it('forces re-review when ledger projected provenance changes with the same file paths', async () => { + const store = createSliceStore(); + const current = { + ...makeTaskChangeSet('task-ledger', '/repo/file.ts'), + provenance: { + sourceKind: 'ledger', + sourceFingerprint: 'projected-fp-v1', + }, + }; + const fresh = { + ...current, + provenance: { + sourceKind: 'ledger', + sourceFingerprint: 'projected-fp-v2', + }, + }; + hoisted.getTaskChanges.mockResolvedValueOnce(fresh); + + store.setState({ + activeChangeSet: current, + hunkDecisions: { '/repo/file.ts:0': 'rejected' }, + fileDecisions: { '/repo/file.ts': 'rejected' }, + fileChunkCounts: { '/repo/file.ts': 1 }, + reviewUndoStack: [{ hunkDecisions: { '/repo/file.ts:0': 'rejected' }, fileDecisions: { '/repo/file.ts': 'rejected' } }], + changeSetEpoch: 2, + fileContentVersionByPath: { '/repo/file.ts': 3 }, + }); + + await 
store.getState().applyReview('team-a', 'task-ledger'); + + expect(hoisted.applyDecisions).not.toHaveBeenCalled(); + expect(store.getState().activeChangeSet).toEqual(fresh); + expect(store.getState().applyError).toBe( + 'Changes have been updated since you started reviewing. Please re-review.' + ); + expect(store.getState().hunkDecisions).toEqual({}); + expect(store.getState().fileDecisions).toEqual({}); + expect(store.getState().reviewUndoStack).toEqual([]); + expect(store.getState().fileContentVersionByPath).toEqual({}); + }); }); From c065dc703da8344fa77e6781a2fa3257efedd646 Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 21:38:22 +0300 Subject: [PATCH 03/25] fix(changes): remove opencode evidence mode from ui bridge --- .../team/opencode/bridge/OpenCodeBridgeCommandContract.ts | 8 -------- .../team/opencode/bridge/OpenCodeReadinessBridge.ts | 1 - test/main/services/team/ChangeExtractorService.test.ts | 3 +++ 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts b/src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts index eca989d8..eca8e93a 100644 --- a/src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts +++ b/src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts @@ -240,17 +240,10 @@ export interface OpenCodeBackfillTaskLedgerCommandBody { workspaceRoot?: string; deliveryContextPath?: string; attributionMode?: OpenCodeBackfillTaskLedgerAttributionMode; - evidenceMode?: OpenCodeBackfillTaskLedgerEvidenceMode; dryRun?: boolean; } export type OpenCodeBackfillTaskLedgerAttributionMode = 'strict-delivery' | 'compatible'; -export type OpenCodeBackfillTaskLedgerEvidenceMode = - | 'off' - | 'metadata-only' - | 'chain-only' - | 'snapshot-probe' - | 'snapshot-auto'; export type OpenCodeBackfillTaskLedgerOutcome = | 'imported' @@ -271,7 +264,6 @@ export interface OpenCodeBackfillTaskLedgerCommandData { workspaceRoot?: 
string; dryRun: boolean; attributionMode?: OpenCodeBackfillTaskLedgerAttributionMode; - evidenceMode?: OpenCodeBackfillTaskLedgerEvidenceMode; strictWindowCandidateCount?: number; openCodeDbFingerprint?: string; deliveryLedgerFingerprint?: string; diff --git a/src/main/services/team/opencode/bridge/OpenCodeReadinessBridge.ts b/src/main/services/team/opencode/bridge/OpenCodeReadinessBridge.ts index cba24f84..ad4dff69 100644 --- a/src/main/services/team/opencode/bridge/OpenCodeReadinessBridge.ts +++ b/src/main/services/team/opencode/bridge/OpenCodeReadinessBridge.ts @@ -308,7 +308,6 @@ export class OpenCodeReadinessBridge implements OpenCodeTeamRuntimeBridgePort { ...(input.workspaceRoot ? { workspaceRoot: input.workspaceRoot } : {}), dryRun: input.dryRun === true, ...(input.attributionMode ? { attributionMode: input.attributionMode } : {}), - ...(input.evidenceMode ? { evidenceMode: input.evidenceMode } : {}), scannedSessions: 0, scannedToolparts: 0, candidateEvents: 0, diff --git a/test/main/services/team/ChangeExtractorService.test.ts b/test/main/services/team/ChangeExtractorService.test.ts index fdd32b5b..5076fd01 100644 --- a/test/main/services/team/ChangeExtractorService.test.ts +++ b/test/main/services/team/ChangeExtractorService.test.ts @@ -1060,6 +1060,7 @@ describe('ChangeExtractorService', () => { attributionMode: 'strict-delivery', }) ); + expect(backfillOpenCodeTaskLedger.mock.calls[0]?.[0]).not.toHaveProperty('evidenceMode'); expect(workerClient.computeTaskChanges).not.toHaveBeenCalled(); }); @@ -1185,6 +1186,8 @@ describe('ChangeExtractorService', () => { }) ); }); + const backfillCalls = backfillOpenCodeTaskLedger.mock.calls as unknown as Array<[Record]>; + expect(backfillCalls[0]?.[0]).not.toHaveProperty('evidenceMode'); expect(settled).toBe(false); expect(workerClient.computeTaskChanges).not.toHaveBeenCalled(); From 9f785ee3b20db7b964d0dc0d7908dd81c0ce5b1e Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 21:47:52 +0300 Subject: [PATCH 
04/25] test(changes): keep opencode transient backfill retryable --- .../team/ChangeExtractorService.test.ts | 42 +++++++++++-------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/test/main/services/team/ChangeExtractorService.test.ts b/test/main/services/team/ChangeExtractorService.test.ts index 5076fd01..90238931 100644 --- a/test/main/services/team/ChangeExtractorService.test.ts +++ b/test/main/services/team/ChangeExtractorService.test.ts @@ -1332,24 +1332,30 @@ describe('ChangeExtractorService', () => { 'utf8' ); - const backfillOpenCodeTaskLedger = vi.fn(async (input: any) => ({ - schemaVersion: 1, - providerId: 'opencode', - teamName: input.teamName, - taskId: input.taskId, - projectDir: input.projectDir, - workspaceRoot: input.workspaceRoot, - dryRun: false, - attributionMode: input.attributionMode, - scannedSessions: 1, - scannedToolparts: 0, - candidateEvents: 0, - importedEvents: 0, - skippedEvents: 0, - outcome: 'no-attribution', - notices: [], - diagnostics: [], - })); + let backfillAttempt = 0; + const backfillOpenCodeTaskLedger = vi.fn(async (input: any) => { + const outcome = backfillAttempt++ === 0 ? 'transient-error' : 'no-attribution'; + return { + schemaVersion: 1, + providerId: 'opencode', + teamName: input.teamName, + taskId: input.taskId, + projectDir: input.projectDir, + workspaceRoot: input.workspaceRoot, + dryRun: false, + attributionMode: input.attributionMode, + scannedSessions: 1, + scannedToolparts: 0, + candidateEvents: 0, + importedEvents: 0, + skippedEvents: 0, + outcome, + notices: [], + diagnostics: outcome === 'transient-error' + ? 
['OpenCode SQLite file changed while snapshot was read; using transaction snapshot.'] + : [], + }; + }); const workerClient = { isAvailable: vi.fn(() => true), computeTaskChanges: vi.fn(async () => From 819a1f6e8fb6a5d2acfd74c87f6a514a7f7b45a1 Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 21:58:24 +0300 Subject: [PATCH 05/25] test(changes): reset decisions on ledger evidence upgrade --- test/renderer/store/changeReviewSlice.test.ts | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/test/renderer/store/changeReviewSlice.test.ts b/test/renderer/store/changeReviewSlice.test.ts index 19b8f388..0bc2d397 100644 --- a/test/renderer/store/changeReviewSlice.test.ts +++ b/test/renderer/store/changeReviewSlice.test.ts @@ -1731,4 +1731,73 @@ describe('changeReviewSlice task changes', () => { expect(store.getState().reviewUndoStack).toEqual([]); expect(store.getState().fileContentVersionByPath).toEqual({}); }); + + it('clears metadata-only decisions when ledger evidence upgrades to full text for the same changeKey', async () => { + const store = createSliceStore(); + const changeKey = 'path:/repo/file.ts'; + const currentFile = { + ...makeFile('/repo/file.ts'), + changeKey, + snippets: [], + ledgerSummary: { + latestOperation: 'modify', + contentAvailability: 'metadata-only', + reviewability: 'metadata-only', + }, + }; + const freshFile = { + ...makeFile('/repo/file.ts'), + changeKey, + ledgerSummary: { + latestOperation: 'modify', + contentAvailability: 'full-text', + reviewability: 'full-text', + beforeState: { exists: true, sha256: 'before-hash', sizeBytes: 6 }, + afterState: { exists: true, sha256: 'after-hash', sizeBytes: 5 }, + }, + }; + const current = { + ...makeTaskChangeSet('task-ledger', '/repo/file.ts'), + files: [currentFile], + provenance: { + sourceKind: 'ledger', + sourceFingerprint: 'metadata-only-projection', + }, + }; + const fresh = { + ...current, + files: [freshFile], + provenance: { + sourceKind: 'ledger', + 
sourceFingerprint: 'snapshot-full-text-projection', + }, + }; + hoisted.getTaskChanges.mockResolvedValueOnce(fresh); + + store.setState({ + activeChangeSet: current, + hunkDecisions: { [`${changeKey}:0`]: 'rejected' }, + fileDecisions: { [changeKey]: 'rejected' }, + hunkContextHashesByFile: { [changeKey]: { 0: 'metadata-only-context' } }, + fileChunkCounts: { [changeKey]: 1 }, + reviewUndoStack: [ + { + hunkDecisions: { [`${changeKey}:0`]: 'rejected' }, + fileDecisions: { [changeKey]: 'rejected' }, + }, + ], + changeSetEpoch: 4, + fileContentVersionByPath: { '/repo/file.ts': 2 }, + }); + + await store.getState().applyReview('team-a', 'task-ledger'); + + expect(hoisted.applyDecisions).not.toHaveBeenCalled(); + expect(store.getState().activeChangeSet).toEqual(fresh); + expect(store.getState().fileDecisions).toEqual({}); + expect(store.getState().hunkDecisions).toEqual({}); + expect(store.getState().hunkContextHashesByFile).toEqual({}); + expect(store.getState().reviewUndoStack).toEqual([]); + expect(store.getState().fileContentVersionByPath).toEqual({}); + }); }); From ba09010fcba287a643de30108800a58364e17515 Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 22:28:58 +0300 Subject: [PATCH 06/25] fix(changes): derive opencode backfill member from delivery --- .../services/team/ChangeExtractorService.ts | 30 +++- .../team/ChangeExtractorService.test.ts | 146 ++++++++++++++++++ 2 files changed, 170 insertions(+), 6 deletions(-) diff --git a/src/main/services/team/ChangeExtractorService.ts b/src/main/services/team/ChangeExtractorService.ts index fe46e6b0..493b0094 100644 --- a/src/main/services/team/ChangeExtractorService.ts +++ b/src/main/services/team/ChangeExtractorService.ts @@ -413,6 +413,10 @@ export class ChangeExtractorService { input.teamName, input.taskId ); + const backfillMemberName = this.resolveOpenCodeBackfillMemberName( + input.effectiveOptions.owner, + deliveryContextRecords + ); const deliveryContextFingerprint = 
this.hashOpenCodeDeliveryContextRecords(deliveryContextRecords); @@ -444,7 +448,7 @@ export class ChangeExtractorService { teamName: input.teamName, taskId: input.taskId, displayId: input.taskMeta?.displayId ?? null, - memberName: input.effectiveOptions.owner ?? null, + memberName: backfillMemberName ?? input.effectiveOptions.owner ?? null, projectDir, workspaceRoot, sourceGeneration, @@ -466,7 +470,8 @@ export class ChangeExtractorService { workspaceRoot, cacheKey, deliveryContextRecords, - sourceGeneration + sourceGeneration, + backfillMemberName ).finally(() => { this.openCodeBackfillInFlight.delete(cacheKey); }); @@ -482,7 +487,8 @@ export class ChangeExtractorService { deliveryContextRecords: Awaited< ReturnType >, - sourceGeneration: string | null + sourceGeneration: string | null, + backfillMemberName?: string ): Promise { const deliveryContext = await this.createOpenCodeDeliveryContextTempFile( input.teamName, @@ -495,10 +501,10 @@ export class ChangeExtractorService { teamName: input.teamName, taskId: input.taskId, taskDisplayId: input.taskMeta?.displayId, - memberName: input.effectiveOptions.owner, projectDir, workspaceRoot, attributionMode: OPEN_CODE_AUTO_BACKFILL_ATTRIBUTION_MODE, + ...(backfillMemberName ? { memberName: backfillMemberName } : {}), ...(deliveryContext.filePath ? { deliveryContextPath: deliveryContext.filePath } : {}), }); void appendOpenCodeTaskChangeDiag({ @@ -507,7 +513,7 @@ export class ChangeExtractorService { teamName: input.teamName, taskId: input.taskId, displayId: input.taskMeta?.displayId ?? null, - memberName: input.effectiveOptions.owner ?? null, + memberName: backfillMemberName ?? input.effectiveOptions.owner ?? null, projectDir, workspaceRoot, sourceGeneration, @@ -562,7 +568,7 @@ export class ChangeExtractorService { teamName: input.teamName, taskId: input.taskId, displayId: input.taskMeta?.displayId ?? null, - memberName: input.effectiveOptions.owner ?? null, + memberName: backfillMemberName ?? 
input.effectiveOptions.owner ?? null, projectDir, workspaceRoot, deliveryRecordCount: deliveryContextRecords.length, @@ -745,6 +751,18 @@ export class ChangeExtractorService { return records.slice(-200); } + private resolveOpenCodeBackfillMemberName( + owner: string | undefined, + records: Awaited> + ): string | undefined { + const members = [...new Set(records.map((record) => record.memberName.trim()).filter(Boolean))]; + const normalizedOwner = owner?.trim(); + if (normalizedOwner && members.includes(normalizedOwner)) { + return normalizedOwner; + } + return members.length === 1 ? members[0] : undefined; + } + private async readOpenCodeRuntimeLaneIdsFromDisk( teamsBasePath: string, teamName: string diff --git a/test/main/services/team/ChangeExtractorService.test.ts b/test/main/services/team/ChangeExtractorService.test.ts index 90238931..81f8a1d3 100644 --- a/test/main/services/team/ChangeExtractorService.test.ts +++ b/test/main/services/team/ChangeExtractorService.test.ts @@ -1064,6 +1064,152 @@ describe('ChangeExtractorService', () => { expect(workerClient.computeTaskChanges).not.toHaveBeenCalled(); }); + it('uses the OpenCode delivery member when the current task owner changed later', async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'change-extractor-service-')); + setClaudeBasePathOverride(tmpDir); + await writeTaskFile(tmpDir, { displayId: 'abc12345', owner: 'alice' }); + const projectDir = path.join(tmpDir, 'project-dir'); + const projectPath = path.join(tmpDir, 'repo'); + await fs.mkdir(projectDir, { recursive: true }); + await fs.mkdir(projectPath, { recursive: true }); + await writeOpenCodeDeliveryLedger(tmpDir, { memberName: 'bob' }); + + const backfillOpenCodeTaskLedger = vi.fn(async (input: any) => ({ + schemaVersion: 1, + providerId: 'opencode', + teamName: input.teamName, + taskId: input.taskId, + projectDir: input.projectDir, + workspaceRoot: input.workspaceRoot, + dryRun: false, + attributionMode: input.attributionMode, + 
scannedSessions: 0, + scannedToolparts: 0, + candidateEvents: 0, + importedEvents: 0, + skippedEvents: 0, + outcome: 'no-history', + notices: [], + diagnostics: [], + })); + const workerClient = { + isAvailable: vi.fn(() => true), + computeTaskChanges: vi.fn(async () => + makeTaskChangeResult(TASK_ID, { content: '', confidence: 'fallback' }) + ), + }; + + const service = new ChangeExtractorService( + { + getLogSourceWatchContext: vi.fn(async () => ({ + projectDir, + projectPath, + sessionIds: [], + })), + findLogFileRefsForTask: vi.fn(async () => []), + findMemberLogPaths: vi.fn(async () => []), + } as any, + { + parseBoundaries: vi.fn(async () => ({ + boundaries: [], + scopes: [], + isSingleTaskSession: true, + detectedMechanism: 'none' as const, + })), + } as any, + { getConfig: vi.fn(async () => ({ projectPath })) } as any, + undefined, + workerClient as any, + { backfillOpenCodeTaskLedger } as any, + { getMeta: vi.fn(async () => ({ providerId: 'opencode' })) } as any + ); + + await service.getTaskChanges(TEAM_NAME, TASK_ID, { + owner: 'alice', + status: 'completed', + }); + + expect(backfillOpenCodeTaskLedger).toHaveBeenCalledWith( + expect.objectContaining({ + memberName: 'bob', + attributionMode: 'strict-delivery', + }) + ); + }); + + it('omits member filter when multiple OpenCode delivery members match the task', async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'change-extractor-service-')); + setClaudeBasePathOverride(tmpDir); + await writeTaskFile(tmpDir, { displayId: 'abc12345', owner: 'alice' }); + const projectDir = path.join(tmpDir, 'project-dir'); + const projectPath = path.join(tmpDir, 'repo'); + await fs.mkdir(projectDir, { recursive: true }); + await fs.mkdir(projectPath, { recursive: true }); + await writeOpenCodeDeliveryLedger(tmpDir, { memberName: 'bob', runtimeSessionId: 'session-1' }); + await writeOpenCodeDeliveryLedger(tmpDir, { + memberName: 'carol', + runtimeSessionId: 'session-2', + }); + + const backfillOpenCodeTaskLedger 
= vi.fn(async (input: any) => ({ + schemaVersion: 1, + providerId: 'opencode', + teamName: input.teamName, + taskId: input.taskId, + projectDir: input.projectDir, + workspaceRoot: input.workspaceRoot, + dryRun: false, + attributionMode: input.attributionMode, + scannedSessions: 0, + scannedToolparts: 0, + candidateEvents: 0, + importedEvents: 0, + skippedEvents: 0, + outcome: 'no-history', + notices: [], + diagnostics: [], + })); + const workerClient = { + isAvailable: vi.fn(() => true), + computeTaskChanges: vi.fn(async () => + makeTaskChangeResult(TASK_ID, { content: '', confidence: 'fallback' }) + ), + }; + + const service = new ChangeExtractorService( + { + getLogSourceWatchContext: vi.fn(async () => ({ + projectDir, + projectPath, + sessionIds: [], + })), + findLogFileRefsForTask: vi.fn(async () => []), + findMemberLogPaths: vi.fn(async () => []), + } as any, + { + parseBoundaries: vi.fn(async () => ({ + boundaries: [], + scopes: [], + isSingleTaskSession: true, + detectedMechanism: 'none' as const, + })), + } as any, + { getConfig: vi.fn(async () => ({ projectPath })) } as any, + undefined, + workerClient as any, + { backfillOpenCodeTaskLedger } as any, + { getMeta: vi.fn(async () => ({ providerId: 'opencode' })) } as any + ); + + await service.getTaskChanges(TEAM_NAME, TASK_ID, { + owner: 'alice', + status: 'completed', + }); + + expect(backfillOpenCodeTaskLedger).toHaveBeenCalledTimes(1); + expect(backfillOpenCodeTaskLedger.mock.calls[0]?.[0]).not.toHaveProperty('memberName'); + }); + it('does not run OpenCode backfill for explicit non-OpenCode teams even if stale runtime files exist', async () => { tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'change-extractor-service-')); setClaudeBasePathOverride(tmpDir); From 9d9c7fbd38ce1b69c80a0c544c296aa25729d598 Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 22:33:11 +0300 Subject: [PATCH 07/25] fix(changes): reread ledger after opencode backfill failure --- 
.../services/team/ChangeExtractorService.ts | 32 ++-- .../team/ChangeExtractorService.test.ts | 180 ++++++++++++------ 2 files changed, 143 insertions(+), 69 deletions(-) diff --git a/src/main/services/team/ChangeExtractorService.ts b/src/main/services/team/ChangeExtractorService.ts index 493b0094..a84d570d 100644 --- a/src/main/services/team/ChangeExtractorService.ts +++ b/src/main/services/team/ChangeExtractorService.ts @@ -70,6 +70,11 @@ interface OpenCodeBackfillCacheEntry { expiresAt: number; } +interface OpenCodeBackfillAttempt { + attempted: boolean; + backfilled: boolean; +} + interface OpenCodeDeliveryContextTempFile { filePath: string | null; cleanup: () => Promise; @@ -81,7 +86,7 @@ export class ChangeExtractorService { private taskChangeSummaryInFlight = new Map>(); private taskChangeSummaryVersionByTask = new Map(); private taskChangeSummaryValidationInFlight = new Set(); - private openCodeBackfillInFlight = new Map>(); + private openCodeBackfillInFlight = new Map>(); private openCodeBackfillCache = new Map(); private openCodeTeamEligibilityCache = new Map(); private readonly cacheTtl = 30 * 1000; // 30 сек — shorter TTL to reduce stale data risk @@ -209,7 +214,8 @@ export class ChangeExtractorService { return ledgerResult; } - if (await this.tryBackfillOpenCodeLedger(resolvedInput)) { + const openCodeBackfill = await this.tryBackfillOpenCodeLedger(resolvedInput); + if (openCodeBackfill.backfilled || openCodeBackfill.attempted) { const backfilledLedgerResult = await this.readLedgerTaskChanges(resolvedInput); if (backfilledLedgerResult) { await this.recordTaskChangePresence( @@ -378,15 +384,17 @@ export class ChangeExtractorService { } } - private async tryBackfillOpenCodeLedger(input: ResolvedTaskChangeComputeInput): Promise { + private async tryBackfillOpenCodeLedger( + input: ResolvedTaskChangeComputeInput + ): Promise { if (!this.openCodeLedgerBackfillPort) { - return false; + return { attempted: false, backfilled: false }; } if (!(await 
this.isOpenCodeTeamCandidate(input.teamName))) { - return false; + return { attempted: false, backfilled: false }; } if (typeof this.logsFinder.getLogSourceWatchContext !== 'function') { - return false; + return { attempted: false, backfilled: false }; } const context = await this.logsFinder @@ -400,7 +408,7 @@ export class ChangeExtractorService { !path.isAbsolute(projectDir) || !path.isAbsolute(workspaceRoot) ) { - return false; + return { attempted: false, backfilled: false }; } const sourceGeneration = this.teamLogSourceTracker @@ -433,7 +441,7 @@ export class ChangeExtractorService { const now = Date.now(); const cached = this.openCodeBackfillCache.get(cacheKey); if (cached && cached.expiresAt > now) { - return cached.backfilledAt > 0; + return { attempted: false, backfilled: cached.backfilledAt > 0 }; } this.openCodeBackfillCache.delete(cacheKey); @@ -456,7 +464,7 @@ export class ChangeExtractorService { deliveryContextFingerprint, attributionMode: OPEN_CODE_AUTO_BACKFILL_ATTRIBUTION_MODE, }).catch(() => undefined); - return false; + return { attempted: false, backfilled: false }; } const existing = this.openCodeBackfillInFlight.get(cacheKey); @@ -489,7 +497,7 @@ export class ChangeExtractorService { >, sourceGeneration: string | null, backfillMemberName?: string - ): Promise { + ): Promise { const deliveryContext = await this.createOpenCodeDeliveryContextTempFile( input.teamName, input.taskId, @@ -557,7 +565,7 @@ export class ChangeExtractorService { `OpenCode ledger backfill for ${input.teamName}/${input.taskId}: ${result.outcome}; ${result.diagnostics.join('; ')}` ); } - return backfilled; + return { attempted: true, backfilled }; } catch (error) { logger.warn( `OpenCode ledger backfill failed for ${input.teamName}/${input.taskId}: ${error instanceof Error ? 
error.message : String(error)}` @@ -583,7 +591,7 @@ export class ChangeExtractorService { } else { this.openCodeBackfillCache.delete(cacheKey); } - return false; + return { attempted: true, backfilled: false }; } finally { await deliveryContext.cleanup(); } diff --git a/test/main/services/team/ChangeExtractorService.test.ts b/test/main/services/team/ChangeExtractorService.test.ts index 81f8a1d3..384dcf03 100644 --- a/test/main/services/team/ChangeExtractorService.test.ts +++ b/test/main/services/team/ChangeExtractorService.test.ts @@ -126,6 +126,70 @@ async function writeOpenCodeDeliveryLedger( return filePath; } +async function writeOpenCodeLedgerBundle( + projectDir: string, + projectPath: string, + taskId: string = TASK_ID +): Promise { + const bundleDir = path.join(projectDir, '.board-task-changes', 'bundles'); + await fs.mkdir(bundleDir, { recursive: true }); + await fs.writeFile( + path.join(bundleDir, `${encodeURIComponent(taskId)}.json`), + JSON.stringify({ + schemaVersion: 1, + source: 'task-change-ledger', + taskId, + generatedAt: '2026-03-01T10:00:00.000Z', + eventCount: 1, + files: [ + { + filePath: path.join(projectPath, 'src/opencode.ts'), + relativePath: 'src/opencode.ts', + eventIds: ['event-1'], + linesAdded: 1, + linesRemoved: 0, + isNewFile: true, + latestAfterHash: null, + }, + ], + totalLinesAdded: 1, + totalLinesRemoved: 0, + totalFiles: 1, + confidence: 'high', + warnings: [], + events: [ + { + schemaVersion: 1, + eventId: 'event-1', + taskId, + taskRef: taskId, + taskRefKind: 'canonical', + phase: 'work', + executionSeq: 0, + sessionId: 'opencode-session-1', + memberName: 'bob', + toolUseId: 'part-1', + source: 'opencode_toolpart_write', + operation: 'create', + confidence: 'exact', + workspaceRoot: projectPath, + filePath: path.join(projectPath, 'src/opencode.ts'), + relativePath: 'src/opencode.ts', + timestamp: '2026-03-01T10:00:00.000Z', + toolStatus: 'succeeded', + before: null, + after: null, + oldString: '', + newString: 'export const 
source = "opencode";\n', + linesAdded: 1, + linesRemoved: 0, + }, + ], + }), + 'utf8' + ); +} + function persistedEntryPath(baseDir: string): string { return path.join(baseDir, 'task-change-summaries', encodeURIComponent(TEAM_NAME), `${TASK_ID}.json`); } @@ -935,63 +999,7 @@ describe('ChangeExtractorService', () => { await writeOpenCodeDeliveryLedger(tmpDir); const backfillOpenCodeTaskLedger = vi.fn(async (input: any) => { - const bundleDir = path.join(input.projectDir, '.board-task-changes', 'bundles'); - await fs.mkdir(bundleDir, { recursive: true }); - await fs.writeFile( - path.join(bundleDir, `${encodeURIComponent(TASK_ID)}.json`), - JSON.stringify({ - schemaVersion: 1, - source: 'task-change-ledger', - taskId: TASK_ID, - generatedAt: '2026-03-01T10:00:00.000Z', - eventCount: 1, - files: [ - { - filePath: path.join(projectPath, 'src/opencode.ts'), - relativePath: 'src/opencode.ts', - eventIds: ['event-1'], - linesAdded: 1, - linesRemoved: 0, - isNewFile: true, - latestAfterHash: null, - }, - ], - totalLinesAdded: 1, - totalLinesRemoved: 0, - totalFiles: 1, - confidence: 'high', - warnings: [], - events: [ - { - schemaVersion: 1, - eventId: 'event-1', - taskId: TASK_ID, - taskRef: TASK_ID, - taskRefKind: 'canonical', - phase: 'work', - executionSeq: 0, - sessionId: 'opencode-session-1', - memberName: 'bob', - toolUseId: 'part-1', - source: 'opencode_toolpart_write', - operation: 'create', - confidence: 'exact', - workspaceRoot: projectPath, - filePath: path.join(projectPath, 'src/opencode.ts'), - relativePath: 'src/opencode.ts', - timestamp: '2026-03-01T10:00:00.000Z', - toolStatus: 'succeeded', - before: null, - after: null, - oldString: '', - newString: 'export const source = "opencode";\n', - linesAdded: 1, - linesRemoved: 0, - }, - ], - }), - 'utf8' - ); + await writeOpenCodeLedgerBundle(input.projectDir, projectPath); return { schemaVersion: 1, providerId: 'opencode', @@ -1064,6 +1072,64 @@ describe('ChangeExtractorService', () => { 
expect(workerClient.computeTaskChanges).not.toHaveBeenCalled(); }); + it('rereads ledger when OpenCode backfill writes artifacts and then fails', async () => { + vi.spyOn(console, 'warn').mockImplementation(() => {}); + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'change-extractor-service-')); + setClaudeBasePathOverride(tmpDir); + await writeTaskFile(tmpDir, { displayId: 'abc12345', owner: 'bob' }); + const projectDir = path.join(tmpDir, 'project-dir'); + const projectPath = path.join(tmpDir, 'repo'); + await fs.mkdir(projectDir, { recursive: true }); + await fs.mkdir(projectPath, { recursive: true }); + await writeOpenCodeDeliveryLedger(tmpDir); + + const backfillOpenCodeTaskLedger = vi.fn(async (input: any) => { + await writeOpenCodeLedgerBundle(input.projectDir, projectPath); + throw new Error('timeout after import'); + }); + const workerClient = { + isAvailable: vi.fn(() => true), + computeTaskChanges: vi.fn(async () => + makeTaskChangeResult(TASK_ID, { content: '', confidence: 'fallback' }) + ), + }; + + const service = new ChangeExtractorService( + { + getLogSourceWatchContext: vi.fn(async () => ({ + projectDir, + projectPath, + sessionIds: [], + })), + findLogFileRefsForTask: vi.fn(async () => []), + findMemberLogPaths: vi.fn(async () => []), + } as any, + { + parseBoundaries: vi.fn(async () => ({ + boundaries: [], + scopes: [], + isSingleTaskSession: true, + detectedMechanism: 'none' as const, + })), + } as any, + { getConfig: vi.fn(async () => ({ projectPath })) } as any, + undefined, + workerClient as any, + { backfillOpenCodeTaskLedger } as any, + { getMeta: vi.fn(async () => ({ providerId: 'opencode' })) } as any + ); + + const result = await service.getTaskChanges(TEAM_NAME, TASK_ID, { + owner: 'bob', + status: 'completed', + }); + + expect(result.files).toHaveLength(1); + expect(result.files[0]?.snippets[0]?.toolName).toBe('Write'); + expect(backfillOpenCodeTaskLedger).toHaveBeenCalledTimes(1); + 
expect(workerClient.computeTaskChanges).not.toHaveBeenCalled(); + }); + it('uses the OpenCode delivery member when the current task owner changed later', async () => { tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'change-extractor-service-')); setClaudeBasePathOverride(tmpDir); From 6bc9ddbc3ea85897fe691566d07448ac58228758 Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 22:34:27 +0300 Subject: [PATCH 08/25] test(changes): ignore display-only opencode delivery --- .../team/ChangeExtractorService.test.ts | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/test/main/services/team/ChangeExtractorService.test.ts b/test/main/services/team/ChangeExtractorService.test.ts index 384dcf03..5076c321 100644 --- a/test/main/services/team/ChangeExtractorService.test.ts +++ b/test/main/services/team/ChangeExtractorService.test.ts @@ -1276,6 +1276,65 @@ describe('ChangeExtractorService', () => { expect(backfillOpenCodeTaskLedger.mock.calls[0]?.[0]).not.toHaveProperty('memberName'); }); + it('ignores OpenCode delivery records that match only a recreated task display id', async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'change-extractor-service-')); + setClaudeBasePathOverride(tmpDir); + await writeTaskFile(tmpDir, { displayId: 'abc12345', owner: 'bob' }); + const projectDir = path.join(tmpDir, 'project-dir'); + const projectPath = path.join(tmpDir, 'repo'); + await fs.mkdir(projectDir, { recursive: true }); + await fs.mkdir(projectPath, { recursive: true }); + await writeOpenCodeDeliveryLedger(tmpDir, { + taskId: 'old-task', + displayId: 'abc12345', + memberName: 'bob', + }); + + const backfillOpenCodeTaskLedger = vi.fn(async () => { + throw new Error('display-id-only delivery record must not backfill'); + }); + const workerClient = { + isAvailable: vi.fn(() => true), + computeTaskChanges: vi.fn(async () => + makeTaskChangeResult(TASK_ID, { content: '', confidence: 'fallback' }) + ), + }; + + const service = new 
ChangeExtractorService( + { + getLogSourceWatchContext: vi.fn(async () => ({ + projectDir, + projectPath, + sessionIds: [], + })), + findLogFileRefsForTask: vi.fn(async () => []), + findMemberLogPaths: vi.fn(async () => []), + } as any, + { + parseBoundaries: vi.fn(async () => ({ + boundaries: [], + scopes: [], + isSingleTaskSession: true, + detectedMechanism: 'none' as const, + })), + } as any, + { getConfig: vi.fn(async () => ({ projectPath })) } as any, + undefined, + workerClient as any, + { backfillOpenCodeTaskLedger } as any, + { getMeta: vi.fn(async () => ({ providerId: 'opencode' })) } as any + ); + + const result = await service.getTaskChanges(TEAM_NAME, TASK_ID, { + owner: 'bob', + status: 'completed', + }); + + expect(result.files).toHaveLength(0); + expect(backfillOpenCodeTaskLedger).not.toHaveBeenCalled(); + expect(workerClient.computeTaskChanges).toHaveBeenCalledTimes(1); + }); + it('does not run OpenCode backfill for explicit non-OpenCode teams even if stale runtime files exist', async () => { tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'change-extractor-service-')); setClaudeBasePathOverride(tmpDir); From ee590d0a62a6cf3f530a07060aa1bbf3931d1f10 Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 22:39:47 +0300 Subject: [PATCH 09/25] test(changes): ignore related opencode deliveries --- .../team/ChangeExtractorService.test.ts | 62 ++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/test/main/services/team/ChangeExtractorService.test.ts b/test/main/services/team/ChangeExtractorService.test.ts index 5076c321..19ca95c6 100644 --- a/test/main/services/team/ChangeExtractorService.test.ts +++ b/test/main/services/team/ChangeExtractorService.test.ts @@ -79,6 +79,7 @@ async function writeOpenCodeDeliveryLedger( taskId: string; displayId: string; teamName: string; + taskRefs: { taskId: string; displayId: string; teamName: string }[]; }> ): Promise { const memberName = overrides?.memberName ?? 
'bob'; @@ -108,7 +109,7 @@ async function writeOpenCodeDeliveryLedger( observedAssistantMessageId: overrides?.observedAssistantMessageId ?? null, prePromptCursor: null, postPromptCursor: null, - taskRefs: [ + taskRefs: overrides?.taskRefs ?? [ { taskId: overrides?.taskId ?? TASK_ID, displayId: overrides?.displayId ?? 'abc12345', @@ -1335,6 +1336,65 @@ describe('ChangeExtractorService', () => { expect(workerClient.computeTaskChanges).toHaveBeenCalledTimes(1); }); + it('ignores OpenCode delivery records that only mention related tasks', async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'change-extractor-service-')); + setClaudeBasePathOverride(tmpDir); + await writeTaskFile(tmpDir, { displayId: 'abc12345', owner: 'bob' }); + const projectDir = path.join(tmpDir, 'project-dir'); + const projectPath = path.join(tmpDir, 'repo'); + await fs.mkdir(projectDir, { recursive: true }); + await fs.mkdir(projectPath, { recursive: true }); + await writeOpenCodeDeliveryLedger(tmpDir, { + taskId: 'related-task', + displayId: 'def67890', + memberName: 'bob', + }); + + const backfillOpenCodeTaskLedger = vi.fn(async () => { + throw new Error('related-only delivery record must not backfill'); + }); + const workerClient = { + isAvailable: vi.fn(() => true), + computeTaskChanges: vi.fn(async () => + makeTaskChangeResult(TASK_ID, { content: '', confidence: 'fallback' }) + ), + }; + + const service = new ChangeExtractorService( + { + getLogSourceWatchContext: vi.fn(async () => ({ + projectDir, + projectPath, + sessionIds: [], + })), + findLogFileRefsForTask: vi.fn(async () => []), + findMemberLogPaths: vi.fn(async () => []), + } as any, + { + parseBoundaries: vi.fn(async () => ({ + boundaries: [], + scopes: [], + isSingleTaskSession: true, + detectedMechanism: 'none' as const, + })), + } as any, + { getConfig: vi.fn(async () => ({ projectPath })) } as any, + undefined, + workerClient as any, + { backfillOpenCodeTaskLedger } as any, + { getMeta: vi.fn(async () => ({ 
providerId: 'opencode' })) } as any + ); + + const result = await service.getTaskChanges(TEAM_NAME, TASK_ID, { + owner: 'bob', + status: 'completed', + }); + + expect(result.files).toHaveLength(0); + expect(backfillOpenCodeTaskLedger).not.toHaveBeenCalled(); + expect(workerClient.computeTaskChanges).toHaveBeenCalledTimes(1); + }); + it('does not run OpenCode backfill for explicit non-OpenCode teams even if stale runtime files exist', async () => { tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'change-extractor-service-')); setClaudeBasePathOverride(tmpDir); From 53012ed623c08cb7974ab82491ac168278430d36 Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 22:52:05 +0300 Subject: [PATCH 10/25] fix(changes): pass verified opencode delivery context --- .../services/team/ChangeExtractorService.ts | 39 +++++++++++-------- .../bridge/OpenCodeBridgeCommandContract.ts | 1 + .../team/ChangeExtractorService.test.ts | 16 ++++++++ .../team/OpenCodeReadinessBridge.test.ts | 4 ++ 4 files changed, 43 insertions(+), 17 deletions(-) diff --git a/src/main/services/team/ChangeExtractorService.ts b/src/main/services/team/ChangeExtractorService.ts index a84d570d..ac1c68d2 100644 --- a/src/main/services/team/ChangeExtractorService.ts +++ b/src/main/services/team/ChangeExtractorService.ts @@ -9,7 +9,7 @@ import { } from '@shared/utils/taskChangeState'; import { createHash } from 'crypto'; import { existsSync } from 'fs'; -import { mkdtemp, readdir, readFile, rm, stat, writeFile } from 'fs/promises'; +import { chmod, mkdtemp, readdir, readFile, rm, stat, writeFile } from 'fs/promises'; import * as os from 'os'; import * as path from 'path'; @@ -77,6 +77,7 @@ interface OpenCodeBackfillAttempt { interface OpenCodeDeliveryContextTempFile { filePath: string | null; + hash: string | null; cleanup: () => Promise; } @@ -513,7 +514,12 @@ export class ChangeExtractorService { workspaceRoot, attributionMode: OPEN_CODE_AUTO_BACKFILL_ATTRIBUTION_MODE, ...(backfillMemberName ? 
{ memberName: backfillMemberName } : {}), - ...(deliveryContext.filePath ? { deliveryContextPath: deliveryContext.filePath } : {}), + ...(deliveryContext.filePath + ? { + deliveryContextPath: deliveryContext.filePath, + deliveryContextHash: deliveryContext.hash ?? undefined, + } + : {}), }); void appendOpenCodeTaskChangeDiag({ event: 'backfill_result', @@ -661,27 +667,26 @@ export class ChangeExtractorService { records: Awaited> ): Promise { if (records.length === 0) { - return { filePath: null, cleanup: async () => undefined }; + return { filePath: null, hash: null, cleanup: async () => undefined }; } const dir = await mkdtemp(path.join(os.tmpdir(), 'claude-team-opencode-ledger-context-')); + await chmod(dir, 0o700).catch(() => undefined); const filePath = path.join(dir, 'delivery-context.json'); - await writeFile( - filePath, - `${JSON.stringify( - { - schemaVersion: 1, - teamName, - taskId, - records, - }, - null, - 2 - )}\n`, - { encoding: 'utf8', mode: 0o600 } - ); + const rawContext = `${JSON.stringify( + { + schemaVersion: 1, + teamName, + taskId, + records, + }, + null, + 2 + )}\n`; + await writeFile(filePath, rawContext, { encoding: 'utf8', mode: 0o600 }); return { filePath, + hash: createHash('sha256').update(rawContext).digest('hex'), cleanup: async () => { await rm(dir, { recursive: true, force: true }).catch(() => undefined); }, diff --git a/src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts b/src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts index eca8e93a..9a478e01 100644 --- a/src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts +++ b/src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts @@ -239,6 +239,7 @@ export interface OpenCodeBackfillTaskLedgerCommandBody { projectDir?: string; workspaceRoot?: string; deliveryContextPath?: string; + deliveryContextHash?: string; attributionMode?: OpenCodeBackfillTaskLedgerAttributionMode; dryRun?: boolean; } diff --git 
a/test/main/services/team/ChangeExtractorService.test.ts b/test/main/services/team/ChangeExtractorService.test.ts index 19ca95c6..daaa5428 100644 --- a/test/main/services/team/ChangeExtractorService.test.ts +++ b/test/main/services/team/ChangeExtractorService.test.ts @@ -1,5 +1,6 @@ import * as os from 'os'; import * as path from 'path'; +import { createHash } from 'crypto'; import { afterEach, describe, expect, it, vi } from 'vitest'; import * as fs from 'fs/promises'; @@ -999,7 +1000,12 @@ describe('ChangeExtractorService', () => { await fs.mkdir(projectPath, { recursive: true }); await writeOpenCodeDeliveryLedger(tmpDir); + let deliveryContextHashVerified = false; const backfillOpenCodeTaskLedger = vi.fn(async (input: any) => { + deliveryContextHashVerified = + createHash('sha256') + .update(await fs.readFile(input.deliveryContextPath, 'utf8')) + .digest('hex') === input.deliveryContextHash; await writeOpenCodeLedgerBundle(input.projectDir, projectPath); return { schemaVersion: 1, @@ -1069,6 +1075,12 @@ describe('ChangeExtractorService', () => { attributionMode: 'strict-delivery', }) ); + const backfillInput = backfillOpenCodeTaskLedger.mock.calls[0]?.[0]; + expect(backfillInput.deliveryContextPath).toEqual( + expect.stringContaining('delivery-context.json') + ); + expect(backfillInput.deliveryContextHash).toMatch(/^[a-f0-9]{64}$/); + expect(deliveryContextHashVerified).toBe(true); expect(backfillOpenCodeTaskLedger.mock.calls[0]?.[0]).not.toHaveProperty('evidenceMode'); expect(workerClient.computeTaskChanges).not.toHaveBeenCalled(); }); @@ -1513,6 +1525,7 @@ describe('ChangeExtractorService', () => { projectDir, workspaceRoot: projectPath, deliveryContextPath: expect.stringContaining('delivery-context.json'), + deliveryContextHash: expect.stringMatching(/^[a-f0-9]{64}$/), attributionMode: 'strict-delivery', }) ); @@ -1621,6 +1634,7 @@ describe('ChangeExtractorService', () => { expect(backfillOpenCodeTaskLedger.mock.calls[0]?.[0]?.deliveryContextPath).toEqual( 
expect.stringContaining('delivery-context.json') ); + expect(backfillOpenCodeTaskLedger.mock.calls[0]?.[0]?.deliveryContextHash).toMatch(/^[a-f0-9]{64}$/); }); it('does not cache negative OpenCode backfill while delivery context already exists', async () => { @@ -1732,8 +1746,10 @@ describe('ChangeExtractorService', () => { expect(backfillOpenCodeTaskLedger.mock.calls[0]?.[0]?.deliveryContextPath).toEqual( expect.stringContaining('delivery-context.json') ); + expect(backfillOpenCodeTaskLedger.mock.calls[0]?.[0]?.deliveryContextHash).toMatch(/^[a-f0-9]{64}$/); expect(backfillOpenCodeTaskLedger.mock.calls[1]?.[0]?.deliveryContextPath).toEqual( expect.stringContaining('delivery-context.json') ); + expect(backfillOpenCodeTaskLedger.mock.calls[1]?.[0]?.deliveryContextHash).toMatch(/^[a-f0-9]{64}$/); }); }); diff --git a/test/main/services/team/OpenCodeReadinessBridge.test.ts b/test/main/services/team/OpenCodeReadinessBridge.test.ts index 26079377..090a4bd7 100644 --- a/test/main/services/team/OpenCodeReadinessBridge.test.ts +++ b/test/main/services/team/OpenCodeReadinessBridge.test.ts @@ -170,6 +170,8 @@ describe('OpenCodeReadinessBridge', () => { taskDisplayId: 'abc12345', projectDir: '/claude/project', workspaceRoot: '/repo', + deliveryContextPath: '/tmp/claude-team-opencode-ledger-context-test/delivery-context.json', + deliveryContextHash: 'a'.repeat(64), }) ).resolves.toMatchObject({ outcome: 'imported', @@ -184,6 +186,8 @@ describe('OpenCodeReadinessBridge', () => { taskDisplayId: 'abc12345', projectDir: '/claude/project', workspaceRoot: '/repo', + deliveryContextPath: '/tmp/claude-team-opencode-ledger-context-test/delivery-context.json', + deliveryContextHash: 'a'.repeat(64), }, { cwd: '/repo', From 33a8a5fabc70488316ff7f5c6270498e306515ce Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 23:01:42 +0300 Subject: [PATCH 11/25] fix(changes): key opencode backfill by context hash --- .../services/team/ChangeExtractorService.ts | 90 ++++++++----------- 1 
file changed, 35 insertions(+), 55 deletions(-) diff --git a/src/main/services/team/ChangeExtractorService.ts b/src/main/services/team/ChangeExtractorService.ts index ac1c68d2..26e53f0d 100644 --- a/src/main/services/team/ChangeExtractorService.ts +++ b/src/main/services/team/ChangeExtractorService.ts @@ -81,6 +81,11 @@ interface OpenCodeDeliveryContextTempFile { cleanup: () => Promise; } +interface OpenCodeDeliveryContextPayload { + rawContext: string; + hash: string; +} + export class ChangeExtractorService { private cache = new Map(); private taskChangeSummaryCache = new Map(); @@ -422,12 +427,16 @@ export class ChangeExtractorService { input.teamName, input.taskId ); + const deliveryContextPayload = this.buildOpenCodeDeliveryContextPayload( + input.teamName, + input.taskId, + deliveryContextRecords + ); const backfillMemberName = this.resolveOpenCodeBackfillMemberName( input.effectiveOptions.owner, deliveryContextRecords ); - const deliveryContextFingerprint = - this.hashOpenCodeDeliveryContextRecords(deliveryContextRecords); + const deliveryContextFingerprint = deliveryContextPayload.hash; const cacheKey = this.buildOpenCodeBackfillCacheKey({ teamName: input.teamName, @@ -479,6 +488,7 @@ export class ChangeExtractorService { workspaceRoot, cacheKey, deliveryContextRecords, + deliveryContextPayload, sourceGeneration, backfillMemberName ).finally(() => { @@ -496,13 +506,15 @@ export class ChangeExtractorService { deliveryContextRecords: Awaited< ReturnType >, + deliveryContextPayload: OpenCodeDeliveryContextPayload, sourceGeneration: string | null, backfillMemberName?: string ): Promise { const deliveryContext = await this.createOpenCodeDeliveryContextTempFile( input.teamName, input.taskId, - deliveryContextRecords + deliveryContextRecords, + deliveryContextPayload ); try { const result = await this.openCodeLedgerBackfillPort!.backfillOpenCodeTaskLedger({ @@ -532,7 +544,7 @@ export class ChangeExtractorService { workspaceRoot, sourceGeneration, 
deliveryRecordCount: deliveryContextRecords.length, - deliveryContextFingerprint: this.hashOpenCodeDeliveryContextRecords(deliveryContextRecords), + deliveryContextFingerprint: deliveryContextPayload.hash, result: { attributionMode: result.attributionMode ?? OPEN_CODE_AUTO_BACKFILL_ATTRIBUTION_MODE, outcome: result.outcome, @@ -586,7 +598,7 @@ export class ChangeExtractorService { projectDir, workspaceRoot, deliveryRecordCount: deliveryContextRecords.length, - deliveryContextFingerprint: this.hashOpenCodeDeliveryContextRecords(deliveryContextRecords), + deliveryContextFingerprint: deliveryContextPayload.hash, error: error instanceof Error ? error.message : String(error), }).catch(() => undefined); if (deliveryContextRecords.length === 0) { @@ -664,7 +676,8 @@ export class ChangeExtractorService { private async createOpenCodeDeliveryContextTempFile( teamName: string, taskId: string, - records: Awaited> + records: Awaited>, + payload = this.buildOpenCodeDeliveryContextPayload(teamName, taskId, records) ): Promise { if (records.length === 0) { return { filePath: null, hash: null, cleanup: async () => undefined }; @@ -673,6 +686,21 @@ export class ChangeExtractorService { const dir = await mkdtemp(path.join(os.tmpdir(), 'claude-team-opencode-ledger-context-')); await chmod(dir, 0o700).catch(() => undefined); const filePath = path.join(dir, 'delivery-context.json'); + await writeFile(filePath, payload.rawContext, { encoding: 'utf8', mode: 0o600 }); + return { + filePath, + hash: payload.hash, + cleanup: async () => { + await rm(dir, { recursive: true, force: true }).catch(() => undefined); + }, + }; + } + + private buildOpenCodeDeliveryContextPayload( + teamName: string, + taskId: string, + records: Awaited> + ): OpenCodeDeliveryContextPayload { const rawContext = `${JSON.stringify( { schemaVersion: 1, @@ -683,13 +711,9 @@ export class ChangeExtractorService { null, 2 )}\n`; - await writeFile(filePath, rawContext, { encoding: 'utf8', mode: 0o600 }); return { - filePath, 
hash: createHash('sha256').update(rawContext).digest('hex'), - cleanup: async () => { - await rm(dir, { recursive: true, force: true }).catch(() => undefined); - }, + rawContext, }; } @@ -798,50 +822,6 @@ export class ChangeExtractorService { return laneIds.sort((left, right) => left.localeCompare(right)); } - private hashOpenCodeDeliveryContextRecords( - records: Awaited> - ): string { - const stableRecords = records - .map((record) => ({ - memberName: record.memberName, - laneId: record.laneId ?? '', - runtimeSessionId: record.runtimeSessionId ?? '', - inboxMessageId: record.inboxMessageId ?? '', - deliveredUserMessageId: record.deliveredUserMessageId ?? '', - taskRefs: record.taskRefs - .map((taskRef) => ({ - taskId: taskRef.taskId, - displayId: taskRef.displayId, - teamName: taskRef.teamName, - })) - .sort((left, right) => - `${left.teamName}\0${left.taskId}\0${left.displayId}`.localeCompare( - `${right.teamName}\0${right.taskId}\0${right.displayId}` - ) - ), - })) - .sort((left, right) => - [ - left.laneId, - left.memberName, - left.runtimeSessionId, - left.inboxMessageId, - left.deliveredUserMessageId, - ] - .join('\0') - .localeCompare( - [ - right.laneId, - right.memberName, - right.runtimeSessionId, - right.inboxMessageId, - right.deliveredUserMessageId, - ].join('\0') - ) - ); - return createHash('sha256').update(JSON.stringify(stableRecords)).digest('hex'); - } - private async readOpenCodePromptDeliveryLedgerRecords( filePath: string ): Promise { From 7b5924c8bd4a331614396810712c75d5e2edcf89 Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 23:05:44 +0300 Subject: [PATCH 12/25] fix(changes): accept opencode evidence contract version --- .../bridge/OpenCodeBridgeCommandContract.ts | 8 +++- .../OpenCodeBridgeCommandContract.test.ts | 37 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts 
b/src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts index 9a478e01..f941cc6d 100644 --- a/src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts +++ b/src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts @@ -1,6 +1,7 @@ import { createHash } from 'crypto'; export const OPEN_CODE_BRIDGE_SCHEMA_VERSION = 1 as const; +export const OPEN_CODE_TASK_LEDGER_EVIDENCE_CONTRACT_VERSION = 1 as const; export type OpenCodeBridgeCommandName = | 'opencode.handshake' @@ -259,6 +260,7 @@ export type OpenCodeBackfillTaskLedgerOutcome = export interface OpenCodeBackfillTaskLedgerCommandData { schemaVersion: 1; providerId: 'opencode'; + opencodeTaskLedgerEvidenceContractVersion?: number; teamName: string; taskId?: string; projectDir?: string; @@ -362,6 +364,7 @@ export interface OpenCodeBridgePeerIdentity { minVersion: number; currentVersion: number; supportedCommands: OpenCodeBridgeCommandName[]; + opencodeTaskLedgerEvidenceContractVersion?: number; }; runtime: { providerId: 'opencode'; @@ -846,7 +849,10 @@ function isPeerIdentity(value: unknown): value is OpenCodeBridgePeerIdentity { (bridgeProtocol.minVersion as number) < 1 || (bridgeProtocol.currentVersion as number) < (bridgeProtocol.minVersion as number) || !Array.isArray(bridgeProtocol.supportedCommands) || - !bridgeProtocol.supportedCommands.every(isOpenCodeBridgeCommandName) + !bridgeProtocol.supportedCommands.every(isOpenCodeBridgeCommandName) || + (bridgeProtocol.opencodeTaskLedgerEvidenceContractVersion !== undefined && + (!Number.isInteger(bridgeProtocol.opencodeTaskLedgerEvidenceContractVersion) || + (bridgeProtocol.opencodeTaskLedgerEvidenceContractVersion as number) < 1)) ) { return false; } diff --git a/test/main/services/team/OpenCodeBridgeCommandContract.test.ts b/test/main/services/team/OpenCodeBridgeCommandContract.test.ts index 48c5051c..7be4e603 100644 --- a/test/main/services/team/OpenCodeBridgeCommandContract.test.ts +++ 
b/test/main/services/team/OpenCodeBridgeCommandContract.test.ts @@ -6,6 +6,7 @@ import { createOpenCodeBridgeHandshakeIdentityHash, createOpenCodeBridgeIdempotencyKey, isOpenCodeBridgeCommandName, + OPEN_CODE_TASK_LEDGER_EVIDENCE_CONTRACT_VERSION, parseSingleBridgeJsonResult, stableHash, validateBridgeResultEnvelope, @@ -202,6 +203,42 @@ describe('OpenCodeBridgeCommandContract', () => { }); }); + it('accepts handshake evidence contract version and rejects invalid values', () => { + const client = peerIdentity('claude_team'); + const server = peerIdentity('agent_teams_orchestrator'); + server.bridgeProtocol.opencodeTaskLedgerEvidenceContractVersion = + OPEN_CODE_TASK_LEDGER_EVIDENCE_CONTRACT_VERSION; + const validHandshake = buildHandshake({ client, server }); + + expect( + validateOpenCodeBridgeHandshake({ + handshake: validHandshake, + expectedClient: client, + requiredCommand: 'opencode.launchTeam', + expectedCapabilitySnapshotId: 'cap-1', + expectedManifestHighWatermark: 10, + expectedRunId: 'run-1', + }) + ).toEqual({ ok: true }); + + server.bridgeProtocol.opencodeTaskLedgerEvidenceContractVersion = 0; + const invalidHandshake = buildHandshake({ client, server }); + + expect( + validateOpenCodeBridgeHandshake({ + handshake: invalidHandshake, + expectedClient: client, + requiredCommand: 'opencode.launchTeam', + expectedCapabilitySnapshotId: 'cap-1', + expectedManifestHighWatermark: 10, + expectedRunId: 'run-1', + }) + ).toEqual({ + ok: false, + reason: 'Bridge handshake peer identity is invalid', + }); + }); + it('creates deterministic idempotency keys for equivalent JSON bodies', () => { const first = createOpenCodeBridgeIdempotencyKey({ command: 'opencode.launchTeam', From ef82755b1711232e61a926e5053211ceac9868e7 Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 23:10:41 +0300 Subject: [PATCH 13/25] fix(changes): avoid caching stale opencode contracts --- .../services/team/ChangeExtractorService.ts | 33 +++++++-- 
.../team/ChangeExtractorService.test.ts | 73 +++++++++++++++++++ 2 files changed, 99 insertions(+), 7 deletions(-) diff --git a/src/main/services/team/ChangeExtractorService.ts b/src/main/services/team/ChangeExtractorService.ts index 26e53f0d..a28b758c 100644 --- a/src/main/services/team/ChangeExtractorService.ts +++ b/src/main/services/team/ChangeExtractorService.ts @@ -14,6 +14,7 @@ import * as os from 'os'; import * as path from 'path'; import { JsonTaskChangeSummaryCacheRepository } from './cache/JsonTaskChangeSummaryCacheRepository'; +import { OPEN_CODE_TASK_LEDGER_EVIDENCE_CONTRACT_VERSION } from './opencode/bridge/OpenCodeBridgeCommandContract'; import { getOpenCodeLaneScopedRuntimeFilePath, getOpenCodeTeamRuntimeDirectory, @@ -533,9 +534,25 @@ export class ChangeExtractorService { } : {}), }); + const evidenceContractVersion = + typeof result.opencodeTaskLedgerEvidenceContractVersion === 'number' && + Number.isInteger(result.opencodeTaskLedgerEvidenceContractVersion) + ? result.opencodeTaskLedgerEvidenceContractVersion + : 0; + const hasExpectedEvidenceContract = + evidenceContractVersion >= OPEN_CODE_TASK_LEDGER_EVIDENCE_CONTRACT_VERSION; + const diagnostics = hasExpectedEvidenceContract + ? (result.diagnostics ?? []) + : [ + `OpenCode task ledger evidence contract is unsupported or missing: ${evidenceContractVersion}.`, + ...(result.diagnostics ?? []), + ]; void appendOpenCodeTaskChangeDiag({ event: 'backfill_result', - reason: this.classifyOpenCodeBackfillResult(result), + reason: + !hasExpectedEvidenceContract && result.importedEvents <= 0 + ? 'unsupported-evidence-contract' + : this.classifyOpenCodeBackfillResult(result), teamName: input.teamName, taskId: input.taskId, displayId: input.taskMeta?.displayId ?? 
null, @@ -546,6 +563,7 @@ export class ChangeExtractorService { deliveryRecordCount: deliveryContextRecords.length, deliveryContextFingerprint: deliveryContextPayload.hash, result: { + opencodeTaskLedgerEvidenceContractVersion: evidenceContractVersion, attributionMode: result.attributionMode ?? OPEN_CODE_AUTO_BACKFILL_ATTRIBUTION_MODE, outcome: result.outcome, dryRun: result.dryRun, @@ -555,13 +573,14 @@ export class ChangeExtractorService { importedEvents: result.importedEvents, skippedEvents: result.skippedEvents, }, - diagnostics: (result.diagnostics ?? []).slice(0, 25), + diagnostics: diagnostics.slice(0, 25), notices: (result.notices ?? []).slice(0, 25), }).catch(() => undefined); const backfilled = result.importedEvents > 0 || - result.outcome === 'imported' || - (result.outcome === 'duplicates-only' && result.candidateEvents > 0); + (hasExpectedEvidenceContract && + (result.outcome === 'imported' || + (result.outcome === 'duplicates-only' && result.candidateEvents > 0))); if (result.importedEvents > 0) { await this.invalidateTaskChangeSummaries(input.teamName, [input.taskId], { @@ -569,7 +588,7 @@ export class ChangeExtractorService { }); } - if (backfilled || deliveryContextRecords.length === 0) { + if ((hasExpectedEvidenceContract && backfilled) || deliveryContextRecords.length === 0) { this.openCodeBackfillCache.set(cacheKey, { backfilledAt: backfilled ? 
Date.now() : 0, expiresAt: Date.now() + this.openCodeBackfillCacheTtl, @@ -578,9 +597,9 @@ export class ChangeExtractorService { this.openCodeBackfillCache.delete(cacheKey); } - if (result.diagnostics.length > 0 && result.outcome !== 'no-history') { + if (diagnostics.length > 0 && result.outcome !== 'no-history') { logger.debug( - `OpenCode ledger backfill for ${input.teamName}/${input.taskId}: ${result.outcome}; ${result.diagnostics.join('; ')}` + `OpenCode ledger backfill for ${input.teamName}/${input.taskId}: ${result.outcome}; ${diagnostics.join('; ')}` ); } return { attempted: true, backfilled }; diff --git a/test/main/services/team/ChangeExtractorService.test.ts b/test/main/services/team/ChangeExtractorService.test.ts index daaa5428..eb0535a3 100644 --- a/test/main/services/team/ChangeExtractorService.test.ts +++ b/test/main/services/team/ChangeExtractorService.test.ts @@ -1752,4 +1752,77 @@ describe('ChangeExtractorService', () => { ); expect(backfillOpenCodeTaskLedger.mock.calls[1]?.[0]?.deliveryContextHash).toMatch(/^[a-f0-9]{64}$/); }); + + it('does not cache duplicates-only OpenCode backfill from an old evidence contract', async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'change-extractor-service-')); + setClaudeBasePathOverride(tmpDir); + await writeTaskFile(tmpDir, { displayId: 'abc12345', owner: 'bob' }); + const projectDir = path.join(tmpDir, 'project-dir'); + const projectPath = path.join(tmpDir, 'repo'); + await fs.mkdir(projectDir, { recursive: true }); + await fs.mkdir(projectPath, { recursive: true }); + await writeOpenCodeDeliveryLedger(tmpDir); + + const backfillOpenCodeTaskLedger = vi.fn(async (input: any) => ({ + schemaVersion: 1, + providerId: 'opencode', + teamName: input.teamName, + taskId: input.taskId, + projectDir: input.projectDir, + workspaceRoot: input.workspaceRoot, + dryRun: false, + attributionMode: input.attributionMode, + scannedSessions: 1, + scannedToolparts: 1, + candidateEvents: 1, + importedEvents: 0, + 
skippedEvents: 1, + outcome: 'duplicates-only', + notices: [], + diagnostics: [], + })); + const workerClient = { + isAvailable: vi.fn(() => true), + computeTaskChanges: vi.fn(async () => + makeTaskChangeResult(TASK_ID, { content: '', confidence: 'fallback' }) + ), + }; + + const service = new ChangeExtractorService( + { + getLogSourceWatchContext: vi.fn(async () => ({ + projectDir, + projectPath, + sessionIds: [], + })), + findLogFileRefsForTask: vi.fn(async () => []), + findMemberLogPaths: vi.fn(async () => []), + } as any, + { + parseBoundaries: vi.fn(async () => ({ + boundaries: [], + scopes: [], + isSingleTaskSession: true, + detectedMechanism: 'none' as const, + })), + } as any, + { getConfig: vi.fn(async () => ({ projectPath })) } as any, + undefined, + workerClient as any, + { backfillOpenCodeTaskLedger } as any, + { getMeta: vi.fn(async () => ({ providerId: 'opencode' })) } as any + ); + + await service.getTaskChanges(TEAM_NAME, TASK_ID, { + owner: 'bob', + status: 'completed', + }); + await service.getTaskChanges(TEAM_NAME, TASK_ID, { + owner: 'bob', + status: 'completed', + }); + + expect(backfillOpenCodeTaskLedger).toHaveBeenCalledTimes(2); + expect(workerClient.computeTaskChanges).toHaveBeenCalledTimes(2); + }); }); From fe722cd8bca60be219f07c97ba85cc226480a05d Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 23:11:45 +0300 Subject: [PATCH 14/25] fix(changes): version opencode backfill cache --- src/main/services/team/ChangeExtractorService.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/services/team/ChangeExtractorService.ts b/src/main/services/team/ChangeExtractorService.ts index a28b758c..6b84d7c6 100644 --- a/src/main/services/team/ChangeExtractorService.ts +++ b/src/main/services/team/ChangeExtractorService.ts @@ -47,6 +47,7 @@ import type { AgentChangeSet, ChangeStats, TaskChangeSetV2 } from '@shared/types const logger = createLogger('Service:ChangeExtractorService'); const 
OPEN_CODE_AUTO_BACKFILL_ATTRIBUTION_MODE = 'strict-delivery' as const; +const OPEN_CODE_AUTO_BACKFILL_EVIDENCE_PIPELINE = 'opencode-session-snapshot-v1' as const; const OPEN_CODE_MAX_DISCOVERED_LANES = 500; /** Кеш-запись: данные + mtime файла + время протухания */ @@ -448,6 +449,7 @@ export class ChangeExtractorService { sourceGeneration, deliveryContextFingerprint, attributionMode: OPEN_CODE_AUTO_BACKFILL_ATTRIBUTION_MODE, + evidencePipeline: OPEN_CODE_AUTO_BACKFILL_EVIDENCE_PIPELINE, }); const now = Date.now(); const cached = this.openCodeBackfillCache.get(cacheKey); @@ -474,6 +476,7 @@ export class ChangeExtractorService { deliveryRecordCount: 0, deliveryContextFingerprint, attributionMode: OPEN_CODE_AUTO_BACKFILL_ATTRIBUTION_MODE, + evidencePipeline: OPEN_CODE_AUTO_BACKFILL_EVIDENCE_PIPELINE, }).catch(() => undefined); return { attempted: false, backfilled: false }; } @@ -562,6 +565,7 @@ export class ChangeExtractorService { sourceGeneration, deliveryRecordCount: deliveryContextRecords.length, deliveryContextFingerprint: deliveryContextPayload.hash, + evidencePipeline: OPEN_CODE_AUTO_BACKFILL_EVIDENCE_PIPELINE, result: { opencodeTaskLedgerEvidenceContractVersion: evidenceContractVersion, attributionMode: result.attributionMode ?? OPEN_CODE_AUTO_BACKFILL_ATTRIBUTION_MODE, @@ -618,6 +622,7 @@ export class ChangeExtractorService { workspaceRoot, deliveryRecordCount: deliveryContextRecords.length, deliveryContextFingerprint: deliveryContextPayload.hash, + evidencePipeline: OPEN_CODE_AUTO_BACKFILL_EVIDENCE_PIPELINE, error: error instanceof Error ? 
error.message : String(error), }).catch(() => undefined); if (deliveryContextRecords.length === 0) { @@ -868,6 +873,7 @@ export class ChangeExtractorService { sourceGeneration?: string | null; deliveryContextFingerprint: string; attributionMode: typeof OPEN_CODE_AUTO_BACKFILL_ATTRIBUTION_MODE; + evidencePipeline: typeof OPEN_CODE_AUTO_BACKFILL_EVIDENCE_PIPELINE; }): string { return JSON.stringify({ teamName: input.teamName, @@ -878,6 +884,7 @@ export class ChangeExtractorService { sourceGeneration: input.sourceGeneration ?? '', deliveryContextFingerprint: input.deliveryContextFingerprint, attributionMode: input.attributionMode, + evidencePipeline: input.evidencePipeline, }); } From 143f905b8694a3af347f5079f41e6450c94d14e5 Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 23:14:22 +0300 Subject: [PATCH 15/25] test(changes): cover supported opencode backfill cache --- .../team/ChangeExtractorService.test.ts | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/test/main/services/team/ChangeExtractorService.test.ts b/test/main/services/team/ChangeExtractorService.test.ts index eb0535a3..b7b3898a 100644 --- a/test/main/services/team/ChangeExtractorService.test.ts +++ b/test/main/services/team/ChangeExtractorService.test.ts @@ -6,6 +6,7 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import * as fs from 'fs/promises'; import { ChangeExtractorService } from '../../../../src/main/services/team/ChangeExtractorService'; +import { OPEN_CODE_TASK_LEDGER_EVIDENCE_CONTRACT_VERSION } from '../../../../src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract'; import { buildTaskChangePresenceDescriptor } from '../../../../src/main/services/team/taskChangePresenceUtils'; import { setClaudeBasePathOverride } from '../../../../src/main/utils/pathDecoder'; @@ -1825,4 +1826,79 @@ describe('ChangeExtractorService', () => { expect(backfillOpenCodeTaskLedger).toHaveBeenCalledTimes(2); 
expect(workerClient.computeTaskChanges).toHaveBeenCalledTimes(2); }); + + it('caches duplicates-only OpenCode backfill from the current evidence contract', async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'change-extractor-service-')); + setClaudeBasePathOverride(tmpDir); + await writeTaskFile(tmpDir, { displayId: 'abc12345', owner: 'bob' }); + const projectDir = path.join(tmpDir, 'project-dir'); + const projectPath = path.join(tmpDir, 'repo'); + await fs.mkdir(projectDir, { recursive: true }); + await fs.mkdir(projectPath, { recursive: true }); + await writeOpenCodeDeliveryLedger(tmpDir); + + const backfillOpenCodeTaskLedger = vi.fn(async (input: any) => ({ + schemaVersion: 1, + providerId: 'opencode', + opencodeTaskLedgerEvidenceContractVersion: + OPEN_CODE_TASK_LEDGER_EVIDENCE_CONTRACT_VERSION, + teamName: input.teamName, + taskId: input.taskId, + projectDir: input.projectDir, + workspaceRoot: input.workspaceRoot, + dryRun: false, + attributionMode: input.attributionMode, + scannedSessions: 1, + scannedToolparts: 1, + candidateEvents: 1, + importedEvents: 0, + skippedEvents: 1, + outcome: 'duplicates-only', + notices: [], + diagnostics: [], + })); + const workerClient = { + isAvailable: vi.fn(() => true), + computeTaskChanges: vi.fn(async () => + makeTaskChangeResult(TASK_ID, { content: '', confidence: 'fallback' }) + ), + }; + + const service = new ChangeExtractorService( + { + getLogSourceWatchContext: vi.fn(async () => ({ + projectDir, + projectPath, + sessionIds: [], + })), + findLogFileRefsForTask: vi.fn(async () => []), + findMemberLogPaths: vi.fn(async () => []), + } as any, + { + parseBoundaries: vi.fn(async () => ({ + boundaries: [], + scopes: [], + isSingleTaskSession: true, + detectedMechanism: 'none' as const, + })), + } as any, + { getConfig: vi.fn(async () => ({ projectPath })) } as any, + undefined, + workerClient as any, + { backfillOpenCodeTaskLedger } as any, + { getMeta: vi.fn(async () => ({ providerId: 'opencode' })) } as any 
+ ); + + await service.getTaskChanges(TEAM_NAME, TASK_ID, { + owner: 'bob', + status: 'completed', + }); + await service.getTaskChanges(TEAM_NAME, TASK_ID, { + owner: 'bob', + status: 'completed', + }); + + expect(backfillOpenCodeTaskLedger).toHaveBeenCalledTimes(1); + expect(workerClient.computeTaskChanges).toHaveBeenCalledTimes(2); + }); }); From fdf5ddeb61753b5f9830ef823a0806fe2ccbce77 Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 23:34:17 +0300 Subject: [PATCH 16/25] fix(changes): project opencode upgrades in detail view --- .../services/team/TaskChangeLedgerReader.ts | 104 +++++++++++++++--- .../opencode-snapshot-upgrade/manifest.json | 15 +++ .../fixture-opencode-snapshot-upgrade.json | 1 + ...bb741815adaa06f6d396f46739c279eec0fc25cfb6 | 1 + ...fe77a71f364d012bad8e892b1ba9adaa30909fb887 | 1 + .../fixture-opencode-snapshot-upgrade.json | 1 + .../fixture-opencode-snapshot-upgrade.jsonl | 2 + .../project/src/snapshot-only.js | 1 + ...skChangeLedgerFixtures.integration.test.ts | 44 ++++++++ 9 files changed, 157 insertions(+), 13 deletions(-) create mode 100644 test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/manifest.json create mode 100644 test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-change-freshness/fixture-opencode-snapshot-upgrade.json create mode 100644 test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/blobs/sha256/402c3103f57599660a8b57bb741815adaa06f6d396f46739c279eec0fc25cfb6 create mode 100644 test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/blobs/sha256/892dd6554b064c9dec7454fe77a71f364d012bad8e892b1ba9adaa30909fb887 create mode 100644 test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/bundles/fixture-opencode-snapshot-upgrade.json create mode 100644 
test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/events/fixture-opencode-snapshot-upgrade.jsonl create mode 100644 test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/src/snapshot-only.js diff --git a/src/main/services/team/TaskChangeLedgerReader.ts b/src/main/services/team/TaskChangeLedgerReader.ts index f07e523e..3f5d3cff 100644 --- a/src/main/services/team/TaskChangeLedgerReader.ts +++ b/src/main/services/team/TaskChangeLedgerReader.ts @@ -129,6 +129,13 @@ interface LedgerEvent { linesRemoved?: number; replaceAll?: boolean; warnings?: string[]; + sourceRuntime?: 'opencode'; + sourceProvider?: 'opencode'; + sourceImportKey?: string; + evidenceProof?: string; + supersedesEventId?: string; + snapshotId?: string; + snapshotSource?: string; } interface LedgerNotice { @@ -196,7 +203,7 @@ interface LedgerSummaryScopeV2 { primaryAgentId?: string; primaryMemberName?: string; memberName: string; - agentIds: string[]; + agentIds?: string[]; memberNames?: string[]; startTimestamp: string; endTimestamp: string; @@ -865,9 +872,10 @@ export class TaskChangeLedgerReader { bundle?: LedgerSummaryBundleV2; provenance: TaskChangeProvenance; }): Promise { - const snippets = await this.buildSnippets(params.projectDir, params.journal.events); + const projectedEvents = this.projectJournalEventsForUi(params.journal.events); + const snippets = await this.buildSnippets(params.projectDir, projectedEvents); const groupedSnippets = this.groupSnippets(snippets); - const warnings = this.collectWarnings(params.journal.events, params.journal.notices, { + const warnings = this.collectWarnings(projectedEvents, params.journal.notices, { recovered: params.journal.recovered, }); @@ -908,15 +916,15 @@ export class TaskChangeLedgerReader { totalLinesAdded = fallback.totalLinesAdded; totalLinesRemoved = fallback.totalLinesRemoved; totalFiles = fallback.files.length; - confidence = params.journal.events.some((event) => event.confidence === 
'low') + confidence = projectedEvents.some((event) => event.confidence === 'low') ? 'low' - : params.journal.events.some((event) => event.confidence === 'medium') + : projectedEvents.some((event) => event.confidence === 'medium') ? 'medium' : 'high'; scope = this.buildFallbackScope( params.taskId, files, - params.journal.events, + projectedEvents, params.journal.notices ); diffStatCompleteness = fallback.files.every((file) => file.diffStatKnown !== false) @@ -955,7 +963,8 @@ export class TaskChangeLedgerReader { undefined, params.journal.recovered ? 'recovered' : 'ok' ); - const snippets = params.journal.events.map((event) => this.eventToSnippet(event, null, null)); + const projectedEvents = this.projectJournalEventsForUi(params.journal.events); + const snippets = projectedEvents.map((event) => this.eventToSnippet(event, null, null)); const grouped = this.groupSnippets(snippets); const fallback = this.buildFallbackFilesFromGroupedSnippets(grouped, params.projectPath); return { @@ -965,20 +974,20 @@ export class TaskChangeLedgerReader { totalLinesAdded: fallback.totalLinesAdded, totalLinesRemoved: fallback.totalLinesRemoved, totalFiles: fallback.files.length, - confidence: params.journal.events.some((event) => event.confidence === 'low') + confidence: projectedEvents.some((event) => event.confidence === 'low') ? 'low' - : params.journal.events.some((event) => event.confidence === 'medium') + : projectedEvents.some((event) => event.confidence === 'medium') ? 
'medium' : 'high', computedAt: new Date().toISOString(), scope: this.buildFallbackScope( params.taskId, fallback.files, - params.journal.events, + projectedEvents, params.journal.notices ), warnings: [ - ...this.collectWarnings(params.journal.events, params.journal.notices, { + ...this.collectWarnings(projectedEvents, params.journal.notices, { recovered: params.journal.recovered, }), 'Task change summary fell back to journal reconstruction.', @@ -1044,6 +1053,7 @@ export class TaskChangeLedgerReader { private mapV2SummaryFile(file: LedgerSummaryFileV2, projectPath?: string): FileChangeSummary { const displayPath = file.displayPath ?? file.filePath; const filePath = this.normalizeLedgerFilePath(file.filePath); + const agentIds = Array.isArray(file.agentIds) ? file.agentIds : []; return { filePath, relativePath: this.relativePath(displayPath, projectPath, file.relativePath), @@ -1065,7 +1075,7 @@ export class TaskChangeLedgerReader { ...(file.latestBeforeState ? { beforeState: file.latestBeforeState } : {}), ...(file.latestAfterState ? { afterState: file.latestAfterState } : {}), ...(file.primaryActorKey ? { primaryActorKey: file.primaryActorKey } : {}), - ...(file.agentIds.length > 0 ? { agentIds: file.agentIds } : {}), + ...(agentIds.length > 0 ? { agentIds } : {}), ...(file.memberNames ? { memberNames: file.memberNames } : {}), ...(file.executionSeqRange ? { executionSeqRange: file.executionSeqRange } : {}), ...(file.worktreePath ? { worktreePath: file.worktreePath } : {}), @@ -1093,6 +1103,7 @@ export class TaskChangeLedgerReader { scope: LedgerSummaryScopeV2, files: LedgerSummaryFileV2[] ): TaskChangeScope { + const agentIds = Array.isArray(scope.agentIds) ? scope.agentIds : []; return { taskId, memberName: @@ -1111,7 +1122,7 @@ export class TaskChangeLedgerReader { ...(scope.primaryActorKey ? { primaryActorKey: scope.primaryActorKey } : {}), ...(scope.primaryAgentId ? { primaryAgentId: scope.primaryAgentId } : {}), ...(scope.primaryMemberName ? 
{ primaryMemberName: scope.primaryMemberName } : {}), - ...(scope.agentIds.length > 0 ? { agentIds: scope.agentIds } : {}), + ...(agentIds.length > 0 ? { agentIds } : {}), ...(scope.memberNames ? { memberNames: scope.memberNames } : {}), ...(scope.toolUseCount !== undefined ? { toolUseCount: scope.toolUseCount } : {}), ...(scope.toolUseIdsTruncated ? { toolUseIdsTruncated: true } : {}), @@ -1136,6 +1147,73 @@ export class TaskChangeLedgerReader { ); } + private projectJournalEventsForUi(events: LedgerEvent[]): LedgerEvent[] { + const selectedBySourceImportKey = new Map< + string, + { event: LedgerEvent; index: number; rank: number } + >(); + const passthrough: Array<{ event: LedgerEvent; index: number }> = []; + + events.forEach((event, index) => { + const sourceImportKey = this.sourceImportKeyForEvent(event); + if (!sourceImportKey) { + passthrough.push({ event, index }); + return; + } + const rank = this.evidenceRankForEvent(event); + const existing = selectedBySourceImportKey.get(sourceImportKey); + if (!existing || rank >= existing.rank) { + selectedBySourceImportKey.set(sourceImportKey, { event, index, rank }); + } + }); + + return [ + ...passthrough, + ...[...selectedBySourceImportKey.values()].map(({ event, index }) => ({ event, index })), + ] + .sort((left, right) => left.index - right.index) + .map(({ event }) => event); + } + + private sourceImportKeyForEvent(event: LedgerEvent): string | null { + if ( + event.sourceImportKey && + (event.sourceRuntime === 'opencode' || + event.sourceProvider === 'opencode' || + event.source === 'opencode_toolpart_write' || + event.source === 'opencode_toolpart_edit' || + event.source === 'opencode_toolpart_apply_patch') + ) { + return event.sourceImportKey; + } + return null; + } + + private evidenceRankForEvent(event: LedgerEvent): number { + const hasFullText = + event.before !== null || + event.after !== null || + (event.operation === 'create' && + event.afterState?.exists === true && + 
!event.afterState.unavailableReason) || + (event.operation === 'delete' && + event.beforeState?.exists === true && + !event.beforeState.unavailableReason); + + switch (event.evidenceProof) { + case 'opencode-snapshot': + return hasFullText ? 50 : 35; + case 'inverse-apply-patch-chain': + case 'inverse-edit-chain': + case 'toolpart-chain': + return hasFullText ? 40 : 25; + case 'metadata-only-fallback': + return 10; + default: + return hasFullText ? 30 : 5; + } + } + private async readContentRef( projectDir: string, ref: LedgerContentRef | null diff --git a/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/manifest.json b/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/manifest.json new file mode 100644 index 00000000..1b0344f6 --- /dev/null +++ b/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/manifest.json @@ -0,0 +1,15 @@ +{ + "schemaVersion": 1, + "name": "opencode-snapshot-upgrade", + "taskId": "fixture-opencode-snapshot-upgrade", + "description": "OpenCode metadata-only import upgraded by source-driven snapshot evidence into one visible full-text row.", + "projectRootToken": "__PROJECT_ROOT__", + "expected": { + "totalFiles": 1, + "warnings": [], + "relativePaths": [ + "src/snapshot-only.js" + ], + "relationKinds": [] + } +} diff --git a/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-change-freshness/fixture-opencode-snapshot-upgrade.json b/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-change-freshness/fixture-opencode-snapshot-upgrade.json new file mode 100644 index 00000000..2c97190e --- /dev/null +++ b/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-change-freshness/fixture-opencode-snapshot-upgrade.json @@ -0,0 +1 @@ 
+{"schemaVersion":2,"source":"task-change-ledger","taskId":"fixture-opencode-snapshot-upgrade","updatedAt":"2026-04-26T10:00:02.000Z","journalStamp":{"events":{"bytes":3265,"mtimeMs":1777197602000,"tailSha256":"fixture-opencode-snapshot-upgrade-tail"}},"eventCount":2,"noticeCount":0,"integrity":"ok","bundleSchemaVersion":2,"bundleKind":"summary"} diff --git a/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/blobs/sha256/402c3103f57599660a8b57bb741815adaa06f6d396f46739c279eec0fc25cfb6 b/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/blobs/sha256/402c3103f57599660a8b57bb741815adaa06f6d396f46739c279eec0fc25cfb6 new file mode 100644 index 00000000..d10a8442 --- /dev/null +++ b/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/blobs/sha256/402c3103f57599660a8b57bb741815adaa06f6d396f46739c279eec0fc25cfb6 @@ -0,0 +1 @@ +export const snapshot = 1; diff --git a/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/blobs/sha256/892dd6554b064c9dec7454fe77a71f364d012bad8e892b1ba9adaa30909fb887 b/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/blobs/sha256/892dd6554b064c9dec7454fe77a71f364d012bad8e892b1ba9adaa30909fb887 new file mode 100644 index 00000000..99778790 --- /dev/null +++ b/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/blobs/sha256/892dd6554b064c9dec7454fe77a71f364d012bad8e892b1ba9adaa30909fb887 @@ -0,0 +1 @@ +export const snapshot = 2; diff --git a/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/bundles/fixture-opencode-snapshot-upgrade.json b/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/bundles/fixture-opencode-snapshot-upgrade.json new file mode 100644 index 00000000..4ebb1b89 --- /dev/null +++ 
b/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/bundles/fixture-opencode-snapshot-upgrade.json @@ -0,0 +1 @@ +{"schemaVersion":2,"source":"task-change-ledger","bundleKind":"summary","taskId":"fixture-opencode-snapshot-upgrade","generatedAt":"2026-04-26T10:00:02.000Z","journalStamp":{"events":{"bytes":3265,"mtimeMs":1777197602000,"tailSha256":"fixture-opencode-snapshot-upgrade-tail"}},"integrity":"ok","eventCount":2,"projectedEventCount":1,"noticeCount":0,"scope":{"confidence":{"tier":1,"label":"high","reason":"Derived from task-change ledger"},"primaryActorKey":"member:bob","primaryMemberName":"bob","memberName":"bob","memberNames":["bob"],"startTimestamp":"2026-04-26T10:00:01.000Z","endTimestamp":"2026-04-26T10:00:01.000Z","toolUseIds":["bob-edit-snapshot-only"],"toolUseCount":1,"phaseSet":["work"],"executionSeqRange":{"start":0,"end":0},"confidenceBreakdown":{"capture":"high","attribution":"high","reviewability":"full-text"},"visibleFileCount":1,"contributors":[{"actorKey":"member:bob","memberName":"bob","eventCount":1,"noticeCount":0,"touchedFileCount":1,"visibleFileCount":1,"toolUseCount":1,"cumulativeLinesAdded":1,"cumulativeLinesRemoved":1,"firstTimestamp":"2026-04-26T10:00:01.000Z","lastTimestamp":"2026-04-26T10:00:01.000Z"}]},"files":[{"changeKey":"modify:__PROJECT_ROOT__/src/snapshot-only.js","filePath":"__PROJECT_ROOT__/src/snapshot-only.js","relativePath":"src/snapshot-only.js","linesAdded":1,"linesRemoved":1,"diffStatKnown":true,"eventCount":1,"journalEventCount":2,"firstTimestamp":"2026-04-26T10:00:01.000Z","lastTimestamp":"2026-04-26T10:00:01.000Z","latestOperation":"modify","createdInTask":false,"deletedInTask":false,"baselineExists":true,"finalExists":true,"latestBeforeHash":"402c3103f57599660a8b57bb741815adaa06f6d396f46739c279eec0fc25cfb6","latestAfterHash":"892dd6554b064c9dec7454fe77a71f364d012bad8e892b1ba9adaa30909fb887","latestBeforeState":{"exists":true,"sha256":"402c3103f57599660a8b57bb741815adaa06f
6d396f46739c279eec0fc25cfb6","sizeBytes":27},"latestAfterState":{"exists":true,"sha256":"892dd6554b064c9dec7454fe77a71f364d012bad8e892b1ba9adaa30909fb887","sizeBytes":27},"contentAvailability":"full-text","reviewability":"full-text","primaryActorKey":"member:bob","memberNames":["bob"],"executionSeqRange":{"start":0,"end":0}}],"totalLinesAdded":1,"totalLinesRemoved":1,"diffStatCompleteness":"complete","totalFiles":1,"confidence":"high","warningCount":0,"warnings":[]} diff --git a/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/events/fixture-opencode-snapshot-upgrade.jsonl b/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/events/fixture-opencode-snapshot-upgrade.jsonl new file mode 100644 index 00000000..23734d04 --- /dev/null +++ b/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/.board-task-changes/events/fixture-opencode-snapshot-upgrade.jsonl @@ -0,0 +1,2 @@ +{"schemaVersion":1,"taskId":"fixture-opencode-snapshot-upgrade","taskRef":"fixture-opencode-snapshot-upgrade","taskRefKind":"canonical","phase":"work","executionSeq":0,"sessionId":"opencode-session-fixture","memberName":"bob","toolUseId":"bob-edit-snapshot-only","source":"opencode_toolpart_edit","operation":"modify","confidence":"medium","workspaceRoot":"__PROJECT_ROOT__","filePath":"__PROJECT_ROOT__/src/snapshot-only.js","relativePath":"src/snapshot-only.js","timestamp":"2026-04-26T10:00:00.000Z","toolStatus":"succeeded","before":null,"after":null,"beforeState":{"exists":true,"unavailableReason":"opencode-before-content-unavailable"},"afterState":{"exists":true,"unavailableReason":"opencode-edit-final-content-unavailable"},"oldString":"snapshot = 1","newString":"snapshot = 
2","linesAdded":0,"linesRemoved":0,"sourceRuntime":"opencode","sourceProvider":"opencode","sourceSessionId":"opencode-session-fixture","sourcePartId":"bob-edit-snapshot-only","sourceMessageId":"assistant-1","parentUserMessageId":"user-1","attributionMethod":"delivery-ledger-taskrefs","sourceImportKey":"opencode\u0000opencode-session-fixture\u0000bob-edit-snapshot-only\u0000src/snapshot-only.js","evidenceProof":"metadata-only-fallback","warnings":["OpenCode edit was captured without a git/snapshot baseline; apply/reject is manual-only."],"eventId":"opencode-metadata-only-event"} +{"schemaVersion":1,"taskId":"fixture-opencode-snapshot-upgrade","taskRef":"fixture-opencode-snapshot-upgrade","taskRefKind":"canonical","phase":"work","executionSeq":0,"sessionId":"opencode-session-fixture","memberName":"bob","toolUseId":"bob-edit-snapshot-only","source":"opencode_toolpart_edit","operation":"modify","confidence":"high","workspaceRoot":"__PROJECT_ROOT__","filePath":"__PROJECT_ROOT__/src/snapshot-only.js","relativePath":"src/snapshot-only.js","timestamp":"2026-04-26T10:00:01.000Z","toolStatus":"succeeded","before":{"sha256":"402c3103f57599660a8b57bb741815adaa06f6d396f46739c279eec0fc25cfb6","sizeBytes":27,"blobRef":"sha256/402c3103f57599660a8b57bb741815adaa06f6d396f46739c279eec0fc25cfb6"},"after":{"sha256":"892dd6554b064c9dec7454fe77a71f364d012bad8e892b1ba9adaa30909fb887","sizeBytes":27,"blobRef":"sha256/892dd6554b064c9dec7454fe77a71f364d012bad8e892b1ba9adaa30909fb887"},"beforeState":{"exists":true,"sha256":"402c3103f57599660a8b57bb741815adaa06f6d396f46739c279eec0fc25cfb6","sizeBytes":27},"afterState":{"exists":true,"sha256":"892dd6554b064c9dec7454fe77a71f364d012bad8e892b1ba9adaa30909fb887","sizeBytes":27},"oldString":"snapshot = 1","newString":"snapshot = 
2","linesAdded":1,"linesRemoved":1,"sourceRuntime":"opencode","sourceProvider":"opencode","sourceSessionId":"opencode-session-fixture","sourcePartId":"bob-edit-snapshot-only","sourceMessageId":"assistant-1","parentUserMessageId":"user-1","attributionMethod":"delivery-ledger-taskrefs","sourceImportKey":"opencode\u0000opencode-session-fixture\u0000bob-edit-snapshot-only\u0000src/snapshot-only.js","evidenceProof":"inverse-edit-chain","snapshotId":"opencode-snapshot-window-fixture","snapshotSource":"opencode","supersedesEventId":"opencode-metadata-only-event","eventId":"opencode-snapshot-upgrade-event"} diff --git a/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/src/snapshot-only.js b/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/src/snapshot-only.js new file mode 100644 index 00000000..99778790 --- /dev/null +++ b/test/fixtures/team/task-change-ledger/opencode-snapshot-upgrade/project/src/snapshot-only.js @@ -0,0 +1 @@ +export const snapshot = 2; diff --git a/test/main/services/team/taskChangeLedgerFixtures.integration.test.ts b/test/main/services/team/taskChangeLedgerFixtures.integration.test.ts index 1eb45550..212a4b35 100644 --- a/test/main/services/team/taskChangeLedgerFixtures.integration.test.ts +++ b/test/main/services/team/taskChangeLedgerFixtures.integration.test.ts @@ -315,6 +315,50 @@ describe('task change ledger golden fixtures', () => { expect(resolved.contentSource).toBe('ledger-snapshot'); }); + it('reads OpenCode snapshot upgrade fixtures as one full-text ledger row', async () => { + const fixture = await materializeTaskChangeLedgerFixture('opencode-snapshot-upgrade'); + cleanups.push(fixture.cleanup); + const reader = new TaskChangeLedgerReader(); + const changeSet = await reader.readTaskChanges({ + teamName: TEAM_NAME, + taskId: fixture.manifest.taskId, + projectDir: fixture.projectDir, + projectPath: fixture.projectDir, + includeDetails: true, + }); + + expect(changeSet?.files).toHaveLength(1); + 
const file = changeSet!.files[0]!; + expect(file.relativePath).toBe('src/snapshot-only.js'); + expect(file.ledgerSummary).toMatchObject({ + reviewability: 'full-text', + contentAvailability: 'full-text', + }); + expect(file.snippets).toHaveLength(1); + const snippet = file.snippets[0]!; + expect(snippet.toolName).toBe('Edit'); + expect(snippet.type).toBe('edit'); + expect(snippet.ledger).toMatchObject({ + source: 'ledger-snapshot', + confidence: 'high', + textAvailability: 'full-text', + operation: 'modify', + }); + expect(snippet.ledger?.originalFullContent).toBe('export const snapshot = 1;\n'); + expect(snippet.ledger?.modifiedFullContent).toBe('export const snapshot = 2;\n'); + + const resolver = new FileContentResolver({ findMemberLogPaths: vi.fn(async () => []) } as any); + const resolved = await resolver.getFileContent( + TEAM_NAME, + 'bob', + file.filePath, + file.snippets + ); + expect(resolved.originalFullContent).toBe('export const snapshot = 1;\n'); + expect(resolved.modifiedFullContent).toBe('export const snapshot = 2;\n'); + expect(resolved.contentSource).toBe('ledger-snapshot'); + }); + it('rejects grouped copy fixtures by deleting only the copied path', async () => { const fixture = await materializeTaskChangeLedgerFixture('copy'); cleanups.push(fixture.cleanup); From 642cea8857bc602db7824bed06ff2b5212f76206 Mon Sep 17 00:00:00 2001 From: 777genius Date: Tue, 28 Apr 2026 23:42:35 +0300 Subject: [PATCH 17/25] fix(changes): rank full text evidence by blobs --- .../services/team/TaskChangeLedgerReader.ts | 20 ++--- .../team/TaskChangeLedgerReader.test.ts | 78 +++++++++++++++++++ 2 files changed, 89 insertions(+), 9 deletions(-) diff --git a/src/main/services/team/TaskChangeLedgerReader.ts b/src/main/services/team/TaskChangeLedgerReader.ts index 3f5d3cff..644f58ca 100644 --- a/src/main/services/team/TaskChangeLedgerReader.ts +++ b/src/main/services/team/TaskChangeLedgerReader.ts @@ -1190,15 +1190,7 @@ export class TaskChangeLedgerReader { } private 
evidenceRankForEvent(event: LedgerEvent): number { - const hasFullText = - event.before !== null || - event.after !== null || - (event.operation === 'create' && - event.afterState?.exists === true && - !event.afterState.unavailableReason) || - (event.operation === 'delete' && - event.beforeState?.exists === true && - !event.beforeState.unavailableReason); + const hasFullText = this.hasFullTextEvidence(event); switch (event.evidenceProof) { case 'opencode-snapshot': @@ -1214,6 +1206,16 @@ export class TaskChangeLedgerReader { } } + private hasFullTextEvidence(event: Pick): boolean { + if (event.operation === 'create') { + return event.after !== null; + } + if (event.operation === 'delete') { + return event.before !== null; + } + return event.before !== null && event.after !== null; + } + private async readContentRef( projectDir: string, ref: LedgerContentRef | null diff --git a/test/main/services/team/TaskChangeLedgerReader.test.ts b/test/main/services/team/TaskChangeLedgerReader.test.ts index 969fac6c..11b494cd 100644 --- a/test/main/services/team/TaskChangeLedgerReader.test.ts +++ b/test/main/services/team/TaskChangeLedgerReader.test.ts @@ -285,6 +285,84 @@ describe('TaskChangeLedgerReader', () => { expect(snippets[2]?.ledger?.source).toBe('ledger-snapshot'); }); + it('projects partial OpenCode snapshot journal evidence to a later full-text upgrade', async () => { + tmpDir = await fsTempDir(); + const eventsDir = path.join(tmpDir, '.board-task-changes', 'events'); + const blobsDir = path.join(tmpDir, '.board-task-changes', 'blobs'); + await mkdir(eventsDir, { recursive: true }); + await mkdir(blobsDir, { recursive: true }); + + const beforeContent = 'export const value = 1;\n'; + const afterContent = 'export const value = 2;\n'; + await writeFile(path.join(blobsDir, 'before.txt'), beforeContent, 'utf8'); + await writeFile(path.join(blobsDir, 'after.txt'), afterContent, 'utf8'); + const sourceImportKey = 'opencode\0session-1\0part-edit\0src/file.ts'; + const 
baseEvent = { + schemaVersion: 1, + taskId: TASK_ID, + taskRef: TASK_ID, + taskRefKind: 'canonical', + phase: 'work', + executionSeq: 1, + sessionId: 'opencode-session-1', + memberName: 'bob', + toolUseId: 'part-edit', + source: 'opencode_toolpart_edit', + operation: 'modify', + confidence: 'high', + workspaceRoot: '/repo', + filePath: '/repo/src/file.ts', + relativePath: 'src/file.ts', + timestamp: '2026-03-01T10:00:00.000Z', + toolStatus: 'succeeded', + sourceRuntime: 'opencode', + sourceProvider: 'opencode', + sourceImportKey, + evidenceProof: 'opencode-snapshot', + beforeState: { exists: true, sha256: sha(beforeContent), sizeBytes: beforeContent.length }, + afterState: { exists: true, sha256: sha(afterContent), sizeBytes: afterContent.length }, + linesAdded: 1, + linesRemoved: 1, + }; + await writeFile( + path.join(eventsDir, `${encodeURIComponent(TASK_ID)}.jsonl`), + [ + { + ...baseEvent, + eventId: 'event-partial', + before: null, + after: { sha256: sha(afterContent), sizeBytes: afterContent.length, blobRef: 'after.txt' }, + }, + { + ...baseEvent, + eventId: 'event-full', + supersedesEventId: 'event-partial', + before: { sha256: sha(beforeContent), sizeBytes: beforeContent.length, blobRef: 'before.txt' }, + after: { sha256: sha(afterContent), sizeBytes: afterContent.length, blobRef: 'after.txt' }, + }, + ] + .map((entry) => JSON.stringify(entry)) + .join('\n') + '\n', + 'utf8' + ); + + const reader = new TaskChangeLedgerReader(); + const result = await reader.readTaskChanges({ + teamName: 'team', + taskId: TASK_ID, + projectDir: tmpDir, + projectPath: '/repo', + includeDetails: true, + }); + + expect(result?.files).toHaveLength(1); + const snippets = result?.files[0]?.snippets ?? 
[]; + expect(snippets).toHaveLength(1); + expect(snippets[0]?.ledger?.eventId).toBe('event-full'); + expect(snippets[0]?.ledger?.originalFullContent).toBe(beforeContent); + expect(snippets[0]?.ledger?.modifiedFullContent).toBe(afterContent); + }); + it('groups rename relations in summary-only bundles without losing absolute paths', async () => { const relation = { kind: 'rename', oldPath: 'src/old.ts', newPath: 'src/new.ts' }; tmpDir = await makeLedgerBundle({ From 07a9f603de878c6a96bd47447479db1cc8f33872 Mon Sep 17 00:00:00 2001 From: 777genius Date: Wed, 29 Apr 2026 00:14:53 +0300 Subject: [PATCH 18/25] fix(changes): require strict ledger hunk rejects --- .../services/team/ReviewApplierService.ts | 106 +++++++++++++++++- .../team/ReviewApplierService.test.ts | 95 +++++++++++++++- 2 files changed, 199 insertions(+), 2 deletions(-) diff --git a/src/main/services/team/ReviewApplierService.ts b/src/main/services/team/ReviewApplierService.ts index e81214d2..734c5174 100644 --- a/src/main/services/team/ReviewApplierService.ts +++ b/src/main/services/team/ReviewApplierService.ts @@ -293,6 +293,7 @@ export class ReviewApplierService { decision.fileDecision === 'rejected', allHunksRejected, rejectedHunkIndices, + decision.hunkContextHashes, fileContent.snippets ); if (ledgerOutcome.handled) { @@ -450,6 +451,7 @@ export class ReviewApplierService { fileRejected: boolean, allHunksRejected: boolean, rejectedHunkIndices: number[], + hunkContextHashes: Record | undefined, snippets: SnippetDiff[] ): Promise { const ledgerSnippets = snippets.filter((snippet) => snippet.ledger && !snippet.isError); @@ -497,6 +499,20 @@ export class ReviewApplierService { error: 'Ledger full text is unavailable; partial reject requires manual review.', }; } + const strictHunks = mapRejectedHunkIndicesByHashStrict( + original, + modified, + rejectedHunkIndices, + hunkContextHashes + ); + if (!strictHunks.ok) { + return { + handled: true, + status: strictHunks.code === 'conflict' ? 
'conflict' : 'error', + code: strictHunks.code, + error: strictHunks.error, + }; + } const guard = await this.checkLedgerCurrentHash( filePath, lastLedger.afterState?.sha256 ?? lastLedger.afterHash ?? undefined @@ -504,7 +520,7 @@ export class ReviewApplierService { if (!guard.ok) { return guard.outcome; } - const patchResult = this.tryHunkLevelReject(original, modified, rejectedHunkIndices); + const patchResult = this.tryStrictHunkLevelReject(original, modified, strictHunks.indices); if (!patchResult) { return { handled: true, @@ -1035,6 +1051,46 @@ export class ReviewApplierService { hadConflicts: false, }; } + + private tryStrictHunkLevelReject( + original: string, + modified: string, + hunkIndices: number[] + ): RejectResult | null { + const patch = structuredPatch('file', 'file', original, modified); + + if (!patch.hunks || patch.hunks.length === 0) return null; + + const validIndices = hunkIndices.filter((idx) => idx >= 0 && idx < patch.hunks.length); + if (validIndices.length !== hunkIndices.length || validIndices.length === 0) return null; + + const inversedHunks: StructuredPatchHunk[] = []; + for (const idx of validIndices) { + const hunk = patch.hunks[idx]; + if (!hunk) return null; + inversedHunks.push(invertHunk(hunk)); + } + + const inversePatch = { + oldFileName: 'file', + newFileName: 'file', + oldHeader: undefined, + newHeader: undefined, + hunks: inversedHunks, + }; + + const result = applyPatch(modified, inversePatch, { fuzzFactor: 0 }); + if (result === false) { + logger.debug('Strict ledger hunk-level inverse patch не удался'); + return null; + } + + return { + success: true, + newContent: result, + hadConflicts: false, + }; + } } function buildHunkHashIndexMap(original: string, modified: string): Map { @@ -1086,6 +1142,54 @@ function mapRejectedHunkIndicesByHash( return [...out].sort((a, b) => a - b); } +function mapRejectedHunkIndicesByHashStrict( + original: string, + modified: string, + rejectedIndices: number[], + hunkContextHashes: Record 
| undefined +): { ok: true; indices: number[] } | { ok: false; code: ApplyErrorCode; error: string } { + if (rejectedIndices.length === 0) { + return { ok: true, indices: [] }; + } + if (!hunkContextHashes || Object.keys(hunkContextHashes).length === 0) { + return { + ok: false, + code: 'manual-review-required', + error: 'Ledger partial reject requires stable hunk context hashes.', + }; + } + + const hashMap = buildHunkHashIndexMap(original, modified); + const out = new Set(); + for (const idx of rejectedIndices) { + const hash = hunkContextHashes[idx]; + if (!hash) { + return { + ok: false, + code: 'manual-review-required', + error: 'Ledger partial reject is missing a hunk context hash.', + }; + } + const candidates = hashMap.get(hash); + if (!candidates || candidates.length === 0) { + return { + ok: false, + code: 'conflict', + error: 'Ledger partial reject hunk context changed; please re-review.', + }; + } + if (candidates.length > 1) { + return { + ok: false, + code: 'conflict', + error: 'Ledger partial reject hunk context is ambiguous; please re-review.', + }; + } + out.add(candidates[0]!); + } + return { ok: true, indices: [...out].sort((a, b) => a - b) }; +} + // ── Module-level helpers ── /** diff --git a/test/main/services/team/ReviewApplierService.test.ts b/test/main/services/team/ReviewApplierService.test.ts index 5b784788..528595ac 100644 --- a/test/main/services/team/ReviewApplierService.test.ts +++ b/test/main/services/team/ReviewApplierService.test.ts @@ -2,6 +2,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; import { createHash } from 'crypto'; import { structuredPatch } from 'diff'; +import { computeDiffContextHash } from '@shared/utils/diffContextHash'; import type { SnippetDiff } from '@shared/types'; @@ -985,7 +986,8 @@ describe('ReviewApplierService', () => { { filePath, fileDecision: 'pending', - hunkDecisions: { 0: 'rejected' }, + hunkDecisions: { 0: 'rejected', 1: 'pending' }, + hunkContextHashes: 
buildHunkContextHashes(original, modified), }, ], }, @@ -1034,8 +1036,99 @@ describe('ReviewApplierService', () => { expect(res).toMatchObject({ applied: 1, conflicts: 0 }); expect(writeFile).toHaveBeenCalledWith(filePath, original, 'utf8'); }); + + it('ledger partial reject refuses stale hunk context instead of falling back to index', async () => { + const fsPromises = await import('fs/promises'); + const readFile = fsPromises.readFile as unknown as ReturnType; + const writeFile = fsPromises.writeFile as unknown as ReturnType; + + const filePath = '/tmp/stale-ledger.ts'; + const original = 'const value = 1;\nconst keep = true;\n'; + const modified = 'const value = 2;\nconst keep = true;\n'; + readFile.mockResolvedValue(modified); + + const { ReviewApplierService } = await import('@main/services/team/ReviewApplierService'); + const svc = new ReviewApplierService(); + + const res = await svc.applyReviewDecisions( + { + teamName: 'team', + decisions: [ + { + filePath, + fileDecision: 'pending', + hunkDecisions: { 0: 'rejected', 1: 'pending' }, + hunkContextHashes: { 0: 'stale-context-hash' }, + }, + ], + }, + new Map([ + [ + filePath, + { + filePath, + relativePath: 'stale-ledger.ts', + snippets: [ + { + toolUseId: 'ledger-1', + filePath, + toolName: 'Edit', + type: 'edit', + oldString: 'const value = 1;\n', + newString: 'const value = 2;\n', + replaceAll: false, + timestamp: '2026-03-01T10:00:00.000Z', + isError: false, + ledger: { + eventId: 'event-1', + source: 'ledger-exact', + confidence: 'exact', + originalFullContent: original, + modifiedFullContent: modified, + beforeHash: sha(original), + afterHash: sha(modified), + operation: 'modify', + beforeState: { exists: true, sha256: sha(original) }, + afterState: { exists: true, sha256: sha(modified) }, + }, + }, + ], + linesAdded: 1, + linesRemoved: 1, + isNewFile: false, + originalFullContent: original, + modifiedFullContent: modified, + contentSource: 'ledger-exact', + }, + ], + ]) + ); + + 
expect(res.applied).toBe(0); + expect(res.conflicts).toBe(1); + expect(res.errors[0]?.code).toBe('conflict'); + expect(writeFile).not.toHaveBeenCalled(); + }); }); function sha(content: string): string { return createHash('sha256').update(content).digest('hex'); } + +function buildHunkContextHashes(original: string, modified: string): Record { + const patch = structuredPatch('file', 'file', original, modified); + const out: Record = {}; + for (let i = 0; i < patch.hunks.length; i++) { + const hunk = patch.hunks[i]!; + const oldSideContent = hunk.lines + .filter((line) => !line.startsWith('+')) + .map((line) => line.slice(1)) + .join('\n'); + const newSideContent = hunk.lines + .filter((line) => !line.startsWith('-')) + .map((line) => line.slice(1)) + .join('\n'); + out[i] = computeDiffContextHash(oldSideContent, newSideContent); + } + return out; +} From 93532f3ff58e29f354a6f3d2201ee53dcdcc9e91 Mon Sep 17 00:00:00 2001 From: 777genius Date: Wed, 29 Apr 2026 10:55:07 +0300 Subject: [PATCH 19/25] feat: expose agent teams control via MCP --- README.md | 2 +- .../src/internal/runtime.js | 72 +- agent-teams-controller/src/mcpToolCatalog.js | 13 +- ...essenger-connectors-uncertainty-pass-29.md | 360 ++ ...essenger-connectors-uncertainty-pass-30.md | 645 ++++ ...essenger-connectors-uncertainty-pass-31.md | 736 ++++ ...essenger-connectors-uncertainty-pass-32.md | 796 ++++ ...essenger-connectors-uncertainty-pass-33.md | 901 +++++ ...essenger-connectors-uncertainty-pass-34.md | 897 +++++ ...essenger-connectors-uncertainty-pass-35.md | 965 +++++ .../member-work-sync-control-plane-plan.md | 3377 +++++++++++++++++ mcp-server/src/agent-teams-controller.d.ts | 18 +- mcp-server/src/tools/index.ts | 2 + mcp-server/src/tools/runtimeTools.ts | 7 +- mcp-server/src/tools/teamTools.ts | 140 + mcp-server/src/utils/teamConfig.ts | 83 +- mcp-server/test/tools.test.ts | 106 +- src/main/http/index.ts | 4 +- src/main/http/teams.ts | 387 +- src/main/index.ts | 1 + src/main/ipc/teams.ts | 237 
+- src/main/services/team/TeamDataService.ts | 55 + .../team/dialogs/CreateTeamDialog.tsx | 8 + .../team/members/membersEditorTypes.ts | 9 +- .../team/members/membersEditorUtils.ts | 24 +- src/shared/types/team.ts | 12 + src/types/agent-teams-controller.d.ts | 3 + .../http/teamMcpControl.integration.test.ts | 395 ++ test/main/http/teams.test.ts | 254 +- test/main/ipc/teams.test.ts | 296 +- .../services/team/TeamDataService.test.ts | 68 + .../team/dialogs/LaunchTeamDialog.test.ts | 84 + .../team/members/membersEditorUtils.test.ts | 31 + 33 files changed, 10775 insertions(+), 213 deletions(-) create mode 100644 docs/research/messenger-connectors-uncertainty-pass-29.md create mode 100644 docs/research/messenger-connectors-uncertainty-pass-30.md create mode 100644 docs/research/messenger-connectors-uncertainty-pass-31.md create mode 100644 docs/research/messenger-connectors-uncertainty-pass-32.md create mode 100644 docs/research/messenger-connectors-uncertainty-pass-33.md create mode 100644 docs/research/messenger-connectors-uncertainty-pass-34.md create mode 100644 docs/research/messenger-connectors-uncertainty-pass-35.md create mode 100644 docs/team-management/member-work-sync-control-plane-plan.md create mode 100644 mcp-server/src/tools/teamTools.ts create mode 100644 test/main/http/teamMcpControl.integration.test.ts diff --git a/README.md b/README.md index e1f26ff6..1430b438 100644 --- a/README.md +++ b/README.md @@ -307,7 +307,6 @@ pnpm dist # macOS + Windows + Linux - [ ] Planning mode to organize agent plans before execution - [ ] Visual workflow editor ([@xyflow/react](https://github.com/xyflow/xyflow)) for building and orchestrating agent pipelines with drag & drop -- [ ] Support more models/providers (including local) e.g OpenCode (with many providers) - [ ] Remote agent execution via SSH: launch and manage agent teams on remote machines over SSH (stream-json protocol over SSH channel, SFTP-based file monitoring for tasks/inboxes/config) - [ ] CLI runtime: 
Run not only on a local PC but in any headless/console environment (web UI), e.g. VPS, remote server, etc. - [ ] 2 modes: current (agent teams), and a new mode: regular subagents (no communication between them) @@ -321,6 +320,7 @@ pnpm dist # macOS + Windows + Linux - [ ] Monitor agents processes/stats - [ ] Reusable agents with SOUL.md - [ ] Сommunicate via messenger +- [ ] SDK to programmatically launch agents --- diff --git a/agent-teams-controller/src/internal/runtime.js b/agent-teams-controller/src/internal/runtime.js index 6cb7f1f0..a798b6bc 100644 --- a/agent-teams-controller/src/internal/runtime.js +++ b/agent-teams-controller/src/internal/runtime.js @@ -121,7 +121,9 @@ async function requestJson(baseUrl, pathname, options = {}) { } if (payload == null) { - throw makeRetryableControlError(`Team control API returned empty or non-JSON response at ${baseUrl}${pathname}`); + throw makeRetryableControlError( + `Team control API returned empty or non-JSON response at ${baseUrl}${pathname}` + ); } return payload; @@ -170,9 +172,7 @@ function buildLaunchRequest(flags = {}) { ...(typeof flags.prompt === 'string' && flags.prompt.trim() ? { prompt: flags.prompt.trim() } : {}), - ...(typeof flags.model === 'string' && flags.model.trim() - ? { model: flags.model.trim() } - : {}), + ...(typeof flags.model === 'string' && flags.model.trim() ? { model: flags.model.trim() } : {}), ...(typeof flags.effort === 'string' && flags.effort.trim() ? 
{ effort: flags.effort.trim() } : {}), @@ -228,6 +228,16 @@ function compactRuntimeToolBody(context, flags = {}, fields) { return body; } +function compactBody(flags = {}, fields) { + const body = {}; + for (const field of fields) { + if (flags[field] !== undefined) { + body[field] = flags[field]; + } + } + return body; +} + async function postRuntimeTool(context, flags = {}, toolPath, body) { const baseUrls = resolveControlBaseUrls(context, flags); return requestJsonWithFallback( @@ -276,7 +286,9 @@ async function waitForProvisioningState(baseUrls, teamName, runId, timeoutMs) { } const stateLabel = - lastProgress && typeof lastProgress.state === 'string' ? ` while in state ${lastProgress.state}` : ''; + lastProgress && typeof lastProgress.state === 'string' + ? ` while in state ${lastProgress.state}` + : ''; throw new Error(`Timed out waiting for team ${teamName} to become ready${stateLabel}`); } @@ -328,6 +340,48 @@ async function launchTeam(context, flags = {}) { ); } +async function listTeams(context, flags = {}) { + const baseUrls = resolveControlBaseUrls(context, flags); + return requestJsonWithFallback(baseUrls, '/api/teams', { + timeoutMs: normalizeTimeoutMs(flags.waitTimeoutMs || flags['wait-timeout-ms'] || 10000), + }); +} + +async function getTeam(context, flags = {}) { + const baseUrls = resolveControlBaseUrls(context, flags); + return requestJsonWithFallback(baseUrls, `/api/teams/${encodeURIComponent(context.teamName)}`, { + timeoutMs: normalizeTimeoutMs(flags.waitTimeoutMs || flags['wait-timeout-ms'] || 10000), + }); +} + +async function createTeam(context, flags = {}) { + const baseUrls = resolveControlBaseUrls(context, flags); + return requestJsonWithFallback(baseUrls, '/api/teams', { + method: 'POST', + body: { + teamName: context.teamName, + ...compactBody(flags, [ + 'displayName', + 'description', + 'color', + 'members', + 'cwd', + 'prompt', + 'providerId', + 'providerBackendId', + 'model', + 'effort', + 'fastMode', + 'limitContext', + 
'skipPermissions', + 'worktree', + 'extraCliArgs', + ]), + }, + timeoutMs: normalizeTimeoutMs(flags.waitTimeoutMs || flags['wait-timeout-ms'] || 10000), + }); +} + async function stopTeam(context, flags = {}) { const baseUrls = resolveControlBaseUrls(context, flags); const stopped = await requestJsonWithFallback( @@ -351,7 +405,10 @@ async function stopTeam(context, flags = {}) { async function getRuntimeState(context, flags = {}) { const baseUrls = resolveControlBaseUrls(context, flags); - return requestJsonWithFallback(baseUrls, `/api/teams/${encodeURIComponent(context.teamName)}/runtime`); + return requestJsonWithFallback( + baseUrls, + `/api/teams/${encodeURIComponent(context.teamName)}/runtime` + ); } async function runtimeBootstrapCheckin(context, flags = {}) { @@ -425,6 +482,9 @@ async function runtimeHeartbeat(context, flags = {}) { } module.exports = { + listTeams, + getTeam, + createTeam, launchTeam, stopTeam, getRuntimeState, diff --git a/agent-teams-controller/src/mcpToolCatalog.js b/agent-teams-controller/src/mcpToolCatalog.js index d896dd1d..ecd74b19 100644 --- a/agent-teams-controller/src/mcpToolCatalog.js +++ b/agent-teams-controller/src/mcpToolCatalog.js @@ -1,3 +1,5 @@ +const AGENT_TEAMS_TEAM_TOOL_NAMES = ['team_list', 'team_get', 'team_create']; + const AGENT_TEAMS_TASK_TOOL_NAMES = [ 'member_briefing', 'task_add_comment', @@ -62,6 +64,11 @@ const AGENT_TEAMS_RUNTIME_TOOL_NAMES = [ ]; const AGENT_TEAMS_MCP_TOOL_GROUPS = [ + { + id: 'team', + teammateOperational: false, + toolNames: AGENT_TEAMS_TEAM_TOOL_NAMES, + }, { id: 'task', teammateOperational: true, @@ -120,10 +127,12 @@ const AGENT_TEAMS_LEAD_BOOTSTRAP_TOOL_NAMES = [ ...AGENT_TEAMS_LEAD_TOOL_NAMES, ]; -const AGENT_TEAMS_NAMESPACED_LEAD_BOOTSTRAP_TOOL_NAMES = - AGENT_TEAMS_LEAD_BOOTSTRAP_TOOL_NAMES.map((toolName) => `mcp__agent-teams__${toolName}`); +const AGENT_TEAMS_NAMESPACED_LEAD_BOOTSTRAP_TOOL_NAMES = AGENT_TEAMS_LEAD_BOOTSTRAP_TOOL_NAMES.map( + (toolName) => 
`mcp__agent-teams__${toolName}` +); module.exports = { + AGENT_TEAMS_TEAM_TOOL_NAMES, AGENT_TEAMS_TASK_TOOL_NAMES, AGENT_TEAMS_LEAD_TOOL_NAMES, AGENT_TEAMS_REVIEW_TOOL_NAMES, diff --git a/docs/research/messenger-connectors-uncertainty-pass-29.md b/docs/research/messenger-connectors-uncertainty-pass-29.md new file mode 100644 index 00000000..0bd8717b --- /dev/null +++ b/docs/research/messenger-connectors-uncertainty-pass-29.md @@ -0,0 +1,360 @@ +# Messenger Connectors - Uncertainty Pass 29 + +Date: 2026-04-29 +Scope: official shared Telegram bot ingress, webhook ACK semantics, offline desktop behavior, and no durable backend plaintext queue + +## Executive Delta + +The next weakest boundary is official bot ingress: + +```text +Telegram webhook update +official backend +desktop live connection +durable local turn +Telegram webhook ACK +``` + +The product decision was: + +```text +Default official bot, no durable backend plaintext queue. +If desktop is offline, be honest and answer offline. +Encrypted queue can be added later as advanced reliability mode. +``` + +This creates a precise reliability contract: + +```text +Backend must ACK Telegram only after either: +1. desktop durably accepted the plaintext turn locally, or +2. backend recorded a redaction-safe offline/blocked decision and attempted or skipped the offline notice by policy, or +3. this is a duplicate already completed update. +``` + +Do not use Telegram webhook retry as the queue. It is operationally noisy, finite, and hard to reason about. + +## Source Facts Rechecked + +Telegram official facts checked on 2026-04-29: + +- `setWebhook` sends HTTPS POST updates to our URL. +- Telegram repeats webhook delivery after a non-2xx response and eventually gives up after a reasonable number of attempts. +- `secret_token` can be configured so Telegram includes `X-Telegram-Bot-Api-Secret-Token`. +- `max_connections` can be 1-100 and defaults to 40. +- `drop_pending_updates` can drop pending updates. 
+- `getWebhookInfo` exposes `pending_update_count` and last error fields. +- `getUpdates` confirms updates by calling with offset greater than the previous `update_id`. +- Telegram stores incoming updates until received, but not longer than 24 hours. +- `getUpdates` cannot be used while webhook is set. +- Bot API calls made directly in the webhook response do not return a result to us. + +Sources: + +- https://core.telegram.org/bots/api#setwebhook +- https://core.telegram.org/bots/api#getwebhookinfo +- https://core.telegram.org/bots/api#getting-updates +- https://core.telegram.org/bots/api#making-requests-when-getting-updates +- https://core.telegram.org/bots/faq + +Local code facts: + +- Existing `HttpServer` binds to localhost by default and serves local app HTTP routes through Fastify. +- Existing browser mode uses SSE from local server to renderer. +- There is no current cloud/backend persistent relay layer for messenger traffic. +- The app already has a usable local event idea, but messenger official mode needs a new outbound desktop-to-cloud control connection, not the existing local HTTP server. + +## 1. The ACK Problem + +If the backend returns non-2xx to Telegram because desktop is offline, Telegram retries. That sounds like a queue, but it is a bad queue: + +- plaintext stays in Telegram's pending delivery mechanism, not under our product semantics; +- retries can repeat the same update while the user keeps typing; +- `pending_update_count` can grow and hide real bugs; +- retries are finite; +- update retention has an upper bound; +- backend may send duplicate offline notices unless it has its own idempotency state. + +If the backend returns 2xx before desktop has durably accepted the update, the lead message can be lost forever in official mode because we intentionally do not keep plaintext. + +Therefore ACK timing is the core ingress invariant. + +## 2. 
Recommended Backend Ingress State Machine + +Backend should persist only redaction-safe metadata before side effects: + +```ts +type OfficialIngressReceipt = { + receiptId: string; + provider: 'telegram'; + botScope: 'official'; + updateId: number; + providerMessageKey: string; + routeId: string | null; + routeGeneration: number | null; + textHash: string | null; + fromUserHash: string | null; + chatIdHash: string; + messageThreadId: number | null; + status: + | 'received' + | 'route_missing' + | 'desktop_claim_started' + | 'desktop_accepted' + | 'desktop_acceptance_unknown' + | 'offline_notice_started' + | 'offline_notice_sent' + | 'offline_notice_ambiguous' + | 'acknowledged' + | 'failed_terminal'; + createdAt: string; + updatedAt: string; +}; +``` + +State rules: + +```text +received -> desktop_claim_started -> desktop_accepted -> acknowledged +received -> offline_notice_started -> offline_notice_sent -> acknowledged +received -> offline_notice_started -> offline_notice_ambiguous -> acknowledged +received -> route_missing -> acknowledged +duplicate acknowledged -> acknowledged +``` + +Important: `offline_notice_ambiguous` still ACKs the webhook. It is better to possibly miss the offline notice than to auto-send duplicate notices or keep Telegram retrying. + +## 3. Desktop Claim Protocol + +Official mode needs a desktop-initiated persistent connection: + +```text +desktop -> backend: connect(route subscriptions, install id, session key, capabilities) +backend -> desktop: inbound plaintext turn +desktop -> backend: accepted_local(internalTurnId, providerMessageKey, localMessageId) +backend -> Telegram: 2xx webhook ACK +``` + +Rules: + +- Backend forwards plaintext only to an already-authenticated active desktop connection. +- Desktop must persist the turn locally before returning `accepted_local`. +- Desktop dedupes by `providerMessageKey` and returns the existing local acceptance if the backend retries delivery. 
+- Backend does not store plaintext after the request handler scope. +- Backend stores only hashes and receipt state. +- If no desktop session can accept within a short timeout, backend goes to offline policy. + +Suggested timeout: + +```text +2-4 seconds for desktop accepted_local +then offline response or offline status +``` + +This keeps webhook handlers bounded and avoids Telegram retry storms. + +## 4. Crash Matrix + +Critical cases: + +- Backend crashes before persisting receipt. + - Telegram retries; safe, because no side effect happened. +- Backend persists receipt, crashes before desktop forward. + - Telegram retries; backend can process again from `received`. +- Backend forwards plaintext to desktop, crashes before desktop ACK. + - Telegram retries; desktop dedupe by `providerMessageKey` prevents duplicate local turn. +- Desktop persists local turn, ACK to backend is lost. + - Telegram retries; backend redelivers, desktop returns existing `accepted_local`. +- Backend records `desktop_accepted`, crashes before HTTP 2xx. + - Telegram retries; backend sees completed receipt and returns 2xx without redelivering. +- Desktop offline. + - Backend records offline decision and ACKs Telegram after offline policy. +- Offline notice `sendMessage` succeeds but backend crashes before marking success. + - On retry, backend must not blindly resend. Mark `offline_notice_ambiguous`, ACK, show support diagnostics. + +## 5. Offline Policy + +For default official MVP: + +```text +No desktop live acceptance = no local delivery. +``` + +Then choose one of two offline UX policies: + +1. Send a short Telegram offline notice. +2. ACK silently and rely on topic status / setup UI. + +I recommend a short offline notice, but only through the same provider outbox ambiguity policy from pass 42. + +Example behavior: + +```text +Agent Teams desktop is offline. Open the app on the connected computer and send the message again. +``` + +Do not store the lead's plaintext for later replay. 
+ +## 6. Why Not Use Telegram As The Queue + +Top risks: + +- Telegram will retry on non-2xx, but the retry schedule and final give-up behavior are provider-controlled. +- `pending_update_count` becomes an operational failure queue with plaintext updates we cannot inspect safely. +- Once we finally ACK, Telegram considers the update handled, even if desktop state is not coherent. +- If webhook is reconfigured with `drop_pending_updates`, lead messages can be intentionally discarded. +- If the app is offline for more than Telegram retention, messages are lost anyway. + +This conflicts with the product's "honest offline" behavior. + +## 7. Official Mode Privacy Story + +The honest statement: + +```text +Official shared bot backend sees message plaintext transiently while handling Telegram delivery. +It does not durably store plaintext in MVP. +It stores redaction-safe delivery metadata and hashes for dedupe, abuse prevention, and diagnostics. +``` + +Not honest: + +```text +Our backend never sees messages. +``` + +That statement is only true for private own-bot local polling mode, not official shared bot mode. + +## 8. Own Bot Contrast + +Own bot mode is much simpler for ingress: + +```text +desktop getUpdates +desktop durable local turn +desktop confirms offset +``` + +Because Telegram `getUpdates` confirms by offset, desktop can persist locally before advancing offset. That is a better privacy and reliability story for users who want it. + +But own bot mode is less convenient because the user must create/configure a bot. + +## 9. Desktop To Backend Transport Options + +1. Persistent WebSocket from desktop to backend - 🎯 8 🛡️ 8 🧠 7 - approx `1600-3600` changed LOC. + Best default for official mode. Full duplex, explicit ACK messages, connection leases, heartbeats, route subscriptions. + +2. Server-Sent Events from backend to desktop plus HTTPS POST ACKs - 🎯 7 🛡️ 7 🧠 6 - approx `1300-3000` changed LOC. 
+ Simpler in some networks, but ACK correlation and reconnect handling are more awkward. + +3. Desktop polling backend every N seconds - 🎯 5 🛡️ 5 🧠 4 - approx `700-1800` changed LOC. + Poor fit for no plaintext queue because backend would need to hold plaintext or lead messages would be missed between polls. + +Recommendation: + +```text +Use WebSocket-like persistent desktop claim channel for official mode. +Do not add a package decision until package versions can be verified in an unrestricted network environment. +``` + +## 10. Multi-Desktop And Lease Policy + +If the same user connects the same official route from multiple desktops: + +```text +Only one active receiver lease may own inbound plaintext delivery. +``` + +Options: + +1. Single primary device per route - 🎯 8 🛡️ 8 🧠 5 - approx `600-1400` changed LOC. + Recommended for MVP. Simple and prevents split-brain delivery. + +2. Fan out to all active desktops and accept first durable ACK - 🎯 6 🛡️ 6 🧠 7 - approx `1200-2600` changed LOC. + Can duplicate local inboxes and confuse reply ownership. + +3. Per-team device assignment - 🎯 7 🛡️ 8 🧠 7 - approx `1400-3200` changed LOC. + Useful later for power users, too much for MVP. + +## 11. Security Requirements + +Minimum official ingress controls: + +- Verify `X-Telegram-Bot-Api-Secret-Token`. +- Use a secret webhook path as defense in depth. +- Reject updates that do not match expected bot id/account binding. +- Use `allowed_updates` to narrow update surface. +- Persist update id/provider message key dedupe. +- Rate-limit offline notices by chat/topic. +- HMAC/hash user ids and chat ids in backend logs. +- Do not log plaintext update payloads. +- Encrypt desktop-backend transport. +- Rotate desktop session tokens. + +## 12. 
Test Matrix + +Tests should simulate: + +- valid webhook with active desktop accepted; +- duplicate webhook update after accepted; +- backend crash before receipt write; +- backend crash after receipt write; +- backend crash after desktop forward; +- desktop accepted locally but ACK lost; +- backend accepted desktop ACK but HTTP 2xx lost; +- desktop offline; +- offline notice success; +- offline notice timeout after request start; +- webhook secret mismatch; +- route missing; +- route disabled; +- topic deleted; +- bot permission lost; +- two desktop sessions racing; +- webhook max_connections concurrent deliveries out of order; +- `drop_pending_updates` during reconnect; +- old update after allowed_updates change; +- backend store unavailable; +- desktop reconnect while webhook is in-flight. + +Pass criterion: + +```text +No plaintext is durably stored by official backend. +No Telegram update is ACKed as handled before either desktop durable acceptance or an explicit offline/blocked decision. +No duplicate local turns for the same providerMessageKey. +No duplicate offline notice unless user/support explicitly chooses duplicate send. +``` + +## 13. Top 3 Overall Options + +1. Synchronous desktop claim + redaction-safe ingress receipt + offline notice outbox - 🎯 8 🛡️ 8 🧠 8 - approx `2500-6000` changed LOC. + Recommended official MVP. It matches "no durable backend plaintext queue" and gives deterministic failure states. + +2. Encrypted backend queue for later desktop replay - 🎯 7 🛡️ 9 🧠 9 - approx `3500-8000` changed LOC. + Better reliability, but bigger system. Backend still sees plaintext transiently from Telegram before encrypting. + +3. Non-2xx webhook until desktop online, using Telegram retries as queue - 🎯 4 🛡️ 4 🧠 4 - approx `800-2000` changed LOC. + Not recommended. It is brittle, provider-controlled, and creates operational backlog. + +## 14. 
Decision Update + +Official shared bot MVP should implement: + +```text +Telegram webhook +-> backend redaction-safe receipt +-> if desktop active: synchronous durable desktop claim +-> if accepted: ACK Telegram +-> if not accepted: offline notice policy, then ACK Telegram +``` + +Own bot mode remains: + +```text +desktop long polling +-> local durable turn +-> advance update offset +``` + +This keeps the default UX simple while making the privacy/reliability tradeoff explicit instead of accidental. diff --git a/docs/research/messenger-connectors-uncertainty-pass-30.md b/docs/research/messenger-connectors-uncertainty-pass-30.md new file mode 100644 index 00000000..4d6a2d28 --- /dev/null +++ b/docs/research/messenger-connectors-uncertainty-pass-30.md @@ -0,0 +1,645 @@ +# Messenger Connectors - Uncertainty Pass 30 + +Date: 2026-04-29 +Scope: Telegram media and attachments for official shared bot mode, own-bot mode, inbox persistence, and no durable backend plaintext queue + +## Executive Delta + +The lowest-confidence boundary after webhook ACK timing is media: + +```text +Telegram message with photo/document/voice +official backend receives update +backend may need bot token to fetch file bytes +desktop may be offline +local app currently persists attachments only for live lead messages +agent reply may need to reference or send files back +``` + +This is not just a file download problem. It changes the privacy story. + +For official shared bot mode, the backend receives the update and can technically fetch Telegram files with the official bot token. Even if we do not store plaintext or media durably, the backend is in the transient data path. 
That is acceptable only if the product copy is precise: + +```text +Default official bot: +- easiest setup +- no durable backend plaintext/media queue +- backend may transiently process messages while routing them +- if desktop is offline, we honestly say offline +``` + +Private own-bot mode is the clean privacy mode: + +```text +Own bot: +- token stays in desktop +- desktop polls or receives webhooks directly when online +- backend does not receive lead messages or media +- offline reliability is lower unless user enables a separate relay/queue +``` + +⚠️ Recommendation update: launch official shared bot as text-first. Treat Telegram media as metadata-only/unsupported in the first official MVP. Add private own-bot media support before official shared bot media streaming if privacy is a core selling point. + +## Source Facts Rechecked + +Telegram official facts checked on 2026-04-29: + +- Bot API is token-based. API calls are made to `https://api.telegram.org/bot<token>/METHOD_NAME`. +- Webhook responses can call a Bot API method inline, but Telegram does not return the method result to us in that webhook response. +- Incoming updates are stored by Telegram until received, but not longer than 24 hours. +- `Update.message` can be any kind of message, including text, photo, sticker, and more. +- `getFile` returns a `File` object and prepares a file for download. +- File download URL shape is `https://api.telegram.org/file/bot<token>/<file_path>`. +- Telegram guarantees that the file download link is valid for at least 1 hour. +- Standard cloud Bot API download limit is 20 MB. +- Local Bot API server can download without a size limit, upload up to 2000 MB, and can return a local `file_path`. +- `sendPhoto` supports `message_thread_id` and `direct_messages_topic_id`; uploaded photos are limited to 10 MB. +- `sendDocument` supports `message_thread_id` and `direct_messages_topic_id`; uploaded files are currently up to 50 MB. +- `sendMediaGroup` sends albums of 2-10 media items. 
+- `createForumTopic` can create a topic in a forum supergroup or a private chat with a user. +- Bot API 9.6 Managed Bots expose `getManagedBotToken`; the manager bot can fetch the managed bot token. + +Sources: + +- https://core.telegram.org/bots/api#making-requests +- https://core.telegram.org/bots/api#making-requests-when-getting-updates +- https://core.telegram.org/bots/api#getting-updates +- https://core.telegram.org/bots/api#file +- https://core.telegram.org/bots/api#getfile +- https://core.telegram.org/bots/api#using-a-local-bot-api-server +- https://core.telegram.org/bots/api#sendphoto +- https://core.telegram.org/bots/api#senddocument +- https://core.telegram.org/bots/api#sendmediagroup +- https://core.telegram.org/bots/api#createforumtopic +- https://core.telegram.org/bots/api#getmanagedbottoken + +Local code facts: + +- `AttachmentPayload` contains base64 data and metadata. +- `AttachmentMeta` is persisted on message rows and may include a local file path. +- `TeamAttachmentStore` writes files under app data `attachments/{teamName}/{messageId}` and stores `_index.json`. +- `TeamAttachmentStore` sanitizes path segments and stored filenames. +- Main-process IPC currently accepts only these message attachment MIME types: PNG, JPEG, GIF, WebP, PDF, and plain text. +- Main-process IPC currently limits message attachments to 5 files, 10 MB per file, and 20 MB total. +- `handleSendMessage` allows attachments only when sending to the live team lead. +- If stdin delivery fails after attachments were requested, the current code fails instead of silently dropping attachments. +- The inbox path is described as offline lead or regular members with no attachment support. +- OpenCode secondary runtime delivery marks attachment messages as terminal failure because attachments are not supported for secondary runtime. +- Renderer composer blocks attachments for cross-team messages, non-lead recipients, and offline teams. 
+ +Implication: + +```text +Current app has a useful local attachment store, +but messenger media cannot safely reuse offline inbox delivery until we add +a durable provider-neutral media acceptance protocol. +``` + +## 1. Why Media Is Harder Than Text + +Text flow can be bounded: + +```text +backend receives plaintext text +desktop accepts locally +backend ACKs Telegram +backend forgets plaintext +``` + +Media flow needs at least one more side effect: + +```text +backend receives file_id/file_unique_id/caption +backend calls getFile +backend downloads bytes using URL that embeds bot token +desktop writes bytes to local attachment store +desktop commits message row with attachment metadata +backend ACKs Telegram +``` + +Every step can fail independently. + +The dangerous half-states are: + +- backend ACKs Telegram, but desktop never wrote the file; +- desktop wrote the file, but message row did not commit; +- message row committed, but attachment file write failed; +- backend downloaded media but desktop disconnected; +- duplicate webhook retries download the same media multiple times; +- media group arrives as multiple updates and only some items are accepted; +- file is too large for Telegram cloud `getFile`; +- file link expires while desktop is offline; +- file_id is stored durably and becomes a capability to fetch content later with the bot token; +- provider MIME/type says one thing, actual bytes are another. + +This is why media should not be part of the default official MVP unless it has its own state machine. + +## 2. Privacy Reality + +There are three privacy tiers. + +### Tier A: official shared bot, text-only + +```text +Backend transiently sees message text. +Backend stores only redaction-safe receipts and hashes. +No media bytes pass through backend because media is unsupported. +``` + +Privacy story: + +```text +Simple default connection. +No durable backend plaintext queue. +Not end-to-end private from our backend. 
+``` + +### Tier B: official shared bot, ephemeral media streaming + +```text +Backend transiently sees file metadata and file bytes. +Backend does not write bytes to disk. +Desktop must be online and must commit the attachment locally. +``` + +Privacy story: + +```text +Convenient, but backend is a transient processor for media. +No durable backend media store. +``` + +### Tier C: own bot, local token + +```text +Desktop holds token. +Desktop downloads Telegram files directly. +Backend never receives message text or media. +``` + +Privacy story: + +```text +Best privacy. +More setup. +Works only while desktop app or local service is running, unless user adds their own hosting. +``` + +Managed Bots do not eliminate token exposure if our manager bot is the manager. Telegram added `getManagedBotToken`, and the official docs say the token can be fetched by the manager bot. Therefore, Managed Bots are a UX feature, not a clean no-token-access privacy feature for us. + +## 3. Treat file_id As Sensitive + +Telegram `file_id` is not the file bytes, but it is not harmless metadata. + +Reason: + +```text +file_id + bot token -> getFile -> download URL -> file bytes +``` + +Therefore: + +- do not store raw `file_id` in durable official backend receipts unless encrypted; +- do not put raw `file_id` in logs; +- do not expose raw `file_id` to renderer unless the renderer needs it for an explicit action; +- prefer local desktop storage of raw provider file ids only after user acceptance; +- store `file_unique_id` only for dedupe if needed, but remember it cannot download or reuse the file; +- store HMACs for backend idempotency where possible. 
+ +Suggested receipt fields: + +```ts +type ProviderMediaReceipt = { + provider: 'telegram'; + scope: 'official' | 'own_bot'; + updateId: number; + providerMessageKey: string; + providerMediaKeyHash: string; + providerFileUniqueIdHash: string | null; + providerFileIdEncrypted?: string; + mediaKind: 'photo' | 'document' | 'voice' | 'audio' | 'video' | 'animation' | 'sticker' | 'unknown'; + declaredMimeType: string | null; + declaredSizeBytes: number | null; + captionHash: string | null; + status: + | 'received' + | 'unsupported_policy' + | 'desktop_claim_started' + | 'desktop_media_committed' + | 'desktop_text_only_committed' + | 'offline_notice_started' + | 'offline_notice_sent' + | 'offline_notice_ambiguous' + | 'acknowledged' + | 'failed_terminal'; +}; +``` + +For default official mode, omit `providerFileIdEncrypted` entirely. + +## 4. Official MVP Policy + +For official shared bot v1: + +```text +Text, captions, commands: +- support + +Photo/document/voice/audio/video/sticker: +- do not download +- route caption text if present +- include local metadata placeholder only if useful +- tell lead in Telegram that attachments are not supported yet or require desktop online +``` + +The system message should be explicit but not noisy: + +```text +I received an attachment, but this connection currently supports text only. +Please send the key details as text, or connect a private bot for local file handling. +``` + +Rules: + +- If a message has `caption`, deliver caption as the text turn. +- If a message has media and no caption, create a local event only if desktop is online and can persist a metadata-only placeholder. +- If desktop is offline, send one offline/unsupported notice and ACK Telegram. +- Deduplicate unsupported notices by `providerMessageKey`. +- Do not call `getFile` in official MVP. +- Do not store `file_id`. + +This keeps the first version honest and avoids a half-built media pipeline. + +## 5. 
Own-Bot Media Policy + +Own-bot mode can support media earlier because the desktop has the token. + +```text +desktop receives update via getUpdates or local webhook +desktop calls getFile directly +desktop downloads bytes directly +desktop writes TeamAttachmentStore +desktop writes message row +desktop sends ACK/offset after local commit +``` + +The exact update intake can be: + +- desktop long polling with `getUpdates`; +- local webhook only if user has a reachable tunnel or local Bot API server; +- later, optional user-hosted relay. + +For consumer desktop UX, long polling is simpler and more private: + +```text +No inbound public port. +No server token storage. +Works while app is open. +Telegram can still be used from phone as the client UI. +``` + +Limitations: + +- if desktop is asleep or app closed, no processing; +- Telegram retains updates only up to its limits; +- if the user also runs the same token elsewhere, `getUpdates` offset/webhook conflicts can appear; +- if webhook is set for the bot, `getUpdates` will not work until webhook is deleted. + +## 6. 
Ephemeral Official Media Streaming + +If we later support official shared bot media without durable backend media queue, use a strict active-desktop stream: + +```text +Telegram webhook update +backend receipt persisted with hashes only +backend checks desktop session capability +backend calls getFile +backend streams file bytes to desktop over existing desktop connection +desktop writes temp file +desktop validates size/hash/MIME +desktop atomically moves into TeamAttachmentStore +desktop writes message row +desktop returns accepted_local_media +backend ACKs Telegram +``` + +Backend rules: + +- stream only to an already authenticated desktop route session; +- do not write bytes to disk; +- limit file size before download using Telegram metadata when present; +- enforce hard byte counters during stream; +- abort stream if desktop disconnects; +- never log filename, file_id, or file_path; +- do not retry file downloads after request scope unless encrypted queue is enabled. + +Desktop rules: + +- write to a temp file outside final attachment path; +- compute SHA-256 while streaming; +- sniff magic bytes for supported types; +- verify final byte count; +- atomically move into local attachment store; +- only then commit message row; +- dedupe by `providerMessageKey + providerMediaPartKey`; +- return the existing local acceptance for duplicate delivery. + +This needs a new storage API. Current `TeamAttachmentStore.saveAttachments` expects base64 payloads. Streaming media should not base64 all bytes through IPC. + +Suggested extension: + +```ts +interface AttachmentContentStore { + saveBase64MessageAttachments(input: SaveBase64AttachmentsInput): Promise; + saveStreamedMessageAttachment(input: SaveStreamedAttachmentInput): Promise; + getMessageAttachmentFiles(input: GetAttachmentFilesInput): Promise; +} +``` + +The Telegram adapter should depend on this port, not on `TeamAttachmentStore` directly. + +## 7. 
Inbox Integration Model + +Current inbox rows can carry `AttachmentMeta[]`, but the inbox path does not guarantee bytes exist. Messenger media needs a stronger invariant: + +```text +An inbox/message row may reference an attachment only after local bytes are committed, +unless the attachment is explicitly marked as metadata-only/unsupported. +``` + +Add a provider-neutral attachment state: + +```ts +type MessengerAttachmentState = + | 'available_local' + | 'metadata_only' + | 'unsupported_policy' + | 'too_large' + | 'download_failed' + | 'expired' + | 'blocked_security'; + +type MessengerAttachmentMeta = AttachmentMeta & { + state: MessengerAttachmentState; + provider: 'telegram' | 'whatsapp' | 'discord'; + providerKind: string; + providerMessageKey: string; + providerMediaPartKey: string; + caption?: string; + checksumSha256?: string; + localCommittedAt?: string; +}; +``` + +Do not overload `AttachmentMeta.filePath` absence as "unsupported". It already means metadata-only in comments, but messenger needs typed status for UI, retries, and support. + +## 8. Media Group Edge Cases + +Telegram albums arrive as multiple messages with a shared grouping concept. Do not assume one update equals one logical user turn. + +Policy: + +- collect album parts in a short local aggregation window, for example 800-1500 ms; +- if some parts are unsupported, deliver one consolidated turn with mixed attachment states; +- dedupe every part independently; +- do not block a text caption forever waiting for missing album parts; +- if the same album has multiple captions, preserve each caption near its part in the local model; +- if aggregation times out, commit what is available and mark late duplicates as follow-up parts. + +For MVP text-only official mode: + +- do not download album files; +- aggregate captions and unsupported media counts; +- send at most one notice per album. + +## 9. 
Outbound Media From Agent To Telegram + +Outbound media is easier only if the file already exists locally and the desktop is online. + +Flow: + +```text +agent/tool creates reply with attachment reference +desktop validates local file and policy +desktop sends request to provider adapter +official adapter uploads via backend +own-bot adapter uploads directly +provider returns message ids +local delivery ledger marks sent +``` + +Official shared bot outbound media privacy: + +```text +If backend uploads the file to Telegram, backend transiently sees file bytes. +``` + +That is acceptable only under the same "transient processor, no durable media queue" contract. + +MVP: + +- outbound official: text only; +- outbound own-bot: optionally support local photos/documents after inbound media is solid; +- never let an agent silently send local files to Telegram without explicit policy gates. + +## 10. Security Rules + +Minimum rules before any media bytes are supported: + +- allowlist MIME families by provider mode; +- enforce byte limits before and during download; +- store original filename only after sanitization; +- keep provider filename as untrusted display text; +- never use provider filename as path; +- sniff magic bytes for images/PDF/text where possible; +- reject archives in MVP; +- reject executable types; +- do not auto-open downloaded files; +- do not feed binary content to the model unless the app explicitly supports that type; +- captions and filenames are untrusted user input, not system instructions; +- strip or ignore path-like names; +- rate-limit media downloads per route; +- record redaction-safe diagnostics for failed media; +- design future malware scanning as an optional port, not hardcoded vendor logic. 
+ +For text extraction: + +- plain text files can be included only after encoding validation and size cap; +- PDFs should be attached as model document blocks only when provider/runtime supports them; +- voice transcription should be a separate explicit feature, preferably local-first if privacy matters. + +## 11. Failure Matrix + +Critical cases: + +- Update has media but no text. + - Official MVP: unsupported notice, metadata-only local event if desktop online. +- Update has media plus caption. + - Official MVP: deliver caption and mention unsupported attachment count. +- Duplicate webhook after unsupported notice. + - Return completed receipt, do not resend notice. +- Duplicate webhook after desktop local commit. + - Desktop returns existing local message id. +- Backend calls `getFile`, desktop disconnects before any bytes. + - Abort stream, offline/unsupported policy, ACK according to receipt state. +- Backend streams bytes, desktop crashes before commit. + - Duplicate webhook can retry only if backend has not ACKed yet. +- Desktop commits file, ACK to backend is lost. + - Duplicate webhook redelivers, desktop dedupes and returns existing acceptance. +- File exceeds Telegram cloud download limit. + - Mark too_large; suggest user resend as text or use own bot/local server mode later. +- File download URL expires. + - In official no-queue mode, do not attempt later replay. Mark expired if it happens in active stream. +- Provider MIME lies. + - Sniff bytes, reject if mismatch is dangerous. +- Filename is `../../x` or has control chars. + - Sanitize and preserve original only as escaped display text if needed. +- Media group partially arrives. + - Commit consolidated partial turn with per-part states. +- Backend crashes after downloading media but before desktop commit. + - No backend disk means media is lost; webhook retry may redownload if not ACKed. +- Backend crashes after desktop commit but before HTTP 2xx. 
+ - Telegram retries; backend uses receipt and desktop dedupe to ACK. + +## 12. Top 3 Options + +### Option 1 - Official text-only MVP, media metadata/notice, own-bot media later + +🎯 9 🛡️ 9 🧠 4 + +Approx changed LOC: 700-1800. + +What it means: + +- official shared bot supports text and captions; +- official shared bot does not call `getFile`; +- official shared bot does not store `file_id`; +- media-only messages get one clear unsupported notice; +- local UI can show "attachment received, not imported" metadata only when desktop is online; +- own-bot adapter is the first place where real media support can land. + +Why this is best now: + +- aligns with no durable backend plaintext/media queue; +- avoids token/file privacy ambiguity; +- matches current app constraints where attachments require live lead; +- minimizes risk of partial file delivery bugs; +- gives users a clean upgrade path: "connect private bot for local files". + +Risk: + +- less magical than users expect from Telegram; +- leads may send screenshots and expect them to work; +- product copy must be clear. + +### Option 2 - Official ephemeral media streaming to active desktop + +🎯 7 🛡️ 8 🧠 8 + +Approx changed LOC: 2500-6000. + +What it means: + +- backend downloads media only while desktop is connected; +- backend streams bytes to desktop and does not store them; +- desktop commits attachment bytes before message row; +- ACK waits for local acceptance or clean unsupported/offline decision. + +Why it is viable: + +- preserves convenience of official shared bot; +- no durable backend media store; +- can support common screenshots/documents. + +Risk: + +- backend still transiently sees file bytes; +- many failure states; +- requires new streaming attachment port; +- current base64 attachment path is not the right transport; +- harder to test than text. + +### Option 3 - Backend encrypted media queue + +🎯 6 🛡️ 8 🧠 9 + +Approx changed LOC: 3500-9000. 
+ +What it means: + +- backend stores encrypted media or encrypted Telegram file capabilities for later desktop replay; +- desktop decrypts and commits when it comes online; +- official bot can feel reliable even while desktop is offline. + +Why it is not first: + +- this changes the product from "offline means offline" to "we queue sensitive content"; +- encrypted media queue is still a data retention system; +- key management, replay, retention, deletion, and support diagnostics become much harder; +- it competes with a simpler premium/advanced reliability mode later. + +Use only after: + +- text routing is stable; +- ephemeral streaming is proven; +- user demand for offline media is strong enough. + +## 13. Decision Update + +Recommended sequence: + +```text +1. Official shared bot: + text + captions + topics + reply routing + no durable backend plaintext queue. + +2. Own-bot adapter: + local token + local polling + text first, then local media download. + +3. Official shared bot media: + ephemeral active-desktop streaming only, no backend disk. + +4. Advanced reliability: + encrypted backend queue for text/media only if explicitly enabled. +``` + +This keeps the architecture provider-neutral and honest: + +```text +core messenger domain: + route identity + thread/topic mapping + local message ledger + attachment state machine + delivery ledger + +provider adapters: + Telegram official adapter + Telegram own-bot adapter + future WhatsApp adapter + future Discord adapter + +storage ports: + route registry + inbound receipt store + local turn ledger + attachment content store + outbound delivery ledger +``` + +The main design rule: + +```text +No message row may claim an attachment is available unless the desktop has committed bytes locally. +``` + +## 14. 
Places Still Worth Deeper Research + +Next low-confidence areas: + +- exact Telegram private-chat topics UX across clients when many teams exist; +- whether `message_thread_id` behavior is consistent for private bot topics on desktop/mobile Telegram clients; +- how to represent teammate messages inside one team topic without confusing the user; +- whether captions/media groups should become one Agent Teams turn or multiple turns; +- how to prevent model/tool prompt injection through Telegram captions and filenames; +- which own-bot intake mode is best for desktop: long polling, local Bot API server, or optional tunnel. + diff --git a/docs/research/messenger-connectors-uncertainty-pass-31.md b/docs/research/messenger-connectors-uncertainty-pass-31.md new file mode 100644 index 00000000..90edf1af --- /dev/null +++ b/docs/research/messenger-connectors-uncertainty-pass-31.md @@ -0,0 +1,736 @@ +# Messenger Connectors - Uncertainty Pass 31 + +Date: 2026-04-29 +Scope: Telegram private-chat topics, one-topic-per-team topology, reply-to teammate routing, topic registry recoverability, and local inbox alignment + +## Executive Delta + +The next lowest-confidence area is not whether Telegram supports topics. It does. + +The weak point is whether topics can be used as a stable product navigation layer without losing routing correctness: + +```text +Telegram private topic id +-> app team route +-> lead or teammate recipient +-> durable local message +-> agent reply +-> Telegram message in the same topic +-> user replies to a concrete teammate message +``` + +The correct approach is: + +```text +One Telegram topic per team. +Route the topic to the team. +Route the recipient inside the team by reply-to message ledger, explicit command, or UI buttons. +Default no-reply messages to the lead. +Never create one bot or one topic per teammate as the default. 
+``` + +⚠️ Main new finding: Telegram Bot API exposes create/edit/delete topic operations, but I do not see a Bot API method for listing all private-chat topics and recovering their ids. That means our app must treat topic ids as durable provider state and store them locally/backend-side from creation time. If the registry is lost, topic recovery is weak and may require creating replacement topics. + +## Source Facts Rechecked + +Telegram official facts checked on 2026-04-29: + +- Bot API 9.3, dated December 31, 2025, added private-chat topic mode support. +- Bot API 9.3 added `User.has_topics_enabled`, `Message.message_thread_id`, and `Message.is_topic_message` support for private chats with topic mode enabled. +- Bot API 9.3 added `message_thread_id` support in private chats for `sendMessage`, media methods, `sendMediaGroup`, `copyMessage`, `forwardMessage`, `sendChatAction`, and topic-management methods. +- Bot API 9.4, dated February 9, 2026, allowed bots to create topics in private chats with `createForumTopic`. +- Bot API 9.4 added a BotFather setting that can prevent users from creating and deleting topics in private chats. +- `User.has_topics_enabled` is returned by `getMe` and means the bot has forum topic mode enabled in private chats. +- `User.allows_users_to_create_topics` is returned by `getMe` and indicates whether users may create/delete topics in private chats. +- `createForumTopic` can create a topic in a forum supergroup chat or a private chat with a user. +- `editForumTopic`, `deleteForumTopic`, and `unpinAllForumTopicMessages` support private chats with a user. +- `sendMessage.message_thread_id` routes a message to a forum/private-chat topic. +- Incoming `Message` includes optional `message_thread_id`, `is_topic_message`, `reply_to_message`, `media_group_id`, and text/media fields. +- `ReplyParameters` lets a bot reply to a specific message id in the current chat or a specified chat. 
+- `direct_messages_topic_id` is for channel direct messages chats and should not be confused with forum/private-chat `message_thread_id`. +- Telegram forum topics are conceptually message threads. Nested message threads inside topics are not supported. +- Telegram clients can have a "View as messages" setting for forums that shows messages from all topics in one stream. Treat this as a warning that visible topic grouping is a UX layer, not a routing authority. + +Sources: + +- https://core.telegram.org/bots/api-changelog +- https://core.telegram.org/bots/api#getme +- https://core.telegram.org/bots/api#user +- https://core.telegram.org/bots/api#message +- https://core.telegram.org/bots/api#sendmessage +- https://core.telegram.org/bots/api#replyparameters +- https://core.telegram.org/bots/api#createforumtopic +- https://core.telegram.org/bots/api#editforumtopic +- https://core.telegram.org/bots/api#deleteforumtopic +- https://core.telegram.org/api/forum + +Local code facts: + +- `InboxMessage` already has `from`, `to`, `messageId`, `relayOfMessageId`, `conversationId`, and `replyToConversationId`. +- `TeamDataService.sendMessage` passes `conversationId` and `replyToConversationId` into the message controller. +- `CrossTeamService` already uses `conversationId` and `replyToConversationId` for cross-team threads. +- OpenCode runtime delivery writes direct replies to either `user_sent_messages` or `member_inbox`. +- `MessagesFilterPopover` already derives participants from message `from` and `to`. +- `MessagesPanel` pending reply logic already treats `from=user -> to=member` and `from=member -> to=user` as meaningful route signals. +- Current message model is string-name based, not stable-id based. Prior passes already identified stable route identity as a required feature layer. 
+ +Implication: + +```text +The app can represent the desired conversation shape, +but messenger connectors need a provider-neutral route registry +and provider message link ledger before Telegram topics are safe. +``` + +## 1. Topic Is Team Scope, Not Recipient Scope + +One topic should map to one team: + +```text +chatId + messageThreadId -> teamRouteId +``` + +Recipient should be resolved inside that team: + +```text +incoming message in team topic +-> if it replies to a known bot message from teammate X, route to teammate X +-> else if it contains explicit recipient command/control, route to that recipient +-> else route to lead +``` + +Do not use topic title to route. + +Topic title is display state: + +```text +"Frontend - Acme" +"API - Acme" +"API - Acme (archived)" +``` + +Route identity must be persisted as: + +```ts +type MessengerTeamTopicRoute = { + routeId: string; + provider: 'telegram'; + botScope: 'official' | 'own_bot'; + botId: string; + telegramChatIdHash: string; + telegramChatIdEncrypted?: string; + telegramMessageThreadId: number; + teamId: string; + teamGeneration: number; + projectId: string | null; + projectGeneration: number | null; + displayTitle: string; + status: + | 'active' + | 'create_pending' + | 'create_ambiguous' + | 'renaming' + | 'renamed' + | 'delete_seen' + | 'replaced' + | 'disabled' + | 'error'; + createdAt: string; + updatedAt: string; +}; +``` + +For official shared bot, backend needs this route registry. For own-bot local mode, desktop can own it locally. + +## 2. The Recoverability Problem + +Creation is straightforward: + +```text +user starts bot +desktop/backend knows Telegram chat id +app creates topic for a team +Telegram returns ForumTopic +app stores message_thread_id +``` + +The low-confidence part is recovery: + +```text +What if our route registry is lost? +What if topic creation succeeded but the process crashed before storing topic id? +What if user deletes or renames a topic? 
+What if app creates a duplicate topic after a timeout? +``` + +I do not see a Bot API method equivalent to "list my private-chat topics". Telegram's MTProto API has forum topic listing for forums, but Bot API docs expose topic creation/edit/delete operations and no simple list method. We should not build a core invariant on being able to reconstruct topic state from Telegram later. + +Therefore: + +```text +Topic registry is authoritative local/backend product state. +Telegram is an external projection. +``` + +Creation must use a two-phase state: + +```text +create_pending -> active +create_pending -> create_ambiguous +create_ambiguous -> replaced +``` + +If creation response is lost: + +- do not keep retrying blindly; +- show diagnostics in app; +- allow "Create replacement topic"; +- optionally send a message in the general/default bot chat asking the user to pick the right topic if we can design a safe verification flow later. + +## 3. User Topic Deletion And BotFather Settings + +Bot API 9.4 added a setting to prevent users from creating and deleting topics in private chats. + +Recommended official bot configuration: + +```text +Private chat topics enabled. +Users cannot create/delete topics. +Bot manages team topics. +``` + +Why: + +- fewer orphan routes; +- fewer topic id invalidation bugs; +- fewer accidental duplicates; +- cleaner support story. + +But the app must still handle deletion or invalid topic errors: + +```text +sendMessage(chatId, message_thread_id) fails +-> mark topic route as error or delete_seen +-> do not fallback silently to general chat +-> create replacement topic only behind an explicit repair flow +``` + +Silent fallback to general chat is dangerous because the user may read a message outside the intended team context. + +## 4. Reply-To Teammate Routing + +The desired product behavior: + +```text +User opens team topic. +Bot posts messages from lead and teammates. +User replies to a concrete message. 
+App routes the reply to that concrete teammate. +``` + +This is viable if we store provider message links: + +```ts +type ProviderMessageLink = { + provider: 'telegram'; + routeId: string; + providerChatIdHash: string; + providerMessageThreadId: number; + providerMessageId: number; + internalMessageId: string; + internalTeamId: string; + internalFromMemberId: string; + internalToMemberId: string | null; + direction: 'telegram_to_app' | 'app_to_telegram'; + createdAt: string; +}; +``` + +Incoming reply resolution: + +```text +if update.message.reply_to_message.message_id exists: + lookup ProviderMessageLink by chatId + messageThreadId + reply_to_message.message_id + if found and linked internal message came from teammate: + route to that teammate + if found and linked internal message came from lead: + route to lead + if found and linked internal message came from user: + route to lead or use explicit reply target from that internal row +else: + use explicit recipient control or default to lead +``` + +Important edge case: + +```text +Telegram topics cannot have nested message threads. +Reply-to is only a pointer to a message, not a durable sub-thread per teammate. +``` + +Therefore, reply-to should be a routing hint, not the entire conversation model. + +## 5. Explicit Recipient Controls + +Reply-to is natural but insufficient. + +Users will send plain messages into a topic without replying. For those messages, the app needs a deterministic default and optional controls: + +```text +Default: + message without reply -> team lead + +Explicit route: + /to teammate-name message + or inline button "Reply to Alice" + or short command menu +``` + +Do not rely on Telegram mentions for routing: + +- teammate names may not be Telegram users; +- agents are not Telegram accounts; +- inline mention semantics depend on Telegram user privacy and previous contact conditions; +- local app member names can change. 
+ +Suggested official MVP: + +```text +No global "active recipient" state at first. +Use reply-to for specific teammate replies. +Use /to for explicit direct messages. +Default to lead. +``` + +This is less magical but safer than hidden mutable state. + +## 6. Message Text Format In Telegram + +Because client topic grouping can be changed by the user and messages can appear in flattened views, every bot message should carry lightweight context. + +Example: + +```text +[Frontend] Alice +I pushed the fix and need review on the auth callback. +``` + +For lead: + +```text +[Frontend] Lead +I will ask Alice to check the failing test. +``` + +For user-sent routed message acknowledgements: + +```text +[Frontend] to Alice +Forwarded. +``` + +Rules: + +- include team label in the first line; +- include member display name for agent replies; +- keep prefixes short; +- do not include internal ids; +- do not rely only on topic title; +- avoid markdown complexity unless using explicit Telegram entities. + +This makes flattened Telegram views survivable. + +## 7. Topic Lifecycle State Machine + +Suggested route lifecycle: + +```text +not_created +-> create_pending +-> active +-> renaming +-> active +-> disabled + +create_pending +-> create_ambiguous +-> replaced + +active +-> send_failed_topic_missing +-> repair_required +-> replacement_pending +-> active + +active +-> archived +-> disabled +``` + +Do not delete topics automatically when a team is archived. + +Recommended archive behavior: + +- rename topic to include a compact archived marker; +- send one final "team archived" message; +- stop routing new user messages or route them to lead with a clear archived response; +- keep local route state for historical provider links. + +Deletion destroys user-visible history in Telegram and makes provider message links harder to explain. + +## 8. 
Rename And Duplicate Teams + +Current app still relies heavily on `teamName`, while prior research recommended stable team ids and route generations. + +Telegram topic routing should not follow only team name. + +If team is renamed: + +```text +teamId stays stable +topic route stays stable +displayTitle is updated +editForumTopic is best-effort +message prefix changes after local commit +``` + +If two projects have same team name: + +```text +topic title must include a compact project discriminator +routeId must include project/team stable ids +``` + +Example topic title: + +```text +Frontend - acme-web +Frontend - mobile-app +``` + +Title length is capped, so the full identity must be in the registry, not in Telegram title. + +## 9. Topic Creation Timing + +Three possible creation timings: + +### Lazy create on first outbound/inbound use + +Pros: + +- fewer unused topics; +- less setup friction. + +Cons: + +- first message may be slower; +- creation failure blocks communication at the worst moment; +- ambiguous creation state can happen during a real user message. + +### Eager create during connect wizard + +Pros: + +- setup verifies topic capability early; +- failures are visible before real traffic; +- topic registry is ready. + +Cons: + +- creates topics for teams user may never use; +- can clutter Telegram. + +### Hybrid + +Recommended: + +```text +Create a topic for selected/active teams during connect wizard. +Lazy-create for other teams when user enables them. +``` + +This matches "minimum user actions" without creating too many topics. + +## 10. 
Route Ambiguity Cases + +Inbound ambiguity cases: + +- message has no `message_thread_id`; +- message has a thread id not in registry; +- message has a known thread id but route is disabled; +- message replies to a provider message id not in ledger; +- reply target maps to a deleted/renamed teammate; +- reply target maps to an old team generation; +- user uses `/to` for an unknown teammate; +- topic title was manually changed; +- duplicate topic exists for the same team; +- user forwards/copies messages between topics; +- media group spans a topic but parts arrive separately; +- update contains `direct_messages_topic` from channel direct messages, not private chat topic; +- bot receives a message outside private chat if added to a group. + +Resolution policy: + +```text +Unknown topic -> do not deliver to agent, send repair/unknown-topic notice. +Known topic + unknown reply target -> route to lead with quoted context. +Known topic + stale teammate -> route to lead and mention stale target in internal metadata. +No topic id -> onboarding/default command handling only. +``` + +Never guess a team by topic title. + +## 11. Local UI Implications + +The current Messages panel can already show participant flows from `from` and `to`. 
For messenger connectors, add a feature-local projection rather than rewriting the existing panel first: + +```ts +type MessengerConversationProjection = { + routeId: string; + teamId: string; + provider: 'telegram'; + providerTopicTitle: string; + messages: Array<{ + internalMessageId: string; + providerMessageId?: number; + fromMemberId: string; + toMemberId?: string; + replyToInternalMessageId?: string; + direction: 'inbound' | 'outbound'; + deliveryState: 'pending' | 'sent' | 'failed' | 'ambiguous'; + }>; +}; +``` + +The renderer can keep using participant filters, but messenger-specific state should live in `src/features/messenger-connectors/renderer`: + +```text +messenger feature hook +-> maps route/thread state into view model +-> existing MessagesPanel can show the durable local messages +-> optional connector status panel shows Telegram topic health +``` + +Do not put Telegram concepts directly into shared `InboxMessage` unless they are provider-neutral. + +Provider-specific fields belong in a feature table/store: + +```text +provider_message_links +provider_route_registry +provider_delivery_ledger +``` + +## 12. Architecture Fit + +This feature clearly qualifies for the canonical feature architecture: + +```text +src/features/messenger-connectors/ + contracts/ + core/ + domain/ + route.ts + topic.ts + recipient-resolution.ts + provider-message-link.ts + application/ + ports.ts + connect-messenger.ts + receive-provider-update.ts + send-provider-reply.ts + repair-topic-route.ts + main/ + composition/ + adapters/ + input/ + ipc/ + telegram-webhook/ + desktop-relay/ + output/ + telegram/ + team-messages/ + local-store/ + infrastructure/ + preload/ + renderer/ +``` + +Core domain invariants: + +```text +1. Provider topic title never determines route identity. +2. Provider thread id maps to exactly one active team route per bot/chat. +3. Recipient resolution is deterministic and auditable. +4. Unknown topic never reaches an agent as a normal user message. +5. 
Every outbound Telegram message that can be replied to has a ProviderMessageLink. +6. Topic repair never silently changes the user's message destination. +``` + +## 13. Top 3 Options + +### Option 1 - One topic per team, reply-to ledger, default to lead, `/to` escape hatch + +🎯 8 🛡️ 9 🧠 6 + +Approx changed LOC: 2500-5500. + +What it means: + +- each team has one Telegram private topic; +- inbound messages in that topic route to the lead by default; +- replying to a known teammate message routes to that teammate; +- `/to teammate message` provides explicit routing; +- topic id and provider message links are stored durably; +- unknown/stale topics enter repair flow. + +Why this is best: + +- matches the user's selected model; +- avoids topic explosion; +- works with current `from`/`to` message model; +- scales to many teams better than per-teammate topics; +- keeps routing deterministic. + +Risk: + +- users must learn reply-to or `/to` for teammate-specific messages; +- if provider message link ledger is missing, teammate routing falls back to lead; +- requires solid route registry. + +### Option 2 - One topic per team with mutable active recipient controls + +🎯 6 🛡️ 7 🧠 7 + +Approx changed LOC: 3500-7000. + +What it means: + +- each topic has controls such as "Active recipient: Alice"; +- user taps inline buttons or commands to switch active recipient; +- plain messages route to current active recipient until changed. + +Why it is tempting: + +- fewer reply-to requirements; +- feels convenient on mobile; +- user can have a visible selected target. + +Risk: + +- hidden mutable state across desktop and phone is easy to misunderstand; +- two devices/users can change active recipient unexpectedly; +- stale controls can route messages incorrectly; +- callback handling and status messages add complexity. + +This can be added later after Option 1, but I would not make it the first model. 
+ +### Option 3 - One topic per teammate or per internal conversation + +🎯 4 🛡️ 6 🧠 8 + +Approx changed LOC: 4000-9000. + +What it means: + +- team lead has one topic; +- each teammate has a separate topic; +- or each conversation creates a topic. + +Why it looks reliable: + +- recipient is obvious from topic; +- fewer reply-to resolution rules. + +Why it is worse: + +- topic count explodes; +- Telegram UI becomes cluttered; +- team context fragments; +- archiving/renaming/recovering many topics is painful; +- cross-team/project grouping becomes harder; +- user wanted one team context, not dozens of technical threads. + +Use only for a future "power mode" if users explicitly ask for per-agent topics. + +## 14. Decision Update + +Recommended design: + +```text +Default official bot: + one private topic per team + topic id maps to team route + default route to lead + reply-to route to teammate through ProviderMessageLink + `/to` command as explicit escape hatch + no mutable active recipient in MVP +``` + +Required build blocks before implementation: + +```text +1. Stable TeamRoute identity independent of teamName. +2. MessengerTopicRegistry with route generations and repair states. +3. ProviderMessageLink ledger for every Telegram outbound message. +4. RecipientResolver pure domain service. +5. UnknownTopicPolicy that never sends unknown messages to agents. +6. TopicRepair use case. +7. Tests for duplicate, deleted, renamed, stale, and unknown topics. +``` + +The most important invariant: + +```text +Telegram topic/thread id chooses team. +Provider reply-to message id chooses teammate. +Plain topic message chooses lead. +``` + +## 15. 
Tests To Write First + +Domain tests: + +- known topic + no reply -> lead; +- known topic + reply to lead message -> lead; +- known topic + reply to teammate message -> teammate; +- known topic + reply to user message -> lead; +- known topic + unknown reply message id -> lead with ambiguity metadata; +- unknown topic -> repair/notice, not agent delivery; +- disabled topic -> archived/disabled response, not agent delivery; +- duplicate topic route -> terminal config error; +- renamed team -> same route id, updated display title; +- deleted teammate -> lead fallback with stale target metadata; +- `/to Alice hello` -> Alice; +- `/to unknown hello` -> lead or error notice by policy; +- media group in known topic -> same team route for all parts. + +Adapter tests: + +- `createForumTopic` success persists `message_thread_id`; +- create response lost enters `create_ambiguous`; +- `sendMessage` includes correct `message_thread_id`; +- `sendMessage` failure for topic not found marks repair-required; +- inbound update stores provider message link before local delivery ACK; +- outbound provider message id is stored before considering Telegram delivery complete; +- duplicate webhook with same provider message id returns existing local route. + +Renderer tests: + +- connector status panel shows topic healthy/error/repair-required; +- message row prefix includes team/member context for Telegram projection; +- participant filters still work with messenger-originated messages; +- reply-to unavailable shows lead fallback reason. + +## 16. 
Remaining Low-Confidence Areas + +Still worth deeper research next: + +- exact Telegram client UX for private-chat topics on mobile and desktop after Bot API 9.3/9.4; +- whether BotFather private topic settings can be configured programmatically or only manually; +- exact error codes returned when a private topic is deleted or disabled; +- whether Telegram private topics expose enough update events to detect user rename/delete promptly; +- how long topic titles can remain readable with many projects and similar team names; +- whether `sendMessageDraft` could improve "agent is typing" UX per team topic without creating noisy messages; +- how to migrate a user from official shared bot topics to own-bot topics without losing local route history. + diff --git a/docs/research/messenger-connectors-uncertainty-pass-32.md b/docs/research/messenger-connectors-uncertainty-pass-32.md new file mode 100644 index 00000000..fa63510e --- /dev/null +++ b/docs/research/messenger-connectors-uncertainty-pass-32.md @@ -0,0 +1,796 @@ +# Messenger Connectors - Uncertainty Pass 32 + +Date: 2026-04-29 +Scope: agent reply capture, outbound Telegram delivery, message visibility policy, duplicate prevention, and provider delivery ambiguity + +## Executive Delta + +The next lowest-confidence boundary is the final leg: + +```text +agent/team message +-> local app feed +-> outbound eligibility decision +-> Telegram sendMessage +-> provider message id +-> future reply-to route +``` + +This is where two severe bugs can happen: + +```text +1. Privacy leak: + internal thoughts, tool summaries, teammate protocol XML, retry prompts, or slash output + get sent to Telegram as if they were user-facing replies. + +2. Duplicate provider send: + Telegram receives a sendMessage request, but our process times out before seeing the result. + Automatic retry can send the same user-visible reply twice. 
+``` + +The fix is a dedicated outbound projection layer: + +```text +MessengerOutboundProjectionGate + decides if a local message is eligible for external provider delivery + +MessengerProviderDeliveryLedger + records provider send intent, in-flight state, success, ambiguity, and terminal failure + +ProviderMessageLink + records Telegram message id after success so reply-to routing works later +``` + +Do not use the renderer feed or `sentMessages.json` as the outbound provider queue. They are useful inputs, but not the delivery protocol. + +## Source Facts Rechecked + +Telegram official facts checked on 2026-04-29: + +- Bot API methods return a JSON object with `ok`; successful calls put the method result in `result`. +- `sendMessage` sends text and returns the sent `Message` on success. +- `sendMessage` supports `message_thread_id` for forum/private-chat topics. +- `sendMessage` supports `reply_parameters` for replying to a specific message. +- When using webhook inline responses to call Bot API methods, Telegram says it is not possible to know whether the method succeeded or to get its result. +- `ResponseParameters.retry_after` tells how many seconds to wait after flood control. +- Telegram FAQ recommends avoiding more than one message per second in a single chat; otherwise 429 errors can happen. +- Telegram FAQ says bots should not rely on webhook inline response if they need to know the result of the method. +- Bot API docs and FAQ do not expose a client-supplied idempotency key for `sendMessage`. 
+ +Sources: + +- https://core.telegram.org/bots/api#making-requests +- https://core.telegram.org/bots/api#making-requests-when-getting-updates +- https://core.telegram.org/bots/api#sendmessage +- https://core.telegram.org/bots/api#replyparameters +- https://core.telegram.org/bots/api#responseparameters +- https://core.telegram.org/bots/faq + +Local code facts: + +- `TeamSentMessagesStore` persists `sentMessages.json`, but it caps history at 200 messages and is optimized as a local UI/persistence store, not a provider delivery ledger. +- `TeamSentMessagesStore` preserves message fields such as `from`, `to`, `source`, `leadSessionId`, `conversationId`, and `replyToConversationId`. +- `TeamDataService.extractLeadSessionTextsFromJsonl` creates lead-session text rows with `source: 'lead_session'` and usually no `to`. +- `leadSessionMessageExtractor` creates slash command result rows with `source: 'lead_session'` and `messageKind: 'slash_command_result'`. +- `TeamProvisioningService` captures native `SendMessage` tool calls. `recipient === 'user'` is persisted to `sentMessages.json`; other recipients are persisted to inbox. +- `relayLeadInboxMessages` captures plain lead output for inbox relay, strips agent-only blocks, then persists a `lead_process` message to user. +- `stripAgentBlocks` removes `info_for_agent`, legacy agent blocks, and OpenCode runtime delivery blocks. +- `inboxNoise` detects internal JSON noise and teammate-message XML protocol artifacts. +- `RuntimeDeliveryService` already has strong local idempotency ideas: journal begin, payload hash conflict detection, destination verification, committed state, failed retryable state, and reconciler. +- Existing runtime delivery works for local destinations because it can verify local files/stores. Telegram provider sends are different because success may be unknowable after network timeout. 
+ +Implication: + +```text +The current app has good ingredients, +but messenger outbound needs a separate provider delivery ledger +with stricter "external visibility" rules than the UI feed. +``` + +## 1. Outbound Eligibility Is A Security Boundary + +The local feed contains multiple categories: + +```text +user_sent +lead_process +lead_session +runtime_delivery +inbox +system_notification +cross_team +cross_team_sent +slash_command_result +tool summaries +command output +internal protocol blocks +noise JSON +``` + +Only a small subset should be allowed to leave the app through Telegram. + +Minimal provider-send eligibility: + +```text +message.to == "user" +message.from is a known active team member or lead +message.source is user-visible by policy +message.text remains non-empty after sanitization +message is linked to a provider route or an explicit publish action +message has not already been sent to that provider route +route is active +topic is active +outbound policy allows this member/source/kind +``` + +Hard excludes: + +```text +message.from == "user" +message.from == "system" +message.to != "user" +messageKind == "slash_command" unless explicitly mirrored as a user command echo +messageKind == "slash_command_result" unless explicitly requested +isInboxNoiseMessage(text) +isThoughtProtocolNoise(text) +stripAgentBlocks(text) is empty +only teammate-message XML blocks +tool-only rows with no human answer +debug diagnostics +runtime retry prompt text +permission_request JSON +``` + +The important rule: + +```text +If a message is visible in the local app, that does not automatically mean it is safe to send to Telegram. +``` + +## 2. What Counts As A User-Facing Agent Reply + +For the Telegram topic product, user-facing means: + +```text +Lead or teammate intentionally answered the external user. +``` + +Good candidates: + +- `SendMessage(to="user")` captured from lead or teammate runtime. 
+- Runtime delivery envelope whose destination is `user_sent_messages`. +- A visible reply proof with `relayOfMessageId` linked to a messenger inbound turn. +- A manual user action in our UI like "send this to Telegram". + +Risky candidates: + +- Lead session thoughts without `to`. +- Plain assistant text captured from stdout during a relay batch. +- Slash command output. +- Task/comment notifications. +- Cross-team internal coordination. +- Teammate-to-teammate messages. + +Recommended MVP: + +```text +Auto-send to Telegram only messages that have an explicit destination to external user. +Do not auto-send generic lead thoughts. +``` + +This means: + +```text +lead_process with to=user -> eligible if linked to route +runtime delivery to user -> eligible if linked to route +lead_session without to -> not eligible +slash_command_result -> not eligible by default +cross_team_sent -> not eligible unless to=user and explicit external link exists +``` + +## 3. User Wants Teammate Messages Too + +The user's desired behavior: + +```text +Messages from other teammates to the user should appear in Telegram too, +signed by each teammate. +``` + +This is real and understandable. The safe model: + +```text +If any team member sends a message to "user" in a route-linked conversation, +send it into the team topic with a member prefix. +``` + +Example Telegram rendering: + +```text +[Frontend] Alice +I found the failing test. The auth callback returns before token refresh completes. +``` + +```text +[Frontend] Lead +Alice is checking the failing test. I will update you when she has a patch. +``` + +Do not send teammate-internal chatter: + +```text +Alice -> Lead: "Can you clarify the expected API?" +Lead -> Bob: "Please review Alice's patch" +Bob -> Alice: "Approved" +``` + +unless the destination is explicitly `user`. 
+ +Therefore the outbound projection should key off destination, not role: + +```text +to=user + route link + eligible source -> send to Telegram +to=lead/teammate/cross-team -> do not send +``` + +## 4. Route Link Requirement + +Do not send every `to=user` message to Telegram. The user may have multiple channels: + +```text +local UI only +Telegram official bot +Telegram own bot +future WhatsApp +future Discord +``` + +Outbound needs an explicit route link: + +```ts +type MessengerOutboundContext = + | { + kind: 'reply_to_provider_turn'; + routeId: string; + inboundProviderMessageKey: string; + internalInboundMessageId: string; + } + | { + kind: 'manual_publish'; + routeId: string; + requestedBy: 'user'; + localMessageId: string; + }; +``` + +For auto-send MVP, require `reply_to_provider_turn`. + +Manual publish can come later. Without route link, local app replies remain local app replies. + +## 5. Provider Delivery Is Not Local Delivery + +Existing `RuntimeDeliveryService` can retry local destinations because it can verify them: + +```text +write deterministic local message id +verify file/store contains destination message id +mark committed +``` + +Telegram is different: + +```text +POST sendMessage +network timeout before response +unknown whether Telegram created the message +no Bot API client idempotency key +cannot verify by deterministic local id +``` + +Therefore provider delivery states need an ambiguity state: + +```ts +type MessengerProviderDeliveryStatus = + | 'pending' + | 'send_in_flight' + | 'sent' + | 'send_ambiguous' + | 'rate_limited' + | 'failed_retryable_before_send' + | 'failed_terminal' + | 'cancelled'; +``` + +Critical rule: + +```text +Never automatically retry send_in_flight after a transport timeout +unless the provider adapter can prove the previous attempt did not reach Telegram. +``` + +Most HTTP timeout cases cannot prove that. + +## 6. 
Provider Delivery Ledger + +Suggested ledger: + +```ts +type MessengerProviderDeliveryRecord = { + idempotencyKey: string; + provider: 'telegram'; + botScope: 'official' | 'own_bot'; + routeId: string; + providerChatIdHash: string; + providerMessageThreadId: number | null; + internalMessageId: string; + internalPayloadHash: string; + visibilityDecisionId: string; + status: MessengerProviderDeliveryStatus; + providerMessageId: number | null; + replyToProviderMessageId: number | null; + attempts: number; + nextAttemptAt: string | null; + ambiguousSince: string | null; + lastErrorCode: string | null; + lastErrorMessageRedacted: string | null; + createdAt: string; + updatedAt: string; + sentAt: string | null; +}; +``` + +Idempotency key should be deterministic and must not depend on message text, so that a changed payload is detected as a conflict instead of producing a new key: + +```text +sha256(provider + routeId + internalMessageId + deliveryKind) +``` + +Payload hash prevents accidental reuse: + +```text +same idempotencyKey + different payloadHash -> conflict, terminal +``` + +When `sent`: + +```text +create ProviderMessageLink: + providerMessageId -> internalMessageId +``` + +When `send_ambiguous`: + +```text +do not create ProviderMessageLink +show warning in connector status +allow manual "send again anyway" or "mark as sent" if future support flow exists +``` + +## 7. Send State Machine + +Safe provider send state machine: + +```text +pending +-> send_in_flight +-> sent + +pending +-> failed_retryable_before_send +-> pending + +send_in_flight +-> rate_limited +-> pending at retry_after + +send_in_flight +-> send_ambiguous + +send_in_flight +-> failed_terminal +``` + +Retryable before-send examples: + +- route temporarily locked; +- local rate limiter says wait; +- backend/desktop connection unavailable before calling Telegram; +- provider adapter rejected validation before network send. 
+ +Ambiguous examples: + +- request body was handed to HTTP client and connection timed out; +- process crashed after starting `sendMessage`; +- backend sent inline webhook response with method payload and needs the provider result; +- connection reset after partial response; +- app received malformed response after Telegram may have accepted request. + +Terminal examples: + +- blocked by user; +- chat not found; +- topic missing and repair is required; +- message text empty after sanitization; +- payload too long and split policy disabled; +- route disabled; +- payload hash conflict. + +## 8. Do Not Use Inline Webhook Response For Outbound Replies + +Telegram allows calling a Bot API method by returning it in the webhook response. This is tempting for fast replies. + +Do not use it for messenger outbound replies. + +Reason: + +```text +Telegram says we cannot know if the inline method succeeded or get its result. +Without the returned Message, we cannot store providerMessageId. +Without providerMessageId, reply-to teammate routing becomes weaker. +``` + +Use normal Bot API calls for outbound messages: + +```text +POST /bot/sendMessage +await result +persist provider message id +then mark sent +``` + +Inline webhook response is acceptable only for non-critical throwaway notices where no future reply routing is needed. + +## 9. Rate Limiting + +Telegram FAQ warns to avoid more than one message per second in a single chat. 
+ +For one-topic-per-team inside one private chat, the chat-level limiter matters more than topic-level limiter: + +```text +same Telegram private chat +many team topics +many team replies +one chat-level provider limit +``` + +Add provider route limiter: + +```text +global bot limiter +per chat limiter +per route/topic limiter +``` + +MVP values: + +```text +per chat: 1 message per second steady +per route/topic: 1 message per second steady +burst: small queue, for example 3 messages +queue overflow: collapse or mark delayed +``` + +Avoid splitting a single long answer into many Telegram messages unless necessary. If splitting is needed because text exceeds Telegram limit, send chunks under one ledger group and be careful: + +```text +part 1 sent +part 2 ambiguous +part 3 pending +``` + +Multi-part provider delivery needs a group ledger, so MVP should keep replies concise and reject/trim with clear policy before adding splitting. + +## 10. Text Sanitization And Formatting + +Outbound text pipeline: + +```text +raw local message +strip agent-only blocks +strip teammate protocol blocks if present +reject JSON noise +normalize whitespace +prefix with team/member context +enforce max length +send plain text or Telegram entities +``` + +Avoid parse modes in MVP: + +```text +send plain text +do not use MarkdownV2 until escaping is proven +``` + +Reason: + +- model output can contain arbitrary punctuation; +- MarkdownV2 escaping is brittle; +- malformed formatting can fail provider send; +- provider failure after partial route logic increases ambiguity. + +Use explicit Telegram entities later if rich formatting is necessary. + +## 11. Reply-To Mapping + +When sending a provider reply, use `reply_parameters` if we are replying to a known inbound provider message: + +```text +reply_to_provider_message_id = inbound Telegram message id +message_thread_id = team topic id +``` + +But do not depend only on Telegram reply UI. 
+ +Also store: + +```text +ProviderMessageLink(providerMessageId -> internalMessageId) +``` + +Then future user replies can route: + +```text +reply_to_message.message_id +-> ProviderMessageLink +-> internal from member +-> route to that teammate +``` + +If provider send succeeds but link persistence fails: + +```text +send was externally visible +do not retry send +mark provider link missing +schedule repair if possible +``` + +This should become `sent_link_missing`, or `sent` with diagnostics. It is not a send failure. + +## 12. Local Store Is Not Enough + +`sentMessages.json` is capped at 200 rows. This is fine for a UI feed but not for provider reply-to history. + +Provider message links need their own retention policy: + +```text +keep links for active route history window +minimum 90 days or until route deletion by user +prune only with route-level retention +never prune solely because local sentMessages hit 200 rows +``` + +If links are pruned: + +- future replies to old Telegram messages route to lead; +- UI should show "old reply target not available"; +- do not guess teammate from display prefix. + +## 13. Deletion And Edits + +MVP can ignore edits and deletions mostly, but not silently: + +Inbound Telegram edited messages: + +- do not mutate already delivered internal turns in MVP; +- create an edit event or ignore with diagnostics; +- if edited before desktop acceptance, process latest only if ingress design supports it. + +Outbound local message edits: + +- do not edit Telegram messages in MVP; +- send corrections as new messages only on explicit action. + +Telegram delete: + +- if provider message deleted, later reply-to links may break; +- keep link but mark stale when detected by send/reply errors. + +This avoids complicated bidirectional sync in v1. + +## 14. Failure Matrix + +Critical cases: + +- Local lead thought appears with no `to`. + - Do not send. +- Lead uses `SendMessage(to="user")` answering a Telegram-origin message. 
+ - Eligible, send to that route. +- Teammate uses `SendMessage(to="user")` answering a Telegram-origin message. + - Eligible, send to same team topic with teammate prefix. +- Teammate sends to lead. + - Not eligible. +- Message contains only an agent-only block. + - Strip to empty, not eligible. +- Message contains teammate XML blocks. + - Strip/block by protocol-noise policy. +- Slash command output row appears. + - Not eligible by default. +- Provider route disabled after local reply was generated. + - Mark terminal or cancelled, do not send. +- Topic route repair-required. + - Do not fallback to general chat. +- Telegram returns 429 with retry_after. + - Mark rate_limited, schedule retry after given time. +- HTTP timeout after request sent. + - Mark send_ambiguous, do not auto-retry. +- HTTP timeout before request body leaves process. + - If adapter can prove no send, mark failed_retryable_before_send. +- Telegram returns success but local link write fails. + - Do not retry provider send, repair link. +- Duplicate local message event. + - Ledger idempotency key returns existing provider status. +- Same idempotency key with different text. + - Payload conflict, terminal. +- App restarts with `send_in_flight`. + - Convert to send_ambiguous unless adapter has proof. +- Provider message link pruned. + - Future reply falls back to lead with stale target metadata. + +## 15. Top 3 Options + +### Option 1 - Strict outbound projection gate + provider delivery ledger + +🎯 9 🛡️ 9 🧠 7 + +Approx changed LOC: 2500-5500. + +What it means: + +- build `MessengerOutboundProjectionGate`; +- build `MessengerProviderDeliveryLedger`; +- auto-send only explicit `to=user` replies linked to a provider route; +- use Telegram normal API calls, not inline webhook response, for routable replies; +- mark network unknowns as `send_ambiguous`, not retryable; +- store `ProviderMessageLink` after success. 
+ +Why this is best: + +- prevents internal-message leakage; +- avoids unsafe Telegram duplicates; +- supports teammate messages to user; +- gives reply-to routing a durable provider message id; +- matches the feature architecture standard. + +Risk: + +- more code; +- some ambiguous sends need user-visible diagnostics; +- initial behavior may feel conservative. + +### Option 2 - Reuse `sentMessages.json` as outbound queue with simple dedupe + +🎯 5 🛡️ 5 🧠 4 + +Approx changed LOC: 800-1800. + +What it means: + +- watch `sentMessages.json`; +- send any new `to=user` message to Telegram; +- store last sent internal message ids. + +Why it is tempting: + +- quick demo; +- current system already writes user-directed lead messages there; +- easy to observe from renderer. + +Why it is risky: + +- `sentMessages.json` is capped at 200; +- it is not route-specific; +- not all `to=user` messages should go to Telegram; +- provider send ambiguity is not represented; +- reply-to provider ids need another store anyway. + +### Option 3 - Send all visible feed messages with broad filters + +🎯 3 🛡️ 3 🧠 3 + +Approx changed LOC: 500-1400. + +What it means: + +- use `MessagesPanel`/feed projection; +- filter obvious noise; +- push visible items to Telegram. + +Why it is bad: + +- visibility in app is not external eligibility; +- feed contains lead thoughts, slash results, diagnostics, and UI-specific projections; +- dedupe is feed-oriented, not provider-send oriented; +- provider reply-to routing remains fragile. + +This should not be used beyond a throwaway prototype. + +## 16. Decision Update + +Recommended model: + +```text +Inbound Telegram turn creates route-linked internal message. +Agent/team responses become local messages as today. +MessengerOutboundProjectionGate observes durable local messages. +Only explicit user-directed, route-linked replies become provider send intents. +MessengerProviderDeliveryLedger handles Telegram send state. 
+ProviderMessageLink stores successful Telegram message ids. +Future reply-to routing uses ProviderMessageLink. +``` + +Minimal eligibility formula: + +```text +eligible = + route.active + && message.to == "user" + && message.from is active member + && origin/reply context links message to provider route + && message not already delivered to provider + && sanitized text non-empty + && message kind/source allowed by policy +``` + +Important product behavior: + +```text +Teammate messages to user are sent to Telegram. +Teammate messages to lead or other teammates are not sent. +Lead thoughts without explicit to=user are not sent. +``` + +## 17. Tests To Write First + +Domain tests: + +- `to=user` lead reply linked to provider route is eligible. +- `to=user` teammate reply linked to provider route is eligible. +- `to=user` local-only reply without route link is not eligible. +- `to=lead` teammate message is not eligible. +- lead session thought without `to` is not eligible. +- slash command result is not eligible by default. +- agent-only block strips to empty and is not eligible. +- JSON noise is not eligible. +- provider route disabled blocks eligibility. +- same internal message maps to same provider idempotency key. +- same idempotency key with changed payload is conflict. + +Provider ledger tests: + +- pending -> send_in_flight -> sent creates provider link. +- pre-send validation failure is retryable. +- 429 response stores retry_after and schedules retry. +- HTTP timeout after request started becomes send_ambiguous. +- restart with send_in_flight becomes send_ambiguous. +- duplicate local event returns existing sent/ambiguous state. +- success with provider link write failure does not retry provider send. + +Adapter tests: + +- Telegram send uses `message_thread_id`. +- Telegram send uses `reply_parameters` when inbound provider message id is known. +- Telegram send does not use webhook inline response for routable replies. 
+- long text is rejected or handled by explicit split policy. +- parse mode is omitted in MVP. + +Renderer tests: + +- connector status shows ambiguous provider sends. +- ambiguous send has manual resolution affordance. +- user can see why a local reply was not sent to Telegram. +- teammate prefix renders in Telegram projection preview. + +## 18. Remaining Low-Confidence Areas + +Still worth deeper research next: + +- exact local event source for teammate `SendMessage(to="user")` across all supported runtimes, not just OpenCode; +- whether legacy Claude lead-session plain text should ever auto-send to Telegram or always require explicit SendMessage; +- how to migrate old `sentMessages.json` rows into provider delivery state without accidental sends; +- how to model manual "send again anyway" for `send_ambiguous` without hiding duplicate risk; +- whether `sendMessageDraft` can safely show typing/progress in a topic without confusing delivery state; +- exact Telegram error taxonomy for deleted private topic, blocked bot, and migrated chats in Bot API responses; +- retention policy for `ProviderMessageLink` under privacy delete/export requirements. + diff --git a/docs/research/messenger-connectors-uncertainty-pass-33.md b/docs/research/messenger-connectors-uncertainty-pass-33.md new file mode 100644 index 00000000..c8ebf029 --- /dev/null +++ b/docs/research/messenger-connectors-uncertainty-pass-33.md @@ -0,0 +1,901 @@ +# Messenger Connectors - Uncertainty Pass 33 + +Date: 2026-04-29 +Scope: Telegram account binding, connect wizard authorization, official shared bot vs own bot privacy, route ownership, revocation, and anti-hijack rules + +## Executive Delta + +The next lowest-confidence area is not the Telegram topic API. 
+ +It is the authorization boundary: + +```text +desktop install +-> pending Telegram binding +-> Telegram user/chat identity +-> active team route +-> provider topic creation +-> future inbound/outbound permission +``` + +If this is wrong, the feature can look correct but still have severe bugs: + +```text +1. A forwarded /start link binds the wrong Telegram account. +2. A stale pairing code reactivates an old route. +3. A username change breaks identity or routes to the wrong person. +4. A copied desktop config gives another OS user access to a Telegram route. +5. A backend log leaks chat ids, start payloads, or own-bot tokens. +6. A route is activated before the desktop confirms the Telegram claim. +``` + +The recommended shape is: + +```text +Desktop creates one-time pairing challenge +-> user opens t.me/our_bot?start=<nonce> +-> backend records Telegram claim +-> desktop shows "Telegram account X wants to connect" +-> user confirms in desktop +-> route becomes active +-> team topics are created or reconciled +``` + +Do not treat Telegram `/start <nonce>` alone as authorization. It proves that the message came from some Telegram account through Telegram, but it does not prove that the account is the same human currently controlling the desktop app. + +## Source Facts Rechecked + +Telegram official facts checked on 2026-04-29: + +- Deep links let bots receive a `start` parameter in private chats. The parameter can use `A-Z`, `a-z`, `0-9`, `_`, `-`; Telegram recommends base64url, and the parameter can be up to 64 characters. +- Bot links have the shape `https://t.me/<bot_username>?start=<parameter>`. +- Bot API `Message` has `chat`, optional `from`, optional `message_thread_id`, and `is_topic_message` for forum supergroups or private chats with the bot. +- Bot API `User.id` is the stable identifier. It may exceed 32 bits but has at most 52 significant bits. `username` is optional and must not be the primary identity. +- Bot API `Chat.id` has the same 52-bit warning. 
Store it as string or signed 64-bit safe numeric representation, not as a JS lossy number in persistence boundaries. +- Bot API `setWebhook.secret_token` causes Telegram to send `X-Telegram-Bot-Api-Secret-Token` on webhook requests. This verifies the webhook was set by us, not user identity. +- Bot API 9.6, April 3, 2026, added Managed Bots. The created managed bot token can be fetched using `getManagedBotToken`. This means Managed Bots do not provide a "token hidden from manager bot/backend" privacy story if our bot/backend is the manager. +- Telegram Mini Apps/Login-style data can be validated through HMAC with the bot token, and newer third-party validation can use Telegram Ed25519 signatures. This is useful for a web identity step, but it is more product/backend complexity than the default bot chat wizard needs. + +Sources: + +- https://core.telegram.org/bots/features#deep-linking +- https://core.telegram.org/api/links#bot-links +- https://core.telegram.org/bots/api#message +- https://core.telegram.org/bots/api#user +- https://core.telegram.org/bots/api#setwebhook +- https://core.telegram.org/bots/api#recent-changes +- https://core.telegram.org/bots/api#managedbotcreated +- https://core.telegram.org/bots/api#keyboardbuttonrequestmanagedbot +- https://core.telegram.org/bots/webapps#validating-data-received-via-the-mini-app + +Local code facts checked: + +- `docs/FEATURE_ARCHITECTURE_STANDARD.md` says medium/large cross-process features should live in a full feature slice with `contracts`, `core/domain`, `core/application`, `main`, `preload`, and `renderer`. +- No obvious existing install-id or messenger-binding model was found in local searches. +- `ConfigManager` persists app config at `~/.claude/agent-teams-config.json`. +- `getAppDataPath()` returns app-owned data under Electron `userData` or a fallback app data directory, explicitly separate from `~/.claude`. 
+- `ApiKeyService` already has a useful encrypted-secret pattern: Electron `safeStorage` first, AES-256-GCM local fallback, file mode `0o600`, and masked list output. This is relevant for optional own-bot token storage. +- Current inbox architecture is based on `~/.claude/teams/{teamName}/inboxes/{memberName}.json`, with known race handling and message ids from earlier research. + +Implication: + +```text +Messenger connectors need their own binding/security sub-slice. +This should not be bolted onto Settings config as plain fields. +``` + +## Top 3 Binding Options + +### 1. Desktop-originated deep link plus desktop confirmation + +🎯 9 🛡️ 9 🧠 6 Approx change size: 2500-5500 LOC + +Flow: + +```text +1. Desktop generates an install identity and opens a connector setup session. +2. Desktop asks official backend for a one-time pairing challenge. +3. Backend stores only a challenge hash, selected capabilities, TTL, and desktop session id. +4. Desktop shows QR/link: https://t.me/our_bot?start=<nonce> +5. User opens link in Telegram. +6. Official bot receives /start <nonce>. +7. Backend validates nonce, marks challenge as telegram_claimed, records Telegram user/chat identity. +8. Backend pushes "claim received" to desktop control channel. +9. Desktop shows Telegram profile preview and asks for explicit confirm. +10. Only after confirm, backend activates binding and the desktop creates/reconciles team routes/topics. +``` + +Why this is best: + +- The `/start` link is convenient. +- A stolen link is not enough because the desktop still must confirm the exact Telegram account claim. +- The route cannot become active while the user is away from desktop setup. +- It fits official shared bot default. +- It can reuse the same route model for own-bot later. + +Main weaknesses: + +- Requires a live desktop to complete binding. +- Requires a backend control channel for official bot mode. +- Backend will know Telegram chat id for official shared bot routing. 
This can be minimized and encrypted at rest, but not eliminated if backend sends messages through the shared bot. + +Verdict: + +```text +Use as default MVP wizard. +``` + +### 2. Bot-first short code entered into desktop + +🎯 8 🛡️ 8 🧠 5 Approx change size: 1800-4000 LOC + +Flow: + +```text +1. User opens our bot manually or from a generic link. +2. Bot creates a short visible code for that Telegram chat. +3. User enters or pastes the code into desktop. +4. Desktop sends the code to backend through its authenticated setup session. +5. Backend matches Telegram claim with desktop session. +6. Desktop confirms and activates binding. +``` + +Why it is useful: + +- Works when deep links are blocked, copied incorrectly, or opened on the wrong device. +- The Telegram chat is already known before desktop confirmation. +- Good fallback for enterprise environments where QR/deep link is unreliable. + +Main weaknesses: + +- More user effort. +- Short visible codes need strict TTL, rate limits, and replay protection. +- If the user pastes code into the wrong desktop install, desktop confirmation still protects against silent activation, but UX can be confusing. + +Verdict: + +```text +Keep as fallback, not the primary happy path. +``` + +### 3. Telegram Mini App or Login Widget based verification + +🎯 7 🛡️ 8 🧠 8 Approx change size: 3500-7500 LOC + +Flow: + +```text +1. User opens a Telegram Mini App or Login Widget. +2. Web identity data is validated using Telegram HMAC or Ed25519 validation. +3. Backend links that verified Telegram identity to the user's app account or desktop setup session. +4. Bot chat binding is completed after confirmation. +``` + +Why it is attractive: + +- Strong web identity story. +- Better if Agent Teams later has real cloud accounts, team membership, device management, and web admin. +- Can support "manage all connected Telegram devices" in a richer UI. + +Main weaknesses: + +- Too much product surface for MVP. 
+- Needs domain setup, web identity screens, auth expiry rules, and account/device policy. +- Still does not remove the need to bind a bot chat/topic route for messaging. + +Verdict: + +```text +Good later for cloud account management. +Do not use as default MVP unless Agent Teams already depends on cloud login. +``` + +## Explicitly Rejected Option + +### `/start` link alone activates the route + +🎯 4 🛡️ 4 🧠 3 Approx change size: 900-2000 LOC + +This is easy, but unsafe. + +Failure case: + +```text +1. Desktop shows a setup QR. +2. User screenshots or forwards it. +3. Another Telegram account opens it first. +4. Backend binds that chat to the user's teams. +5. The wrong Telegram account receives team replies. +``` + +This option can be patched with TTL and rate limits, but it still has the wrong trust boundary. + +## Recommended Binding State Machine + +```text +unbound + -> desktop_pending + -> telegram_claimed + -> desktop_confirmed + -> active + -> revoked +``` + +Terminal or side states: + +```text +expired +cancelled +suspicious +conflict +provider_unavailable +desktop_offline +``` + +Rules: + +- `desktop_pending`: challenge exists, but no Telegram user is associated yet. +- `telegram_claimed`: Telegram user/chat has sent the nonce, but no route is active yet. +- `desktop_confirmed`: user explicitly accepted the claim in desktop. +- `active`: route may receive inbound Telegram messages and send outbound replies. +- `expired`: TTL elapsed before confirmation. The `/start` payload must become useless. +- `cancelled`: desktop cancelled setup. Later Telegram updates with that nonce get a generic expired response. +- `suspicious`: multiple different Telegram users tried the same nonce, too many attempts, or mismatch with an already active binding. +- `conflict`: same Telegram account/chat is already bound in a way that conflicts with the selected route policy. +- `revoked`: route exists historically but is not allowed to deliver. 
+ +Important invariant: + +```text +No MessengerRoute can become active unless a Telegram claim and a desktop confirmation refer to the same pairing challenge id. +``` + +## Pairing Challenge Shape + +Provider-neutral domain model: + +```ts +interface MessengerPairingChallenge { + id: string; + provider: 'telegram'; + mode: 'official-shared-bot' | 'own-bot'; + installId: string; + desktopSessionId: string; + challengeHash: string; + challengeCreatedAt: string; + challengeExpiresAt: string; + state: + | 'desktop_pending' + | 'telegram_claimed' + | 'desktop_confirmed' + | 'active' + | 'expired' + | 'cancelled' + | 'suspicious' + | 'conflict' + | 'revoked'; + claimedBy?: { + providerUserIdHash: string; + providerChatIdHash: string; + displayNameSnapshot: string; + usernameSnapshot?: string; + claimedAt: string; + }; + capabilities: { + canReceiveTeamTopics: boolean; + canSendExternalUserMessages: boolean; + canIssueCommands: boolean; + }; +} +``` + +Nonce rules: + +- Generate at least 128 bits of randomness. +- Encode base64url without padding. +- Stay under Telegram's 64-character `start` limit. +- Store only a keyed hash server-side, not the raw nonce. +- TTL should be 5-10 minutes. +- Single use after `telegram_claimed`, with idempotent handling for duplicate update delivery. +- Never log raw nonce. + +## Identity Model + +Provider-neutral route ownership: + +```ts +interface MessengerAccountBinding { + id: string; + provider: 'telegram'; + mode: 'official-shared-bot' | 'own-bot'; + installId: string; + providerAccountRef: { + userIdHash: string; + chatIdHash: string; + rawChatIdStorageRef?: string; + }; + displaySnapshot: { + firstName?: string; + lastName?: string; + username?: string; + languageCode?: string; + }; + status: 'active' | 'revoked' | 'disabled' | 'provider_blocked_bot'; + createdAt: string; + confirmedAt: string; + lastSeenAt?: string; + revokedAt?: string; +} +``` + +Identity rules: + +- Telegram `user.id` is identity. 
+- Telegram `chat.id` is delivery destination. +- Telegram `username` is display metadata only. +- Store ids as strings at persistence/API boundaries to avoid JS precision mistakes. +- Hash ids for logs and list views. +- For official shared bot, backend needs a usable chat id at send time. Use KMS/envelope encryption at rest and redact logs. Do not pretend the backend has zero access. +- For own-bot local mode, raw bot token and chat ids can stay local. This is the cleanest privacy story. + +## Official Shared Bot Privacy Story + +What is true: + +```text +Our backend receives Telegram webhook updates. +Our backend sees enough Telegram identity to route the message. +Our backend needs enough delivery identity to call sendMessage through our shared bot. +``` + +What we can do: + +```text +1. No durable plaintext message queue while desktop is offline. +2. Encrypt chat ids at rest. +3. Hash ids in logs and analytics. +4. Store minimal Telegram profile snapshots. +5. Keep message bodies out of backend durable storage in default mode. +6. If desktop is offline, send a clear offline notice instead of queueing plaintext. +``` + +What we cannot honestly claim: + +```text +The official shared bot backend never sees Telegram metadata. +``` + +Recommended copy: + +```text +Default bot is easiest: messages pass through Agent Teams relay while your desktop is online. +We do not store message bodies in the default relay queue. +For maximum privacy, connect your own bot locally. +``` + +## Managed Bots Privacy Recheck + +Managed Bots are useful, but not for "token invisible to us" if our bot/backend is the manager. + +Official docs say: + +```text +ManagedBotCreated.bot token can be fetched using getManagedBotToken. +ManagedBotUpdated.bot token can be fetched using getManagedBotToken. +``` + +So the manager bot can fetch the created bot token. + +This means: + +```text +If our backend runs the manager bot, our backend can technically get the managed bot token. 
+``` + +Managed Bots can still be useful for convenience: + +- Less copy/paste from BotFather. +- Better guided creation. +- Automatic suggested name/username. +- Token rotation through `replaceManagedBotToken`. + +But the privacy label should be: + +```text +Convenient customer-owned bot, managed by Agent Teams +``` + +not: + +```text +Private token that Agent Teams cannot access +``` + +For the clean privacy option, user should create a bot in BotFather and paste token into desktop locally, or use a future flow where a locally running manager process receives the token directly and never sends it to our backend. That local-manager flow is probably too complex for MVP. + +## Own-Bot Binding Flow + +Own-bot mode still needs a Telegram account/chat binding. + +Recommended own-bot flow: + +```text +1. User creates bot in BotFather. +2. User pastes token into desktop. +3. Desktop validates getMe. +4. Desktop stores token using a SecretStoragePort based on ApiKeyService-style safeStorage/AES fallback. +5. Desktop checks getWebhookInfo. +6. If webhook exists, explain conflict and ask before deleteWebhook. +7. Desktop starts getUpdates long polling. +8. User sends /start to their own bot. +9. Desktop receives the update locally. +10. Desktop asks user to confirm the Telegram account/chat. +11. Desktop activates binding and creates topics/routes. +``` + +Edge case: + +```text +getUpdates does not work while an outgoing webhook is set. +``` + +So never silently call `deleteWebhook` for an own bot. The bot may be used elsewhere. 
+ +## Route Activation Rules + +After binding, route creation should be explicit: + +```ts +interface MessengerRoute { + id: string; + bindingId: string; + provider: 'telegram'; + teamId: string; + teamIdentitySnapshot: { + teamName: string; + teamPath?: string; + teamConfigHash?: string; + }; + topicRef?: { + providerChatIdHash: string; + providerMessageThreadId: string; + topicNameSnapshot: string; + topicCreatedAt: string; + }; + status: 'active' | 'disabled' | 'revoked' | 'needs_repair'; + createdAt: string; + updatedAt: string; +} +``` + +Rules: + +- Binding is account-level. +- Route is team-level. +- Topic is provider-level delivery state. +- One Telegram account can bind to multiple teams. +- One team route maps to one Telegram topic in that account's bot chat. +- Topic title is display metadata only. Never route by title. +- If topic id is missing or stale, mark `needs_repair` and create a new topic after user confirmation. + +## Multi-Team and Multi-Account Policy + +MVP policy: + +```text +One Telegram account binding per desktop install. +Many team routes under that binding. +One topic per team route. +``` + +Later policy: + +```text +Multiple Telegram accounts per install. +Each account can opt into selected teams. +Routes must include bindingId. +UI can show "Connected as @alice" per route. +``` + +Do not key route ownership only by `teamName`. + +Use a stable team id or derived identity: + +```text +teamId = persisted id if available +fallback = hash(canonical team path + creation marker) +teamName = mutable display snapshot +``` + +This is important because previous local research found many surfaces still use names like `teamName` and `memberName`. + +## Threat Model and Required Controls + +### Forwarded setup link or screenshot + +Control: + +```text +Desktop confirmation is mandatory. +``` + +The Telegram claim only moves challenge to `telegram_claimed`. 
+ +### Stale or replayed nonce + +Controls: + +```text +TTL 5-10 minutes +single-use challenge hash +state transition compare-and-swap +idempotent duplicate update handling +generic expired response +``` + +### Two Telegram users race the same nonce + +Control: + +```text +First claim locks the challenge. +Second distinct user marks suspicious or gets generic expired response. +Desktop must show the first claimed display name before confirm. +``` + +### Username changed + +Control: + +```text +Never use username for identity. +Update display snapshot from new messages. +``` + +### Wrong chat type + +Control: + +```text +Official MVP accepts only private chat with the bot. +Group/supergroup/channel starts are rejected unless a future group-mode route is explicitly built. +``` + +### Telegram user blocks bot + +Control: + +```text +Outbound send failure transitions binding or route to provider_blocked_bot / needs_attention. +Do not keep retrying indefinitely. +``` + +### Desktop offline after binding + +Control: + +```text +Default official mode has no durable plaintext backend queue. +Backend replies with offline notice or "desktop unavailable". +``` + +### Backend receives duplicate Telegram updates + +Control: + +```text +ProviderUpdateLedger keyed by provider + botMode + update_id. +Idempotent inbound message creation. +``` + +### Backend restart during claimed-but-unconfirmed pairing + +Control: + +```text +Persist pending challenge state with TTL. +Desktop reconnect asks for current challenge status. +``` + +### User reinstalls desktop + +Control: + +```text +Install identity is local. +If lost, existing bindings become orphaned until user reconnects. +Offer revoke from Telegram with /disconnect. +``` + +### Shared computer or copied config + +Control: + +```text +Store install secret under app data using OS secret storage where possible. +Copying JSON config alone should not authenticate a binding. 
+``` + +### Own-bot token leaked + +Controls: + +```text +SafeStorage/AES fallback +0o600 file permissions +masked list output +redacted logs +explicit token rotation and delete +``` + +### Managed bot token fetched by our backend + +Control: + +```text +Do not market Managed Bots as token-private. +Offer "own token locally" for maximum privacy. +``` + +## Security Storage Recommendation + +Create feature-local ports: + +```ts +interface MessengerInstallIdentityStore { + getOrCreateInstallIdentity(): Promise<MessengerInstallIdentity>; + rotateInstallSecret(reason: string): Promise<void>; +} + +interface MessengerSecretStore { + saveSecret(ref: string, plaintext: string): Promise<void>; + readSecret(ref: string): Promise<string | null>; + deleteSecret(ref: string): Promise<void>; + getStatus(): Promise<MessengerSecretStoreStatus>; +} +``` + +Implementation: + +- For local desktop, adapt the existing `ApiKeyService` encryption strategy. +- Do not import `ApiKeyService` directly into core. +- Keep plaintext secrets out of renderer contracts. +- Renderer gets masked status only. +- Main process owns token validation, storage, polling, and provider calls. + +Storage location: + +```text +App-owned data under getAppDataPath(), not ~/.claude/teams. +``` + +Reason: + +```text +Messenger bindings are app integration state, not agent CLI/team project data. +``` + +## Backend Data Minimization for Official Bot + +Backend tables should separate routing metadata from message payloads. 
+ +Minimum default mode: + +```text +messenger_bindings + binding_id + provider + install_id_hash + telegram_user_id_hmac + telegram_chat_id_ciphertext + display_snapshot + status + created_at + confirmed_at + +messenger_routes + route_id + binding_id + team_id_hash + provider_thread_id + status + created_at + updated_at + +telegram_update_ledger + bot_mode + update_id + update_type + processed_at + result_kind +``` + +Avoid in default mode: + +```text +durable plaintext inbound bodies +durable plaintext outbound bodies +raw Telegram ids in logs +raw start payloads in logs +own-bot tokens on backend +``` + +If we later add encrypted queue: + +```text +desktop public key +backend stores ciphertext only +desktop decrypts when online +outbound offline queue requires explicit user opt-in +``` + +## Connect Wizard UX + +Recommended happy path: + +```text +Settings -> Messenger -> Telegram -> Connect + +Step 1: Choose mode + Default: Agent Teams bot + Advanced: My own bot + +Step 2: Select teams + All active teams by default, editable checklist + +Step 3: Open Telegram + QR + button, expires countdown + +Step 4: Confirm + "Telegram account @alice wants to connect" + show first name, username, provider user id suffix/hash + +Step 5: Topics + create one topic per selected team + show per-team success/needs repair +``` + +Failure UI: + +- Link expired: one-click regenerate. +- Wrong Telegram account claimed: cancel and regenerate. +- Desktop offline during claim: bot says "finish setup on desktop". +- Topic creation failed: binding can still be active, route is `needs_repair`. +- Bot blocked: show reconnect instructions. + +No hidden auto-activation. 
+ +## Clean Architecture Placement + +Feature slice: + +```text +src/features/messenger-connectors/ + contracts/ + index.ts + messengerConnectorApi.ts + telegramDtos.ts + core/ + domain/ + bindingState.ts + pairingChallenge.ts + routePolicy.ts + providerIdentity.ts + visibilityPolicy.ts + application/ + ports/ + MessengerBindingStore.ts + MessengerSecretStore.ts + MessengerProviderGateway.ts + MessengerDesktopSessionGateway.ts + StartPairingUseCase.ts + ClaimPairingUseCase.ts + ConfirmPairingUseCase.ts + RevokeBindingUseCase.ts + RepairRoutesUseCase.ts + main/ + composition/ + adapters/ + input/ + messengerIpcHandlers.ts + telegramWebhookRoutes.ts + output/ + TelegramOfficialBotGateway.ts + TelegramOwnBotGateway.ts + FileMessengerBindingStore.ts + ElectronMessengerSecretStore.ts + infrastructure/ + telegram/ + storage/ + crypto/ + preload/ + renderer/ +``` + +Important dependency rule: + +```text +Telegram API specifics are adapter details. +Binding state, route state, replay prevention, and privacy policy are core/application rules. +``` + +## Tests To Add Before Shipping + +Domain/application: + +- `startPairing` creates a challenge with TTL and hashed nonce. +- `claimPairing` rejects unknown nonce. +- `claimPairing` rejects expired nonce. +- `claimPairing` is idempotent for duplicate same Telegram update. +- `claimPairing` marks suspicious for a different user racing the same nonce. +- `confirmPairing` fails if no Telegram claim exists. +- `confirmPairing` activates only the claimed binding. +- `cancelPairing` prevents later activation. +- `revokeBinding` disables routes. +- `routePolicy` never keys by username or topic title. + +Adapter/integration: + +- Telegram webhook verifies `secret_token`. +- Telegram update ledger dedupes `update_id`. +- `/start` in group/supergroup is rejected in MVP. +- JS persistence stores Telegram ids as strings. +- Raw nonce is not logged. +- Raw own-bot token is not sent to renderer. 
+- Own-bot `getWebhookInfo.url` conflict is surfaced before `deleteWebhook`. +- When `safeStorage` is unavailable, the fallback path still encrypts with AES and the file mode is restrictive. + +End-to-end scenarios: + +- Happy path official bot connect. +- Forwarded link claimed by wrong Telegram account, desktop cancels, no route active. +- Link expires, user regenerates, old link stays dead. +- Two users race same link. +- Desktop restarts after Telegram claim and before confirm. +- User blocks bot after binding. +- User revokes binding from desktop. +- User sends `/disconnect` in Telegram. +- Team renamed after topic exists. +- Team route repair creates new topic without reusing title as identity. + +## Decision Update + +The best implementation decision after this pass: + +```text +Default: + official shared bot + desktop-originated one-time deep link + desktop confirmation required + no durable plaintext backend message queue + one topic per team route + +Fallback: + bot-first short code entry + +Advanced privacy: + own bot token pasted into desktop + token stored locally + local getUpdates polling + +Later: + encrypted backend queue + Telegram Mini App/Login identity layer + Managed Bots only as convenience, not as "token inaccessible to us" +``` + +Main open question left: + +```text +Do we want one Telegram account per desktop install for MVP, +or allow multiple connected Telegram accounts immediately? +``` + +My recommendation: + +🎯 9 🛡️ 8 🧠 4 Approx change size: +600-1200 LOC compared to account-agnostic routing + +```text +Start with one account per install, but include bindingId in every route model. +That keeps MVP UX simple and leaves the data model ready for multi-account later.
+``` diff --git a/docs/research/messenger-connectors-uncertainty-pass-34.md b/docs/research/messenger-connectors-uncertainty-pass-34.md new file mode 100644 index 00000000..38ca885a --- /dev/null +++ b/docs/research/messenger-connectors-uncertainty-pass-34.md @@ -0,0 +1,897 @@ +# Messenger Connectors - Uncertainty Pass 34 + +Date: 2026-04-29 +Scope: official shared bot relay transport, webhook ACK semantics, desktop online detection, no durable plaintext backend queue, and local commit guarantees + +## Executive Delta + +The weakest reliability boundary is: + +```text +Telegram webhook +-> Agent Teams backend +-> online desktop relay session +-> durable local inbound message +-> lead/team routing +``` + +The core problem: + +```text +If backend returns HTTP 2xx to Telegram before the desktop durably commits the message, +then a crash, reconnect, or dropped ACK can lose the user message forever. +``` + +Because the default product decision is "no durable plaintext backend queue", the backend cannot solve this by storing pending message bodies until desktop returns. + +So the default official-bot rule should be: + +```text +Return success to Telegram only after one of these is true: + +1. Desktop ACKed that it durably committed the inbound message locally. +2. Backend handled the update terminally, for example no desktop is online and an offline notice was sent. +3. Backend intentionally rejects the webhook attempt so Telegram retries later. +``` + +This is the exact bridge that must be designed as a protocol, not as a best-effort event bus. + +## Source Facts Rechecked + +Telegram official facts checked on 2026-04-29: + +- Bot API has two mutually exclusive update delivery modes: `getUpdates` and webhooks. +- Incoming updates are stored on Telegram servers until the bot receives them, but not longer than 24 hours. +- `getUpdates.offset` confirms updates when the offset is greater than their `update_id`. 
+- `Update.update_id` is useful for ignoring repeated updates or restoring sequence if webhook updates are out of order. +- `setWebhook.max_connections` controls the maximum simultaneous HTTPS connections Telegram may use for webhook delivery. +- On webhook delivery, unsuccessful requests are retried for a reasonable number of attempts. +- `WebhookInfo.pending_update_count`, `last_error_date`, and `last_error_message` expose webhook backlog/error state. +- `setWebhook.secret_token` adds `X-Telegram-Bot-Api-Secret-Token` to webhook requests. + +Transport facts checked on 2026-04-29: + +- Node.js v22 has a stable native WebSocket client API. +- Node.js v22 does not provide a built-in WebSocket server, so a Node backend still needs a server library. +- WebSocket is full-duplex over one connection, which fits `offer -> ACK -> control` flows. +- Server-Sent Events are one-way server-to-client. Client ACKs require a separate HTTP request. +- SSE supports `id` and reconnection behavior through EventSource, but it is still one-way. +- Existing repo already uses Fastify 5.7.4. `@fastify/websocket` 11.2.0 is the current npm package for WebSocket support and is built on `ws@8`. +- Snyk lists `ws@8.20.0` as published March 21, 2026, latest, with no direct vulnerabilities in its database at lookup time. + +Sources: + +- https://core.telegram.org/bots/api#getting-updates +- https://core.telegram.org/bots/api#update +- https://core.telegram.org/bots/api#setwebhook +- https://core.telegram.org/bots/api#getwebhookinfo +- https://nodejs.org/learn/getting-started/websocket +- https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events +- https://www.npmjs.com/package/%40fastify/websocket +- https://security.snyk.io/package/npm/ws/8.20.0 + +Local code facts checked: + +- Existing `HttpServer` is Fastify-based and binds to `127.0.0.1` by default for local app/browser API. 
+- Existing `src/main/http/events.ts` implements SSE for local UI clients, with keepalive comments every 30 seconds. +- That SSE stream has no durable event id/resume model and no client-to-server ACK path. It is fine for local UI refresh, not for Telegram relay commit. +- The repo already has good local durability patterns: + - `VersionedJsonStore.updateLocked()` + - `atomicWriteAsync` + - `withFileLock` + - runtime delivery journals with payload hash, pending/committed states, idempotency keys +- These patterns are directly relevant to the desktop-local inbound store and delivery ledger. + +Implication: + +```text +Do not reuse the existing local SSE event broadcaster as the official bot relay. +Build a dedicated MessengerRelay protocol. +``` + +## Top 3 Relay Architecture Options + +### 1. Desktop outbound WebSocket with local-commit ACK + +🎯 9 🛡️ 8 🧠 7 Approx change size: 4000-8500 LOC + +Shape: + +```text +desktop main process + opens WSS connection to Agent Teams relay backend + authenticates install/binding/session + sends route inventory hash and heartbeat + +backend + receives Telegram webhook + resolves binding/route/topic + sends inbound offer over WebSocket + waits for desktop local-commit ACK + returns Telegram 2xx only after commit ACK +``` + +Why this is best: + +- WebSocket is bidirectional, so `offer -> ack -> cancel -> repair -> heartbeat` stays on one connection. +- Desktop can use Node 22 native WebSocket client with no new desktop dependency. +- Backend can use Fastify + `@fastify/websocket` if cloud backend is Node/Fastify. +- It supports real online presence, route inventory sync, and backpressure. +- It avoids backend durable plaintext message bodies. + +Weaknesses: + +- Needs a real protocol, not just "send JSON over socket". +- Needs careful ACK timeout and reconnect behavior. +- Backend still holds plaintext in memory during the webhook attempt. +- Active-session ACK timeout is ambiguous: desktop might have committed but ACK was lost. 
+ +Verdict: + +```text +Use this for default official shared bot. +``` + +### 2. SSE downlink plus HTTPS ACK uplink + +🎯 7 🛡️ 7 🧠 6 Approx change size: 3500-7000 LOC + +Shape: + +```text +desktop opens EventSource/SSE to backend +backend pushes inbound offers over SSE +desktop POSTs /ack for local commit +desktop POSTs heartbeat/inventory separately +``` + +Why it is attractive: + +- SSE is simple and HTTP-friendly. +- Browser/EventSource has reconnect behavior. +- This resembles the repo's local `/api/events` pattern. + +Weaknesses: + +- SSE is one-way, so ACKs and heartbeats need extra HTTP calls. +- Correlating SSE offer with POST ACK is more complex under reconnect. +- Existing local SSE implementation lacks durable event ids and Last-Event-ID resume. +- Browser SSE connection limits matter for renderer use. Desktop main process can avoid browser limits, but the protocol is still less direct than WebSocket. + +Verdict: + +```text +Acceptable fallback if WebSocket is blocked by enterprise proxies. +Not the primary implementation. +``` + +### 3. Desktop polling/long-polling relay + +🎯 6 🛡️ 6 🧠 4 Approx change size: 2200-5000 LOC + +Shape: + +```text +desktop polls backend for pending inbound updates +backend returns message bodies if any +desktop commits locally and POSTs ACK +``` + +Why it is attractive: + +- Easier to reason about than long-lived sockets. +- Works in many locked-down networks. +- Simple to implement initially. + +Weaknesses: + +- To avoid message loss, backend must hold pending plaintext while waiting for poll. +- If backend refuses durable plaintext queue, polling becomes either lossy or high-frequency. +- Latency is worse. +- Online/offline state becomes fuzzy. + +Verdict: + +```text +Not recommended for default no-plaintext-queue mode. +Can be a diagnostics fallback, not the main relay. 
+``` + +## Future Reliability Option + +### Durable encrypted backend queue + +🎯 8 🛡️ 9 🧠 9 Approx change size: 7000-14000 LOC + +Shape: + +```text +desktop publishes public encryption key during binding +backend stores only ciphertext message bodies +desktop decrypts when online +backend can survive restarts and desktop offline windows +``` + +This is the right advanced/premium reliability mode, but it is not the MVP default. + +Why: + +- Key rotation is non-trivial. +- Device loss/reinstall can make queued messages undecryptable. +- Multi-device routing becomes harder. +- Attachments need a separate encrypted blob policy. +- User copy must explain exactly who can decrypt what. + +## Recommended Default Protocol + +### High-level flow + +```text +Telegram -> Backend webhook + 1. verify webhook secret_token + 2. dedupe update_id metadata + 3. resolve binding/route/topic + 4. check active desktop relay session + 5. if no healthy session: send offline notice, return 2xx + 6. if healthy session: offer update to desktop + 7. desktop validates route and commits local inbound message + 8. desktop ACKs local commit + 9. backend returns 2xx to Telegram +``` + +The backend durable metadata ledger may store: + +```text +provider +bot mode +update_id +route id +binding id +attempt count +status +timestamps +error class +payload hash +``` + +It should not store in default mode: + +```text +raw message body +raw Telegram chat id in logs +raw Telegram user id in logs +attachment file bodies +bot tokens +``` + +### Webhook ACK invariant + +```text +Telegram 2xx means: + Agent Teams either got the message durably into desktop local storage, + or intentionally terminal-handled it, for example offline notice. +``` + +Telegram non-2xx means: + +```text +Agent Teams has not accepted responsibility for the update. +Telegram may retry the same update later. +``` + +This must be an explicit code invariant. 
+ +## Active Session Definition + +Do not define "online" as "there is a socket object". + +Define it as: + +```ts +interface MessengerRelaySession { + sessionId: string; + installId: string; + bindingId: string; + authenticatedAt: string; + lastHeartbeatAt: string; + lastPongAt: string; + routeInventoryHash: string; + protocolVersion: number; + status: 'ready' | 'stale' | 'draining' | 'closed'; +} +``` + +A session is healthy only if: + +```text +status == ready +authenticated install secret is valid +binding is active +route inventory hash is current or compatible +last pong is recent +desktop protocol version is supported +no newer session has stolen the lease +``` + +Suggested timing: + +```text +ping interval: 15s +stale after: 45s +hard close after: 75s +inbound offer ACK deadline: 3-8s +``` + +Use jitter for reconnect: + +```text +initial reconnect: 1s +max reconnect: 30s +jitter: 20-40 percent +``` + +## Single Active Session Lease + +MVP should allow one active relay session per binding. + +Rule: + +```text +New authenticated session for the same bindingId steals the lease. +Old session transitions to draining/closed and cannot ACK new offers. +``` + +Why: + +- Prevents two desktop processes writing the same Telegram update to different local stores. +- Avoids split-brain if user launches two app instances. +- Keeps support/debugging simpler. + +Later multi-device mode can use: + +```text +bindingId + deviceId + route assignment +``` + +But do not start there. 
+ +## Inbound Offer Envelope + +Provider-neutral envelope: + +```ts +interface MessengerInboundOffer { + type: 'messenger.inbound.offer'; + protocolVersion: 1; + deliveryId: string; + provider: 'telegram'; + bindingId: string; + routeId: string; + orderingKey: string; + providerUpdateId: string; + providerMessageId?: string; + providerMessageThreadId?: string; + providerDate?: string; + receivedAt: string; + expiresAt: string; + payloadHash: string; + payload: MessengerInboundPayload; +} +``` + +Payload: + +```ts +interface MessengerInboundPayload { + kind: 'text' | 'command' | 'unsupported'; + text?: string; + replyTo?: ProviderMessageLink; + sender: { + providerUserIdHash: string; + displayNameSnapshot: string; + usernameSnapshot?: string; + }; +} +``` + +Desktop ACK: + +```ts +type MessengerInboundAck = + | { + type: 'messenger.inbound.ack'; + deliveryId: string; + status: 'committed'; + localMessageId: string; + localCommitHash: string; + committedAt: string; + } + | { + type: 'messenger.inbound.ack'; + deliveryId: string; + status: 'duplicate_committed'; + localMessageId: string; + committedAt: string; + } + | { + type: 'messenger.inbound.ack'; + deliveryId: string; + status: 'rejected_terminal' | 'rejected_retryable'; + reasonCode: string; + detail?: string; + }; +``` + +## Desktop Local Commit Rules + +Desktop must ACK `committed` only after: + +```text +1. Provider update id was deduped locally. +2. Route binding is still active. +3. Message payload passed visibility/safety validation. +4. Message was written to a local durable inbound store. +5. Local store fsync/atomic-write equivalent completed as far as our platform layer supports. 
+``` + +Recommended stores: + +```text +MessengerDesktopInboundStore + durable provider update payloads after acceptance + +MessengerLocalDeliveryLedger + tracks delivery from inbound store to lead/team inbox + +MessengerProviderUpdateLedger + dedupes providerUpdateId locally +``` + +Do not ACK based on: + +```text +renderer state update +toast notification shown +in-memory queue push only +lead process prompt accepted but not persisted +``` + +## Backend Webhook Decision Matrix + +### No active session + +```text +send offline notice +return Telegram 2xx +record metadata: terminal_offline +``` + +This matches the current product decision: + +```text +desktop offline -> no plaintext queue -> honest offline response +``` + +### Active session, offer ACKed committed + +```text +return Telegram 2xx +record metadata: desktop_committed +``` + +### Active session, duplicate committed ACK + +```text +return Telegram 2xx +record metadata: duplicate_desktop_committed +``` + +### Active session, terminal reject + +Examples: + +```text +route revoked +unknown topic +unsupported chat type +payload rejected by policy +``` + +Action: + +```text +send user-facing rejection if useful +return Telegram 2xx +record metadata: terminal_rejected +``` + +### Active session, retryable reject + +Examples: + +```text +local store locked +team route repairing +desktop still loading route inventory +``` + +Action: + +```text +return Telegram 503 for a bounded number of attempts or bounded age +then fall back to offline/degraded notice and 2xx +``` + +### Active session, no ACK before deadline + +This is the hardest case. + +Recommended: + +```text +1. Mark metadata: ack_timeout_ambiguous. +2. Return Telegram 503 if within retry budget. +3. On retry, re-offer with same providerUpdateId and payloadHash. +4. Desktop must return duplicate_committed if it already wrote the message. +5. If retry budget expires, send "delivery uncertain/offline" notice and return 2xx. 
+``` + +Do not immediately send a definitive "not delivered" notice after an ACK timeout, because the desktop might have committed and the ACK may have been lost. + +## Retry Budget + +Use Telegram retries only for ambiguous transient failures, not as a product queue. + +Suggested initial policy: + +```text +max retry deferrals per update: 2 +max retry window: 30-60s +if no healthy desktop by then: offline/degraded notice and 2xx +``` + +Why: + +- Keeps the product promise: no default durable backend plaintext queue. +- Avoids indefinite webhook backlog. +- Lets short reconnects recover. +- Does not silently turn Telegram into a long-term queue. + +## Ordering Rules + +Telegram `update_id` is useful for duplicate detection, but do not assume every update id is contiguous forever. + +Route ordering should use: + +```text +orderingKey = provider + bindingId + routeId +providerOrder = update_id plus provider message date/message_id when available +``` + +Backend: + +```text +Use an in-memory per-route serial executor while the process is alive. +Do not persist plaintext to achieve ordering in default mode. +``` + +Desktop: + +```text +Deduplicate by providerUpdateId. +Append accepted messages in provider order when possible. +If out-of-order arrival is detected, store both and mark ordering warning. +``` + +Webhook setting: + +```text +Do not set max_connections to 1 globally for the shared bot unless traffic is tiny. +Use route-level ordering instead. +``` + +Reason: + +```text +max_connections=1 would serialize every customer through one webhook lane. +That is safe but does not scale. +``` + +## Route Inventory Handshake + +When desktop connects: + +```text +1. authenticate install/binding +2. send protocol version +3. send route inventory hash +4. backend responds with active routes known server-side +5. desktop responds with local route inventory +6. 
both sides mark compatible or needs_repair +``` + +If route inventory mismatches: + +```text +do not deliver inbound user messages into uncertain routes +ask desktop to repair or refresh +``` + +This protects against cases like: + +- team deleted locally +- team renamed +- topic recreated +- binding revoked on another process +- local route store restored from old backup + +## Desktop To Lead Delivery + +The desktop local commit should not directly mean "agent saw it". + +Better state split: + +```text +provider update accepted locally +-> local inbound message committed +-> route to lead/team inbox scheduled +-> inbox write committed +-> agent turn started +-> response captured +-> outbound provider delivery ledger +``` + +If inbox write fails after ACKing Telegram: + +```text +The message is not lost because it is in MessengerDesktopInboundStore. +MessengerLocalDeliveryLedger can retry delivery to lead/team inbox. +``` + +This is the same reliability style as existing runtime delivery journals. + +## Official Bot vs Own Bot Difference + +Official shared bot default: + +```text +Telegram sends webhooks to our backend. +Backend must decide quickly whether desktop accepted the update. +If desktop is offline, backend sends offline notice and ACKs Telegram. +No catch-up after offline notice. +``` + +Own bot local mode: + +```text +Desktop can use getUpdates long polling directly. +If desktop is offline, Telegram may retain updates for up to 24 hours. +When desktop returns, it can catch up, because Telegram is the queue. +``` + +This means own-bot mode has a surprising reliability advantage: + +```text +It can support Telegram-side catch-up without our backend storing plaintext. +``` + +But UX must say it clearly: + +```text +Own bot can catch up on recent Telegram updates received while your computer was asleep, subject to Telegram retention. +The default Agent Teams bot replies with an offline notice instead of queueing messages.
+``` + +## Technology Recommendation + +### Desktop main process client + +Use Node 22 native WebSocket client. + +🎯 9 🛡️ 8 🧠 4 Approx change size: 700-1400 LOC + +Why: + +- No new dependency for desktop client. +- Node docs say v22.4.0 marked WebSocket stable. +- Full-duplex fits ACK/control messages. + +### Backend WebSocket server + +If backend is Node/Fastify, use `@fastify/websocket` 11.2.0. + +🎯 8 🛡️ 8 🧠 5 Approx change size: 900-1800 LOC backend-side + +Why: + +- Aligns with existing Fastify stack style. +- Built on `ws@8`. +- Has TypeScript declarations. + +Note: + +```text +This dependency is for cloud/backend package, not necessarily this Electron app package. +``` + +### Fallback transport + +Keep SSE + HTTPS ACK as an optional enterprise fallback. + +🎯 7 🛡️ 7 🧠 6 Approx change size: +1200-2500 LOC after WebSocket protocol exists + +Why: + +- Some networks/proxies break WebSocket. +- SSE is easier to pass through HTTP infrastructure. + +But: + +```text +Do not implement fallback until WebSocket protocol semantics are stable. +Otherwise two transports will double the bug surface. +``` + +## Error Copy Policy + +Telegram user-facing responses should be honest and short. + +No desktop session: + +```text +Agent Teams desktop is offline for this team. Open the app and resend your message. +``` + +Route disabled: + +```text +This team is no longer connected to Telegram. Reconnect it in Agent Teams. +``` + +Delivery uncertain after retry budget: + +```text +Agent Teams could not confirm delivery to desktop. Check the app or resend. +``` + +Unsupported media in MVP: + +```text +This Telegram connection currently supports text only. Send the details as text. +``` + +Avoid: + +```text +"Message delivered" before desktop commit ACK. +"Queued" in default mode. +"We will process this when online" in default mode. 
+``` + +## Security Rules + +Relay authentication: + +```text +desktop signs session start with install secret +backend issues short-lived relay session token +WebSocket uses WSS only +old session token cannot ACK after lease is stolen +ACK includes deliveryId and sessionId +``` + +Frame validation: + +```text +max payload size for text MVP +strict JSON object shape +protocolVersion required +unknown frame types rejected +provider ids stored as strings +raw provider ids redacted in logs +``` + +Replay controls: + +```text +providerUpdateId dedupe on backend metadata ledger +providerUpdateId dedupe on desktop local ledger +deliveryId unique per backend offer +payloadHash conflict detection +duplicate committed ACK path +``` + +## Edge Cases To Test + +Webhook and ACK: + +- Telegram webhook with valid secret token and active desktop returns 2xx only after desktop commit ACK. +- Telegram webhook with no active desktop sends offline notice and returns 2xx. +- Active desktop socket exists but its heartbeat is stale, so backend treats it as offline. +- Desktop commits locally but ACK response is lost, so the retry returns `duplicate_committed`. +- Desktop receives an offer after `expiresAt` and rejects it as retryable or terminal, per policy. +- Backend process crashes before returning 2xx, Telegram retries. +- Backend process crashes after returning 2xx but before metadata update, metadata repair handles it. + +Ordering and duplicates: + +- Same `update_id` delivered twice. +- Two updates for same route arrive concurrently. +- Out-of-order updates due to parallel webhook connections. +- Update id jumps after a long quiet period. +- Payload hash conflict for same update id. + +Session lifecycle: + +- Second desktop instance steals lease. +- Old session tries to ACK after lease stolen. +- Desktop reconnects with old route inventory hash. +- Binding revoked while socket is open. +- Route disabled while offer is in flight.
+ +Local delivery: + +- Desktop commits inbound message, then app crashes before writing team inbox. +- Local delivery ledger retries inbox write on restart. +- Inbox path locked temporarily. +- Team deleted after local commit. +- Lead process offline after local commit. + +Privacy: + +- Backend durable stores contain no plaintext message bodies in default mode. +- Logs redact raw Telegram ids and message text. +- Offline notice path does not persist message body. +- Metrics count event classes without payload. + +Own bot: + +- Desktop has no webhook and polls with `getUpdates`. +- Existing webhook on own bot is detected and not deleted silently. +- Desktop catches up updates after restart within Telegram retention window. +- Desktop handles updates older than local route creation as ignored. + +## Decision Update + +The feature should introduce: + +```text +MessengerRelaySessionManager +MessengerRelayProtocol +MessengerBackendUpdateMetadataLedger +MessengerDesktopInboundStore +MessengerLocalDeliveryLedger +``` + +Recommended default: + +```text +official shared bot +WebSocket desktop relay +local-commit ACK before Telegram 2xx +offline notice when no healthy desktop session +bounded Telegram retry only for ambiguous active-session failures +no durable plaintext backend queue +``` + +Main open uncertainty left after this pass: + +```text +Should official shared bot use limited Telegram webhook retries for active-session ACK timeouts, +or always terminal-handle ambiguous timeouts with "delivery uncertain" and 2xx? +``` + +My current recommendation: + +🎯 8 🛡️ 8 🧠 6 Approx change size: +500-1200 LOC + +```text +Use limited retry deferral for active-session ACK timeouts only. +Never use retry deferral when there is clearly no healthy desktop session. +``` + +Reason: + +```text +This recovers short reconnects and ACK-loss cases without turning default mode into a hidden queue. 
+``` diff --git a/docs/research/messenger-connectors-uncertainty-pass-35.md b/docs/research/messenger-connectors-uncertainty-pass-35.md new file mode 100644 index 00000000..2d8c9df6 --- /dev/null +++ b/docs/research/messenger-connectors-uncertainty-pass-35.md @@ -0,0 +1,965 @@ +# Messenger Connectors - Uncertainty Pass 35 + +Date: 2026-04-29 +Scope: conversation history, Telegram topic projection, teammate-visible messages, backfill policy, canonical local store, and anti-duplication rules + +## Executive Delta + +The next weakest area is: + +```text +local app messages +-> canonical messenger conversation history +-> Telegram topic projection +-> provider message links +-> reply-to routing +``` + +This looks like a UX problem, but it is actually a data model problem. + +If we simply mirror the existing app feed into Telegram, we risk: + +```text +1. Sending internal lead thoughts or slash command output to Telegram. +2. Mixing unrelated teammate replies from inboxes/user.json into the wrong topic. +3. Duplicating the same answer because the local UI feed dedupes differently from provider delivery. +4. Losing reply-to routing because local messages have no provider message link. +5. Creating a Telegram topic that looks like history, but is missing context from before connection. +6. Backfilling old history and accidentally exposing private/internal messages. +``` + +The safest rule: + +```text +Telegram topic is a projection, not the source of truth. +``` + +Canonical history must be a new provider-neutral store: + +```text +MessengerConversationStore + accepted inbound provider messages + external-safe local replies + provider delivery links + route/team/member references + projection state +``` + +The existing `TeamMessageFeedService` is useful as an input, but it is not safe to use as the Telegram projection source directly. 
+ +## Source Facts Rechecked + +Telegram official facts checked on 2026-04-29: + +- Bot API exposes update delivery through `getUpdates` or webhooks. Updates are stored on Telegram servers until the bot receives them, but not longer than 24 hours. +- `Update.update_id` helps ignore repeated updates or restore order if webhook updates arrive out of order. +- `Message.message_id` is unique inside a chat. In some scheduled-message cases it can be `0` and unusable until actually sent. +- `Message.message_thread_id` identifies a message thread or forum topic for supergroups and private chats. +- `createForumTopic` can create a topic in a forum supergroup or a private chat with a user. It returns a `ForumTopic`. +- `editForumTopic` can change topic name/icon in a forum supergroup or private chat with a user. +- `copyMessages` supports `message_thread_id`, copies 1-100 known messages, and returns `MessageId[]`. +- `sendMessage` and media methods return the sent `Message` on success. This returned provider message id is required for future reply-to routing. +- `sendChatAction` supports `message_thread_id` and lasts 5 seconds or less. Telegram recommends it only when a response will take noticeable time. +- `sendMessageDraft` can stream a partial message to a user while it is being generated, with optional `message_thread_id`. +- `editMessageText` can edit messages, but it is primarily for changing existing message history and has 48-hour limits for certain business messages not sent by the bot. +- `deleteMessage` has important limits, including a 48-hour deletion window for normal messages and service-message exceptions. +- Telegram FAQ says bots can see messages sent to them, and group privacy mode changes what group messages they can see. Treat bots as third-party participants. 
+ +Sources: + +- https://core.telegram.org/bots/api#getting-updates +- https://core.telegram.org/bots/api#update +- https://core.telegram.org/bots/api#message +- https://core.telegram.org/bots/api#createforumtopic +- https://core.telegram.org/bots/api#editforumtopic +- https://core.telegram.org/bots/api#copymessages +- https://core.telegram.org/bots/api#sendmessage +- https://core.telegram.org/bots/api#sendchataction +- https://core.telegram.org/bots/api#sendmessagedraft +- https://core.telegram.org/bots/api#editmessagetext +- https://core.telegram.org/bots/api#deletemessage +- https://telegram.org/faq + +Inference from the Bot API docs: + +```text +The Bot API is update-driven and method-driven. +It does not document a general "read arbitrary private chat history" method for bots. +Therefore Agent Teams must persist the history it needs at acceptance/projection time. +``` + +Local code facts checked: + +- `TeamInboxReader` merges all `inboxes/*.json`, assigns `to` from the filename when absent, and creates deterministic message ids for rows without `messageId`. +- `TeamSentMessagesStore` keeps only the newest 200 messages in `sentMessages.json`. This is a UI/local persistence cap, not a long-term external conversation history. +- `TeamMessageFeedService` merges inbox messages, lead session messages, and sent messages, then dedupes, links passive summaries, attaches lead session ids, and annotates slash command responses. +- `TeamMessageFeedService` is optimized for UI display, not for provider delivery or privacy policy. +- `InboxMessage.source` already has multiple categories: `inbox`, `lead_session`, `lead_process`, `runtime_delivery`, `user_sent`, `system_notification`, `cross_team`, `cross_team_sent`. +- Existing `conversationId` and `replyToConversationId` are used for cross-team routing and can inspire messenger conversation identity, but they are not enough by themselves for Telegram provider links. 
+- `inboxes/user.json` can contain teammate replies to the user without stable provider thread context. + +Implication: + +```text +Messenger history must not be derived lazily from the renderer feed. +It must be committed as a conversation ledger when an external route is involved. +``` + +## Top 3 History Models + +### 1. Canonical MessengerConversationStore plus Telegram projection ledger + +🎯 9 🛡️ 9 🧠 7 Approx change size: 4000-9000 LOC + +Shape: + +```text +provider inbound committed locally +-> MessengerConversationStore append inbound +-> local delivery to lead/team +-> safe local replies appended to same conversation +-> TelegramProjectionLedger sends only eligible projection events +-> provider message ids stored as ProviderMessageLink +``` + +Why this is best: + +- Telegram topic is a view of an external conversation, not the data source. +- Existing UI feed remains untouched for local app semantics. +- Provider delivery idempotency and reply-to mapping have a durable home. +- Future WhatsApp/Discord adapters can reuse the same core model. +- Privacy policy can be enforced before a row becomes externally projectable. + +Weaknesses: + +- More code. +- Needs migration/UI integration to show messenger conversations. +- Requires careful linking from existing team replies to the correct conversation. + +Verdict: + +```text +Use this. +``` + +### 2. Reuse existing TeamMessageFeedService as canonical history + +🎯 5 🛡️ 4 🧠 3 Approx change size: 900-2200 LOC + +Shape: + +```text +watch TeamMessageFeedService +filter messages +send eligible messages to Telegram topic +store provider links separately +``` + +Why it is tempting: + +- Much less new architecture. +- UI already displays this feed. +- Existing refresh/invalidation paths exist. + +Why it is risky: + +- Feed is display-oriented and merges many sources. +- It can annotate slash command responses. +- It dedupes and links passive summaries for UI purposes. 
+- It includes local-only concepts that should never leave the app by default. +- It has no long-term guarantee because `sentMessages.json` caps at 200 rows. + +Verdict: + +```text +Do not use as provider source of truth. +Can be an input to a projection gate only. +``` + +### 3. Telegram topic as the canonical history + +🎯 4 🛡️ 5 🧠 5 Approx change size: 1800-4500 LOC + +Shape: + +```text +send everything important to Telegram +use Telegram topic message ids as history +local app reads/links only provider ids +``` + +Why it is attractive: + +- User sees history in Telegram. +- Less local history UI work. + +Why it fails: + +- Bot API does not provide a general documented way to read arbitrary private chat history later. +- If delivery to Telegram is ambiguous, local source of truth is unclear. +- If user deletes messages or blocks bot, local product history degrades. +- Provider-specific semantics leak into core. +- WhatsApp/Discord will not match exactly. + +Verdict: + +```text +Reject for core architecture. +Telegram is projection only. 
+``` + +## Recommended Canonical Model + +Use two related ledgers: + +```text +MessengerConversationStore + what happened in the external-user conversation + +MessengerProviderProjectionLedger + what was attempted/sent/linked in Telegram +``` + +Conversation row: + +```ts +interface MessengerConversationMessage { + id: string; + conversationId: string; + routeId: string; + bindingId: string; + teamId: string; + direction: 'inbound_from_user' | 'outbound_to_user' | 'internal_note'; + author: { + kind: 'external_user' | 'team_member' | 'team_lead' | 'system'; + memberId?: string; + displayName: string; + }; + text: string; + createdAt: string; + externalVisibility: + | 'projectable' + | 'local_only' + | 'blocked_by_policy' + | 'requires_manual_approval'; + source: { + kind: + | 'telegram_update' + | 'team_inbox' + | 'lead_session' + | 'runtime_delivery' + | 'manual_ui' + | 'system'; + localMessageId?: string; + providerUpdateId?: string; + providerMessageId?: string; + leadSessionId?: string; + }; + replyTo?: { + conversationMessageId?: string; + providerMessageLink?: ProviderMessageLink; + localMessageId?: string; + }; + policy: { + sanitized: boolean; + strippedInternalBlocks: boolean; + reasonCodes: string[]; + }; +} +``` + +Projection row: + +```ts +interface MessengerProviderProjectionRecord { + id: string; + conversationMessageId: string; + provider: 'telegram'; + routeId: string; + providerTarget: { + chatIdHash: string; + messageThreadId: string; + }; + status: + | 'pending' + | 'sending' + | 'sent' + | 'ambiguous' + | 'failed_retryable' + | 'failed_terminal' + | 'suppressed'; + payloadHash: string; + providerMessageLink?: ProviderMessageLink; + attempts: number; + createdAt: string; + updatedAt: string; + lastError?: string; +} +``` + +Important: + +```text +The conversation store can contain local-only rows. +The projection ledger can only contain rows that passed external visibility policy. 
+``` + +## What Counts As Conversation History + +For Telegram user-facing history, include: + +```text +1. User inbound messages accepted from Telegram. +2. Lead replies explicitly addressed to user. +3. Teammate replies explicitly addressed to user. +4. User manual messages from local UI that are intentionally sent to the team under this route. +5. Short system status messages that are explicitly external-facing, for example "desktop offline". +``` + +Do not include by default: + +```text +lead thoughts +tool summaries +slash command outputs +task status notifications +cross-team internal messages +teammate-to-teammate chat +permission_request JSON +idle heartbeats +bootstrap check-ins +raw XML/agent blocks +attachments until media policy is implemented +``` + +This must be enforced before a message is appended as `projectable`. + +## How To Handle Teammate Messages To User + +The user asked for this: + +```text +Messages from teammates to the user should appear in Telegram, +with each teammate clearly signed. +``` + +Recommended rule: + +```text +Any known team member message with to == "user" can be appended to the conversation +only if it is linked to an active messenger route/conversation. +``` + +Rendering: + +```text +[Frontend] Alice +I found the failing test. The callback resolves before token refresh. +``` + +```text +[QA] Mark +Reproduced on the latest build. Only happens after session restore. +``` + +Why prefix instead of separate bots: + +- One bot per team member is much harder to manage. +- Multiple bots do not solve core routing. +- Prefix keeps the topic readable. +- It works across providers later. + +Routing requirement: + +```text +Do not send every message to user globally. +Send only messages whose conversationId or relay link ties them to the active messenger conversation. 
+``` + +## Conversation Identity + +Use one active user-facing conversation per team route in MVP: + +```text +conversationId = routeId + currentConversationSeq +``` + +MVP can start with: + +```text +one open conversation per team topic +``` + +Later: + +```text +multiple conversations per team topic with task/thread labels +``` + +Why not one conversation per message: + +- Too noisy. +- Hard for the lead to maintain context. +- Telegram topic already groups by team. + +Why not only one global conversation for all teams: + +- Reply routing becomes ambiguous. +- User needs team-level separation. +- Topics per team become mostly cosmetic. + +## Backfill Policy + +Backfill is risky because old local history may contain private/internal context. + +Top 3 backfill options: + +### A. No automatic backfill, send a compact connection marker + +🎯 9 🛡️ 9 🧠 3 Approx change size: 500-1200 LOC + +On topic creation: + +```text +Connected to Agent Teams. +Team: Frontend +New messages will appear here. +``` + +Optional local-only UI shows older app history, but Telegram starts clean. + +Verdict: + +```text +Use for MVP. +``` + +### B. User-approved summary backfill + +🎯 8 🛡️ 8 🧠 6 Approx change size: 1800-4000 LOC + +Desktop prepares a summary: + +```text +Recent context: +- Alice is debugging auth callback tests. +- Mark is checking session restore. +- Open question: should refresh happen before redirect? +``` + +User explicitly approves before sending. + +Verdict: + +```text +Good Phase 2. +``` + +### C. Raw transcript backfill + +🎯 4 🛡️ 3 🧠 5 Approx change size: 1600-3600 LOC + +Desktop sends last N messages from local feed into Telegram. + +Problems: + +- High privacy leak risk. +- Rate-limit/noise risk. +- Duplicates provider projection. +- Old messages may lack clean source/route links. +- Telegram message timestamps become send time, not original time. + +Verdict: + +```text +Reject by default. +Only allow export/manual paste workflows later. 
+``` + +## History Display In Telegram + +Telegram topic should show: + +```text +inbound user message +team reply with member prefix +short status markers +optional typing/draft/progress indicator +``` + +It should not try to reproduce the full local app timeline. + +Recommended topic message examples: + +```text +You +Can you check why login redirects loop? +``` + +```text +[Lead] Agent Teams +I routed this to Frontend. Alice is checking the auth callback. +``` + +```text +[Frontend] Alice +Found the loop. The callback reads a stale refresh token after restore. +``` + +```text +[System] +Desktop went offline. Open Agent Teams and resend if this still matters. +``` + +Avoid: + +```text +tool call summaries +stdout chunks +agent chain-of-thought style text +raw task board mutations +every idle/status heartbeat +``` + +## Progress Indicators + +Top 3 options: + +### 1. `sendChatAction(typing)` heartbeat while a route-linked answer is pending + +🎯 8 🛡️ 8 🧠 4 Approx change size: 700-1500 LOC + +Pros: + +- Official method. +- Supports `message_thread_id`. +- Lasts 5 seconds or less, so it naturally expires. +- Does not create message history clutter. + +Cons: + +- Needs throttling. +- Can imply active work even if the lead is blocked. + +Verdict: + +```text +Use carefully after inbound commit, while local delivery is pending or agent turn is active. +``` + +### 2. `sendMessageDraft` + +🎯 6 🛡️ 6 🧠 7 Approx change size: 1200-3000 LOC + +Pros: + +- New Bot API method for partial generated messages. +- Supports `message_thread_id`. +- Could feel impressive. + +Cons: + +- Draft lifecycle/id semantics need real-world testing. +- It might leak partial agent output before safety/projection filtering. +- Harder to reconcile if final answer is suppressed. + +Verdict: + +```text +Do not use in MVP. +Only consider for final-answer generation after projection gate is mature. +``` + +### 3. 
Explicit status messages like "Alice is working" + +🎯 7 🛡️ 6 🧠 3 Approx change size: 500-1200 LOC + +Pros: + +- Simple. +- Durable and visible. + +Cons: + +- Adds clutter. +- Can become spammy. +- Hard to keep accurate. + +Verdict: + +```text +Use only for major state changes, not continuous progress. +``` + +## Reply-To Routing + +Incoming Telegram reply should route by priority: + +```text +1. reply_to_message.message_id maps to ProviderMessageLink +2. message_thread_id maps to team route +3. slash command selects member or action +4. fallback to lead +``` + +Provider message link: + +```ts +interface ProviderMessageLink { + provider: 'telegram'; + routeId: string; + providerChatIdHash: string; + providerMessageThreadId: string; + providerMessageId: string; + conversationMessageId: string; + authorKind: 'external_user' | 'team_member' | 'team_lead' | 'system'; + authorMemberId?: string; + sentAt: string; +} +``` + +Examples: + +```text +User replies to Alice message +-> route to team topic +-> include reply target "Alice" in lead/team prompt +-> if direct teammate reply mode is enabled, deliver to Alice inbox +``` + +```text +User sends a new message in team topic without reply +-> route to lead by default +``` + +MVP decision: + +```text +Do not DM arbitrary teammate automatically from reply-to. +Route to lead with reply context first. +``` + +Why: + +- Lead can coordinate. +- Teammate may be offline or mid-turn. +- Direct teammate routing can be added after route policy is proven. + +## Commands In Topic + +Keep commands minimal in MVP: + +```text +/teams +/status +/help +/disconnect +``` + +Do not overload the topic with rich command grammar early. + +Team selection: + +```text +Primary selection is topic. +Commands are fallback and diagnostics. 
+``` + +If message arrives outside a topic: + +```text +show active teams +ask user to pick a topic +do not infer from recent activity unless exactly one team is active +``` + +## Projection State Machine + +```text +local_message_seen + -> policy_checked + -> conversation_appended + -> projection_pending + -> provider_sending + -> provider_sent + -> linked +``` + +Failure states: + +```text +suppressed_by_policy +requires_manual_approval +provider_ambiguous +provider_failed_retryable +provider_failed_terminal +route_disabled +topic_needs_repair +``` + +Important invariant: + +```text +Provider projection cannot start before the message is appended to MessengerConversationStore. +``` + +This ensures Telegram never has a message that the local conversation store cannot explain. + +## Duplicate Prevention + +Use three layers: + +```text +1. Conversation idempotency key +2. Projection payload hash +3. Provider message link +``` + +Conversation idempotency: + +```text +source.kind + source.localMessageId/providerUpdateId + routeId +``` + +Projection idempotency: + +```text +conversationMessageId + provider + routeId + payloadHash +``` + +Provider link: + +```text +stored only after sendMessage returns Message +``` + +If Telegram send times out: + +```text +mark projection ambiguous +do not retry automatically with the same text unless policy accepts duplicate risk +surface "delivery uncertain" in local UI +``` + +This matches earlier outbound delivery research. + +## Edit And Delete Policy + +Do not use Telegram edit/delete as the normal sync mechanism. + +Reasons: + +- `deleteMessage` has a 48-hour limit and service-message exceptions. +- `editMessageText` has constraints and can return different shapes. +- Edits are provider-specific and hard to reconcile across adapters. 
+ +MVP: + +```text +append-only Telegram topic +append-only local conversation ledger +corrections are new messages +``` + +Later: + +```text +support explicit "correct last bot message" for bot-authored messages only +``` + +## Storage And Retention + +Do not rely on: + +```text +sentMessages.json cap of 200 +inboxes/user.json as long-term canonical external history +Telegram topic as recoverable history +``` + +Use: + +```text +getAppDataPath()/messenger-conversations/ + bindings/ + routes/ + conversations/ + projections/ +``` + +Retention tiers: + +```text +MVP: + keep text conversation rows locally until user deletes route/binding + +Later: + per-route retention setting + export/delete controls + encrypted local store option + encrypted backend queue option +``` + +## UI Implications + +Desktop should show: + +```text +Connected Telegram account +team topics/routes +last projected message status +delivery uncertain warnings +local-only vs sent-to-Telegram marker +reconnect/repair action +``` + +Message row badges: + +```text +local only +sent to Telegram +delivery uncertain +blocked by policy +needs approval +``` + +This matters because the local app feed and Telegram topic will not always match exactly by design. 
+ +## Clean Architecture Placement + +Core/domain: + +```text +ConversationMessage +ConversationPolicy +ProjectionEligibility +ProviderMessageLink +ProjectionStateMachine +BackfillPolicy +``` + +Core/application: + +```text +AppendInboundProviderMessageUseCase +AppendLocalReplyUseCase +EvaluateProjectionUseCase +ProjectConversationMessageUseCase +ReconcileProjectionUseCase +BuildBackfillPreviewUseCase +``` + +Ports: + +```text +MessengerConversationStore +MessengerProjectionLedger +MessengerProviderGateway +TeamMessageSource +ExternalVisibilityPolicy +``` + +Adapters: + +```text +TeamMessageFeedInputAdapter +TelegramProjectionAdapter +FileConversationStore +FileProjectionLedger +``` + +Important dependency rule: + +```text +TeamMessageFeedInputAdapter may depend on existing team services. +Core policy must not depend on TeamMessageFeedService. +``` + +## Edge Cases To Test + +History and projection: + +- Topic created after team already has a long local message history. +- No automatic raw backfill occurs. +- User-approved summary backfill sends only approved summary. +- `sentMessages.json` drops old rows, but MessengerConversationStore keeps route conversation history. +- Same local message appears in both inbox and sent messages, only one conversation row is created. +- Same conversation row is not projected twice. + +Teammate messages: + +- Alice sends `to=user` in a route-linked conversation, Telegram gets the message with the `[Frontend] Alice` prefix. +- Alice sends `to=user` outside a route-linked conversation, Telegram gets nothing. +- Alice sends teammate-internal message, Telegram gets nothing. +- Lead sends generic thought with no `to=user`, Telegram gets nothing. +- Slash command result is visible in UI, Telegram gets nothing by default. + +Reply routing: + +- User replies to Alice's Telegram message, provider link maps to Alice context. +- User replies to system offline notice, route remains lead fallback. +- User writes in topic without reply, route goes to lead.
+- User writes outside topic with multiple teams connected, bot asks to choose topic. +- Unknown provider message id does not crash routing. + +Provider behavior: + +- `sendMessage` success stores provider message link. +- `sendMessage` timeout marks ambiguous and does not auto-duplicate. +- `deleteMessage` failure does not corrupt local conversation. +- `editForumTopic` failure does not reroute by title. +- Topic repair creates new topic and marks old projection state historical. + +Privacy: + +- Internal blocks stripped before projectable rows. +- Policy blocks `permission_request` JSON. +- Policy blocks tool stdout/stderr unless manually approved. +- Backfill preview redacts secrets and requires explicit approval. + +## Decision Update + +Add this to the implementation plan: + +```text +MessengerConversationStore is mandatory for MVP. +Telegram topic is provider projection only. +No raw automatic history backfill. +One topic per team route. +One open conversation per team topic in MVP. +Teammate messages to user are projected only when route-linked and external-safe. +``` + +Recommended MVP behavior: + +```text +Connect Telegram +-> create one topic per selected team +-> send a short connection marker +-> start projecting new inbound/outbound external-safe messages +-> show local projection status in desktop +``` + +Main remaining uncertainty: + +```text +Should reply-to a teammate message route directly to that teammate, +or always go through lead with reply context? +``` + +My current recommendation: + +🎯 8 🛡️ 8 🧠 5 Approx change size: +800-1800 LOC + +```text +MVP routes all Telegram inbound through lead, +but includes reply-to teammate context in the prompt. +Add direct teammate routing later as an explicit per-team setting. +``` + +Reason: + +```text +It preserves coordination, avoids surprising teammate interruptions, +and still lets the lead tell Alice "the user replied to your message". 
+``` diff --git a/docs/team-management/member-work-sync-control-plane-plan.md b/docs/team-management/member-work-sync-control-plane-plan.md new file mode 100644 index 00000000..456487bc --- /dev/null +++ b/docs/team-management/member-work-sync-control-plane-plan.md @@ -0,0 +1,3377 @@ +# Member Work Sync Control Plane Plan + +**Status:** Proposed +**Scope:** Team management, task work synchronization, agent work coordination +**Primary repo:** `claude_team` +**Secondary write-boundary repo:** `agent_teams_orchestrator` / `agent-teams-controller` +**Feature name:** `member-work-sync` + +--- + +## 1. Summary + +Build a shadow-first control plane that lets the app determine whether each teammate has seen and acknowledged the current actionable work state. + +This is not a simple "ping when agent is idle" feature. The app owns the truth: + +- the current actionable work agenda; +- the agenda fingerprint; +- whether an agent report is valid for that fingerprint; +- whether a future nudge would be needed; +- whether watchdog remains responsible for semantic task stalls. + +Recommended implementation: + +**Phase 1: Shadow-first `member-work-sync` control plane** +`🎯 10 🛡️ 10 🧠 5`, roughly `850-1150 LOC`. + +Phase 1 does not send nudges. It computes agenda/fingerprint/status, validates `member_work_sync_report`, stores status conditions, and exposes diagnostics. This avoids agent spam and gives real metrics before behavior changes. + +Phase 2 adds durable nudges only after Phase 1 metrics prove that fingerprint churn and false positives are low. + +Patterns used: + +- Kubernetes-style level-triggered reconcile: recompute from current desired/current state instead of trusting events. +- Queue visibility / lease pattern: `still_working` suppresses duplicate nudges for a bounded time, but never completes work. +- Durable outbox pattern: Phase 2 sends rare nudges through idempotent outbox records, not direct side effects in reconcilers. 
+- Concurrency-key pattern: one pending nudge per `(team, member, agendaFingerprint)`. +- Gastown-style control loop idea: prefer explicit coordination state over ad hoc idle pings. +- GoClaw-style lightweight worker signals: tool/turn events are useful triggers, but not proof. + +--- + +## 2. Why This Exists + +Current failure class: + +1. A teammate has assigned tasks or review work. +2. The agent stops after saying "done", "standing by", "continuing", or "I will work on it". +3. The UI may still show "working on" or the task remains incomplete. +4. Existing watchdog eventually catches some stalls, but it is task-specific and delayed. +5. A naive ping-after-idle loop would spam agents and conflict with the watchdog. + +The missing layer is a fast consistency loop: + +```text +Does this member know the current actionable work agenda? +``` + +That question is different from: + +```text +Did delivery succeed? +Is the runtime alive? +Is the member making meaningful progress? +Did the task complete? +``` + +Those remain separate systems. + +--- + +## 3. Design Principles + +### 3.1 Clean Architecture + +The feature follows `docs/FEATURE_ARCHITECTURE_STANDARD.md`. + +```text +src/features/member-work-sync/ + contracts/ + core/ + domain/ + application/ + main/ + composition/ + adapters/ + input/ + output/ + infrastructure/ + preload/ + renderer/ +``` + +Rules: + +- `core/domain` contains pure business rules. +- `core/application` orchestrates use cases through ports. +- `main/adapters/output` adapts current team/task/runtime services. +- `main/infrastructure` owns filesystem stores, locks, versioning. +- `renderer` only displays status and diagnostics. +- controller/orchestrator owns only the MCP write boundary for `member_work_sync_report`. + +### 3.2 SOLID + +- **SRP:** agenda building, fingerprinting, report validation, decision policy, persistence, and dispatch are separate classes. 
+- **OCP:** Phase 2 nudges add a new outbox port without rewriting Phase 1 domain logic. +- **LSP:** tests can replace real adapters with fakes without special cases. +- **ISP:** ports are narrow: `WorkAgendaSource`, `MemberWorkSyncStatusStore`, `BusySignalSource`, `Clock`, `Logger`. +- **DIP:** application layer depends on interfaces, not `TeamDataService`, `TeamTaskReader`, Electron, or filesystem. + +### 3.3 Naming Convention + +Use `MemberWorkSync` for implementation types and files. + +Preferred public names: + +- feature folder: `src/features/member-work-sync` +- agent report tool: `member_work_sync_report` +- optional read helper: `member_work_sync_status` +- message kind for Phase 2 nudges: `member_work_sync_nudge` + +Avoid introducing new board-prefixed sync type names. "Board" is still part of the domain explanation, but the feature name should stay focused on member actionable work. + +### 3.4 Domain Coupling Is Intentional + +The feature is strongly coupled to the task board domain. That is correct. + +It must depend on: + +- `TeamTask`; +- task owner; +- review state; +- blockers; +- clarification state; +- workflow history events; +- configured team members. + +It must not depend on: + +- React state; +- kanban UI columns as presentation; +- CSS/layout; +- raw OpenCode transcript quirks; +- prompt wording; +- current animation/spinner state. + +The core abstraction is not "kanban board UI". It is: + +```text +authoritative actionable work graph +``` + +--- + +## 4. Key Terms + +### 4.1 Actionable Work Agenda + +A canonical list of work items that currently require action from a specific member. + +It can include: + +- implementation work owned by the member; +- review work assigned to the member; +- clarification follow-up owned by the member; +- blocked work that requires explicit blocker reporting; +- tasks that became actionable because another task unblocked them. 
+ +It should not include: + +- completed tasks; +- deleted tasks; +- tasks owned by another member unless this member is reviewer/action owner; +- informational comments; +- runtime heartbeats; +- tool logs; +- UI-only ordering. + +### 4.2 Agenda Fingerprint + +A stable hash of the canonical agenda. + +Example: + +```text +agenda:v1:7d6d337b5f91c1e9a2f7f6e9d2f0b1013e145db1... +``` + +The fingerprint is the work-sync equivalent of `observedGeneration`. + +Agent reports are valid only for the current fingerprint. + +### 4.3 Work Sync Report + +An agent-side report for a specific fingerprint. + +It is not trusted blindly. The app validates it against current agenda. + +Allowed states: + +- `still_working` +- `blocked` +- `caught_up` + +### 4.4 Lease + +A time-limited report that suppresses sync nudges. + +Important: + +- `still_working` is a lease. +- `blocked` is a lease with board evidence. +- `caught_up` is not a free-form lease. It is only accepted when agenda is empty. +- A lease is not task progress. + +--- + +## 5. Non-Goals + +Phase 1 does not: + +- send nudges to agents; +- auto-complete tasks; +- mark inbox messages read; +- replace `TeamTaskStallMonitor`; +- change OpenCode delivery ledger semantics; +- interpret model text as truth; +- read UI kanban layout as source of truth. + +Phase 1 may add prompt/tool instructions, but they must be advisory. The server validates everything. + +--- + +## 6. 
Architecture Overview + +```mermaid +flowchart TD + A["TeamChangeEvent / scheduled scan / manual refresh"] --> B["MemberWorkSyncReconciler"] + B --> C["WorkAgendaSource port"] + C --> D["TeamTaskAgendaSource adapter"] + B --> E["AgendaFingerprint"] + B --> F["MemberWorkSyncStatusStore port"] + B --> G["BusySignalSource port"] + B --> H["SyncDecisionPolicy"] + H --> I["MemberWorkSyncStatusStore port"] + I --> J["JsonMemberWorkSyncStatusStore"] + K["member_work_sync_report MCP tool"] --> L["MemberWorkSyncReporter"] + L --> M["ReportValidator"] + M --> I + I --> N["Renderer status / diagnostics"] +``` + +Phase 2 extends this: + +```mermaid +flowchart TD + A["SyncDecisionPolicy returns needs_sync"] --> B["MemberWorkSyncOutbox"] + B --> C["MemberWorkSyncDispatcher"] + C --> D["Fresh agenda revalidation"] + D --> E["IdempotentInboxNudgePort.insertIfAbsent"] +``` + +--- + +## 7. Feature Directory Plan + +```text +src/features/member-work-sync/ + index.ts + + contracts/ + index.ts + types.ts + ipc.ts + + core/ + domain/ + ActionableWorkAgenda.ts + AgendaFingerprint.ts + MemberWorkSyncReportValidator.ts + SyncDecisionPolicy.ts + MemberWorkSyncConditions.ts + memberName.ts + application/ + MemberWorkSyncReconciler.ts + MemberWorkSyncReporter.ts + MemberWorkSyncDiagnosticsReader.ts + ports.ts + + main/ + composition/ + createMemberWorkSyncFeature.ts + adapters/ + input/ + registerMemberWorkSyncIpc.ts + output/ + TeamTaskAgendaSource.ts + TeamMemberRosterSource.ts + MemberBusySignalSource.ts + WatchdogCooldownSource.ts + infrastructure/ + JsonMemberWorkSyncStore.ts + MemberWorkSyncStorePaths.ts + MemberWorkSyncQueue.ts + HmacReportTokenAdapter.ts + + preload/ + index.ts + + renderer/ + adapters/ + memberWorkSyncStatusViewModel.ts + hooks/ + useMemberWorkSyncStatus.ts + ui/ + MemberWorkSyncBadge.tsx + MemberWorkSyncDetails.tsx +``` + +Phase 1 can omit preload/renderer UI if we expose diagnostics only through existing debug surfaces, but the feature should reserve contracts now. 
+ +--- + +## 8. Core Domain Types + +### 8.0 Highest-Risk Domain Decisions + +These are the places most likely to create bugs if implemented casually. + +| Area | Main failure mode | Required guard | Required tests | +|---|---|---|---| +| Agenda semantics | False `NeedsSync`, hidden pending work, or wrong owner | Build only from canonical task/review/blocker facts and document every include/exclude rule | pending owned task, review task, blocked task, clarification task, completed task | +| Fingerprint | Churn from comments, timestamps, retries, or runtime liveness | Stable canonical JSON, explicit include/exclude list, transition diagnostics | timestamp-only change, weak comment, owner change, blocker change | +| Reviewer resolution | Old review cycle becomes current work | Current-cycle review resolver, never "last reviewer wins" | approved old review plus new work, needs-fix reopening, unresolved reviewer | +| Report validation | Model hides work with `caught_up` or stale lease | Validate against fresh app-side agenda, fail closed, return current preview | stale fingerprint, non-empty caught-up, foreign task id | +| Identity authority | Model claims `user`, provider id, lead alias, or another teammate | Treat `from` as a claim, require runtime context or report token for accepted leases | codex-as-author, user-as-author, session jack/from bob, removed member | +| Controller fallback | Controller claims lease while app is down | Raw intent only, never accepted lease, replay through app validator | app unavailable, duplicate intent replay, stale intent | +| Event queue | File/change burst creates reconcile storm | Per-member coalescing, quiet window, bounded concurrency, no synchronous emitter work | burst coalescing, stop drains queue, removed member drop | +| Team lifecycle | Stopped team still accepts reports or schedules nudges | Explicit lifecycle port checked before reconcile, report acceptance, and dispatch | stopped team report, queued item after stop,
restart after stop | +| Store writes | Lost update or corrupted JSON | Versioned store, file lock, atomic write, bounded history, quarantine | concurrent update, invalid JSON, future schema | +| Watchdog interaction | Double nudge or false progress proof | Work sync reports are not progress, shared cooldown only in Phase 2 | watchdog cooldown respected, watchdog still fires on real stall | + +Phase 1 success depends more on these guards than on UI. + +Risk ranking: + +1. Agenda semantics - `🎯 8 🛡️ 9 🧠 7`, `250-400 LOC`. + This is the hardest part because it decides what the system believes is real work. +2. Identity authority and report-token validation - `🎯 8 🛡️ 9 🧠 6`, `180-320 LOC`. + This prevents the model from suppressing another member's sync state or repeating the earlier `codex` / `user` author bug. +3. App/controller validation split - `🎯 8 🛡️ 9 🧠 6`, `150-260 LOC`. + This prevents MCP tool calls from becoming untrusted writes. +4. Queue, locking, and outbox boundaries - `🎯 9 🛡️ 9 🧠 6`, `220-380 LOC`. + This prevents spam, deadlocks, duplicate nudges, and startup storms. +5. Team lifecycle gating - `🎯 9 🛡️ 9 🧠 4`, `80-160 LOC`. + This prevents "team is off but background agents still report" behavior from becoming accepted sync state. + +Pre-coding hardening checklist: + +- Write agenda-builder tests before adapters. If the agenda is wrong, every later layer behaves confidently wrong. +- Implement fingerprint diagnostics before Phase 2. Without transition reasons, churn bugs are hard to debug after nudges exist. +- Keep controller fallback intentionally weak. It can record intent, but cannot accept a lease. +- Add identity tests before report persistence. `from` is not authority unless runtime context or report token proves it. +- Treat every app restart as a replay scenario. Pending intents, queued reconciles, and stale reports must be safe to process again. +- Make every Phase 2 side effect idempotent before adding the dispatcher. 
+- Add one explicit kill switch per side-effect class: reconcile/status, report acceptance, and nudges. +- Do not merge watchdog and work-sync concepts. Work-sync is agenda observation; watchdog is semantic progress. + +Failure-mode matrix: + +| Failure | Safe behavior | +|---|---| +| Agenda source throws | record diagnostic, no accepted report, no nudge | +| Roster source cannot resolve member | reject report, drop queued reconcile | +| Team is stopped/cancelled | reject reports as inactive, drop queued reconcile, send no nudges | +| Controller cannot reach app validator | append pending intent only | +| Runtime identity and payload `from` disagree | reject with `identity_mismatch`, write no intent | +| Report token missing in full feature mode | reject or pending diagnostics, no accepted lease | +| Provider id appears as member name | reject unless roster has that exact configured member | +| Pending intent becomes stale before replay | reject/supersede intent, no lease | +| Fingerprint changes while report writes | app validator re-reads and rejects stale report | +| Queue receives 100 task events | one reconcile per member after quiet window | +| Store lock timeout | leave old status intact, record diagnostic | +| Phase 2 dispatcher crashes after inbox insert | deterministic message id prevents duplicate | +| Watchdog already nudged same member/task | Phase 2 work-sync nudge suppressed by cooldown | + +Codebase-specific integration hazards: + +| Existing area | Risk for this feature | Required decision | +|---|---|---| +| `src/shared/types/team.ts` | Plan drifts from real `TeamTask` fields | Use `historyEvents`, `reviewState`, `needsClarification`, `blockedBy`, `blocks`, `comments` exactly as typed | +| `TeamDataService` reviewer helpers | Duplicate stale reviewer logic | Extract or reuse current-cycle resolver instead of creating a third interpretation | +| `stallMonitor/reviewerResolution.ts` | Existing resolver can return old `review_approved` actor | Do not 
reuse it blindly for work-sync action ownership | +| `TeamTaskStallMonitor.noteTeamChange()` | Two background systems may react to same events | Work-sync uses its own queue and never calls stall monitor | +| `VersionedJsonStore` | New store could reimplement locking/quarantine badly | Wrap or reuse this pattern in member-work-sync infrastructure | +| `TeamTaskWriter` locks | Nested task lock plus sync-store lock can deadlock | Read task snapshot first, release, then write sync store | +| `TeammateToolTracker` | Tool finish may be mistaken for completion | Use only as busy/trigger signal | +| `RuntimeDeliveryService` / OpenCode ledger | Pending delivery may cause premature sync nudge | Treat pending delivery as busy until quiet window expires | + +Most important code alignment: + +```text +Action ownership comes from TeamTask board state. +Current activity comes from tracker/ledger only as suppression. +Progress quality comes from TaskStallMonitor only. +``` + +If implementation starts parsing transcript text to decide agenda ownership, stop and redesign. That crosses the boundary into watchdog semantics. + +Real type alignment: + +- `TeamTask.reviewState` is derived, not the only authority. +- `TeamTaskWithKanban.reviewer` can contain kanban overlay reviewer state. +- `TeamTask.historyEvents` is append-only workflow history and must be used for current-cycle proof. +- `TaskHistoryEvent.type` uses `review_changes_requested`, not generic `changes_requested`. +- `TaskHistoryEvent.type` uses `status_changed` with `to`, not separate `task_start` / `task_complete` event names. + +Implementation must compile against `src/shared/types/team.ts` before writing any adapter code. If a planned event name is not in that file, the plan is wrong, not the type. 
+ +### 8.1 Actionable Work Item + +```ts +export type ActionableWorkKind = + | 'work' + | 'review' + | 'clarification' + | 'blocked_dependency'; + +export type ActionableWorkPriority = + | 'normal' + | 'review_requested' + | 'blocked' + | 'needs_clarification'; + +export interface ActionableWorkItem { + taskId: string; + displayId?: string; + subject: string; + kind: ActionableWorkKind; + assignee: string; + priority: ActionableWorkPriority; + reason: string; + evidence: { + status: string; + owner?: string; + reviewer?: string; + reviewState?: string; + needsClarification?: 'lead' | 'user'; + blockerTaskIds?: string[]; + blockedByTaskIds?: string[]; + historyEventIds?: string[]; + }; +} +``` + +### 8.2 Agenda + +```ts +export interface ActionableWorkAgenda { + teamName: string; + memberName: string; + generatedAt: string; + items: ActionableWorkItem[]; + sourceRevision?: string; +} +``` + +`sourceRevision` is optional in Phase 1. If a reliable board revision exists later, it should be included. + +### 8.2.1 Agenda Semantics + +The agenda is a per-member operational projection, not a full task list. + +Include a task item only when the member has a concrete next action: + +| Task state | Member relation | Agenda item | +|---|---|---| +| `pending` | `owner === member` | `work` | +| `in_progress`, not in review | `owner === member` | `work` | +| `reviewState === review` | member is current-cycle reviewer | `review` | +| `needsClarification` set | owner/member must respond or escalate | `clarification` when owner is member | +| blocked by dependency | owner still owns blocked work | `blocked_dependency` when owner is member | + +Exclude: + +- completed/deleted tasks; +- pending tasks not assigned to member; +- tasks in review where member is original owner but not reviewer; +- stale reviewer assignments from previous review cycles; +- comments that only say "starting", "ok", "will do"; +- work-sync report records. 
+ +Important: `blocked_dependency` means "the member owns blocked work and may need to report/wait", not "the member must fix the dependency". If the dependency is owned by another member, that other member gets their own agenda item from their task state. + +Important: `pending` + owner member is actionable. Otherwise a newly assigned task can be invisible until the agent calls `task_start`, which is exactly the class of "agent stopped but still has work" bug this feature exists to catch. + +Clarification semantics: + +- If `needsClarification === 'lead'` and the task owner is the member, the member agenda should include a `clarification` item: the member must unblock by asking the lead or updating the task. +- If `needsClarification === 'user'` and the task owner is the member, include a `clarification` item only if the member is expected to route the question through team messaging or task comments. +- If the task owner is not the member, do not assign the clarification to this member unless the current-cycle reviewer resolver says this member owns the current action. +- If uncertain, include no item and store a diagnostic reason. A false positive nudge is worse than a missing diagnostic in Phase 1. + +Blocked dependency semantics: + +- Include a `blocked_dependency` item for the blocked task owner. +- Do not include a dependency task for the blocked task owner unless they also own that dependency task. +- `blocked_dependency` permits `blocked` report only when the dependency/blocker evidence is still present in board state. +- If the blocker disappears, the fingerprint changes and old `blocked` leases become stale. 
+ +Concrete action owner table: + +| Situation | Action owner | +|---|---| +| owned pending task | owner | +| owned in-progress task | owner | +| task in active review | current-cycle reviewer | +| changes requested / returned to work | owner | +| clarification on owned task | owner | +| task completed | none | +| task deleted | none | +| unowned task | none | + +Agenda builder must be deterministic and monotonic per board snapshot: + +- Deterministic: same task snapshot always produces the same agenda items in the same order. +- Monotonic: adding unrelated comments or runtime logs must not remove an agenda item. +- Conservative: if ownership is ambiguous, prefer no item plus diagnostic over assigning to the wrong member. +- Board-only: do not inspect transcript text, model messages, or UI labels to determine action ownership. + +Hard edge cases: + +| Case | Expected agenda behavior | +|---|---| +| Task owner changed from `bob` to `jack` | Bob agenda loses item, Jack agenda gains item, fingerprint changes for both | +| Task moved to review with reviewer `alice` | Owner agenda loses work item, Alice agenda gains review item | +| Review asks for changes | Reviewer agenda loses item, owner agenda gains work item | +| Task has stale reviewer from previous cycle | No review item unless current-cycle resolver proves it | +| Task is blocked by another task | Owner gets `blocked_dependency`; dependency owner is handled separately | +| Task has only "starting work" comment | No fingerprint change | +| Member removed from config | Agenda returns null and queued reconciles drop | +| Lead owns task | Lead agenda can include it; teammates do not inherit it | + +Implementation guidance: + +```ts +function buildAgendaForMember(input: { + tasks: TeamTask[]; + member: ActiveTeamMember; + reviewerResolver: ReviewerResolverPort; +}): ActionableWorkAgenda { + const items = input.tasks.flatMap((task) => { + if (isTaskTerminal(task)) { + return []; + } + + const reviewItem = 
mapCurrentReviewItem(task, input.member, input.reviewerResolver); + if (reviewItem) { + return [reviewItem]; + } + + return mapOwnedTaskItem(task, input.member) ?? []; + }); + + return sortAgenda(input.member, items); +} +``` + +Do not let `mapOwnedTaskItem` create an item for a task currently owned by another member. Cross-member dependencies should be modeled as evidence, not ownership transfer. + +### 8.2.2 Current-Cycle Review Resolution + +Review ownership must use the current review cycle. + +Recommended algorithm: + +1. Find the most recent review-cycle opening event for the task. +2. Ignore review events before the latest return-to-work / needs-fix / completed transition. +3. Resolve reviewer from current-cycle `review_started` first, then current-cycle `review_requested`. +4. If reviewer is still unresolved, do not assign review agenda to a teammate. +5. Do not use old `review_approved` actor as the current reviewer. + +This mirrors the conservative direction used by the stall monitor and avoids assigning work to a stale reviewer. + +Pseudo-code: + +```ts +function resolveCurrentCycleReviewer(task: TeamTask): string | null { + if (task.reviewState !== 'review') { + return null; + } + + const historyEvents = task.historyEvents ?? []; + + const cycleStartIndex = findLatestIndex(historyEvents, (event) => + event.type === 'review_requested' || + event.type === 'review_started' + ); + + const returnToWorkIndex = findLatestIndex(historyEvents, (event) => + event.type === 'review_changes_requested' || + (event.type === 'status_changed' && + (event.to === 'in_progress' || event.to === 'completed' || event.to === 'deleted')) + ); + + if (cycleStartIndex < 0) { + return null; + } + + if (returnToWorkIndex > cycleStartIndex) { + return null; + } + + const currentCycleEvents = historyEvents.slice(cycleStartIndex); + return ( + findLastReviewer(currentCycleEvents, 'review_started') ?? + findLastReviewer(currentCycleEvents, 'review_requested') ?? 
+ null + ); +} +``` + +Hard rule: if this resolver cannot prove a current reviewer, it returns `null`. It must not fall back to `task.owner`, lead, or a previously active reviewer. That keeps Phase 1 conservative and prevents accidental reassignment. + +Existing-code warning: + +`stallMonitor/reviewerResolution.ts` can resolve from `review_approved` history actor because stall detection needs historical evidence. Member-work-sync needs current action ownership, so it must not directly use that function unless the function is split into two explicit policies: + +- `resolveCurrentActionReviewer()` for work-sync and task briefing action ownership; +- `resolveHistoricalReviewActor()` for stall evidence and diagnostics. + +Recommended extraction: + +```text +src/main/services/team/reviewerResolution/currentReviewCycle.ts +src/main/services/team/reviewerResolution/historicalReviewEvidence.ts +``` + +Do not hide the policy difference behind one generic `resolveReviewerFromHistory()` name. + +Kanban overlay rule: + +- If `TeamTaskWithKanban.reviewer` is present and task `reviewState === 'review'`, it can be used as current action reviewer. +- If kanban reviewer is missing, fall back to current-cycle history resolver. +- If both exist and conflict, prefer kanban reviewer for current UI state but record a diagnostic conflict. +- Do not hash diagnostic conflicts into agenda fingerprint unless the action owner changes. + +This matches the current app shape: task files carry history, while UI DTOs may add kanban reviewer overlay. + +### 8.3 Fingerprint + +```ts +export interface AgendaFingerprintResult { + version: 'agenda:v1'; + fingerprint: string; + canonicalJson: string; +} +``` + +Fingerprint must be deterministic. + +It should sort: + +- items by `taskId`, then `kind`; +- arrays inside evidence; +- object keys. + +It must exclude: + +- `generatedAt`; +- raw comments; +- tool logs; +- runtime status; +- sync report timestamps. 
+ +It must also exclude low-signal self updates that do not change actionable ownership: + +- "starting", "начинаю", "беру в работу"; +- acknowledgement-only comments; +- member work-sync reports; +- delivery retry markers; +- runtime liveness changes. + +It must include only facts that change the member agenda: + +- owner/reviewer/action owner; +- task status and review state; +- blocker/dependency/clarification state; +- current-cycle review assignment; +- new actionable user/lead request; +- task creation/deletion when it affects this member. + +Implementation split: + +- `core/domain` builds canonical data and stable JSON only. +- `core/application` depends on a `HashPort`. +- `main/infrastructure` provides the Node `sha256` implementation. + +Domain example: + +```ts +export function buildCanonicalAgendaJson(agenda: ActionableWorkAgenda): string { + const canonical = { + teamName: agenda.teamName, + memberName: agenda.memberName, + items: agenda.items.map((item) => ({ + taskId: item.taskId, + displayId: item.displayId ?? 
null, + kind: item.kind, + assignee: item.assignee, + priority: item.priority, + reason: item.reason, + evidence: sortObjectDeep(item.evidence), + })), + }; + + return stableStringify(canonical); +} +``` + +Application example: + +```ts +export interface HashPort { + sha256Hex(input: string): string; +} + +export class AgendaFingerprintService { + constructor(private readonly hash: HashPort) {} + + fingerprint(agenda: ActionableWorkAgenda): AgendaFingerprintResult { + const canonicalJson = buildCanonicalAgendaJson(agenda); + const hash = this.hash.sha256Hex(canonicalJson); + + return { + version: 'agenda:v1', + fingerprint: `agenda:v1:${hash}`, + canonicalJson, + }; + } +} +``` + +Infrastructure example: + +```ts +import { createHash } from 'crypto'; + +export class NodeHashAdapter implements HashPort { + sha256Hex(input: string): string { + return createHash('sha256').update(input).digest('hex'); + } +} +``` + +Do not import Node `crypto` in `core/domain` or `core/application`. + +Avoid: + +```ts +// Bad: framework/runtime dependency inside domain logic. +import { createHash } from 'crypto'; +``` + +The final output still has this shape: + +```ts +function exampleResult(hash: string, canonicalJson: string): AgendaFingerprintResult { + return { + version: 'agenda:v1', + fingerprint: `agenda:v1:${hash}`, + canonicalJson, + }; +} +``` + +### 8.3.1 Fingerprint Stability Contract + +Fingerprint changes should mean: "the member's actionable agenda changed". + +It should not mean: + +- a task file mtime changed; +- an agent wrote "I am starting"; +- a runtime process restarted; +- a delivery retry marker was appended; +- a status condition timestamp changed. + +Phase 1 must track `fingerprintChangeCount` and store the last few fingerprint transition reasons. If this count rises without visible agenda changes, do not enable Phase 2 nudges. 
+ +Recommended transition diagnostic: + +```ts +export interface AgendaFingerprintTransition { + from: string | null; + to: string; + changedTaskIds: string[]; + changedReasons: Array< + | 'task_added' + | 'task_removed' + | 'owner_changed' + | 'reviewer_changed' + | 'status_changed' + | 'review_state_changed' + | 'blocker_changed' + | 'clarification_changed' + >; + changedAt: string; +} +``` + +This diagnostic is not part of the hash. It is stored only to debug churn. + +### 8.4 Report + +```ts +export type MemberWorkSyncReportState = + | 'still_working' + | 'blocked' + | 'caught_up'; + +export interface MemberWorkSyncTrustedContext { + origin: 'mcp' | 'app' | 'intent_replay'; + expectedTeamName?: string; + expectedMemberName?: string; + runtimeSessionId?: string; + providerId?: string; + receivedAt: string; + identitySource: + | 'runtime_session' + | 'process_team' + | 'report_token' + | 'claimed_member' + | 'unknown'; +} + +export interface MemberWorkSyncReportInput { + teamName: string; + memberName: string; + agendaFingerprint: string; + state: MemberWorkSyncReportState; + taskIds?: string[]; + blockerCommentId?: string; + note?: string; + reportToken?: string; + reportedAt?: string; + trustedContext: MemberWorkSyncTrustedContext; +} +``` + +Stored report: + +```ts +export interface MemberWorkSyncReportRecord extends MemberWorkSyncReportInput { + id: string; + accepted: boolean; + rejectedReason?: string; + observedFingerprint: string; + expiresAt?: string; + acceptedAt?: string; + lastSeenAt: string; + trustedIdentity: boolean; +} +``` + +Report identity must be stable enough for idempotent replay. 
+ +Recommended accepted/rejected report id: + +```ts +function buildReportId(input: { + teamName: string; + memberName: string; + agendaFingerprint: string; + state: MemberWorkSyncReportState; + taskIds: string[]; + blockerCommentId?: string; +}): string { + return `member-work-sync-report:${stableHash(input)}`; +} +``` + +Do not include `reportedAt` or `note` in this id. A repeated report for the same lease key should update `lastSeenAt` and latest diagnostic note, not create unbounded duplicates. `note` is useful for diagnostics but must not affect lease identity. + +### 8.4.1 Clock And Lease Authority + +Lease time is always computed from app-owned time, never model-supplied time. + +Rules: + +- `trustedContext.receivedAt` is the authority for `acceptedAt`, `lastSeenAt`, and lease expiry. +- `reportedAt` is optional diagnostics only. It must not extend or shorten a lease. +- If the system clock jumps, lease expiry is recomputed from stored app timestamps on next read. +- Tests must use an injected `ClockPort`; no direct `Date.now()` in domain or application use cases. +- A replayed pending intent uses the replay app time for validation, but the original intent time stays in diagnostics. + +This prevents a model from keeping itself quiet forever by sending a future timestamp. + +### 8.4.2 Identity Authority + +`memberName` / MCP `from` is a claim, not authority. + +Accepted reports that suppress nudges require one of these identity proofs: + +- trusted runtime session context says the caller is the same configured member; +- process/team metadata says the caller is the same configured member; +- app-generated `reportToken` validates for `(teamName, memberName, agendaFingerprint)`. + +If none is available, the report can be stored as rejected diagnostics, but it must not create a valid lease. + +Identity hard rules: + +- All identity comparisons use canonical team/member names, preserving display case only after validation. 
+- `user` and `system` can never submit a member work-sync report. +- Provider ids such as `codex`, `anthropic`, `opencode`, or `gemini` are not member names unless the team explicitly has a member with that exact configured name. +- `lead` / `team-lead` aliases map only to the configured lead member and only for the lead's own agenda. +- A lead alias cannot report for another teammate. +- A runtime-bound report for `expectedMemberName="jack"` with `from="bob"` is rejected as `identity_mismatch`. +- A report for a removed/inactive member is rejected before intent persistence. +- Identity mismatch is terminal for that call; do not write a pending intent that could later become accepted. + +This deliberately repeats the earlier `codex` / `user` author bug prevention at the work-sync boundary. + +Retention: + +- keep latest accepted report per member/fingerprint; +- keep latest rejected report per member/fingerprint/reason; +- keep a bounded history, recommended `20` report records per member or `7` days; +- never let report history affect fingerprint; +- never treat a pending intent as accepted history. + +### 8.5 Conditions + +```ts +export type MemberWorkSyncConditionType = + | 'CaughtUp' + | 'ValidLease' + | 'NeedsSync' + | 'SuppressedBusy' + | 'SuppressedCooldown' + | 'StaleReport' + | 'InvalidReport'; + +export interface MemberWorkSyncCondition { + type: MemberWorkSyncConditionType; + status: 'true' | 'false' | 'unknown'; + observedFingerprint: string; + reason: string; + message: string; + lastTransitionAt: string; +} +``` + +Condition examples: + +```json +{ + "type": "NeedsSync", + "status": "true", + "observedFingerprint": "agenda:v1:abc...", + "reason": "ActionableAgendaWithoutValidLease", + "message": "Member has 2 actionable work items and no valid report lease.", + "lastTransitionAt": "2026-04-29T12:00:00.000Z" +} +``` + +--- + +## 9. 
Application Ports + +`core/application/ports.ts`: + +```ts +export interface ClockPort { + now(): Date; +} + +export interface LoggerPort { + debug(message: string, context?: Record<string, unknown>): void; + warn(message: string, context?: Record<string, unknown>): void; + error(message: string, context?: Record<string, unknown>): void; +} + +export interface WorkAgendaSourcePort { + getAgenda(input: { + teamName: string; + memberName: string; + }): Promise<ActionableWorkAgenda | null>; +} + +export interface MemberRosterSourcePort { + getMember(input: { + teamName: string; + memberName: string; + }): Promise<{ + name: string; + providerId?: string; + active: boolean; + isLead: boolean; + } | null>; +} + +export interface TeamLifecycleSourcePort { + getTeamLifecycle(input: { + teamName: string; + }): Promise<{ + active: boolean; + state: 'active' | 'stopped' | 'cancelled' | 'deleted' | 'unknown'; + reason?: string; + }>; +} + +export interface BusySignalSourcePort { + getBusyState(input: { + teamName: string; + memberName: string; + }): Promise<{ + busy: boolean; + reason?: 'tool_active' | 'runtime_delivery_pending' | 'launching' | 'recent_activity'; + until?: string; + }>; +} + +export interface MemberWorkSyncStatusStorePort { + readMemberStatus(input: { + teamName: string; + memberName: string; + }): Promise<MemberWorkSyncStatus | null>; + + updateMemberStatus(input: { + teamName: string; + memberName: string; + updater: (current: MemberWorkSyncStatus | null) => MemberWorkSyncStatus; + }): Promise; +} + +export interface ReportTokenPort { + issue(input: { + teamName: string; + memberName: string; + agendaFingerprint: string; + issuedAt: Date; + }): Promise<string>; + + verify(input: { + teamName: string; + memberName: string; + agendaFingerprint: string; + token: string; + now: Date; + }): Promise<{ ok: true } | { ok: false; reason: 'missing' | 'expired' | 'invalid' }>; +} +``` + +Phase 2 ports: + +```ts +export interface MemberWorkSyncOutboxPort { + ensurePending(input: MemberWorkSyncOutboxEnsureInput): Promise; + claimDue(input: MemberWorkSyncOutboxClaimInput): Promise; + 
markDelivered(input: MemberWorkSyncOutboxMarkDeliveredInput): Promise; + markSuperseded(input: MemberWorkSyncOutboxMarkSupersededInput): Promise; +} + +export interface IdempotentInboxNudgePort { + insertIfAbsent(input: { + teamName: string; + memberName: string; + messageId: string; + payloadHash: string; + payload: MemberWorkSyncNudgePayload; + }): Promise<{ + inserted: boolean; + messageId: string; + conflict?: boolean; + }>; +} +``` + +--- + +## 10. Domain Policies + +### 10.1 Report Validation + +Rules: + +1. Unknown or stopped team -> reject. +2. Unknown member -> reject. +3. Removed/inactive member -> reject. +4. Reserved member names `user` / `system` -> reject. +5. Runtime identity mismatch -> reject without writing an intent. +6. Invalid report token -> reject without writing an accepted lease. +7. Stale fingerprint -> reject with current fingerprint and agenda summary. +8. `caught_up` with non-empty agenda -> reject. +9. `still_working` with empty agenda -> reject with `caught_up` recommendation. +10. `still_working` taskIds must be subset of current agenda task ids. +11. `blocked` requires board evidence. + +Validation must be fail-closed. If the validator cannot prove a report is safe, reject it and return the current agenda preview. + +Additional rules: + +12. `note` is advisory only and never changes validation. +13. `blockerCommentId` must point to a comment on one of the current agenda tasks if provided. +14. A report for a lead agenda cannot be used on behalf of another teammate. +15. `taskIds` must be unique after task-ref normalization. +16. Reports for deleted tasks are rejected. +17. A report with empty `taskIds` and state `still_working` covers all current agenda items. +18. A report with empty `taskIds` and state `blocked` is rejected unless the whole agenda is blocked by board evidence. + +Source-of-truth rule: + +```text +MCP controller validates identity, schema, and size. 
+claude_team application validates agenda, fingerprint, leases, blockers, and status writes. +``` + +The controller must not duplicate agenda policy. If the controller has no live app validation port, it records an intent and returns `pending_validation`. This avoids policy drift where the orchestrator accepts a report that the app would reject. + +Hard failure cases: + +- agenda source unavailable -> reject or pending intent, never accepted; +- stopped/cancelled team -> reject, never accepted; +- member missing from roster -> reject; +- report for `user` / `system` -> reject; +- runtime/session identity mismatch -> reject, no pending intent; +- invalid report token -> reject, no accepted lease; +- fingerprint not matching current app-side fingerprint -> reject; +- `caught_up` while app-side agenda has items -> reject; +- `blocked` without blocker evidence in the current agenda -> reject. + +Agent-facing rejected responses should be corrective, not punitive. For stale/agenda rejections, include current fingerprint and a compact agenda preview so the model can retry correctly. For identity failures, do not include agenda preview for another member. 
+ +Validation result contract: + +```ts +export type MemberWorkSyncReportValidationReason = + | 'feature_disabled' + | 'team_inactive' + | 'member_inactive' + | 'reserved_author' + | 'unsafe_provider_alias' + | 'identity_mismatch' + | 'identity_untrusted' + | 'team_mismatch' + | 'invalid_report_token' + | 'agenda_unavailable' + | 'stale_fingerprint' + | 'caught_up_rejected_actionable_items_exist' + | 'still_working_rejected_empty_agenda' + | 'task_not_in_current_agenda' + | 'blocked_rejected_without_evidence' + | 'invalid_payload'; + +export type MemberWorkSyncReportValidationResult = + | { + ok: true; + acceptedState: MemberWorkSyncReportState; + leaseExpiresAt?: string; + } + | { + ok: false; + reason: MemberWorkSyncReportValidationReason; + currentFingerprint?: string; + currentAgendaPreview?: AgendaSummaryItem[]; + }; +``` + +Side-effect rule: + +```text +validate() is pure. +report() is the only use case allowed to write accepted/rejected report state. +controller fallback writes only report intents. +``` + +This keeps SRP clear: validator decides, reporter persists, controller translates MCP input. + +Stale report handling: + +- Store the rejected report reason for diagnostics. +- Do not update `latestAcceptedReport`. +- Return current fingerprint and preview. +- If the stale report came from a pending intent replay, mark the intent `rejected`. +- Do not enqueue a nudge immediately because the stale response itself is corrective. + +Blocked report handling: + +- `blocked` must refer to current agenda items. +- If `blockerCommentId` is provided, it must belong to one of those tasks. +- If no `blockerCommentId` is provided, the agenda item must already carry blocker/dependency/clarification evidence. +- Free-text `note` cannot create blocker evidence. 
+ +Example: + +```ts +export class MemberWorkSyncReportValidator { + validate(input: { + report: MemberWorkSyncReportInput; + agenda: ActionableWorkAgenda; + currentFingerprint: string; + memberActive: boolean; + trustedContext: MemberWorkSyncTrustedContext; + tokenValidation: { ok: true } | { ok: false; reason: 'missing' | 'expired' | 'invalid' }; + }): MemberWorkSyncReportValidationResult { + if (!input.memberActive) { + return { ok: false, reason: 'member_inactive' }; + } + + if ( + input.trustedContext.expectedTeamName && + input.trustedContext.expectedTeamName !== input.report.teamName + ) { + return { ok: false, reason: 'team_mismatch' }; + } + + if ( + input.trustedContext.expectedMemberName && + input.trustedContext.expectedMemberName !== input.report.memberName + ) { + return { ok: false, reason: 'identity_mismatch' }; + } + + if (input.report.agendaFingerprint !== input.currentFingerprint) { + return { + ok: false, + reason: 'stale_fingerprint', + currentFingerprint: input.currentFingerprint, + currentAgendaPreview: previewAgenda(input.agenda), + }; + } + + const hasTrustedIdentity = + input.trustedContext.identitySource === 'runtime_session' || + input.trustedContext.identitySource === 'process_team' || + input.tokenValidation.ok; + if (!hasTrustedIdentity) { + return { + ok: false, + reason: + input.tokenValidation.reason === 'missing' + ? 
'identity_untrusted' + : 'invalid_report_token', + }; + } + + if (input.report.state === 'caught_up' && input.agenda.items.length > 0) { + return { + ok: false, + reason: 'caught_up_rejected_actionable_items_exist', + currentFingerprint: input.currentFingerprint, + currentAgendaPreview: previewAgenda(input.agenda), + }; + } + + if (input.report.state === 'still_working') { + return this.validateStillWorking(input.report, input.agenda); + } + + if (input.report.state === 'blocked') { + return this.validateBlocked(input.report, input.agenda); + } + + return { ok: true, acceptedState: input.report.state }; + } +} +``` + +### 10.2 Decision Policy + +Decision input: + +```ts +export interface MemberWorkSyncDecisionInput { + agenda: ActionableWorkAgenda; + fingerprint: string; + latestValidReport: MemberWorkSyncReportRecord | null; + busyState: MemberBusyState; + now: Date; +} +``` + +Decision output: + +```ts +export type MemberWorkSyncDecision = + | { kind: 'caught_up'; conditions: MemberWorkSyncCondition[] } + | { kind: 'valid_lease'; leaseExpiresAt: string; conditions: MemberWorkSyncCondition[] } + | { kind: 'suppressed_busy'; reason: string; conditions: MemberWorkSyncCondition[] } + | { kind: 'needs_sync'; conditions: MemberWorkSyncCondition[] }; +``` + +Policy: + +```ts +export class SyncDecisionPolicy { + decide(input: MemberWorkSyncDecisionInput): MemberWorkSyncDecision { + if (input.agenda.items.length === 0) { + return { + kind: 'caught_up', + conditions: [conditionCaughtUp(input)], + }; + } + + if (hasValidLease(input.latestValidReport, input.fingerprint, input.now)) { + return { + kind: 'valid_lease', + leaseExpiresAt: input.latestValidReport.expiresAt!, + conditions: [conditionValidLease(input)], + }; + } + + if (input.busyState.busy) { + return { + kind: 'suppressed_busy', + reason: input.busyState.reason ?? 
'unknown', + conditions: [conditionSuppressedBusy(input)], + }; + } + + return { + kind: 'needs_sync', + conditions: [conditionNeedsSync(input)], + }; + } +} +``` + +Phase 1 stores `needs_sync` but does not send. + +### 10.3 Lease Defaults + +Default lease durations should be conservative: + +```text +still_working: 10 minutes +blocked: 30 minutes +caught_up: no lease, condition only +``` + +Reasons: + +- `still_working` should suppress noisy sync checks, but not hide long-running stalls. +- `blocked` can reasonably last longer because it must have board evidence. +- `caught_up` is recalculated from empty agenda and should not be trusted after new work appears. + +Expired leases are ignored by `SyncDecisionPolicy`. + +### 10.4 Shadow Would-Nudge Semantics + +Phase 1 may compute `wouldNudgeCount`, but must not enqueue or send. + +`wouldNudge` is true only when all are true: + +- agenda is non-empty; +- no valid lease for current fingerprint; +- not busy; +- not inside quiet window; +- not suppressed by recent watchdog remediation; +- member is active and not stopped/removed. + +This makes Phase 1 metrics close to Phase 2 behavior without changing runtime behavior. + +--- + +## 11. Main Adapters + +### 11.1 TeamTaskAgendaSource + +Responsibility: + +- read current tasks; +- read members/config metadata; +- resolve owner/reviewer/action owner from authoritative task fields and current-cycle history; +- output `ActionableWorkAgenda`. + +It should not: + +- write files; +- send messages; +- inspect UI kanban presentation; +- parse runtime transcripts; +- classify semantic progress. + +Reviewer resolution must be conservative. Prefer the same current-cycle semantics used by stall-monitor review resolution. Do not treat stale review history from a previous cycle as current action ownership. 
+ +Pseudo-code: + +```ts +export class TeamTaskAgendaSource implements WorkAgendaSourcePort { + constructor( + private readonly taskReader: TeamTaskReader, + private readonly memberResolver: TeamMemberResolverPort, + private readonly reviewerResolver: ReviewerResolverPort + ) {} + + async getAgenda(input: { + teamName: string; + memberName: string; + }): Promise<ActionableWorkAgenda | null> { + const tasks = await this.taskReader.getTasks(input.teamName); + const activeMember = await this.memberResolver.resolve(input.teamName, input.memberName); + if (!activeMember?.active) { + return null; + } + + const items = tasks.flatMap((task) => + this.toActionableItems({ + task, + memberName: activeMember.name, + }) + ); + + return { + teamName: input.teamName, + memberName: activeMember.name, + generatedAt: new Date().toISOString(), + items: sortAgendaItems(items), + }; + } +} +``` + +Actionable mapping examples: + +```ts +function mapWorkItem(task: TeamTask, memberName: string): ActionableWorkItem | null { + if (task.status !== 'pending' && task.status !== 'in_progress') { + return null; + } + if (task.reviewState === 'review') { + return null; + } + if (!sameMember(task.owner, memberName)) { + return null; + } + if (task.needsClarification) { + return { + taskId: task.id, + displayId: task.displayId, + subject: task.subject, + kind: 'clarification', + assignee: memberName, + priority: 'needs_clarification', + reason: `Task needs clarification from ${task.needsClarification}.`, + evidence: { + status: task.status, + owner: task.owner, + reviewState: task.reviewState, + needsClarification: task.needsClarification, + }, + }; + } + return { + taskId: task.id, + displayId: task.displayId, + subject: task.subject, + kind: 'work', + assignee: memberName, + priority: task.blockedBy?.length ? 'blocked' : 'normal', + reason: task.blockedBy?.length + ? 'Owned task is blocked by dependencies.' + : task.status === 'pending' + ? 'Owned task is pending and needs to be started, clarified, or declined.' 
+ : 'Owned task is in progress.', + evidence: { + status: task.status, + owner: task.owner, + reviewState: task.reviewState, + blockerTaskIds: task.blocks, + blockedByTaskIds: task.blockedBy, + }, + }; +} +``` + +Review mapping: + +```ts +function mapReviewItem(args: { + task: TeamTask; + memberName: string; + reviewer: string | null; +}): ActionableWorkItem | null { + if (args.task.reviewState !== 'review') { + return null; + } + if (!args.reviewer || !sameMember(args.reviewer, args.memberName)) { + return null; + } + return { + taskId: args.task.id, + displayId: args.task.displayId, + subject: args.task.subject, + kind: 'review', + assignee: args.memberName, + priority: 'review_requested', + reason: 'Task is waiting for this member review.', + evidence: { + status: args.task.status, + owner: args.task.owner, + reviewer: args.reviewer, + reviewState: args.task.reviewState, + }, + }; +} +``` + +### 11.2 MemberBusySignalSource + +Inputs: + +- active tool calls from `TeammateToolTracker`; +- OpenCode delivery ledger pending states; +- member spawn status launching/restarting; +- recent team/member activity quiet window. + +Output: + +```ts +export interface MemberBusyState { + busy: boolean; + reason?: 'tool_active' | 'runtime_delivery_pending' | 'launching' | 'recent_activity'; + until?: string; +} +``` + +Phase 1 can keep this conservative: + +- if uncertain, return busy for short quiet window; +- better to suppress false `needs_sync` than create noisy status. + +Busy signal precedence: + +1. active tool call or active runtime turn; +2. runtime delivery pending or retry scheduled; +3. member launch/restart in progress; +4. recent inbox delivery or recent tool finish inside quiet window; +5. unknown runtime state for a configured OpenCode lane. + +For Phase 1, unknown should bias toward `busy` for a bounded quiet window, not permanent suppression. + +### 11.2.1 Member Work Sync Queue + +Do not reconcile synchronously inside `teamChangeEmitter`. 
+ +Use a per-team/member queue: + +```ts +export interface MemberWorkSyncQueue { + enqueue(input: { + teamName: string; + memberName: string; + trigger: MemberWorkSyncTrigger; + runAfterMs?: number; + }): void; + start(): void; + stop(): Promise<void>; +} +``` + +Queue requirements: + +- coalesce duplicate `(teamName, memberName)` entries; +- keep the strongest/most recent trigger reason for diagnostics; +- apply quiet window, default `90_000ms`; +- bounded concurrency, default `2`; +- drop work for removed/stopped teams; +- never send messages in Phase 1; +- expose debug counts for queued/running/dropped entries. + +This prevents file watcher bursts from becoming expensive repeated agenda reads. + +Implementation details: + +- Use one in-memory work item per `(teamName, memberName)`. +- Store `firstQueuedAt`, `lastQueuedAt`, `triggerReasons`, and `runAfter`. +- If a new trigger arrives while the key is queued, merge reasons and push `runAfter` only when the new trigger is later than the existing quiet window. +- If a new trigger arrives while the key is running, mark `rerunRequested` and enqueue one follow-up pass after the current pass finishes. +- Use `setTimeout(...).unref?.()` for delayed work so the queue does not keep the app alive. +- `stop()` must clear timers and wait for current in-flight reconciles to settle. +- Do not persist Phase 1 queue state. Startup reconciliation can recreate safe state from authoritative tasks. + +Startup reconciliation: + +- on app start / team load, enqueue every active member with trigger `startup_scan`; +- on config/member metadata change, enqueue all active members for that team; +- on task import/migration, enqueue all active members after a longer quiet window; +- if a team is stopped/cancelled, drop queued entries for that team; +- if a member is removed, drop queued entries and keep only bounded diagnostic status. 
+ +Event strength order: + +```text +config_changed > task_state_changed > review_changed > inbox_delivered > tool_finished > runtime_heartbeat +``` + +This order is only for diagnostics and queue coalescing. The reconciler still recomputes from fresh state. + +Race conditions to test: + +| Race | Required behavior | +|---|---| +| task event arrives while reconcile is running | set `rerunRequested`, run exactly one follow-up pass | +| member removed while queued | drop queue item before reading agenda | +| team stopped while queued | drop queue item, do not write status | +| report accepted while queued | reconcile sees latest accepted report and avoids false `NeedsSync` | +| status write fails | retry only through later queue event or startup scan, not tight loop | +| app shutdown during delayed timer | timer cleared, no side effects after stop | + +Suggested queue state: + +```ts +interface QueuedMemberWorkSyncItem { + key: string; + teamName: string; + memberName: string; + firstQueuedAt: number; + lastQueuedAt: number; + runAfter: number; + triggerReasons: MemberWorkSyncTrigger[]; + running: boolean; + rerunRequested: boolean; +} +``` + +The queue is an adapter/infrastructure concern. The domain must not know it exists. + +### 11.3 JsonMemberWorkSyncStore + +Use a versioned JSON envelope similar to `VersionedJsonStore`. + +Path: + +```text +~/.claude/teams/<teamName>/.member-work-sync/status.json +``` + +Schema: + +```ts +export interface MemberWorkSyncStoreData { + members: Record<string, MemberWorkSyncStatus>; +} +``` + +Envelope: + +```json +{ + "schemaName": "member-work-sync.status", + "schemaVersion": 1, + "updatedAt": "2026-04-29T12:00:00.000Z", + "data": { + "members": {} + } +} +``` + +Store requirements: + +- atomic write; +- file lock; +- invalid JSON quarantine; +- future schema quarantine or safe error; +- tests for missing/invalid/future schema. + +Reuse requirement: + +Prefer adapting the existing OpenCode `VersionedJsonStore` pattern instead of inventing a new JSON store. 
If direct reuse is awkward because it currently lives under `opencode/store`, extract a generic main-process infrastructure helper first. + +Extraction target: + +```text +src/main/services/team/versionedJsonStore/VersionedJsonStore.ts +``` + +Do not deep-import from `opencode/store` into a new feature. That would make member-work-sync depend on OpenCode implementation details and violate dependency direction. + +--- + +## 12. Application Use Cases + +### 12.1 Reconcile Member + +This use case must be side-effect-light: + +- allowed: read ports, compute fingerprint, update member-work-sync status; +- forbidden: send messages, mutate tasks, mark inbox read, launch/restart runtimes; +- forbidden: call `TeamDataService` directly from core/application. + +If a future behavior needs a message, write an outbox intent in a separate Phase 2 use case. + +```ts +export class MemberWorkSyncReconciler { + constructor( + private readonly agendaSource: WorkAgendaSourcePort, + private readonly rosterSource: MemberRosterSourcePort, + private readonly lifecycleSource: TeamLifecycleSourcePort, + private readonly busySource: BusySignalSourcePort, + private readonly statusStore: MemberWorkSyncStatusStorePort, + private readonly fingerprintService: AgendaFingerprintService, + private readonly decisionPolicy: SyncDecisionPolicy, + private readonly clock: ClockPort, + private readonly logger: LoggerPort + ) {} + + async reconcileMember(input: { + teamName: string; + memberName: string; + trigger: MemberWorkSyncTrigger; + }): Promise { + const lifecycle = await this.lifecycleSource.getTeamLifecycle({ teamName: input.teamName }); + if (!lifecycle.active) { + return null; + } + + const member = await this.rosterSource.getMember(input); + if (!member?.active) { + return null; + } + + const agenda = await this.agendaSource.getAgenda(input); + if (!agenda) { + return null; + } + + const fingerprint = this.fingerprintService.fingerprint(agenda); + const busyState = await 
this.busySource.getBusyState(input); + + return this.statusStore.updateMemberStatus({ + teamName: input.teamName, + memberName: member.name, + updater: (current) => { + const latestValidReport = selectValidReport({ + current, + fingerprint: fingerprint.fingerprint, + now: this.clock.now(), + }); + + const decision = this.decisionPolicy.decide({ + agenda, + fingerprint: fingerprint.fingerprint, + latestValidReport, + busyState, + now: this.clock.now(), + }); + + return buildNextStatus({ + current, + agenda, + fingerprint, + decision, + trigger: input.trigger, + now: this.clock.now(), + }); + }, + }); + } +} +``` + +### 12.2 Report Sync + +```ts +export class MemberWorkSyncReporter { + constructor( + private readonly agendaSource: WorkAgendaSourcePort, + private readonly rosterSource: MemberRosterSourcePort, + private readonly lifecycleSource: TeamLifecycleSourcePort, + private readonly statusStore: MemberWorkSyncStatusStorePort, + private readonly reportTokenPort: ReportTokenPort, + private readonly fingerprintService: AgendaFingerprintService, + private readonly validator: MemberWorkSyncReportValidator, + private readonly clock: ClockPort + ) {} + + async report(input: MemberWorkSyncReportInput): Promise { + const lifecycle = await this.lifecycleSource.getTeamLifecycle({ teamName: input.teamName }); + if (!lifecycle.active) { + return { ok: false, reason: 'team_inactive' }; + } + + const member = await this.rosterSource.getMember(input); + if (!member?.active) { + return { ok: false, reason: 'member_inactive' }; + } + + const agenda = await this.agendaSource.getAgenda(input); + if (!agenda) { + return { ok: false, reason: 'agenda_unavailable' }; + } + + const fingerprint = this.fingerprintService.fingerprint(agenda); + const tokenValidation = input.reportToken + ? 
await this.reportTokenPort.verify({ + teamName: input.teamName, + memberName: member.name, + agendaFingerprint: fingerprint.fingerprint, + token: input.reportToken, + now: this.clock.now(), + }) + : ({ ok: false, reason: 'missing' } as const); + + const validation = this.validator.validate({ + report: input, + agenda, + currentFingerprint: fingerprint.fingerprint, + memberActive: member.active, + tokenValidation, + trustedContext: input.trustedContext, + }); + + await this.statusStore.updateMemberStatus({ + teamName: input.teamName, + memberName: member.name, + updater: (current) => + applyReportValidation({ + current, + report: input, + validation, + agenda, + fingerprint, + now: this.clock.now(), + }), + }); + + return toReportResult(validation, agenda, fingerprint); + } +} +``` + +--- + +## 13. MCP Tool Design + +Tool name: + +```text +member_work_sync_report +``` + +Purpose: + +```text +Report whether you have observed the current assigned/review work agenda. +``` + +Write-boundary location: + +- The MCP tool is implemented in `agent_teams_orchestrator` / `agent-teams-controller`. +- Full agenda validation lives in `claude_team`. +- The controller reaches app validation through an explicit port/bridge when available. +- If that bridge is not available, the controller records only a pending intent. + +This is intentionally not a "smart controller" design. The controller is a write boundary and identity/schema gate, not the business-policy owner. + +Cross-repo rollout risk: + +`agent_teams_orchestrator` and `claude_team` can be out of sync during development or user upgrades. The new tool must therefore be capability-gated. + +Rules: + +- Do not add `member_work_sync_report` as a hard required OpenCode readiness tool until both repos support it in the same release path. +- In Phase 1, missing `member_work_sync_report` must not block team launch. +- If the tool is missing, omit work-sync instructions from `task_briefing`/`member_briefing`. 
+- If the tool exists but app validation bridge is unavailable, return `pending_validation`. +- If app says feature disabled, return `feature_disabled`. +- OpenCode readiness tests should prove old required tools still gate launch, while work-sync tool is optional unless `CLAUDE_TEAM_MEMBER_WORK_SYNC_REQUIRE_MCP_TOOL=true`. + +Suggested rollout gate: + +```text +CLAUDE_TEAM_MEMBER_WORK_SYNC_REQUIRE_MCP_TOOL=false +``` + +Default `false` until Phase 1 has shipped across both repos. + +Compatibility matrix: + +| claude_team | orchestrator/controller | Expected behavior | +|---|---|---| +| no feature | no tool | no work-sync surface | +| feature enabled | no tool | status/reconcile only, no report instruction | +| feature enabled | tool exists, no app bridge | pending intent only | +| feature enabled | tool exists, app bridge live | full report validation | +| feature disabled | tool exists | tool returns `feature_disabled`, no writes | + +### 13.1 Current Agenda Read Surface + +The report tool needs a current `agendaFingerprint`. The agent must not invent this value. + +Preferred Phase 1 read surface: + +- extend `task_briefing` with a compact `workSync` block; +- include current `agendaFingerprint`; +- include a short actionable agenda preview; +- include report instructions only when the feature is enabled. + +Example `task_briefing` addition: + +```json +{ + "workSync": { + "feature": "member-work-sync", + "agendaFingerprint": "agenda:v1:abc...", + "reportToken": "wrs:v1:short-lived-token", + "state": "needs_sync", + "actionableCount": 2, + "items": [ + { + "taskRef": "#00d1e081", + "kind": "work", + "reason": "Owned task is in progress." + } + ], + "reportTool": "member_work_sync_report" + } +} +``` + +Optional fallback read tool: + +```text +member_work_sync_status +``` + +Use it only if extending `task_briefing` becomes too invasive. Prefer `task_briefing` because the agent already uses it to understand work context. 
+ +Read surface requirements: + +- must be generated from the same `WorkAgendaSourcePort` and `AgendaFingerprintService` as the reconciler; +- must not have a separate fingerprint implementation; +- must include no more than a compact agenda preview; +- must include `reportToken` only for the requesting member's own agenda; +- must omit raw comments and large task descriptions by default; +- must make clear that the fingerprint can become stale after task changes; +- must not expose other members' full agenda unless the caller is lead. + +If `task_briefing` is extended, add tests that existing consumers still receive the old fields unchanged. + +Prompt pollution guard: + +- Do not repeat the full work-sync schema in every briefing. +- Include the report instruction only when `state === 'needs_sync'` or a stale report was just rejected. +- If state is `caught_up`, include a tiny status line only. +- If state is `valid_lease`, include lease expiry only when useful for debugging. +- Keep preview item text below `160` chars each. +- Never include raw comments in `workSync.items`. + +Recommended compact text rendering: + +```text +Work sync: agendaFingerprint=agenda:v1:abc123 state=needs_sync actionable=2. +When you have reviewed this agenda, call member_work_sync_report with this fingerprint and reportToken. +``` + +Do not add a second "primary queue" next to `task_briefing`. Work-sync is metadata about the same queue. + +Input schema: + +```json +{ + "type": "object", + "properties": { + "from": { + "type": "string", + "description": "Your configured teammate name." + }, + "agendaFingerprint": { + "type": "string" + }, + "reportToken": { + "type": "string", + "description": "Short-lived report token from your current workSync briefing." 
+ }, + "state": { + "type": "string", + "enum": ["still_working", "blocked", "caught_up"] + }, + "taskIds": { + "type": "array", + "items": { "type": "string" } + }, + "blockerCommentId": { + "type": "string" + }, + "note": { + "type": "string" + } + }, + "required": ["from", "agendaFingerprint", "reportToken", "state"] +} +``` + +Controller-side behavior: + +1. Resolve `from` through existing configured-member validation. +2. Reject `user`, `system`, unknown, removed members, and unsafe provider-id aliases. +3. Attach trusted runtime context if the controller knows it. +4. Enforce schema and size limits before writing anything. +5. Prefer live app validation through the `member-work-sync` application port when available. +6. If live validation is unavailable, append a raw report intent only when identity is not already terminally invalid. +7. Return `pending_validation` for stored intents. +8. Never claim a lease was accepted unless the app-side validator accepted it for the current fingerprint, current agenda, and trusted identity. + +### 13.2 Identity And Authority Contract + +This is the highest-risk write boundary in the feature. The model can hallucinate `from`, stale fingerprints, or copied task ids, so controller and app validation must split responsibilities without drifting. + +Recommended design: `🎯 9 🛡️ 9 🧠 6`, `120-220 LOC`. + +Authority order: + +1. Trusted runtime context from the current OpenCode/Codex/Claude member session. +2. Process/team metadata from the current agent runtime. +3. App-generated `reportToken` bound to `(teamName, memberName, agendaFingerprint)`. +4. Claimed `from` only for rejection diagnostics, never by itself for an accepted lease. + +`reportToken` details: + +- Generated by `claude_team` when building the `workSync` briefing. +- Bound to `teamName`, canonical `memberName`, `agendaFingerprint`, and an app-side secret or nonce. +- Short-lived; recommended validity is the same as the current fingerprint plus `15` minutes. 
+- Omitted from the agenda fingerprint. +- Not a hard security boundary; it prevents accidental cross-member reports and stale prompt reuse. +- If missing because of cross-repo compatibility, the report returns `pending_validation` or `identity_untrusted`, not accepted. + +Alias policy: + +| Input | Allowed? | Rule | +|---|---:|---| +| `alice` | yes | Only if `alice` is active in the current team. | +| `user` | no | Reserved human actor. | +| `system` | no | Reserved system actor. | +| `codex` | no by default | Provider id, not a member name, unless explicitly configured as a member. | +| `team-lead` | conditional | Maps only to the configured lead and only for the lead's own agenda. | +| `lead` | conditional | Same as `team-lead`. | +| removed member | no | Reject before writing an intent. | + +Terminal identity failures: + +- `identity_mismatch`: trusted runtime says one member, payload claims another. +- `team_mismatch`: trusted runtime/session belongs to another team. +- `reserved_author`: payload uses `user` or `system`. +- `unsafe_provider_alias`: payload uses a provider id that is not a configured member name. +- `invalid_report_token`: token does not validate for this team/member/fingerprint. +- `member_inactive`: member is removed or no longer configured. + +Terminal failures must not write `report-intents.json`. A stale fingerprint can write rejected diagnostics, but identity mismatch cannot, because replay cannot make it safe. + +Agent-facing response for identity failures should be minimal: + +```json +{ + "ok": false, + "reason": "identity_mismatch", + "instruction": "Use this tool only as your configured teammate identity. Re-read member_briefing if you are unsure." +} +``` + +Do not include another member's agenda preview in identity-failure responses. + +### 13.3 Validation Ownership Split + +Keep SOLID boundaries explicit: + +- Controller owns protocol validation: schema, payload size, raw alias rejection, trusted context attachment. 
+- Application owns domain validation: agenda, fingerprint, lease, task ids, blocker evidence, accepted/rejected report persistence. +- Domain owns pure decisions only: no filesystem, IPC, timers, or process/session lookups. +- Infrastructure owns stores, hash implementation, report token signing/verification, and runtime context adapters. + +Do not import `claude_team` domain policy into the orchestrator controller. Use a port: + +```ts +export interface MemberWorkSyncValidationPort { + validateAndRecordReport(input: MemberWorkSyncReportInput): Promise; +} +``` + +When the port is unavailable, the controller can persist a raw intent only after controller-level identity validation has passed. + +Accepted response: + +```json +{ + "ok": true, + "state": "still_working", + "agendaFingerprint": "agenda:v1:abc...", + "leaseExpiresAt": "2026-04-29T12:10:00.000Z" +} +``` + +Pending response when the app validator is unavailable: + +```json +{ + "ok": true, + "pendingValidation": true, + "state": "still_working", + "agendaFingerprint": "agenda:v1:abc...", + "instruction": "Report was recorded for validation. Continue concrete task work; do not treat this as a confirmed lease yet." +} +``` + +Stale response: + +```json +{ + "ok": false, + "reason": "stale_fingerprint", + "currentAgendaFingerprint": "agenda:v1:def...", + "currentAgendaPreview": [ + { + "taskRef": "#00d1e081", + "kind": "work", + "reason": "Owned task is in progress." + } + ], + "instruction": "Read the current agenda and retry with the current fingerprint only after you understand it." +} +``` + +Caught-up rejected response: + +```json +{ + "ok": false, + "reason": "caught_up_rejected_actionable_items_exist", + "currentAgendaFingerprint": "agenda:v1:def...", + "currentAgendaPreview": [ + { + "taskRef": "#00d1e081", + "kind": "work", + "reason": "Owned task is in progress." + } + ] +} +``` + +--- + +## 14. Prompt Guidance + +Add minimal instructions to operational prompt surfaces after Phase 1 tool exists. 
+ +Do not over-prompt. The server enforces correctness. + +Suggested text: + +```text +When you have reviewed your current assigned/review task agenda, call member_work_sync_report with the current agendaFingerprint. + +Use still_working when you are continuing work on actionable tasks. +Use blocked only when the board shows a real blocker or clarification state. +Use caught_up only when your current actionable agenda is empty. + +Do not use member_work_sync_report as a replacement for task_start, task_add_comment, task_complete, review tools, or visible messages. +``` + +Do not say: + +```text +Call this after every response. +``` + +That would create noise and tool spam. + +--- + +## 15. Phase 2 Outbox Design + +Phase 2 only after shadow metrics are acceptable. + +### 15.1 Outbox Item + +```ts +export interface MemberWorkSyncOutboxItem { + id: string; + teamName: string; + memberName: string; + agendaFingerprint: string; + payloadHash: string; + status: + | 'pending' + | 'claimed' + | 'delivered' + | 'superseded' + | 'failed_retryable' + | 'failed_terminal'; + attemptGeneration: number; + claimedBy?: string; + claimedAt?: string; + deliveredMessageId?: string; + lastError?: string; + nextAttemptAt?: string; + createdAt: string; + updatedAt: string; +} +``` + +### 15.2 Idempotency Rules + +Key: + +```text +member-work-sync::: +``` + +Rules: + +- same `id` + same `payloadHash` -> return existing result; +- same `id` + different `payloadHash` -> conflict, no send; +- stale fingerprint before dispatch -> mark `superseded`; +- delivered item is terminal; +- retryable failures use backoff + jitter. + +Dispatcher revalidation: + +Before inserting an inbox nudge, dispatcher must re-read: + +- feature gate state; +- current roster membership; +- current agenda fingerprint; +- latest accepted report; +- busy state; +- recent watchdog cooldown. + +If any no longer matches the outbox item, mark `superseded`. Do not send stale nudges. 
+ +Crash safety: + +- Claim outbox item with `attemptGeneration`. +- Re-read item before marking delivered. +- Inbox insert uses deterministic message id from outbox id. +- If process crashes after inbox insert but before mark delivered, retry insert returns conflict/existing and dispatcher can mark delivered. +- If payload hash differs for same message id, mark terminal conflict and do not overwrite. + +### 15.3 Nudge Payload + +```ts +export interface MemberWorkSyncNudgePayload { + from: 'system'; + to: string; + messageKind: 'member_work_sync_nudge'; + source: 'member-work-sync'; + actionMode: 'do'; + text: string; + taskRefs: TaskRef[]; +} +``` + +Suggested text: + +```text +Work sync check: you have current actionable work assigned. Review your agenda, continue the concrete task work, or report a real blocker with the task tools. Do not reply only with acknowledgement. +``` + +The nudge should be rare, deterministic, and tied to the fingerprint. + +--- + +## 16. Interaction With Existing Systems + +### 16.1 TaskStallMonitor + +MemberWorkSync does not replace it. + +| System | Question | Time horizon | Action | +|---|---|---:|---| +| MemberWorkSync | Did member observe current agenda? | fast | status, later rare nudge | +| TaskStallMonitor | Is member making meaningful progress? | slow | task-specific remediation | +| Delivery ledger | Did runtime receive/respond to message? | per message | retry delivery | +| Spawn/liveness | Is runtime alive? | runtime | launch/restart status | + +Rules: + +- MemberWorkSync report is not progress. +- Watchdog may alert even with valid MemberWorkSync lease. +- MemberWorkSync should read watchdog cooldown in Phase 2 to avoid back-to-back nudges. +- Watchdog should not be disabled by MemberWorkSync. + +### 16.2 OpenCode Delivery Ledger + +MemberWorkSync does not mark OpenCode inbox rows read. 
+ +It may use pending delivery as a busy signal: + +```text +If OpenCode delivery to member is pending, suppress MemberWorkSync needs_sync/nudge until quiet window. +``` + +### 16.3 Teammate Tool Tracker + +Tool activity is a trigger, not proof. + +```text +tool finish -> enqueue reconcile after quiet window +``` + +Do not infer `caught_up` from tool finish. + +### 16.4 TeamChangeEvent + +Phase 1 integration: + +- `task` -> enqueue relevant owner/reviewer; +- `inbox` -> maybe enqueue recipient; +- `tool-activity finish` -> enqueue member; +- `member-spawn` -> enqueue member after launch grace; +- `config` -> enqueue all active members. + +Use a feature-owned queue, not direct sync work inside `teamChangeEmitter`. + +Event routing details: + +```ts +function routeTeamChangeToWorkSync(event: TeamChangeEvent): MemberWorkSyncRoute[] { + switch (event.type) { + case 'task': + return [{ teamName: event.teamName, taskId: event.taskId, scope: 'task_related_members' }]; + case 'inbox': + case 'lead-message': + return [{ teamName: event.teamName, scope: 'message_recipient_if_member' }]; + case 'tool-activity': + return [{ teamName: event.teamName, scope: 'tool_member_after_quiet_window' }]; + case 'member-spawn': + return [{ teamName: event.teamName, scope: 'spawned_member_after_launch_grace' }]; + case 'config': + return [{ teamName: event.teamName, scope: 'all_active_members' }]; + default: + return []; + } +} +``` + +The router should live in member-work-sync main adapter code, not in `src/main/index.ts`. Main index should only call a narrow facade like `memberWorkSyncService.noteTeamChange(event)`. + +### 16.5 Watchdog Boundary Matrix + +The two systems must stay separate. 
+ +| Situation | MemberWorkSync action | TaskStallMonitor action | +|---|---|---| +| member has new pending task and no report | `NeedsSync`, Phase 2 maybe rare agenda nudge | no immediate semantic stall | +| member reported `still_working` but no progress for long threshold | valid lease until expiry | may still alert/remediate stall | +| member wrote weak "starting" comment | no fingerprint change | may classify weak start and later alert | +| watchdog recently nudged same member/task | Phase 2 suppresses work-sync nudge | watchdog owns that remediation | +| work-sync stale response returned to model | no immediate nudge | no change | +| delivery ledger pending | suppress as busy | delivery ledger owns retry | + +Shared cooldown in Phase 2 should be advisory, not a hard dependency: + +```text +work-sync reads recent watchdog nudges to avoid duplicate nudges. +watchdog does not trust work-sync reports as progress. +``` + +This prevents a valid `still_working` lease from hiding a real task stall. + +--- + +## 17. Store And Locking + +### 17.1 Paths + +```text +/.member-work-sync/status.json +/.member-work-sync/reports.json +/.member-work-sync/report-intents.json # fallback when live validator is unavailable +/.member-work-sync/outbox.json # Phase 2 +``` + +Phase 1 can keep reports embedded in status if simpler: + +```ts +export interface MemberWorkSyncStatus { + teamName: string; + memberName: string; + agendaFingerprint: string; + agendaSummary: AgendaSummaryItem[]; + latestAcceptedReport?: MemberWorkSyncReportRecord; + latestRejectedReport?: MemberWorkSyncReportRecord; + conditions: MemberWorkSyncCondition[]; + metrics: MemberWorkSyncMetrics; + updatedAt: string; +} +``` + +`report-intents.json` is only a fallback queue for controller-side writes when the live app validator is unavailable. It is not an accepted lease store. 
+ +Rules: + +- app consumes report intents and validates them with the normal `MemberWorkSyncReporter`; +- accepted intents become normal accepted reports in `status.json` or `reports.json`; +- rejected intents are retained only as bounded diagnostics; +- stale intents must not update leases; +- intent records must have stable ids so reprocessing after restart is idempotent. + +Report intent shape: + +```ts +export interface MemberWorkSyncReportIntent { + id: string; + teamName: string; + memberName: string; + rawInput: unknown; + receivedAt: string; + status: 'pending' | 'accepted' | 'rejected' | 'superseded'; + validationReason?: string; + processedAt?: string; +} +``` + +Intent id: + +```text +member-work-sync-intent: +``` + +Replay rules: + +- process pending intents after app startup and after team load; +- re-read the current agenda before validation; +- if fingerprint is stale, mark `rejected`, do not apply lease; +- if identical intent was already accepted, mark duplicate as accepted with same report id; +- if member is now removed, mark `superseded`; +- keep a bounded intent history, recommended `100` records per team or `7` days. + +### 17.2 Locking Rule + +Do not perform side effects while holding board locks. + +Phase 1: + +1. read board state; +2. compute agenda/fingerprint; +3. write sync status under sync-store lock. + +Phase 2: + +1. read board state; +2. compute decision; +3. write outbox intent; +4. dispatcher later claims; +5. dispatcher revalidates board state; +6. dispatcher writes inbox idempotently. + +Never: + +```text +hold board lock -> send message -> write sync store +``` + +That risks deadlock and long lock duration. + +### 17.3 Lock Ordering + +Lock ordering must be stable across the app: + +```text +task board read lock -> release +member-work-sync store lock -> release +outbox lock -> release +inbox/message write lock -> release +``` + +Never acquire a board write lock from inside a sync-store update callback. 
+ +Never acquire an inbox/message lock while holding sync-store lock. + +If a future implementation needs multiple writes, use a durable intent: + +1. write intent under the feature store lock; +2. release lock; +3. dispatcher reads intent; +4. dispatcher revalidates fresh state; +5. dispatcher writes external side effect idempotently. + +### 17.4 Corruption And Recovery + +Store corruption should degrade the feature, not the team. + +Rules: + +- invalid JSON -> quarantine file and start with empty sync state; +- future schema -> quarantine or read-only diagnostic, no writes with unknown schema; +- lock timeout -> return previous known state if available; +- missing store -> initialize lazily; +- failed atomic rename -> keep temp file for diagnostics and do not delete existing good file; +- report intents with invalid raw payload -> mark rejected during replay. + +Do not block task board operations because member-work-sync storage is broken. + +--- + +## 18. Metrics + +Phase 1 must emit developer diagnostics and store enough data to answer: + +- is fingerprint stable? +- are agents sending stale reports? +- are reports useful? +- how often would nudges happen? +- are busy suppressions working? + +Recommended counters: + +```ts +export interface MemberWorkSyncMetrics { + reconcileCount: number; + fingerprintChangeCount: number; + staleReportCount: number; + invalidReportCount: number; + acceptedReportCount: number; + needsSyncCount: number; + suppressedBusyCount: number; + wouldNudgeCount: number; + lastReconcileAt?: string; + lastReportAt?: string; +} +``` + +For summary reports: + +```text +team: forge-labs-9 +member: jack +state: needs_sync +fingerprint churn/hour: 0.4 +stale report rate: 3% +would nudge count: 1 +busy suppressions: 8 +``` + +### 18.1 Phase 2 Entry Thresholds + +Do not enable nudges until shadow metrics are stable. 
+ +Recommended gates: + +| Metric | Target before Phase 2 | +|---|---:| +| false `NeedsSync` samples | 0 high-confidence cases in sampled teams | +| fingerprint churn for stable member | less than 2 changes/hour | +| stale report rate | less than 15 percent | +| would-nudge rate | at most 2 per member/hour | +| busy suppression correctness | no known prompt during active tool/runtime turn | +| report intent replay errors | 0 lost accepted reports | + +If a metric misses the target, keep Phase 2 disabled and fix the specific source of noise. Do not compensate with a shorter lease or more nudges. + +--- + +## 19. UI Plan + +Phase 1 UI should be minimal. + +Member card optional badge: + +- `Synced` +- `Working` +- `Needs sync` +- `Blocked` +- `Unknown` + +Tooltip examples: + +```text +Synced with current work agenda at 12:03. +``` + +```text +Needs sync: 2 actionable tasks changed since last member report. +``` + +Do not show alarming warning banners in Phase 1. + +Details dialog can show: + +- current fingerprint; +- agenda summary; +- latest report state; +- lease expiration; +- last rejected report reason; +- shadow `would nudge` status. + +--- + +## 20. Testing Plan + +### 20.1 Domain Unit Tests + +`AgendaFingerprint.test.ts` + +- same agenda with different object key order -> same fingerprint; +- different actionable task -> different fingerprint; +- changed timestamp only -> same fingerprint; +- added weak comment -> same fingerprint; +- delivery retry marker -> same fingerprint; +- runtime liveness change -> same fingerprint; +- review assignment change -> different fingerprint; +- blocker change -> different fingerprint. 
+ +`MemberWorkSyncReportValidator.test.ts` + +- `caught_up` accepted when agenda empty; +- `caught_up` rejected when agenda non-empty; +- stale fingerprint rejected with current fingerprint; +- `still_working` accepted for actionable task subset; +- `still_working` rejected for foreign task id; +- `blocked` rejected without board evidence; +- inactive member rejected; +- identity mismatch rejected and writes no accepted lease; +- invalid report token rejected; +- `user`, `system`, and unsafe provider aliases rejected; +- stopped/cancelled team rejected; +- duplicate identical report reuses stable report id; +- report for deleted task rejected; +- `blocked` with stale blocker evidence rejected. + +`SyncDecisionPolicy.test.ts` + +- empty agenda -> `caught_up`; +- non-empty agenda + valid lease -> `valid_lease`; +- non-empty agenda + busy -> `suppressed_busy`; +- non-empty agenda + no lease -> `needs_sync`; +- expired lease -> `needs_sync`. + +### 20.2 Application Tests + +`MemberWorkSyncReconciler.test.ts` + +- writes conditions for empty agenda; +- writes `NeedsSync` for assigned task without report; +- suppresses when busy source says active tool call; +- drops reconcile when lifecycle source says team is stopped; +- preserves previous report history; +- updates fingerprint when task owner changes; +- does not throw on missing team/member. + +`MemberWorkSyncReporter.test.ts` + +- accepts valid `still_working`; +- rejects stale report and stores rejected condition; +- rejects `caught_up` with actionable tasks; +- rejects stopped team without writing accepted report; +- stores accepted lease expiration; +- normalizes member name; +- uses app `receivedAt` for lease expiry, not model `reportedAt`; +- does not persist pending intents as accepted reports; +- rejected identity mismatch does not write a pending intent.
+ +### 20.3 Adapter Tests + +`TeamTaskAgendaSource.test.ts` + +- owned pending task maps to `work`; +- owner in-progress maps to `work`; +- review task maps to reviewer `review`; +- stale reviewer from an old review cycle does not become current action owner; +- old approved reviewer is not current reviewer; +- changes-requested task returns action owner to task owner; +- blocked task includes blocked evidence; +- needsClarification maps to clarification item; +- completed/deleted tasks excluded; +- non-member tasks excluded; +- uses `historyEvents`, not nonexistent `history`; +- does not use stall-monitor historical review actor as current reviewer. + +`JsonMemberWorkSyncStore.test.ts` + +- missing file returns empty state; +- invalid JSON quarantined; +- future schema quarantined or returns safe error; +- update is atomic under lock; +- concurrent updates do not lose records; +- extraction/reuse of versioned store preserves quarantine behavior. + +`MemberWorkSyncTeamChangeRouter.test.ts` + +- task event enqueues task-related members only; +- config event enqueues all active members; +- tool-activity event uses quiet-window trigger; +- unsupported event types are ignored; +- router does not read files or call application use cases directly. + +`MemberWorkSyncCrossRepoCompatibility.test.ts` + +- missing `member_work_sync_report` does not fail OpenCode readiness in Phase 1; +- work-sync instructions are omitted when the tool is unavailable; +- tool available + app bridge unavailable returns `pending_validation`; +- feature disabled returns `feature_disabled` and writes no intents; +- optional require-tool gate can fail readiness when explicitly enabled.
+ +### 20.4 Controller Tests + +In `agent-teams-controller`: + +- `member_work_sync_report` requires `from`; +- rejects `user` / `system`; +- rejects provider id aliases such as `codex` unless they are configured member names; +- maps lead aliases only for the configured lead's own agenda; +- rejects lead alias reporting for a teammate; +- rejects session identity `jack` with payload `from: "bob"`; +- rejects removed member before writing a pending intent; +- rejects invalid/missing `reportToken` without creating an accepted lease; +- rejects unknown member; +- returns structured stale fingerprint response; +- returns `pendingValidation` instead of accepted lease when app validator is unavailable; +- pending validation intent replay does not update lease until app accepts; +- disabled feature returns `feature_disabled` and does not write intents; +- exposes current fingerprint through the chosen read surface; +- does not write task comments or messages. + +### 20.5 Integration Tests + +- create realistic team with lead + members + tasks/reviews/blockers; +- run reconcile all members; +- verify statuses and fingerprints; +- submit reports; +- mutate task board; +- verify previous report becomes stale; +- no inbox messages are created in Phase 1. +- simulate app restart and replay report intents idempotently; +- simulate task/comment burst and verify one coalesced reconcile; +- verify watchdog alert still works when member has valid work-sync lease. + +### 20.6 Regression Tests + +Run at minimum: + +```bash +pnpm vitest run test/main/services/team/stallMonitor/TeamTaskStallMonitor.test.ts +pnpm vitest run test/main/services/team/stallMonitor/TeamTaskStallPolicy.test.ts +pnpm vitest run test/main/services/team/TeamProvisioningServiceRelay.test.ts +pnpm typecheck --pretty false +git diff --check +``` + +Add feature tests once implemented: + +```bash +pnpm vitest run test/features/member-work-sync +``` + +--- + +## 21. 
Rollout Plan + +### Phase 0: Design And Test Fixtures + +`🎯 10 🛡️ 10 🧠 3`, `100-200 LOC`. + +- Add this plan. +- Add fixture team definitions for agenda/fingerprint tests. +- No runtime changes. + +### Phase 1: Shadow Control Plane + +`🎯 10 🛡️ 10 🧠 5`, `850-1150 LOC`. + +Includes: + +- feature skeleton; +- domain types; +- agenda builder; +- fingerprint; +- current agenda read surface through `task_briefing` or `member_work_sync_status`; +- report validator; +- decision policy; +- JSON status store; +- report intent fallback consumer; +- reconciler; +- reporter; +- app-side composition; +- controller MCP tool; +- tests. + +Does not include: + +- nudges; +- outbox; +- inbox writes; +- rate limiter. + +### Phase 1.5: Observability Review + +`🎯 9 🛡️ 10 🧠 3`, `100-250 LOC`. + +Use real teams for 1-2 days. + +Check: + +- false `NeedsSync`; +- fingerprint churn; +- stale report rate; +- invalid caught-up attempts; +- how many nudges Phase 2 would send. + +Exit criteria: + +- fingerprint churn is low for stable tasks; +- no noisy churn from comments/timestamps; +- `NeedsSync` aligns with actual actionable work; +- no user-visible behavior regression. + +### Phase 2: Durable Nudges + +`🎯 9 🛡️ 9 🧠 7`, `700-1000 LOC`. + +Includes: + +- outbox; +- dispatcher; +- idempotent inbox insert; +- payload hash; +- attempt generation fencing; +- jitter/backoff; +- per-member token bucket; +- shared cooldown with watchdog. + +### Phase 3: Provider Accelerators + +`🎯 8 🛡️ 8 🧠 5`, `300-600 LOC`. + +Includes: + +- Claude Stop hook enqueue signal; +- OpenCode turn-settled enqueue signal; +- tool-finish quiet-window tuning; +- optional manual "sync now". + +No accelerator is proof. + +--- + +## 22. 
Feature Gates + +Phase 1: + +```text +CLAUDE_TEAM_MEMBER_WORK_SYNC_ENABLED=true +CLAUDE_TEAM_MEMBER_WORK_SYNC_SHADOW_ONLY=true +``` + +Defaults: + +- enabled can default `true` only if Phase 1 is read/status-only; +- shadow-only must default `true`; +- Phase 2 nudges default `false` until explicitly validated. + +Gate behavior: + +- `CLAUDE_TEAM_MEMBER_WORK_SYNC_ENABLED=false` disables queue, reconcile, status writes, and report acceptance. The MCP report tool should return `feature_disabled`. +- `CLAUDE_TEAM_MEMBER_WORK_SYNC_SHADOW_ONLY=true` allows reconcile/status/report validation but forbids outbox and inbox writes. +- `CLAUDE_TEAM_MEMBER_WORK_SYNC_SHADOW_ONLY=false` is allowed only after Phase 2 implementation and metrics review. +- Report intent recording should also honor `ENABLED=false`; do not write intent files when the feature is explicitly disabled. +- Read surfaces can include `"feature": "disabled"` when disabled, but should not instruct agents to call the report tool. + +Phase 2: + +```text +CLAUDE_TEAM_MEMBER_WORK_SYNC_NUDGES_ENABLED=false +CLAUDE_TEAM_MEMBER_WORK_SYNC_MAX_NUDGES_PER_MEMBER_PER_HOUR=2 +CLAUDE_TEAM_MEMBER_WORK_SYNC_QUIET_WINDOW_MS=90000 +CLAUDE_TEAM_MEMBER_WORK_SYNC_STILL_WORKING_LEASE_MS=600000 +``` + +Recommended defaults by phase: + +| Gate | Phase 1 default | Phase 2 default after metrics | +|---|---:|---:| +| `CLAUDE_TEAM_MEMBER_WORK_SYNC_ENABLED` | `true` | `true` | +| `CLAUDE_TEAM_MEMBER_WORK_SYNC_SHADOW_ONLY` | `true` | `false` only after manual enable | +| `CLAUDE_TEAM_MEMBER_WORK_SYNC_NUDGES_ENABLED` | `false` | `false` until explicitly flipped | +| report tool enabled | `true` when feature enabled | `true` | +| report intent fallback | `true` when feature enabled | `true` | + +Kill-switch expectations: + +- turning `ENABLED=false` should stop queue processing within one event-loop tick; +- pending outbox items must not dispatch while disabled; +- report tool should return a structured disabled response; +- status read 
APIs may still return last known status marked stale/disabled; +- no feature flag should change task board state directly. + +--- + +## 23. Security And Abuse Controls + +Threats: + +- model calls `caught_up` incorrectly; +- model uses `from: user`; +- model uses provider id `codex` as a fake member; +- model reports for another teammate; +- prompt injection asks model to suppress work sync; +- stale report hides new task; +- copied/stale report token suppresses a new agenda; +- malicious payload floods reports. + +Controls: + +- controller validates `from`, reserved actors, unsafe provider aliases, and payload size; +- app validates active member, current fingerprint, report token, and agenda state; +- accepted lease requires trusted runtime identity or valid report token; +- server rejects `caught_up` with non-empty agenda; +- identity failures write no pending intent; +- report size limits; +- note length limit; +- taskIds count limit; +- rejected reports are stored as diagnostics but do not update lease; +- nudges are rate limited in Phase 2. + +Recommended limits: + +```text +note max length: 1000 chars +taskIds max count: 20 +blockerCommentId max length: 128 chars +agenda preview max items in tool response: 10 +report intents max retained per team: 100 +report token validity: current fingerprint plus 15 minutes +status history max retained per member: 20 +``` + +Sanitize output: + +- never include raw hidden prompt text in a rejected response; +- never echo huge notes back to the model; +- never include other members' full agendas in a non-lead response; +- never include agenda preview when rejection reason is identity-related; +- include task refs, kind, and short reason only. + +--- + +## 24. 
DRY And Reuse + +Reuse: + +- member provider resolution logic from stall monitor where possible; +- versioned store pattern from OpenCode runtime stores; +- existing file lock/atomic write utilities; +- existing task reader and member metadata sources; +- existing author validation in controller runtime helpers; +- existing task reference DTOs. + +Do not duplicate: + +- separate reviewer resolution if existing `reviewerResolution` can be extracted cleanly; +- separate provider normalization if shared util exists; +- raw file parsing if `TeamTaskReader` already provides validated tasks. + +Potential extraction: + +```text +src/main/services/team/memberResolution/ +src/main/services/team/reviewerResolution/ +src/main/services/team/versionedStore/ +``` + +Only extract if it reduces duplication without forcing broad migration. + +--- + +## 25. Open Questions + +1. Should Phase 1 expose UI badges immediately, or keep diagnostics developer-only? + - Recommended: small details-only surface, no warning badge yet. + +2. Should `member_work_sync_report` live in task tool group or a new sync group? + - Recommended: new sync/report tool group if catalog supports it; otherwise task-adjacent operational tool. + +3. Should reports be allowed from lead? + - Recommended: only for lead's own agenda, not on behalf of teammates. + +4. Should `blocked` require `needsClarification` or accept dependency blockers? + - Recommended: accept both, but require evidence. + +5. Should `still_working` require explicit taskIds? + - Recommended: optional. If omitted, it covers all current agenda items. If provided, validate subset. + +6. Should the controller duplicate agenda validation for instant report rejection? + - Recommended: no. Keep full policy in `claude_team`. Controller performs identity/schema/size validation and uses live app validation when available. If not available, it records a pending report intent that the app validates later. 
This avoids policy drift between JS controller code and TypeScript feature core. + +7. Should Phase 1 create nudges for obvious no-report states? + - Recommended: no. Even obvious states can be noisy until fingerprint churn and busy suppression are proven on real teams. + +8. Should `caught_up` be accepted when agenda source is unavailable? + - Recommended: no. Unknown state is not caught-up. + +9. Should lead be allowed to report for teammates? + - Recommended: no. Lead can inspect status, but reports are member-scoped and should represent the reporting actor. + +10. Should report notes become task comments? + - Recommended: no. Report notes are diagnostics only. Agents should use task tools for durable task communication. + +11. Should `reportToken` be treated as authentication? + - Recommended: no. It is an accidental-cross-member guard. Real authority still comes from app validation and trusted runtime context when available. + +--- + +## 26. Acceptance Criteria + +Phase 1 is complete when: + +- feature follows `docs/FEATURE_ARCHITECTURE_STANDARD.md`; +- domain code has no filesystem/Electron/main imports; +- agents can read the current `agendaFingerprint` from a documented read surface; +- agents can read a member-scoped `reportToken` from the same read surface; +- report validation rejects stale/caught-up-invalid cases; +- report validation rejects identity mismatch, unsafe aliases, and invalid report tokens; +- report validation and reconciler respect stopped/cancelled team lifecycle; +- report fallback cannot claim an accepted lease without app-side validation; +- reconcile creates stable conditions for realistic task boards; +- status store is versioned and safe under invalid JSON; +- no inbox messages are written by member-work-sync; +- existing watchdog tests still pass; +- typecheck passes; +- docs include rollout/Phase 2 requirements. 
+ +Phase 2 may start only when: + +- shadow metrics show acceptable fingerprint churn; +- no high false-positive `NeedsSync` pattern is found; +- idempotent inbox insert design is implemented and tested; +- outbox dispatcher has stale revalidation tests. + +Phase 2 must not start if any of these are true: + +- report validation can accept leases without app-side agenda validation; +- report validation can accept leases with claimed `from` only and no trusted identity/report token; +- queue can run more than one reconcile for the same member concurrently; +- watchdog cooldown integration is untested; +- outbox dispatcher can send while feature is disabled; +- outbox dispatcher can send for a stopped/cancelled team; +- pending intent replay can turn stale reports into accepted leases; +- fingerprint transition diagnostics are missing; +- current agenda read surface cannot produce the same fingerprint as reconciler. + +Cross-repo acceptance: + +- OpenCode launch/readiness remains green when work-sync tool is absent and require-tool gate is false. +- OpenCode launch/readiness fails with a clear message when require-tool gate is true and tool is absent. +- `task_briefing` and work-sync status use the same `AgendaFingerprintService`. +- `member_work_sync_report` does not become a substitute for `task_start`, `task_add_comment`, `task_complete`, or review tools. +- No report response writes task comments, messages, or task status. + +--- + +## 27. Implementation Order + +Recommended commit sequence: + +1. `docs: add member work sync control plane plan` +2. `refactor(team): extract current review cycle resolver` +3. `refactor(team): extract reusable versioned json store` +4. `feat(member-work-sync): add domain agenda and fingerprint model` +5. `feat(member-work-sync): add status store and shadow reconciler` +6. `feat(member-work-sync): expose current agenda fingerprint read surface` +7. `feat(member-work-sync): add report token adapter and validation` +8. 
`feat(member-work-sync): add report validation use case` +9. `feat(agent-teams): add member work sync report tool` +10. `test(member-work-sync): add shadow control plane coverage` +11. `feat(member-work-sync): wire shadow reconciler to team changes` + +Keep Phase 2 in separate commits/PR. + +### 27.1 Cut 1: Shadow Core And Status + +Goal: + +```text +Compute member work-sync status from authoritative team state and persist diagnostics. +No MCP report tool. No nudges. No renderer warning UI. +``` + +Estimate: `🎯 9 🛡️ 10 🧠 5`, `450-750 LOC`. + +Step order: + +1. Create feature shell under `src/features/member-work-sync`. + - Add `contracts`, `core/domain`, `core/application`, `main/adapters`, and `main/index.ts`. + - Export only through feature entrypoints. + - Do not import Electron, filesystem, or `TeamDataService` inside `core`. + +2. Extract current-cycle reviewer resolver. + - Create a shared resolver near team services or inside member-work-sync adapter layer. + - Keep two separate functions: + - `resolveCurrentActionReviewer()` for current actionable work. + - `resolveHistoricalReviewActor()` for old stall/log attribution if needed. + - Add tests before replacing existing call sites. + - Stop if replacing stall-monitor behavior changes existing tests. + +3. Extract or wrap versioned JSON store. + - Prefer a generic helper under `src/main/services/team/versionedJsonStore`. + - Do not deep-import OpenCode runtime store internals from the new feature. + - Preserve quarantine and atomic write behavior. + - Add store tests before using it from member-work-sync. + +4. Implement domain types and agenda builder. + - Input is normalized task/member snapshots from ports, not raw files. + - Include only documented actionable item kinds: `work`, `review`, `blocked`, `clarification`. + - If ownership/reviewer is ambiguous, skip the item and emit diagnostic. + - Do not guess owner from comments or log activity. + +5. Implement fingerprint service. 
+ - Domain builds canonical fingerprint payload. + - Application receives `HashPort`; Node crypto lives in infrastructure. + - Add transition diagnostics but keep them out of the hash. + - Verify timestamp/comment/runtime-only changes do not churn fingerprints. + +6. Implement decision policy. + - Empty agenda -> caught up. + - Non-empty agenda + valid lease -> valid lease. + - Non-empty agenda + busy -> suppressed busy. + - Non-empty agenda + no valid lease -> needs sync. + - In Cut 1 there are no accepted agent reports yet, so valid lease only appears in direct unit tests or seeded fixtures. + +7. Implement `TeamTaskAgendaSource`. + - Adapter reads through existing validated task/member services. + - Adapter resolves team lifecycle before producing agenda. + - Adapter returns no agenda for stopped/cancelled/deleted teams. + - Adapter must not mutate tasks, messages, or runtime state. + +8. Implement `JsonMemberWorkSyncStatusStore`. + - Path: `~/.claude/teams/<teamName>/.member-work-sync/status.json`. + - Store bounded member status and diagnostics only. + - No report intents or outbox in Cut 1 unless required by tests as empty schema. + +9. Implement `MemberWorkSyncReconciler`. + - Reconciles one `(teamName, memberName)` at a time. + - Checks lifecycle first, then member active, then agenda. + - Writes status only through `MemberWorkSyncStatusStorePort`. + - Does not send any message or call runtime delivery. + +10. Add shadow trigger wiring behind feature gate. + - Default shadow status can be on, but all side effects are status-only. + - Use quiet-window queue and bounded concurrency. + - Wire broad team/task change events only after domain tests are green. + - Drop queued entries when team/member is removed or stopped. + +11. Add read-only diagnostics surface for tests. + - Main-process use case can return current status for a team/member. + - Renderer can remain untouched in Cut 1.
+ +Cut 1 tests: + +```bash +pnpm vitest run test/features/member-work-sync +pnpm vitest run test/main/services/team/stallMonitor/TeamTaskStallPolicy.test.ts test/main/services/team/stallMonitor/TeamTaskStallMonitor.test.ts +pnpm typecheck --pretty false +git diff --check +``` + +Cut 1 stop criteria: + +- Any false-positive `NeedsSync` is found for completed/deleted/non-owned tasks. +- Fingerprint changes because only timestamps/comments/runtime liveness changed. +- Reconciler mutates task board, messages, inbox, or runtime. +- Stopped team still writes fresh status as active. + +### 27.2 Cut 2: Report Tool, Token, And App Validation + +Goal: + +```text +Allow agents to report current work-sync state, but only app-side validation can accept a lease. +The orchestrator remains a thin MCP adapter. +``` + +Estimate: `🎯 8 🛡️ 9 🧠 6`, `350-600 LOC`. + +Step order: + +1. Add `ReportTokenPort`. + - Infrastructure adapter signs/verifies token using app-owned secret/nonce. + - Token binds `(teamName, memberName, agendaFingerprint)`. + - Token does not affect fingerprint. + - Token expiry uses app clock. + +2. Extend current agenda read surface. + - Prefer `task_briefing.workSync`. + - Include compact agenda preview, `agendaFingerprint`, state, and `reportToken`. + - Omit report instructions when tool is unavailable or feature disabled. + - Keep old `task_briefing` fields unchanged. + +3. Implement report validator. + - Pure domain validation first. + - Reject stale fingerprint, invalid caught-up, foreign task ids, invalid blocker evidence. + - Reject identity mismatch, unsafe provider aliases, missing/invalid report token when no trusted runtime context exists. + +4. Implement reporter use case. + - Checks lifecycle first. + - Checks active member second. + - Rebuilds fresh agenda and fingerprint. + - Verifies token. + - Stores accepted or rejected report diagnostics. + - Never writes tasks/comments/messages. + +5. Add app validation bridge contract. 
+ - `claude_team` exposes a narrow main-process/application port. + - Orchestrator calls that port when available. + - Result is structured and does not leak internal task data. + +6. Add orchestrator MCP tool. + - Tool name: `member_work_sync_report`. + - Controller validates schema, size, reserved actors, obvious unsafe aliases. + - Controller attaches trusted runtime/session context when available. + - Controller forwards to app validation port. + - If app validation port unavailable, controller writes pending intent only if identity is not terminally invalid. + - Controller never returns accepted lease unless app returned accepted lease. + +7. Add pending intent replay. + - Replay through the same reporter use case. + - Stale or identity-invalid intents become rejected diagnostics. + - Pending intent replay cannot update accepted lease directly. + +8. Update OpenCode readiness compatibility. + - Missing `member_work_sync_report` must not fail launch by default. + - Require-tool gate stays false. + - Add compatibility tests for old orchestrator/new app and new orchestrator/old app paths. + +Cut 2 tests: + +```bash +pnpm vitest run test/features/member-work-sync +pnpm vitest run test/main/services/team/TeamProvisioningServiceRelay.test.ts +cd /Users/belief/dev/projects/claude/agent_teams_orchestrator && bun test src/services/opencode/OpenCodeBridgeCommandHandler.test.ts +cd /Users/belief/dev/projects/claude/claude_team && pnpm typecheck --pretty false +git diff --check +``` + +Cut 2 stop criteria: + +- Orchestrator accepts a lease without app validation. +- `from` alone can create a valid lease. +- `user`, `system`, provider id, removed member, or session mismatch writes a pending intent. +- Invalid token creates accepted or valid lease state. +- Missing new tool blocks OpenCode launch while require-tool gate is false. + +### 27.3 Cut 3: Minimal UI And Developer Diagnostics + +Goal: + +```text +Expose work-sync state without creating noisy user-facing warnings. 
+``` + +Estimate: `🎯 8 🛡️ 8 🧠 5`, `180-320 LOC`. + +Step order: + +1. Add read-only API contract. + - Return status for current team/member. + - Do not expose raw notes, raw comments, hidden prompt text, or other members' full agendas. + +2. Add renderer adapter/view model. + - Convert domain status to compact labels: `Synced`, `Working`, `Needs sync`, `Blocked`, `Unknown`. + - Keep status neutral. No warning banner in Phase 1. + +3. Add details surface. + - Show fingerprint, state, latest report, lease expiry, and short agenda preview. + - Hide developer-only diagnostics behind existing debug/dev patterns if available. + +4. Add tests for UI mapping. + - Snapshot only small view-model outputs, not full prompts or full task data. + - Verify identity rejection does not leak another member's agenda. + +Cut 3 tests: + +```bash +pnpm vitest run test/features/member-work-sync test/renderer +pnpm typecheck --pretty false +git diff --check +``` + +Cut 3 stop criteria: + +- UI shows alarming warning banners in Phase 1. +- UI implies the agent is broken when state is only shadow `NeedsSync`. +- UI exposes another member's full agenda or raw report notes. +- Renderer contains domain policy logic instead of view-model mapping. + +### 27.4 Phase 2: Nudges Later, Separate Work + +Do not start Phase 2 until shadow metrics prove low noise. + +Phase 2 sequence: + +1. Add outbox schema and idempotency key. +2. Add dispatcher with feature gate default off. +3. Add stale revalidation before dispatch. +4. Add watchdog cooldown integration. +5. Add one-in-flight per `(teamName, memberName, fingerprint)`. +6. Add max nudges per member/hour. +7. Enable only for sampled/dev teams first. + +Phase 2 tests must prove no duplicate nudges, no stopped-team nudges, no stale-fingerprint nudges, and no watchdog duplicate ping. + +--- + +## 28. Implementation Readiness Lock + +The plan is ready for implementation only if Phase 1 stays intentionally narrow. 
+ +Phase 1 includes: + +- domain agenda builder; +- current-cycle reviewer resolver extraction; +- agenda fingerprint service; +- report token port and adapter; +- report validator and reporter; +- shadow reconciler; +- versioned status store; +- current agenda read surface; +- MCP report tool as optional capability; +- diagnostics and tests. + +Phase 1 excludes: + +- inbox nudges; +- runtime restarts; +- task status mutation; +- task comments/messages from report notes; +- watchdog semantic changes; +- renderer warning banners; +- making the MCP report tool required for OpenCode launch. + +Non-negotiable implementation rules: + +- `from` is never authority by itself. +- Stopped/cancelled teams cannot accept reports. +- Accepted leases require app-side agenda validation. +- Pending intents can never become accepted without replay through app validation. +- Work-sync reports do not count as task progress for watchdog. +- The same source builds `task_briefing.workSync`, reconciler agenda, and report validation agenda. +- Any uncertain agenda ownership case returns no actionable item plus diagnostics, not a guessed nudge. + +If any of these rules becomes hard to satisfy during implementation, stop and split the work into a smaller refactor before continuing. + +Recommended first coding cut: + +`🎯 9 🛡️ 10 🧠 5`, `450-750 LOC`. + +Implement only domain + adapters needed to compute and persist shadow statuses. Do not add the MCP tool until agenda/fingerprint/status tests are green. This creates a stable base and avoids debugging protocol, storage, and agenda policy at the same time. + +Second coding cut: + +`🎯 8 🛡️ 9 🧠 6`, `350-600 LOC`. + +Add `member_work_sync_report`, report token, app validation bridge, pending-intent fallback, and cross-repo compatibility tests. + +Third coding cut: + +`🎯 8 🛡️ 8 🧠 5`, `180-320 LOC`. + +Add minimal renderer/details surface only after main-process state is proven. No alarming UI by default. + +## 29. 
Final Recommendation + +Build `member-work-sync` as a clean feature, starting with Phase 1 shadow mode. + +Do not implement nudges in the first pass. + +The most important engineering quality bar: + +```text +Every decision is based on fresh authoritative board state. +Every side effect is durable and idempotent. +Every agent report is server-validated. +Every future nudge is rare, bounded, and fingerprint-scoped. +``` + +That gives us a stronger architecture than a simple nudge queue and avoids the main failure modes: spam, stale flags, duplicate messages, false completion, and watchdog conflicts. diff --git a/mcp-server/src/agent-teams-controller.d.ts b/mcp-server/src/agent-teams-controller.d.ts index 011c692f..79693520 100644 --- a/mcp-server/src/agent-teams-controller.d.ts +++ b/mcp-server/src/agent-teams-controller.d.ts @@ -8,7 +8,20 @@ declare module 'agent-teams-controller' { export interface ControllerTaskApi { createTask(flags: Record): unknown; getTask(taskId: string): unknown; - getTaskComment(taskId: string, commentId: string): { comment: Record; task: { id: string; displayId: string; subject: string; status: string; owner: string | null; commentCount: number } }; + getTaskComment( + taskId: string, + commentId: string + ): { + comment: Record; + task: { + id: string; + displayId: string; + subject: string; + status: string; + owner: string | null; + commentCount: number; + }; + }; listTasks(): unknown[]; listTaskInventory(filters?: Record): unknown[]; listDeletedTasks(): unknown[]; @@ -77,6 +90,9 @@ declare module 'agent-teams-controller' { } export interface ControllerRuntimeApi { + listTeams(flags?: Record): Promise; + getTeam(flags?: Record): Promise; + createTeam(flags: Record): Promise; launchTeam(flags: Record): Promise; stopTeam(flags?: Record): Promise; getRuntimeState(flags?: Record): Promise; diff --git a/mcp-server/src/tools/index.ts b/mcp-server/src/tools/index.ts index 4d8a17f7..c9937293 100644 --- a/mcp-server/src/tools/index.ts +++ 
b/mcp-server/src/tools/index.ts @@ -13,8 +13,10 @@ import { registerProcessTools } from './processTools'; import { registerReviewTools } from './reviewTools'; import { registerRuntimeTools } from './runtimeTools'; import { registerTaskTools } from './taskTools'; +import { registerTeamTools } from './teamTools'; const REGISTRATION_BY_GROUP = { + team: registerTeamTools, task: registerTaskTools, lead: registerLeadTools, kanban: registerKanbanTools, diff --git a/mcp-server/src/tools/runtimeTools.ts b/mcp-server/src/tools/runtimeTools.ts index 2f66baa1..e8a409dc 100644 --- a/mcp-server/src/tools/runtimeTools.ts +++ b/mcp-server/src/tools/runtimeTools.ts @@ -3,7 +3,7 @@ import { z } from 'zod'; import { getController } from '../controller'; import { jsonTextContent } from '../utils/format'; -import { assertConfiguredTeam } from '../utils/teamConfig'; +import { assertConfiguredOrDraftTeam, assertConfiguredTeam } from '../utils/teamConfig'; const toolContextSchema = { teamName: z.string().min(1), @@ -59,7 +59,7 @@ export function registerRuntimeTools(server: Pick) { extraCliArgs, waitForReady, }) => { - assertConfiguredTeam(teamName, claudeDir); + assertConfiguredOrDraftTeam(teamName, claudeDir); return jsonTextContent( await getController(teamName, claudeDir).runtime.launchTeam({ cwd, @@ -99,7 +99,8 @@ export function registerRuntimeTools(server: Pick) { server.addTool({ name: 'runtime_bootstrap_checkin', - description: 'Confirm that an OpenCode team member runtime reached the app MCP bootstrap boundary', + description: + 'Confirm that an OpenCode team member runtime reached the app MCP bootstrap boundary', parameters: z.object({ ...runtimeIdentitySchema, observedAt: z.string().min(1).optional(), diff --git a/mcp-server/src/tools/teamTools.ts b/mcp-server/src/tools/teamTools.ts new file mode 100644 index 00000000..3808779b --- /dev/null +++ b/mcp-server/src/tools/teamTools.ts @@ -0,0 +1,140 @@ +import type { FastMCP } from 'fastmcp'; +import { z } from 'zod'; + +import { 
getController } from '../controller'; +import { jsonTextContent } from '../utils/format'; + +const controlContextSchema = { + claudeDir: z.string().min(1).optional(), + controlUrl: z.string().optional(), + waitTimeoutMs: z.number().int().min(1000).max(600000).optional(), +}; + +const teamContextSchema = { + ...controlContextSchema, + teamName: z.string().min(1), +}; + +const providerIdSchema = z.enum(['anthropic', 'codex', 'gemini', 'opencode']); +const effortSchema = z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max']); +const fastModeSchema = z.enum(['inherit', 'on', 'off']); + +const memberSchema = z.object({ + name: z.string().min(1), + role: z.string().optional(), + workflow: z.string().optional(), + isolation: z.literal('worktree').optional(), + providerId: providerIdSchema.optional(), + providerBackendId: z.string().min(1).optional(), + model: z.string().min(1).optional(), + effort: effortSchema.optional(), + fastMode: fastModeSchema.optional(), +}); + +function controlFlags(args: { + controlUrl?: string; + waitTimeoutMs?: number; +}): Record { + return { + ...(args.controlUrl ? { controlUrl: args.controlUrl } : {}), + ...(args.waitTimeoutMs ? 
{ waitTimeoutMs: args.waitTimeoutMs } : {}), + }; +} + +export function registerTeamTools(server: Pick) { + server.addTool({ + name: 'team_list', + description: 'List teams through the local Agent Teams control API', + parameters: z.object({ + ...controlContextSchema, + }), + execute: async ({ claudeDir, controlUrl, waitTimeoutMs }) => { + return jsonTextContent( + await getController('agent-teams-control', claudeDir).runtime.listTeams( + controlFlags({ controlUrl, waitTimeoutMs }) + ) + ); + }, + }); + + server.addTool({ + name: 'team_get', + description: 'Get a team snapshot through the local Agent Teams control API', + parameters: z.object({ + ...teamContextSchema, + }), + execute: async ({ teamName, claudeDir, controlUrl, waitTimeoutMs }) => { + return jsonTextContent( + await getController(teamName, claudeDir).runtime.getTeam( + controlFlags({ controlUrl, waitTimeoutMs }) + ) + ); + }, + }); + + server.addTool({ + name: 'team_create', + description: + 'Create a draft team configuration through the local Agent Teams control API. 
This does not launch the team.', + parameters: z.object({ + ...teamContextSchema, + displayName: z.string().min(1).optional(), + description: z.string().optional(), + color: z.string().min(1).optional(), + members: z.array(memberSchema).optional(), + cwd: z.string().min(1).optional(), + prompt: z.string().min(1).optional(), + providerId: providerIdSchema.optional(), + providerBackendId: z.string().min(1).optional(), + model: z.string().min(1).optional(), + effort: effortSchema.optional(), + fastMode: fastModeSchema.optional(), + limitContext: z.boolean().optional(), + skipPermissions: z.boolean().optional(), + worktree: z.string().min(1).optional(), + extraCliArgs: z.string().min(1).optional(), + }), + execute: async ({ + teamName, + claudeDir, + controlUrl, + waitTimeoutMs, + displayName, + description, + color, + members, + cwd, + prompt, + providerId, + providerBackendId, + model, + effort, + fastMode, + limitContext, + skipPermissions, + worktree, + extraCliArgs, + }) => { + return jsonTextContent( + await getController(teamName, claudeDir).runtime.createTeam({ + ...controlFlags({ controlUrl, waitTimeoutMs }), + ...(displayName ? { displayName } : {}), + ...(description ? { description } : {}), + ...(color ? { color } : {}), + ...(members ? { members } : {}), + ...(cwd ? { cwd } : {}), + ...(prompt ? { prompt } : {}), + ...(providerId ? { providerId } : {}), + ...(providerBackendId ? { providerBackendId } : {}), + ...(model ? { model } : {}), + ...(effort ? { effort } : {}), + ...(fastMode ? { fastMode } : {}), + ...(limitContext !== undefined ? { limitContext } : {}), + ...(skipPermissions !== undefined ? { skipPermissions } : {}), + ...(worktree ? { worktree } : {}), + ...(extraCliArgs ? 
{ extraCliArgs } : {}), + }) + ); + }, + }); +} diff --git a/mcp-server/src/utils/teamConfig.ts b/mcp-server/src/utils/teamConfig.ts index 1e9cb650..37983524 100644 --- a/mcp-server/src/utils/teamConfig.ts +++ b/mcp-server/src/utils/teamConfig.ts @@ -3,38 +3,73 @@ import path from 'node:path'; import { getController } from '../controller'; -function resolveConfigPath(teamName: string, claudeDir?: string): string { +function unknownTeamMessage(teamName: string): string { + return `Unknown team "${teamName}". Board tools require an existing configured team with config.json. Use the real board teamName from durable team context - never use a member or lead name as teamName.`; +} + +function resolveTeamPaths( + teamName: string, + claudeDir?: string +): { + configPath: string; + metaPath: string; +} { const controller = getController(teamName, claudeDir) as { context?: { paths?: { teamDir?: string } }; }; const teamDir = controller.context?.paths?.teamDir; if (typeof teamDir !== 'string' || teamDir.trim().length === 0) { - throw new Error( - `Unknown team "${teamName}". Board tools require an existing configured team with config.json. Use the real board teamName from durable team context - never use a member or lead name as teamName.` - ); + throw new Error(unknownTeamMessage(teamName)); } - return path.join(teamDir, 'config.json'); + return { + configPath: path.join(teamDir, 'config.json'), + metaPath: path.join(teamDir, 'team.meta.json'), + }; +} + +function readJsonObject(filePath: string): Record | null { + let raw = ''; + try { + raw = fs.readFileSync(filePath, 'utf8'); + } catch { + return null; + } + + try { + const parsed = JSON.parse(raw) as unknown; + return parsed && typeof parsed === 'object' && !Array.isArray(parsed) + ? 
(parsed as Record) + : null; + } catch { + return null; + } +} + +function isConfiguredTeamConfig(value: Record | null): boolean { + return typeof value?.name === 'string' && value.name.trim().length > 0; +} + +function isDraftTeamMeta(value: Record | null): boolean { + return value?.version === 1 && typeof value.cwd === 'string' && value.cwd.trim().length > 0; } export function assertConfiguredTeam(teamName: string, claudeDir?: string): void { - const configPath = resolveConfigPath(teamName, claudeDir); - let raw = ''; - try { - raw = fs.readFileSync(configPath, 'utf8'); - } catch { - throw new Error( - `Unknown team "${teamName}". Board tools require an existing configured team with config.json. Use the real board teamName from durable team context - never use a member or lead name as teamName.` - ); - } - - try { - const parsed = JSON.parse(raw) as { name?: unknown }; - if (typeof parsed?.name !== 'string' || parsed.name.trim().length === 0) { - throw new Error('invalid'); - } - } catch { - throw new Error( - `Unknown team "${teamName}". Board tools require an existing configured team with config.json. 
Use the real board teamName from durable team context - never use a member or lead name as teamName.` - ); + const { configPath } = resolveTeamPaths(teamName, claudeDir); + const parsed = readJsonObject(configPath); + if (!isConfiguredTeamConfig(parsed)) { + throw new Error(unknownTeamMessage(teamName)); } } + +export function assertConfiguredOrDraftTeam(teamName: string, claudeDir?: string): void { + const { configPath, metaPath } = resolveTeamPaths(teamName, claudeDir); + if (isConfiguredTeamConfig(readJsonObject(configPath))) { + return; + } + + if (isDraftTeamMeta(readJsonObject(metaPath))) { + return; + } + + throw new Error(unknownTeamMessage(teamName)); +} diff --git a/mcp-server/test/tools.test.ts b/mcp-server/test/tools.test.ts index 26a16407..b9ea1d2f 100644 --- a/mcp-server/test/tools.test.ts +++ b/mcp-server/test/tools.test.ts @@ -224,6 +224,108 @@ describe('agent-teams-mcp tools', () => { } }); + it('lists, gets, and creates teams through the local control API', async () => { + const claudeDir = makeClaudeDir(); + const calls: Array<{ method?: string; url?: string; body?: unknown }> = []; + const server = await startControlServer(async ({ method, url, body }) => { + calls.push({ method, url, body }); + + if (method === 'GET' && url === '/api/teams') { + return { + body: [ + { + teamName: 'alpha', + displayName: 'Alpha', + description: '', + memberCount: 1, + taskCount: 0, + lastActivity: null, + pendingCreate: true, + }, + ], + }; + } + if (method === 'GET' && url === '/api/teams/alpha') { + return { + body: { + teamName: 'alpha', + members: [{ name: 'builder', role: 'Engineer' }], + tasks: [], + }, + }; + } + if (method === 'POST' && url === '/api/teams') { + return { statusCode: 201, body: { teamName: 'alpha' } }; + } + + return { statusCode: 404, body: { error: `Unhandled ${method} ${url}` } }; + }); + + try { + const listed = parseJsonToolResult( + await getTool('team_list').execute({ + claudeDir, + controlUrl: server.baseUrl, + }) + ); + 
expect(listed[0].teamName).toBe('alpha'); + + const fetched = parseJsonToolResult( + await getTool('team_get').execute({ + claudeDir, + teamName: 'alpha', + controlUrl: server.baseUrl, + }) + ); + expect(fetched.teamName).toBe('alpha'); + + const created = parseJsonToolResult( + await getTool('team_create').execute({ + claudeDir, + teamName: 'alpha', + controlUrl: server.baseUrl, + displayName: 'Alpha', + members: [{ name: 'builder', role: 'Engineer', providerId: 'codex' }], + cwd: '/tmp/project', + providerId: 'codex', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + }) + ); + expect(created.teamName).toBe('alpha'); + + expect(calls).toEqual([ + { + method: 'GET', + url: '/api/teams', + body: undefined, + }, + { + method: 'GET', + url: '/api/teams/alpha', + body: undefined, + }, + { + method: 'POST', + url: '/api/teams', + body: { + teamName: 'alpha', + displayName: 'Alpha', + members: [{ name: 'builder', role: 'Engineer', providerId: 'codex' }], + cwd: '/tmp/project', + providerId: 'codex', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + }, + }, + ]); + } finally { + await server.close(); + } + }); + it('forwards OpenCode runtime MCP tools through the runtime control bridge', async () => { const claudeDir = makeClaudeDir(); writeTeamConfig(claudeDir, 'alpha', { @@ -1317,7 +1419,9 @@ describe('agent-teams-mcp tools', () => { ).rejects.toThrow('Unknown team "typo-team"'); expect(fs.existsSync(path.join(claudeDir, 'teams', 'typo-team'))).toBe(false); - expect(fs.existsSync(path.join(claudeDir, 'teams', 'real-team', 'inboxes', 'lead.json'))).toBe(false); + expect(fs.existsSync(path.join(claudeDir, 'teams', 'real-team', 'inboxes', 'lead.json'))).toBe( + false + ); }); it('exposes zod schemas that reject obviously invalid payloads', () => { diff --git a/src/main/http/index.ts b/src/main/http/index.ts index c5914839..4b225922 100644 --- a/src/main/http/index.ts +++ b/src/main/http/index.ts @@ -33,6 +33,7 @@ import type { UpdaterService, } from 
'../services'; import type { SshConnectionManager } from '../services/infrastructure/SshConnectionManager'; +import type { TeamDataService } from '../services/team/TeamDataService'; import type { TeamProvisioningService } from '../services/team/TeamProvisioningService'; import type { FastifyInstance } from 'fastify'; @@ -47,6 +48,7 @@ export interface HttpServices { recentProjectsFeature?: RecentProjectsFeatureFacade; updaterService: UpdaterService; sshConnectionManager: SshConnectionManager; + teamDataService?: TeamDataService; teamProvisioningService?: TeamProvisioningService; } @@ -59,7 +61,7 @@ export function registerHttpRoutes( registerSessionRoutes(app, services); registerSearchRoutes(app, services); registerSubagentRoutes(app, services); - if (services.teamProvisioningService) { + if (services.teamProvisioningService || services.teamDataService) { registerTeamRoutes(app, services); } registerNotificationRoutes(app); diff --git a/src/main/http/teams.ts b/src/main/http/teams.ts index d2835c14..38047798 100644 --- a/src/main/http/teams.ts +++ b/src/main/http/teams.ts @@ -1,4 +1,6 @@ -import { validateTeamName } from '@main/ipc/guards'; +import { validateTeammateName, validateTeamName } from '@main/ipc/guards'; +import { getTeamsBasePath } from '@main/utils/pathDecoder'; +import { extractUserFlags, PROTECTED_CLI_FLAGS } from '@shared/utils/cliArgsParser'; import { formatEffortLevelListForProvider, isTeamEffortLevelForProvider, @@ -7,26 +9,44 @@ import { getErrorMessage } from '@shared/utils/errorHandling'; import { createLogger } from '@shared/utils/logger'; import { migrateProviderBackendId } from '@shared/utils/providerBackend'; import { isTeamProviderId } from '@shared/utils/teamProvider'; -import { isAbsolute } from 'path'; +import { constants as fsConstants } from 'fs'; +import { access } from 'fs/promises'; +import { isAbsolute, join } from 'path'; import type { HttpServices } from './index'; -import type { EffortLevel, TeamFastMode, TeamLaunchRequest } 
from '@shared/types/team'; +import type { + EffortLevel, + TeamCreateConfigRequest, + TeamCreateRequest, + TeamFastMode, + TeamLaunchRequest, +} from '@shared/types/team'; import type { FastifyInstance } from 'fastify'; const logger = createLogger('HTTP:teams'); type LaunchBody = Omit; +type CreateTeamBody = TeamCreateConfigRequest; class HttpBadRequestError extends Error {} class HttpFeatureUnavailableError extends Error {} -function getTeamProvisioningService(services: HttpServices) { +function getTeamProvisioningService( + services: HttpServices +): NonNullable { if (!services.teamProvisioningService) { throw new HttpFeatureUnavailableError('Team runtime control is not available in this mode'); } return services.teamProvisioningService; } +function getTeamDataService(services: HttpServices): NonNullable { + if (!services.teamDataService) { + throw new HttpFeatureUnavailableError('Team data control is not available in this mode'); + } + return services.teamDataService; +} + function getStatusCode(error: unknown, fallback: number = 500): number { if (error instanceof HttpBadRequestError) { return 400; @@ -37,11 +57,35 @@ function getStatusCode(error: unknown, fallback: number = 500): number { if (error instanceof Error && error.name === 'RuntimeStaleEvidenceError') { return 409; } + if (error instanceof Error && error.message.startsWith('Team not found')) { + return 404; + } + if (error instanceof Error && error.message.startsWith('Team already exists')) { + return 409; + } return fallback; } function shouldLogError(error: unknown): boolean { - return !(error instanceof HttpBadRequestError) && !(error instanceof HttpFeatureUnavailableError); + const statusCode = getStatusCode(error); + return ( + statusCode >= 500 && + !(error instanceof HttpBadRequestError) && + !(error instanceof HttpFeatureUnavailableError) + ); +} + +function assertProvisioningTeamName(value: unknown): string { + const validated = validateTeamName(value); + if (!validated.valid) { + throw new 
HttpBadRequestError(validated.error ?? 'Invalid teamName'); + } + const teamName = validated.value!; + const parts = teamName.split('-'); + if (teamName.length > 64 || !parts.every((part) => /^[a-z0-9]+$/.test(part))) { + throw new HttpBadRequestError('teamName must be kebab-case [a-z0-9-], max 64 chars'); + } + return teamName; } function assertAbsoluteCwd(cwd: unknown): string { @@ -82,6 +126,40 @@ function assertOptionalBoolean(value: unknown, fieldName: string): boolean | und return value; } +function assertOptionalCwd(value: unknown): string | undefined { + if (value == null) { + return undefined; + } + const cwd = assertOptionalString(value, 'cwd'); + if (!cwd) { + return undefined; + } + if (!isAbsolute(cwd)) { + throw new HttpBadRequestError('cwd must be an absolute path'); + } + return cwd; +} + +function assertOptionalExtraCliArgs(value: unknown): string | undefined { + const extraCliArgs = assertOptionalString(value, 'extraCliArgs'); + if (!extraCliArgs) { + return undefined; + } + if (extraCliArgs.length > 1024) { + throw new HttpBadRequestError('extraCliArgs too long (max 1024)'); + } + + const protectedFlags = extractUserFlags(extraCliArgs).filter((flag) => + PROTECTED_CLI_FLAGS.has(flag) + ); + if (protectedFlags.length > 0) { + throw new HttpBadRequestError( + `extraCliArgs contains app-managed flags: ${[...new Set(protectedFlags)].join(', ')}` + ); + } + return extraCliArgs; +} + function assertOptionalEffort( value: unknown, providerId: TeamLaunchRequest['providerId'] @@ -111,33 +189,92 @@ function assertOptionalFastMode(value: unknown): TeamFastMode | undefined { return value; } -function parseLaunchRequest(teamName: string, body: unknown): TeamLaunchRequest { - const payload = body && typeof body === 'object' ? (body as Record) : {}; - const providerId = - payload.providerId == null - ? 'anthropic' - : isTeamProviderId(payload.providerId) - ? 
payload.providerId - : (() => { - throw new HttpBadRequestError( - 'providerId must be anthropic, codex, gemini, or opencode' - ); - })(); - const prompt = assertOptionalString(payload.prompt, 'prompt'); - const rawProviderBackendId = assertOptionalString(payload.providerBackendId, 'providerBackendId'); +function parseProviderId(value: unknown): TeamLaunchRequest['providerId'] { + if (value == null) { + return 'anthropic'; + } + if (isTeamProviderId(value)) { + return value; + } + throw new HttpBadRequestError('providerId must be anthropic, codex, gemini, or opencode'); +} + +function parseProviderBackendId( + providerId: TeamLaunchRequest['providerId'], + value: unknown +): TeamLaunchRequest['providerBackendId'] | undefined { + const rawProviderBackendId = assertOptionalString(value, 'providerBackendId'); const providerBackendId = migrateProviderBackendId(providerId, rawProviderBackendId); if (rawProviderBackendId && !providerBackendId) { throw new HttpBadRequestError( 'providerBackendId must be one of auto, adapter, api, cli-sdk, or codex-native' ); } + return providerBackendId; +} + +function parseCreateMembers(payloadMembers: unknown): TeamCreateConfigRequest['members'] { + if (payloadMembers == null) { + return []; + } + if (!Array.isArray(payloadMembers)) { + throw new HttpBadRequestError('members must be an array'); + } + + const seenNames = new Set(); + return payloadMembers.map((member) => { + if (!member || typeof member !== 'object') { + throw new HttpBadRequestError('member must be object'); + } + const rawMember = member as Record; + const nameValidation = validateTeammateName(rawMember.name); + if (!nameValidation.valid) { + throw new HttpBadRequestError(nameValidation.error ?? 
'Invalid member name'); + } + const name = nameValidation.value!; + if (seenNames.has(name)) { + throw new HttpBadRequestError('member names must be unique'); + } + seenNames.add(name); + + const role = assertOptionalString(rawMember.role, 'member role'); + const workflow = assertOptionalString(rawMember.workflow, 'member workflow'); + if (rawMember.isolation !== undefined && rawMember.isolation !== 'worktree') { + throw new HttpBadRequestError('member isolation must be "worktree" when provided'); + } + const providerId = + rawMember.providerId == null ? undefined : parseProviderId(rawMember.providerId); + const providerBackendId = parseProviderBackendId(providerId, rawMember.providerBackendId); + const model = assertOptionalString(rawMember.model, 'member model'); + const effort = assertOptionalEffort(rawMember.effort, providerId); + const fastMode = assertOptionalFastMode(rawMember.fastMode); + + return { + name, + ...(role ? { role } : {}), + ...(workflow ? { workflow } : {}), + ...(rawMember.isolation === 'worktree' ? { isolation: 'worktree' as const } : {}), + ...(providerId ? { providerId } : {}), + ...(providerBackendId ? { providerBackendId } : {}), + ...(model ? { model } : {}), + ...(effort ? { effort } : {}), + ...(fastMode ? { fastMode } : {}), + }; + }); +} + +function parseLaunchRequest(teamName: string, body: unknown): TeamLaunchRequest { + const payload = body && typeof body === 'object' ? 
(body as Record) : {}; + const providerId = parseProviderId(payload.providerId); + const prompt = assertOptionalString(payload.prompt, 'prompt'); + const providerBackendId = parseProviderBackendId(providerId, payload.providerBackendId); const model = assertOptionalString(payload.model, 'model'); const effort = assertOptionalEffort(payload.effort, providerId); const fastMode = assertOptionalFastMode(payload.fastMode); const clearContext = assertOptionalBoolean(payload.clearContext, 'clearContext'); const skipPermissions = assertOptionalBoolean(payload.skipPermissions, 'skipPermissions'); const worktree = assertOptionalString(payload.worktree, 'worktree'); - const extraCliArgs = assertOptionalString(payload.extraCliArgs, 'extraCliArgs'); + const extraCliArgs = assertOptionalExtraCliArgs(payload.extraCliArgs); return { teamName, @@ -173,6 +310,150 @@ function parseLaunchRequest(teamName: string, body: unknown): TeamLaunchRequest }; } +function parseCreateTeamRequest(body: unknown): TeamCreateConfigRequest { + const payload = body && typeof body === 'object' ? (body as Record) : {}; + const teamName = assertProvisioningTeamName(payload.teamName); + const providerId = payload.providerId == null ? 
undefined : parseProviderId(payload.providerId); + const providerBackendId = parseProviderBackendId(providerId, payload.providerBackendId); + const displayName = assertOptionalString(payload.displayName, 'displayName'); + const description = assertOptionalString(payload.description, 'description'); + const color = assertOptionalString(payload.color, 'color'); + const cwd = assertOptionalCwd(payload.cwd); + const prompt = assertOptionalString(payload.prompt, 'prompt'); + const model = assertOptionalString(payload.model, 'model'); + const effort = assertOptionalEffort(payload.effort, providerId); + const fastMode = assertOptionalFastMode(payload.fastMode); + const limitContext = assertOptionalBoolean(payload.limitContext, 'limitContext'); + const skipPermissions = assertOptionalBoolean(payload.skipPermissions, 'skipPermissions'); + const worktree = assertOptionalString(payload.worktree, 'worktree'); + const extraCliArgs = assertOptionalExtraCliArgs(payload.extraCliArgs); + + return { + teamName, + members: parseCreateMembers(payload.members), + ...(displayName ? { displayName } : {}), + ...(description ? { description } : {}), + ...(color ? { color } : {}), + ...(cwd ? { cwd } : {}), + ...(prompt ? { prompt } : {}), + ...(providerId ? { providerId } : {}), + ...(providerBackendId ? { providerBackendId } : {}), + ...(model ? { model } : {}), + ...(effort ? { effort } : {}), + ...(fastMode ? { fastMode } : {}), + ...(limitContext !== undefined ? { limitContext } : {}), + ...(skipPermissions !== undefined ? { skipPermissions } : {}), + ...(worktree ? { worktree } : {}), + ...(extraCliArgs ? { extraCliArgs } : {}), + }; +} + +function getObjectPayload(body: unknown): Record { + return body && typeof body === 'object' ? (body as Record) : {}; +} + +function pickOptionalString( + payload: Record, + key: string, + fallback: string | undefined, + fieldName: string +): string | undefined { + return Object.hasOwn(payload, key) ? 
assertOptionalString(payload[key], fieldName) : fallback; +} + +function pickOptionalBoolean( + payload: Record, + key: string, + fallback: boolean | undefined, + fieldName: string +): boolean | undefined { + return Object.hasOwn(payload, key) ? assertOptionalBoolean(payload[key], fieldName) : fallback; +} + +function parseDraftLaunchCreateRequest( + savedRequest: TeamCreateRequest, + body: unknown +): TeamCreateRequest { + const payload = getObjectPayload(body); + const cwd = Object.hasOwn(payload, 'cwd') ? assertAbsoluteCwd(payload.cwd) : savedRequest.cwd; + if (!cwd) { + throw new HttpBadRequestError('cwd is required'); + } + + const providerId = Object.hasOwn(payload, 'providerId') + ? parseProviderId(payload.providerId) + : (savedRequest.providerId ?? 'anthropic'); + const providerBackendId = parseProviderBackendId( + providerId, + Object.hasOwn(payload, 'providerBackendId') + ? payload.providerBackendId + : savedRequest.providerBackendId + ); + const effort = assertOptionalEffort( + Object.hasOwn(payload, 'effort') ? payload.effort : savedRequest.effort, + providerId + ); + const fastMode = Object.hasOwn(payload, 'fastMode') + ? assertOptionalFastMode(payload.fastMode) + : savedRequest.fastMode; + const extraCliArgs = Object.hasOwn(payload, 'extraCliArgs') + ? assertOptionalExtraCliArgs(payload.extraCliArgs) + : savedRequest.extraCliArgs; + if (extraCliArgs) { + assertOptionalExtraCliArgs(extraCliArgs); + } + + return { + teamName: savedRequest.teamName, + displayName: savedRequest.displayName, + description: savedRequest.description, + color: savedRequest.color, + members: savedRequest.members, + cwd, + prompt: pickOptionalString(payload, 'prompt', savedRequest.prompt, 'prompt'), + providerId, + ...(providerBackendId ? { providerBackendId } : {}), + model: pickOptionalString(payload, 'model', savedRequest.model, 'model'), + ...(effort ? { effort } : {}), + ...(fastMode ? 
{ fastMode } : {}), + limitContext: pickOptionalBoolean( + payload, + 'limitContext', + savedRequest.limitContext, + 'limitContext' + ), + skipPermissions: pickOptionalBoolean( + payload, + 'skipPermissions', + savedRequest.skipPermissions, + 'skipPermissions' + ), + worktree: pickOptionalString(payload, 'worktree', savedRequest.worktree, 'worktree'), + ...(extraCliArgs ? { extraCliArgs } : {}), + }; +} + +async function getDraftSavedRequest( + services: HttpServices, + teamName: string +): Promise { + if (!services.teamDataService) { + return null; + } + + const configPath = join(getTeamsBasePath(), teamName, 'config.json'); + try { + await access(configPath, fsConstants.F_OK); + return null; + } catch (error) { + if ((error as NodeJS.ErrnoException).code !== 'ENOENT') { + throw error; + } + } + + return getTeamDataService(services).getSavedRequest(teamName); +} + function withRuntimeTeamName(teamName: string, body: unknown): Record { const payload = body && typeof body === 'object' && !Array.isArray(body) @@ -186,6 +467,56 @@ function withRuntimeTeamName(teamName: string, body: unknown): Record { + try { + return reply.send(await getTeamDataService(services).listTeams()); + } catch (error) { + if (shouldLogError(error)) { + logger.error('Error in GET /api/teams:', getErrorMessage(error)); + } + return reply.status(getStatusCode(error)).send({ error: getErrorMessage(error) }); + } + }); + + app.post<{ Body: CreateTeamBody }>('/api/teams', async (request, reply) => { + try { + const createRequest = parseCreateTeamRequest(request.body); + await getTeamDataService(services).createTeamConfig(createRequest); + return reply.status(201).send({ teamName: createRequest.teamName }); + } catch (error) { + if (shouldLogError(error)) { + logger.error('Error in POST /api/teams:', getErrorMessage(error)); + } + return reply.status(getStatusCode(error)).send({ error: getErrorMessage(error) }); + } + }); + + app.get<{ Params: { teamName: string } }>('/api/teams/:teamName', async 
(request, reply) => { + try { + const validatedTeamName = validateTeamName(request.params.teamName); + if (!validatedTeamName.valid) { + return reply.status(400).send({ error: validatedTeamName.error }); + } + + const teamName = validatedTeamName.value!; + const draftSavedRequest = await getDraftSavedRequest(services, teamName); + if (draftSavedRequest) { + return reply.send({ + teamName, + pendingCreate: true, + savedRequest: draftSavedRequest, + }); + } + + return reply.send(await getTeamDataService(services).getTeamData(teamName)); + } catch (error) { + if (shouldLogError(error)) { + logger.error(`Error in GET /api/teams/${request.params.teamName}:`, getErrorMessage(error)); + } + return reply.status(getStatusCode(error)).send({ error: getErrorMessage(error) }); + } + }); + app.post<{ Params: { teamName: string }; Body: LaunchBody }>( '/api/teams/:teamName/launch', async (request, reply) => { @@ -195,11 +526,17 @@ export function registerTeamRoutes(app: FastifyInstance, services: HttpServices) return reply.status(400).send({ error: validatedTeamName.error }); } - const launchRequest = parseLaunchRequest(validatedTeamName.value!, request.body); - const response = await getTeamProvisioningService(services).launchTeam( - launchRequest, - () => undefined - ); + const teamName = validatedTeamName.value!; + const draftSavedRequest = await getDraftSavedRequest(services, teamName); + const response = draftSavedRequest + ? 
await getTeamProvisioningService(services).createTeam( + parseDraftLaunchCreateRequest(draftSavedRequest, request.body), + () => undefined + ) + : await getTeamProvisioningService(services).launchTeam( + parseLaunchRequest(teamName, request.body), + () => undefined + ); return reply.send(response); } catch (error) { const statusCode = getStatusCode(error); diff --git a/src/main/index.ts b/src/main/index.ts index 8e8a9e39..00434089 100644 --- a/src/main/index.ts +++ b/src/main/index.ts @@ -1337,6 +1337,7 @@ async function startHttpServer( recentProjectsFeature, updaterService, sshConnectionManager, + teamDataService, teamProvisioningService, }, modeSwitchHandler, diff --git a/src/main/ipc/teams.ts b/src/main/ipc/teams.ts index d8ff4d8b..6338d079 100644 --- a/src/main/ipc/teams.ts +++ b/src/main/ipc/teams.ts @@ -1222,8 +1222,9 @@ function isValidEffort(value: unknown, providerId?: TeamProviderId | null): valu return isTeamEffortLevelForProvider(value, providerId); } -function parseOptionalMemberProviderId( - value: unknown +function parseOptionalProviderId( + value: unknown, + fieldName: string ): { valid: true; value: TeamProviderId | undefined } | { valid: false; error: string } { if (value === undefined || value === null || value === '') { return { valid: true, value: undefined }; @@ -1231,7 +1232,19 @@ function parseOptionalMemberProviderId( if (isTeamProviderId(value)) { return { valid: true, value }; } - return { valid: false, error: 'member providerId must be anthropic, codex, gemini, or opencode' }; + return { valid: false, error: `${fieldName} must be anthropic, codex, gemini, or opencode` }; +} + +function parseOptionalMemberProviderId( + value: unknown +): { valid: true; value: TeamProviderId | undefined } | { valid: false; error: string } { + return parseOptionalProviderId(value, 'member providerId'); +} + +function parseOptionalTeamProviderId( + value: unknown +): { valid: true; value: TeamProviderId | undefined } | { valid: false; error: string } { + 
return parseOptionalProviderId(value, 'providerId'); } function parseOptionalProviderBackendId( @@ -1611,6 +1624,13 @@ async function validateProvisioningRequest( if (!providerValidation.valid) { return { valid: false, error: providerValidation.error }; } + const providerBackendValidation = parseOptionalProviderBackendId( + (member as { providerBackendId?: unknown }).providerBackendId, + providerValidation.value + ); + if (!providerBackendValidation.valid) { + return { valid: false, error: providerBackendValidation.error }; + } const model = (member as { model?: unknown }).model; if (model !== undefined && typeof model !== 'string') { return { valid: false, error: 'member model must be string' }; @@ -1622,14 +1642,22 @@ async function validateProvisioningRequest( if (!effortValidation.valid) { return { valid: false, error: effortValidation.error }; } + const fastModeValidation = parseOptionalTeamFastMode( + (member as { fastMode?: unknown }).fastMode + ); + if (!fastModeValidation.valid) { + return { valid: false, error: fastModeValidation.error }; + } members.push({ name: memberName, role: typeof role === 'string' ? role.trim() : undefined, workflow: typeof workflow === 'string' ? workflow.trim() : undefined, isolation: isolation === 'worktree' ? ('worktree' as const) : undefined, providerId: providerValidation.value, + providerBackendId: providerBackendValidation.value, model: typeof model === 'string' ? model.trim() || undefined : undefined, effort: effortValidation.value, + fastMode: fastModeValidation.value, }); } @@ -1858,61 +1886,60 @@ async function handleLaunchTeam( } if (isDraft) { - const meta = await teamMetaStore.getMeta(tn); - const membersStore = new TeamMembersMetaStore(); - const membersMeta = await membersStore.getMeta(tn); - const members = membersMeta?.members ?? 
[]; + const savedRequest = await getTeamDataService().getSavedRequest(tn); + if (!savedRequest) { + return { success: false, error: `Missing saved request for draft team: ${tn}` }; + } - const resolvedProviderId = - providerId === 'codex' || providerId === 'gemini' - ? providerId - : meta?.providerId === 'codex' - ? 'codex' - : meta?.providerId === 'gemini' - ? 'gemini' - : 'anthropic'; - const effortValidation = parseOptionalTeamEffort(payload.effort, resolvedProviderId); + const resolvedProviderId = explicitProviderId ?? savedRequest.providerId ?? providerId; + const effortValidation = parseOptionalTeamEffort( + payload.effort ?? savedRequest.effort, + resolvedProviderId + ); if (!effortValidation.valid) { return { success: false, error: effortValidation.error }; } - const fastModeValidation = parseOptionalTeamFastMode(payload.fastMode); + const fastModeValidation = parseOptionalTeamFastMode(payload.fastMode ?? savedRequest.fastMode); if (!fastModeValidation.valid) { return { success: false, error: fastModeValidation.error }; } const createRequest: TeamCreateRequest = { teamName: tn, - displayName: meta?.displayName, - description: meta?.description, - color: meta?.color, + displayName: savedRequest.displayName, + description: savedRequest.description, + color: savedRequest.color, cwd, - prompt: typeof payload.prompt === 'string' ? payload.prompt.trim() || undefined : undefined, + prompt: + typeof payload.prompt === 'string' + ? payload.prompt.trim() || undefined + : savedRequest.prompt, providerId: resolvedProviderId, providerBackendId: migrateProviderBackendId( resolvedProviderId, - providerBackendValidation.value ?? meta?.providerBackendId ?? membersMeta?.providerBackendId + providerBackendValidation.value ?? savedRequest.providerBackendId ), - model: typeof payload.model === 'string' ? payload.model.trim() || undefined : undefined, + model: + typeof payload.model === 'string' ? 
payload.model.trim() || undefined : savedRequest.model, effort: effortValidation.value, - fastMode: fastModeValidation.value ?? meta?.fastMode, - limitContext: typeof payload.limitContext === 'boolean' ? payload.limitContext : undefined, + fastMode: fastModeValidation.value, + limitContext: + typeof payload.limitContext === 'boolean' + ? payload.limitContext + : savedRequest.limitContext, skipPermissions: - typeof payload.skipPermissions === 'boolean' ? payload.skipPermissions : undefined, + typeof payload.skipPermissions === 'boolean' + ? payload.skipPermissions + : savedRequest.skipPermissions, worktree: - typeof payload.worktree === 'string' ? payload.worktree.trim() || undefined : undefined, + typeof payload.worktree === 'string' + ? payload.worktree.trim() || undefined + : savedRequest.worktree, extraCliArgs: typeof payload.extraCliArgs === 'string' ? payload.extraCliArgs.trim() || undefined - : undefined, - members: members.map((m) => ({ - name: m.name, - role: m.role, - workflow: m.workflow, - isolation: m.isolation, - providerId: m.providerId, - model: m.model, - effort: m.effort, - })), + : savedRequest.extraCliArgs, + members: savedRequest.members, }; return wrapTeamHandler('create', () => @@ -3147,14 +3174,69 @@ async function handleCreateConfig( return { success: false, error: 'cwd must be an absolute path' }; } } - const providerBackendValidation = parseOptionalProviderBackendId(payload.providerBackendId); + if (payload.prompt !== undefined && typeof payload.prompt !== 'string') { + return { success: false, error: 'prompt must be a string' }; + } + const providerValidation = parseOptionalTeamProviderId(payload.providerId); + if (!providerValidation.valid) { + return { success: false, error: providerValidation.error }; + } + const providerBackendValidation = parseOptionalProviderBackendId( + payload.providerBackendId, + providerValidation.value + ); if (!providerBackendValidation.valid) { return { success: false, error: providerBackendValidation.error 
}; } + if (payload.model !== undefined && typeof payload.model !== 'string') { + return { success: false, error: 'model must be a string' }; + } + const effortValidation = parseOptionalTeamEffort(payload.effort, providerValidation.value); + if (!effortValidation.valid) { + return { success: false, error: effortValidation.error }; + } const fastModeValidation = parseOptionalTeamFastMode(payload.fastMode); if (!fastModeValidation.valid) { return { success: false, error: fastModeValidation.error }; } + if (payload.limitContext !== undefined && typeof payload.limitContext !== 'boolean') { + return { success: false, error: 'limitContext must be a boolean' }; + } + if (payload.skipPermissions !== undefined && typeof payload.skipPermissions !== 'boolean') { + return { success: false, error: 'skipPermissions must be a boolean' }; + } + if (payload.worktree !== undefined) { + if (typeof payload.worktree !== 'string') { + return { success: false, error: 'worktree must be a string' }; + } + const worktree = payload.worktree.trim(); + if (worktree.length > 128) { + return { success: false, error: 'worktree name too long (max 128)' }; + } + if (worktree && !/^[a-zA-Z0-9][a-zA-Z0-9._-]*$/.test(worktree)) { + return { + success: false, + error: 'worktree name: start with alphanumeric, use [a-zA-Z0-9._-]', + }; + } + } + if (payload.extraCliArgs !== undefined) { + if (typeof payload.extraCliArgs !== 'string') { + return { success: false, error: 'extraCliArgs must be a string' }; + } + if (payload.extraCliArgs.length > 1024) { + return { success: false, error: 'extraCliArgs too long (max 1024)' }; + } + const protectedFlags = extractUserFlags(payload.extraCliArgs).filter((flag) => + PROTECTED_CLI_FLAGS.has(flag) + ); + if (protectedFlags.length > 0) { + return { + success: false, + error: `extraCliArgs contains app-managed flags: ${[...new Set(protectedFlags)].join(', ')}`, + }; + } + } const seenNames = new Set(); const members: TeamCreateConfigRequest['members'] = []; @@ -3190,6 
+3272,13 @@ async function handleCreateConfig( if (!providerValidation.valid) { return { success: false, error: providerValidation.error }; } + const providerBackendValidation = parseOptionalProviderBackendId( + (member as { providerBackendId?: unknown }).providerBackendId, + providerValidation.value + ); + if (!providerBackendValidation.valid) { + return { success: false, error: providerBackendValidation.error }; + } const model = (member as { model?: unknown }).model; if (model !== undefined && typeof model !== 'string') { return { success: false, error: 'member model must be string' }; @@ -3201,14 +3290,22 @@ async function handleCreateConfig( if (!effortValidation.valid) { return { success: false, error: effortValidation.error }; } + const fastModeValidation = parseOptionalTeamFastMode( + (member as { fastMode?: unknown }).fastMode + ); + if (!fastModeValidation.valid) { + return { success: false, error: fastModeValidation.error }; + } members.push({ name: memberName, role: typeof role === 'string' ? role.trim() : undefined, workflow: typeof workflow === 'string' ? workflow.trim() : undefined, isolation: isolation === 'worktree' ? ('worktree' as const) : undefined, providerId: providerValidation.value, + providerBackendId: providerBackendValidation.value, model: typeof model === 'string' ? model.trim() || undefined : undefined, effort: effortValidation.value, + fastMode: fastModeValidation.value, }); } @@ -3220,8 +3317,23 @@ async function handleCreateConfig( color: typeof payload.color === 'string' ? payload.color.trim() || undefined : undefined, members, cwd: typeof payload.cwd === 'string' ? payload.cwd.trim() || undefined : undefined, + prompt: typeof payload.prompt === 'string' ? payload.prompt.trim() || undefined : undefined, + providerId: providerValidation.value, providerBackendId: providerBackendValidation.value, + model: typeof payload.model === 'string' ? 
payload.model.trim() || undefined : undefined, + effort: effortValidation.value, fastMode: fastModeValidation.value, + limitContext: typeof payload.limitContext === 'boolean' ? payload.limitContext : undefined, + skipPermissions: + typeof payload.skipPermissions === 'boolean' ? payload.skipPermissions : undefined, + worktree: + typeof payload.worktree === 'string' && payload.worktree.trim() + ? payload.worktree.trim() + : undefined, + extraCliArgs: + typeof payload.extraCliArgs === 'string' && payload.extraCliArgs.trim() + ? payload.extraCliArgs.trim() + : undefined, }) ); } @@ -4696,52 +4808,9 @@ async function handleGetSavedRequest( if (!validated.valid) { return { success: false, error: validated.error ?? 'Invalid teamName' }; } - const tn = validated.value!; - - const meta = await teamMetaStore.getMeta(tn); - if (!meta) { - return { success: true, data: null }; - } - - const membersStore = new TeamMembersMetaStore(); - const membersMeta = await membersStore.getMeta(tn); - const members = membersMeta?.members ?? []; - - const resolvedProviderId = meta.providerId ?? 'anthropic'; - - return { - success: true, - data: { - teamName: tn, - displayName: meta.displayName, - description: meta.description, - color: meta.color, - cwd: meta.cwd, - prompt: meta.prompt, - providerId: resolvedProviderId, - providerBackendId: migrateProviderBackendId( - resolvedProviderId, - meta.providerBackendId ?? 
membersMeta?.providerBackendId - ), - model: meta.model, - effort: meta.effort as TeamCreateRequest['effort'], - fastMode: meta.fastMode, - skipPermissions: meta.skipPermissions, - worktree: meta.worktree, - extraCliArgs: meta.extraCliArgs, - limitContext: meta.limitContext, - members: members.map((m) => ({ - name: m.name, - role: m.role, - workflow: m.workflow, - isolation: m.isolation, - cwd: m.cwd, - providerId: m.providerId, - model: m.model, - effort: m.effort, - })), - }, - }; + return wrapTeamHandler('getSavedRequest', async () => { + return getTeamDataService().getSavedRequest(validated.value!); + }); } async function handleDeleteDraft( diff --git a/src/main/services/team/TeamDataService.ts b/src/main/services/team/TeamDataService.ts index 3c2d8cef..e00bdbdb 100644 --- a/src/main/services/team/TeamDataService.ts +++ b/src/main/services/team/TeamDataService.ts @@ -85,6 +85,7 @@ import type { TaskRef, TeamConfig, TeamCreateConfigRequest, + TeamCreateRequest, TeamMember, TeamMemberActivityMeta, TeamMemberSnapshot, @@ -854,6 +855,50 @@ export class TeamDataService { return this.configReader.listTeams(); } + async getSavedRequest(teamName: string): Promise { + const meta = await this.teamMetaStore.getMeta(teamName); + if (!meta) { + return null; + } + + const membersMeta = await this.membersMetaStore.getMeta(teamName); + const members = membersMeta?.members ?? []; + const resolvedProviderId = meta.providerId ?? 'anthropic'; + + return { + teamName, + displayName: meta.displayName, + description: meta.description, + color: meta.color, + cwd: meta.cwd, + prompt: meta.prompt, + providerId: resolvedProviderId, + providerBackendId: migrateProviderBackendId( + resolvedProviderId, + meta.providerBackendId ?? 
membersMeta?.providerBackendId + ), + model: meta.model, + effort: meta.effort as TeamCreateRequest['effort'], + fastMode: meta.fastMode, + skipPermissions: meta.skipPermissions, + worktree: meta.worktree, + extraCliArgs: meta.extraCliArgs, + limitContext: meta.limitContext, + members: members.map((member) => ({ + name: member.name, + role: member.role, + workflow: member.workflow, + isolation: member.isolation, + cwd: member.cwd, + providerId: member.providerId, + providerBackendId: member.providerBackendId, + model: member.model, + effort: member.effort, + fastMode: member.fastMode, + })), + }; + } + async listAliveProcessTeams(): Promise { const teams = await this.listTeams(); const alive: string[] = []; @@ -2792,8 +2837,16 @@ export class TeamDataService { description: request.description, color: request.color, cwd: request.cwd?.trim() || '', + prompt: request.prompt, + providerId: request.providerId, providerBackendId: request.providerBackendId, + model: request.model, + effort: request.effort, fastMode: request.fastMode, + skipPermissions: request.skipPermissions, + worktree: request.worktree, + extraCliArgs: request.extraCliArgs, + limitContext: request.limitContext, createdAt: joinedAt, }); @@ -2823,8 +2876,10 @@ export class TeamDataService { workflow: member.workflow?.trim() || undefined, isolation: member.isolation === 'worktree' ? ('worktree' as const) : undefined, providerId: normalizeOptionalTeamProviderId(member.providerId), + providerBackendId: member.providerBackendId, model: member.model?.trim() || undefined, effort: isTeamEffortLevel(member.effort) ? 
member.effort : undefined, + fastMode: member.fastMode, agentType: 'general-purpose' as const, joinedAt, })) diff --git a/src/renderer/components/team/dialogs/CreateTeamDialog.tsx b/src/renderer/components/team/dialogs/CreateTeamDialog.tsx index dfebc8c5..f735a9a7 100644 --- a/src/renderer/components/team/dialogs/CreateTeamDialog.tsx +++ b/src/renderer/components/team/dialogs/CreateTeamDialog.tsx @@ -1611,8 +1611,16 @@ export const CreateTeamDialog = ({ color: request.color, members: request.members, cwd: effectiveCwd || undefined, + prompt: request.prompt, + providerId: request.providerId, providerBackendId: request.providerBackendId, + model: request.model, + effort: request.effort, fastMode: request.fastMode, + limitContext: request.limitContext, + skipPermissions: request.skipPermissions, + worktree: request.worktree, + extraCliArgs: request.extraCliArgs, }); onOpenTeam(request.teamName, effectiveCwd || undefined); resetFormState(); diff --git a/src/renderer/components/team/members/membersEditorTypes.ts b/src/renderer/components/team/members/membersEditorTypes.ts index 73df24bf..2c3cd9ea 100644 --- a/src/renderer/components/team/members/membersEditorTypes.ts +++ b/src/renderer/components/team/members/membersEditorTypes.ts @@ -1,5 +1,10 @@ import type { InlineChip } from '@renderer/types/inlineChip'; -import type { EffortLevel, TeamProviderId } from '@shared/types'; +import type { + EffortLevel, + TeamFastMode, + TeamProviderBackendId, + TeamProviderId, +} from '@shared/types'; export interface MemberDraft { id: string; @@ -11,8 +16,10 @@ export interface MemberDraft { workflowChips?: InlineChip[]; isolation?: 'worktree'; providerId?: TeamProviderId; + providerBackendId?: TeamProviderBackendId; model?: string; effort?: EffortLevel; + fastMode?: TeamFastMode; removedAt?: number | string | null; } diff --git a/src/renderer/components/team/members/membersEditorUtils.ts b/src/renderer/components/team/members/membersEditorUtils.ts index bddd13f8..46b0a402 100644 --- 
a/src/renderer/components/team/members/membersEditorUtils.ts +++ b/src/renderer/components/team/members/membersEditorUtils.ts @@ -10,7 +10,13 @@ import { normalizeOptionalTeamProviderId } from '@shared/utils/teamProvider'; import type { MemberDraft } from './membersEditorTypes'; import type { MentionSuggestion } from '@renderer/types/mention'; -import type { EffortLevel, TeamProviderId, TeamProvisioningMemberInput } from '@shared/types'; +import type { + EffortLevel, + TeamFastMode, + TeamProviderBackendId, + TeamProviderId, + TeamProvisioningMemberInput, +} from '@shared/types'; export function validateMemberNameInline(name: string): string | null { const trimmed = name.trim(); @@ -34,8 +40,10 @@ export function createMemberDraft(initial?: Partial): MemberDraft { workflow: initial?.workflow, isolation: initial?.isolation === 'worktree' ? 'worktree' : undefined, providerId, + providerBackendId: initial?.providerBackendId, model: normalizeExplicitTeamModelForUi(providerId, initial?.model ?? ''), effort: initial?.effort, + fastMode: initial?.fastMode, removedAt: initial?.removedAt, }; } @@ -47,8 +55,10 @@ export function createMemberDraftsFromInputs( role?: string; workflow?: string; providerId?: TeamProviderId; + providerBackendId?: TeamProviderBackendId; model?: string; effort?: EffortLevel; + fastMode?: TeamFastMode; isolation?: 'worktree'; removedAt?: number | string | null; }[] @@ -67,8 +77,10 @@ export function createMemberDraftsFromInputs( workflow: member.workflow, isolation: member.isolation === 'worktree' ? 'worktree' : undefined, providerId: normalizeOptionalTeamProviderId(member.providerId), + providerBackendId: member.providerBackendId, model: member.model ?? 
'', effort: normalizeDraftEffort(member.effort), + fastMode: member.fastMode, removedAt: member.removedAt, }); }); @@ -84,8 +96,10 @@ export function clearMemberModelOverrides(member: MemberDraft): MemberDraft { return { ...member, providerId: undefined, + providerBackendId: undefined, model: '', effort: undefined, + fastMode: undefined, }; } @@ -125,7 +139,9 @@ export function normalizeMemberDraftForProviderMode( return { ...member, providerId: normalizedProviderId, + providerBackendId: undefined, model: '', + fastMode: undefined, }; } return member; @@ -253,6 +269,9 @@ export function buildMembersFromDrafts(members: MemberDraft[]): TeamProvisioning if (providerId) { result.providerId = providerId; } + if (member.providerBackendId) { + result.providerBackendId = member.providerBackendId; + } const model = member.model?.trim(); if (model) { result.model = normalizeExplicitTeamModelForUi(providerId, model); @@ -261,6 +280,9 @@ export function buildMembersFromDrafts(members: MemberDraft[]): TeamProvisioning if (effort) { result.effort = effort; } + if (member.fastMode) { + result.fastMode = member.fastMode; + } return result; }) .filter((member): member is NonNullable => member !== null); diff --git a/src/shared/types/team.ts b/src/shared/types/team.ts index 63dbe188..beef1d6b 100644 --- a/src/shared/types/team.ts +++ b/src/shared/types/team.ts @@ -1283,8 +1283,20 @@ export interface TeamCreateConfigRequest { color?: string; members: TeamProvisioningMemberInput[]; cwd?: string; + prompt?: string; + providerId?: TeamProviderId; providerBackendId?: TeamProviderBackendId; + model?: string; + effort?: EffortLevel; fastMode?: TeamFastMode; + /** When true, context window is limited to 200K tokens instead of the default. */ + limitContext?: boolean; + /** When false, run WITHOUT --dangerously-skip-permissions (manual tool approval). Default: true. */ + skipPermissions?: boolean; + /** Worktree name — CLI: --worktree . 
*/ + worktree?: string; + /** Raw custom CLI args string, shell-split and appended to CLI command. */ + extraCliArgs?: string; } export interface TeamCreateResponse { diff --git a/src/types/agent-teams-controller.d.ts b/src/types/agent-teams-controller.d.ts index 77c209bb..f5e5fccc 100644 --- a/src/types/agent-teams-controller.d.ts +++ b/src/types/agent-teams-controller.d.ts @@ -90,6 +90,9 @@ declare module 'agent-teams-controller' { } export interface ControllerRuntimeApi { + listTeams(flags?: Record): Promise; + getTeam(flags?: Record): Promise; + createTeam(flags: Record): Promise; launchTeam(flags: Record): Promise; stopTeam(flags?: Record): Promise; getRuntimeState(flags?: Record): Promise; diff --git a/test/main/http/teamMcpControl.integration.test.ts b/test/main/http/teamMcpControl.integration.test.ts new file mode 100644 index 00000000..87752ef0 --- /dev/null +++ b/test/main/http/teamMcpControl.integration.test.ts @@ -0,0 +1,395 @@ +// @vitest-environment node + +import Fastify from 'fastify'; +import { mkdtemp, mkdir, rm, writeFile } from 'fs/promises'; +import { tmpdir } from 'os'; +import path from 'path'; +import type { AddressInfo } from 'net'; + +import { registerTools } from '../../../mcp-server/src/tools'; +import { registerTeamRoutes } from '@main/http/teams'; +import { TeamDataService } from '@main/services/team/TeamDataService'; +import { setClaudeBasePathOverride } from '@main/utils/pathDecoder'; +import type { HttpServices } from '@main/http'; +import type { + TeamCreateRequest, + TeamLaunchRequest, + TeamLaunchResponse, + TeamProvisioningProgress, + TeamRuntimeState, +} from '@shared/types/team'; + +interface RegisteredTool { + name: string; + execute: (args: Record) => unknown; +} + +function collectTools(): Map { + const tools = new Map(); + + registerTools({ + addTool(config: RegisteredTool) { + tools.set(config.name, config); + }, + } as never); + + return tools; +} + +function parseJsonToolResult(result: unknown): unknown { + const text = 
(result as { content?: { text?: string }[] }).content?.[0]?.text; + return JSON.parse(text ?? 'null'); +} + +async function fetchJson( + baseUrl: string, + pathname: string +): Promise<{ + body: unknown; + status: number; +}> { + const response = await fetch(`${baseUrl}${pathname}`); + return { + status: response.status, + body: await response.json(), + }; +} + +function createServices(claudeRoot: string): { + createTeamCalls: TeamCreateRequest[]; + services: HttpServices; +} { + const teamDataService = new TeamDataService(); + const createTeamCalls: TeamCreateRequest[] = []; + const aliveTeams = new Set(); + const progressByRunId = new Map(); + const runIdByTeam = new Map(); + + async function persistLaunchedConfig(request: TeamCreateRequest): Promise { + const teamDir = path.join(claudeRoot, 'teams', request.teamName); + await mkdir(teamDir, { recursive: true }); + await writeFile( + path.join(teamDir, 'config.json'), + JSON.stringify( + { + name: request.displayName ?? request.teamName, + projectPath: request.cwd, + members: [ + { + name: 'team-lead', + role: 'team-lead', + agentType: 'team-lead', + }, + ...request.members.map((member) => ({ + name: member.name, + role: member.role, + workflow: member.workflow, + agentType: 'teammate', + providerId: member.providerId, + providerBackendId: member.providerBackendId, + model: member.model, + effort: member.effort, + fastMode: member.fastMode, + })), + ], + }, + null, + 2 + ), + 'utf8' + ); + } + + async function createTeam( + request: TeamCreateRequest, + onProgress: (progress: TeamProvisioningProgress) => void + ): Promise { + createTeamCalls.push(request); + await persistLaunchedConfig(request); + + const runId = `run-${request.teamName}`; + const progress: TeamProvisioningProgress = { + runId, + teamName: request.teamName, + state: 'ready', + message: 'Ready', + startedAt: '2026-04-29T00:00:00.000Z', + updatedAt: '2026-04-29T00:00:01.000Z', + }; + aliveTeams.add(request.teamName); + 
runIdByTeam.set(request.teamName, runId); + progressByRunId.set(runId, progress); + onProgress(progress); + return { runId }; + } + + const teamProvisioningService = { + createTeam, + launchTeam: async ( + request: TeamLaunchRequest, + onProgress: (progress: TeamProvisioningProgress) => void + ): Promise => { + return createTeam( + { + teamName: request.teamName, + cwd: request.cwd, + prompt: request.prompt, + providerId: request.providerId, + providerBackendId: request.providerBackendId, + model: request.model, + effort: request.effort, + fastMode: request.fastMode, + skipPermissions: request.skipPermissions, + worktree: request.worktree, + extraCliArgs: request.extraCliArgs, + members: [], + }, + onProgress + ); + }, + getProvisioningStatus: (runId: string): Promise => { + const progress = progressByRunId.get(runId); + if (!progress) { + throw new Error('Unknown runId'); + } + return Promise.resolve(progress); + }, + getRuntimeState: (teamName: string): Promise => { + const runId = runIdByTeam.get(teamName) ?? null; + return Promise.resolve({ + teamName, + isAlive: aliveTeams.has(teamName), + runId, + progress: runId ? (progressByRunId.get(runId) ?? 
null) : null, + }); + }, + stopTeam: (teamName: string): Promise => { + aliveTeams.delete(teamName); + return Promise.resolve(); + }, + getAliveTeams: (): string[] => [...aliveTeams], + } as HttpServices['teamProvisioningService']; + + return { + createTeamCalls, + services: { + projectScanner: {} as HttpServices['projectScanner'], + sessionParser: {} as HttpServices['sessionParser'], + subagentResolver: {} as HttpServices['subagentResolver'], + chunkBuilder: {} as HttpServices['chunkBuilder'], + dataCache: {} as HttpServices['dataCache'], + updaterService: {} as HttpServices['updaterService'], + sshConnectionManager: {} as HttpServices['sshConnectionManager'], + teamDataService, + teamProvisioningService, + }, + }; +} + +describe('MCP team tools over the local REST control API', () => { + const tools = collectTools(); + + function getTool(name: string): RegisteredTool { + const tool = tools.get(name); + expect(tool).toBeDefined(); + return tool!; + } + + it('creates, gets, launches, and lists a team through MCP and REST end to end', async () => { + const claudeRoot = await mkdtemp(path.join(tmpdir(), 'agent-teams-control-e2e-')); + const projectDir = await mkdtemp(path.join(tmpdir(), 'agent-teams-project-e2e-')); + setClaudeBasePathOverride(claudeRoot); + + const app = Fastify(); + const { createTeamCalls, services } = createServices(claudeRoot); + registerTeamRoutes(app, services); + + try { + await app.listen({ host: '127.0.0.1', port: 0 }); + const address = app.server.address() as AddressInfo; + const controlUrl = `http://127.0.0.1:${address.port}`; + + const created = parseJsonToolResult( + await getTool('team_create').execute({ + claudeDir: claudeRoot, + controlUrl, + teamName: 'mcp-e2e-team', + displayName: 'MCP E2E Team', + description: 'Created by MCP integration test', + color: '#3366ff', + cwd: projectDir, + prompt: 'Coordinate the test task', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'high', + fastMode: 
'on', + limitContext: true, + skipPermissions: false, + worktree: 'feature-e2e', + extraCliArgs: '--max-turns 5', + members: [ + { + name: 'builder', + role: 'Engineer', + workflow: 'Ship a focused patch', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + }, + ], + }) + ) as { teamName: string }; + expect(created).toEqual({ teamName: 'mcp-e2e-team' }); + + const restDraft = await fetchJson(controlUrl, '/api/teams/mcp-e2e-team'); + expect(restDraft.status).toBe(200); + expect(restDraft.body).toMatchObject({ + teamName: 'mcp-e2e-team', + pendingCreate: true, + savedRequest: { + teamName: 'mcp-e2e-team', + displayName: 'MCP E2E Team', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + limitContext: true, + skipPermissions: false, + members: [ + { + name: 'builder', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + }, + ], + }, + }); + + const mcpDraft = parseJsonToolResult( + await getTool('team_get').execute({ + claudeDir: claudeRoot, + controlUrl, + teamName: 'mcp-e2e-team', + }) + ); + expect(mcpDraft).toMatchObject({ + teamName: 'mcp-e2e-team', + pendingCreate: true, + savedRequest: { + prompt: 'Coordinate the test task', + worktree: 'feature-e2e', + extraCliArgs: '--max-turns 5', + }, + }); + + const restListBeforeLaunch = await fetchJson(controlUrl, '/api/teams'); + expect(restListBeforeLaunch.status).toBe(200); + expect(restListBeforeLaunch.body).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + teamName: 'mcp-e2e-team', + displayName: 'MCP E2E Team', + pendingCreate: true, + }), + ]) + ); + + const launched = parseJsonToolResult( + await getTool('team_launch').execute({ + claudeDir: claudeRoot, + controlUrl, + teamName: 'mcp-e2e-team', + cwd: projectDir, + }) + ) as { isAlive: boolean; progress: TeamProvisioningProgress; runId: string }; + 
expect(launched).toMatchObject({ + isAlive: true, + runId: 'run-mcp-e2e-team', + progress: { + state: 'ready', + teamName: 'mcp-e2e-team', + }, + }); + expect(createTeamCalls).toHaveLength(1); + expect(createTeamCalls[0]).toMatchObject({ + teamName: 'mcp-e2e-team', + displayName: 'MCP E2E Team', + cwd: projectDir, + prompt: 'Coordinate the test task', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + limitContext: true, + skipPermissions: false, + worktree: 'feature-e2e', + extraCliArgs: '--max-turns 5', + members: [ + { + name: 'builder', + role: 'Engineer', + workflow: 'Ship a focused patch', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + }, + ], + }); + + const restRuntime = await fetchJson(controlUrl, '/api/teams/mcp-e2e-team/runtime'); + expect(restRuntime.status).toBe(200); + expect(restRuntime.body).toMatchObject({ + teamName: 'mcp-e2e-team', + isAlive: true, + runId: 'run-mcp-e2e-team', + }); + + const restListAfterLaunch = await fetchJson(controlUrl, '/api/teams'); + expect(restListAfterLaunch.status).toBe(200); + const launchedListItem = (restListAfterLaunch.body as Record[]).find( + (team) => team.teamName === 'mcp-e2e-team' + ); + expect(launchedListItem).toMatchObject({ + teamName: 'mcp-e2e-team', + displayName: 'MCP E2E Team', + }); + expect(launchedListItem).not.toHaveProperty('pendingCreate'); + + const mcpLaunchedTeam = parseJsonToolResult( + await getTool('team_get').execute({ + claudeDir: claudeRoot, + controlUrl, + teamName: 'mcp-e2e-team', + }) + ); + expect(mcpLaunchedTeam).toMatchObject({ + teamName: 'mcp-e2e-team', + config: { + name: 'MCP E2E Team', + projectPath: projectDir, + }, + members: expect.arrayContaining([ + expect.objectContaining({ + name: 'builder', + role: 'Engineer', + }), + ]), + }); + } finally { + await app.close(); + setClaudeBasePathOverride(null); + await rm(claudeRoot, { recursive: 
true, force: true }); + await rm(projectDir, { recursive: true, force: true }); + } + }); +}); diff --git a/test/main/http/teams.test.ts b/test/main/http/teams.test.ts index e06520f0..87ad021f 100644 --- a/test/main/http/teams.test.ts +++ b/test/main/http/teams.test.ts @@ -4,22 +4,44 @@ import { describe, expect, it, vi } from 'vitest'; import { registerTeamRoutes } from '@main/http/teams'; import type { HttpServices } from '@main/http'; import type { + TeamCreateConfigRequest, + TeamCreateRequest, TeamLaunchRequest, TeamLaunchResponse, TeamProvisioningProgress, TeamRuntimeState, + TeamSummary, + TeamViewSnapshot, } from '@shared/types/team'; describe('HTTP team runtime routes', () => { function createServicesMock() { - const launchTeam = vi.fn< - (request: TeamLaunchRequest, onProgress: (progress: TeamProvisioningProgress) => void) => Promise - >(); + const launchTeam = + vi.fn< + ( + request: TeamLaunchRequest, + onProgress: (progress: TeamProvisioningProgress) => void + ) => Promise + >(); const getRuntimeState = vi.fn<(teamName: string) => Promise>(); const getProvisioningStatus = vi.fn<(runId: string) => Promise>(); const stopTeam = vi.fn<(teamName: string) => Promise>(() => Promise.resolve()); const getAliveTeams = vi.fn<() => string[]>(); + const createTeam = + vi.fn< + ( + request: TeamCreateRequest, + onProgress: (progress: TeamProvisioningProgress) => void + ) => Promise + >(); + const listTeams = vi.fn<() => Promise>(); + const getTeamData = vi.fn<(teamName: string) => Promise>(); + const getSavedRequest = vi.fn<(teamName: string) => Promise>(); + const createTeamConfig = vi.fn<(request: TeamCreateConfigRequest) => Promise>(() => + Promise.resolve() + ); const teamProvisioningService = { + createTeam, launchTeam, getRuntimeState, getProvisioningStatus, @@ -27,8 +49,22 @@ describe('HTTP team runtime routes', () => { getAliveTeams, } as Pick< NonNullable, - 'launchTeam' | 'getRuntimeState' | 'getProvisioningStatus' | 'stopTeam' | 'getAliveTeams' + | 
'createTeam' + | 'launchTeam' + | 'getRuntimeState' + | 'getProvisioningStatus' + | 'stopTeam' + | 'getAliveTeams' > as HttpServices['teamProvisioningService']; + const teamDataService = { + listTeams, + getTeamData, + getSavedRequest, + createTeamConfig, + } as Pick< + NonNullable, + 'listTeams' | 'getTeamData' | 'getSavedRequest' | 'createTeamConfig' + > as HttpServices['teamDataService']; const services = { projectScanner: {} as HttpServices['projectScanner'], @@ -38,6 +74,7 @@ describe('HTTP team runtime routes', () => { dataCache: {} as HttpServices['dataCache'], updaterService: {} as HttpServices['updaterService'], sshConnectionManager: {} as HttpServices['sshConnectionManager'], + teamDataService, teamProvisioningService, } satisfies HttpServices; @@ -48,6 +85,11 @@ describe('HTTP team runtime routes', () => { getProvisioningStatus, stopTeam, getAliveTeams, + createTeam, + listTeams, + getTeamData, + getSavedRequest, + createTeamConfig, }; } @@ -59,6 +101,87 @@ describe('HTTP team runtime routes', () => { return { app, ...mocks }; } + it('lists, gets, and creates draft teams through team data service', async () => { + const { app, listTeams, getTeamData, createTeamConfig } = await createApp(); + listTeams.mockResolvedValue([ + { + teamName: 'demo-team', + displayName: 'Demo Team', + description: 'Demo', + memberCount: 1, + taskCount: 0, + lastActivity: null, + pendingCreate: true, + }, + ]); + getTeamData.mockResolvedValue({ + teamName: 'demo-team', + config: null, + tasks: [], + messages: [], + processes: [], + kanban: null, + } as unknown as TeamViewSnapshot); + + try { + const listResponse = await app.inject({ + method: 'GET', + url: '/api/teams', + }); + expect(listResponse.statusCode).toBe(200); + expect(listResponse.json()[0]).toMatchObject({ + teamName: 'demo-team', + pendingCreate: true, + }); + + const getResponse = await app.inject({ + method: 'GET', + url: '/api/teams/demo-team', + }); + expect(getResponse.statusCode).toBe(200); + 
expect(getTeamData).toHaveBeenCalledWith('demo-team'); + + const createResponse = await app.inject({ + method: 'POST', + url: '/api/teams', + payload: { + teamName: 'new-team', + displayName: 'New Team', + members: [{ name: 'builder', role: 'Engineer', providerId: 'codex' }], + cwd: '/Users/test/project', + providerId: 'codex', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + limitContext: true, + }, + }); + expect(createResponse.statusCode).toBe(201); + expect(createResponse.json()).toEqual({ teamName: 'new-team' }); + expect(createTeamConfig).toHaveBeenCalledWith({ + teamName: 'new-team', + displayName: 'New Team', + members: [ + { + name: 'builder', + role: 'Engineer', + providerId: 'codex', + providerBackendId: 'codex-native', + }, + ], + cwd: '/Users/test/project', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + limitContext: true, + }); + } finally { + await app.close(); + } + }); + it('launches a team with validated request payload', async () => { const { app, launchTeam } = await createApp(); launchTeam.mockResolvedValue({ runId: 'run-1' }); @@ -68,7 +191,7 @@ describe('HTTP team runtime routes', () => { method: 'POST', url: '/api/teams/demo-team/launch', payload: { - cwd: '/tmp/project', + cwd: '/Users/test/project', prompt: 'Resume work', skipPermissions: false, clearContext: true, @@ -80,7 +203,7 @@ describe('HTTP team runtime routes', () => { expect(launchTeam).toHaveBeenCalledWith( { teamName: 'demo-team', - cwd: '/tmp/project', + cwd: '/Users/test/project', prompt: 'Resume work', providerId: 'anthropic', skipPermissions: false, @@ -93,6 +216,97 @@ describe('HTTP team runtime routes', () => { } }); + it('routes draft team launch through createTeam with saved metadata', async () => { + const { app, createTeam, getSavedRequest, launchTeam } = await createApp(); + getSavedRequest.mockResolvedValue({ + teamName: 'draft-team', + displayName: 'Draft Team', + description: 'Saved 
draft', + color: '#3366ff', + cwd: '/Users/test/saved-project', + prompt: 'Saved prompt', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'medium', + fastMode: 'on', + limitContext: true, + members: [{ name: 'builder', role: 'Engineer', providerId: 'codex' }], + }); + createTeam.mockResolvedValue({ runId: 'run-draft' }); + + try { + const response = await app.inject({ + method: 'POST', + url: '/api/teams/draft-team/launch', + payload: { + cwd: '/Users/test/project', + effort: 'high', + }, + }); + + expect(response.statusCode).toBe(200); + expect(response.json()).toEqual({ runId: 'run-draft' }); + expect(launchTeam).not.toHaveBeenCalled(); + expect(createTeam).toHaveBeenCalledWith( + { + teamName: 'draft-team', + displayName: 'Draft Team', + description: 'Saved draft', + color: '#3366ff', + members: [{ name: 'builder', role: 'Engineer', providerId: 'codex' }], + cwd: '/Users/test/project', + prompt: 'Saved prompt', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + limitContext: true, + }, + expect.any(Function) + ); + } finally { + await app.close(); + } + }); + + it('returns saved metadata for draft team get without requiring config.json', async () => { + const { app, getSavedRequest, getTeamData } = await createApp(); + getSavedRequest.mockResolvedValue({ + teamName: 'draft-team', + displayName: 'Draft Team', + cwd: '/Users/test/project', + providerId: 'codex', + providerBackendId: 'codex-native', + members: [{ name: 'builder', role: 'Engineer', providerId: 'codex' }], + }); + + try { + const response = await app.inject({ + method: 'GET', + url: '/api/teams/draft-team', + }); + + expect(response.statusCode).toBe(200); + expect(response.json()).toEqual({ + teamName: 'draft-team', + pendingCreate: true, + savedRequest: { + teamName: 'draft-team', + displayName: 'Draft Team', + cwd: '/Users/test/project', + providerId: 'codex', + providerBackendId: 
'codex-native', + members: [{ name: 'builder', role: 'Engineer', providerId: 'codex' }], + }, + }); + expect(getTeamData).not.toHaveBeenCalled(); + } finally { + await app.close(); + } + }); + it('rejects launch requests with non-absolute cwd', async () => { const { app, launchTeam } = await createApp(); @@ -114,7 +328,8 @@ describe('HTTP team runtime routes', () => { }); it('returns runtime state, provisioning status, and stop results', async () => { - const { app, getRuntimeState, getProvisioningStatus, stopTeam, getAliveTeams } = await createApp(); + const { app, getRuntimeState, getProvisioningStatus, stopTeam, getAliveTeams } = + await createApp(); getRuntimeState .mockResolvedValueOnce({ teamName: 'demo-team', @@ -213,18 +428,15 @@ describe('HTTP team runtime routes', () => { it('returns 501 when team runtime routes are registered without a runtime service', async () => { const app = Fastify(); - registerTeamRoutes( - app, - { - projectScanner: {} as HttpServices['projectScanner'], - sessionParser: {} as HttpServices['sessionParser'], - subagentResolver: {} as HttpServices['subagentResolver'], - chunkBuilder: {} as HttpServices['chunkBuilder'], - dataCache: {} as HttpServices['dataCache'], - updaterService: {} as HttpServices['updaterService'], - sshConnectionManager: {} as HttpServices['sshConnectionManager'], - } satisfies HttpServices - ); + registerTeamRoutes(app, { + projectScanner: {} as HttpServices['projectScanner'], + sessionParser: {} as HttpServices['sessionParser'], + subagentResolver: {} as HttpServices['subagentResolver'], + chunkBuilder: {} as HttpServices['chunkBuilder'], + dataCache: {} as HttpServices['dataCache'], + updaterService: {} as HttpServices['updaterService'], + sshConnectionManager: {} as HttpServices['sshConnectionManager'], + } satisfies HttpServices); await app.ready(); try { @@ -234,7 +446,9 @@ describe('HTTP team runtime routes', () => { }); expect(response.statusCode).toBe(501); - expect(response.json()).toEqual({ error: 
'Team runtime control is not available in this mode' }); + expect(response.json()).toEqual({ + error: 'Team runtime control is not available in this mode', + }); } finally { await app.close(); } diff --git a/test/main/ipc/teams.test.ts b/test/main/ipc/teams.test.ts index f081d6bb..aef64c5e 100644 --- a/test/main/ipc/teams.test.ts +++ b/test/main/ipc/teams.test.ts @@ -2,6 +2,7 @@ import * as os from 'os'; import * as fs from 'fs'; import * as path from 'path'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { setClaudeBasePathOverride } from '@main/utils/pathDecoder'; import type { BoardTaskActivityDetailResult, BoardTaskActivityEntry, @@ -30,7 +31,9 @@ vi.mock('@preload/constants/ipcChannels', async (importOriginal) => { // Mock NotificationManager — handleShowMessageNotification calls addTeamNotification const { mockAddTeamNotification } = vi.hoisted(() => ({ - mockAddTeamNotification: vi.fn().mockResolvedValue({ id: 'n1', isRead: false, createdAt: Date.now() }), + mockAddTeamNotification: vi + .fn() + .mockResolvedValue({ id: 'n1', isRead: false, createdAt: Date.now() }), })); const { mockGetMembersMeta } = vi.hoisted(() => ({ mockGetMembersMeta: vi.fn(), @@ -147,25 +150,29 @@ describe('ipc teams handlers', () => { const service = { listTeams: vi.fn(async () => [{ teamName: 'my-team', displayName: 'My Team' }]), - getTeamData: vi.fn(async (): Promise => ({ - teamName: 'my-team', - config: { name: 'My Team' }, - tasks: [], - members: [], - kanbanState: { teamName: 'my-team', reviewers: [], tasks: {} }, - processes: [], - })), + getTeamData: vi.fn( + async (): Promise => ({ + teamName: 'my-team', + config: { name: 'My Team' }, + tasks: [], + members: [], + kanbanState: { teamName: 'my-team', reviewers: [], tasks: {} }, + processes: [], + }) + ), getMessageFeed: vi.fn(async () => ({ teamName: 'my-team', feedRevision: 'rev-1', messages: [] as InboxMessage[], })), - getMessagesPage: vi.fn(async (..._args: unknown[]): Promise => ({ - 
messages: [] as InboxMessage[], - nextCursor: null, - hasMore: false, - feedRevision: 'rev-1', - })), + getMessagesPage: vi.fn( + async (..._args: unknown[]): Promise => ({ + messages: [] as InboxMessage[], + nextCursor: null, + hasMore: false, + feedRevision: 'rev-1', + }) + ), getMemberActivityMeta: vi.fn(async () => ({ teamName: 'my-team', computedAt: '2026-03-12T10:00:00.000Z', @@ -207,6 +214,7 @@ describe('ipc teams handlers', () => { removeTaskRelationship: vi.fn(async () => undefined), replaceMembers: vi.fn(async () => undefined), createTeamConfig: vi.fn(async () => undefined), + getSavedRequest: vi.fn(async (): Promise => null), }; const provisioningService = { prepareForProvisioning: vi.fn(async () => ({ @@ -265,24 +273,26 @@ describe('ipc teams handlers', () => { getTaskActivity: vi.fn<() => Promise>(async () => []), }; const boardTaskActivityDetailService = { - getTaskActivityDetail: - vi.fn<() => Promise>(async () => ({ status: 'missing' })), + getTaskActivityDetail: vi.fn<() => Promise>(async () => ({ + status: 'missing', + })), }; const boardTaskLogStreamService = { - getTaskLogStream: - vi.fn<() => Promise>(async () => ({ - participants: [], - defaultFilter: 'all', - segments: [], - })), + getTaskLogStream: vi.fn<() => Promise>(async () => ({ + participants: [], + defaultFilter: 'all', + segments: [], + })), }; const boardTaskExactLogsService = { - getTaskExactLogSummaries: - vi.fn<() => Promise>(async () => ({ items: [] })), + getTaskExactLogSummaries: vi.fn<() => Promise>( + async () => ({ items: [] }) + ), }; const boardTaskExactLogDetailService = { - getTaskExactLogDetail: - vi.fn<() => Promise>(async () => ({ status: 'missing' })), + getTaskExactLogDetail: vi.fn<() => Promise>(async () => ({ + status: 'missing', + })), }; beforeEach(() => { @@ -316,13 +326,14 @@ describe('ipc teams handlers', () => { boardTaskActivityDetailService as never, boardTaskLogStreamService as never, boardTaskExactLogsService as never, - boardTaskExactLogDetailService 
as never, + boardTaskExactLogDetailService as never ); registerTeamHandlers(ipcMain as never); }); afterEach(() => { vi.useRealTimers(); + setClaudeBasePathOverride(null); }); it('registers all expected handlers', () => { @@ -417,12 +428,7 @@ describe('ipc teams handlers', () => { const taskId = 'task-js'; const attachmentId = 'att-js'; - const attachmentDir = path.join( - getAppDataPath(), - 'task-attachments', - 'my-team', - taskId - ); + const attachmentDir = path.join(getAppDataPath(), 'task-attachments', 'my-team', taskId); await fs.promises.rm(attachmentDir, { recursive: true, force: true }); await fs.promises.mkdir(attachmentDir, { recursive: true }); await fs.promises.writeFile( @@ -778,7 +784,9 @@ describe('ipc teams handlers', () => { ); expect(provisioningService.sendMessageToTeam).toHaveBeenCalledWith( 'my-team', - expect.stringContaining('FORBIDDEN: editing files, changing code, changing task/board state, delegating work, launching Agent/subagents'), + expect.stringContaining( + 'FORBIDDEN: editing files, changing code, changing task/board state, delegating work, launching Agent/subagents' + ), undefined ); expect(service.sendDirectToLead).toHaveBeenCalledWith( @@ -814,7 +822,9 @@ describe('ipc teams handlers', () => { ); expect(provisioningService.sendMessageToTeam).toHaveBeenCalledWith( 'my-team', - expect.stringContaining('Persistent teammates currently configured: alice (reviewer), jack (developer)'), + expect.stringContaining( + 'Persistent teammates currently configured: alice (reviewer), jack (developer)' + ), undefined ); expect(provisioningService.sendMessageToTeam).toHaveBeenCalledWith( @@ -842,7 +852,9 @@ describe('ipc teams handlers', () => { ); expect(provisioningService.sendMessageToTeam).toHaveBeenCalledWith( 'my-team', - expect.stringContaining('Make the acknowledgement at least 40 characters so it is preserved in the Messages panel.'), + expect.stringContaining( + 'Make the acknowledgement at least 40 characters so it is preserved in 
the Messages panel.' + ), undefined ); }); @@ -887,9 +899,10 @@ describe('ipc teams handlers', () => { '/COMPACT keep kanban', undefined ); - const compactCall = vi.mocked(provisioningService.sendMessageToTeam).mock - .calls as unknown[][]; - expect(String(compactCall[0]?.[1] ?? '')).not.toContain('You received a direct message from the user'); + const compactCall = vi.mocked(provisioningService.sendMessageToTeam).mock.calls as unknown[][]; + expect(String(compactCall[0]?.[1] ?? '')).not.toContain( + 'You received a direct message from the user' + ); expect(String(compactCall[0]?.[1] ?? '')).not.toContain('Current durable team context:'); expect(service.sendDirectToLead).toHaveBeenCalledWith( 'my-team', @@ -1933,7 +1946,9 @@ describe('ipc teams handlers', () => { ); expect(provisioningService.sendMessageToTeam).toHaveBeenCalledWith( 'my-team', - expect.stringContaining('Do NOT start work, claim tasks, or improvise workflow/task/process rules') + expect.stringContaining( + 'Do NOT start work, claim tasks, or improvise workflow/task/process rules' + ) ); expect(provisioningService.sendMessageToTeam).toHaveBeenCalledWith( 'my-team', @@ -2516,7 +2531,9 @@ describe('ipc teams handlers', () => { })) as { success: boolean; error?: string }; expect(result.success).toBe(false); - expect(result.error).toContain('Live member migration between OpenCode and the primary runtime owner'); + expect(result.error).toContain( + 'Live member migration between OpenCode and the primary runtime owner' + ); expect(result.error).toContain('alice'); expect(service.replaceMembers).not.toHaveBeenCalled(); expect(provisioningService.reattachOpenCodeOwnedMemberLane).not.toHaveBeenCalled(); @@ -2562,7 +2579,9 @@ describe('ipc teams handlers', () => { })) as { success: boolean; error?: string }; expect(result.success).toBe(false); - expect(result.error).toContain('Live member migration between OpenCode and the primary runtime owner'); + expect(result.error).toContain( + 'Live member migration 
between OpenCode and the primary runtime owner' + ); expect(result.error).toContain('alice'); expect(service.replaceMembers).not.toHaveBeenCalled(); expect(provisioningService.reattachOpenCodeOwnedMemberLane).not.toHaveBeenCalled(); @@ -2843,6 +2862,42 @@ describe('ipc teams handlers', () => { expect(callArg.members).toEqual([]); }); + it('createTeam preserves teammate backend and fast mode metadata', async () => { + const handler = handlers.get(TEAM_CREATE)!; + const result = (await handler({ sender: { send: vi.fn() } } as never, { + teamName: 'runtime-team', + members: [ + { + name: 'builder', + role: 'Engineer', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.4', + effort: 'high', + fastMode: 'on', + }, + ], + cwd: os.tmpdir(), + providerId: 'codex', + providerBackendId: 'codex-native', + })) as { success: boolean }; + + expect(result.success).toBe(true); + expect(provisioningService.createTeam.mock.calls[0][0].members).toEqual([ + { + name: 'builder', + role: 'Engineer', + workflow: undefined, + isolation: undefined, + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.4', + effort: 'high', + fastMode: 'on', + }, + ]); + }); + it('handleCreateConfig accepts members: []', async () => { const handler = handlers.get(TEAM_CREATE_CONFIG)!; const result = (await handler({} as never, { @@ -2853,6 +2908,161 @@ describe('ipc teams handlers', () => { expect(result.success).toBe(true); }); + it('handleCreateConfig preserves draft launch metadata', async () => { + const handler = handlers.get(TEAM_CREATE_CONFIG)!; + const result = (await handler({} as never, { + teamName: 'draft-team', + displayName: ' Draft Team ', + description: ' Saved draft ', + color: '#3366ff', + members: [ + { + name: 'builder', + role: ' Engineer ', + workflow: ' Ship focused patches ', + providerId: 'codex', + providerBackendId: 'codex-native', + model: ' gpt-5.2 ', + effort: 'high', + fastMode: 'on', + }, + ], + cwd: '/Users/test/project', + 
prompt: ' Saved prompt ', + providerId: 'codex', + providerBackendId: 'codex-native', + model: ' gpt-5.2 ', + effort: 'high', + fastMode: 'on', + limitContext: true, + skipPermissions: false, + worktree: 'feature-x', + extraCliArgs: '--max-turns 5', + })) as { success: boolean }; + + expect(result.success).toBe(true); + expect(service.createTeamConfig).toHaveBeenCalledWith({ + teamName: 'draft-team', + displayName: 'Draft Team', + description: 'Saved draft', + color: '#3366ff', + members: [ + { + name: 'builder', + role: 'Engineer', + workflow: 'Ship focused patches', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + }, + ], + cwd: '/Users/test/project', + prompt: 'Saved prompt', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + limitContext: true, + skipPermissions: false, + worktree: 'feature-x', + extraCliArgs: '--max-turns 5', + }); + }); + + it('launches draft team through saved request without dropping Electron draft metadata', async () => { + const claudeRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'ipc-draft-launch-')); + setClaudeBasePathOverride(claudeRoot); + try { + const teamDir = path.join(claudeRoot, 'teams', 'draft-team'); + fs.mkdirSync(teamDir, { recursive: true }); + fs.writeFileSync( + path.join(teamDir, 'team.meta.json'), + JSON.stringify({ + version: 1, + displayName: 'Draft Team', + cwd: '/Users/test/project', + createdAt: Date.now(), + }) + ); + service.getSavedRequest.mockResolvedValueOnce({ + teamName: 'draft-team', + displayName: 'Draft Team', + description: 'Saved draft', + color: '#3366ff', + cwd: '/Users/test/project', + prompt: 'Saved prompt', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'medium', + fastMode: 'on', + limitContext: true, + skipPermissions: false, + worktree: 'feature-x', + extraCliArgs: '--max-turns 5', + members: [ + { + name: 'builder', + 
role: 'Engineer', + workflow: 'Ship focused patches', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + }, + ], + }); + + const handler = handlers.get(TEAM_LAUNCH)!; + const result = (await handler({ sender: { send: vi.fn() } } as never, { + teamName: 'draft-team', + cwd: os.tmpdir(), + effort: 'high', + })) as { success: boolean }; + + expect(result.success).toBe(true); + expect(provisioningService.launchTeam).not.toHaveBeenCalled(); + expect(provisioningService.createTeam).toHaveBeenCalledWith( + { + teamName: 'draft-team', + displayName: 'Draft Team', + description: 'Saved draft', + color: '#3366ff', + members: [ + { + name: 'builder', + role: 'Engineer', + workflow: 'Ship focused patches', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + }, + ], + cwd: os.tmpdir(), + prompt: 'Saved prompt', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + limitContext: true, + skipPermissions: false, + worktree: 'feature-x', + extraCliArgs: '--max-turns 5', + }, + expect.any(Function) + ); + } finally { + fs.rmSync(claudeRoot, { recursive: true, force: true }); + } + }); + it('handleReplaceMembers accepts members: []', async () => { const handler = handlers.get(TEAM_REPLACE_MEMBERS)!; const result = (await handler({} as never, 'my-team', { diff --git a/test/main/services/team/TeamDataService.test.ts b/test/main/services/team/TeamDataService.test.ts index 9413efc1..9876146f 100644 --- a/test/main/services/team/TeamDataService.test.ts +++ b/test/main/services/team/TeamDataService.test.ts @@ -237,6 +237,74 @@ afterEach(async () => { ); }); +describe('TeamDataService draft metadata', () => { + it('round-trips create config metadata through getSavedRequest', async () => { + const claudeRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'team-data-saved-request-')); + 
tempPaths.push(claudeRoot); + setClaudeBasePathOverride(claudeRoot); + + const service = new TeamDataService(); + await service.createTeamConfig({ + teamName: 'draft-team', + displayName: 'Draft Team', + description: 'Saved draft', + color: '#3366ff', + cwd: '/Users/test/project', + prompt: 'Saved prompt', + providerId: 'codex', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + limitContext: true, + skipPermissions: false, + worktree: 'feature-x', + extraCliArgs: '--max-turns 5', + members: [ + { + name: 'builder', + role: 'Engineer', + workflow: 'Ship focused patches', + providerId: 'codex', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + }, + ], + }); + + await expect(service.getSavedRequest('missing-team')).resolves.toBeNull(); + await expect(service.getSavedRequest('draft-team')).resolves.toMatchObject({ + teamName: 'draft-team', + displayName: 'Draft Team', + description: 'Saved draft', + color: '#3366ff', + cwd: '/Users/test/project', + prompt: 'Saved prompt', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + limitContext: true, + skipPermissions: false, + worktree: 'feature-x', + extraCliArgs: '--max-turns 5', + members: [ + { + name: 'builder', + role: 'Engineer', + workflow: 'Ship focused patches', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.2', + effort: 'high', + fastMode: 'on', + }, + ], + }); + }); +}); + function createForwardingJournalStore(initialEntries: Array> = []) { const journalEntries = initialEntries; const journal = { diff --git a/test/renderer/components/team/dialogs/LaunchTeamDialog.test.ts b/test/renderer/components/team/dialogs/LaunchTeamDialog.test.ts index 139f5ef6..b3b5a005 100644 --- a/test/renderer/components/team/dialogs/LaunchTeamDialog.test.ts +++ b/test/renderer/components/team/dialogs/LaunchTeamDialog.test.ts @@ -124,8 +124,10 @@ vi.mock('@renderer/components/team/members/MembersEditorSection', () => ({ 
customRole?: string; workflow?: string; providerId?: string; + providerBackendId?: string; model?: string; effort?: string; + fastMode?: string; }> ) => drafts.map((draft) => ({ @@ -133,8 +135,10 @@ vi.mock('@renderer/components/team/members/MembersEditorSection', () => ({ role: draft.customRole || undefined, workflow: draft.workflow, providerId: draft.providerId as 'anthropic' | 'codex' | 'gemini' | undefined, + providerBackendId: draft.providerBackendId as 'codex-native' | undefined, model: draft.model, effort: draft.effort as 'low' | 'medium' | 'high' | undefined, + fastMode: draft.fastMode as 'inherit' | 'on' | 'off' | undefined, })), clearMemberModelOverrides: (member: unknown) => member, createMemberDraftsFromInputs: ( @@ -143,8 +147,10 @@ vi.mock('@renderer/components/team/members/MembersEditorSection', () => ({ role?: string; workflow?: string; providerId?: string; + providerBackendId?: string; model?: string; effort?: string; + fastMode?: string; isolation?: 'worktree'; }> ) => @@ -157,8 +163,10 @@ vi.mock('@renderer/components/team/members/MembersEditorSection', () => ({ workflow: member.workflow ?? '', isolation: member.isolation, providerId: member.providerId, + providerBackendId: member.providerBackendId, model: member.model ?? 
'', effort: member.effort, + fastMode: member.fastMode, })), filterEditableMemberInputs: (members: unknown) => members, normalizeLeadProviderForMode: (providerId: unknown) => @@ -587,6 +595,82 @@ describe('LaunchTeamDialog', () => { }); }); + it('preserves hidden teammate backend and fast mode metadata before draft launch', async () => { + vi.stubGlobal('IS_REACT_ACT_ENVIRONMENT', true); + vi.mocked(api.teams.getSavedRequest).mockResolvedValueOnce({ + teamName: 'team-alpha', + cwd: '/tmp/project', + providerId: 'anthropic', + model: 'opus', + members: [ + { + name: 'alice', + role: 'Reviewer', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.4', + effort: 'medium', + fastMode: 'on', + }, + ], + } as any); + const onLaunch = vi.fn(async () => {}); + + const host = document.createElement('div'); + document.body.appendChild(host); + const root = createRoot(host); + + await act(async () => { + root.render( + React.createElement(LaunchTeamDialog, { + mode: 'launch', + open: true, + teamName: 'team-alpha', + members: [], + defaultProjectPath: '/tmp/project', + provisioningError: null, + clearProvisioningError: vi.fn(), + activeTeams: [], + onClose: vi.fn(), + onLaunch, + }) + ); + await flush(); + await flush(); + }); + + const submitButton = Array.from(host.querySelectorAll('button')).find( + (button) => button.textContent === 'Launch team' + ); + expect(submitButton).toBeTruthy(); + + await act(async () => { + submitButton?.dispatchEvent(new MouseEvent('click', { bubbles: true })); + await flush(); + await flush(); + }); + + expect(vi.mocked(api.teams.replaceMembers).mock.calls[0]?.[1]).toMatchObject({ + members: [ + { + name: 'alice', + role: 'Reviewer', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.4', + effort: 'medium', + fastMode: 'on', + }, + ], + }); + expect(onLaunch).toHaveBeenCalledTimes(1); + + await act(async () => { + root.unmount(); + await flush(); + }); + }); + it('submits relaunch through 
onRelaunch without replacing members in-dialog', async () => { vi.stubGlobal('IS_REACT_ACT_ENVIRONMENT', true); diff --git a/test/renderer/components/team/members/membersEditorUtils.test.ts b/test/renderer/components/team/members/membersEditorUtils.test.ts index 7d738649..3b8f31d2 100644 --- a/test/renderer/components/team/members/membersEditorUtils.test.ts +++ b/test/renderer/components/team/members/membersEditorUtils.test.ts @@ -72,6 +72,37 @@ describe('members editor editable input filtering', () => { }); }); + it('round-trips hidden teammate backend and fast mode metadata', () => { + const drafts = createMemberDraftsFromInputs( + filterEditableMemberInputs([ + { + name: 'alice', + agentType: 'reviewer', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.4-mini', + effort: 'medium', + fastMode: 'on', + }, + ] as any) + ); + + expect(drafts[0]).toMatchObject({ + providerBackendId: 'codex-native', + fastMode: 'on', + }); + expect(buildMembersFromDrafts(drafts)).toEqual([ + expect.objectContaining({ + name: 'alice', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.4-mini', + effort: 'medium', + fastMode: 'on', + }), + ]); + }); + it('preserves explicit codex models when exporting member inputs', () => { const drafts = createMemberDraftsFromInputs( filterEditableMemberInputs([ From f19adea10d3398c08386665f4382d3172b03bb1a Mon Sep 17 00:00:00 2001 From: 777genius Date: Wed, 29 Apr 2026 11:01:59 +0300 Subject: [PATCH 20/25] docs: add openclaw integration guide --- docs/team-management/README.md | 32 +- .../openclaw-agent-teams-integration.md | 811 ++++++++++++++++++ 2 files changed, 834 insertions(+), 9 deletions(-) create mode 100644 docs/team-management/openclaw-agent-teams-integration.md diff --git a/docs/team-management/README.md b/docs/team-management/README.md index c320774b..587c378b 100644 --- a/docs/team-management/README.md +++ b/docs/team-management/README.md @@ -12,41 +12,48 @@ ## Документация -| Файл | 
Содержание | -|------|-----------| -| [research-inbox.md](./research-inbox.md) | Формат inbox-файлов, race conditions, atomic write, доставка сообщений | -| [research-tasks.md](./research-tasks.md) | Формат task-файлов, .lock, .highwatermark, конкурентный доступ | -| [research-messaging.md](./research-messaging.md) | Сравнение подходов (inbox vs SDK vs CLI), почему выбрали inbox | -| [kanban-design.md](./kanban-design.md) | Kanban flow, колонки, review mechanism, kanban-state.json | -| [implementation.md](./implementation.md) | Техплан: файлы, шаги, verification | -| [research-worktrees.md](./research-worktrees.md) | Git worktrees + teams, запуск Claude процессов из UI (Phase 2) | -| [task-queue-derived-agenda-plan.md](./task-queue-derived-agenda-plan.md) | Подробный rollout-plan по разделению queue/inventory, derived actionOwner и phased agenda/delta sync | +| Файл | Содержание | +| ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | +| [research-inbox.md](./research-inbox.md) | Формат inbox-файлов, race conditions, atomic write, доставка сообщений | +| [research-tasks.md](./research-tasks.md) | Формат task-файлов, .lock, .highwatermark, конкурентный доступ | +| [research-messaging.md](./research-messaging.md) | Сравнение подходов (inbox vs SDK vs CLI), почему выбрали inbox | +| [kanban-design.md](./kanban-design.md) | Kanban flow, колонки, review mechanism, kanban-state.json | +| [implementation.md](./implementation.md) | Техплан: файлы, шаги, verification | +| [openclaw-agent-teams-integration.md](./openclaw-agent-teams-integration.md) | How to connect OpenClaw or another outside AI through Agent Teams MCP and REST control API | +| [research-worktrees.md](./research-worktrees.md) | Git worktrees + teams, запуск Claude процессов из UI (Phase 2) | +| [task-queue-derived-agenda-plan.md](./task-queue-derived-agenda-plan.md) | Подробный 
rollout-plan по разделению queue/inventory, derived actionOwner и phased agenda/delta sync | ## Ключевые решения ⚠️ `docs/iterations/*` - это исторические planning notes. Они полезны для контекста, но не являются source-of-truth для текущего поведения продукта. Актуальный контракт review flow описан в этом файле и в [kanban-design.md](./kanban-design.md). ### 1. Messaging: Inbox-файлы + Единственный способ общаться с **запущенными** тиммейтами. SDK и CLI создают новые сессии, а не подключаются к существующим. Подробности: [research-messaging.md](./research-messaging.md) ### 1.1 Roster source: members.meta.json + inboxes + - `config.json` не используется как полный реестр участников (он может содержать только team-lead и служебные поля CLI). - Источник метаданных участников (role/color/agentType): `members.meta.json`. - Источник runtime-состава и адресации сообщений: `inboxes/{member}.json`. ### 2. Kanban Storage: Собственный файл + Kanban-позиция (REVIEW, APPROVED) хранится в `kanban-state.json`, а не в task metadata. Причина: metadata может быть перезаписан агентом при TaskUpdate. Подробности: [kanban-design.md](./kanban-design.md) ### 3. Review Flow: Approve / Request Changes + - Есть ревьюверы в команде → автоматическое назначение через inbox - Юзер также может вручную одобрить задачу напрямую из `DONE` без отдельного захода в `REVIEW` - Нет ревьюверов → ручное ревью юзером (Approve / Request Changes в UI) - При Request Changes → юзер описывает проблему (опционально) → задача возвращается owner'у в `pending` с `needsFix` ### 4. Atomic Write + Все записи через tmp + rename для предотвращения corrupted JSON. ### 5. Sender Identity + Отправляем `from: "user"`. Fallback на `from: "team-lead"` если не работает. 
## Финальные решения после ревью @@ -54,16 +61,19 @@ Kanban-позиция (REVIEW, APPROVED) хранится в `kanban-state.json` По итогам 3 раундов ревью (13 экспертов) приняты следующие решения: ### Inbox: Atomic write + messageId verify + - Atomic write (tmp + rename) предотвращает corrupted JSON - После записи читаем файл обратно и проверяем наличие нашего `messageId` - Полный CAS/retry-цикл — не нужен на MVP: проверка при следующем read достаточна - Риск race condition с агентом реален, но вероятность низкая ### Kanban: kanban-state.json с безопасным GC + - GC устаревших записей kanban-state выполняется ТОЛЬКО ПОСЛЕ полной загрузки tasks - Иначе при startup возможна race condition: GC удаляет запись до того как task-файл прочитан ### Review Flow: Approve / Request Changes + - Кнопки переименованы: **Approve** (вместо OK) и **Request Changes** (вместо Error) - Комментарий при Request Changes — опционален - Manual UI допускает два valid path: @@ -73,10 +83,12 @@ Kanban-позиция (REVIEW, APPROVED) хранится в `kanban-state.json` - `reviewHistory` и round-robin балансировка → Phase 2, не MVP ### Members: полный список через union + - `union(config members + inbox filenames + task owners)` — единственный способ получить полный список - `owner` в task-файлах — опционален (агент может не иметь owner до назначения) ### Graceful Degradation + - `try/catch` везде в TeamDataService — при ошибке чтения возвращаем безопасные дефолты - 3 состояния участника: `ACTIVE` / `IDLE` / `TERMINATED` - `ACTIVE`: idle < 5 минут @@ -84,6 +96,7 @@ Kanban-позиция (REVIEW, APPROVED) хранится в `kanban-state.json` - `TERMINATED`: получен `shutdown_response` с `approve: true` ### @dnd-kit and review transitions + - Переходы между review-колонками делаются через card actions в UI - `@dnd-kit` сейчас используется в первую очередь для перестановки задач внутри колонки - Phase 2: полноценный D&D через `@dnd-kit` @@ -113,5 +126,6 @@ Kanban-позиция (REVIEW, APPROVED) хранится в `kanban-state.json` ``` 
**ВАЖНО**: + - `config.json` не является source-of-truth для полного roster. - Полный roster для UI формируется как `members.meta.json + inbox filenames (+ lead из config)`. diff --git a/docs/team-management/openclaw-agent-teams-integration.md b/docs/team-management/openclaw-agent-teams-integration.md new file mode 100644 index 00000000..a2c4b77e --- /dev/null +++ b/docs/team-management/openclaw-agent-teams-integration.md @@ -0,0 +1,811 @@ +# OpenClaw Integration With Agent Teams + +**Status:** Local-first integration guide +**Audience:** OpenClaw or any external AI client that can call MCP tools or local HTTP APIs +**Primary use case:** Let an outside AI create, inspect, launch, and coordinate Agent Teams for complex work and cross-checking + +## 1. Short Answer + +Yes, this is feasible. + +The integration has two layers: + +1. **Agent Teams Desktop App HTTP control API** + - Runs locally on the same machine as the desktop app. + - Defaults to `http://127.0.0.1:3456`. + - Exposes REST endpoints for teams and runtime lifecycle. + +2. **`agent-teams-mcp` stdio MCP server** + - Does not listen on a port. + - Is started by each MCP client as a normal child process. + - Forwards runtime/team operations to the local HTTP control API. + +For OpenClaw, the preferred path is: + +```text +OpenClaw -> stdio MCP process: agent-teams-mcp -> local Agent Teams HTTP API -> Desktop runtime +``` + +Direct REST is also possible: + +```text +OpenClaw -> http://127.0.0.1:3456/api/... -> Desktop runtime +``` + +## 2. Important Architecture Notes + +### 2.1 Multiple MCP Processes Are Expected + +It is safe for multiple agents and OpenClaw to each start their own `agent-teams-mcp` process. + +This works because `agent-teams-mcp` uses **stdio transport**: + +- it does not bind a TCP port; +- it does not own team state; +- it does not create a separate control plane; +- it reads/writes through the shared Agent Teams runtime and shared Claude data directory. 
+ +Example: + +```text +Agent 1 MCP process \ +Agent 2 MCP process -> Agent Teams Desktop HTTP API -> shared teams/tasks/runtime +OpenClaw MCP process/ +``` + +The MCP processes are many. The control plane is one. + +### 2.2 The MCP Server Has No URL + +Do not look for an MCP URL. + +`agent-teams-mcp` is launched by the MCP client: + +```text +client starts process -> client speaks JSON-RPC over stdin/stdout +``` + +The URL belongs to the desktop app HTTP control API, not to MCP. + +### 2.3 The HTTP Control API Is Localhost Only + +The desktop HTTP server binds to `127.0.0.1` by default. + +Default base URL: + +```text +http://127.0.0.1:3456 +``` + +If port `3456` is busy, the app tries the next ports. + +The current URL is published to: + +```text +~/.claude/team-control-api.json +``` + +Example: + +```json +{ + "baseUrl": "http://127.0.0.1:3456", + "pid": 12345, + "updatedAt": "2026-04-29T10:00:00.000Z" +} +``` + +### 2.4 Remote OpenClaw Needs a Tunnel + +If OpenClaw runs on the same Mac as the desktop app, no tunnel is needed. + +If OpenClaw runs on another server, it cannot directly reach `127.0.0.1` on the Mac. Use one of: + +- SSH tunnel; +- reverse tunnel; +- VPN; +- a future authenticated remote control endpoint. + +Do not expose the local HTTP API to the public internet without authentication and transport security. + +## 3. Prerequisites + +1. Agent Teams desktop app is running. +2. HTTP server is enabled in Agent Teams settings. +3. OpenClaw runs on the same machine, or has a secure tunnel to the machine. +4. Node.js 20+ is available if OpenClaw will launch the MCP server from source or build output. + +To confirm the HTTP control API is available: + +```bash +cat ~/.claude/team-control-api.json +curl -s http://127.0.0.1:3456/api/teams +``` + +If the app selected a different port, use the `baseUrl` from `team-control-api.json`. + +## 4. Recommended Integration: MCP + +Use MCP if OpenClaw supports external MCP servers. 
MCP gives OpenClaw a tool surface instead of forcing it to hand-roll REST calls. + +### 4.1 Dev Workspace MCP Config + +When running from this repository: + +```json +{ + "mcpServers": { + "agent-teams": { + "command": "pnpm", + "args": ["--dir", "/Users/belief/dev/projects/claude/claude_team/mcp-server", "dev"], + "env": { + "AGENT_TEAMS_MCP_CLAUDE_DIR": "/Users/belief/.claude", + "CLAUDE_TEAM_CONTROL_URL": "http://127.0.0.1:3456" + } + } + } +} +``` + +Notes: + +- Adjust the paths for the user's machine. +- `AGENT_TEAMS_MCP_CLAUDE_DIR` tells MCP which Claude data directory to use. +- `CLAUDE_TEAM_CONTROL_URL` is optional if `~/.claude/team-control-api.json` exists, but it is useful for explicit setup. +- If the HTTP server is on another port, update `CLAUDE_TEAM_CONTROL_URL`. + +### 4.2 Built MCP Config + +For a built MCP server: + +```bash +pnpm --filter agent-teams-mcp build +``` + +Then configure OpenClaw like: + +```json +{ + "mcpServers": { + "agent-teams": { + "command": "node", + "args": ["/Users/belief/dev/projects/claude/claude_team/mcp-server/dist/index.js"], + "env": { + "AGENT_TEAMS_MCP_CLAUDE_DIR": "/Users/belief/.claude", + "CLAUDE_TEAM_CONTROL_URL": "http://127.0.0.1:3456" + } + } + } +} +``` + +### 4.3 Packaged App Config + +In a packaged app, the app resolves its bundled MCP entrypoint internally for teams it launches. For an external client like OpenClaw, give it either: + +- the packaged `agent-teams-mcp/dist/index.js` path, if available; +- or a separately installed copy of `agent-teams-mcp`; +- or a dev checkout path while testing. + +The MCP client still starts it as a stdio process. + +## 5. MCP Tool Flow Examples + +The exact UI for tool calls depends on OpenClaw, but the calls are conceptually: + +### 5.1 List Teams + +Tool: + +```text +team_list +``` + +Arguments: + +```json +{ + "controlUrl": "http://127.0.0.1:3456" +} +``` + +`controlUrl` can be omitted if `~/.claude/team-control-api.json` is available. 
+ +### 5.2 Create a Draft Team + +Tool: + +```text +team_create +``` + +Arguments: + +```json +{ + "teamName": "openclaw-review", + "displayName": "OpenClaw Review", + "description": "Team used by OpenClaw to cross-check complex work", + "cwd": "/Users/belief/dev/projects/example-project", + "providerId": "codex", + "providerBackendId": "codex-native", + "model": "gpt-5.4", + "effort": "high", + "fastMode": "inherit", + "limitContext": true, + "skipPermissions": false, + "members": [ + { + "name": "reviewer", + "role": "Reviewer", + "workflow": "Review OpenClaw's work for bugs, missing tests, incorrect assumptions, and integration risks.", + "providerId": "codex", + "providerBackendId": "codex-native", + "model": "gpt-5.4", + "effort": "high", + "fastMode": "inherit" + }, + { + "name": "critic", + "role": "Critical reviewer", + "workflow": "Look for edge cases, concurrency issues, unsafe assumptions, and architectural regressions.", + "providerId": "anthropic", + "model": "claude-opus-4-6", + "effort": "high" + } + ] +} +``` + +This creates a **draft** team. It does not launch the runtime yet. + +### 5.3 Inspect a Team + +Tool: + +```text +team_get +``` + +Arguments: + +```json +{ + "teamName": "openclaw-review" +} +``` + +For a draft team, the response includes draft/saved request data. For a launched/configured team, it returns the team snapshot. + +### 5.4 Launch a Team + +Tool: + +```text +team_launch +``` + +Arguments: + +```json +{ + "teamName": "openclaw-review", + "cwd": "/Users/belief/dev/projects/example-project", + "prompt": "Cross-check OpenClaw's latest changes. Focus on regressions, missing tests, and risky assumptions. Report actionable findings.", + "waitForReady": true, + "waitTimeoutMs": 180000 +} +``` + +`team_launch` works for: + +- a draft team created by `team_create`; +- an existing configured team already known to Agent Teams. 
+ +### 5.5 Stop a Team + +Tool: + +```text +team_stop +``` + +Arguments: + +```json +{ + "teamName": "openclaw-review", + "waitForStop": true +} +``` + +## 6. Suggested OpenClaw Policy + +OpenClaw can use Agent Teams only for work that benefits from parallel review or specialized team behavior. + +Suggested routing: + +1. For small edits, OpenClaw works alone. +2. For risky changes, OpenClaw calls `team_create` if the review team does not exist. +3. OpenClaw calls `team_launch` with a focused review prompt. +4. OpenClaw waits for team readiness. +5. OpenClaw uses the existing MCP board/message tools to create tasks or collect results, if needed. +6. OpenClaw treats Agent Teams feedback as review input, not as automatically trusted output. + +Example instruction for OpenClaw: + +```text +When the task is complex, high-risk, or needs cross-checking, use the agent-teams MCP server. + +Prefer reusing an existing team named "openclaw-review". +If it does not exist, create it with team_create. +Launch it with team_launch and a focused review prompt. +Use team_get to inspect team state. +Do not create duplicate teams with the same purpose. +Do not expose the local control API outside localhost unless the user explicitly configured a secure tunnel. +``` + +## 7. Direct REST API Integration + +Use REST if OpenClaw cannot use MCP, or if you want a very small integration without MCP tool registration. 
+ +Base URL: + +```text +http://127.0.0.1:3456 +``` + +Discover the current base URL: + +```bash +cat ~/.claude/team-control-api.json +``` + +### 7.1 REST Endpoint Summary + +| Method | Path | Purpose | +| ------ | -------------------------------- | --------------------------------- | +| `GET` | `/api/teams` | List teams | +| `POST` | `/api/teams` | Create a draft team configuration | +| `GET` | `/api/teams/:teamName` | Get a draft or configured team | +| `POST` | `/api/teams/:teamName/launch` | Launch a draft or configured team | +| `POST` | `/api/teams/:teamName/stop` | Stop a running team | +| `GET` | `/api/teams/:teamName/runtime` | Get runtime state for one team | +| `GET` | `/api/teams/provisioning/:runId` | Poll launch/provisioning status | +| `GET` | `/api/teams/runtime/alive` | List alive team runtime states | + +Advanced OpenCode runtime bridge endpoints also exist: + +| Method | Path | +| ------ | --------------------------------------------------------- | +| `POST` | `/api/teams/:teamName/opencode/runtime/bootstrap-checkin` | +| `POST` | `/api/teams/:teamName/opencode/runtime/deliver-message` | +| `POST` | `/api/teams/:teamName/opencode/runtime/task-event` | +| `POST` | `/api/teams/:teamName/opencode/runtime/heartbeat` | + +Most OpenClaw integrations should not need the OpenCode runtime bridge endpoints. + +### 7.2 List Teams + +```bash +curl -s http://127.0.0.1:3456/api/teams | jq . 
+``` + +### 7.3 Create a Draft Team + +```bash +curl -s \ + -X POST http://127.0.0.1:3456/api/teams \ + -H 'content-type: application/json' \ + -d '{ + "teamName": "openclaw-review", + "displayName": "OpenClaw Review", + "description": "Team used by OpenClaw to cross-check complex work", + "cwd": "/Users/belief/dev/projects/example-project", + "providerId": "codex", + "providerBackendId": "codex-native", + "model": "gpt-5.4", + "effort": "high", + "fastMode": "inherit", + "limitContext": true, + "skipPermissions": false, + "members": [ + { + "name": "reviewer", + "role": "Reviewer", + "workflow": "Review OpenClaw work for correctness, regressions, and missing tests.", + "providerId": "codex", + "providerBackendId": "codex-native", + "model": "gpt-5.4", + "effort": "high" + } + ] + }' | jq . +``` + +Expected response: + +```json +{ + "teamName": "openclaw-review" +} +``` + +### 7.4 Get a Draft or Existing Team + +```bash +curl -s http://127.0.0.1:3456/api/teams/openclaw-review | jq . +``` + +Draft response shape: + +```json +{ + "teamName": "openclaw-review", + "pendingCreate": true, + "savedRequest": { + "teamName": "openclaw-review", + "cwd": "/Users/belief/dev/projects/example-project", + "members": [] + } +} +``` + +Configured team response shape is the normal Agent Teams team data snapshot. + +### 7.5 Launch a Team + +```bash +curl -s \ + -X POST http://127.0.0.1:3456/api/teams/openclaw-review/launch \ + -H 'content-type: application/json' \ + -d '{ + "cwd": "/Users/belief/dev/projects/example-project", + "prompt": "Cross-check OpenClaw latest work. Focus on bugs, missing tests, and architectural risks.", + "providerId": "codex", + "providerBackendId": "codex-native", + "model": "gpt-5.4", + "effort": "high", + "fastMode": "inherit", + "limitContext": true, + "skipPermissions": false + }' | jq . +``` + +Expected response: + +```json +{ + "runId": "..." 
+} +``` + +### 7.6 Poll Launch Status + +```bash +RUN_ID="paste-run-id-here" +curl -s "http://127.0.0.1:3456/api/teams/provisioning/$RUN_ID" | jq . +``` + +Terminal states: + +- `ready` +- `failed` +- `disconnected` +- `cancelled` + +A successful launch reaches: + +```json +{ + "state": "ready" +} +``` + +### 7.7 Get Runtime State + +```bash +curl -s http://127.0.0.1:3456/api/teams/openclaw-review/runtime | jq . +``` + +### 7.8 Stop a Team + +```bash +curl -s \ + -X POST http://127.0.0.1:3456/api/teams/openclaw-review/stop \ + -H 'content-type: application/json' \ + -d '{}' | jq . +``` + +## 8. JavaScript REST Client Example + +This is a minimal OpenClaw-side helper. + +```js +import fs from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; + +async function getAgentTeamsBaseUrl() { + if (process.env.CLAUDE_TEAM_CONTROL_URL) { + return process.env.CLAUDE_TEAM_CONTROL_URL; + } + + const statePath = path.join(os.homedir(), '.claude', 'team-control-api.json'); + const raw = await fs.readFile(statePath, 'utf8'); + const parsed = JSON.parse(raw); + if (!parsed.baseUrl) { + throw new Error('team-control-api.json does not contain baseUrl'); + } + return parsed.baseUrl; +} + +async function requestJson(pathname, options = {}) { + const baseUrl = await getAgentTeamsBaseUrl(); + const response = await fetch(`${baseUrl}${pathname}`, { + method: options.method ?? 'GET', + headers: { + accept: 'application/json', + ...(options.body ? { 'content-type': 'application/json' } : {}), + }, + ...(options.body ?
{ body: JSON.stringify(options.body) } : {}), + }); + + const payload = await response.json().catch(() => null); + if (!response.ok) { + throw new Error(payload?.error || `${response.status} ${response.statusText}`); + } + return payload; +} + +export async function ensureReviewTeam() { + const teamName = 'openclaw-review'; + + try { + return await requestJson(`/api/teams/${teamName}`); + } catch (error) { + if (!String(error.message).includes('not found')) { + throw error; + } + } + + await requestJson('/api/teams', { + method: 'POST', + body: { + teamName, + displayName: 'OpenClaw Review', + cwd: process.cwd(), + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.4', + effort: 'high', + members: [ + { + name: 'reviewer', + role: 'Reviewer', + workflow: 'Cross-check OpenClaw work for bugs, missing tests, and risky assumptions.', + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.4', + effort: 'high', + }, + ], + }, + }); + + return requestJson(`/api/teams/${teamName}`); +} + +export async function launchReviewTeam(prompt) { + const teamName = 'openclaw-review'; + await ensureReviewTeam(); + const launch = await requestJson(`/api/teams/${teamName}/launch`, { + method: 'POST', + body: { + cwd: process.cwd(), + prompt, + providerId: 'codex', + providerBackendId: 'codex-native', + model: 'gpt-5.4', + effort: 'high', + skipPermissions: false, + }, + }); + return launch; +} +``` + +## 9. Validation and Error Behavior + +### 9.1 Team Names + +Team names must be kebab-case: + +```text +[a-z0-9-], max 64 chars +``` + +Good: + +```text +openclaw-review +repo-audit-1 +security-check +``` + +Bad: + +```text +OpenClaw Review +openclaw_review +review team +``` + +### 9.2 Member Names + +Member names are validated by Agent Teams. 
+ +Avoid reserved names: + +- `user` +- `team-lead` + +Use simple names: + +```text +reviewer +critic +tester +architect +``` + +### 9.3 Common HTTP Status Codes + +| Status | Meaning | +| ------ | ------------------------------------------------------------------- | +| `400` | Invalid request payload | +| `404` | Team or run id not found | +| `409` | Conflict, for example team already exists or stale runtime evidence | +| `501` | Team control service is not available in this mode | +| `500` | Unexpected server/runtime error | + +## 10. Recommended Choice + +### Option A: MCP-first integration + +Confidence: `9/10`. Reliability: `8/10`. Complexity: `4/10`. Roughly `20-60 LOC` of OpenClaw config/glue. + +Use this when OpenClaw supports MCP. It is the cleanest integration because OpenClaw sees tools like `team_create`, `team_get`, and `team_launch`. + +Pros: + +- idiomatic for AI clients; +- no custom HTTP client needed; +- multiple MCP processes are safe; +- automatically uses the same team/task tool surface that Agent Teams already gives agents. + +Cons: + +- OpenClaw must support stdio MCP servers; +- debugging involves MCP logs plus desktop logs. + +### Option B: REST-first integration + +Confidence: `8/10`. Reliability: `7/10`. Complexity: `5/10`. Roughly `80-180 LOC` of OpenClaw code. + +Use this when OpenClaw does not support MCP or when you want direct lifecycle control. + +Pros: + +- easy to call from any language; +- simple to debug with curl; +- no MCP client integration needed. + +Cons: + +- only exposes HTTP routes currently implemented; +- OpenClaw must implement retries/polling; +- task/message/board workflows are richer through MCP. + +### Option C: Hybrid MCP + REST + +Confidence: `8/10`. Reliability: `8/10`. Complexity: `7/10`. Roughly `120-260 LOC`. + +Use MCP for normal AI tool calls and REST for health checks, diagnostics, or non-agent automation. 
+ +Pros: + +- best operational visibility; +- can recover from MCP-client-specific issues; +- useful for dashboards or service wrappers. + +Cons: + +- more moving parts; +- more integration surface to test. + +Recommended starting point: **Option A, MCP-first**. + +## 11. Troubleshooting + +### MCP tool says the control API is unavailable + +Check: + +```bash +cat ~/.claude/team-control-api.json +curl -s http://127.0.0.1:3456/api/teams +``` + +Fix: + +- start the Agent Teams desktop app; +- enable the HTTP server in settings; +- pass `CLAUDE_TEAM_CONTROL_URL` explicitly in OpenClaw MCP config. + +### OpenClaw starts MCP, but tool calls fail + +Possible causes: + +- wrong `AGENT_TEAMS_MCP_CLAUDE_DIR`; +- desktop app is using a different Claude root; +- HTTP server is disabled; +- port changed because `3456` was busy; +- OpenClaw runs on another machine without a tunnel. + +### `team_create` returns conflict + +The team already exists. Use `team_get` and either reuse it or choose a new name. + +### `team_launch` hangs or times out + +Check provisioning status: + +```bash +curl -s "http://127.0.0.1:3456/api/teams/provisioning/$RUN_ID" | jq . +``` + +Possible causes: + +- model/provider unavailable; +- runtime auth missing; +- working directory is invalid; +- app-side provisioning failed; +- team is already in a conflicting runtime state. + +### Remote OpenClaw cannot connect + +This is expected if it is not on the same machine. The API is local-only by default. + +Use an SSH tunnel, for example: + +```bash +ssh -N -L 3456:127.0.0.1:3456 user@mac-mini-host +``` + +Then OpenClaw can use: + +```text +http://127.0.0.1:3456 +``` + +from its own machine if the tunnel is established there. + +## 12. Security Notes + +- The current control API is intended for local use. +- It should not be bound to public interfaces without authentication. +- Prefer SSH tunnels for remote access. +- Treat access to the control API as access to team runtime control. 
+- Do not share `~/.claude` with untrusted processes. + +## 13. Summary for the Original Request + +The requested integration is realistic: + +- OpenClaw can call Agent Teams through MCP. +- OpenClaw can also call the local REST API directly. +- Each agent/OpenClaw can run its own stdio MCP process. +- Those MCP processes do not conflict because they do not bind ports. +- The single shared control point is the Agent Teams desktop HTTP API. +- For local Mac mini usage, this is the right initial architecture. From d2a3cb45adf8a1b998d00af19eefc51a51f5de51 Mon Sep 17 00:00:00 2001 From: 777genius Date: Wed, 29 Apr 2026 11:14:21 +0300 Subject: [PATCH 21/25] docs: clarify openclaw agent teams integration --- .../openclaw-agent-teams-integration.md | 973 +++++++++++++----- 1 file changed, 687 insertions(+), 286 deletions(-) diff --git a/docs/team-management/openclaw-agent-teams-integration.md b/docs/team-management/openclaw-agent-teams-integration.md index a2c4b77e..5524dc11 100644 --- a/docs/team-management/openclaw-agent-teams-integration.md +++ b/docs/team-management/openclaw-agent-teams-integration.md @@ -1,77 +1,180 @@ # OpenClaw Integration With Agent Teams -**Status:** Local-first integration guide -**Audience:** OpenClaw or any external AI client that can call MCP tools or local HTTP APIs -**Primary use case:** Let an outside AI create, inspect, launch, and coordinate Agent Teams for complex work and cross-checking +- **Status:** local-first integration guide +- **Audience:** OpenClaw or any outside AI client that can run MCP tools or call a local REST API +- **Primary use case:** let an outside AI create, inspect, launch, and cross-check Agent Teams work +- **Recommended first implementation:** MCP-first, REST as a lifecycle/debug fallback -## 1. Short Answer +## 1. Executive Summary -Yes, this is feasible. +Yes, this integration is feasible. -The integration has two layers: +The clean local architecture is: -1. 
**Agent Teams Desktop App HTTP control API** - - Runs locally on the same machine as the desktop app. +```text +OpenClaw + starts agent-teams-mcp over stdio + calls Agent Teams desktop HTTP control API + controls shared Agent Teams runtime and shared ~/.claude state +``` + +There are two integration surfaces: + +1. **MCP surface** + - Best fit for an AI agent like OpenClaw. + - OpenClaw starts `agent-teams-mcp` as a child process. + - The MCP server exposes tools such as `team_list`, `team_create`, `team_get`, `team_launch`, `task_create`, `message_send`, `review_request`, and more. + +2. **REST control API** + - Best fit for lifecycle automation, health checks, debugging, and simple wrappers. + - Runs inside the Agent Teams desktop app. - Defaults to `http://127.0.0.1:3456`. - - Exposes REST endpoints for teams and runtime lifecycle. + - Currently covers team lifecycle/runtime control. It does not replace the full board/message MCP tool surface. -2. **`agent-teams-mcp` stdio MCP server** - - Does not listen on a port. - - Is started by each MCP client as a normal child process. - - Forwards runtime/team operations to the local HTTP control API. - -For OpenClaw, the preferred path is: +For the original request - "can OpenClaw call Agent Teams for complex tasks and cross-checking?" - the answer is: ```text -OpenClaw -> stdio MCP process: agent-teams-mcp -> local Agent Teams HTTP API -> Desktop runtime +Use MCP for normal AI-to-Agent-Teams interaction. +Use REST for lifecycle/debug or if OpenClaw cannot use MCP yet. ``` -Direct REST is also possible: +## 2. The Mental Model + +### 2.1 What Runs Where ```text -OpenClaw -> http://127.0.0.1:3456/api/... 
-> Desktop runtime +Mac mini or local Mac + +Agent Teams Desktop App + - owns Electron UI + - owns team runtime process management + - owns local HTTP control API + - writes current control API URL to ~/.claude/team-control-api.json + +agent-teams-mcp + - stdio MCP server process + - started by each MCP client that needs it + - does not listen on a port + - forwards lifecycle operations to the desktop HTTP control API + - uses ~/.claude for team/task/message controller state + +OpenClaw + - external AI client + - can start agent-teams-mcp as an MCP server + - can optionally call REST directly ``` -## 2. Important Architecture Notes +### 2.2 One Control Plane, Many MCP Processes -### 2.1 Multiple MCP Processes Are Expected - -It is safe for multiple agents and OpenClaw to each start their own `agent-teams-mcp` process. - -This works because `agent-teams-mcp` uses **stdio transport**: - -- it does not bind a TCP port; -- it does not own team state; -- it does not create a separate control plane; -- it reads/writes through the shared Agent Teams runtime and shared Claude data directory. - -Example: +Multiple MCP processes are expected and safe. ```text Agent 1 MCP process \ -Agent 2 MCP process -> Agent Teams Desktop HTTP API -> shared teams/tasks/runtime +Agent 2 MCP process -> one Agent Teams desktop HTTP API -> shared teams/tasks/runtime OpenClaw MCP process/ ``` -The MCP processes are many. The control plane is one. +This is safe because `agent-teams-mcp` is a stdio process: -### 2.2 The MCP Server Has No URL +- it does not bind a port; +- it does not own global runtime state; +- it does not create a second app server; +- it uses the shared desktop app control API for lifecycle operations; +- it uses the shared Claude data directory for team/task/message state. -Do not look for an MCP URL. +The thing that must be singular is the **desktop control plane**, not the MCP process. 
-`agent-teams-mcp` is launched by the MCP client: +### 2.3 What Can Conflict + +MCP processes themselves should not conflict. + +Possible conflicts are logical, not port/process conflicts: + +- two clients create the same `teamName`; +- two clients launch or stop the same team at the same time; +- two clients edit the same task concurrently; +- one client changes the board while another client is using stale state. + +These are normal shared-state coordination issues. They are not caused by multiple MCP servers. + +## 3. Recommended Integration Choice + +### Option A: MCP-first integration + +Scores: 🎯 9/10 🛡️ 8/10 🧠 4/10 + +Expected OpenClaw changes: roughly `20-80 LOC` plus configuration. + +Use this if OpenClaw supports stdio MCP servers. + +Why it is the recommended path: + +- it matches how AI clients naturally call tools; +- it exposes the richer board/task/message/review surface; +- each OpenClaw run can start its own MCP process safely; +- it avoids writing a custom task/message client against internal files; +- it keeps OpenClaw integration close to the tools Agent Teams already gives team agents. + +Use REST only for health checks and debugging in this option. + +### Option B: REST-first lifecycle integration + +Scores: 🎯 7/10 🛡️ 7/10 🧠 5/10 + +Expected OpenClaw changes: roughly `80-180 LOC`. + +Use this if OpenClaw cannot run MCP yet. + +Important limitation: ```text -client starts process -> client speaks JSON-RPC over stdin/stdout +REST currently covers team lifecycle/runtime control. +It is not the full board/task/message/review control surface. ``` -The URL belongs to the desktop app HTTP control API, not to MCP. +REST can: -### 2.3 The HTTP Control API Is Localhost Only +- list teams; +- create draft team configs; +- get team snapshots; +- launch teams; +- stop teams; +- poll runtime/provisioning state. -The desktop HTTP server binds to `127.0.0.1` by default. +REST should not be treated as the full replacement for task/message MCP tools. 
-Default base URL: +### Option C: Hybrid integration + +Scores: 🎯 8/10 🛡️ 8/10 🧠 7/10 + +Expected OpenClaw changes: roughly `120-260 LOC`. + +Use MCP for normal AI tool calls, and REST for operational checks. + +Good split: + +- MCP: team operations, task creation, messages, reviews, process registry. +- REST: "is the desktop app alive?", "what is the runtime state?", "what is the current run status?" + +This is the best long-term shape if OpenClaw needs both agentic workflows and a supervisory dashboard. + +## 4. Local Setup Checklist + +### 4.1 Start Agent Teams Desktop App + +The desktop app must be running. It owns the runtime and local HTTP API. + +### 4.2 Enable Browser Access / Server Mode + +In the desktop app: + +```text +Settings -> Browser Access -> Enable server mode +``` + +When enabled, the app starts a local Fastify HTTP server. + +Default: ```text http://127.0.0.1:3456 @@ -79,7 +182,9 @@ http://127.0.0.1:3456 If port `3456` is busy, the app tries the next ports. -The current URL is published to: +### 4.3 Discover the Current Control API URL + +The desktop app writes the active URL to: ```text ~/.claude/team-control-api.json @@ -95,42 +200,69 @@ Example: } ``` -### 2.4 Remote OpenClaw Needs a Tunnel - -If OpenClaw runs on the same Mac as the desktop app, no tunnel is needed. - -If OpenClaw runs on another server, it cannot directly reach `127.0.0.1` on the Mac. Use one of: - -- SSH tunnel; -- reverse tunnel; -- VPN; -- a future authenticated remote control endpoint. - -Do not expose the local HTTP API to the public internet without authentication and transport security. - -## 3. Prerequisites - -1. Agent Teams desktop app is running. -2. HTTP server is enabled in Agent Teams settings. -3. OpenClaw runs on the same machine, or has a secure tunnel to the machine. -4. Node.js 20+ is available if OpenClaw will launch the MCP server from source or build output. 
- -To confirm the HTTP control API is available: +Check it: ```bash cat ~/.claude/team-control-api.json +``` + +Then verify REST: + +```bash curl -s http://127.0.0.1:3456/api/teams ``` -If the app selected a different port, use the `baseUrl` from `team-control-api.json`. +If the file shows a different port, use that `baseUrl`. -## 4. Recommended Integration: MCP +### 4.4 Local vs Remote OpenClaw -Use MCP if OpenClaw supports external MCP servers. MCP gives OpenClaw a tool surface instead of forcing it to hand-roll REST calls. +If OpenClaw runs on the same Mac: -### 4.1 Dev Workspace MCP Config +```text +No tunnel needed. +Use http://127.0.0.1:<port>. +``` -When running from this repository: +If OpenClaw runs on another machine: + +```text +127.0.0.1 points to the OpenClaw machine, not to the Mac running Agent Teams. +Use an SSH tunnel, reverse tunnel, VPN, or another secure local-network setup. +``` + +Basic SSH tunnel example: + +```bash +ssh -N -L 3456:127.0.0.1:3456 user@mac-mini-host +``` + +Then OpenClaw can use: + +```text +http://127.0.0.1:3456 +``` + +from the machine where the tunnel is open. + +## 5. MCP Integration + +### 5.1 What OpenClaw Needs To Do + +OpenClaw should register `agent-teams-mcp` as a stdio MCP server. + +That means OpenClaw starts a process and speaks MCP JSON-RPC over stdin/stdout. + +OpenClaw does **not** connect to an MCP URL. + +The URL belongs to the desktop HTTP control API and is passed to MCP through: + +- `CLAUDE_TEAM_CONTROL_URL`, or +- `~/.claude/team-control-api.json`, or +- per-tool `controlUrl`. + +### 5.2 Dev Checkout MCP Config + +Use this while testing from the repository checkout: ```json { @@ -147,22 +279,21 @@ When running from this repository: } ``` -Notes: +Adjust: -- Adjust the paths for the user's machine. -- `AGENT_TEAMS_MCP_CLAUDE_DIR` tells MCP which Claude data directory to use. -- `CLAUDE_TEAM_CONTROL_URL` is optional if `~/.claude/team-control-api.json` exists, but it is useful for explicit setup. 
-- If the HTTP server is on another port, update `CLAUDE_TEAM_CONTROL_URL`. +- repo path; +- Claude data directory; +- control URL port. -### 4.2 Built MCP Config +### 5.3 Built MCP Config -For a built MCP server: +Build: ```bash pnpm --filter agent-teams-mcp build ``` -Then configure OpenClaw like: +Configure OpenClaw: ```json { @@ -179,21 +310,81 @@ Then configure OpenClaw like: } ``` -### 4.3 Packaged App Config +### 5.4 If OpenClaw Supports `cwd` -In a packaged app, the app resolves its bundled MCP entrypoint internally for teams it launches. For an external client like OpenClaw, give it either: +Some MCP clients allow a `cwd` field. If OpenClaw supports it, this is cleaner: -- the packaged `agent-teams-mcp/dist/index.js` path, if available; -- or a separately installed copy of `agent-teams-mcp`; -- or a dev checkout path while testing. +```json +{ + "mcpServers": { + "agent-teams": { + "command": "pnpm", + "args": ["dev"], + "cwd": "/Users/belief/dev/projects/claude/claude_team/mcp-server", + "env": { + "AGENT_TEAMS_MCP_CLAUDE_DIR": "/Users/belief/.claude", + "CLAUDE_TEAM_CONTROL_URL": "http://127.0.0.1:3456" + } + } + } +} +``` -The MCP client still starts it as a stdio process. +If OpenClaw does not support `cwd`, use the `pnpm --dir ... dev` form. -## 5. MCP Tool Flow Examples +### 5.5 MCP URL Discovery Order -The exact UI for tool calls depends on OpenClaw, but the calls are conceptually: +For lifecycle tools such as `team_list`, `team_get`, `team_create`, and `team_launch`, the control URL is resolved in this order: -### 5.1 List Teams +1. tool argument `controlUrl`; +2. `~/.claude/team-control-api.json`; +3. environment variable `CLAUDE_TEAM_CONTROL_URL`. + +Passing `CLAUDE_TEAM_CONTROL_URL` is the most explicit OpenClaw setup. + +Passing `controlUrl` per tool call is useful for debugging or tunnels. 
+ +### 5.6 MCP Tool Surface + +Current tool groups: + +| Group | Tools | +| ----------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Team lifecycle | `team_list`, `team_get`, `team_create` | +| Runtime lifecycle | `team_launch`, `team_stop` | +| Task board | `task_create`, `task_create_from_message`, `task_get`, `task_get_comment`, `task_list`, `task_start`, `task_complete`, `task_set_owner`, `task_set_status`, `task_add_comment`, `task_link`, `task_unlink`, `task_set_clarification`, `task_restore`, `task_attach_file`, `task_attach_comment_file`, `task_briefing`, `member_briefing` | +| Lead briefing | `lead_briefing` | +| Review | `review_request`, `review_start`, `review_approve`, `review_request_changes` | +| Messages | `message_send` | +| Cross-team | `cross_team_send`, `cross_team_list_targets`, `cross_team_get_outbox` | +| Kanban | `kanban_get`, `kanban_set_column`, `kanban_clear`, `kanban_list_reviewers`, `kanban_add_reviewer`, `kanban_remove_reviewer` | +| Process registry | `process_register`, `process_list`, `process_stop`, `process_unregister` | +| Runtime bridge | `runtime_bootstrap_checkin`, `runtime_deliver_message`, `runtime_task_event`, `runtime_heartbeat` | + +Most OpenClaw integrations need: + +```text +team_list +team_get +team_create +team_launch +team_stop +task_create +task_list +task_get +message_send +review_request +review_request_changes +review_approve +``` + +The `runtime_*` tools are low-level OpenCode runtime bridge tools. Do not use them for ordinary user/team messaging. + +## 6. MCP Workflow Examples + +The exact call UI depends on OpenClaw. The examples below show the arguments conceptually. 
+ +### 6.1 List Teams Tool: @@ -209,9 +400,29 @@ Arguments: } ``` -`controlUrl` can be omitted if `~/.claude/team-control-api.json` is available. +`controlUrl` can be omitted if `~/.claude/team-control-api.json` exists and points to the running desktop app. -### 5.2 Create a Draft Team +### 6.2 Get a Team + +Tool: + +```text +team_get +``` + +Arguments: + +```json +{ + "teamName": "openclaw-review" +} +``` + +For a draft team, the response includes `pendingCreate` and `savedRequest`. + +For a configured team, the response is the normal team snapshot. + +### 6.3 Create a Draft Review Team Tool: @@ -238,7 +449,7 @@ Arguments: { "name": "reviewer", "role": "Reviewer", - "workflow": "Review OpenClaw's work for bugs, missing tests, incorrect assumptions, and integration risks.", + "workflow": "Review OpenClaw work for bugs, missing tests, incorrect assumptions, and integration risks.", "providerId": "codex", "providerBackendId": "codex-native", "model": "gpt-5.4", @@ -257,27 +468,15 @@ Arguments: } ``` -This creates a **draft** team. It does not launch the runtime yet. +This creates a draft team config. It does not start the runtime. -### 5.3 Inspect a Team +Important: -Tool: +- Put provider/backend/model/fast-mode defaults into `team_create`. +- MCP `team_launch` currently accepts a smaller runtime override shape. +- When launching a draft through MCP, saved draft fields are reused. -```text -team_get -``` - -Arguments: - -```json -{ - "teamName": "openclaw-review" -} -``` - -For a draft team, the response includes draft/saved request data. For a launched/configured team, it returns the team snapshot. - -### 5.4 Launch a Team +### 6.4 Launch the Team Tool: @@ -291,7 +490,7 @@ Arguments: { "teamName": "openclaw-review", "cwd": "/Users/belief/dev/projects/example-project", - "prompt": "Cross-check OpenClaw's latest changes. Focus on regressions, missing tests, and risky assumptions. Report actionable findings.", + "prompt": "Cross-check OpenClaw latest work. 
Focus on bugs, missing tests, and architectural risks. Return concise actionable findings.", "waitForReady": true, "waitTimeoutMs": 180000 } @@ -300,9 +499,78 @@ Arguments: `team_launch` works for: - a draft team created by `team_create`; -- an existing configured team already known to Agent Teams. +- an existing configured team. -### 5.5 Stop a Team +Current MCP `team_launch` launch overrides are intentionally smaller than `team_create`: + +```text +cwd +prompt +model +effort: low | medium | high +clearContext +skipPermissions +worktree +extraCliArgs +waitForReady +waitTimeoutMs +``` + +Do not pass `providerId`, `providerBackendId`, `fastMode`, or `limitContext` to MCP `team_launch`. +Put those into `team_create` so the saved draft can be reused at launch time. + +If `waitForReady` is true, the tool waits for provisioning to reach `ready` or fail. + +### 6.5 Create a Review Task After Launch + +Use this if OpenClaw wants the team to track review work on the board. + +Tool: + +```text +task_create +``` + +Arguments: + +```json +{ + "teamName": "openclaw-review", + "subject": "Review OpenClaw latest patch", + "description": "Check correctness, tests, edge cases, and integration risks. Report concrete findings only.", + "owner": "reviewer", + "createdBy": "openclaw", + "startImmediately": true +} +``` + +`task_create` requires a configured team, so launch the team first. + +### 6.6 Send a Message To the Team + +Tool: + +```text +message_send +``` + +Arguments: + +```json +{ + "teamName": "openclaw-review", + "to": "reviewer", + "from": "openclaw", + "text": "Please review the latest changes. Focus on regressions and missing tests.", + "summary": "Review request from OpenClaw" +} +``` + +Use `message_send` for normal visible messages. + +Do not use `runtime_deliver_message` for ordinary OpenClaw-to-team communication. + +### 6.7 Stop the Team Tool: @@ -319,49 +587,88 @@ Arguments: } ``` -## 6. Suggested OpenClaw Policy +## 7. 
Suggested OpenClaw Policy -OpenClaw can use Agent Teams only for work that benefits from parallel review or specialized team behavior. +OpenClaw should not call Agent Teams for every small task. Use it when parallel review or team behavior matters. -Suggested routing: - -1. For small edits, OpenClaw works alone. -2. For risky changes, OpenClaw calls `team_create` if the review team does not exist. -3. OpenClaw calls `team_launch` with a focused review prompt. -4. OpenClaw waits for team readiness. -5. OpenClaw uses the existing MCP board/message tools to create tasks or collect results, if needed. -6. OpenClaw treats Agent Teams feedback as review input, not as automatically trusted output. - -Example instruction for OpenClaw: +Suggested policy: ```text -When the task is complex, high-risk, or needs cross-checking, use the agent-teams MCP server. +Use the agent-teams MCP server when the task is complex, high-risk, user-visible, or needs independent cross-checking. -Prefer reusing an existing team named "openclaw-review". -If it does not exist, create it with team_create. -Launch it with team_launch and a focused review prompt. -Use team_get to inspect team state. +Prefer the existing team "openclaw-review". +Call team_get first. +If it does not exist, call team_create. +Call team_launch with a focused prompt. +If the review should be tracked, create a task with task_create. +Use message_send for visible follow-up messages. Do not create duplicate teams with the same purpose. -Do not expose the local control API outside localhost unless the user explicitly configured a secure tunnel. +Do not call runtime_* tools unless implementing an OpenCode runtime bridge. +Do not expose the local control API outside localhost without a secure tunnel. ``` -## 7. Direct REST API Integration +Recommended task routing: -Use REST if OpenClaw cannot use MCP, or if you want a very small integration without MCP tool registration. +1. Small code change: OpenClaw handles it alone. +2. 
Medium risk: OpenClaw launches `openclaw-review` for cross-checking. +3. High risk: OpenClaw creates explicit review tasks for multiple reviewers. +4. Release/blocking work: OpenClaw uses task/review tools and waits for explicit review outcome. -Base URL: +## 8. Direct REST API Integration + +REST is useful when: + +- OpenClaw cannot run MCP yet; +- you need a simple health/lifecycle wrapper; +- you are debugging the desktop control API; +- you want a non-agent script to create or launch teams. + +REST is **not** currently the full board/message/review surface. + +Use MCP for task board and messaging operations. + +### 8.1 Base URL + +Default: ```text http://127.0.0.1:3456 ``` -Discover the current base URL: +Discover current: ```bash cat ~/.claude/team-control-api.json ``` -### 7.1 REST Endpoint Summary +Use: + +```bash +BASE_URL="$( + node <<'NODE' +const fs = require('fs'); +const path = require('path'); + +const statePath = path.join(process.env.HOME, '.claude', 'team-control-api.json'); + +if (fs.existsSync(statePath)) { + const state = JSON.parse(fs.readFileSync(statePath, 'utf8')); + console.log(state.baseUrl || 'http://127.0.0.1:3456'); +} else { + console.log('http://127.0.0.1:3456'); +} +NODE +)" +echo "$BASE_URL" +``` + +Or set manually: + +```bash +BASE_URL="http://127.0.0.1:3456" +``` + +### 8.2 REST Endpoint Summary | Method | Path | Purpose | | ------ | -------------------------------- | --------------------------------- | @@ -374,7 +681,7 @@ cat ~/.claude/team-control-api.json | `GET` | `/api/teams/provisioning/:runId` | Poll launch/provisioning status | | `GET` | `/api/teams/runtime/alive` | List alive team runtime states | -Advanced OpenCode runtime bridge endpoints also exist: +Advanced OpenCode runtime bridge endpoints: | Method | Path | | ------ | --------------------------------------------------------- | @@ -383,19 +690,19 @@ Advanced OpenCode runtime bridge endpoints also exist: | `POST` | `/api/teams/:teamName/opencode/runtime/task-event` | 
| `POST` | `/api/teams/:teamName/opencode/runtime/heartbeat` | -Most OpenClaw integrations should not need the OpenCode runtime bridge endpoints. +Do not use the OpenCode runtime bridge endpoints for normal OpenClaw user/team messages. -### 7.2 List Teams +### 8.3 List Teams ```bash -curl -s http://127.0.0.1:3456/api/teams | jq . +curl -s "$BASE_URL/api/teams" | jq . ``` -### 7.3 Create a Draft Team +### 8.4 Create a Draft Team ```bash curl -s \ - -X POST http://127.0.0.1:3456/api/teams \ + -X POST "$BASE_URL/api/teams" \ -H 'content-type: application/json' \ -d '{ "teamName": "openclaw-review", @@ -431,13 +738,13 @@ Expected response: } ``` -### 7.4 Get a Draft or Existing Team +### 8.5 Get a Draft or Existing Team ```bash -curl -s http://127.0.0.1:3456/api/teams/openclaw-review | jq . +curl -s "$BASE_URL/api/teams/openclaw-review" | jq . ``` -Draft response shape: +Draft shape: ```json { @@ -446,18 +753,24 @@ Draft response shape: "savedRequest": { "teamName": "openclaw-review", "cwd": "/Users/belief/dev/projects/example-project", - "members": [] + "providerId": "codex", + "members": [ + { + "name": "reviewer", + "role": "Reviewer" + } + ] } } ``` -Configured team response shape is the normal Agent Teams team data snapshot. +Configured teams return the normal Agent Teams team snapshot. -### 7.5 Launch a Team +### 8.6 Launch a Team ```bash curl -s \ - -X POST http://127.0.0.1:3456/api/teams/openclaw-review/launch \ + -X POST "$BASE_URL/api/teams/openclaw-review/launch" \ -H 'content-type: application/json' \ -d '{ "cwd": "/Users/belief/dev/projects/example-project", @@ -467,7 +780,6 @@ curl -s \ "model": "gpt-5.4", "effort": "high", "fastMode": "inherit", - "limitContext": true, "skipPermissions": false }' | jq . ``` @@ -480,21 +792,28 @@ Expected response: } ``` -### 7.6 Poll Launch Status +For draft teams, missing launch fields fall back to the saved draft request where supported. 
+ +For existing configured teams, the launch payload is the runtime override for this launch. + +⚠️ `limitContext` should be set during `team_create` for this integration path. +Do not depend on it as a configured-team REST launch override unless the route parser is extended. + +### 8.7 Poll Launch Status ```bash RUN_ID="paste-run-id-here" -curl -s "http://127.0.0.1:3456/api/teams/provisioning/$RUN_ID" | jq . +curl -s "$BASE_URL/api/teams/provisioning/$RUN_ID" | jq . ``` -Ready states: +Terminal states: - `ready` - `failed` - `disconnected` - `cancelled` -A successful launch reaches: +Successful launch: ```json { @@ -502,24 +821,26 @@ A successful launch reaches: } ``` -### 7.7 Get Runtime State +### 8.8 Get Runtime State ```bash -curl -s http://127.0.0.1:3456/api/teams/openclaw-review/runtime | jq . +curl -s "$BASE_URL/api/teams/openclaw-review/runtime" | jq . ``` -### 7.8 Stop a Team +### 8.9 Stop a Team ```bash curl -s \ - -X POST http://127.0.0.1:3456/api/teams/openclaw-review/stop \ + -X POST "$BASE_URL/api/teams/openclaw-review/stop" \ -H 'content-type: application/json' \ -d '{}' | jq . ``` -## 8. JavaScript REST Client Example +## 9. JavaScript REST Client Example -This is a minimal OpenClaw-side helper. +This is a minimal lifecycle-only helper for OpenClaw. + +It does not implement task/message/review operations. Use MCP for those. 
```js import fs from 'node:fs/promises'; @@ -558,15 +879,23 @@ async function requestJson(pathname, options = {}) { return payload; } -export async function ensureReviewTeam() { - const teamName = 'openclaw-review'; - +async function teamExists(teamName) { try { - return await requestJson(`/api/teams/${teamName}`); + return await requestJson(`/api/teams/${encodeURIComponent(teamName)}`); } catch (error) { - if (!String(error.message).includes('not found')) { - throw error; + const message = String(error.message || '').toLowerCase(); + if (message.includes('not found')) { + return null; } + throw error; + } +} + +export async function ensureReviewTeam({ cwd = process.cwd() } = {}) { + const teamName = 'openclaw-review'; + const existing = await teamExists(teamName); + if (existing) { + return existing; } await requestJson('/api/teams', { @@ -574,11 +903,15 @@ export async function ensureReviewTeam() { body: { teamName, displayName: 'OpenClaw Review', - cwd: process.cwd(), + description: 'Team used by OpenClaw to cross-check complex work', + cwd, providerId: 'codex', providerBackendId: 'codex-native', model: 'gpt-5.4', effort: 'high', + fastMode: 'inherit', + limitContext: true, + skipPermissions: false, members: [ { name: 'reviewer', @@ -588,41 +921,73 @@ export async function ensureReviewTeam() { providerBackendId: 'codex-native', model: 'gpt-5.4', effort: 'high', + fastMode: 'inherit', }, ], }, }); - return requestJson(`/api/teams/${teamName}`); + return requestJson(`/api/teams/${encodeURIComponent(teamName)}`); } -export async function launchReviewTeam(prompt) { +export async function launchReviewTeam({ cwd = process.cwd(), prompt }) { const teamName = 'openclaw-review'; - await ensureReviewTeam(); - const launch = await requestJson(`/api/teams/${teamName}/launch`, { + await ensureReviewTeam({ cwd }); + + const launch = await requestJson(`/api/teams/${encodeURIComponent(teamName)}/launch`, { method: 'POST', body: { - cwd: process.cwd(), + cwd, prompt, providerId: 
'codex', providerBackendId: 'codex-native', model: 'gpt-5.4', effort: 'high', + fastMode: 'inherit', skipPermissions: false, }, }); + return launch; } + +export async function waitForReady(runId, { timeoutMs = 180000, pollMs = 1000 } = {}) { + const startedAt = Date.now(); + + while (Date.now() - startedAt < timeoutMs) { + const status = await requestJson(`/api/teams/provisioning/${encodeURIComponent(runId)}`); + if (status.state === 'ready') { + return status; + } + if (['failed', 'disconnected', 'cancelled'].includes(status.state)) { + throw new Error(`Team launch ended in ${status.state}: ${status.error || 'no details'}`); + } + await new Promise((resolve) => setTimeout(resolve, pollMs)); + } + + throw new Error(`Timed out waiting for run ${runId}`); +} ``` -## 9. Validation and Error Behavior +Example use: -### 9.1 Team Names +```js +const launch = await launchReviewTeam({ + cwd: '/Users/belief/dev/projects/example-project', + prompt: 'Cross-check the latest OpenClaw changes. Return concrete bugs and missing tests.', +}); + +await waitForReady(launch.runId); +``` + +## 10. Validation Rules + +### 10.1 Team Names Team names must be kebab-case: ```text -[a-z0-9-], max 64 chars +lowercase alphanumeric segments separated by single hyphens, max 64 chars ``` Good: @@ -639,18 +1004,19 @@ Bad: OpenClaw Review openclaw_review review team +review--team +-review +review- ``` -### 9.2 Member Names - -Member names are validated by Agent Teams. +### 10.2 Member Names Avoid reserved names: - `user` - `team-lead` -Use simple names: +Good: ```text reviewer @@ -659,7 +1025,57 @@ tester architect ``` -### 9.3 Common HTTP Status Codes +### 10.3 Providers and Runtime Fields + +Provider IDs: + +```text +anthropic +codex +gemini +opencode +``` + +Provider backend IDs: + +```text +auto +adapter +api +cli-sdk +codex-native +``` + +Fast mode: + +```text +inherit +on +off +``` + +Effort values are provider-dependent. 
Common values include: + +```text +low +medium +high +``` + +Codex-oriented create flows may also use values such as: + +```text +none +minimal +xhigh +max +``` + +Use values supported by the selected provider/runtime. + +## 11. Error Behavior + +Common REST status codes: | Status | Meaning | | ------ | ------------------------------------------------------------------- | @@ -669,143 +1085,128 @@ architect | `501` | Team control service is not available in this mode | | `500` | Unexpected server/runtime error | -## 10. Recommended Choice +Common MCP failures: -### Option A: MCP-first integration +| Symptom | Likely cause | +| ----------------------------------- | ---------------------------------------------------------------------------------------------------- | +| Control API unavailable | Desktop app not running, server mode disabled, wrong `CLAUDE_TEAM_CONTROL_URL`, or wrong `~/.claude` | +| `team_create` conflict | Team already exists | +| `team_launch` timeout | Runtime auth/model/cwd/provisioning issue | +| Task tools fail after `team_create` | Team is still a draft. Launch it first | +| Remote OpenClaw cannot connect | Missing tunnel or wrong host mapping | -Confidence: `9/10`. Reliability: `8/10`. Complexity: `4/10`. Roughly `20-60 LOC` of OpenClaw config/glue. +## 12. Troubleshooting -Use this when OpenClaw supports MCP. It is the cleanest integration because OpenClaw sees tools like `team_create`, `team_get`, and `team_launch`. - -Pros: - -- idiomatic for AI clients; -- no custom HTTP client needed; -- multiple MCP processes are safe; -- automatically uses the same team/task tool surface that Agent Teams already gives agents. - -Cons: - -- OpenClaw must support stdio MCP servers; -- debugging involves MCP logs plus desktop logs. - -### Option B: REST-first integration - -Confidence: `8/10`. Reliability: `7/10`. Complexity: `5/10`. Roughly `80-180 LOC` of OpenClaw code. 
- -Use this when OpenClaw does not support MCP or when you want direct lifecycle control. - -Pros: - -- easy to call from any language; -- simple to debug with curl; -- no MCP client integration needed. - -Cons: - -- only exposes HTTP routes currently implemented; -- OpenClaw must implement retries/polling; -- task/message/board workflows are richer through MCP. - -### Option C: Hybrid MCP + REST - -Confidence: `8/10`. Reliability: `8/10`. Complexity: `7/10`. Roughly `120-260 LOC`. - -Use MCP for normal AI tool calls and REST for health checks, diagnostics, or non-agent automation. - -Pros: - -- best operational visibility; -- can recover from MCP-client-specific issues; -- useful for dashboards or service wrappers. - -Cons: - -- more moving parts; -- more integration surface to test. - -Recommended starting point: **Option A, MCP-first**. - -## 11. Troubleshooting - -### MCP tool says the control API is unavailable - -Check: +### 12.1 Confirm Desktop Control API ```bash cat ~/.claude/team-control-api.json -curl -s http://127.0.0.1:3456/api/teams +curl -s http://127.0.0.1:3456/api/teams | jq . ``` -Fix: +If the file has another port, use that port. -- start the Agent Teams desktop app; -- enable the HTTP server in settings; -- pass `CLAUDE_TEAM_CONTROL_URL` explicitly in OpenClaw MCP config. +### 12.2 Confirm MCP Starts -### OpenClaw starts MCP, but tool calls fail - -Possible causes: - -- wrong `AGENT_TEAMS_MCP_CLAUDE_DIR`; -- desktop app is using a different Claude root; -- HTTP server is disabled; -- port changed because `3456` was busy; -- OpenClaw runs on another machine without a tunnel. - -### `team_create` returns conflict - -The team already exists. Use `team_get` and either reuse it or choose a new name. - -### `team_launch` hangs or times out - -Check provisioning status: +From the repo: ```bash -curl -s http://127.0.0.1:3456/api/teams/provisioning/ | jq . 
+pnpm --dir /Users/belief/dev/projects/claude/claude_team/mcp-server dev ``` -Possible causes: +This starts the stdio server and waits for MCP JSON-RPC input. It will not print a normal HTTP URL. -- model/provider unavailable; -- runtime auth missing; -- working directory is invalid; -- app-side provisioning failed; -- team is already in a conflicting runtime state. +In a real OpenClaw setup, OpenClaw starts this process itself. -### Remote OpenClaw cannot connect +### 12.3 MCP Starts But Tool Calls Fail -This is expected if it is not on the same machine. The API is local-only by default. +Check: -Use an SSH tunnel, for example: +- `AGENT_TEAMS_MCP_CLAUDE_DIR` points to the same Claude root as the app; +- `CLAUDE_TEAM_CONTROL_URL` points to the app's current local HTTP URL; +- Agent Teams desktop app is running; +- Browser Access / server mode is enabled; +- OpenClaw is on the same machine or has a working tunnel. -```bash -ssh -N -L 3456:127.0.0.1:3456 user@mac-mini-host -``` +### 12.4 `team_create` Says Team Already Exists -Then OpenClaw can use: +Use: ```text -http://127.0.0.1:3456 +team_get ``` -from its own machine if the tunnel is established there. +Then reuse the existing team, or pick another `teamName`. -## 12. Security Notes +### 12.5 `team_launch` Hangs -- The current control API is intended for local use. -- It should not be bound to public interfaces without authentication. -- Prefer SSH tunnels for remote access. -- Treat access to the control API as access to team runtime control. +With REST, poll: + +```bash +curl -s "$BASE_URL/api/teams/provisioning/$RUN_ID" | jq . +``` + +With MCP, use `waitForReady: true` and a larger `waitTimeoutMs`. + +Possible causes: + +- model unavailable; +- provider authentication missing; +- invalid working directory; +- provisioning failure; +- already-running/stale runtime state. + +### 12.6 Remote OpenClaw Cannot Connect + +This is expected without a tunnel. 
+ +The desktop API binds to `127.0.0.1`, so a remote process cannot see it directly. + +Use SSH tunnel or VPN. Do not publish the control API to a public interface without authentication. + +## 13. Security Notes + +- Treat the control API as runtime control access. +- Keep it local by default. +- Prefer SSH tunnels for remote use. +- Do not expose it publicly without authentication and transport security. - Do not share `~/.claude` with untrusted processes. +- Remember that an AI client with this MCP server can create, launch, stop, and coordinate teams. -## 13. Summary for the Original Request +## 14. What To Tell The User -The requested integration is realistic: +Use this short explanation: -- OpenClaw can call Agent Teams through MCP. -- OpenClaw can also call the local REST API directly. -- Each agent/OpenClaw can run its own stdio MCP process. -- Those MCP processes do not conflict because they do not bind ports. -- The single shared control point is the Agent Teams desktop HTTP API. -- For local Mac mini usage, this is the right initial architecture. +```text +Yes, this is feasible. + +Agent Teams can expose a local control API from the desktop app, and an outside AI like OpenClaw can access it through the agent-teams MCP server. + +OpenClaw would start agent-teams-mcp as a stdio MCP server. That MCP process does not listen on a port, so it is fine if multiple agents and OpenClaw each start their own copy. They all point back to the same local Agent Teams desktop control API and shared ~/.claude state. + +For a local Mac mini setup, this is straightforward: +1. Run the Agent Teams desktop app. +2. Enable Browser Access / server mode. +3. Configure OpenClaw with the agent-teams MCP server. +4. Let OpenClaw call team_list, team_get, team_create, and team_launch. +5. Use task/message/review MCP tools for deeper coordination. 
+ +REST is also available for lifecycle calls like list/create/get/launch/stop, but MCP is the better integration surface for actual AI-to-team work. +``` + +## 15. Final Recommendation + +Start with MCP-first local integration. + +Use this minimum viable flow: + +```text +1. OpenClaw starts agent-teams-mcp. +2. OpenClaw calls team_get("openclaw-review"). +3. If missing, OpenClaw calls team_create(...). +4. OpenClaw calls team_launch(..., waitForReady: true). +5. OpenClaw creates a review task with task_create or sends a message with message_send. +6. OpenClaw reads results via task_get/task_list/message flow. +``` + +This gives OpenClaw the team coordination behavior without inventing a separate orchestration layer. From 02c7e99970c131334eb2805182cebf5280b1f85c Mon Sep 17 00:00:00 2001 From: 777genius Date: Wed, 29 Apr 2026 11:26:43 +0300 Subject: [PATCH 22/25] test(changes): drop legacy evidence mode assertions --- test/main/services/team/ChangeExtractorService.test.ts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/main/services/team/ChangeExtractorService.test.ts b/test/main/services/team/ChangeExtractorService.test.ts index b7b3898a..c1d8ff38 100644 --- a/test/main/services/team/ChangeExtractorService.test.ts +++ b/test/main/services/team/ChangeExtractorService.test.ts @@ -1082,7 +1082,6 @@ describe('ChangeExtractorService', () => { ); expect(backfillInput.deliveryContextHash).toMatch(/^[a-f0-9]{64}$/); expect(deliveryContextHashVerified).toBe(true); - expect(backfillOpenCodeTaskLedger.mock.calls[0]?.[0]).not.toHaveProperty('evidenceMode'); expect(workerClient.computeTaskChanges).not.toHaveBeenCalled(); }); @@ -1531,9 +1530,6 @@ describe('ChangeExtractorService', () => { }) ); }); - const backfillCalls = backfillOpenCodeTaskLedger.mock.calls as unknown as Array<[Record]>; - expect(backfillCalls[0]?.[0]).not.toHaveProperty('evidenceMode'); - expect(settled).toBe(false); expect(workerClient.computeTaskChanges).not.toHaveBeenCalled(); 
pendingBackfill.resolve({ From 6bf2de89c5fb99c272503fa0801c67bc0ce78acf Mon Sep 17 00:00:00 2001 From: 777genius Date: Wed, 29 Apr 2026 11:58:56 +0300 Subject: [PATCH 23/25] chore(team): refine member work sync planning --- .../member-work-sync-control-plane-plan.md | 258 ++++++++++++++++-- src/main/services/team/TeamFsWorkerClient.ts | 6 +- 2 files changed, 246 insertions(+), 18 deletions(-) diff --git a/docs/team-management/member-work-sync-control-plane-plan.md b/docs/team-management/member-work-sync-control-plane-plan.md index 456487bc..d0d4c2bf 100644 --- a/docs/team-management/member-work-sync-control-plane-plan.md +++ b/docs/team-management/member-work-sync-control-plane-plan.md @@ -2359,6 +2359,184 @@ watchdog does not trust work-sync reports as progress. This prevents a valid `still_working` lease from hiding a real task stall. +### 16.6 Current Codebase Fit Audit + +This plan was checked against the current codebase shape, not only against the target design. + +Existing codebase facts: + +- `docs/FEATURE_ARCHITECTURE_STANDARD.md` already requires a full feature slice for medium and large features. +- `src/features/recent-projects` is the cleanest local template for contracts/core/main/preload/renderer boundaries. +- `TeamTaskReader` already normalizes persisted task files, deleted tasks, history events, comments, attachments, and review state. +- `TeamKanbanManager` already owns kanban/reviewer overlay state. +- `TeamMembersMetaStore` already owns active/removed member metadata. +- `TeamConfigReader` already has bounded team reads, worker fallback, launch summary projection, and soft-deleted team handling. +- `TeamTaskStallSnapshotSource` already contains useful provider/member resolution, but it is stall-monitor-specific and should not become the new feature core. +- `TeamTaskStallMonitor` already has active-team registry, unref timers, startup/activation grace, and scan scheduling. MemberWorkSync should reuse the pattern, not the internals. 
+- `TeamChangeEvent` is intentionally lightweight. Some events only provide `detail` strings such as `inboxes/<member>.json`, so routing must handle incomplete event data safely. +- MCP tools are registered through `mcp-server`, but the authoritative tool catalog comes from the workspace package `agent-teams-controller`. + +Concrete integration decisions: + +| Area | Safe fit | Avoid | +|---|---|---| +| Feature structure | `src/features/member-work-sync` full slice | adding another large class under `src/main/services/team` | +| Agenda source | output adapter wraps `TeamTaskReader`, `TeamKanbanManager`, `TeamMembersMetaStore`, `TeamConfigReader` | parsing task JSON again in feature core | +| Lifecycle source | output adapter composes config summary, bootstrap state, launch state, active runtime evidence | treating config existence as active team | +| Event routing | feature-owned queue receives `TeamChangeEvent` and re-reads state | doing reconciliation inside `teamChangeEmitter` synchronously | +| MCP tool | add catalog entry plus tool registration plus controller type bridge | only adding `mcp-server/src/tools/*` and forgetting catalog registration | +| Watchdog | read cooldowns only in Phase 2, reports never count as progress | sharing mutable journal state with `TeamTaskStallMonitor` | +| Renderer | read-only adapter/view-model after main status API exists | putting agenda policy in React components | + +### 16.7 Additional Bug Risks To Guard Explicitly + +These are the places most likely to create subtle bugs during implementation. + +#### Lightweight TeamChangeEvent Data + +Current events often do not contain enough structured data to route to one exact member. For example, inbox events may carry only `detail: "inboxes/bob.json"` or `detail: "sentMessages.json"`. + +Rules: + +- If member can be parsed confidently from `detail`, enqueue only that member. +- If member cannot be parsed, enqueue all active members with debounce and bounded concurrency. 
+- Never treat event `detail` as authoritative task/member ownership. +- Always re-read current tasks and members before writing status. + +This keeps event routing level-triggered, not edge-triggered. + +#### Current Reviewer Resolution + +`stallMonitor/reviewerResolution.ts` can use historical actors because stall detection needs historical evidence. MemberWorkSync must resolve current action ownership only. + +Rules: + +- Prefer current kanban reviewer overlay when available and task is in `review`. +- Otherwise use current-cycle history only: latest `review_requested` or `review_started` in the current review cycle. +- Do not use old `review_approved` actor as reviewer. +- If current reviewer is ambiguous, skip review agenda item and add diagnostic. + +This avoids incorrectly pinging a reviewer from a previous cycle. + +#### Team Lifecycle Ambiguity + +The codebase has multiple lifecycle signals: config, soft delete, bootstrap state, launch-state summary, active process registry, and OpenCode runtime metadata. + +Rules: + +- Deleted team -> inactive. +- Pending create without config -> inactive. +- Bootstrap/provisioning `cancelled` or stopped runtime -> inactive. +- Active process or current launch evidence -> active. +- Unknown lifecycle in Phase 1 -> do not nudge and write conservative diagnostic only. + +Phase 1 must prefer false negatives over false positives. + +#### Tool Catalog Registration + +The MCP server registration loop is driven by `AGENT_TEAMS_MCP_TOOL_GROUPS` from `agent-teams-controller`. + +Cut 2 must update all of these together: + +- `agent-teams-controller/src/mcpToolCatalog.js` +- `mcp-server/src/tools/index.ts` +- `mcp-server/src/tools/<workSyncTools>.ts` +- `mcp-server/src/agent-teams-controller.d.ts` +- stdio/e2e tool list tests + +If any of these are missing, the tool may compile but never appear to agents. 
+ +Recommended tool group: + +```text +workSync +teammateOperational: true +toolNames: ["member_work_sync_report", "member_work_sync_status"] +``` + +If adding a new group causes broad type churn, fallback is to temporarily place the tools in the existing `process` group, but only with a TODO and dedicated tests. Do not put report/status tools in `task`, because they are operational state, not board mutation. + +#### Main-Service Dependency Direction + +The feature application layer must not depend on `TeamDataService`, `TeamProvisioningService`, Electron, filesystem, or process APIs. + +Allowed shape: + +```ts +// core/application +class MemberWorkSyncReconciler { + constructor( + private readonly agendaSource: WorkAgendaSourcePort, + private readonly statusStore: MemberWorkSyncStatusStorePort, + private readonly lifecycle: TeamLifecyclePort, + private readonly clock: ClockPort + ) {} +} +``` + +Main adapters may depend on existing services: + +```ts +// main/adapters/output +class TeamServicesWorkAgendaSource implements WorkAgendaSourcePort { + constructor( + private readonly taskReader: TeamTaskReader, + private readonly kanbanManager: TeamKanbanManager, + private readonly membersMetaStore: TeamMembersMetaStore, + private readonly configReader: TeamConfigReader + ) {} +} +``` + +This keeps DIP intact and makes core tests cheap. + +#### Store And Locking Reuse + +OpenCode has `VersionedJsonStore`, but it is under OpenCode-specific runtime code. + +Rules: + +- Do not deep-import OpenCode runtime store internals from `member-work-sync`. +- Extract a generic team-store helper only if it can be used without changing OpenCode behavior. +- Otherwise implement a tiny feature-local JSON store with atomic write, size cap, schema version, and per-team lock. +- All writes must be bounded and best-effort on deleted/missing team dirs. + +This avoids coupling a generic feature to OpenCode runtime internals. 
+ +#### Task Comments And User-Visible Noise + +`member_work_sync_report` must not create task comments, inbox messages, or user notifications. + +Rules: + +- Accepted report writes only work-sync status/report store. +- Rejected report writes only bounded diagnostics. +- Report note is diagnostic only and must be size-limited. +- Renderer Phase 1 shows status in details/dev diagnostics, not alarming warnings. + +This prevents another "agent spammed the board" class of bugs. + +#### Phase 2 Dispatch Safety + +Nudges are intentionally out of Phase 1. When Phase 2 starts, dispatch must revalidate immediately before sending. + +Required dispatch guard: + +```text +load outbox item +read current lifecycle +read current member active state +recompute agenda +compare fingerprint +check no accepted valid lease +check recent watchdog cooldown +check one-in-flight key +send nudge +mark dispatched +``` + +If any check fails, drop or supersede the item. Do not send stale nudges. + --- ## 17. Store And Locking @@ -3055,19 +3233,58 @@ Cross-repo acceptance: Recommended commit sequence: 1. `docs: add member work sync control plane plan` -2. `refactor(team): extract current review cycle resolver` -3. `refactor(team): extract reusable versioned json store` -4. `feat(member-work-sync): add domain agenda and fingerprint model` -5. `feat(member-work-sync): add status store and shadow reconciler` -6. `feat(member-work-sync): expose current agenda fingerprint read surface` -7. `feat(member-work-sync): add report token adapter and validation` -8. `feat(member-work-sync): add report validation use case` -9. `feat(agent-teams): add member work sync report tool` -10. `test(member-work-sync): add shadow control plane coverage` -11. `feat(member-work-sync): wire shadow reconciler to team changes` +2. `test(team): cover current review cycle action ownership` +3. `refactor(team): extract current review cycle resolver` +4. `refactor(team): extract reusable versioned json store` +5. 
`feat(member-work-sync): add domain agenda and fingerprint model` +6. `feat(member-work-sync): add status store and shadow reconciler` +7. `feat(member-work-sync): expose current agenda fingerprint read surface` +8. `feat(member-work-sync): add report token adapter and validation` +9. `feat(member-work-sync): add report validation use case` +10. `feat(agent-teams): add member work sync report tool` +11. `test(member-work-sync): add shadow control plane coverage` +12. `feat(member-work-sync): wire shadow reconciler to team changes` Keep Phase 2 in separate commits/PR. +### 27.0 Cut 0: Fit Check Before Code + +Goal: + +```text +Prove the feature can sit on existing service boundaries before creating new runtime behavior. +``` + +Estimate: `🎯 10 🛡️ 10 🧠 3`, `80-180 LOC`. + +Step order: + +1. Add test fixtures for agenda inputs using existing `TeamTaskReader` output shape. + - Include pending owned work, active work, review, needs-fix, deleted tasks, removed members, and stale reviewer history. + - Do not test raw JSON parser details here. + +2. Add current-review-cycle resolver tests. + - Prove old `review_approved` actor does not become current reviewer. + - Prove kanban reviewer overlay wins when task is actively in review. + - Prove ambiguity returns `null` plus diagnostic. + +3. Add lifecycle adapter contract tests with fakes. + - deleted team -> inactive; + - pending create -> inactive; + - cancelled bootstrap -> inactive; + - active runtime evidence -> active; + - unknown -> shadow diagnostic only. + +4. Add MCP catalog registration checklist test plan. + - No implementation yet. + - Confirm adding a new operational group requires `agent-teams-controller` catalog and `mcp-server` registration changes. + +Cut 0 stop criteria: + +- Existing task/review types cannot represent a planned agenda item without guessing. +- Lifecycle cannot be resolved conservatively from existing sources. 
+- MCP catalog cannot support a new operational group without broad unrelated changes. + ### 27.1 Cut 1: Shadow Core And Status Goal: @@ -3202,16 +3419,20 @@ Step order: 5. Add app validation bridge contract. - `claude_team` exposes a narrow main-process/application port. - - Orchestrator calls that port when available. + - The MCP/controller boundary calls that port when available. - Result is structured and does not leak internal task data. + - Do not create a second validation implementation in `agent-teams-controller`. -6. Add orchestrator MCP tool. +6. Add MCP/controller tool registration. - Tool name: `member_work_sync_report`. - - Controller validates schema, size, reserved actors, obvious unsafe aliases. - - Controller attaches trusted runtime/session context when available. - - Controller forwards to app validation port. - - If app validation port unavailable, controller writes pending intent only if identity is not terminally invalid. - - Controller never returns accepted lease unless app returned accepted lease. + - Update `agent-teams-controller/src/mcpToolCatalog.js` so the tool is in a teammate-operational group. + - Update `mcp-server/src/tools/index.ts` and add a dedicated work-sync tools module. + - Update `mcp-server/src/agent-teams-controller.d.ts`. + - Controller/MCP layer validates schema, size, reserved actors, obvious unsafe aliases. + - Controller/MCP layer attaches trusted runtime/session context when available. + - Controller/MCP layer forwards to app validation port. + - If app validation port is unavailable, write pending intent only if identity is not terminally invalid. + - Never return accepted lease unless app returned accepted lease. 7. Add pending intent replay. - Replay through the same reporter use case. 
@@ -3228,6 +3449,9 @@ Cut 2 tests: ```bash pnpm vitest run test/features/member-work-sync pnpm vitest run test/main/services/team/TeamProvisioningServiceRelay.test.ts +pnpm --filter agent-teams-controller test +pnpm --filter agent-teams-mcp test +pnpm --filter agent-teams-mcp test:e2e cd /Users/belief/dev/projects/claude/agent_teams_orchestrator && bun test src/services/opencode/OpenCodeBridgeCommandHandler.test.ts cd /Users/belief/dev/projects/claude/claude_team && pnpm typecheck --pretty false git diff --check diff --git a/src/main/services/team/TeamFsWorkerClient.ts b/src/main/services/team/TeamFsWorkerClient.ts index 4fb39708..4a2fa090 100644 --- a/src/main/services/team/TeamFsWorkerClient.ts +++ b/src/main/services/team/TeamFsWorkerClient.ts @@ -74,6 +74,10 @@ function resolveWorkerPath(): string | null { return null; } +function shouldWarnUnavailableWorker(): boolean { + return process.env.NODE_ENV !== 'test' && process.env.VITEST !== 'true'; +} + export class TeamFsWorkerClient { private worker: Worker | null = null; private readonly workerPath: string | null = resolveWorkerPath(); @@ -84,7 +88,7 @@ export class TeamFsWorkerClient { >(); isAvailable(): boolean { - if (!this.workerPath && !this.warnedUnavailable) { + if (!this.workerPath && !this.warnedUnavailable && shouldWarnUnavailableWorker()) { this.warnedUnavailable = true; const baseDir = typeof __dirname === 'string' && __dirname.length > 0 From 8c847202465628300736c535f10abe62002ec2d3 Mon Sep 17 00:00:00 2001 From: 777genius Date: Wed, 29 Apr 2026 12:18:50 +0300 Subject: [PATCH 24/25] chore(runtime): bump orchestrator lock to 0.0.13 --- runtime.lock.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/runtime.lock.json b/runtime.lock.json index 11995845..56440b59 100644 --- a/runtime.lock.json +++ b/runtime.lock.json @@ -1,27 +1,27 @@ { - "version": "0.0.12", - "sourceRef": "v0.0.12", + "version": "0.0.13", + "sourceRef": "v0.0.13", "sourceRepository": 
"777genius/agent_teams_orchestrator", "releaseRepository": "777genius/claude_agent_teams_ui", "releaseTag": "v1.2.0", "assets": { "darwin-arm64": { - "file": "agent-teams-runtime-darwin-arm64-v0.0.12.tar.gz", + "file": "agent-teams-runtime-darwin-arm64-v0.0.13.tar.gz", "archiveKind": "tar.gz", "binaryName": "claude-multimodel" }, "darwin-x64": { - "file": "agent-teams-runtime-darwin-x64-v0.0.12.tar.gz", + "file": "agent-teams-runtime-darwin-x64-v0.0.13.tar.gz", "archiveKind": "tar.gz", "binaryName": "claude-multimodel" }, "linux-x64": { - "file": "agent-teams-runtime-linux-x64-v0.0.12.tar.gz", + "file": "agent-teams-runtime-linux-x64-v0.0.13.tar.gz", "archiveKind": "tar.gz", "binaryName": "claude-multimodel" }, "win32-x64": { - "file": "agent-teams-runtime-win32-x64-v0.0.12.zip", + "file": "agent-teams-runtime-win32-x64-v0.0.13.zip", "archiveKind": "zip", "binaryName": "claude-multimodel.exe" } From 5a8a934d8dea19918ff59ddf8129d857d55b205c Mon Sep 17 00:00:00 2001 From: iliya Date: Wed, 29 Apr 2026 12:29:49 +0300 Subject: [PATCH 25/25] fix: make Windows agent launches independent of tmux --- .../buildTmuxAutoInstallCapability.ts | 2 +- .../buildTmuxEffectiveAvailability.ts | 16 +- .../runtime/TmuxInstallerRunnerAdapter.ts | 2 +- .../installer/TmuxInstallStrategyResolver.ts | 12 +- .../adapters/TmuxInstallerBannerAdapter.ts | 6 +- .../TmuxInstallerBannerAdapter.test.ts | 18 +- .../__tests__/useTmuxInstallerBanner.test.tsx | 2 +- .../TmuxInstallerBannerView.test.tsx | 6 +- .../renderer/utils/formatTmuxInstallerText.ts | 7 +- .../services/team/TeamMcpConfigBuilder.ts | 10 +- src/main/utils/childProcess.ts | 2 +- .../team/AnthropicRuntimeMemory.live.test.ts | 62 +++++- .../team/TeamMcpConfigBuilder.test.ts | 21 +- .../utils/AgentCliLaunch.live-e2e.test.ts | 179 ++++++++++++++++++ test/main/utils/childProcess.test.ts | 76 ++++++++ 15 files changed, 375 insertions(+), 46 deletions(-) create mode 100644 test/main/utils/AgentCliLaunch.live-e2e.test.ts diff --git 
a/src/features/tmux-installer/core/domain/policies/buildTmuxAutoInstallCapability.ts b/src/features/tmux-installer/core/domain/policies/buildTmuxAutoInstallCapability.ts index 1f232e6a..ab765ce7 100644 --- a/src/features/tmux-installer/core/domain/policies/buildTmuxAutoInstallCapability.ts +++ b/src/features/tmux-installer/core/domain/policies/buildTmuxAutoInstallCapability.ts @@ -67,7 +67,7 @@ function buildManualHints(platform: TmuxPlatform): TmuxInstallHint[] { }, { title: 'Install Ubuntu', - description: 'Recommended WSL distro for the tmux runtime path.', + description: 'Recommended WSL distro for optional tmux pane transport.', command: 'wsl --install -d Ubuntu --no-launch', }, { diff --git a/src/features/tmux-installer/core/domain/policies/buildTmuxEffectiveAvailability.ts b/src/features/tmux-installer/core/domain/policies/buildTmuxEffectiveAvailability.ts index b6fe3d0c..78a674a4 100644 --- a/src/features/tmux-installer/core/domain/policies/buildTmuxEffectiveAvailability.ts +++ b/src/features/tmux-installer/core/domain/policies/buildTmuxEffectiveAvailability.ts @@ -24,7 +24,7 @@ export function buildTmuxEffectiveAvailability( binaryPath: input.wsl.tmuxBinaryPath, runtimeReady: false, detail: - 'tmux is available inside WSL, but the persistent teammate runtime still needs native Windows pane support.', + 'tmux is available inside WSL. On Windows it is optional and is not required for teammate runtime startup.', }; } @@ -36,7 +36,7 @@ export function buildTmuxEffectiveAvailability( binaryPath: input.host.binaryPath, runtimeReady: false, detail: - 'tmux was found on Windows, but the app currently relies on a WSL-backed tmux runtime for the most reliable teammate path.', + 'tmux was found on Windows. Native process teammates do not require it; tmux remains optional for pane-based terminal transport.', }; } @@ -49,7 +49,7 @@ export function buildTmuxEffectiveAvailability( runtimeReady: false, detail: input.wsl?.statusDetail ?? 
- 'You can keep using the app, but Windows needs WSL before tmux can improve teammate reliability.', + 'You can keep using the app without tmux. Install WSL only if you want optional tmux pane transport.', }; } @@ -61,7 +61,7 @@ export function buildTmuxEffectiveAvailability( runtimeReady: false, detail: input.wsl?.statusDetail ?? - 'WSL is available, but tmux is not ready there yet. Finish the Linux setup, install tmux, then re-check.', + 'WSL is available, but tmux is not ready there yet. Install tmux only if you want optional pane transport.', }; } @@ -72,7 +72,7 @@ export function buildTmuxEffectiveAvailability( version: input.host.version, binaryPath: input.host.binaryPath, runtimeReady: input.nativeSupported, - detail: 'tmux is available for the persistent teammate runtime.', + detail: 'tmux is available as an optional pane transport for teammate sessions.', }; } @@ -84,9 +84,9 @@ export function buildTmuxEffectiveAvailability( runtimeReady: false, detail: input.platform === 'darwin' - ? 'You can keep using the app, but tmux improves persistent teammate reliability and restart behavior.' + ? 'You can keep using the app without tmux. Install tmux only if you want optional pane transport.' : input.platform === 'linux' - ? 'You can keep using the app, but tmux improves long-running teammate stability and cleaner recovery.' - : 'You can keep using the app, but tmux improves persistent teammate reliability.', + ? 'You can keep using the app without tmux. Install tmux only if you want optional pane transport.' + : 'You can keep using the app without tmux. 
Install tmux only if you want optional pane transport.', }; } diff --git a/src/features/tmux-installer/main/adapters/output/runtime/TmuxInstallerRunnerAdapter.ts b/src/features/tmux-installer/main/adapters/output/runtime/TmuxInstallerRunnerAdapter.ts index cadabecd..14d1ae4d 100644 --- a/src/features/tmux-installer/main/adapters/output/runtime/TmuxInstallerRunnerAdapter.ts +++ b/src/features/tmux-installer/main/adapters/output/runtime/TmuxInstallerRunnerAdapter.ts @@ -177,7 +177,7 @@ export class TmuxInstallerRunnerAdapter strategy: 'wsl', message: 'Preparing the Windows WSL tmux setup...', detail: - 'The app can keep working without tmux, but WSL-backed tmux gives the most reliable persistent teammate path on Windows.', + 'The app can keep working without tmux. WSL-backed tmux is optional and only adds pane-based terminal transport on Windows.', error: null, canCancel: true, acceptsInput: false, diff --git a/src/features/tmux-installer/main/infrastructure/installer/TmuxInstallStrategyResolver.ts b/src/features/tmux-installer/main/infrastructure/installer/TmuxInstallStrategyResolver.ts index ab99dc20..4ba5607b 100644 --- a/src/features/tmux-installer/main/infrastructure/installer/TmuxInstallStrategyResolver.ts +++ b/src/features/tmux-installer/main/infrastructure/installer/TmuxInstallStrategyResolver.ts @@ -171,22 +171,22 @@ export class TmuxInstallStrategyResolver { if (input.effective.available) { return input.effective.location === 'wsl' ? 'tmux is available inside WSL on Windows.' - : 'tmux is available for persistent teammate runtime.'; + : 'tmux is available as an optional pane transport for teammate sessions.'; } if (input.platform === 'darwin') { - return 'You can keep using the app, but tmux improves persistent teammate reliability and restart behavior.'; + return 'You can keep using the app without tmux. 
Install tmux only if you want optional pane transport.'; } if (input.platform === 'linux') { - return 'You can keep using the app, but tmux improves long-running teammate stability and cleaner recovery.'; + return 'You can keep using the app without tmux. Install tmux only if you want optional pane transport.'; } if (input.platform === 'win32') { return ( input.wsl?.statusDetail ?? - 'You can keep using the app, but tmux on Windows goes through WSL for the best teammate experience.' + 'You can keep using the app without tmux. On Windows, tmux setup uses WSL and is only needed for optional pane transport.' ); } - return 'You can keep using the app, but tmux improves persistent teammate reliability.'; + return 'You can keep using the app without tmux. Install tmux only if you want optional pane transport.'; } #buildCommand( @@ -329,7 +329,7 @@ export class TmuxInstallStrategyResolver { if (status.wslInstalled && !status.distroName) { this.#prependUniqueHint(manualHints, { title: 'Install Ubuntu', - description: 'Recommended WSL distro for the tmux runtime path.', + description: 'Recommended WSL distro for optional tmux pane transport.', command: 'wsl --install -d Ubuntu --no-launch', }); } diff --git a/src/features/tmux-installer/renderer/adapters/TmuxInstallerBannerAdapter.ts b/src/features/tmux-installer/renderer/adapters/TmuxInstallerBannerAdapter.ts index c989089e..79b10973 100644 --- a/src/features/tmux-installer/renderer/adapters/TmuxInstallerBannerAdapter.ts +++ b/src/features/tmux-installer/renderer/adapters/TmuxInstallerBannerAdapter.ts @@ -83,14 +83,14 @@ export class TmuxInstallerBannerAdapter { snapshot.message ?? status?.effective.detail ?? status?.wsl?.statusDetail ?? - 'tmux improves persistent teammate reliability and cleaner recovery for long-running tasks.'; + 'tmux is optional. Install it only if you want pane-based terminal transport for long-running teammate sessions.'; const benefitsBody = status && !status.effective.available ? 
formatTmuxOptionalBenefits(status.platform) : null; const runtimeReadyLabel = status ? status.effective.runtimeReady - ? 'Ready for persistent teammates' + ? 'Pane transport ready' : status.effective.available - ? 'Installed, but not active yet' + ? 'Installed, optional transport inactive' : null : null; const versionLabel = diff --git a/src/features/tmux-installer/renderer/adapters/__tests__/TmuxInstallerBannerAdapter.test.ts b/src/features/tmux-installer/renderer/adapters/__tests__/TmuxInstallerBannerAdapter.test.ts index e3d5af09..0d3ab57e 100644 --- a/src/features/tmux-installer/renderer/adapters/__tests__/TmuxInstallerBannerAdapter.test.ts +++ b/src/features/tmux-installer/renderer/adapters/__tests__/TmuxInstallerBannerAdapter.test.ts @@ -20,7 +20,7 @@ const baseStatus: TmuxStatus = { version: null, binaryPath: null, runtimeReady: false, - detail: 'tmux improves persistent teammate reliability.', + detail: 'tmux is optional. Install it only if you want pane transport.', }, error: null, autoInstall: { @@ -72,9 +72,9 @@ describe('TmuxInstallerBannerAdapter', () => { expect(result.progressPercent).toBeNull(); expect(result.manualHints).toHaveLength(1); expect(result.manualHintsCollapsible).toBe(false); - expect(result.body).toContain('persistent teammate reliability'); - expect(result.benefitsBody).toContain('Optional, but recommended'); - expect(result.benefitsBody).toContain('multi-agent teams that mix providers'); + expect(result.body).toContain('tmux is optional'); + expect(result.benefitsBody).toContain('Optional'); + expect(result.benefitsBody).toContain('pane-based terminal transport'); expect(result.installButtonPrimary).toBe(true); expect(result.showRefreshButton).toBe(true); }); @@ -102,7 +102,7 @@ describe('TmuxInstallerBannerAdapter', () => { expect(result.title).toBe('Installing tmux'); expect(result.body).toBe('Renderer bridge failed'); - expect(result.benefitsBody).toContain('Optional, but recommended'); + 
expect(result.benefitsBody).toContain('Optional'); expect(result.error).toBe('Renderer bridge failed'); expect(result.installDisabled).toBe(true); expect(result.canCancel).toBe(true); @@ -171,7 +171,7 @@ describe('TmuxInstallerBannerAdapter', () => { expect(result.primaryGuideUrl).toBe('https://learn.microsoft.com/en-us/windows/wsl/install'); expect(result.progressPercent).toBe(82); expect(result.manualHintsCollapsible).toBe(true); - expect(result.benefitsBody).toContain('With tmux in WSL'); + expect(result.benefitsBody).toContain('WSL-backed tmux'); expect(result.showRefreshButton).toBe(true); }); @@ -188,7 +188,7 @@ describe('TmuxInstallerBannerAdapter', () => { version: 'tmux 3.4', binaryPath: 'C:\\tmux.exe', runtimeReady: false, - detail: 'tmux was found on Windows, but WSL-backed tmux is still preferred.', + detail: 'tmux was found on Windows. Native process teammates do not require it.', }, }, snapshot: idleSnapshot, @@ -199,7 +199,7 @@ describe('TmuxInstallerBannerAdapter', () => { expect(result.visible).toBe(false); expect(result.locationLabel).toBe('Host runtime'); - expect(result.runtimeReadyLabel).toBe('Installed, but not active yet'); + expect(result.runtimeReadyLabel).toBe('Installed, optional transport inactive'); expect(result.versionLabel).toBe('tmux 3.4'); expect(result.benefitsBody).toBeNull(); }); @@ -216,7 +216,7 @@ describe('TmuxInstallerBannerAdapter', () => { version: 'tmux 3.6a', binaryPath: '/opt/homebrew/bin/tmux', runtimeReady: true, - detail: 'tmux is available for persistent teammates.', + detail: 'tmux is available as an optional pane transport.', }, }, snapshot: { diff --git a/src/features/tmux-installer/renderer/hooks/__tests__/useTmuxInstallerBanner.test.tsx b/src/features/tmux-installer/renderer/hooks/__tests__/useTmuxInstallerBanner.test.tsx index 8f6cc321..2ec3595c 100644 --- a/src/features/tmux-installer/renderer/hooks/__tests__/useTmuxInstallerBanner.test.tsx +++ 
b/src/features/tmux-installer/renderer/hooks/__tests__/useTmuxInstallerBanner.test.tsx @@ -25,7 +25,7 @@ const baseStatus: TmuxStatus = { version: null, binaryPath: null, runtimeReady: false, - detail: 'tmux improves persistent teammate reliability.', + detail: 'tmux is optional. Install it only if you want pane transport.', }, error: null, autoInstall: { diff --git a/src/features/tmux-installer/renderer/ui/__tests__/TmuxInstallerBannerView.test.tsx b/src/features/tmux-installer/renderer/ui/__tests__/TmuxInstallerBannerView.test.tsx index 5766f8f1..c477700d 100644 --- a/src/features/tmux-installer/renderer/ui/__tests__/TmuxInstallerBannerView.test.tsx +++ b/src/features/tmux-installer/renderer/ui/__tests__/TmuxInstallerBannerView.test.tsx @@ -21,7 +21,7 @@ const baseViewModel: TmuxInstallerBannerViewModel = { title: 'tmux is not installed', body: 'WSL is available, but no Linux distribution is installed yet.', benefitsBody: - 'Optional, but recommended. The app works without tmux. With tmux in WSL, teammates are more reliable. Without tmux, creating multi-agent teams that mix providers may be blocked.', + 'Optional. The app works without tmux. Install WSL-backed tmux only if you want pane-based terminal transport for long-running teammate sessions.', error: null, platformLabel: 'Windows', locationLabel: null, @@ -94,8 +94,8 @@ describe('TmuxInstallerBannerView', () => { const { host, root } = renderBanner(baseViewModel); expect(host.textContent).toContain('tmux is not installed'); - expect(host.textContent).toContain('Optional, but recommended'); - expect(host.textContent).toContain('multi-agent teams that mix providers'); + expect(host.textContent).toContain('Optional'); + expect(host.textContent).toContain('pane-based terminal transport'); expect(host.textContent).not.toContain( 'WSL is available, but no Linux distribution is installed yet.' 
); diff --git a/src/features/tmux-installer/renderer/utils/formatTmuxInstallerText.ts b/src/features/tmux-installer/renderer/utils/formatTmuxInstallerText.ts index ca0d4f69..02d4eea6 100644 --- a/src/features/tmux-installer/renderer/utils/formatTmuxInstallerText.ts +++ b/src/features/tmux-installer/renderer/utils/formatTmuxInstallerText.ts @@ -67,12 +67,9 @@ export function formatTmuxOptionalBenefits(platform: TmuxPlatform | null): strin return null; } - const mixedProviderLimit = - 'Without tmux, creating multi-agent teams that mix providers may be blocked.'; - if (platform === 'win32') { - return `Optional, but recommended. The app works without tmux. With tmux in WSL, teammates are more reliable for long-running work, restarts are cleaner, and recovery after reconnects is better. ${mixedProviderLimit}`; + return 'Optional. The app works without tmux. Install WSL-backed tmux only if you want pane-based terminal transport for long-running teammate sessions.'; } - return `Optional, but recommended. The app works without tmux. With tmux, teammates are more reliable for long-running work, restarts are cleaner, and recovery after reconnects is better. ${mixedProviderLimit}`; + return 'Optional. The app works without tmux. 
Install tmux only if you want pane-based terminal transport for long-running teammate sessions.'; } diff --git a/src/main/services/team/TeamMcpConfigBuilder.ts b/src/main/services/team/TeamMcpConfigBuilder.ts index 6f91ea84..f4d7ebd4 100644 --- a/src/main/services/team/TeamMcpConfigBuilder.ts +++ b/src/main/services/team/TeamMcpConfigBuilder.ts @@ -1,4 +1,8 @@ -import { getMcpConfigsBasePath, getMcpServerBasePath } from '@main/utils/pathDecoder'; +import { + getClaudeBasePath, + getMcpConfigsBasePath, + getMcpServerBasePath, +} from '@main/utils/pathDecoder'; import { createLogger } from '@shared/utils/logger'; import { execFile } from 'child_process'; import { randomUUID } from 'crypto'; @@ -13,6 +17,7 @@ export interface McpLaunchSpec { } const MCP_SERVER_NAME = 'agent-teams'; +const MCP_CLAUDE_DIR_ENV = 'AGENT_TEAMS_MCP_CLAUDE_DIR'; const logger = createLogger('Service:TeamMcpConfigBuilder'); const MCP_CONFIG_PREFIX = 'agent-teams-mcp-'; const MCP_CONFIG_REMOVE_RETRY_DELAYS_MS = [25, 75, 150] as const; @@ -273,6 +278,9 @@ export class TeamMcpConfigBuilder { [MCP_SERVER_NAME]: { command: launchSpec.command, args: launchSpec.args, + env: { + [MCP_CLAUDE_DIR_ENV]: getClaudeBasePath(), + }, }, }; diff --git a/src/main/utils/childProcess.ts b/src/main/utils/childProcess.ts index 58dd66f2..6bc72d94 100644 --- a/src/main/utils/childProcess.ts +++ b/src/main/utils/childProcess.ts @@ -137,7 +137,7 @@ function resolveGeneratedBunLauncher( } function resolveNpmNodeShim(content: string, launcherDir: string): DirectWindowsLauncher | null { - const scriptMatch = /"%_prog%"\s+"([^"]+\.(?:cjs|mjs|js))"\s+%\*/i.exec(content); + const scriptMatch = /"%_prog%"\s+"([^"]+(?:\.(?:cjs|mjs|js))?)"\s+%\*/i.exec(content); const scriptTemplate = scriptMatch?.[1]; if (!scriptTemplate) { return null; diff --git a/test/main/services/team/AnthropicRuntimeMemory.live.test.ts b/test/main/services/team/AnthropicRuntimeMemory.live.test.ts index 786306e4..1c05b0e7 100644 --- 
a/test/main/services/team/AnthropicRuntimeMemory.live.test.ts +++ b/test/main/services/team/AnthropicRuntimeMemory.live.test.ts @@ -29,6 +29,7 @@ liveDescribe('Anthropic runtime memory live e2e', () => { let previousDisableRuntimeBootstrap: string | undefined; let previousHome: string | undefined; let previousUserProfile: string | undefined; + let previousNodeEnv: string | undefined; let svc: TeamProvisioningService | null; let teamName: string | null; @@ -45,8 +46,10 @@ liveDescribe('Anthropic runtime memory live e2e', () => { previousDisableRuntimeBootstrap = process.env.CLAUDE_DISABLE_DETERMINISTIC_TEAM_BOOTSTRAP; previousHome = process.env.HOME; previousUserProfile = process.env.USERPROFILE; + previousNodeEnv = process.env.NODE_ENV; process.env.HOME = tempHome; process.env.USERPROFILE = tempHome; + process.env.NODE_ENV = 'production'; process.env.CLAUDE_AGENT_TEAMS_ORCHESTRATOR_CLI_PATH = process.env.CLAUDE_AGENT_TEAMS_ORCHESTRATOR_CLI_PATH?.trim() || DEFAULT_ORCHESTRATOR_CLI; process.env.CLAUDE_TEAM_CLI_FLAVOR = 'agent_teams_orchestrator'; @@ -67,7 +70,13 @@ liveDescribe('Anthropic runtime memory live e2e', () => { restoreEnv('CLAUDE_DISABLE_DETERMINISTIC_TEAM_BOOTSTRAP', previousDisableRuntimeBootstrap); restoreEnv('HOME', previousHome); restoreEnv('USERPROFILE', previousUserProfile); - await fs.rm(tempDir, { recursive: true, force: true }); + restoreEnv('NODE_ENV', previousNodeEnv); + if (process.env.ANTHROPIC_RUNTIME_MEMORY_LIVE_KEEP_TEMP === '1') { + // Live-debug only: preserve process/runtime logs for failed Windows liveness triage. 
+ process.stderr.write(`Preserving Anthropic runtime memory live temp dir: ${tempDir}\n`); + return; + } + await removeTempDirWithRetries(tempDir); }); it('creates a real Anthropic team and reports teammate RSS in the runtime snapshot', async () => { @@ -79,6 +88,7 @@ liveDescribe('Anthropic runtime memory live e2e', () => { teamName = `anthropic-memory-live-${Date.now()}`; const projectPath = path.join(tempDir, 'project'); await fs.mkdir(projectPath, { recursive: true }); + await writeTrustedClaudeConfig(tempClaudeRoot, projectPath); await fs.writeFile( path.join(projectPath, 'README.md'), '# Anthropic runtime memory live e2e\n', @@ -133,7 +143,7 @@ liveDescribe('Anthropic runtime memory live e2e', () => { typeof alice.rssBytes === 'number' && alice.rssBytes > 0 ); - }, 60_000); + }, 180_000, 1_000, () => JSON.stringify(snapshot, null, 2)); expect(snapshot!.members.alice).toMatchObject({ alive: true, @@ -158,10 +168,53 @@ async function assertExecutable(filePath: string): Promise { await fs.access(filePath, fsConstants.X_OK); } +async function writeTrustedClaudeConfig(configDir: string, projectPath: string): Promise { + const normalizedProjectPath = path.normalize(projectPath).replace(/\\/g, '/'); + const approvedApiKeySuffix = process.env.ANTHROPIC_API_KEY?.trim().slice(-20); + const config: { + projects: Record; + customApiKeyResponses?: { approved: string[]; rejected: string[] }; + } = { + projects: { + [normalizedProjectPath]: { + hasTrustDialogAccepted: true, + }, + }, + }; + if (approvedApiKeySuffix) { + config.customApiKeyResponses = { + approved: [approvedApiKeySuffix], + rejected: [], + }; + } + await fs.writeFile( + path.join(configDir, '.claude.json'), + `${JSON.stringify(config, null, 2)}\n`, + 'utf8' + ); +} + +async function removeTempDirWithRetries(dirPath: string): Promise { + const attempts = process.platform === 'win32' ? 
20 : 1; + for (let attempt = 1; attempt <= attempts; attempt += 1) { + try { + await fs.rm(dirPath, { recursive: true, force: true }); + return; + } catch (error) { + const code = (error as NodeJS.ErrnoException).code; + if ((code !== 'EBUSY' && code !== 'EPERM') || attempt === attempts) { + throw error; + } + await new Promise((resolve) => setTimeout(resolve, 100)); + } + } +} + async function waitUntil( predicate: () => Promise, timeoutMs: number, - pollMs = 1_000 + pollMs = 1_000, + describeState?: () => string ): Promise { const deadline = Date.now() + timeoutMs; let lastError: unknown; @@ -178,7 +231,8 @@ async function waitUntil( } const suffix = lastError instanceof Error && lastError.message ? ` Last error: ${lastError.message}` : ''; - throw new Error(`Timed out after ${timeoutMs}ms waiting for condition.${suffix}`); + const state = describeState ? ` Last state: ${describeState()}` : ''; + throw new Error(`Timed out after ${timeoutMs}ms waiting for condition.${suffix}${state}`); } function formatProgressDump(progressEvents: TeamProvisioningProgress[]): string { diff --git a/test/main/services/team/TeamMcpConfigBuilder.test.ts b/test/main/services/team/TeamMcpConfigBuilder.test.ts index 22e70f16..91f68eaa 100644 --- a/test/main/services/team/TeamMcpConfigBuilder.test.ts +++ b/test/main/services/team/TeamMcpConfigBuilder.test.ts @@ -45,7 +45,7 @@ vi.mock('@main/utils/pathDecoder', async (importOriginal) => { }; }); -import { setAppDataBasePath } from '@main/utils/pathDecoder'; +import { setAppDataBasePath, setClaudeBasePathOverride } from '@main/utils/pathDecoder'; import { TeamMcpConfigBuilder } from '@main/services/team/TeamMcpConfigBuilder'; describe('TeamMcpConfigBuilder', () => { @@ -77,10 +77,10 @@ describe('TeamMcpConfigBuilder', () => { function readGeneratedServer( configPath: string - ): { command?: string; args?: string[] } | undefined { + ): { command?: string; args?: string[]; env?: Record } | undefined { const raw = fs.readFileSync(configPath, 
'utf8'); const parsed = JSON.parse(raw) as { - mcpServers?: Record; + mcpServers?: Record }>; }; return parsed.mcpServers?.['agent-teams']; } @@ -180,6 +180,7 @@ describe('TeamMcpConfigBuilder', () => { afterEach(() => { setAppDataBasePath(null); + setClaudeBasePathOverride(null); setPackagedMode(false); setResourcesPath(originalResourcesPath); moduleInternal._load = originalModuleLoad; @@ -370,6 +371,20 @@ describe('TeamMcpConfigBuilder', () => { expectTsxEntry(parsed.mcpServers['agent-teams'], sourceEntry); }); + it('passes the configured Claude root to the MCP server', async () => { + const claudeRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'team-mcp-claude-root-')); + createdDirs.push(claudeRoot); + setClaudeBasePathOverride(claudeRoot); + + const builder = new TeamMcpConfigBuilder(); + const configPath = await builder.writeConfigFile(); + createdPaths.push(configPath); + + expect(readGeneratedServer(configPath)?.env).toMatchObject({ + AGENT_TEAMS_MCP_CLAUDE_DIR: claudeRoot, + }); + }); + it('ignores malformed user MCP file', async () => { const homeDir = fs.mkdtempSync(path.join(os.tmpdir(), 'team-mcp-home-')); const projectDir = fs.mkdtempSync(path.join(os.tmpdir(), 'team-mcp-project-')); diff --git a/test/main/utils/AgentCliLaunch.live-e2e.test.ts b/test/main/utils/AgentCliLaunch.live-e2e.test.ts new file mode 100644 index 00000000..582a5d5b --- /dev/null +++ b/test/main/utils/AgentCliLaunch.live-e2e.test.ts @@ -0,0 +1,179 @@ +// @vitest-environment node +import { execFile } from 'child_process'; +import * as fs from 'fs'; +import * as path from 'path'; +import { promisify } from 'util'; + +import { describe, expect, it } from 'vitest'; + +import { CodexBinaryResolver } from '@main/services/infrastructure/codexAppServer/CodexBinaryResolver'; +import { execCli, killProcessTree, spawnCli } from '@main/utils/childProcess'; + +const execFileAsync = promisify(execFile); +const liveDescribe = process.env.AGENT_CLI_LAUNCH_LIVE_E2E === '1' ? 
describe : describe.skip; +const CLI_LAUNCH_TIMEOUT_MS = 15_000; + +type AgentCliProvider = 'opencode' | 'codex' | 'claude'; + +type AgentCliSpec = { + providerId: AgentCliProvider; + command: string; + overrideEnv: string; + versionPattern: RegExp; + resolver?: () => Promise; +}; + +const AGENT_CLI_SPECS: AgentCliSpec[] = [ + { + providerId: 'opencode', + command: 'opencode', + overrideEnv: 'OPENCODE_CLI_PATH', + versionPattern: /\b\d+\.\d+\.\d+\b/, + }, + { + providerId: 'codex', + command: 'codex', + overrideEnv: 'CODEX_CLI_PATH', + versionPattern: /\b(?:codex-cli\s+)?\d+\.\d+\.\d+\b/i, + resolver: () => CodexBinaryResolver.resolve(), + }, + { + providerId: 'claude', + command: 'claude', + overrideEnv: 'CLAUDE_CLI_PATH', + versionPattern: /\b\d+\.\d+\.\d+\b.*Claude Code/i, + }, +]; + +liveDescribe('agent CLI launch live e2e', () => { + it.each(AGENT_CLI_SPECS)( + 'resolves and executes $providerId through execCli without tmux', + async (spec) => { + const binaryPath = await resolveCliBinary(spec); + expect(binaryPath, `${spec.providerId} binary must be installed`).toBeTruthy(); + + const result = await execCli(binaryPath, ['--version'], { + timeout: CLI_LAUNCH_TIMEOUT_MS, + windowsHide: true, + }); + const output = `${result.stdout}\n${result.stderr}`.trim(); + + expect(output).toMatch(spec.versionPattern); + expect(output).not.toMatch(/tmux/i); + expect(output).not.toMatch(/running scripts is disabled/i); + expect(output).not.toMatch(/not digitally signed/i); + }, + CLI_LAUNCH_TIMEOUT_MS + 5_000 + ); + + it.each(AGENT_CLI_SPECS)( + 'spawns $providerId through spawnCli and exits cleanly without tmux', + async (spec) => { + const binaryPath = await resolveCliBinary(spec); + expect(binaryPath, `${spec.providerId} binary must be installed`).toBeTruthy(); + + const result = await spawnAndCollect(binaryPath, ['--version']); + const output = `${result.stdout}\n${result.stderr}`.trim(); + + expect(result.exitCode).toBe(0); + expect(output).toMatch(spec.versionPattern); 
+ expect(output).not.toMatch(/tmux/i); + expect(output).not.toMatch(/running scripts is disabled/i); + expect(output).not.toMatch(/not digitally signed/i); + }, + CLI_LAUNCH_TIMEOUT_MS + 5_000 + ); +}); + +async function resolveCliBinary(spec: AgentCliSpec): Promise { + const override = process.env[spec.overrideEnv]?.trim(); + if (override) { + return override; + } + + if (spec.resolver) { + const resolved = await spec.resolver(); + if (resolved) { + return preferWindowsCmdShim(resolved); + } + } + + return preferWindowsCmdShim(await resolveCommandFromPath(spec.command)); +} + +async function resolveCommandFromPath(command: string): Promise { + if (process.platform === 'win32') { + const { stdout } = await execFileAsync('where.exe', [command], { + timeout: CLI_LAUNCH_TIMEOUT_MS, + windowsHide: true, + }); + const candidates = stdout + .split(/\r?\n/) + .map((line) => line.trim()) + .filter(Boolean); + const cmdCandidate = candidates.find((candidate) => /\.cmd$/i.test(candidate)); + return cmdCandidate ?? candidates[0] ?? command; + } + + const { stdout } = await execFileAsync('which', [command], { + timeout: CLI_LAUNCH_TIMEOUT_MS, + }); + return stdout.trim().split(/\r?\n/)[0] ?? command; +} + +function preferWindowsCmdShim(binaryPath: string): string { + if (process.platform !== 'win32') { + return binaryPath; + } + + const extension = path.extname(binaryPath).toLowerCase(); + if (extension === '.cmd') { + return binaryPath; + } + + const cmdPeer = extension ? `${binaryPath.slice(0, -extension.length)}.cmd` : `${binaryPath}.cmd`; + return fs.existsSync(cmdPeer) ? 
cmdPeer : binaryPath; +} + +function spawnAndCollect( + binaryPath: string, + args: string[] +): Promise<{ exitCode: number | null; stdout: string; stderr: string }> { + return new Promise((resolve, reject) => { + const child = spawnCli(binaryPath, args, { + stdio: ['ignore', 'pipe', 'pipe'], + windowsHide: true, + }); + const stdoutChunks: Buffer[] = []; + const stderrChunks: Buffer[] = []; + let settled = false; + const timeout = setTimeout(() => { + if (!settled) { + settled = true; + killProcessTree(child, 'SIGKILL'); + reject(new Error(`Timed out launching ${binaryPath}`)); + } + }, CLI_LAUNCH_TIMEOUT_MS); + + child.stdout?.on('data', (chunk) => stdoutChunks.push(Buffer.from(chunk))); + child.stderr?.on('data', (chunk) => stderrChunks.push(Buffer.from(chunk))); + child.once('error', (error) => { + if (!settled) { + settled = true; + clearTimeout(timeout); + reject(error); + } + }); + child.once('close', (exitCode) => { + if (!settled) { + settled = true; + clearTimeout(timeout); + resolve({ + exitCode, + stdout: Buffer.concat(stdoutChunks).toString('utf8'), + stderr: Buffer.concat(stderrChunks).toString('utf8'), + }); + } + }); + }); +} diff --git a/test/main/utils/childProcess.test.ts b/test/main/utils/childProcess.test.ts index feeb50bc..14f5982c 100644 --- a/test/main/utils/childProcess.test.ts +++ b/test/main/utils/childProcess.test.ts @@ -64,6 +64,41 @@ function createGeneratedBunLauncher(): { dir: string; launcher: string; target: return { dir, launcher, target }; } +function createExtensionlessNpmNodeLauncher(): { + dir: string; + launcher: string; + target: string; +} { + const dir = mkdtempSync(path.join(tmpdir(), 'cat-cli-npm-launcher-')); + const targetDir = path.join(dir, 'node_modules', 'opencode-ai', 'bin'); + mkdirSync(targetDir, { recursive: true }); + const target = path.join(targetDir, 'opencode'); + writeFileSync(target, 'console.log("ok")', 'utf8'); + const launcher = path.join(dir, 'opencode.cmd'); + writeFileSync( + launcher, + [ + '@ECHO 
off', + 'GOTO start', + ':find_dp0', + 'SET dp0=%~dp0', + 'EXIT /b', + ':start', + 'SETLOCAL', + 'CALL :find_dp0', + 'IF EXIST "%dp0%\\node.exe" (', + ' SET "_prog=%dp0%\\node.exe"', + ') ELSE (', + ' SET "_prog=node"', + ')', + 'endLocal & goto #_undefined_# 2>NUL || title %COMSPEC% & "%_prog%" "%dp0%\\node_modules\\opencode-ai\\bin\\opencode" %*', + '', + ].join('\r\n'), + 'utf8' + ); + return { dir, launcher, target }; +} + describe('cli child process helpers', () => { beforeEach(() => { vi.resetAllMocks(); @@ -152,6 +187,24 @@ describe('cli child process helpers', () => { } }); + it('runs extensionless npm node cmd launchers directly', () => { + setPlatform('win32'); + const fake = {} as any; + const spawnMock = child.spawn as unknown as Mock; + spawnMock.mockReturnValue(fake); + const { dir, launcher, target } = createExtensionlessNpmNodeLauncher(); + try { + const result = spawnCli(launcher, ['--model', 'test%PATH%"arg']); + expect(spawnMock).toHaveBeenCalledTimes(1); + expect(spawnMock.mock.calls[0][0]).toBe('node'); + expect(spawnMock.mock.calls[0][1]).toEqual([target, '--model', 'test%PATH%"arg']); + expect(spawnMock.mock.calls[0][2]).not.toHaveProperty('shell'); + expect(result).toBe(fake); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + it('uses shell directly when path contains non-ASCII on windows', () => { setPlatform('win32'); const fake = {} as any; @@ -281,6 +334,29 @@ describe('cli child process helpers', () => { } }); + it('executes extensionless npm node cmd launchers directly', async () => { + setPlatform('win32'); + const execFileMock = child.execFile as unknown as Mock; + const execMock = child.exec as unknown as Mock; + execFileMock.mockImplementation( + (_cmd: string, _args: string[], _opts: unknown, cb: ExecCallback) => { + cb(null, 'ok', ''); + return {} as any; + } + ); + const { dir, launcher, target } = createExtensionlessNpmNodeLauncher(); + try { + const result = await execCli(launcher, ['--model', 
'test%PATH%"arg']); + expect(execFileMock).toHaveBeenCalledTimes(1); + expect(execFileMock.mock.calls[0][0]).toBe('node'); + expect(execFileMock.mock.calls[0][1]).toEqual([target, '--model', 'test%PATH%"arg']); + expect(execMock).not.toHaveBeenCalled(); + expect(result.stdout).toBe('ok'); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + it('skips straight to shell when path contains non-ASCII on windows', async () => { setPlatform('win32'); const execFileMock = child.execFile as unknown as Mock;