diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 98715bc4..0a8e6bdd 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,6 +10,7 @@ on:
- 'mcp-server/**'
- 'packages/**'
- 'resources/runtime/**'
+ - '.runtime-download/**'
- 'runtime.lock.json'
- 'test/**'
- '.github/workflows/**'
@@ -29,6 +30,7 @@ on:
- 'mcp-server/**'
- 'packages/**'
- 'resources/runtime/**'
+ - '.runtime-download/**'
- 'runtime.lock.json'
- 'test/**'
- '.github/workflows/**'
@@ -49,8 +51,13 @@ jobs:
- name: Checkout
uses: actions/checkout@v6
+ - name: Guard runtime artifacts
+ run: node ./scripts/ci/forbid-runtime-artifacts.cjs
+
- name: Setup pnpm
- uses: pnpm/action-setup@v4
+ uses: pnpm/action-setup@v6
+ with:
+ version: 10.33.0
- name: Setup Node.js
uses: actions/setup-node@v6
@@ -59,7 +66,7 @@ jobs:
cache: pnpm
- name: Install dependencies
- run: pnpm install --no-frozen-lockfile
+ run: pnpm install --frozen-lockfile
- name: Restore ESLint cache
uses: actions/cache@v5
@@ -67,9 +74,6 @@ jobs:
path: .eslintcache
key: eslint-${{ runner.os }}-${{ hashFiles('pnpm-lock.yaml', 'eslint.config.*', 'src/**/*.ts', 'src/**/*.tsx') }}
- - name: Auto-fix import sort (Node version parity)
- run: npx eslint src/ --fix --no-cache || true
-
- name: Validate workspace truth gate
run: pnpm check:ci
@@ -81,7 +85,9 @@ jobs:
uses: actions/checkout@v6
- name: Setup pnpm
- uses: pnpm/action-setup@v4
+ uses: pnpm/action-setup@v6
+ with:
+ version: 10.33.0
- name: Setup Node.js
uses: actions/setup-node@v6
@@ -90,7 +96,7 @@ jobs:
cache: pnpm
- name: Install dependencies
- run: pnpm install --no-frozen-lockfile
+ run: pnpm install --frozen-lockfile
- name: Test
run: pnpm test:workspace:ci
@@ -108,7 +114,9 @@ jobs:
run: git config --global core.longpaths true
- name: Setup pnpm
- uses: pnpm/action-setup@v4
+ uses: pnpm/action-setup@v6
+ with:
+ version: 10.33.0
- name: Setup Node.js
uses: actions/setup-node@v6
@@ -117,7 +125,7 @@ jobs:
cache: pnpm
- name: Install dependencies
- run: pnpm install --no-frozen-lockfile
+ run: pnpm install --frozen-lockfile
- name: Test task change ledger
run: pnpm test:task-change-ledger
diff --git a/.github/workflows/codex-runtime-smoke.yml b/.github/workflows/codex-runtime-smoke.yml
new file mode 100644
index 00000000..c162a884
--- /dev/null
+++ b/.github/workflows/codex-runtime-smoke.yml
@@ -0,0 +1,68 @@
+name: Codex Runtime Smoke
+
+on:
+ workflow_dispatch:
+ pull_request:
+ paths:
+ - '.github/workflows/codex-runtime-smoke.yml'
+ - 'package.json'
+ - 'pnpm-lock.yaml'
+ - 'pnpm-workspace.yaml'
+ - 'scripts/smoke/codex-runtime-install.ts'
+ - 'src/features/codex-runtime-installer/**'
+ - 'src/main/services/infrastructure/codexAppServer/**'
+ - 'src/main/services/runtime/providerAwareCliEnv.ts'
+ - 'src/main/utils/childProcess.ts'
+ - 'src/main/utils/pathDecoder.ts'
+ - 'tsconfig*.json'
+ push:
+ branches: [main, dev]
+ paths:
+ - '.github/workflows/codex-runtime-smoke.yml'
+ - 'package.json'
+ - 'pnpm-lock.yaml'
+ - 'pnpm-workspace.yaml'
+ - 'scripts/smoke/codex-runtime-install.ts'
+ - 'src/features/codex-runtime-installer/**'
+ - 'src/main/services/infrastructure/codexAppServer/**'
+ - 'src/main/services/runtime/providerAwareCliEnv.ts'
+ - 'src/main/utils/childProcess.ts'
+ - 'src/main/utils/pathDecoder.ts'
+ - 'tsconfig*.json'
+
+jobs:
+ install:
+ name: Install Codex runtime (${{ matrix.os }})
+ runs-on: ${{ matrix.os }}
+ timeout-minutes: 20
+
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest, macos-latest, windows-latest]
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v6
+
+ - name: Enable Windows long paths
+ if: runner.os == 'Windows'
+ shell: pwsh
+ run: git config --global core.longpaths true
+
+ - name: Setup pnpm
+ uses: pnpm/action-setup@v6
+ with:
+ version: 10.33.0
+
+ - name: Setup Node.js
+ uses: actions/setup-node@v6
+ with:
+ node-version: 22
+ cache: pnpm
+
+ - name: Install dependencies
+ run: pnpm install --frozen-lockfile --ignore-scripts
+
+ - name: Smoke Codex app-managed runtime install
+ run: pnpm smoke:codex-runtime-install
diff --git a/.github/workflows/landing.yml b/.github/workflows/landing.yml
index 51d075da..f457394c 100644
--- a/.github/workflows/landing.yml
+++ b/.github/workflows/landing.yml
@@ -4,15 +4,15 @@ on:
push:
branches: [main]
paths: [landing/**]
+ pull_request:
+ paths: [landing/**]
workflow_dispatch:
permissions:
contents: read
- pages: write
- id-token: write
concurrency:
- group: pages
+ group: landing-${{ github.event_name == 'pull_request' && github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
@@ -24,6 +24,8 @@ jobs:
- uses: actions/setup-node@v6
with:
node-version: 22
+ cache: npm
+ cache-dependency-path: landing/package-lock.json
- name: Install dependencies
working-directory: landing
@@ -37,18 +39,24 @@ jobs:
NUXT_PUBLIC_GITHUB_REPO: 777genius/agent-teams-ai
run: npm run generate:all
- - uses: actions/configure-pages@v5
+ - uses: actions/configure-pages@v6
+ if: github.event_name != 'pull_request'
- - uses: actions/upload-pages-artifact@v3
+ - uses: actions/upload-pages-artifact@v5
+ if: github.event_name != 'pull_request'
with:
path: landing/.output/public
deploy:
needs: build
runs-on: ubuntu-latest
+ if: github.event_name != 'pull_request'
+ permissions:
+ pages: write
+ id-token: write
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
steps:
- id: deployment
- uses: actions/deploy-pages@v4
+ uses: actions/deploy-pages@v5
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 1ab97a28..6421e4d4 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -18,7 +18,9 @@ jobs:
uses: actions/checkout@v6
- name: Setup pnpm
- uses: pnpm/action-setup@v4
+ uses: pnpm/action-setup@v6
+ with:
+ version: 10.33.0
- name: Setup Node.js
uses: actions/setup-node@v6
@@ -27,7 +29,7 @@ jobs:
cache: pnpm
- name: Install dependencies
- run: pnpm install --no-frozen-lockfile
+ run: pnpm install --frozen-lockfile
- name: Set version from tag
if: startsWith(github.ref, 'refs/tags/v')
@@ -57,7 +59,7 @@ jobs:
--draft=false 2>/dev/null || echo "Release $TAG already exists, skipping creation"
- name: Upload dist artifact
- uses: actions/upload-artifact@v6
+ uses: actions/upload-artifact@v7
with:
name: dist
path: |
@@ -67,13 +69,13 @@ jobs:
prepare-runtime:
runs-on: ubuntu-latest
- if: startsWith(github.ref, 'refs/tags/v')
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Create GitHub Release
+ if: startsWith(github.ref, 'refs/tags/v')
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
@@ -84,7 +86,12 @@ jobs:
--generate-notes \
--draft=false 2>/dev/null || echo "Release $TAG already exists, skipping creation"
+ - name: Skip runtime asset preparation for manual builds
+ if: ${{ !startsWith(github.ref, 'refs/tags/v') }}
+ run: echo "Runtime asset preparation is only needed for tagged releases."
+
- name: Check runtime assets
+ if: startsWith(github.ref, 'refs/tags/v')
id: runtime-assets
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -116,10 +123,10 @@ jobs:
--method POST \
"repos/${SOURCE_REPO}/actions/workflows/release-runtime.yml/dispatches" \
-f ref=main \
- -f inputs[source_ref]="$SOURCE_REF" \
- -f inputs[runtime_version]="$RUNTIME_VERSION" \
- -f inputs[target_release_repo]="$GITHUB_REPOSITORY" \
- -f inputs[target_release_tag]="$TARGET_TAG"
+ -f "inputs[source_ref]=$SOURCE_REF" \
+ -f "inputs[runtime_version]=$RUNTIME_VERSION" \
+ -f "inputs[target_release_repo]=$GITHUB_REPOSITORY" \
+ -f "inputs[target_release_tag]=$TARGET_TAG"
- name: Wait for runtime assets
if: steps.runtime-assets.outputs.missing == '1'
@@ -175,7 +182,9 @@ jobs:
name: dist
- name: Setup pnpm
- uses: pnpm/action-setup@v4
+ uses: pnpm/action-setup@v6
+ with:
+ version: 10.33.0
- name: Setup Node.js
uses: actions/setup-node@v6
@@ -184,12 +193,12 @@ jobs:
cache: pnpm
- name: Setup Python for node-gyp
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: '3.11'
- name: Install dependencies
- run: pnpm install --no-frozen-lockfile
+ run: pnpm install --frozen-lockfile
- name: Set version from tag
if: startsWith(github.ref, 'refs/tags/v')
@@ -256,6 +265,9 @@ jobs:
- name: Validate packaged bundle (macOS ${{ matrix.arch }})
run: node ./scripts/electron-builder/verifyBundle.cjs "release/mac-${{ matrix.arch }}/Agent Teams UI.app" darwin ${{ matrix.arch }}
+ - name: Smoke packaged app (macOS ${{ matrix.arch }})
+ run: node ./scripts/electron-builder/smokePackagedApp.cjs "release/mac-${{ matrix.arch }}/Agent Teams UI.app" darwin
+
- name: Upload assets to release
if: startsWith(github.ref, 'refs/tags/v')
env:
@@ -284,7 +296,9 @@ jobs:
name: dist
- name: Setup pnpm
- uses: pnpm/action-setup@v4
+ uses: pnpm/action-setup@v6
+ with:
+ version: 10.33.0
- name: Setup Node.js
uses: actions/setup-node@v6
@@ -293,12 +307,12 @@ jobs:
cache: pnpm
- name: Setup Python for node-gyp
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: '3.11'
- name: Install dependencies
- run: pnpm install --no-frozen-lockfile
+ run: pnpm install --frozen-lockfile
- name: Set version from tag
if: startsWith(github.ref, 'refs/tags/v')
@@ -359,6 +373,10 @@ jobs:
shell: bash
run: node ./scripts/electron-builder/verifyBundle.cjs "release/win-unpacked" win32 x64
+ - name: Smoke packaged app (Windows)
+ shell: bash
+ run: node ./scripts/electron-builder/smokePackagedApp.cjs "release/win-unpacked" win32
+
- name: Upload assets to release
if: startsWith(github.ref, 'refs/tags/v')
shell: bash
@@ -388,7 +406,9 @@ jobs:
name: dist
- name: Setup pnpm
- uses: pnpm/action-setup@v4
+ uses: pnpm/action-setup@v6
+ with:
+ version: 10.33.0
- name: Setup Node.js
uses: actions/setup-node@v6
@@ -397,17 +417,17 @@ jobs:
cache: pnpm
- name: Setup Python for node-gyp
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: '3.11'
- name: Install Linux packaging dependencies
run: |
sudo apt-get update
- sudo apt-get install -y libarchive-tools rpm
+ sudo apt-get install -y libarchive-tools rpm xvfb
- name: Install dependencies
- run: pnpm install --no-frozen-lockfile
+ run: pnpm install --frozen-lockfile
- name: Set version from tag
if: startsWith(github.ref, 'refs/tags/v')
@@ -463,6 +483,9 @@ jobs:
- name: Validate packaged bundle (Linux)
run: node ./scripts/electron-builder/verifyBundle.cjs "release/linux-unpacked" linux x64
+ - name: Smoke packaged app (Linux)
+ run: xvfb-run -a node ./scripts/electron-builder/smokePackagedApp.cjs "release/linux-unpacked" linux
+
- name: Upload assets to release
if: startsWith(github.ref, 'refs/tags/v')
env:
@@ -542,8 +565,8 @@ jobs:
# Canonical Windows feed
download_asset "Claude-Agent-Teams-UI-Setup.exe"
- WIN_SHA="$(sha512_base64 Claude-Agent-Teams-UI-Setup.exe)"
- WIN_SIZE="$(file_size Claude-Agent-Teams-UI-Setup.exe)"
+ WIN_SHA="$(sha512_base64 "Claude-Agent-Teams-UI-Setup.exe")"
+ WIN_SIZE="$(file_size "Claude-Agent-Teams-UI-Setup.exe")"
cat > latest.yml < latest-linux.yml < latest-mac.yml </`
- Reference implementation: `src/features/recent-projects`
diff --git a/AGENT_CRITICAL_GUARDRAILS.md b/AGENT_CRITICAL_GUARDRAILS.md
index 653c8983..711df1f4 100644
--- a/AGENT_CRITICAL_GUARDRAILS.md
+++ b/AGENT_CRITICAL_GUARDRAILS.md
@@ -4,6 +4,7 @@ These are the hard rules to keep agent work predictable and safe in this repo.
- Read `CLAUDE.md` first, then follow `docs/FEATURE_ARCHITECTURE_STANDARD.md` for new medium and large features.
- Use `pnpm` for project commands. Do not switch to `npm` or `yarn`.
+- Use the desktop Electron app (`pnpm dev`) for normal local development and smoke checks unless browser-mode internals are explicitly requested.
- Do not run `pnpm lint:fix` unless the user explicitly asks for broad formatting changes.
- Keep main, preload, renderer, and shared responsibilities separate.
- Use `wrapAgentBlock(text)` instead of manually concatenating agent block markers.
diff --git a/CLAUDE.md b/CLAUDE.md
index adf58968..dbaaa643 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -16,9 +16,9 @@ Key capabilities:
- **MCP integration** — built-in mcp-server for external tools and agent plugins
- **Post-compact context recovery** — restores team-management instructions after context compaction
- **Notification system** — alerts on task completion, agent attention needed, errors
-- **Zero-setup onboarding** — built-in Claude Code installation and authentication
+- **Zero-setup onboarding** — built-in runtime detection and provider authentication for Claude, Codex, and OpenCode
-100% free, open source. No API keys. No configuration. Runs entirely locally.
+100% free, open source, and local-first. The app uses available Claude/Codex/OpenCode provider access instead of forcing a single app-level API-key setup.
## Tech Stack
Electron 40.x, React 19.x, TypeScript 5.x, Tailwind CSS 3.x, Zustand 4.x
@@ -31,7 +31,7 @@ When running build/typecheck/test commands, pipe through `tail -20` to avoid flo
- Hard guardrails: [`AGENT_CRITICAL_GUARDRAILS.md`](AGENT_CRITICAL_GUARDRAILS.md)
- `pnpm install` - Install dependencies
-- `pnpm dev` - Dev server with hot reload
+- `pnpm dev` - Desktop Electron app with hot reload
- `pnpm build` - Production build
- `pnpm typecheck` - Type checking
- `pnpm lint:fix` - Lint and auto-fix
@@ -108,8 +108,8 @@ Task tool_use blocks are filtered when subagent exists
Keep orphaned Task calls (no matching subagent) for visibility.
### Agent Teams
-Claude Code's "Orchestrate Teams" feature: multiple sessions coordinate as a team.
-Official docs: https://code.claude.com/docs/en/agent-teams
+Agent Teams is this app's orchestration layer across Claude, Codex, and OpenCode runtimes.
+For Claude runtime behavior, also track Anthropic's upstream agent-team docs: https://code.claude.com/docs/en/agent-teams
#### Debugging Team Launches And Teammates
- Use [`docs/team-management/debugging-agent-teams.md`](docs/team-management/debugging-agent-teams.md) when a team launch hangs, a teammate remains `registered`, OpenCode shows `bootstrap unconfirmed`, messages are missing, or Task Log Stream looks wrong.
@@ -119,12 +119,13 @@ Official docs: https://code.claude.com/docs/en/agent-teams
#### Message Delivery Architecture
- **Lead** reads ONLY stdin (stream-json). Messages to lead must go through `relayLeadInboxMessages()` which converts inbox entries to stdin.
-- **Teammates** are independent CLI processes. Claude Code runtime monitors each teammate's inbox file and delivers messages between turns. No relay through lead needed.
-- **User → Teammate DM**: UI writes to `inboxes/{member}.json` with `from: "user"`. Teammate reads it directly.
-- **Teammate → User response**: Teammate writes to `inboxes/user.json`. UI reads all inbox files including `user.json` via `TeamInboxReader`.
-- **`relayMemberInboxMessages` is DISABLED** for teammate DMs (commented out in `teams.ts` and `index.ts`). It caused bugs: lead responding instead of teammate, duplicate messages, relay loops. Code preserved but not called.
-- **`relayLeadInboxMessages` is ACTIVE** — lead needs it because lead reads stdin, not inbox files.
-- Messages in `user.json` may lack `messageId` — `TeamInboxReader` generates deterministic IDs via sha256(from+timestamp+text).
+- **Native teammates** are independent CLI/process teammates. Claude/Codex-style teammates read their own inbox files between turns; no relay through the lead is needed.
+- **OpenCode secondary lanes** do not watch teammate inbox files. User DMs are persisted to `inboxes/{member}.json`, then delivered through the OpenCode runtime bridge with explicit delivery proof.
+- **User → Teammate DM**: UI writes to `inboxes/{member}.json` with `from: "user"`. Native teammates read it directly; OpenCode teammates receive it through runtime delivery.
+- **Teammate → User response**: Teammate writes to `inboxes/user.json` or uses the runtime-specific Agent Teams message tool that persists there. UI reads all inbox files including `user.json` via `TeamInboxReader`.
+- **`relayMemberInboxMessages` is legacy fallback code, not the normal teammate-DM path.** The UI caller in `src/main/ipc/teams.ts` is disabled because lead-mediated relay caused lead replies, duplicate messages, and relay loops.
+- **`relayLeadInboxMessages` is ACTIVE** - lead needs it because lead reads stdin, not inbox files.
+- Messages in `user.json` may lack `messageId` - `TeamInboxReader` generates deterministic IDs via sha256(from+timestamp+text).
- See `docs/team-management/research-messaging.md` for full architecture details.
#### Team Protocol Details
diff --git a/README.md b/README.md
index da1ec0e8..7b547385 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
-
+
@@ -201,10 +201,12 @@ Fact sources checked on May 5, 2026: [detailed research notes](docs/research/gas
## Quick start
1. **Download** the app for your platform (see [Installation](#installation))
-2. **Launch** — On first run, the setup wizard will detect the runtime and guide provider authentication
+2. **Launch the desktop app** - On first run, the setup wizard will detect the runtime and guide provider authentication
3. **Create a team** — Pick a project, define roles, write a provisioning prompt
4. **Watch** — Agents spawn, create tasks, and work. You see it all on the kanban board
+Use the desktop app as the primary product. The browser/web path is not needed for normal use and does not provide the full desktop runtime, IPC, terminal, provider auth, or team lifecycle behavior.
+
---
@@ -274,7 +276,9 @@ pnpm install
pnpm dev
```
-The app auto-discovers Claude Code projects from `~/.claude/`.
+`pnpm dev` starts the desktop Electron app. Do not start a browser/web dev server for normal development; that path is limited and is not the supported way to run agent teams locally.
+
+The desktop app auto-discovers Claude Code projects from `~/.claude/`.
### Debug teammate runtimes
@@ -303,7 +307,7 @@ pnpm dist # macOS + Windows + Linux
| Command | Description |
|---------|-------------|
-| `pnpm dev` | Development with hot reload |
+| `pnpm dev` | Desktop app development with hot reload |
| `pnpm build` | Production build |
| `pnpm typecheck` | TypeScript type checking |
| `pnpm lint` | Lint (no auto-fix) |
diff --git a/docs/team-management/README.md b/docs/team-management/README.md
index 326b7540..d1f21d8e 100644
--- a/docs/team-management/README.md
+++ b/docs/team-management/README.md
@@ -1,134 +1,135 @@
# Team Management Feature
-Интерфейс для управления командами тиммейтов Claude Code внутри Agent Teams (Electron).
+UI for managing AI teammate teams inside Agent Teams (Electron), including Claude, Codex, and OpenCode runtime paths.
-## Что делает
+## What It Does
-- Видеть состав команды и роли участников
-- Kanban-доска с 5 колонками: TODO, IN PROGRESS, REVIEW, DONE, APPROVED
-- Отправка сообщений тиммейтам через inbox-файлы
-- Review flow: запрос ревью, ручное ревью и прямое manual approval из DONE
-- Live updates через file watcher
+- Shows team members and their roles.
+- Provides a Kanban board with 5 columns: TODO, IN PROGRESS, REVIEW, DONE, APPROVED.
+- Sends messages to teammates through inbox files and runtime-aware delivery for OpenCode.
+- Supports review flow: review requests, manual review, and direct manual approval from DONE.
+- Provides live updates through the file watcher.
-## Документация
+## Documentation
-| Файл | Содержание |
-| ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- |
-| [research-inbox.md](./research-inbox.md) | Формат inbox-файлов, race conditions, atomic write, доставка сообщений |
-| [research-tasks.md](./research-tasks.md) | Формат task-файлов, .lock, .highwatermark, конкурентный доступ |
-| [research-messaging.md](./research-messaging.md) | Сравнение подходов (inbox vs SDK vs CLI), почему выбрали inbox |
-| [kanban-design.md](./kanban-design.md) | Kanban flow, колонки, review mechanism, kanban-state.json |
-| [implementation.md](./implementation.md) | Техплан: файлы, шаги, verification |
-| [openclaw-agent-teams-integration.md](./openclaw-agent-teams-integration.md) | How to connect OpenClaw or another outside AI through Agent Teams MCP and REST control API |
-| [research-worktrees.md](./research-worktrees.md) | Git worktrees + teams, запуск Claude процессов из UI (Phase 2) |
-| [task-queue-derived-agenda-plan.md](./task-queue-derived-agenda-plan.md) | Подробный rollout-plan по разделению queue/inventory, derived actionOwner и phased agenda/delta sync |
-| [debugging-agent-teams.md](./debugging-agent-teams.md) | Runtime debugging runbook, включая `CLAUDE_TEAM_TEAMMATE_MODE=tmux` для pane-backed teammate debug |
+| File | Contents |
+| ---- | -------- |
+| [research-inbox.md](./research-inbox.md) | Inbox file format, race conditions, atomic writes, message delivery |
+| [research-tasks.md](./research-tasks.md) | Task file format, .lock, .highwatermark, concurrent access |
+| [research-messaging.md](./research-messaging.md) | Comparison of inbox, SDK, and CLI approaches, and why inbox was chosen |
+| [kanban-design.md](./kanban-design.md) | Kanban flow, columns, review mechanism, kanban-state.json |
+| [implementation.md](./implementation.md) | Technical plan: files, steps, verification |
+| [openclaw-agent-teams-integration.md](./openclaw-agent-teams-integration.md) | How to connect OpenClaw or another outside AI through Agent Teams MCP and REST control API |
+| [research-worktrees.md](./research-worktrees.md) | Git worktrees + teams, launching Claude processes from the UI (Phase 2) |
+| [task-queue-derived-agenda-plan.md](./task-queue-derived-agenda-plan.md) | Detailed rollout plan for queue/inventory split, derived actionOwner, and phased agenda/delta sync |
+| [debugging-agent-teams.md](./debugging-agent-teams.md) | Runtime debugging runbook, including `CLAUDE_TEAM_TEAMMATE_MODE=tmux` for pane-backed teammate debug |
+| [adaptive-task-graphs-research-note.md](./adaptive-task-graphs-research-note.md) | Research note on LATTE/AgentConductor: dynamic task graphs, frontier scheduling, selective verify, release stragglers |
-## Ключевые решения
+## Key Decisions
-⚠️ `docs/iterations/*` - это исторические planning notes. Они полезны для контекста, но не являются source-of-truth для текущего поведения продукта. Актуальный контракт review flow описан в этом файле и в [kanban-design.md](./kanban-design.md).
+Warning: `docs/iterations/*` contains historical planning notes. These files are useful for context, but they are not the source of truth for current product behavior. The current review-flow contract is documented here and in [kanban-design.md](./kanban-design.md).
-⚠️ `agent-attachments-*.md` (architecture plan + phase 1-5 plans) - это исторические дизайн-документы для feature attachments. Фактическая реализация в `src/features/agent-attachments/` может отличаться от описанной архитектуры. Для актуального состояния см. код в `src/features/agent-attachments/core/domain/` и тесты.
+Warning: `agent-attachments-*.md` files (architecture plan + phase 1-5 plans) are historical design documents for feature attachments. The actual implementation in `src/features/agent-attachments/` may differ from that architecture. For current behavior, see the code in `src/features/agent-attachments/core/domain/` and the tests.
-### 1. Messaging: Inbox-файлы
+### 1. Messaging: Inbox + Runtime Delivery
-Единственный способ общаться с **запущенными** тиммейтами. SDK и CLI создают новые сессии, а не подключаются к существующим. Подробности: [research-messaging.md](./research-messaging.md)
+For native Claude/Codex-style teammates, the primary path is durable inbox files. Lead inbox delivery uses `relayLeadInboxMessages()` because the lead reads stdin. OpenCode secondary lanes do not read `inboxes/{member}.json` directly, so the UI first persists the message to the inbox and then delivers it through the runtime bridge with delivery proof. Details: [research-messaging.md](./research-messaging.md) and [debugging-agent-teams.md](./debugging-agent-teams.md).
-### 1.1 Roster source: members.meta.json + inboxes
+### 1.1 Roster Source: members.meta.json + inboxes
-- `config.json` не используется как полный реестр участников (он может содержать только team-lead и служебные поля CLI).
-- Источник метаданных участников (role/color/agentType): `members.meta.json`.
-- Источник runtime-состава и адресации сообщений: `inboxes/{member}.json`.
+- `config.json` is not used as the complete member registry. It may contain only the team lead and CLI service fields.
+- Member metadata source (role/color/agentType): `members.meta.json`.
+- Runtime membership and message-addressing source: `inboxes/{member}.json`.
-### 2. Kanban Storage: Собственный файл
+### 2. Kanban Storage: Dedicated File
-Kanban-позиция (REVIEW, APPROVED) хранится в `kanban-state.json`, а не в task metadata. Причина: metadata может быть перезаписан агентом при TaskUpdate. Подробности: [kanban-design.md](./kanban-design.md)
+Kanban position (REVIEW, APPROVED) is stored in `kanban-state.json`, not task metadata. Reason: task metadata may be overwritten by an agent during TaskUpdate. Details: [kanban-design.md](./kanban-design.md).
### 3. Review Flow: Approve / Request Changes
-- Есть ревьюверы в команде → автоматическое назначение через inbox
-- Юзер также может вручную одобрить задачу напрямую из `DONE` без отдельного захода в `REVIEW`
-- Нет ревьюверов → ручное ревью юзером (Approve / Request Changes в UI)
-- При Request Changes → юзер описывает проблему (опционально) → задача возвращается owner'у в `pending` с `needsFix`
+- Reviewers exist in the team -> automatic assignment through inbox.
+- The user can also manually approve a task directly from `DONE` without entering `REVIEW`.
+- No reviewers -> manual user review (Approve / Request Changes in the UI).
+- Request Changes -> the user optionally describes the issue -> the task returns to its owner in `pending` with `needsFix`.
### 4. Atomic Write
-Все записи через tmp + rename для предотвращения corrupted JSON.
+All writes use tmp + rename to prevent corrupted JSON.
### 5. Sender Identity
-Отправляем `from: "user"`. Fallback на `from: "team-lead"` если не работает.
+Messages are sent with `from: "user"`. If that does not work, fall back to `from: "team-lead"`.
-## Финальные решения после ревью
+## Final Decisions After Review
-По итогам 3 раундов ревью (13 экспертов) приняты следующие решения:
+After 3 review rounds with 13 experts, the following decisions were accepted.
-### Inbox: Atomic write + messageId verify
+### Inbox: Atomic Write + messageId Verify
-- Atomic write (tmp + rename) предотвращает corrupted JSON
-- После записи читаем файл обратно и проверяем наличие нашего `messageId`
-- Полный CAS/retry-цикл — не нужен на MVP: проверка при следующем read достаточна
-- Риск race condition с агентом реален, но вероятность низкая
+- Atomic write (tmp + rename) prevents corrupted JSON.
+- After writing, read the file back and verify that our `messageId` is present.
+- A full CAS/retry loop is not needed for MVP. Verification on the next read is enough.
+- The risk of a race condition with an agent is real, but the probability is low.
-### Kanban: kanban-state.json с безопасным GC
+### Kanban: kanban-state.json With Safe GC
-- GC устаревших записей kanban-state выполняется ТОЛЬКО ПОСЛЕ полной загрузки tasks
-- Иначе при startup возможна race condition: GC удаляет запись до того как task-файл прочитан
+- Stale `kanban-state` entries are garbage-collected only after all tasks are fully loaded.
+- Otherwise, startup can race: GC may delete an entry before the task file has been read.
### Review Flow: Approve / Request Changes
-- Кнопки переименованы: **Approve** (вместо OK) и **Request Changes** (вместо Error)
-- Комментарий при Request Changes — опционален
-- Manual UI допускает два valid path:
+- Buttons were renamed: **Approve** instead of OK, and **Request Changes** instead of Error.
+- Request Changes comment is optional.
+- Manual UI allows two valid paths:
- `DONE -> REVIEW -> APPROVED`
- - `DONE -> APPROVED` как быстрый manual approval
-- `Request Changes` снимает kanban-state запись и возвращает задачу в `pending` с `needsFix`
-- `reviewHistory` и round-robin балансировка → Phase 2, не MVP
+ - `DONE -> APPROVED` as fast manual approval
+- `Request Changes` removes the kanban-state entry and returns the task to `pending` with `needsFix`.
+- `reviewHistory` and round-robin balancing are Phase 2, not MVP.
-### Members: полный список через union
+### Members: Complete List Through Union
-- `union(config members + inbox filenames + task owners)` — единственный способ получить полный список
-- `owner` в task-файлах — опционален (агент может не иметь owner до назначения)
+- `union(members.meta.json + config members + inbox filenames + task owners)` is the only way to get the complete member list.
+- `owner` in task files is optional. A task may not have an owner until it is assigned.
### Graceful Degradation
-- `try/catch` везде в TeamDataService — при ошибке чтения возвращаем безопасные дефолты
-- 3 состояния участника: `ACTIVE` / `IDLE` / `TERMINATED`
- - `ACTIVE`: idle < 5 минут
- - `IDLE`: idle > 5 минут
- - `TERMINATED`: получен `shutdown_response` с `approve: true`
+- `try/catch` is used throughout `TeamDataService`; read errors return safe defaults.
+- A member has 3 states: `ACTIVE` / `IDLE` / `TERMINATED`.
+ - `ACTIVE`: idle < 5 minutes
+ - `IDLE`: idle > 5 minutes
+ - `TERMINATED`: received `shutdown_response` with `approve: true`
-### @dnd-kit and review transitions
+### @dnd-kit and Review Transitions
-- Переходы между review-колонками делаются через card actions в UI
-- `@dnd-kit` сейчас используется в первую очередь для перестановки задач внутри колонки
-- Phase 2: полноценный D&D через `@dnd-kit`
+- Transitions between review columns happen through card actions in the UI.
+- `@dnd-kit` is currently used primarily for reordering tasks inside a column.
+- Phase 2: full drag-and-drop through `@dnd-kit`.
---
-## Открытые вопросы
+## Open Questions
-- **FileWatcher расширение**: FileWatcher.ts уже 1243 строк — добавление teams/tasks watchers нетривиально, требует отдельного спайка
-- **Windows atomic rename**: `fs.renameSync` на Windows бросает `EXDEV`/`EBUSY` при кросс-устройственном rename — нужна обёртка
-- **leadSessionId интеграция**: config.json содержит `leadSessionId`, но интеграция с session viewer (переход к сессии лида) — открытый вопрос
-- **Hard Interrupt**: сообщения доставляются между turns (1-30с задержка). В будущем нужен способ прервать mid-turn
-- **Архивация**: inbox не чистится автоматически, нужна кнопка "Архивировать"
+- **FileWatcher extension**: FileWatcher.ts is already 1243 lines. Adding teams/tasks watchers is non-trivial and needs a separate spike.
+- **Windows atomic rename**: `fs.renameSync` on Windows can throw `EXDEV`/`EBUSY` for cross-device rename. A wrapper is needed.
+- **leadSessionId integration**: config.json contains `leadSessionId`, but integration with the session viewer (navigating to the lead session) remains open.
+- **Hard Interrupt**: messages are delivered between turns with a 1-30 second delay. A future mechanism is needed to interrupt mid-turn.
+- **Archival**: inbox is not cleaned automatically. An "Archive" button is needed.
-## Файловая структура Claude Code
+## Claude Code File Structure
-```
+```text
~/.claude/
├── teams/{teamName}/
-│ ├── config.json # Конфиг команды (lead + служебные поля)
-│ ├── members.meta.json # Роли/цвета/типы участников (teammates)
-│ └── inboxes/{memberName}.json # Inbox каждого участника
+│ ├── config.json # Team config (lead + service fields)
+│ ├── members.meta.json # Member roles/colors/types (teammates)
+│ └── inboxes/{memberName}.json # Inbox for each member
└── tasks/{teamName}/
- ├── {id}.json # Файл задачи
- ├── .lock # Lock-файл (0 байт)
- └── .highwatermark # Последний ID задачи
+ ├── {id}.json # Task file
+ ├── .lock # Lock file (0 bytes)
+ └── .highwatermark # Latest task ID
```
-**ВАЖНО**:
+**Important**:
-- `config.json` не является source-of-truth для полного roster.
-- Полный roster для UI формируется как `members.meta.json + inbox filenames (+ lead из config)`.
+- `config.json` is not the source of truth for the complete roster.
+- The UI builds the complete roster from `members.meta.json + inbox filenames (+ lead from config)`.
diff --git a/docs/team-management/adaptive-task-graphs-research-note.md b/docs/team-management/adaptive-task-graphs-research-note.md
new file mode 100644
index 00000000..7b8c2ac7
--- /dev/null
+++ b/docs/team-management/adaptive-task-graphs-research-note.md
@@ -0,0 +1,181 @@
+# Adaptive Task Graphs For Agent Teams
+
+**Date:** 2026-05-14
+**Status:** Research note, not an approved implementation plan
+**Scope:** Team Management, task graph scheduling, lead/member coordination, token and conflict reduction
+
+## Sources
+
+- [AgentConductor: Topology Evolution for Multi-Agent Competition-Level Code Generation](https://arxiv.org/abs/2602.17100)
+- [Improving the Efficiency of Language Agent Teams with Adaptive Task Graphs](https://arxiv.org/html/2605.06320v1)
+
+## Why This Is Interesting
+
+These papers point at the same product problem we already see in Agent Teams: multi-agent performance is limited less by raw model capability and more by coordination overhead.
+
+The useful idea is not "replace our orchestrator with a research framework". The useful idea is to make the task board itself a more explicit coordination graph:
+
+- tasks are graph nodes
+- `blockedBy` / `blocks` are dependency edges
+- ready work is the graph frontier
+- workers should receive scoped local context, not full team history
+- stalled work should be released or reassigned explicitly
+- risky or high-impact work should get selective verification
+- coordination quality should be measured, not inferred from vibes
+
+This fits our existing direction because the product already has task dependencies, review workflow, stall monitoring, task logs, context tracking, and lead/member briefing surfaces.
+
+## Most Valuable Ideas To Preserve
+
+### 1. LATTE-style dynamic task graph
+
+LATTE is the more directly useful paper for us.
+
+Core idea:
+
+- the lead owns global graph consistency
+- workers can propose or claim local work
+- structural updates are serialized through the lead or controller
+- execution stays parallel where dependencies allow it
+- the graph remains inspectable, so coordination decisions are visible in the UI
+
+Relevant operators to consider:
+
+- `Discover` - create a newly discovered task when implementation reveals missing work
+- `Assign` - set an owner for a ready task
+- `Claim` - allow an idle member to take an unowned ready task
+- `Complete` - mark task completion
+- `Release` - clear owner or return stalled work to the ready queue
+- `Close` - close stale/completed tasks when tests or evidence prove completion
+- `Verify` - insert a lightweight review/check task before downstream work proceeds
+
+🎯 Product value: 9/10
+🛡️ Reliability if implemented incrementally: 8/10
+🧠 Complexity: 6/10
+Expected change size for a first useful version: about 700-1400 LOC.
+
+### 2. Frontier-based scheduling
+
+The board should be able to derive "what is actionable now" from graph state:
+
+- a task is ready when all `blockedBy` tasks are completed or approved
+- blocked tasks should not be started automatically
+- ready unowned tasks can be offered to idle members
+- ready owned tasks belong in the owner's operational queue
+- lead briefing should show graph bottlenecks and unassigned frontier work
+
+This connects directly to `task-queue-derived-agenda-plan.md`. The key addition is to treat the queue as a graph frontier, not just a filtered task list.
+
+🎯 Product value: 9/10
+🛡️ Reliability: 8/10
+🧠 Complexity: 5/10
+Expected change size: about 500-1000 LOC if built on the current derived agenda work.
+
+### 3. Selective verification instead of reviewing everything
+
+LATTE's `Verify` is useful because it scales review cost with risk:
+
+- verify upstream tasks that many other tasks depend on
+- verify work touching shared files or public contracts
+- verify tasks whose owner reported uncertainty
+- skip extra verification for small isolated changes unless policy requires it
+
+This maps well to our existing review UI and task comments. A future implementation could create a verification task or request review based on graph impact.
+
+🎯 Product value: 8/10
+🛡️ Reliability: 7/10
+🧠 Complexity: 5/10
+Expected change size: about 350-800 LOC.
+
+### 4. Straggler release as first-class behavior
+
+LATTE explicitly models stalled workers and `Release`. We already have task-stall monitoring, but the next step is to make release/reassign a structured board action, not only a message nudge.
+
+Useful behavior:
+
+- detect a task with weak or stale progress evidence
+- notify or nudge the current owner first
+- if still stalled, clear owner or reassign with context
+- preserve evidence and avoid duplicate nudges
+- never auto-start new runtime lanes as a side effect
+
+This must stay compatible with the existing OpenCode delivery watchdog and stall-monitor semantics.
+
+🎯 Product value: 8/10
+🛡️ Reliability: 7/10
+🧠 Complexity: 6/10
+Expected change size: about 600-1200 LOC.
+
+### 5. Coordination metrics as a product surface
+
+LATTE is especially useful because it externalizes coordination and measures failures:
+
+- idle rounds
+- straggler tail latency
+- inter-agent messages
+- file conflicts or concurrent writes
+- redundant output
+- wasted tokens
+- task graph growth and bottlenecks
+
+For Agent Teams, this could become a "team efficiency" diagnostic panel and a safer prerequisite before changing scheduling behavior.
+
+🎯 Product value: 8/10
+🛡️ Reliability: 9/10
+🧠 Complexity: 4/10
+Expected change size: about 350-800 LOC.
+
+## AgentConductor Ideas Worth Keeping
+
+AgentConductor is less directly implementable because it depends on an RL/SFT-trained orchestrator and competition-code benchmarks. Still, one product idea is valuable:
+
+**Task difficulty should control graph density.**
+
+Possible lightweight version for Agent Teams:
+
+- easy task - solo or small graph, minimal messaging, no extra verification by default
+- medium task - split by independent deliverables, use dependencies only where real ordering exists
+- hard task - more explicit roles, denser review/checkpoints, stronger integration pass
+- failed execution feedback - adapt the graph instead of repeating the same topology
+
+Do not adopt the paper's full GRPO/SFT training path for now. It is too heavy for the app and not necessary to get product value.
+
+🎯 Product value: 7/10
+🛡️ Reliability: 6/10
+🧠 Complexity: 7/10
+Expected change size for a heuristic MVP: about 600-1300 LOC.
+
+## Objectivity And Risk Notes
+
+The LATTE paper is directionally credible but should not be treated as production proof.
+
+Strong points:
+
+- the core claim matches practical distributed-systems intuition
+- the paper compares against several coordination styles, not only one weak baseline
+- it evaluates multiple collaborative task types
+- it emphasizes metrics we can independently measure
+- the mechanism is simple enough to port incrementally
+
+Limitations:
+
+- it is an arXiv preprint, not final production validation
+- benchmark tasks are controlled research tasks, not our full Electron plus runtime matrix
+- baseline implementations may not match best possible production implementations
+- reported improvements should be validated against our own teams, logs, and providers
+
+Practical conclusion:
+
+⚠️ Treat LATTE as a strong design signal, not a dependency or spec. Implement the ideas gradually behind our existing task board, lead/member briefings, and runtime-specific guardrails.
+
+## Recommended Internal Path
+
+1. Add coordination metrics first.
+2. Derive a graph frontier from current task state.
+3. Make lead and member briefings use the frontier as the operational queue.
+4. Add structured release/reassign for stalled work.
+5. Add selective verification for high-risk graph nodes.
+6. Only after that, consider difficulty-aware graph density hints.
+
+This ordering gives us evidence before automation. It also keeps the rollout compatible with existing `blockedBy`, review flow, task-stall monitor, OpenCode delivery watchdog, and context tracking.
+
diff --git a/docs/team-management/debugging-agent-teams.md b/docs/team-management/debugging-agent-teams.md
index cf48676f..8decd6c6 100644
--- a/docs/team-management/debugging-agent-teams.md
+++ b/docs/team-management/debugging-agent-teams.md
@@ -17,17 +17,18 @@ Team root:
```bash
TEAM=""
TEAM_DIR="$HOME/.claude/teams/$TEAM"
+TASKS_DIR="$HOME/.claude/tasks/$TEAM"
```
Important files and folders:
- `config.json` - configured members, provider/model selection, project path
-- `members-meta.json` - member metadata, removed members, worktree settings if present
+- `members.meta.json` - member metadata, removed members, worktree settings if present
- `launch-state.json` - current app-side truth for member launch/liveness
- `bootstrap-state.json` - bootstrap phase summary when present
- `bootstrap-journal.jsonl` - ordered bootstrap events from the CLI/runtime
- `inboxes/*.json` - durable inbox messages for user, lead, and native teammates
- `sentMessages.json` - app-side sent-message records
-- `tasks/*.json` - task board state
+- `$TASKS_DIR/*.json` - task board state
- `.opencode-runtime/lanes.json` - OpenCode lane index
- `.opencode-runtime/lanes//manifest.json` - lane-scoped runtime store manifest
- `.opencode-runtime/lanes//opencode-sessions.json` - committed OpenCode session evidence
@@ -155,7 +156,7 @@ For task stalls:
```bash
TASK=""
-rg -n "$TASK" "$TEAM_DIR/tasks" "$TEAM_DIR/inboxes" "$TEAM_DIR/bootstrap-journal.jsonl" 2>/dev/null
+rg -n "$TASK" "$TASKS_DIR" "$TEAM_DIR/inboxes" "$TEAM_DIR/bootstrap-journal.jsonl" 2>/dev/null
```
Important distinctions:
@@ -195,9 +196,9 @@ Before changing launch or runtime logic:
Recommended verification:
```bash
-pnpm vitest run test/main/services/team/TeamProvisioningService.test.ts
-pnpm vitest run test/main/services/team/TeamAgentLaunchMatrix.safe-e2e.test.ts
-pnpm typecheck --pretty false
+pnpm test -- test/main/services/team/TeamProvisioningService.test.ts
+pnpm test -- test/main/services/team/TeamAgentLaunchMatrix.safe-e2e.test.ts
+pnpm typecheck
git diff --check
```
diff --git a/docs/team-management/opencode-delivery-task-log-phased-hardening-plan.md b/docs/team-management/opencode-delivery-task-log-phased-hardening-plan.md
new file mode 100644
index 00000000..87349f5f
--- /dev/null
+++ b/docs/team-management/opencode-delivery-task-log-phased-hardening-plan.md
@@ -0,0 +1,8822 @@
+# OpenCode Delivery And Task Log Phased Hardening Plan
+
+**Status:** implementation plan
+**Scope:** OpenCode secondary teammates, message delivery latency, task log attribution, member-work-sync wakeups
+**Primary repo:** `claude_team`
+**Secondary repo:** `agent_teams_orchestrator`
+**Key decision:** keep `TeamTask.workIntervals` unchanged as status-time intervals
+**Related docs:**
+
+- `docs/team-management/member-work-sync-control-plane-plan.md`
+- `docs/team-management/member-work-sync-opencode-turn-settled-plan.md`
+- `docs/team-management/member-work-sync-runtime-stop-hook-plan.md`
+- `docs/team-management/member-work-sync-debugging.md`
+- `docs/FEATURE_ARCHITECTURE_STANDARD.md`
+
+---
+
+## 1. Summary
+
+Do not change `workIntervals`.
+
+`workIntervals` should continue to mean:
+
+```text
+time while task.status === "in_progress"
+```
+
+That is a board/status interval, not proof that a runtime is actively executing tools. Keeping this invariant makes behavior provider-neutral for Claude, Codex, OpenCode, and future runtimes.
+
+The actual OpenCode problem observed in `comet-hub` is different:
+
+1. A task was created as `in_progress`, so `workIntervals` began immediately.
+2. The OpenCode teammate did not start task execution until several minutes later.
+3. Delivery spent time repairing stale OpenCode session/MCP state and waiting inside the bridge command.
+4. The app treated some sends as acceptance-unknown after bridge timeout, so watchdog/retry logic became conservative.
+5. Task Log Stream can miss logs from recreated OpenCode sessions because current transcript lookup is lane/member oriented, not session-evidence oriented.
+
+The fix should be phased:
+
+```text
+Phase 0 - diagnostic baseline and invariants
+Phase 1 - clarify UI copy around status-time
+Phase 2 - session-evidence based OpenCode task log lookup
+Phase 3 - accept-fast OpenCode delivery with async durable turn observation
+Phase 4 - targeted retry/recreate tuning and live validation
+```
+
+Recommended total implementation:
+
+`🎯 9 🛡️ 8 🧠 7`, roughly `1250-2150 LOC` across both repos including tests.
+
+The highest-risk phase is Phase 3 because it touches OpenCode delivery acceptance semantics. It must be implemented after Phase 2 gives better visibility and after targeted tests prove idempotency.
+
+---
+
+## 2. Core Invariants
+
+These must remain true throughout all phases.
+
+### 2.1 `workIntervals` Invariant
+
+`TeamTask.workIntervals` remains status-time:
+
+- start when a task enters `in_progress`;
+- close when it leaves `in_progress`;
+- reopen on later `in_progress`;
+- do not try to represent "model actively working";
+- do not special-case OpenCode.
+
+Why:
+
+- `workIntervals` are already used by task change scoping, reviewability, member timers, task logs, and diagnostics.
+- Changing them to "actual runtime work" would break established semantics and create provider-specific behavior.
+- The board truth should be provider-neutral.
+
+### 2.2 Delivery Invariant
+
+OpenCode delivery must remain idempotent:
+
+- one app-level `messageId`;
+- one `relayOfMessageId`;
+- bounded attempts;
+- deterministic ledger record;
+- no duplicate prompt after endpoint acceptance unless existing watchdog/retry rules explicitly decide it is safe.
+
+### 2.3 Member Work Sync Invariant
+
+Runtime turn-settled events are wake-up signals only.
+
+They must not:
+
+- mark tasks complete;
+- mark messages read;
+- count as semantic task progress;
+- bypass busy/cooldown/rate-limit guards;
+- conflict with `TeamTaskStallMonitor`.
+
+### 2.4 Task Log Invariant
+
+Task Log Stream may show less data when evidence is insufficient, but must not pull unrelated runtime work into a task.
+
+Safety order:
+
+```text
+correct task/member/session > complete logs > pretty UI
+```
+
+### 2.5 Rollout Invariant
+
+Do not add a long-lived production feature flag for this hardening.
+
+Instead:
+
+- ship phases in small commits;
+- keep each phase independently testable;
+- preserve old behavior as fallback inside the implementation where needed;
+- use env gates only for expensive live E2E tests;
+- use explicit command/API mode fields for semantic differences, for example `settlementMode`, not global hidden flags.
+
+Why:
+
+- this avoids another permanent state combination;
+- rollback stays simple by commit/revert;
+- the behavior is a correctness fix, not an experimental user preference.
+
+---
+
+## 3. Current Failure Model
+
+Observed with OpenCode teammates:
+
+```text
+task created in_progress
+-> foreground inbox assignment saved
+-> OpenCode send starts
+-> stale session or MCP not ready
+-> orchestrator recreates session or reattaches MCP
+-> prompt_async may be accepted late
+-> app bridge may time out around 45 seconds
+-> ledger marks acceptance unknown
+-> watchdog retries later
+-> actual task_start appears minutes after task created
+```
+
+This makes the UI look like:
+
+```text
+Work time 6m 28s
+```
+
+even though the runtime first touched the task much later.
+
+That number is not wrong under current `workIntervals` semantics. The label is misleading if a user reads it as "active agent execution time".
+
+---
+
+## 4. Source-Audit Findings To Preserve
+
+This section records the fragile seams found in the current code. Treat these as implementation constraints, not background notes.
+
+### 4.1 `OpenCodeReadinessBridge` Already Has Timeout Recovery
+
+Current file:
+
+```text
+src/main/services/team/opencode/bridge/OpenCodeReadinessBridge.ts
+```
+
+Current behavior:
+
+- `sendOpenCodeTeamMessage()` executes `opencode.sendMessage` with a default `45_000ms` timeout.
+- On command timeout, it calls `opencode.commandStatus`.
+- `commandStatus` is matched by:
+ - `originalRequestId`;
+ - `deliveryAttemptId`;
+ - `teamId`;
+ - `teamName`;
+ - `laneId`;
+ - `memberName`;
+ - `messageId`;
+ - `payloadHash`;
+ - `projectPath`;
+ - `runId`.
+
+Do not remove this recovery path.
+
+Phase 3 must extend it so an accept-fast command can still be recovered after a timeout. The command status response must remain strict about precondition mismatches; otherwise, a stale outcome from another team/lane/message could be accepted.
+
+### 4.2 Delivery Is Serialized Per OpenCode Member
+
+Current file:
+
+```text
+src/main/services/team/TeamProvisioningService.ts
+```
+
+Current behavior:
+
+- `deliverOpenCodeMemberMessage()` checks the active ledger record for the member/lane.
+- If another message is still active, the next delivery is queued behind it.
+- The active record is rechecked for visible reply proof before deciding whether to unblock the next message.
+
+Do not bypass this queue.
+
+Phase 3 accept-fast must not let an `accepted` record immediately free the member's delivery slot for the next prompt. "Prompt accepted" is not the same as "response proof complete". The active delivery slot should stay occupied until one of these is true:
+
+- visible reply proof is sufficient;
+- task progress proof is sufficient for that action mode;
+- record is terminal failure;
+- existing retry/observation policy explicitly allows moving forward.
+
+### 4.3 Ledger Schema Does Not Track Runtime Prompt Message ID Yet
+
+Current files:
+
+```text
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts
+src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts
+src/main/services/team/runtime/OpenCodeTeamRuntimeAdapter.ts
+```
+
+Current ledger record has:
+
+- `runtimeSessionId`;
+- `prePromptCursor`;
+- `deliveredUserMessageId`;
+- `observedAssistantMessageId`;
+- visible reply fields;
+- `attempts`;
+- `acceptanceUnknown`;
+- status and response state.
+
+It does not currently have a first-class list of OpenCode `runtimePromptMessageId` values.
+
+Phase 3 should add this carefully because exact observation needs it:
+
+```ts
+runtimePromptMessageIds: string[];
+lastRuntimePromptMessageId: string | null;
+lastDeliveryAttemptIdWithAcceptedPrompt: string | null;
+```
+
+Migration rule:
+
+- schema stays backward compatible;
+- missing fields default to empty/null;
+- do not invalidate old ledger files;
+- keep retention/pruning behavior unchanged.
+
+Why this matters:
+
+- `messageId` is app-level logical delivery identity.
+- `runtimePromptMessageId` is OpenCode runtime prompt identity.
+- Mixing them can cause duplicate-prompt or wrong-correlation bugs.
+
+Write rules:
+
+- append a runtime prompt ID only after `prompt_async` endpoint acceptance;
+- never append on failed-before-accept;
+- keep insertion order by attempt;
+- dedupe if commandStatus recovery sees the same prompt ID twice;
+- include the latest prompt ID in observe calls, but preserve older IDs for late visible proof correlation;
+- increment `attempts` for a new delivery attempt, not for the same accepted prompt recovered twice through commandStatus/observe.
+
+Current app-side bridge result also does not expose `runtimePromptMessageId`. Additive fields must be threaded through:
+
+```text
+orchestrator send response
+-> OpenCodeReadinessBridge recovery response
+-> OpenCodeTeamRuntimeAdapter result
+-> TeamProvisioningService ledger write
+```
+
+Do not store the runtime prompt ID only in diagnostics. Diagnostics are not a durable contract.
+
+### 4.4 Orchestrator Outcome Store Has Status Ordering Semantics
+
+Current file:
+
+```text
+agent_teams_orchestrator/src/services/opencode/OpenCodeCommandOutcomeStore.ts
+```
+
+Current behavior:
+
+- status order is controlled by `STATUS_RANK`;
+- `safeToRetry` is derived from status;
+- `prompt_submitting` with a runtime prompt ID is protected against downgrading to `failed_before_accept`;
+- prune removes records only when `completedAt` exists and is old.
+
+Phase 3 must be explicit about any new outcome status.
+
+If Phase 3 adds an accept-fast outcome status, for example:
+
+```ts
+"acceptance_returned"
+```
+
+then define:
+
+- rank relative to `prompt_accepted`, `turn_observed`, `reconciled`;
+- `safeToRetry=false`;
+- whether `completedAt` is set;
+- whether `commandStatus` reports it as accepted;
+- retention behavior so pending accepted outcomes do not leak forever.
+
+Do not overload `reconciled` for "accepted but not observed". That would make `sendMessageData.responseObservation` look more complete than it is.
+
+### 4.5 Task Log Attribution Currently Loses Exact Session Records
+
+Current file:
+
+```text
+src/main/services/team/taskLogs/stream/OpenCodeTaskLogStreamSource.ts
+```
+
+Current behavior:
+
+```ts
+const transcript = await getOpenCodeTranscript({ teamId, memberName });
+if (record.sessionId && transcript.sessionId !== record.sessionId) {
+ continue;
+}
+```
+
+This means an attribution record with an exact `sessionId` can still be lost if `getOpenCodeTranscript()` returns the current member/lane transcript instead of that exact session.
+
+Phase 2 must fetch by exact `sessionId` before applying this comparison. Otherwise the new evidence source will not fix recreated-session gaps.
+
+### 4.6 Task Log Cache Key Must Include New Evidence
+
+Current cache key:
+
+```text
+teamName::stableTaskWindowKey(task)::stableAttributionKey(attributionRecords)
+```
+
+Current TTL:
+
+```text
+1500ms
+```
+
+If Phase 2 adds ledger/session evidence, the cache key must include a stable evidence key or the source must intentionally bypass/rebuild cache when evidence changes.
+
+Recommended:
+
+```ts
+const cacheKey = [
+ teamName,
+ stableTaskWindowKey(task),
+ stableAttributionKey(attributionRecords),
+ stableOpenCodeSessionEvidenceKey(sessionEvidence),
+].join("::");
+```
+
+Without this, a task log opened just before ledger evidence appears can keep returning null until TTL expiry. TTL is short, but a deterministic cache key is still safer and makes tests predictable.
+
+### 4.7 Runtime Transcript CLI Resolves Only Stored Records
+
+Current file:
+
+```text
+agent_teams_orchestrator/src/cli/handlers/runtime.ts
+```
+
+Current behavior:
+
+- CLI resolves an OpenCode session by `team/member` and optional `lane`;
+- if multiple records exist, `--lane` is required;
+- there is no `--session-id`.
+
+Phase 2 must decide how exact session lookup works when the session is no longer the latest stored record.
+
+Safe resolver rule:
+
+```text
+--session-id may select a stored record by opencodeSessionId only if it also matches team/member and optional lane.
+```
+
+If no stored record exists for that session:
+
+- do not guess from arbitrary filesystem paths in v1;
+- return a structured not-found diagnostic;
+- let task log source fall back to current behavior.
+
+This avoids a broad and risky historical-session scan.
+
+### 4.8 Turn-Settled Review Findings Are Regression Gates For Phase 3
+
+Before changing acceptance semantics, the OpenCode turn-settled observer must satisfy these rules:
+
+- same-session `session.error` during `submitting` is buffered until endpoint acceptance;
+- premature SSE EOF before terminal idle is distinguishable from ordinary timeout;
+- observed wrapper rejects `noReply` at the wrapper boundary;
+- `session.status idle` and deprecated `session.idle` do not double-emit;
+- error wins over later idle for the same accepted prompt.
+
+Current code already appears to have tests and implementation for several of these cases. Treat this section as a regression gate, not a mandate to rewrite working observer code.
+
+Rule for implementation:
+
+```text
+if a current test already proves the invariant, keep it and add only missing coverage.
+```
+
+If these regress, accept-fast delivery can make diagnostics less reliable and can produce false member-work-sync wakeups.
+
+### 4.9 `observeMessageDelivery` Is Also Current-Session Oriented
+
+Current files:
+
+```text
+src/main/services/team/runtime/OpenCodeTeamRuntimeAdapter.ts
+src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts
+agent_teams_orchestrator/src/services/opencode/OpenCodeBridgeCommandHandler.ts
+```
+
+Current app-side observe input carries:
+
+- `teamName`;
+- `laneId`;
+- `memberName`;
+- `messageId`;
+- `prePromptCursor`.
+
+It does not carry:
+
+- `runtimeSessionId`;
+- `runtimePromptMessageId`;
+- `deliveryAttemptId`;
+- `payloadHash`.
+
+That is risky after session recreate:
+
+```text
+prompt accepted in session A
+-> session/lane registry later points to session B
+-> watchdog calls observeMessageDelivery
+-> observe inspects session B and reports not observed
+```
+
+Phase 3 must make observation exact when acceptance produced runtime identity. The observe command should prefer:
+
+```text
+runtimeSessionId + runtimePromptMessageId
+```
+
+then fall back to:
+
+```text
+prePromptCursor in current member/lane session
+```
+
+only when old ledger records do not have runtime prompt identity.
+
+Do not add exact transcript lookup in Phase 2 while leaving the observe path current-session-only in Phase 3. That would fix UI logs while keeping delivery proof fragile.
+
+### 4.10 Attributed Task Logs Currently Cache And Segment By Member
+
+Current file:
+
+```text
+src/main/services/team/taskLogs/stream/OpenCodeTaskLogStreamSource.ts
+```
+
+Current attributed path uses:
+
+```ts
+const transcriptCache = new Map(); // key: memberName
+const projectedByParticipant = new Map(); // key: participant
+```
+
+This is unsafe when one member has multiple OpenCode sessions:
+
+```text
+record 1 -> bob / session A
+record 2 -> bob / session B
+```
+
+If cache key is only `bob`, the second record can reuse the first transcript and be skipped or merged incorrectly. If segment key is only participant, messages from multiple runtime sessions can be merged under one actor/session.
+
+Phase 2 must isolate by session:
+
+```ts
+type TranscriptCacheKey = `${memberKey}::${laneId ?? ""}::${sessionId ?? "current"}`;
+type ProjectionGroupKey = `${participantKey}::${sessionId ?? "current"}`;
+```
+
+Renderer filters can still show one participant named `bob`, but segment identity and actor session must remain exact. This preserves provenance without changing user-facing participant labels.
+
+### 4.11 Ledger `accepted` Is Still An Active Delivery Slot
+
+Current file:
+
+```text
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts
+```
+
+Current `getActiveForMember()` excludes only terminal records, and `isTerminalForAutomaticSelection()` treats:
+
+```text
+failed_terminal
+responded
+```
+
+as terminal, except a special plain-text response case that still needs read/proof handling.
+
+Phase 3 must not make `accepted` terminal. A record with:
+
+```text
+status = accepted
+responseState = pending | not_observed | tool_error | prompt_delivered_no_assistant_message
+```
+
+must continue blocking later prompts for the same member/lane until proof, terminal failure, or existing retry policy says it is safe.
+
+This mirrors production queue patterns: prompt acceptance is like a visibility lease, not job completion. SQS- and BullMQ-style systems require idempotent processing because delivery can be at-least-once or a lock can stall; our ledger must keep the active slot until completion proof, not just endpoint acceptance.
+
+### 4.12 Existing Inline Observation And Materialization Are Safety Nets
+
+Current file:
+
+```text
+src/main/services/team/TeamProvisioningService.ts
+```
+
+Current delivery path has several proof passes:
+
+- visible destination proof before sending a new prompt;
+- plain-text materialization before sending a new prompt;
+- observe-before-retry for non-pending records;
+- inline observe after a prompt for direct user manual/tool-error cases;
+- visible proof and materialization after every observation.
+
+Phase 3 must not delete these as "old watchdog code". They are the code that clears stale banners, unblocks queued deliveries, and prevents duplicate prompts when a reply already exists.
+
+If accept-fast adds a new observation path, route the result through the same proof/materialization functions instead of duplicating read-commit logic.
+
+### 4.13 Normal Delivery And Work-Sync Have Different Proof Contracts
+
+Current files:
+
+```text
+src/features/team-management/adapters/opencode/OpenCodeTeamRuntimeAdapter.ts
+src/main/services/team/TeamProvisioningService.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+```
+
+Normal OpenCode runtime delivery currently instructs the model to:
+
+```text
+call agent-teams_message_send
+include source="runtime_delivery"
+include relayOfMessageId
+include exact taskRefs when present
+if message_send is unavailable/not connected/missing, write the concise reply as plain assistant text once
+```
+
+Work-sync nudges are intentionally different. They are not normal user-visible replies. A valid proof can be:
+
+- `member_work_sync_report`;
+- concrete task progress;
+- a blocker/clarification update;
+- in narrow cases, a visible message if the nudge explicitly asks for one.
+
+Phase 3 must preserve this distinction. Do not make a generic "assistant output exists" rule for all delivery types.
+
+Required DTO propagation:
+
+```ts
+interface OpenCodeDeliveryProofContext {
+ messageKind?: string;
+ actionMode?: "do" | "ask" | "delegate";
+ taskRefs: string[];
+ relayOfMessageId: string;
+ workSyncIntent?: "board_sync" | "task_progress" | "unknown";
+}
+```
+
+Rules:
+
+- normal user/member delivery still needs visible/tool proof according to current read-commit policy;
+- work-sync nudge proof must not be used to mark an unrelated normal delivery as responded;
+- plain text fallback remains a last-resort materialization path, not a broad success path;
+- accept-fast may return early only for prompt acceptance, not for proof;
+- exact observe must pass enough context to preserve these proof rules.
+
+Tests must include:
+
+- normal delivery with `message_send` tool error plus plain assistant text remains pending unless materialization/semantic proof passes;
+- work-sync nudge with valid `member_work_sync_report` does not require `message_send`;
+- work-sync nudge proof does not unblock a previous normal delivery for the same member;
+- missing `taskRefs` in normal task-linked delivery stays retryable/pending.
+
+### 4.14 `BoardTaskLogStreamService` Merge Can Drop Unsafe Segment IDs
+
+Current file:
+
+```text
+src/main/services/team/taskLogs/stream/BoardTaskLogStreamService.ts
+```
+
+Runtime fallback is merged by `segment.id`. If OpenCode fallback emits:
+
+```text
+opencode-attributed:::bob
+```
+
+for two different sessions, the second segment can be dropped or merged incorrectly.
+
+Phase 2 must make segment IDs session-aware:
+
+```text
+opencode-attributed:::bob:<sessionId>
+opencode-heuristic:::bob:<sessionId>
+```
+
+Also watch the service-level layout cache. It is keyed around team/task/transcript discovery generation, while OpenCode fallback has its own short cache. If exact session evidence appears after the first empty render, the stream must not keep serving a stale "only MCP" or empty summary.
+
+Source-audit confirmation:
+
+```text
+BoardTaskLogStreamService.shouldMergeRuntimeFallback()
+ returns false when any activity record has linkKind === "execution"
+```
+
+This is too coarse for the exact-session OpenCode fix. A primary transcript can contain an execution slice from a different session/provider while the OpenCode owner session still needs fallback projection.
+
+Rules:
+
+- session-specific fallback segments must have stable session-specific IDs;
+- merge dedupe should still remove identical tool/message rows by source ID or native tool signature;
+- OpenCode merge dedupe must include `sessionId` before source ID; native tool signatures are only safe inside the same session;
+- cache key must include OpenCode evidence generation/session candidate identity;
+- runtime fallback suppression must be session/member/provider-aware, not just "any execution record exists";
+- an unrelated execution slice must not hide exact-session OpenCode native tools;
+- no user-visible debug rows for cache misses;
+- diagnostics should say `exact_session_candidate_cache_miss` or `fallback_segment_deduped` only in developer metadata/logs.
+
+Tests must include:
+
+- primary stream plus OpenCode fallback with same participant but different sessions preserves both safe segments;
+- existing execution record from another session/provider does not suppress exact OpenCode fallback;
+- exact session evidence appearing after a previous empty render invalidates the OpenCode source cache;
+- duplicate retry MCP markers do not duplicate native tool rows;
+- native tools from session B are not merged into session A's segment.
+
+### 4.15 Member-Work-Sync Already Has Rate And Busy Controls
+
+Current files:
+
+```text
+src/features/member-work-sync/main/infrastructure/MemberWorkSyncEventQueue.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeActivationPolicy.ts
+src/renderer/utils/teamMessageFiltering.ts
+```
+
+Current timings:
+
+```text
+turn_settled/tool_finished -> runAfter about 5s
+task_changed/inbox_changed/runtime_activity -> runAfter about 15s
+startup/config/member_spawned -> runAfter about 30s
+```
+
+The dispatcher also has:
+
+- recent-delivery rate limits;
+- busy-signal suppression;
+- watchdog cooldown checks;
+- a short delivery wake after a nudge is scheduled.
+
+This is good. Do not bypass these controls from accept-fast or task-log work.
+
+Important UI invariant:
+
+```text
+WORK SYNC messages are hidden from the normal Messages feed by default.
+```
+
+`filterTeamMessages()` currently excludes member-work-sync nudges unless `includeMemberWorkSyncNudges=true`. Keep that behavior. Sync pings are control-plane activity and belong in audit/debug details, not the user's main conversation.
+
+Rules:
+
+- turn-settled can enqueue member-work-sync reconcile;
+- member-work-sync should not be used as normal delivery response proof;
+- delivery watchdog owns normal response proof retry;
+- task-stall watchdog owns semantic task progress stalls;
+- if delivery watchdog already nudged the same member/task recently, member-work-sync should stay suppressed;
+- if work-sync already nudged a member/fingerprint recently, task-stall should avoid immediate duplicate "please continue" copy when possible.
+
+Tests must include:
+
+- turn-settled event enqueues reconcile but does not itself send a visible message;
+- member-work-sync nudge stays hidden in normal Messages filtering;
+- a work-sync nudge does not mark a normal OpenCode delivery read/responded;
+- watchdog cooldown suppresses immediate duplicate work-sync nudge.
+
+### 4.16 Timeout Recovery Must Preserve Runtime Prompt Identity
+
+Current file:
+
+```text
+src/main/services/team/opencode/bridge/OpenCodeReadinessBridge.ts
+```
+
+Current timeout recovery calls `opencode.commandStatus`. If `status.sendMessageData` is present, it returns that data. If not, it synthesizes an accepted response from status fields.
+
+Fragile point:
+
+```text
+synthesized accepted response must include runtimePromptMessageId when status has it
+```
+
+Otherwise the app records:
+
+```text
+accepted=true
+sessionId=...
+runtimePromptMessageId missing
+```
+
+That breaks exact observe and task-log session evidence precisely in the timeout-recovery path, which is the path accept-fast is supposed to make safer.
+
+Required contract:
+
+```ts
+interface OpenCodeSendMessageCommandData {
+ accepted: boolean;
+ sessionId?: string;
+ runtimePromptMessageId?: string;
+ prePromptCursor?: string | null;
+ // existing fields...
+}
+```
+
+Rules:
+
+- `opencode.commandStatus` accepted response always returns runtime prompt identity if known;
+- `OpenCodeReadinessBridge.recoverTimedOutSendMessage()` copies it into the synthesized response;
+- `OpenCodeTeamRuntimeAdapter.sendMessageToMember()` exposes it to `TeamProvisioningService`;
+- tests cover timeout recovery with and without `sendMessageData`.
+
+### 4.17 Turn-Settled Spool Schema Is Consumed By Member-Work-Sync
+
+Current files:
+
+```text
+agent_teams_orchestrator/src/services/opencode/OpenCodeRuntimeTurnSettledEmitter.ts
+agent_teams_orchestrator/src/services/opencode/OpenCodeTurnSettledEmissionCoordinator.ts
+src/features/member-work-sync/main/infrastructure/OpenCodeTurnSettledPayloadNormalizer.ts
+```
+
+The normalizer accepts a narrow schema:
+
+```text
+provider = opencode
+source = agent-teams-orchestrator-opencode
+eventName = runtime_turn_settled or hookEventName = Stop
+sessionId
+teamName/memberName
+runtimePromptMessageId -> threadId
+```
+
+Phase 3 must not rename these fields casually. If the orchestrator changes event names or source strings, member-work-sync will silently stop receiving OpenCode turn-settled wakeups.
+
+Rules:
+
+- keep `schemaVersion: 1` backward compatible unless both sides migrate in one cut;
+- keep `source` stable;
+- keep `hookEventName: "Stop"` even though this is orchestrator-native, because the shared normalizer intentionally treats runtime-turn-settled like a provider stop/settled event;
+- include `runtimePromptMessageId` when known so `threadId` remains stable;
+- include `outcome` but do not let member-work-sync treat outcome as delivery proof.
+
+Tests must cover:
+
+- current emitted payload normalizes to a member-work-sync turn-settled event;
+- missing/renamed `source` is rejected;
+- missing `runtimePromptMessageId` still creates a session-level event, but cannot be used for exact prompt proof.
+
+### 4.18 Advisory Banners Depend On Terminal/Error Classification
+
+Current files:
+
+```text
+src/main/services/team/opencode/delivery/OpenCodeRuntimeDeliveryAdvisoryPolicy.ts
+src/main/services/team/TeamMemberRuntimeAdvisoryService.ts
+src/main/services/team/TeamDataService.ts
+```
+
+The advisory policy suppresses or defers generic/proof-missing issues, but hard states such as `tool_error`, `session_error`, `permission_blocked`, and `reconcile_failed` can surface quickly.
+
+Phase 3 must avoid mapping ordinary post-acceptance observation lag to a hard error.
+
+Additional fragile point:
+
+```text
+TeamMemberRuntimeAdvisoryService caches member/team advisories for 30 seconds.
+```
+
+The service already exposes:
+
+```ts
+invalidateMemberAdvisory(teamName, memberName)
+invalidateTeamAdvisories(teamName)
+```
+
+and `TeamDataService` wraps them as:
+
+```ts
+invalidateMemberRuntimeAdvisory(teamName, memberName)
+invalidateTeamRuntimeAdvisories(teamName)
+```
+
+This is important because a stale warning can remain visible even after the member replied, unless the proof write path invalidates this cache. Do not rely on the cache TTL for correctness.
+
+Current app bootstrap already wires:
+
+```ts
+teamProvisioningService.setMemberRuntimeAdvisoryInvalidator((teamName, memberName) => {
+ teamDataService?.invalidateMemberRuntimeAdvisory(teamName, memberName);
+ getTeamDataWorkerClient().invalidateMemberRuntimeAdvisory(teamName, memberName);
+});
+```
+
+Phase 3 must keep using this boundary. If async observation/proof code is extracted out of `TeamProvisioningService`, it should receive a small invalidation port instead of importing `TeamDataService` directly.
+
+Source-audit warning:
+
+```text
+TeamDataWorkerClient.invalidateMemberRuntimeAdvisory()
+ silently returns when teamName/memberName does not match SAFE_NAME_RE.
+```
+
+That is safe for IPC, but fragile for cache consistency. If a future member name contains spaces or other characters that fail the worker-safe regex, a member-scoped invalidation can update only the in-process cache and leave the worker cache stale.
+
+Rules:
+
+- keep member names canonical and worker-safe at the invalidation boundary;
+- if member name validation fails or canonicalization is uncertain, fall back to team-scoped invalidation rather than doing nothing;
+- do not reuse attribution-store name regexes for unrelated user-visible member validation;
+- add one test where member-scoped worker invalidation is rejected and the invalidation port falls back to team-scoped invalidation.
+
+Rules for advisory classification and proof-driven invalidation:
+
+- `turn_observation_timeout` after prompt acceptance should stay pending/deferred, not `failed_terminal`;
+- `stream_unavailable` is a diagnostic and retry signal unless response proof also fails;
+- `tool_error` remains hard because it means the model actually hit a broken tool;
+- if visible reply/task progress proof arrives after a warning candidate, `hasSupersedingOpenCodeRuntimeDeliveryProof()` must suppress the advisory;
+- successful proof should clear the UI banner without waiting for a full cache TTL;
+- every ledger transition that creates proof, materializes a visible reply, marks read after proof, or records task progress proof must invalidate the affected member advisory cache;
+- invalidation must be member-scoped when the member is known, team-scoped only when the proof path cannot safely identify the member;
+- renderer code must not clear the banner optimistically without backend proof, because that hides real tool/session failures.
+
+Tests must include:
+
+- accepted prompt + observation timeout does not immediately surface a user warning;
+- accepted prompt + tool_error can surface after proof grace/policy says so;
+- visible reply after an error candidate suppresses advisory;
+- task progress after a proof-missing candidate suppresses advisory when policy allows;
+- warning candidate cached first, then visible reply proof arrives, then the next team snapshot has no runtime advisory without waiting 30 seconds;
+- proof for `bob` invalidates `bob` only and does not erase unrelated hard advisory for `jack`.
+
+### 4.19 Task Log Stream Is Not The File-Change Ledger
+
+Current file:
+
+```text
+src/main/services/team/ChangeExtractorService.ts
+```
+
+Task Log Stream answers:
+
+```text
+what did the runtime/tool transcript show for this task?
+```
+
+The Changes panel answers:
+
+```text
+what file changes can be proven for this task?
+```
+
+These are related but not interchangeable. OpenCode native `write`/`edit` tool rows can help a user understand activity, but they are not a reliable file-change ledger by themselves. The existing changes path uses task-change ledgers, persisted summaries, worker computation, and OpenCode backfill with its own cache/in-flight behavior.
+
+Rules:
+
+- Phase 2 must not synthesize file changes directly from task-log tool rows;
+- if Changes needs improvement, do it through `ChangeExtractorService`/task-change ledger/backfill in a separate cut;
+- task-log native tool visibility can be used as a diagnostic that work happened, not as an authoritative diff;
+- avoid coupling task-log cache invalidation to task-change summary cache unless intentionally designed.
+
+Tests for this plan should assert task-log stream rows appear, not that Changes panel file diffs become non-empty.
+
+### 4.20 Payload Hash Is An Idempotency Contract, Not A Debug Detail
+
+Current files:
+
+```text
+src/main/services/team/opencode/bridge/OpenCodeReadinessBridge.ts
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts
+src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts
+```
+
+Current behavior:
+
+```ts
+function buildSendPayloadHash(input: OpenCodeSendMessageCommandBody): string {
+ const { payloadHash: _payloadHash, ...hashable } = input;
+ return stableHash(hashable);
+}
+```
+
+The prompt delivery ledger also fails an existing logical message terminally if the current inbox row payload hash no longer matches the existing ledger payload hash.
+
+Fragile point:
+
+```text
+adding transport-only fields to OpenCodeSendMessageCommandBody can change payloadHash
+```
+
+Examples of fields that should not change the logical delivery payload:
+
+- `settlementMode`;
+- local timeout budget;
+- observation mode;
+- debug flags;
+- runtime prompt identity returned after acceptance.
+
+Rules:
+
+- define a canonical send payload hash shape explicitly;
+- hash user-visible/logical delivery fields and idempotency fields, not observation transport knobs;
+- do not include `runtimePromptMessageId` in send payload hash because it is produced after acceptance;
+- if `settlementMode` is added, decide explicitly whether it is excluded from `payloadHash`;
+- add contract tests for hash stability before and after adding new optional fields.
+
+Recommended shape:
+
+```ts
+type OpenCodeSendPayloadHashShape = Pick<
+ OpenCodeSendMessageCommandBody,
+ | "runId"
+ | "laneId"
+ | "teamId"
+ | "teamName"
+ | "projectPath"
+ | "memberName"
+ | "text"
+ | "messageId"
+ | "deliveryAttemptId"
+ | "fileParts"
+ | "actionMode"
+ | "messageKind"
+ | "taskRefs"
+ | "agent"
+ | "noReply"
+>;
+```
+
+If this shape changes, update both app-side recovery tests and orchestrator precondition mismatch tests.
+
+### 4.21 Versioned JSON Stores Make "Optional" Fields Still Risky
+
+Current files:
+
+```text
+src/main/services/team/opencode/store/VersionedJsonStore.ts
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts
+```
+
+`VersionedJsonStore.updateLocked()` validates the whole next data set before writing. A new field can break old data if the validator becomes stricter than the migration.
+
+Rules for ledger fields like `runtimePromptMessageIds`:
+
+- parser accepts missing fields on old records;
+- writer normalizes missing fields into safe defaults only when the record is touched;
+- schema version stays compatible unless a real migration is required;
+- no existing ledger file should be quarantined only because it lacks new optional fields;
+- tests must read an old schema-1 fixture, update one record, and verify old untouched records still validate.
+
+Do not rely on TypeScript optionality alone. The runtime validator is the real compatibility boundary.
+
+### 4.22 Acceptance Unknown Is A Separate State From Accepted
+
+Current files:
+
+```text
+src/main/services/team/TeamProvisioningService.ts
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryRepairPolicy.ts
+```
+
+The current delivery path can mark:
+
+```text
+acceptanceUnknown = true
+status = retry_scheduled
+reason = opencode_prompt_acceptance_unknown_after_bridge_timeout
+```
+
+Phase 3 must not collapse this into accepted. Acceptance unknown means:
+
+```text
+the app does not know whether prompt_async reached OpenCode
+```
+
+It is not the same as:
+
+```text
+prompt_async accepted and runtimePromptMessageId known
+```
+
+Rules:
+
+- if commandStatus cannot prove acceptance, keep `acceptanceUnknown`;
+- do not write a fake `runtimePromptMessageId`;
+- observe/status recovery can later upgrade it to accepted if exact prompt evidence is found;
+- retry policy must still avoid duplicate prompts as much as possible, but it cannot use exact observe without a prompt ID.
+
+Tests:
+
+- bridge timeout with no commandStatus acceptance stays `acceptanceUnknown`;
+- bridge timeout recovered with `runtimePromptMessageId` upgrades to accepted and clears `acceptanceUnknown`;
+- accepted state never has `acceptanceUnknown=true`.
+
+### 4.23 There Are Two Payload Hash Layers
+
+Current files:
+
+```text
+src/main/services/team/TeamProvisioningService.ts
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts
+src/main/services/team/opencode/bridge/OpenCodeReadinessBridge.ts
+src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts
+```
+
+The source audit shows two different hash concepts:
+
+```text
+app ledger logical payload hash:
+ hashOpenCodePromptDeliveryPayload()
+
+bridge command precondition hash:
+ OpenCodeReadinessBridge.buildSendPayloadHash()
+```
+
+They solve different problems.
+
+The app ledger hash decides whether the same inbox message still represents the same logical delivery. It currently hashes:
+
+```text
+text, replyRecipient, actionMode, taskRefs, attachments metadata, source
+```
+
+The bridge command hash protects commandStatus recovery and orchestrator precondition matching. It currently hashes almost the full OpenCode send command body except `payloadHash` itself.
+
+Risk:
+
+If Phase 3 adds `settlementMode`, observation timeout, runtime prompt fields, or debug flags to the bridge command body, only the bridge hash should be considered for precondition recovery. The app ledger hash must not change unless the user-visible/logical delivery changes.
+
+Rules:
+
+- do not reuse the app ledger payload hash as the orchestrator command hash;
+- do not add transport/observation fields to `hashOpenCodePromptDeliveryPayload()`;
+- define a canonical bridge send-hash shape before adding `settlementMode`;
+- `runtimePromptMessageId` must not be in either send hash because it is produced after acceptance;
+- if app logical payload changes, keep existing terminal payload mismatch behavior;
+- if only observation transport knobs change, neither app ledger hash nor bridge precondition hash should create a false logical mismatch.
+
+Tests:
+
+- changing `settlementMode` does not change app ledger hash;
+- changing observation timeout/debug fields does not create app ledger payload mismatch;
+- changing text/taskRefs/actionMode still changes app ledger hash;
+- commandStatus recovery still rejects a truly different bridge command.
+
+### 4.24 Task Progress Proof Is Coarse And Must Stay In Its Lane
+
+Current files:
+
+```text
+src/main/services/team/opencode/delivery/OpenCodeRuntimeDeliveryProofReader.ts
+src/main/services/team/opencode/delivery/OpenCodeRuntimeDeliveryProofMatching.ts
+src/main/services/team/TeamProvisioningService.ts
+```
+
+`OpenCodeRuntimeDeliveryProofReader` can compute `taskProgressAt` from task comments/history by the same member and task after the prompt time. This is useful for advisory suppression, because it proves the member did some board-visible work after the runtime delivery.
+
+It is not the same as normal message delivery proof.
+
+Risk:
+
+For a user/member-visible message, a task comment by the owner can be enough to suppress a stale warning, but it must not automatically mark the original inbox row read/responded unless the existing `TeamProvisioningService` read-commit policy says that response state/action mode/task refs make it acceptable.
+
+Rules:
+
+- `taskProgressAt` can suppress runtime advisory candidates when policy allows;
+- `taskProgressAt` cannot replace `agent-teams_message_send` proof for normal direct replies;
+- `taskProgressAt` cannot clear a peer relay that required a visible reply to a specific recipient;
+- weak start-only comments should not be treated as strong progress by any new fast path;
+- work-sync and task-stall paths may use board progress, but only under their own proof contracts;
+- keep final read/responded decision centralized in `TeamProvisioningService` proof helpers.
+
+Tests:
+
+- task comment after prompt suppresses advisory but does not mark a normal direct message read unless read-commit policy allows;
+- peer relay with task comment but no correct recipient reply stays pending;
+- weak start-only task comment does not count as strong completion proof;
+- work-sync report/progress does not unblock unrelated normal delivery.
+
+### 4.25 OpenCode Task-Log Attribution Store Is A Compatibility Boundary
+
+Current files:
+
+```text
+src/main/services/team/taskLogs/stream/OpenCodeTaskLogAttributionStore.ts
+src/main/services/team/taskLogs/stream/OpenCodeTaskLogAttributionService.ts
+```
+
+The attribution store is deliberately tolerant on read and strict on write:
+
+```text
+read side:
+ supports schemaVersion=1
+ supports both tasks[taskId] and legacy records[]
+ ignores malformed/oversized/timeout records by returning []
+
+write side:
+ validates team/task/member names
+ validates task_session requires sessionId
+ validates member_session_window requires since or startMessageUuid
+ writes canonical tasks{} shape
+ caps file size at 512KB
+```
+
+Phase 2 must preserve this behavior. Exact session evidence should be added through the attribution service or a narrow evidence source, not by sprinkling raw file writes in task-log projection code.
+
+Rules:
+
+- keep read compatibility for both `tasks` and legacy `records`;
+- keep malformed attribution files non-fatal for user task logs;
+- keep writer validation strict and explicit;
+- do not increase the 512KB file cap without a separate performance review;
+- exact delivery-ledger evidence can be read separately, but if persisted into attribution, use `OpenCodeTaskLogAttributionService`;
+- attribution audit fields (`createdAt`, `updatedAt`, `source`) must not be used as segment identity.
+
+Tests:
+
+- legacy `records[]` attribution file still reads;
+- malformed attribution file returns fallback/empty without crashing stream;
+- `task_session` without sessionId is rejected on write;
+- oversized attribution file does not block normal task-log response;
+- changing only attribution audit fields does not create duplicate segments.
+
+### 4.26 Message Kind Enums Drift Across Boundaries
+
+Current files:
+
+```text
+src/shared/types/team.ts
+src/main/services/team/TeamInboxReader.ts
+src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts
+src/renderer/utils/teamMessageFiltering.ts
+src/shared/utils/teamAutomationMessages.ts
+```
+
+Source-audit finding:
+
+```text
+InboxMessageKind includes task_stall_remediation
+OpenCodeSendMessageCommandBody includes task_stall_remediation
+OpenCodePromptDeliveryLedger validator currently does not accept task_stall_remediation
+TeamInboxReader currently does not preserve task_stall_remediation from stored rows
+```
+
+This is easy to miss because normal runtime delivery mostly uses `member_work_sync_nudge`, `task_comment_notification`, or default messages. But any plan that relies on message kind as proof context must keep all boundary whitelists in sync.
+
+Rules:
+
+- every new or existing `InboxMessageKind` used by OpenCode delivery must be accepted by:
+ - shared type;
+ - inbox reader;
+ - ledger validator;
+ - bridge command contract;
+ - renderer filtering helpers;
+- do not add proof policy that depends on a message kind before the kind survives read/write roundtrip;
+- if a kind is intentionally not supported by OpenCode delivery ledger, document and reject it before ledger creation with a structured diagnostic;
+- add enum parity tests rather than relying on TypeScript types.
+
+Tests:
+
+- `task_stall_remediation` round-trips through inbox reader when persisted;
+- OpenCode delivery ledger either accepts `task_stall_remediation` or rejects it before store validation with a clear reason;
+- renderer automation filtering still hides automation rows by default after message kind roundtrip;
+- adding a future `InboxMessageKind` fails a parity test until all whitelists are updated.
+
+### 4.27 Repair Control Text Must Not Become The Logical Payload
+
+Current files:
+
+```text
+src/main/services/team/TeamProvisioningService.ts
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryRepairPolicy.ts
+src/main/services/team/runtime/OpenCodeTeamRuntimeAdapter.ts
+```
+
+Source-audit finding:
+
+```text
+hashOpenCodePromptDeliveryPayload() is computed from original logical input.text
+buildOpenCodePromptDeliveryAttemptText() prepends retry/control text later
+```
+
+This order is important. Repair control text is transport/retry instruction, not a new user-visible logical message. If the implementation starts hashing the final `deliveryText`, every retry can look like a payload mismatch for the same inbox row.
+
+Rules:
+
+- keep app ledger payload hash based on the original logical message, not retry control text;
+- bridge command hash may include actual prompt text sent to OpenCode, because it protects one concrete command attempt;
+- `deliveryAttemptId` must distinguish retry attempts when control text changes;
+- retry control text must never be persisted as the user message text or shown as the original inbox message;
+- tests must assert that adding retry control text does not mutate the app ledger payload hash.
+
+### 4.28 `message_send` Tool Error Needs Transport Repair, Not Only Better Prompt Text
+
+Current files:
+
+```text
+src/main/services/team/runtime/OpenCodeTeamRuntimeAdapter.ts
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryRepairPolicy.ts
+src/main/services/team/TeamProvisioningService.ts
+```
+
+Current OpenCode runtime prompt intentionally says:
+
+```text
+If message_send returns an unavailable, not connected, or missing-tool error,
+write the exact concise reply as plain assistant text once, then stop.
+```
+
+That is useful as a last-resort semantic fallback, but it also means a broken MCP connection can produce a plausible assistant response that is still not visible in the app.
+
+Rules:
+
+- before every retry prompt, run the same MCP/session readiness gate as first delivery;
+- if readiness repair fails, do not send another repair prompt into the broken session;
+- `message_send` tool error should stay a hard transport/protocol signal until visible reply materialization or exact proof succeeds;
+- plain-text materialization remains allowed only through the existing direct-user semantic gate;
+- do not broaden plain-text materialization to peer relays or task-linked replies without exact recipient/task proof.
+
+Tests:
+
+- `message_send` tool error with plain assistant text still triggers MCP/session repair before any retry prompt;
+- failed readiness repair keeps the ledger pending/retryable and does not send another prompt;
+- direct-user semantically sufficient plain text can materialize as visible proof;
+- peer relay plain text with no correct recipient stays pending.
+
+### 4.29 Visible Reply Recovery Can Overmatch Old Inbox Rows
+
+Current files:
+
+```text
+src/main/services/team/opencode/delivery/OpenCodeRuntimeDeliveryProofMatching.ts
+src/main/services/team/TeamProvisioningService.ts
+```
+
+Source-audit finding:
+
+```text
+isOpenCodeRecoveredVisibleReplyCandidate()
+ accepts message.source === undefined for compatibility
+ allows timestamp up to 5 seconds before inboxTimestamp
+ can recover by taskRefs without relayOfMessageId
+```
+
+These are useful compatibility paths for older inbox rows and missing `relayOfMessageId`, but they are intentionally weaker than exact relay correlation.
+
+Rules:
+
+- exact `relayOfMessageId` and expected visible message ID win over taskRefs-only recovery;
+- taskRefs-only recovery is a fallback, never a replacement for exact correlation when exact fields exist;
+- if multiple taskRefs-only candidates exist, prefer the first eligible candidate only when all candidates point to the same member/recipient/task set and none contradict source/timestamp rules;
+- do not widen the 5 second timestamp grace without a separate false-positive review;
+- source-missing compatibility should be allowed only with strong taskRefs/semantic proof, and should add a diagnostic like `visible_reply_missing_runtime_delivery_source`;
+- taskRefs-only recovery can attach missing taskRefs to a relay-correlated message, but must not attach unrelated taskRefs to a different visible reply.
+
+Tests:
+
+- relay-correlated reply beats an older taskRefs-only candidate;
+- two taskRefs-only candidates with different recipients do not auto-commit read/responded;
+- source-missing recovered reply records diagnostic and still requires semantic sufficiency;
+- message just outside the timestamp grace does not recover.
+
+### 4.30 Member-Work-Sync Inbox Idempotency Is Split Across Outbox And Inbox
+
+Current files:
+
+```text
+src/features/member-work-sync/core/domain/MemberWorkSyncNudge.ts
+src/features/member-work-sync/main/infrastructure/JsonMemberWorkSyncStore.ts
+src/features/member-work-sync/main/adapters/output/TeamInboxMemberWorkSyncNudgeSink.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+```
+
+Source-audit finding:
+
+```text
+JsonMemberWorkSyncStore.ensurePending()
+ detects payloadHash conflict for same outbox id
+
+MemberWorkSyncInboxNudgePort.insertIfAbsent()
+ accepts payloadHash
+
+TeamInboxMemberWorkSyncNudgeSink.insertIfAbsent()
+ currently checks only existing messageId
+```
+
+That is mostly safe when the nudge ID includes enough agenda identity, but it is still a contract split. If payload text or intent semantics change while messageId stays stable, the inbox sink can return `inserted=false` for an old row and the dispatcher can treat it as delivered/existing.
+
+Rules:
+
+- `buildMemberWorkSyncNudgeId()` must include every field that can change the intended work-sync action, or the inbox sink must detect payload conflict;
+- if inbox rows do not store payload hash, compare stable visible payload fields (`text`, `taskRefs`, `workSyncIntent`, `workSyncIntentKey`, review request IDs) before treating an existing message as equivalent;
+- never schedule a delivery wake for an existing nudge whose payload no longer matches;
+- if conflict is detected after inbox insertion, mark outbox terminal or retryable according to whether a new deterministic message ID can be generated safely;
+- keep this separate from OpenCode prompt delivery ledger; work-sync idempotency must not reuse OpenCode delivery record IDs.
+
+Tests:
+
+- same work-sync messageId and same payload returns existing without duplicate inbox row;
+- same work-sync messageId and different payload returns conflict;
+- conflict does not schedule OpenCode delivery wake;
+- review-pickup intent key changes produce a different nudge identity.
+
+### 4.31 Member-Work-Sync Is Not Foreground Assignment Delivery
+
+Current files:
+
+```text
+src/features/member-work-sync/core/application/MemberWorkSyncReconciler.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeActivationPolicy.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+src/features/member-work-sync/core/domain/SyncDecisionPolicy.ts
+src/main/services/team/TeamProvisioningService.ts
+```
+
+Source-audit finding:
+
+```text
+MemberWorkSyncReconciler plans nudges only from queue reconciliation.
+MemberWorkSyncNudgeActivationPolicy can block nudges while phase2 metrics are collecting or unhealthy.
+MemberWorkSyncNudgeDispatcher revalidates cooldown, busy signals, lifecycle, and agenda fingerprint before delivery.
+Foreground unread assignments intentionally suppress duplicate work-sync nudges.
+```
+
+This means member-work-sync is not a reliable first-start transport for a newly assigned task. It is a reconciliation and anti-stall backstop after the authoritative board/inbox state exists.
+
+Rules:
+
+- initial task assignment and direct teammate messages must still use the normal delivery path;
+- accept-fast cannot depend on work-sync to wake a member after foreground assignment delivery fails;
+- work-sync can reconcile after turn-settled, task changes, inbox changes, or runtime activity, but it must not replace delivery watchdog retries;
+- phase2 readiness, rate limits, busy signals, lifecycle state, and watchdog cooldowns remain valid reasons to skip a sync nudge;
+- if a user sees a task assigned but no runtime logs for several minutes, debug normal delivery acceptance/observation first, then work-sync state;
+- launch/bootstrap should not trigger user-visible work-sync chatter while teammates are still confirming readiness;
+- sync nudges during launch must be suppressed unless they are delayed until bootstrap is confirmed and no foreground unread assignment is already active.
+
+Tests:
+
+- foreground assignment delivery accepted but no visible proof remains owned by delivery watchdog, not member-work-sync;
+- foreground unread assignment suppresses work-sync nudge even when agenda state is `needs_sync`;
+- phase2 collecting metrics can skip generic sync nudge without blocking normal task assignment delivery;
+- launch/bootstrap pending state does not insert visible work-sync messages;
+- after a real turn-settled event and no active foreground unread assignment, work-sync may plan one idempotent nudge.
+
+### 4.32 Task Log Stream Live Refresh Is Event-Scoped
+
+Current files:
+
+```text
+src/renderer/components/team/taskLogs/TaskLogStreamSection.tsx
+src/renderer/components/team/taskLogs/TaskLogsPanel.tsx
+src/renderer/utils/teamChangeEvents.ts
+src/renderer/store/index.ts
+src/main/services/team/TeamProvisioningService.ts
+```
+
+Source-audit finding:
+
+```text
+TaskLogStreamSection reloads on:
+ event.type === "log-source-change"
+ or isTaskLogActivityChangeEvent(event) for the same taskId
+
+isTaskLogActivityChangeEvent(event) accepts:
+ taskSignalKind === "log"
+ or detail starts with "opencode-runtime-task-event:"
+
+TeamProvisioningService.recordOpenCodeRuntimeTaskEvent()
+ emits task-log-change with taskSignalKind "log"
+```
+
+This means the stream is not a continuous poller. If backend exact-session evidence is written but no task-scoped event is emitted, the fixed backend can still look late or empty until another task marker, visibility change, or manual refresh.
+
+Rules:
+
+- when delivery ledger/session evidence becomes sufficient to query a task-scoped OpenCode session, emit a narrow `task-log-change` for each affected taskRef;
+- use `taskSignalKind: "log"` for log-only refresh so renderer does not trigger unnecessary full team-data refresh;
+- do not emit one event per native tool row; emit on evidence creation/update, attribution upsert, or settled observation that changes the task-log candidate set;
+- include `teamName`, `taskId`, and `runId` when known;
+- do not broadcast to all tasks in the team;
+- hidden sections should still avoid immediate reload because the renderer already checks visibility/open state;
+- summary count refresh should use the same event path as full stream reload; otherwise the badge can remain `0` while logs are available.
+
+Tests:
+
+- an exact session evidence write emits one `task-log-change` event with `taskSignalKind: "log"` for every referenced task;
+- native tool rows appearing in an exact session can be loaded after that signal without waiting for another board MCP tool;
+- hidden Task Log Stream does not start heavy loading just because the signal fired;
+- task log badge count updates after the signal when the task logs panel has been opened;
+- unrelated taskRef does not reload the current task stream.
+
+### 4.33 OpenCode Inbox Relay Is A Single-Member Queue
+
+Current file:
+
+```text
+src/main/services/team/TeamProvisioningService.ts
+```
+
+Source-audit finding:
+
+```text
+relayOpenCodeMemberInboxMessages()
+ coalesces by team/member in openCodeMemberInboxRelayInFlight
+ sorts unread messages by priority and timestamp
+ gives member_work_sync_nudge lower priority than normal foreground messages
+ breaks the relay loop when delivery is accepted but response is still pending
+
+getOpenCodeMemberDeliveryBusyStatus()
+ treats unread foreground messages as busy
+ treats active prompt ledger record as busy
+```
+
+This is the queue boundary that prevents multiple prompts from being pushed into one OpenCode teammate at once.
+
+Rules:
+
+- accept-fast delivery must still break the relay loop after the accepted pending message;
+- do not continue relaying later unread inbox rows just because the endpoint accepted the prompt;
+- work-sync delivery wakes must go through the same relay queue and should stay behind foreground assignment/user messages;
+- `onlyMessageId` wake should not bypass an in-flight relay for the same member;
+- active ledger record remains the source of busy state until proof or terminal failure;
+- do not introduce a separate "fast path" for member-work-sync review pickup that skips foreground/busy checks.
+
+Tests:
+
+- two unread foreground messages for one OpenCode member send only the first while its ledger record is accepted/pending;
+- work-sync nudge waits behind unread foreground assignment;
+- `onlyMessageId` wake during in-flight relay does not start a parallel prompt;
+- active ledger record makes `getOpenCodeMemberDeliveryBusyStatus()` return busy for work-sync.
+
+### 4.34 Cross-Repo OpenCode Bridge Capability Is A Hard Boundary
+
+Current files:
+
+```text
+src/main/services/runtime/ClaudeMultimodelBridgeService.ts
+src/main/services/team/opencode/bridge/OpenCodeReadinessBridge.ts
+src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts
+src/main/services/team/opencode/bridge/OpenCodeBridgeCommandClient.ts
+agent_teams_orchestrator/src/services/opencode/OpenCodeBridgeCommandHandler.ts
+agent_teams_orchestrator/src/services/opencode/OpenCodeSessionBridge.ts
+```
+
+Source-audit finding:
+
+```text
+claude_team and agent_teams_orchestrator are deployed as two repos.
+The app can run with an older dev runtime root.
+Existing provider capability code exists, but accept-fast OpenCode delivery needs a more specific bridge command capability.
+```
+
+This is a hard compatibility boundary. If `claude_team` sends exact observe fields to an older orchestrator that silently ignores them, the app can believe it is observing the accepted prompt while the orchestrator is still using current lane/session fallback.
+
+Rules:
+
+- accept-fast must be enabled only when the orchestrator explicitly supports the needed OpenCode bridge capability;
+- required capability should cover at least `promptAsyncWithTurnSettled`, exact `runtimePromptMessageId`, exact `runtimeSessionId`, command outcome recovery, and no-reply protection;
+- add an explicit bridge protocol field, for example `opencodeDeliveryAcceptanceContractVersion`, instead of overloading generic `supportedCommands`;
+- validate that contract version in the same handshake path that already validates app-managed bootstrap contracts;
+- prefer an explicit capability probe or bridge status field over version-string parsing;
+- if capability is missing, fall back to current observed behavior and emit a developer diagnostic like `opencode_accept_fast_capability_missing`;
+- do not send required-only new fields to older commands unless the command schema is additive and old runtimes ignore them safely;
+- if the app requests acceptance mode and the orchestrator returns a response without accepted runtime prompt identity, classify as `acceptanceUnknown`, not accepted;
+- contract tests must simulate old orchestrator responses with missing fields, unsupported command errors, and stale commandStatus output;
+- live E2E must log the detected orchestrator capability snapshot so debugging does not depend on guessing which binary was running.
+
+Tests:
+
+- old orchestrator fixture without accept-fast capability uses observed mode;
+- orchestrator missing `runtimePromptMessageId` never upgrades ledger to accepted;
+- unsupported exact observe command returns structured diagnostic and no duplicate prompt;
+- capability-supported orchestrator preserves exact prompt identity through commandStatus recovery;
+- no-reply call path is rejected by the observed wrapper even if a future caller bypasses the command handler.
+
+### 4.35 Lane Registry Lock And Current-Lane State Must Not Own Exact Evidence
+
+Current files:
+
+```text
+src/main/services/team/opencode/store/OpenCodeRuntimeManifestEvidenceReader.ts
+src/main/services/team/opencode/store/VersionedJsonStore.ts
+src/main/services/team/TeamProvisioningService.ts
+```
+
+Source-audit finding:
+
+```text
+lanes.json is protected by withFileLock.
+OpenCode launch, reattach, stale cleanup, advisory, and recovery paths all read or write the same lane index.
+Previous live failures included lanes.json lock timeout and lane registry loss while runtime pid/session evidence still existed elsewhere.
+```
+
+The exact session evidence introduced by this plan must reduce dependence on the current lane registry, not increase it.
+
+Rules:
+
+- lane registry remains the active-lane and lifecycle index, not the source of truth for an already accepted prompt;
+- accepted prompt identity must be stored in the delivery ledger or command outcome store under its own bounded lock before optional lane diagnostics;
+- exact observe and exact task-log lookup must try recorded `runtimeSessionId` first, even if current lane now points to a newer session;
+- if lane index read fails after prompt acceptance, keep the accepted identity and record diagnostic `opencode_lane_index_unavailable_after_acceptance`;
+- if lane index read fails before delivery and there is no exact runtime evidence, block delivery with structured runtime diagnostic rather than guessing a lane;
+- do not hold the lane index lock while reading transcripts, writing task-log attribution, emitting renderer events, or doing network calls to OpenCode;
+- lane index writes should be bounded and sequential per team, never spawned in unbounded `Promise.all` from multi-taskRef fanout;
+- task-log evidence writes must not require lane index write success;
+- stale or empty lane registry must not delete exact prompt evidence until existing retention and team cleanup rules say it is safe.
+
+Tests:
+
+- accepted prompt survives simulated `lanes.json` lock timeout during post-send diagnostics;
+- exact task log lookup reads session evidence when current lane points elsewhere;
+- missing lane index before first delivery returns structured blocked result;
+- multi-taskRef evidence update does not issue unbounded parallel lane index writes;
+- stale lane cleanup does not remove delivery ledger exact prompt identity.
+
+### 4.36 Runtime Delivery Inbox Dedupe Must Stay A Correlation Aid
+
+Current file:
+
+```text
+src/main/services/team/TeamInboxWriter.ts
+```
+
+Source-audit finding:
+
+```text
+sendMessage()
+ uses findRuntimeDeliveryDuplicateIndex()
+
+findRuntimeDeliveryDuplicateIndex()
+ dedupes only source="runtime_delivery"
+ requires same relayOfMessageId, normalized from/to, and normalized text
+ returns the existing inbox messageId
+ merges taskRefs into the existing row
+```
+
+This is useful for repeated visible replies after retries, but it must not become an independent proof mechanism or hide exact relay correlation bugs.
+
+Rules:
+
+- runtime-delivery dedupe can merge duplicate visible rows, but proof commit still belongs to the delivery ledger/watchdog correlation path;
+- if inbox write returns `deduplicated=true`, downstream proof code must use the returned existing `messageId`, not the attempted new `messageId`;
+- dedupe must never cross different `relayOfMessageId`;
+- dedupe must never apply to `member_work_sync_nudge`, task-stall remediation, or system notifications;
+- taskRef merge is safe only after same relay/from/to/text match;
+- a deduped visible reply should still clear advisory and unblock the active delivery record once ledger correlation validates it;
+- do not add broader text-only dedupe for OpenCode plain-text fallback;
+- keep duplicate rows in debug artifacts identifiable by original delivery attempt ID when possible.
+
+Tests:
+
+- two identical runtime-delivery replies with the same `relayOfMessageId` are deduped into one row and their taskRefs are merged;
+- identical text with different `relayOfMessageId` creates separate rows;
+- deduped row returns existing messageId and ledger proof uses it;
+- member-work-sync nudge with the same text is not deduped as runtime delivery;
+- text-only plain assistant fallback cannot satisfy runtime-delivery dedupe without relay/source proof.
+
+### 4.37 Bridge Command Idempotency Must Survive Accept-Fast Tuning
+
+Current files:
+
+```text
+src/main/services/team/opencode/bridge/OpenCodeStateChangingBridgeCommandService.ts
+src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts
+src/main/services/team/opencode/bridge/OpenCodeBridgeCommandLedgerStore.ts
+src/main/services/team/opencode/bridge/OpenCodeReadinessBridge.ts
+```
+
+Source-audit finding:
+
+```text
+createOpenCodeBridgeIdempotencyKey()
+ hashes command, team, lane, run, and body
+
+OpenCodeBridgeCommandLedger.begin()
+ rejects same idempotencyKey with different requestHash
+ rejects retry while status is unknown_after_timeout
+
+OpenCodeReadinessBridge.recoverTimedOutSendMessage()
+ recovers by originalRequestId, deliveryAttemptId, messageId, payloadHash, team/lane/member/run
+```
+
+Accept-fast adds transport behavior to the same send command. If `settlementMode`, observe timeout, or retry-control fields drift between retries, the bridge ledger can treat the same logical delivery as a different command or fail recovery after a timeout.
+
+Rules:
+
+- choose `settlementMode` before the first bridge send attempt and persist it on the app delivery ledger record;
+- do not switch a pending delivery from observed to acceptance mode during retry unless a new delivery attempt ID is created and duplicate prompt risk is explicitly accepted;
+- fields that are purely local observation options should not enter the bridge command body if they do not need to affect orchestrator idempotency;
+- fields that the orchestrator must honor, such as acceptance mode and exact observation contract, should enter the bridge body and be part of bridge idempotency;
+- `originalRequestId` must be stored before command execution so commandStatus recovery can query the exact attempt after timeout;
+- orchestrator success data must echo `idempotencyKey`; otherwise `assertBridgeEvidenceCanCommitToRuntimeStores()` will reject state mutation;
+- timeout recovery must preserve returned `runtimePromptMessageId` and `runtimeSessionId`, not synthesize accepted without identity;
+- app ledger payload hash and bridge command request hash must be documented as different contracts and tested separately.
+
+Tests:
+
+- retrying the same delivery with the same persisted settlement mode reuses the same bridge idempotency semantics;
+- changing settlement mode for the same delivery attempt is rejected or forces a new attempt ID;
+- commandStatus timeout recovery with matching originalRequestId returns exact runtime prompt identity;
+- commandStatus recovery with mismatched payloadHash or messageId does not accept;
+- result without echoed idempotencyKey is rejected before ledger/advisory mutation.
+
+### 4.38 Runtime Delivery Journal Is Separate From Prompt Delivery Ledger
+
+Current files:
+
+```text
+src/main/services/team/opencode/delivery/RuntimeDeliveryService.ts
+src/main/services/team/opencode/delivery/RuntimeDeliveryJournal.ts
+src/main/services/team/TeamProvisioningService.ts
+```
+
+Source-audit finding:
+
+```text
+deliverOpenCodeRuntimeMessage()
+ resolves lane and verifies runtime evidence
+ calls RuntimeDeliveryService.deliver()
+
+RuntimeDeliveryService.deliver()
+ normalizes RuntimeDeliveryEnvelope
+ computes payloadHash from provider/run/team/member/session/to/text/summary/taskRefs/createdAt
+ builds destinationMessageId from idempotencyKey/run/team
+ verifies destination before and after write
+ emits team change only after verified write
+
+RuntimeDeliveryJournalStore.begin()
+ treats same idempotencyKey with different payloadHash as conflict
+```
+
+This is the OpenCode agent-to-app path used by `agent-teams_message_send`. It is not the same as the app-to-OpenCode prompt delivery ledger.
+
+Rules:
+
+- prompt acceptance can never be treated as runtime delivery commit;
+- runtime delivery commit can be used as visible reply proof only after the destination write is verified;
+- if `message_send` returns `idempotency_conflict`, do not classify it as MCP disconnected or missing tool;
+- same `idempotencyKey` with changed text/taskRefs/createdAt must be rejected or explicitly re-keyed, not silently overwritten;
+- if the runtime generates a retry after tool error, either preserve the same payload exactly or use a new idempotency key;
+- `createdAt` is part of the current payload hash, so retry instructions and tests must not assume it is ignored;
+- destination change events should stay destination-specific: `lead-message` for user messages, `inbox` for member/cross-team rows;
+- runtime delivery journal reconciliation can prove a missing destination write, but it must not re-prompt OpenCode;
+- do not store runtime delivery journal state in member-work-sync or prompt delivery ledger records.
+
+Tests:
+
+- repeated identical runtime delivery idempotencyKey returns duplicate/committed without duplicate visible row;
+- same idempotencyKey with different `createdAt` or text returns conflict and no visible overwrite;
+- runtime delivery commit emits the expected destination change event after verification;
+- committed runtime delivery can clear a prompt delivery advisory only through the visible proof correlation path;
+- runtime delivery reconciliation reports recovery needed without sending another OpenCode prompt.
+
+### 4.39 Visible Proof Reader Must Read The Same Stores Runtime Delivery Writes
+
+Current files:
+
+```text
+src/main/services/team/opencode/delivery/OpenCodeRuntimeDeliveryProofReader.ts
+src/main/services/team/opencode/delivery/OpenCodeRuntimeDeliveryProofMatching.ts
+src/main/services/team/opencode/delivery/RuntimeDeliveryService.ts
+src/main/services/team/TeamSentMessagesStore.ts
+src/main/services/team/TeamInboxReader.ts
+src/main/services/team/TeamProvisioningService.ts
+```
+
+Source-audit finding:
+
+```text
+RuntimeDeliveryService user_sent_messages port
+ writes visible user replies to sentMessages.json
+
+OpenCodeRuntimeDeliveryProofReader
+ reads inbox candidates through TeamInboxReader
+ skips candidate rows whose source is not runtime_delivery
+```
+
+If direct replies to the user are stored in `sentMessages.json`, proof logic that only scans inbox rows can miss a real visible reply. That leaves advisory/watchdog state pending even though the user saw the answer.
+
+Rules:
+
+- visible proof reader must cover every destination kind that `RuntimeDeliveryService` can commit;
+- direct user replies should be recovered from `sentMessages.json` or from committed runtime delivery journal location, not only from a synthetic user inbox;
+- if runtime-delivery user messages are meant to satisfy proof by source, either write `source="runtime_delivery"` or make proof use committed journal location instead of source string;
+- do not weaken proof by accepting arbitrary `lead_process` sent messages;
+- `replyRecipient="user"` and lead-recipient fallback must have separate tests;
+- member inbox proof remains inbox-based and must not scan sentMessages;
+- cross-team proof should use its own committed location semantics, not user inbox fallback;
+- advisory clearing must happen after proof reader sees the actual committed destination, not just after `RuntimeDeliveryService.deliver()` returns.
+
+Tests:
+
+- OpenCode direct reply to user written to `sentMessages.json` is visible to proof reader;
+- unrelated `lead_process` user message does not satisfy OpenCode runtime proof;
+- member-to-member runtime delivery remains inbox-only proof;
+- committed runtime delivery journal location can recover proof after a missed change event;
+- user proof and lead-recipient fallback do not double-count the same visible message.
+
+### 4.40 Sent Messages Writes Need Inbox-Level Safety For Runtime Delivery
+
+Current files:
+
+```text
+src/main/services/team/TeamSentMessagesStore.ts
+src/main/services/team/TeamInboxWriter.ts
+src/main/services/team/opencode/delivery/RuntimeDeliveryService.ts
+src/main/services/team/TeamProvisioningService.ts
+```
+
+Source-audit finding:
+
+```text
+TeamInboxWriter.sendMessage()
+ uses withFileLock and withInboxLock
+ verifies writes and runtime_delivery dedupe
+
+TeamSentMessagesStore.appendMessage()
+ reads sentMessages.json
+ appends in memory
+ writes atomically
+ does not use a file lock
+ trims to MAX_MESSAGES
+
+RuntimeDeliveryService user_sent_messages port
+ writes direct OpenCode replies through TeamSentMessagesStore.appendMessage()
+```
+
+That is safe enough for low-volume lead output, but it is fragile for concurrent OpenCode runtime deliveries to the user. Two members can reply at the same time, both read the old file, and the later write can drop the earlier visible proof.
+
+Rules:
+
+- direct user runtime delivery should use a locked append path equivalent to inbox writes;
+- destinationMessageId should be checked under the same lock before appending;
+- append result should tell whether the row was inserted, already existed, or could not be verified;
+- trim-to-`MAX_MESSAGES` must keep the just-written row and must not silently evict a just-committed proof row;
+- proof/advisory clearing should not rely only on an unlocked write result;
+- do not make `TeamSentMessagesStore` own OpenCode proof semantics; expose safe append/read primitives and keep proof policy in delivery services;
+- if a locked sent-message writer is added, update normal lead writes carefully so live lead overlay behavior does not duplicate sent rows;
+- tests should include concurrent appends, duplicate destinationMessageId, and trim boundary.
+
+Tests:
+
+- two concurrent `appendMessage()` calls with different message IDs preserve both rows;
+- duplicate destinationMessageId does not create duplicate sent rows;
+- runtime user delivery verifies the row after locked append;
+- trim at `MAX_MESSAGES` preserves the newest committed row;
+- ordinary lead_process sent message behavior remains unchanged.
+
+### 4.41 Runtime Delivery TaskRefs Shape Must Be Strict
+
+Current files:
+
+```text
+src/main/services/team/opencode/delivery/RuntimeDeliveryJournal.ts
+src/main/services/team/TeamProvisioningService.ts
+src/main/services/team/runtime/OpenCodeTeamRuntimeAdapter.ts
+```
+
+Source-audit finding:
+
+```text
+normalizeRuntimeDeliveryEnvelope()
+ accepts taskRefs only when each item is a string
+ silently filters non-string taskRefs
+
+runtimeTaskRefs()
+ maps each string to { teamName, taskId: ref, displayId: ref }
+
+teamToolTaskRefs()
+ supports structured taskRefs elsewhere, but runtime delivery does not use it
+```
+
+This is a contract boundary with the OpenCode MCP tool. If the tool prompt or future schema sends structured task refs, runtime delivery can silently drop them. That weakens visible proof matching, task links in Messages, and task-log attribution.
+
+Rules:
+
+- runtime `message_send` taskRefs schema must be explicit: either string IDs only or structured `TaskRef[]`, not ambiguous;
+- invalid taskRefs must be rejected with a structured tool error or preserved through a normalizer, not silently filtered;
+- if string refs are accepted, define whether they are real task IDs or display IDs and resolve consistently;
+- prompt text and MCP schema must match the app normalizer;
+- proof matching should not depend on displayId when taskId is available;
+- tests must include string taskRefs, structured taskRefs, invalid mixed taskRefs, and missing taskRefs;
+- if structured taskRefs are adopted, update `hashRuntimeDeliveryEnvelope()` so equivalent refs hash deterministically.
+
+Tests:
+
+- string taskRefs preserve task links in user and member destinations;
+- structured taskRefs are either accepted and preserved or rejected with a clear error;
+- mixed invalid taskRefs do not silently produce an empty taskRefs array;
+- taskRefs normalization is stable across runtime delivery hash and visible proof matching;
+- OpenCode prompt artifact test matches the accepted taskRefs schema.
+
+### 4.42 Runtime Control Calls Must Not Default Unknown Secondary Member To Primary
+
+Current file:
+
+```text
+src/main/services/team/TeamProvisioningService.ts
+```
+
+Source-audit finding:
+
+```text
+deliverOpenCodeRuntimeMessage()
+recordOpenCodeRuntimeTaskEvent()
+recordOpenCodeRuntimeHeartbeat()
+ resolve lane through resolveOpenCodeRuntimeLaneId(teamName, runId, memberName)
+
+resolveOpenCodeRuntimeLaneId()
+ checks primary runtime run
+ checks in-memory secondary runtime runs
+ checks tracked mixed lanes
+ checks persisted launch-state member laneId
+ falls back to "primary"
+```
+
+Fallback to `primary` is acceptable for true primary OpenCode teams, but risky for mixed secondary teammates when lane metadata is missing or stale. A secondary member control call should not write delivery, task-log evidence, or heartbeat under the wrong lane.
+
+Rules:
+
+- runtime control calls with a non-lead `memberName` should require a resolved lane that is known to belong to that member, or reject with structured stale-evidence diagnostic;
+- fallback to `primary` is allowed only when the run is the primary OpenCode runtime run or the member is the configured OpenCode lead;
+- if persisted lane registry is missing but committed session evidence contains the exact `runtimeSessionId/memberName/runId`, use that exact evidence rather than blind primary fallback;
+- if neither lane nor exact session evidence exists, fail closed and do not write runtime delivery journal, task-log attribution, or heartbeat;
+- rejection reason must distinguish `lane_unresolved`, `run_tombstoned`, and `member_not_configured`;
+- recovery/debug artifacts should include the attempted memberName/runId/runtimeSessionId/lane resolution source.
+
+Tests:
+
+- secondary member runtime delivery with missing lane metadata rejects instead of writing under `primary`;
+- primary OpenCode lead runtime delivery can still use primary lane;
+- exact committed session evidence can recover lane for a secondary member when launch-state is stale;
+- tombstoned run rejects runtime delivery before destination write;
+- task event and heartbeat follow the same lane resolution rules as message delivery.
+
+### 4.43 OpenCode Inbox Relay Priority Must Keep Foreground Work First
+
+Current file:
+
+```text
+src/main/services/team/TeamProvisioningService.ts
+```
+
+Source-audit finding:
+
+```text
+getOpenCodeInboxRelayPriority()
+ member_work_sync_nudge -> 30
+ system_notification -> 20
+ normal foreground -> 0
+
+relayOpenCodeMemberInboxMessages()
+ sorts ascending by priority, then timestamp
+ delivers at most one message before breaking
+ stops the loop when delivery is accepted but response proof is pending
+
+getOpenCodeMemberDeliveryBusyStatus()
+ excludes member_work_sync_nudge from foreground blockers
+ blocks work-sync when unread/recent foreground messages exist
+```
+
+This is the main anti-delay invariant for OpenCode teammates. Work-sync can help after a turn settles, but it must not jump ahead of task assignment, review request, direct user message, or normal foreground teammate message.
+
+Rules:
+
+- keep the relay sort order explicit and covered by tests: lower priority number means earlier delivery;
+- foreground messages should beat work-sync nudges unless `onlyMessageId` intentionally targets one exact automation row;
+- `onlyMessageId` must be used only by controlled paths that already know the target message, not as a broad "wake this member" shortcut;
+- when a foreground delivery becomes accepted-pending, the relay loop must stop and keep later work-sync messages unread;
+- busy-status checks should still ignore work-sync as foreground noise, but only for deciding whether to schedule more work-sync;
+- if a future work-sync path needs urgent review pickup, it must use a distinct intent and tests, not invert the global priority order;
+- diagnostics should include `activeMessageKind` so skipped work-sync can be explained without showing automation rows in Messages.
+
+Tests:
+
+- normal unread task assignment is delivered before older `member_work_sync_nudge`;
+- accepted-pending foreground delivery stops the loop and leaves later work-sync unread;
+- `onlyMessageId` can deliver the targeted work-sync row without reordering the whole inbox;
+- busy status reports `opencode_foreground_inbox_unread` when foreground exists and a work-sync nudge is also pending;
+- review-pickup exception stays narrow and does not make all system notifications foreground blockers.
+
+### 4.44 Automation/Work-Sync Hiding Must Stay Presentation-Only
+
+Current files:
+
+```text
+src/main/services/team/TeamMessageFeedService.ts
+src/renderer/utils/teamMessageFiltering.ts
+src/shared/utils/teamAutomationMessages.ts
+src/shared/utils/teamInternalControlMessages.ts
+src/main/services/team/TeamInboxReader.ts
+```
+
+Source-audit finding:
+
+```text
+TeamMessageFeedService
+ builds a normalized feed from inbox, lead session, sent messages, and synthetic bootstrap
+ filters only internal protocol envelopes with isTeamInternalControlMessageEnvelope()
+
+teamMessageFiltering
+ hides task_stall_remediation and member_work_sync_nudge from normal UI by default
+ can include automation rows for diagnostics/activity when explicitly requested
+
+teamAutomationMessages
+ identifies task_stall_remediation by kind or legacy task-stall: id prefix
+ identifies member_work_sync_nudge by messageKind
+```
+
+Hiding automation from the normal Messages feed is correct, but it must not mutate durable inbox state or starve delivery/watchdog paths. UI filtering and durable delivery are different responsibilities.
+
+Rules:
+
+- hide `member_work_sync_nudge` and task-stall automation in renderer/feed presentation, not by deleting or marking inbox rows read;
+- `TeamInboxReader` must preserve automation `messageKind` values so renderer filtering, work-sync, watchdog, and diagnostics see the same metadata;
+- if main-process feed filters additional automation in the future, it must expose a debug/audit path that can still show the hidden rows;
+- feed `feedRevision` may ignore hidden rows for conversational UI, but work-sync diagnostics must not depend on that revision;
+- delivery relays, prompt ledgers, watchdog, and member-work-sync must read durable inbox stores directly, not the UI-filtered message feed;
+- legacy ID-prefix classification is compatibility only; new rows should rely on explicit `messageKind`;
+- hiding automation must not change unread counts used by the delivery queue.
+
+Tests:
+
+- `member_work_sync_nudge` is hidden in normal Messages but still present in raw inbox diagnostics;
+- `task_stall_remediation` round-trips through `TeamInboxReader` with its `messageKind`;
+- renderer diagnostic mode can include member-work-sync rows only when explicitly requested;
+- hiding a work-sync row does not mark it read and does not stop OpenCode relay from delivering it when selected;
+- feed cache invalidation does not become the only way to observe hidden automation writes.
+
+### 4.45 OpenCode File-Change Backfill Is A Separate Evidence Pipeline
+
+Current files:
+
+```text
+src/main/services/team/ChangeExtractorService.ts
+src/main/services/team/TaskChangeLedgerReader.ts
+src/main/services/team/opencode/bridge/OpenCodeBridgeCommandContract.ts
+test/main/services/team/ChangeExtractorService.test.ts
+test/main/services/team/TaskChangeLedgerReader.test.ts
+```
+
+Source-audit finding:
+
+```text
+ChangeExtractorService.runOpenCodeBackfill()
+ writes a temporary delivery context file plus deliveryContextHash
+ calls backfillOpenCodeTaskLedger()
+ accepts imported events or current-contract duplicates-only evidence
+ invalidates task change summaries only when importedEvents > 0
+
+TaskChangeLedgerReader
+ maps opencode_toolpart_write/edit/apply_patch to UI snippets
+ ranks evidence by sourceImportKey and full-text availability
+ can surface metadata-only fallback as manual review / unavailable content
+```
+
+Task Log Stream rows and file-change ledger evidence are related but not interchangeable. A visible `write` row in Task Log Stream does not prove a reviewable diff, and `No file changes recorded` does not prove the model did nothing if OpenCode backfill failed or only metadata-only evidence exists.
+
+Rules:
+
+- accept-fast changes must preserve the delivery context fields consumed by `ChangeExtractorService`: team, task, member, lane, session, taskRefs, payload hash, and evidence contract;
+- OpenCode file-change recovery should remain driven by task change ledger/backfill, not by Task Log Stream native tool rows;
+- metadata-only or empty toolpart rows should render as unavailable/manual-review evidence, not as successful text diffs;
+- `deliveryContextHash` must be stable for the exact delivery context and must not include transient retry-control text;
+- negative backfill cache entries must be invalidated when a new OpenCode delivery context appears;
+- duplicates-only results are cacheable only when `opencodeTaskLedgerEvidenceContractVersion` is current;
+- summary-only change extraction should await OpenCode backfill when delivery context exists, but should not hang the UI indefinitely;
+- a failed backfill should add diagnostics and preserve fallback behavior, not hide existing non-OpenCode changes.
+
+Tests:
+
+- summary-only change extraction triggers OpenCode backfill when exact delivery context exists;
+- negative OpenCode backfill cache is not reused after delivery context appears;
+- current-contract duplicates-only evidence is cacheable, old-contract duplicates-only evidence is not;
+- metadata-only OpenCode evidence shows manual-review/unavailable state without claiming no changes;
+- delivery context hash does not change when retry-control text changes but the logical task delivery does not.
+
+### 4.46 Runtime Store Manifest Recovery Must Not Downgrade Canonical Evidence
+
+Current files:
+
+```text
+src/main/services/team/opencode/store/RuntimeStoreManifest.ts
+src/main/services/team/opencode/store/OpenCodeRuntimeManifestEvidenceReader.ts
+src/main/services/team/opencode/delivery/RuntimeDeliveryJournal.ts
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts
+```
+
+Source-audit finding:
+
+```text
+RuntimeStoreManifest descriptors
+ opencode.deliveryJournal -> rebuildable_from_canonical_destination
+ opencode.promptDeliveryLedger -> rebuildable_from_canonical_destination
+ opencode.sessionStore -> rebuildable_from_provider
+ opencode.launchState / launchTransaction -> readiness_blocking
+ opencode.runtimeDiagnostics -> diagnostic_only, drop_after_quarantine
+```
+
+This is the recovery boundary after partial writes, lock timeouts, corrupted JSON, or stale lane registry. Prompt delivery ledgers and runtime delivery journals are canonical delivery evidence. They must not be treated like disposable diagnostics.
+
+Rules:
+
+- corrupted diagnostic stores can be dropped, but prompt/runtime delivery ledgers must be recovered from canonical destinations or quarantined with clear delivery state;
+- readiness-blocking launch stores can block new delivery, but cannot delete already committed prompt/runtime delivery evidence;
+- rebuilding from provider must not overwrite canonical destination evidence with older session-store data;
+- manifest rebuild should preserve lane-scoped file paths and not merge secondary lane evidence into primary;
+- accepted prompt identity and committed runtime delivery location should be read before lane registry fallback;
+- if canonical destination verification is incomplete, keep the delivery as `acceptanceUnknown` or `pending`, not `responded`;
+- artifact packs should include manifest recovery actions, quarantine paths, and rebuild source so production failures are debuggable.
+
+Tests:
+
+- corrupted diagnostics store is dropped without changing prompt delivery ledger;
+- corrupted prompt delivery ledger is not silently dropped and reports rebuild_required or quarantine;
+- rebuild from provider cannot downgrade a committed runtime delivery journal row;
+- stale session store does not overwrite exact accepted prompt identity;
+- secondary lane manifest recovery preserves lane-specific evidence and never rewrites it as primary.
+
+### 4.47 Stopped Teams And Tombstoned Runs Must Fence Runtime Evidence
+
+Current files:
+
+```text
+src/main/services/team/TeamProvisioningService.ts
+src/main/services/team/opencode/store/RuntimeRunTombstoneStore.ts
+test/main/services/team/RuntimeRunTombstoneStore.test.ts
+test/main/services/team/TeamAgentLaunchMatrix.safe-e2e.test.ts
+```
+
+Source-audit finding:
+
+```text
+deliverOpenCodeRuntimeMessage()
+recordOpenCodeRuntimeTaskEvent()
+recordOpenCodeRuntimeHeartbeat()
+ all resolve laneId
+ all call assertOpenCodeRuntimeEvidenceAccepted()
+ then write destination/task-log/liveness evidence
+
+RuntimeRunTombstoneStore.assertEvidenceAccepted()
+ rejects missing run id
+ rejects current run missing
+ rejects run mismatch
+ rejects tombstoned run/evidence kind
+
+stopTeam()
+ clears tracked run state
+ stops secondary OpenCode lanes
+ clears lane storage
+ emits process stop events
+```
+
+This is the protection against "team is stopped, but OpenCode still writes messages". It must be treated as a write-boundary invariant, not just a runtime cleanup detail.
+
+Rules:
+
+- every OpenCode runtime-originated write must validate team/run/lane evidence immediately before the destination write;
+- destination write means sent messages, member inbox, task attribution, task activity, heartbeat/liveness, prompt delivery ledger updates, advisory clearing, and task-log refresh events;
+- stopped parent team must make mixed secondary lanes non-deliverable even if a stale OpenCode process still has a live HTTP host;
+- tombstoned run evidence must be rejected before any user-visible message is appended;
+- old run IDs after relaunch must be diagnostic-only and must not clear current warnings or unread rows;
+- clearing lane storage during stop must not delete prompt/runtime delivery ledger evidence before it can be quarantined or used for debugging;
+- `RuntimeStaleEvidenceError` should surface machine-readable diagnostics (`missing_run_id`, `current_run_missing`, `run_mismatch`, `run_tombstoned`) without falling back to "provider unavailable";
+- any post-stop cleanup that kills orphaned OpenCode processes must be narrow: team/run/lane matched, not global `opencode serve` cleanup.
+
+Tests:
+
+- stopped pure OpenCode team rejects runtime `message_send` before sent-message/inbox write;
+- stopped mixed OpenCode secondary lane rejects task event and heartbeat before attribution/liveness write;
+- stale old run after relaunch cannot clear current member advisory or prompt delivery ledger row;
+- tombstoned run with matching evidence kind rejects delivery, heartbeat, and bridge result separately;
+- missing current run produces `current_run_missing` diagnostic and no user-visible message;
+- cleanup of stopped team leaves delivery ledger artifacts available for artifact pack/debug;
+- stale runtime process from stopped team is not recovered from persisted lane evidence after app restart.
+
+### 4.48 Destination Writes Must Drive Cache And Advisory Invalidation
+
+Current files:
+
+```text
+src/main/services/team/TeamProvisioningService.ts
+src/main/services/team/TeamDataService.ts
+src/main/services/team/TeamDataWorkerClient.ts
+src/main/workers/team-data-worker.ts
+src/renderer/store/index.ts
+test/renderer/store/teamChangeThrottle.test.ts
+test/main/ipc/teams.test.ts
+```
+
+Source-audit finding:
+
+```text
+renderer store
+ lead-message -> refresh tracked message feed only
+ inbox -> refresh message feed plus structural-safety team data refresh
+ member-advisory -> refresh advisory/team detail surface
+
+team-data-worker
+ invalidateTeamMessageFeed(team)
+ invalidateMemberRuntimeAdvisory(team, member?)
+ invalidateTeamConfig(team)
+```
+
+Several bugs in this area look like delivery bugs but are actually stale UI/cache state: the reply exists, but warning/advisory is still visible; hidden automation row exists, but diagnostics are stale; task log row exists, but badge count was cached.
+
+Rules:
+
+- after a successful runtime destination write, emit the same change signal that the destination's normal writer emits;
+- direct user reply in sent messages should cause `lead-message` feed refresh and member-advisory invalidation;
+- member inbox write should cause `inbox` refresh and member-advisory invalidation when it can satisfy proof;
+- task attribution/task event write should cause narrow `task-log-change` with taskId and runId;
+- advisory invalidation must be keyed by canonical member name, and unsafe/unknown names should fall back to team-scoped invalidation;
+- hiding work-sync/task-stall rows from normal Messages must not suppress diagnostic cache invalidation;
+- worker cache invalidation is best-effort, but failure must not block the durable destination write;
+- accept-fast should return "accepted" based on prompt acceptance, not on whether renderer cache has already refreshed.
+
+Tests:
+
+- direct OpenCode reply to user clears runtime advisory after feed/proof refresh;
+- member inbox runtime reply emits `inbox` and invalidates the correct member advisory;
+- hidden work-sync row does not appear in normal Messages, but diagnostics and advisory state refresh;
+- task event emits `task-log-change` and reloads stream/count without full team refresh;
+- unsafe member name in invalidation falls back to team advisory invalidation;
+- worker unavailable path still writes destination and logs diagnostic only.
+
+### 4.49 Ledger Rebuild From Durable Destinations Must Stay Conservative
+
+Current files:
+
+```text
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts
+src/main/services/team/opencode/delivery/RuntimeDeliveryJournal.ts
+src/main/services/team/opencode/delivery/OpenCodeRuntimeDeliveryProofReader.ts
+src/main/services/team/TeamInboxReader.ts
+src/main/services/team/TeamSentMessagesStore.ts
+```
+
+Source-audit finding:
+
+```text
+OpenCodeRuntimeDeliveryProofReader
+ accepts strict relay/source proof
+ must read every destination store that RuntimeDeliveryService can write
+
+TeamInboxReader
+ normalizes messageKind values
+ currently must preserve automation and runtime-delivery metadata
+```
+
+If a ledger is rebuilt after corruption or version migration, it must not invent success. Rebuild from durable destination writes can prove a visible reply exists, but it cannot prove the prompt transport was accepted unless exact prompt identity also survived.
+
+Rules:
+
+- rebuild can mark visible proof as found only when destination row has strict relay/source/idempotency evidence;
+- rebuild cannot upgrade transport state to accepted without exact runtime prompt identity or command outcome proof;
+- rebuilt rows without prompt acceptance proof should be `acceptanceUnknown`, `pending`, or `responded_with_unknown_acceptance`, not normal accepted;
+- read/hidden automation state must not be changed during rebuild;
+- rebuilt proof must preserve `messageKind`, `source`, `relayOfMessageId`, `taskRefs`, destination kind, and destination message ID;
+- rejected/stale/tombstoned run evidence must not be used as rebuild input;
+- multiple plausible destination rows should keep the ledger ambiguous and advisory visible instead of guessing.
+
+Tests:
+
+- rebuild from strict sent-message proof clears advisory but keeps acceptance unknown when prompt identity is missing;
+- rebuild ignores UI-hidden work-sync rows for normal user reply proof unless message kind matches the delivery intent;
+- duplicate plausible reply candidates do not commit a single responded ledger row;
+- stale run destination row cannot rebuild current run delivery state;
+- taskRefs and messageKind survive rebuild and remain available to task-log/proof readers.
+
+### 4.50 Member-Work-Sync Scheduling Must Stay Causality-Safe
+
+Current files:
+
+```text
+src/features/member-work-sync/main/infrastructure/MemberWorkSyncEventQueue.ts
+src/features/member-work-sync/main/infrastructure/MemberWorkSyncNudgeDispatchScheduler.ts
+src/features/member-work-sync/main/composition/createMemberWorkSyncFeature.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeOutboxPlanner.ts
+```
+
+Source-audit finding:
+
+```text
+MemberWorkSyncEventQueue
+ default turn_settled/tool_finished delay -> 5s
+ default task_changed/inbox_changed/runtime_activity delay -> 15s
+ default startup/config/member_spawned delay -> 30s
+ optional queueQuietWindowMs overrides most non-manual triggers
+
+createMemberWorkSyncFeature
+ reconcile(queue) writes status
+ then dispatches due nudges for ready teams
+ scheduled dispatcher runs every 60s
+ canDispatchNudges can filter teams before delivery
+
+MemberWorkSyncNudgeDispatcher
+ revalidates agenda/status/fingerprint before insertion
+ checks phase2 activation, busy signal, watchdog cooldown, rate limit
+ schedules delivery wake 500ms after inbox insertion
+```
+
+This is the area most likely to create the "logs appeared after 6 minutes" class of confusion. The queue can be correct but too slow, or fast but causally wrong. The plan must guard against both failure modes.
+
+Rules:
+
+- do not set a broad production `queueQuietWindowMs` unless every trigger timing is explicitly revalidated;
+- `turn_settled` and `tool_finished` are fast consistency checks, not normal delayed watchdog nudges;
+- `startup_scan`, `config_changed`, and `member_spawned` should not deliver nudges until launch/bootstrap readiness says the team can dispatch nudges;
+- nudge delivery must pass `canDispatchNudges`, agenda fingerprint revalidation, busy signal, watchdog cooldown, and rate-limit checks immediately before inbox insertion;
+- foreground unread work must suppress generic work-sync nudge delivery, not just hide it from UI;
+- accepted-pending OpenCode delivery must keep work-sync queued behind it, not trigger a second simultaneous prompt;
+- event queue diagnostics must expose queued age, trigger reasons, runAt, maxRunAt, running age, and rerunRequested;
+- scheduled dispatcher should be recovery-only for due outbox rows, not the primary latency path for fresh task assignment.
+
+Tests:
+
+- `turn_settled` reconcile runs on the fast policy and is not delayed by broad quiet window defaults;
+- `startup_scan` during launch materializes status but does not deliver a nudge before `canDispatchNudges` is true;
+- foreground unread task assignment suppresses generic work-sync nudge even when work-sync outbox row exists;
+- accepted-pending OpenCode delivery causes work-sync dispatch to retry later without another prompt;
+- scheduled dispatcher can recover an already due outbox row after app restart;
+- queue diagnostics show the reason a member was delayed instead of leaving only "Waiting for response".
+
+### 4.51 Delivery Latency Breadcrumbs Must Be End-To-End And Correlatable
+
+Current files:
+
+```text
+src/main/services/team/TeamProvisioningService.ts
+src/main/services/team/OpenCodeReadinessBridge.ts
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts
+src/features/member-work-sync/core/application/MemberWorkSyncAudit.ts
+src/features/member-work-sync/main/infrastructure/FileMemberWorkSyncAuditJournal.ts
+test/main/services/team/openCodeLiveTestHarness.ts
+```
+
+Source-audit finding:
+
+```text
+OpenCode delivery, runtime proof, task logs, task activity, and member-work-sync
+currently have separate journals/diagnostics.
+That is correct architecturally, but hard to debug unless IDs are correlated.
+```
+
+If a task starts 6 minutes after assignment, we need to know which segment was slow: inbox write, relay selection, MCP repair, prompt acceptance, model turn, runtime tool failure, task log projection, or work-sync nudge dispatch.
+
+Rules:
+
+- keep journals separate by responsibility, but include a common correlation set: `teamName`, `memberName`, `taskId`, `messageId`, `relayOfMessageId`, `deliveryAttemptId`, `runtimeSessionId`, `runtimePromptMessageId`, `laneId`, `runId`;
+- capture timestamps for key phases: task created, inbox written, relay selected, MCP-ready check, prompt accepted, turn settled, first task tool, first native tool, visible proof, task completed, work-sync queued/planned/delivered/skipped;
+- diagnostics must be developer/audit metadata, not normal chat rows;
+- latency report should be read-only and derived from existing ledgers where possible;
+- do not add a single mega-log writer to multiple layers; each layer records its own event with shared correlation fields;
+- live E2E should print a compact phase table for failures and slow passes.
+
+Tests:
+
+- dry fixture can build a complete timeline from ledgers without reading UI state;
+- missing phase is reported as `missing:` with the previous known phase;
+- slow phase detection identifies relay wait vs prompt wait vs model/tool wait;
+- hidden work-sync rows do not disappear from the latency timeline;
+- stale-run evidence is shown as rejected phase, not as a gap.
+
+### 4.52 Member Status Presentation Must Not Hide Runtime Failures Behind Task Labels
+
+Current files:
+
+```text
+src/renderer/utils/memberLaunchDiagnostics.ts
+src/renderer/utils/teamProvisioningPresentation.ts
+src/renderer/components/team/members/MemberHoverCard.tsx
+src/renderer/components/team/TeamProvisioningBanner.test.ts
+test/renderer/utils/memberLaunchDiagnostics.test.ts
+test/renderer/utils/teamProvisioningPresentation.test.ts
+```
+
+Source-audit finding:
+
+```text
+member cards can show task-centric state such as "working on"
+runtime launch/spawn/advisory state is computed separately
+OpenCode secondary lanes can be failed_to_start, registered_only, runtime_pending_bootstrap, or confirmed_alive
+```
+
+The UI must not let a task label imply that a failed or unbootstrapped OpenCode member is actually working. This is a presentation invariant, but it protects debugging and user decisions.
+
+Rules:
+
+- runtime failure/bootstrap-pending/advisory state has higher visual priority than "working on";
+- task label can remain visible as assigned work context, but must not replace failed/registered/bootstrap status;
+- `registered_only` and `runtime_process` without bootstrap should be surfaced as runtime state, not inferred as online;
+- Worktree badge remains independent from runtime health;
+- member detail/hover card should expose laneId/sessionId/path diagnostics when available;
+- renderer selectors should prefer canonical spawn status snapshot over stale cached roster/task data;
+- stale spawn-status fetch after offline/stopped must not resurrect a member as working.
+
+Tests:
+
+- member with assigned task plus `failed_to_start` shows failure state and task context;
+- `registered_only` OpenCode member shows registered/bootstrap warning, not working;
+- current task assignment does not suppress runtime advisory;
+- stale spawn-status fetch after stopped team is ignored;
+- hover card shows runtime diagnostic and task label separately.
+
+### 4.53 OpenCode Tool-Error Plain Text Fallback Must Not Become A Dead End
+
+Current files:
+
+```text
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryRepairPolicy.ts
+src/main/services/team/opencode/delivery/OpenCodeRuntimeDeliveryDiagnostics.ts
+src/main/services/team/TeamMemberRuntimeAdvisoryService.ts
+src/main/services/team/opencode/delivery/OpenCodeRuntimeDeliveryProofReader.ts
+test/main/services/team/OpenCodePromptDeliveryRepairPolicy.test.ts
+test/main/services/team/OpenCodeRuntimeDeliveryDiagnostics.test.ts
+```
+
+Source-audit finding:
+
+```text
+OpenCode can produce a transcript-only assistant answer after message_send returns Not connected.
+The user may see the text in task logs, but the app has no durable visible reply unless runtime delivery wrote the destination.
+```
+
+This is the exact "model says it will provide summary as plain text" class of bug. It should trigger repair/retry semantics, not be accepted as a completed app delivery.
+
+Rules:
+
+- transcript-only plain text after `message_send` tool error is not visible proof;
+- `mcp_not_connected`, `tool_missing`, `destination_write_failed`, and idempotency conflict remain separate diagnostics;
+- repair prompt can reference the plain-text content, but must still require a real destination write or task progress proof;
+- work-sync/task-stall may consider substantive task board changes as progress, but must not count a transcript-only "I will send" statement as progress;
+- if the model completed task files but failed to notify, Changes/task ledger can show work while delivery advisory remains actionable;
+- no automatic duplicate user-visible reply should be synthesized by the app from transcript text.
+
+Tests:
+
+- `message_send Not connected` plus assistant plain text remains pending/proof-missing;
+- MCP readiness repair runs before the next retry prompt;
+- existing task changes remain visible in Changes/Task Log even while reply advisory stays pending;
+- idempotency conflict does not trigger MCP reattach;
+- a later real `runtime_delivery` destination write clears the advisory.
+
+### 4.54 Agenda Fingerprint Must Not Churn On Presentation-Only Changes
+
+Current files:
+
+```text
+src/features/member-work-sync/core/domain/ActionableWorkAgenda.ts
+src/features/member-work-sync/core/domain/AgendaFingerprint.ts
+src/features/member-work-sync/main/adapters/output/TeamTaskAgendaSource.ts
+test/features/member-work-sync/core/ActionableWorkAgenda.test.ts
+```
+
+Source-audit finding:
+
+```text
+buildActionableWorkAgenda already hashes canonical actionable items and generatedAt is not part of the fingerprint.
+TeamTaskAgendaSource currently does not pass sourceRevision, so future sourceRevision use must be explicit and tested.
+```
+
+This is good for stability, but it is fragile because adding a volatile field to `items`, `evidence`, or `sourceRevision` can turn every harmless board refresh into a new fingerprint and a new possible nudge.
+
+Rules:
+
+- fingerprint includes only actionable work semantics, not timestamps, UI order, unread counters, activity row IDs, or display-only cache revisions;
+- `generatedAt`, raw comment text, feed count, message count, work interval duration, and member-card presentation state never enter the fingerprint;
+- item ordering remains stable by semantic key, not task array order;
+- `blockedByTaskIds`, `blockerTaskIds`, review diagnostics, and history event IDs remain sorted before hashing;
+- if `sourceRevision` is introduced later, it must be a semantic revision, not a general team-data or renderer revision;
+- subject/displayId changes can be included only if the product wants them to invalidate report tokens and trigger sync;
+- tests must explicitly prove no churn on task array reorder and cosmetic/presentation-only changes.
+
+Tests:
+
+- same tasks in different array order produce the same fingerprint;
+- `generatedAt` and work-duration/presentation fields do not change fingerprint;
+- changing owner/status/dependency/review obligation changes fingerprint;
+- changing unrelated task for another member does not change this member fingerprint unless it affects dependency/review/lead clarification;
+- future `sourceRevision` use has a dedicated test that documents exactly why it changes the fingerprint.
+
+### 4.55 Member-Work-Sync Reports And Tokens Must Be Fingerprint-Scoped
+
+Current files:
+
+```text
+src/features/member-work-sync/core/domain/MemberWorkSyncReportValidator.ts
+src/features/member-work-sync/core/application/MemberWorkSyncReporter.ts
+src/features/member-work-sync/core/application/MemberWorkSyncPendingReportIntentReplayer.ts
+src/features/member-work-sync/main/infrastructure/HmacMemberWorkSyncReportTokenAdapter.ts
+src/features/member-work-sync/main/infrastructure/JsonMemberWorkSyncStore.ts
+test/features/member-work-sync/core/MemberWorkSyncReportValidator.test.ts
+test/features/member-work-sync/main/HmacMemberWorkSyncReportTokenAdapter.test.ts
+test/features/member-work-sync/main/JsonMemberWorkSyncStore.test.ts
+```
+
+Source-audit finding:
+
+```text
+The HMAC token binds teamName, memberName, agendaFingerprint, and expiresAt.
+Reporter re-loads current agenda before accepting a report.
+Pending report replay calls the same reporter, so stale intents should be rejected by the same validator.
+```
+
+This is the correct shape. The fragile part is replay and offline intents: a stale report must never suppress a newer actionable agenda just because it was stored earlier.
+
+Rules:
+
+- `caught_up` is accepted only when the current server agenda is empty;
+- `still_working` and `blocked` are accepted only for the current fingerprint;
+- `blocked` requires current board-backed blocker evidence;
+- pending report replay must mark stale fingerprint/token intents as rejected or superseded, not accepted;
+- pending report intent ID may include token/report payload, but acceptance still depends on current agenda validation;
+- rejected reports can update diagnostics/status, but cannot extend leases or clear `needs_sync`;
+- token secret regeneration invalidates old tokens safely and should be diagnostic-only.
+
+Tests:
+
+- stale fingerprint report is rejected even if taskIds still look plausible;
+- expired token report is rejected and does not extend the previous lease;
+- pending replay of an old `caught_up` intent after a new task appears remains rejected;
+- pending replay after member removal is superseded and does not materialize a nudge;
+- `blocked` without current blocked agenda evidence is rejected;
+- corrupt/regenerated token secret does not crash the reporter and forces a fresh status read.
+
+### 4.56 Runtime Turn-Settled Spool Must Be Durable, Idempotent, And Targeted
+
+Current files:
+
+```text
+src/features/member-work-sync/core/application/RuntimeTurnSettledIngestor.ts
+src/features/member-work-sync/main/infrastructure/FileRuntimeTurnSettledEventStore.ts
+src/features/member-work-sync/main/infrastructure/RuntimeTurnSettledSpoolInitializer.ts
+src/features/member-work-sync/main/adapters/output/TeamRuntimeTurnSettledTargetResolver.ts
+test/features/member-work-sync/main/RuntimeTurnSettledIngestor.test.ts
+test/features/member-work-sync/main/FileRuntimeTurnSettledEventStore.test.ts
+test/features/member-work-sync/main/TeamRuntimeTurnSettledTargetResolver.test.ts
+```
+
+Source-audit finding:
+
+```text
+FileRuntimeTurnSettledEventStore moves incoming -> processing -> processed/invalid and recovers stale processing files.
+RuntimeTurnSettledIngestor ignores non-terminal OpenCode outcomes and resolves provider-owned events through configured active members.
+Claude events are resolved by transcript/session lookup.
+```
+
+The spool is the bridge between runtime-level "turn settled" and member-work-sync. If it loses events, routes to the wrong member, or retries forever, the app will either miss sync opportunities or spam the wrong agent.
+
+Rules:
+
+- incoming payload write must be atomic or temporary-file based before it becomes claimable;
+- processing recovery must be bounded and must not process `.meta.json` files as events;
+- provider-owned `codex` and `opencode` events require explicit teamName/memberName and matching configured provider;
+- Claude transcript/session lookup must reject provider mismatch, removed member, reserved member, and deleted team;
+- non-terminal OpenCode outcomes remain ignored for work-sync, but still leave processed diagnostics;
+- malformed/oversized/unsupported-provider payloads are quarantined, not retried forever;
+- duplicate sourceId/event files must be idempotent at queue/outbox level, even if the file store sees both;
+- draining stays bounded and never blocks app startup on a huge spool.
+
+Tests:
+
+- stale processing file is recovered once and then processed;
+- invalid provider and oversized payload go to invalid with reason;
+- OpenCode `timeout`/`stream_unavailable` outcomes are ignored and do not enqueue reconcile;
+- OpenCode successful terminal event enqueues only the matching active OpenCode member;
+- provider mismatch rejects event for the wrong configured provider;
+- duplicate runtime sourceId does not produce duplicate user-visible nudges after reconcile/outbox planning.
+
+### 4.57 Task Impact Routing Must Stay Narrow But Safe
+
+Current files:
+
+```text
+src/features/member-work-sync/main/adapters/input/MemberWorkSyncTeamChangeRouter.ts
+src/features/member-work-sync/main/adapters/input/MemberWorkSyncTaskImpactResolver.ts
+test/features/member-work-sync/main/MemberWorkSyncTeamChangeRouter.test.ts
+test/features/member-work-sync/main/MemberWorkSyncTaskImpactResolver.test.ts
+```
+
+Source-audit finding:
+
+```text
+Task/team-change routing uses taskId/detail parsing, then resolves owner, reviewer, lead clarification, broken dependencies, and dependent task owners.
+If taskId is missing or resolver says fallbackTeamWide, it enqueues all active members.
+```
+
+This keeps most task changes narrow, but fallback behavior is a sharp edge: too narrow misses the agent that should wake; too broad creates unnecessary work-sync scans and possible nudge pressure.
+
+Rules:
+
+- task owner, current reviewer, lead for lead clarification, lead for broken dependencies, and owners of affected dependent tasks are the only normal impacted members;
+- unknown task ID can fall back team-wide for status materialization, but dispatch still revalidates foreground/readiness/cooldown before sending a nudge;
+- removed/inactive members are filtered before materialization;
+- self-review routes to lead, not to the same owner as reviewer;
+- task-log-change with a file path detail must only extract safe task JSON names, never arbitrary paths;
+- team-wide fallback must be visible in diagnostics so slow/spam cases are explainable;
+- resolver errors should fall back to team-wide scan, not drop the change silently.
+
+Tests:
+
+- owner-only task change enqueues only owner;
+- review task enqueues current reviewer and not stale reviewers;
+- self-review enqueues lead only;
+- broken dependency enqueues lead and dependent owners;
+- missing/unknown task ID uses team-wide fallback but downstream nudge planning still suppresses unsafe sends;
+- removed member is not materialized or queued.
+
+### 4.58 Busy Signal Must Be Advisory And Time-Bounded
+
+Current files:
+
+```text
+src/features/member-work-sync/main/infrastructure/MemberWorkSyncToolActivityBusySignal.ts
+src/features/member-work-sync/main/infrastructure/CompositeMemberWorkSyncBusySignal.ts
+test/features/member-work-sync/main/MemberWorkSyncToolActivityBusySignal.test.ts
+```
+
+Source-audit finding:
+
+```text
+Tool activity busy signal tracks active tool IDs and recent finish grace in memory.
+Composite busy signal returns busy on provider errors for 60 seconds.
+```
+
+Busy is a useful anti-spam signal, but it must never become a hidden correctness gate. If a finish/reset event is missed, busy can suppress nudges longer than intended unless every path is time-bounded and emits diagnostics.
+
+Rules:
+
+- busy signal is advisory only and cannot block normal foreground delivery;
+- active tool IDs should have a maximum stale lifetime or reset path in addition to finish events;
+- recent-finish grace stays short and tested;
+- `lead-activity: offline` drops team busy state;
+- busy signal errors can delay briefly, but must not suppress nudges indefinitely;
+- busy diagnostics include reason and retryAfterIso;
+- future persisted busy state must include TTL and team/run/member scope.
+
+Tests:
+
+- finish creates recent busy only until grace expires;
+- reset clears one member or whole team;
+- offline drops all team activity;
+- busy signal error returns a bounded retryAfter and later allows dispatch;
+- normal foreground delivery ignores generic busy state.
+
+### 4.59 Nudge Outbox Must Keep Plan-Time And Claim-Time Revalidation Separate
+
+Current files:
+
+```text
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeOutboxPlanner.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+src/features/member-work-sync/main/infrastructure/JsonMemberWorkSyncStore.ts
+test/features/member-work-sync/core/MemberWorkSyncUseCases.test.ts
+```
+
+Source-audit finding:
+
+```text
+Planner creates an outbox row only after current status, metrics, activation, and review-pickup capability checks.
+Dispatcher re-loads current agenda before delivery and supersedes stale fingerprints.
+Dispatcher also re-checks lifecycle, phase2 activation, rate limit, busy signal, and watchdog cooldown.
+```
+
+This two-step design is important. Planning an outbox item is not permission to send forever. Dispatch is the safety boundary because the board can change between planning and delivery.
+
+Rules:
+
+- outbox rows are durable intent, not final authorization;
+- dispatch must revalidate current agenda fingerprint before writing inbox;
+- dispatch must re-check team lifecycle and nudge dispatch readiness;
+- dispatch must re-check phase2/targeted recovery activation;
+- dispatch must re-check rate limit, busy signal, and watchdog cooldown;
+- stale outbox rows are superseded, not delivered;
+- retryable failures must get bounded `nextAttemptAt`;
+- terminal failures must not be revived unless a new fingerprint or explicitly supported intent key appears;
+- review-pickup partial delivery filtering remains request-event based, not broad agenda based.
+
+Tests:
+
+- planned nudge is superseded when agenda becomes empty before dispatch;
+- planned nudge is superseded when member reports `still_working` before dispatch;
+- planned nudge is retryable when busy/cooldown/rate-limit blocks dispatch;
+- planned nudge is terminal on inbox payload conflict;
+- delivered review-pickup request is not sent again for the same reviewRequestEventId;
+- new reviewRequestEventId after prior delivery creates only the missing event nudge.
+
+### 4.60 Inbox Nudge Sink Must Not Mask Payload Drift
+
+Current files:
+
+```text
+src/features/member-work-sync/main/adapters/output/TeamInboxMemberWorkSyncNudgeSink.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+src/features/member-work-sync/main/infrastructure/JsonMemberWorkSyncStore.ts
+test/features/member-work-sync/main/TeamInboxMemberWorkSyncNudgeSink.test.ts
+```
+
+Source-audit finding:
+
+```text
+TeamInboxMemberWorkSyncNudgeSink returns inserted=false when inbox already contains the stable messageId.
+It currently does not compare payloadHash itself.
+Outbox store checks payloadHash before dispatch, so the sink must stay behind that outbox boundary.
+```
+
+The sink is intentionally thin today, but that makes the dependency important. Section 4.89 upgrades this into a write-boundary invariant: existing inbox rows must be payload-compatible before they are treated as delivered.
+
+Rules:
+
+- only the outbox dispatcher should call the sink in production;
+- sink idempotency by messageId is acceptable only if payload equivalence is validated;
+- because the sink already receives `payloadHash`, prefer validating at the sink instead of relying only on callers;
+- writer result messageId must be used for outbox deliveredMessageId;
+- existing messageId with incompatible messageKind/source/taskRefs is a conflict, not a delivered nudge;
+- hidden automation filtering must not hide this row from the dispatcher/proof/debug stores.
+
+Tests:
+
+- outbox payload conflict prevents sink call;
+- sink existing messageId path is covered only for identical outbox payload;
+- existing inbox row with wrong messageKind/source is conflict if sink-level validation is added;
+- writer returning a different messageId is either accepted intentionally and recorded, or rejected with a test;
+- hidden automation row remains readable by raw inbox diagnostics after insertion.
+
+### 4.61 Targeted Recovery Must Stay Narrow And Provider-Specific
+
+Current files:
+
+```text
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeActivationPolicy.ts
+src/features/member-work-sync/core/application/MemberWorkSyncTargetedRecoveryPolicy.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+test/features/member-work-sync/core/application/MemberWorkSyncNudgeActivationPolicy.test.ts
+test/features/member-work-sync/core/application/MemberWorkSyncTargetedRecoveryPolicy.test.ts
+```
+
+Source-audit finding:
+
+```text
+Targeted recovery bypasses full shadow readiness only for OpenCode runtime delivery and lead inbox relay.
+Strict review pickup has its own bypass path.
+Non-OpenCode secondary providers stay behind phase2 readiness unless the agenda is strict review pickup.
+```
+
+This protects the system from enabling broad nudges before shadow metrics are healthy. The risk is accidentally expanding OpenCode-targeted recovery into "all providers can be nudged while collecting".
+
+Rules:
+
+- OpenCode targeted recovery applies only to providerId `opencode`;
+- lead targeted recovery applies only to canonical lead-like member names;
+- Codex/Anthropic/Gemini secondary members do not use OpenCode targeted recovery;
+- strict review pickup is the only cross-provider early-delivery exception;
+- ambiguous review pickup evidence does not use the review-pickup bypass;
+- targeted recovery still goes through dispatch-time lifecycle, busy, cooldown, rate limit, and inbox write checks.
+
+Tests:
+
+- OpenCode needs_sync can activate during shadow collection;
+- Codex/Anthropic/Gemini needs_sync stay inactive during shadow collection unless strict review pickup;
+- lead-like member activates through lead targeted recovery;
+- non-lead member named like a provider does not activate targeted recovery;
+- ambiguous review pickup stays out of strict review-pickup path;
+- targeted recovery dispatch still respects busy/watchdog/rate limit.
+
+### 4.62 Queue Coalescing Must Preserve Fast Triggers
+
+Current files:
+
+```text
+src/features/member-work-sync/main/infrastructure/MemberWorkSyncEventQueue.ts
+src/features/member-work-sync/main/composition/createMemberWorkSyncFeature.ts
+test/features/member-work-sync/main/MemberWorkSyncEventQueue.test.ts
+```
+
+Source-audit finding:
+
+```text
+Default trigger timing is fast for turn_settled/tool_finished and moderate for task_changed/inbox_changed.
+Passing queueQuietWindowMs currently becomes a broad fallback for every trigger except manual_refresh.
+If queueQuietWindowMs is large, it can delay turn_settled/tool_finished far beyond their default 5 seconds.
+```
+
+This is the likely class of "logs appeared after 6 minutes" bug: a broad quiet window or coalescing max wait can make a real wakeup wait behind a generic startup/quiet policy.
+
+Rules:
+
+- production should prefer per-trigger `triggerTiming` over broad `queueQuietWindowMs`;
+- `turn_settled` and `tool_finished` must keep low runAfter and bounded max wait;
+- `manual_refresh` must remain immediate;
+- startup/config/member-spawn scans can be slower and readiness-gated;
+- coalescing can delay duplicate work, but cannot push fast triggers beyond their documented max;
+- running-item follow-up keeps urgent reasons and schedules within 5 seconds as the current code does;
+- diagnostics must show firstQueuedAt, runAt, maxRunAt, trigger reasons, and reason counts.
+
+Tests:
+
+- default `turn_settled` and `tool_finished` run after about 5 seconds;
+- broad `queueQuietWindowMs` override cannot accidentally delay fast triggers if production uses explicit `triggerTiming`;
+- coalesced task_changed plus turn_settled runs at the earlier fast time;
+- running reconcile followed by turn_settled schedules a fast follow-up;
+- queue diagnostics expose enough timing data to explain a delayed start.
+
+### 4.63 Status Read Staleness Refresh Must Not Become A UI Polling Loop
+
+Current files:
+
+```text
+src/features/member-work-sync/main/composition/createMemberWorkSyncFeature.ts
+src/features/member-work-sync/core/application/MemberWorkSyncDiagnosticsReader.ts
+src/features/member-work-sync/renderer/hooks/useMemberWorkSyncStatus.ts
+```
+
+Source-audit finding:
+
+```text
+getStatus() reads diagnostics and enqueues manual_refresh when status is stale or an accepted lease expired.
+manual_refresh is immediate in the event queue.
+```
+
+This helps self-heal stale state, but if renderer polling repeatedly reads the same stale status faster than the queue can reconcile, it can create noisy coalescing and confusing audit logs.
+
+Rules:
+
+- stale-status read can enqueue refresh, but must rely on queue coalescing and not dispatch directly;
+- repeated reads for the same team/member should collapse into one queued/running refresh;
+- stale refresh cannot bypass lifecycle inactive checks;
+- status read must remain side-effect-light: no inbox write, no prompt delivery, no direct nudge dispatch;
+- renderer polling intervals should not be used as correctness timing;
+- diagnostics should distinguish `status_stale_refresh_enqueued` from actual nudge delivery.
+
+Tests:
+
+- repeated stale `getStatus()` calls coalesce into one manual refresh;
+- inactive team stale status does not dispatch a nudge;
+- expired accepted lease triggers refresh but not immediate inbox write;
+- renderer status hook does not display hidden work-sync rows as normal messages.
+
+### 4.64 Scheduled Dispatcher Is Recovery, Not Fresh Assignment Delivery
+
+Current files:
+
+```text
+src/features/member-work-sync/main/infrastructure/MemberWorkSyncNudgeDispatchScheduler.ts
+src/features/member-work-sync/main/composition/createMemberWorkSyncFeature.ts
+test/features/member-work-sync/main/MemberWorkSyncNudgeDispatchScheduler.test.ts
+```
+
+Source-audit finding:
+
+```text
+The event queue dispatches due nudges for the reconciled team immediately after queue reconciliation.
+The scheduler runs periodically over lifecycle-active teams and dispatches due outbox rows after restart or missed wakeups.
+```
+
+This split is healthy. The scheduler should not become the primary path for fresh task assignment, because its default interval is one minute and can hide actual delivery bugs.
+
+Rules:
+
+- normal task assignment delivery stays in the foreground delivery path;
+- work-sync event queue can plan and dispatch after reconcile for the affected team;
+- scheduled dispatcher only recovers due outbox rows after restart, missed timer, or transient failure;
+- scheduler must list lifecycle-active teams, not all team directories;
+- scheduler run is non-overlapping and bounded;
+- scheduler failures are warning diagnostics and do not block the app;
+- live slow-start diagnostics should say whether the nudge came from queue or scheduler.
+
+Tests:
+
+- fresh task assignment does not wait for scheduler tick when queue path is healthy;
+- due outbox row after app restart is dispatched by scheduler;
+- scheduler does not overlap runs;
+- scheduler skips inactive/no-team cases;
+- scheduler failure logs and recovers on next run.
+
+### 4.65 Runtime Turn-Settled Installation Must Match The Provider Emitter
+
+Current files:
+
+```text
+src/features/member-work-sync/main/infrastructure/RuntimeTurnSettledSpoolInitializer.ts
+src/features/member-work-sync/main/infrastructure/ShellRuntimeTurnSettledHookScriptInstaller.ts
+src/features/member-work-sync/main/infrastructure/runtimeTurnSettledHookSettings.ts
+src/features/member-work-sync/main/infrastructure/runtimeTurnSettledEnvironment.ts
+src/features/member-work-sync/main/composition/createMemberWorkSyncFeature.ts
+src/main/services/team/TeamProvisioningService.ts
+src/main/index.ts
+test/features/member-work-sync/main/RuntimeTurnSettledHookSettings.test.ts
+test/main/services/team/TeamProvisioningServicePrepare.test.ts
+test/features/member-work-sync/main/createMemberWorkSyncFeature.test.ts
+test/main/services/team/MemberWorkSyncCodex.live.test.ts
+test/main/services/team/MemberWorkSyncOpenCode.live.test.ts
+```
+
+Source-audit finding:
+
+```text
+Claude uses a shell Stop hook settings payload.
+Codex receives the spool environment through normal team provisioning.
+OpenCode receives the spool environment through the OpenCode bridge process wiring, not through the Claude shell hook installer.
+The shell hook script currently validates the provider argument as claude/codex only.
+```
+
+This is not one generic "install hook" path. Treating it as generic is a bug risk: a test could prove Claude hooks work while OpenCode never emits, or OpenCode bridge wiring could work while Codex secondary members miss the environment.
+
+Rules:
+
+- Claude Stop hook settings are installed only for Claude provider launches;
+- Codex turn-settled uses environment injection from provisioning, not Claude hook settings;
+- OpenCode turn-settled uses bridge environment injection from the main runtime bridge and live harness;
+- the OpenCode bridge env path must be covered directly, not inferred from shell hook tests;
+- if the shell script is ever reused for OpenCode, its provider allowlist must be changed in the same commit and tested;
+- missing turn-settled env must degrade to no sync event, not break foreground delivery or launch;
+- workspace trust env filtering must preserve the `AGENT_TEAMS_RUNTIME_TURN_SETTLED_` prefix;
+- diagnostics should record provider, install mode, and spool root without logging full prompt payloads.
+
+Tests:
+
+- Claude settings include one non-blocking Stop hook command and preserve user settings;
+- Codex primary and secondary launches include `AGENT_TEAMS_RUNTIME_TURN_SETTLED_SPOOL_ROOT`;
+- OpenCode bridge env includes the same spool root in desktop app and live harness composition;
+- a missing OpenCode bridge env produces no member-work-sync turn event but does not fail prompt delivery;
+- shell hook provider validation rejects unsupported providers and is not used as proof for OpenCode;
+- workspace trust preflight preserves the turn-settled spool environment variable.
+
+### 4.66 Runtime Turn-Settled Normalizers Must Stay Provider-Strict
+
+Current files:
+
+```text
+src/features/member-work-sync/main/infrastructure/ClaudeStopHookPayloadNormalizer.ts
+src/features/member-work-sync/main/infrastructure/CodexNativeTurnSettledPayloadNormalizer.ts
+src/features/member-work-sync/main/infrastructure/OpenCodeTurnSettledPayloadNormalizer.ts
+src/features/member-work-sync/main/infrastructure/CompositeRuntimeTurnSettledPayloadNormalizer.ts
+src/features/member-work-sync/contracts/types.ts
+test/features/member-work-sync/main/RuntimeTurnSettledIngestor.test.ts
+test/features/member-work-sync/main/CodexNativeTurnSettledPayloadNormalizer.test.ts
+test/features/member-work-sync/main/OpenCodeTurnSettledPayloadNormalizer.test.ts
+```
+
+Source-audit finding:
+
+```text
+Claude payloads are accepted only for provider "claude" and Stop hook data.
+Codex payloads require provider "codex" and source "agent-teams-orchestrator-codex-native".
+OpenCode payloads require provider "opencode" and source "agent-teams-orchestrator-opencode".
+The composite normalizer tries normalizers in order and should not turn a source mismatch into another provider's event.
+```
+
+This provider strictness is a safety boundary. It prevents a malformed OpenCode or Codex payload from being routed to the wrong member and creating a false "agent stopped" signal.
+
+Rules:
+
+- provider id and source string are part of the public turn-settled contract;
+- malformed payloads are invalid, not best-effort accepted;
+- unsupported-provider results may fall through to another normalizer, but provider/source mismatch for a claimed provider must fail closed;
+- `sourceId` must be stable for the same logical event and different for different turns;
+- `payloadHash` is useful for diagnostics and dedupe, but cannot be the only identity if the payload contains timestamp-like fields;
+- OpenCode outcomes that are not terminal enough for work-sync should be ingested as diagnostic or ignored by policy, not treated as caught-up proof;
+- old payload shapes must be accepted only when they still carry enough team/member/provider identity.
+
+Tests:
+
+- Codex payload with OpenCode source is rejected, not normalized as Codex;
+- OpenCode payload with Codex source is rejected, not normalized as OpenCode;
+- valid duplicate payloads produce the same `sourceId`;
+- changed session, turn, runtime prompt, or payload hash produces a distinct `sourceId`;
+- malformed JSON object is quarantined and never enqueues member reconcile;
+- empty or unknown provider payload cannot target `lead` by fallback.
+
+### 4.67 Runtime Turn-Settled Drain Scheduler Must Be Non-Blocking And Observable
+
+Current files:
+
+```text
+src/features/member-work-sync/main/infrastructure/RuntimeTurnSettledDrainScheduler.ts
+src/features/member-work-sync/main/infrastructure/FileRuntimeTurnSettledEventStore.ts
+src/features/member-work-sync/main/RuntimeTurnSettledIngestor.ts
+src/features/member-work-sync/main/composition/createMemberWorkSyncFeature.ts
+test/features/member-work-sync/main/RuntimeTurnSettledIngestor.test.ts
+test/features/member-work-sync/main/createMemberWorkSyncFeature.test.ts
+```
+
+Source-audit finding:
+
+```text
+The scheduler starts a first drain after a short delay, repeats periodically, skips overlapping runs, logs failures, and continues scheduling.
+```
+
+This is the right shape. The fragile part is operational: a failed drain must not silently stop future drains, and a slow drain must not block the Electron app.
+
+Rules:
+
+- first drain should run soon after feature composition so live agents do not wait for a full periodic interval;
+- drain runs are non-overlapping;
+- drain failure is warning-only and schedules the next attempt;
+- drain must have a bounded claim/read batch size;
+- stale `processing` spool files are recovered or quarantined deterministically;
+- invalid payloads are quarantined with diagnostics and never retried forever;
+- drain cannot write inbox nudges directly; it only ingests events and enqueues/reconciles through member-work-sync use cases.
+
+Tests:
+
+- scheduler starts a near-immediate drain and then repeats;
+- overlapping run is skipped without dropping the next scheduled run;
+- drain exception logs once and next tick still runs;
+- stale processing file is recovered and ingested once;
+- invalid payload is quarantined and not reprocessed forever;
+- manual drain in tests can assert event-to-reconcile without waiting for timers.
+
+### 4.68 Audit Journal Must Stay Diagnostic, Not Canonical State
+
+Current files:
+
+```text
+src/features/member-work-sync/main/infrastructure/FileMemberWorkSyncAuditJournal.ts
+src/features/member-work-sync/main/composition/createMemberWorkSyncFeature.ts
+test/features/member-work-sync/main/FileMemberWorkSyncAuditJournal.test.ts
+```
+
+Source-audit finding:
+
+```text
+The audit journal sanitizes and truncates payloads, rotates files, serializes appends per file, uses file locks, and logs append failures as warnings.
+```
+
+That makes it a good debug source and a bad source of truth. Do not base correctness on it.
+
+Rules:
+
+- correctness state lives in member-work-sync store, outbox, inbox, ledger, and runtime event store;
+- audit append failure cannot fail delivery, reconcile, report validation, or task write;
+- audit rotation/truncation means audit entries cannot be required for exact proof;
+- audit rows may be included in artifact packs as explanation only;
+- audit payloads must remain sanitized and bounded;
+- no full prompt, API key, auth path, or unbounded transcript should be written to audit.
+
+Tests:
+
+- audit append failure logs warning and use case still succeeds;
+- rotated audit files still leave current metrics/status intact;
+- truncated audit data is never parsed back as canonical proof;
+- artifact/debug export can include audit snippets without exposing secrets.
+
+### 4.69 Phase 2 Readiness Metrics Must Be Conservative Under Sparse Or Corrupt Data
+
+Current files:
+
+```text
+src/features/member-work-sync/core/domain/MemberWorkSyncPhase2Readiness.ts
+src/features/member-work-sync/core/application/MemberWorkSyncMetricsReader.ts
+src/features/member-work-sync/main/infrastructure/JsonMemberWorkSyncStore.ts
+test/features/member-work-sync/core/MemberWorkSyncPhase2Readiness.test.ts
+test/features/member-work-sync/main/JsonMemberWorkSyncStore.test.ts
+```
+
+Source-audit finding:
+
+```text
+Phase 2 readiness is derived from recent metric events. Empty data returns an assessment from empty metrics. Store repair/truncation can affect available events.
+```
+
+The safe default is conservative. Sparse or repaired data should keep the feature in collecting/shadow mode, not accidentally enable active nudges.
+
+Rules:
+
+- empty metrics must not report active-ready;
+- repaired or truncated metrics index must not create a false ready state;
+- high would-nudge rate, high fingerprint churn, high report rejection rate, and too few observed members block active readiness;
+- readiness rates must be computed from a bounded observation window with safe denominators;
+- audit journal rotation must not affect readiness metrics;
+- readiness state should explain its blocking reasons in diagnostics.
+
+Tests:
+
+- empty metrics returns collecting/not-ready with explicit reasons;
+- one member and too few events cannot become active-ready;
+- metrics truncation or corrupt event quarantine keeps readiness conservative;
+- high would-nudge rate, high fingerprint churn, or high report rejection rate blocks readiness;
+- healthy synthetic shadow data crosses thresholds only after minimum observation duration and event count.
+
+### 4.70 Member Work Sync Paths Must Use Canonical Member Storage
+
+Current files:
+
+```text
+src/features/member-work-sync/main/infrastructure/MemberWorkSyncStorePaths.ts
+src/main/services/team/TeamMemberStoragePaths.ts
+test/main/services/team/TeamMemberStoragePaths.test.ts
+test/features/member-work-sync/main/JsonMemberWorkSyncStore.test.ts
+```
+
+Source-audit finding:
+
+```text
+Member-work-sync path construction delegates per-member path safety to TeamMemberStoragePaths and getMemberKey.
+```
+
+Keep that boundary. Raw member names are user/team data and should not be manually joined into paths.
+
+Rules:
+
+- never build per-member sync paths by joining raw `memberName`;
+- indexes store canonical member keys and display names separately;
+- reserved identities such as `user`, `system`, and automation sources must not create teammate store directories;
+- removed member storage can be read for diagnostics but cannot be reused for a new active member without canonical identity checks;
+- case-only name drift must not split one member into two active sync stores;
+- path errors should fail closed and not dispatch nudges.
+
+Tests:
+
+- unsafe member names resolve through `TeamMemberStoragePaths` or are rejected;
+- case-only member-name drift reuses canonical key or fails closed consistently;
+- reserved identities cannot get member-work-sync report leases;
+- removed teammate diagnostics do not dispatch nudges to stale member storage.
+
+### 4.71 Team-Change Routing Must Treat Event Detail As Untrusted
+
+Current files:
+
+```text
+src/features/member-work-sync/main/adapters/input/MemberWorkSyncTeamChangeRouter.ts
+src/features/member-work-sync/main/adapters/input/MemberWorkSyncTaskImpactResolver.ts
+src/features/member-work-sync/main/infrastructure/MemberWorkSyncEventQueue.ts
+test/features/member-work-sync/main/MemberWorkSyncTeamChangeRouter.test.ts
+test/features/member-work-sync/main/MemberWorkSyncTaskImpactResolver.test.ts
+```
+
+Source-audit finding:
+
+```text
+The router parses inbox recipient from detail shaped like inboxes/<member>.json.
+It parses tool and member-turn-settled detail as JSON.
+It extracts task id from event.taskId or a safe <taskId>.json detail.
+Task resolver failures fall back to team-wide enqueue.
+```
+
+This is a fragile input-adapter boundary. `TeamChangeEvent.detail` is transport metadata, not domain truth. Dropping an event because detail had a new shape can recreate delayed or missing sync; broad fallback for every malformed event can create nudge pressure.
+
+Rules:
+
+- malformed JSON detail never throws out of `noteTeamChange`;
+- known durable task changes fall back team-wide only when exact impacted members cannot be resolved;
+- inbox/lead-message events should support every detail shape emitted by the app, or explicitly log/drop with diagnostics;
+- unknown inbox recipient must not create a raw member path;
+- task-log-change path parsing only accepts safe task JSON names, not arbitrary paths;
+- materializer failure must not prevent queueing other impacted active members;
+- team-wide fallback is diagnostic and still goes through queue, readiness, busy, cooldown, and outbox revalidation.
+
+Tests:
+
+- malformed tool/member-turn-settled JSON does not throw and does not enqueue wrong member;
+- inbox detail shapes emitted by current writers resolve the intended member;
+- unknown inbox detail is either diagnostic fallback or explicit no-op with a test documenting why;
+- task detail with absolute path or dotfile does not create a fake task/member;
+- materializer failure for one member does not block other impacted members;
+- resolver exception falls back team-wide but dispatcher revalidation suppresses unsafe nudges.
+
+### 4.72 Nudge Delivery Wake Is Best-Effort, But Must Not Be Invisible
+
+Current files:
+
+```text
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+src/features/member-work-sync/core/application/ports.ts
+src/features/member-work-sync/main/composition/createMemberWorkSyncFeature.ts
+test/features/member-work-sync/main/createMemberWorkSyncFeature.test.ts
+test/features/member-work-sync/core/MemberWorkSyncUseCases.test.ts
+```
+
+Source-audit finding:
+
+```text
+Normal nudge dispatch inserts the inbox row, marks outbox delivered, then schedules a short delivery wake.
+Wake failure is logged/audited as nudge_wake_failed, but the outbox row is already delivered because the inbox write succeeded.
+```
+
+That state model is reasonable: after durable inbox insert, retrying the outbox could duplicate rows. The risk is observability and recovery. If wake fails, the user can see a delayed agent even though the work-sync data layer thinks the nudge was delivered.
+
+Rules:
+
+- inbox insert is the durable delivery boundary for generic work-sync nudges;
+- wake failure after insert is not an outbox retry by default, because the row already exists;
+- wake failure must be visible in diagnostics, latency timeline, and artifact packs;
+- another safe trigger such as inbox refresh, manual refresh, or scheduled dispatcher may re-attempt the wake path, but must not insert a duplicate inbox row;
+- OpenCode foreground delivery and review-pickup direct delivery keep their own stricter acceptance semantics;
+- `member_work_sync_nudge_existing` wake cannot bypass foreground/busy checks.
+
+Tests:
+
+- wake failure after successful inbox insert records `nudge_wake_failed` and keeps outbox delivered;
+- a later wake for the same messageId does not insert another row;
+- wake failure appears in diagnostics/latency timeline;
+- foreground unread OpenCode delivery still blocks existing-nudge wake;
+- review-pickup direct delivery failure does not get mislabeled as generic wake failure.
+
+### 4.73 Pending Report Replay Must Be Bounded And Current-Agenda-Based
+
+Current files:
+
+```text
+src/features/member-work-sync/core/application/MemberWorkSyncPendingReportIntentReplayer.ts
+src/features/member-work-sync/core/application/MemberWorkSyncReporter.ts
+src/features/member-work-sync/main/infrastructure/JsonMemberWorkSyncStore.ts
+test/features/member-work-sync/core/MemberWorkSyncUseCases.test.ts
+test/features/member-work-sync/main/JsonMemberWorkSyncStore.test.ts
+```
+
+Source-audit finding:
+
+```text
+Pending replay uses the same reporter and therefore reloads current agenda.
+If reporter execution throws, the intent currently stays pending for a future replay.
+Validation failures are marked accepted/rejected/superseded through markPendingReportProcessed.
+```
+
+Replay is a recovery path for agents that called the report tool while the control API was unavailable. It must not become an infinite startup loop or a way to apply stale leases.
+
+Rules:
+
+- every replay uses current agenda and current token validation;
+- accepted/rejected/superseded validation results are marked processed;
+- transient execution failures may stay pending, but must have bounded retry diagnostics or an eventual stale cutoff;
+- replay cannot accept `caught_up` unless current agenda is empty;
+- replay cannot accept `still_working`/`blocked` unless current fingerprint and evidence still match;
+- replay should process a bounded number of intents per team per run;
+- old pending intents from removed members or inactive teams are superseded, not retried forever.
+
+Tests:
+
+- stale pending `caught_up` after new work appears is rejected and marked processed;
+- removed member pending intent is superseded;
+- a reporter throw leaves the intent pending only on a bounded/transient path and logs diagnostics;
+- many pending intents are processed in deterministic order with a limit;
+- replay summary distinguishes accepted, rejected, superseded, and unprocessed transient failures.
+
+### 4.74 Review-Pickup Escalation Must Be Idempotent
+
+Current files:
+
+```text
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeOutboxPlanner.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+src/features/member-work-sync/core/application/ports.ts
+test/features/member-work-sync/core/MemberWorkSyncUseCases.test.ts
+test/features/member-work-sync/core/application/MemberWorkSyncNudgeActivationPolicy.test.ts
+```
+
+Source-audit finding:
+
+```text
+Strict review pickup can bypass normal Phase 2 activation.
+Planner can escalate when delivery capability is unavailable or already-delivered review request is still stuck.
+Dispatcher can also escalate when a claimed review-pickup delivery cannot be delivered.
+```
+
+This is intentionally stronger than generic agenda sync, but it creates a duplicate-notification risk if plan-time and dispatch-time failures both notify lead for the same review request.
+
+Rules:
+
+- escalation key should include team, member, reviewRequestEventId set, agendaFingerprint or intent key, and reason class;
+- one review request event should not produce repeated lead escalations while the underlying board state is unchanged;
+- direct review-pickup delivery remains scoped to current review obligation only;
+- generic agenda sync must not use review-pickup bypass rules;
+- delivered review-pickup request IDs are durable and checked before planning more direct nudges;
+- failed direct delivery should either retry through outbox or escalate, not both for the same attempt.
+
+Tests:
+
+- plan-time capability absence escalates once for the same review request;
+- dispatch-time capability absence does not duplicate an existing plan-time escalation for the same key;
+- new reviewRequestEventId allows a new escalation;
+- generic agenda_sync item cannot use review-pickup bypass;
+- already-delivered review pickup that remains stuck escalates once and does not insert another direct nudge.
+
+### 4.75 Feature Composition Must Own Timers, Drains, And Disposal
+
+Current files:
+
+```text
+src/features/member-work-sync/main/composition/createMemberWorkSyncFeature.ts
+src/features/member-work-sync/main/infrastructure/MemberWorkSyncEventQueue.ts
+src/features/member-work-sync/main/infrastructure/RuntimeTurnSettledDrainScheduler.ts
+src/features/member-work-sync/main/infrastructure/MemberWorkSyncNudgeDispatchScheduler.ts
+test/features/member-work-sync/main/createMemberWorkSyncFeature.test.ts
+test/features/member-work-sync/main/MemberWorkSyncEventQueue.test.ts
+```
+
+Source-audit finding:
+
+```text
+createMemberWorkSyncFeature starts runtime turn-settled drain scheduler and optional nudge dispatch scheduler immediately.
+dispose stops the drain scheduler and awaits queue/nudge scheduler cleanup.
+```
+
+This is the main-process composition root. If old timers survive app reload, tests, or feature recreation, they can dispatch stale nudges from an old dependency graph.
+
+Rules:
+
+- all timers created by the feature are owned by the facade returned from composition;
+- dispose stops future drains and future scheduled nudge dispatch, and prevents queued/running reconciles from scheduling follow-up work;
+- in-flight reconciliation may complete, but must pass lifecycle/revalidation before any inbox write;
+- feature recreation must not share old queue state, stale report token cache, or stale busy signal memory;
+- dispose must be idempotent;
+- live tests should dispose the feature before deleting temp team dirs.
+
+Tests:
+
+- dispose prevents queued item from reconciling later;
+- dispose while reconcile is running does not schedule follow-up queue item;
+- scheduler timers are cleared and no later dispatchDue call fires;
+- a second feature instance does not receive events from the disposed instance;
+- dispose can be called twice safely.
+
+### 4.76 Agenda Source Member Merge Must Preserve Runtime Identity
+
+Current files:
+
+```text
+src/features/member-work-sync/main/adapters/output/TeamTaskAgendaSource.ts
+src/features/member-work-sync/core/domain/ActionableWorkAgenda.ts
+src/shared/utils/teamProvider.ts
+test/features/member-work-sync/main/adapters/output/TeamTaskAgendaSource.test.ts
+test/features/member-work-sync/core/ActionableWorkAgenda.test.ts
+```
+
+Source-audit finding:
+
+```text
+TeamTaskAgendaSource merges config members and members-meta by normalized name.
+Provider id comes from member.providerId or model inference.
+Removed members are filtered from active member names.
+```
+
+Provider id is not part of the agenda fingerprint today, but it controls provider-specific behavior such as OpenCode direct remediation and targeted recovery. A merge bug can make the agenda look correct while dispatch chooses the wrong delivery path.
+
+Rules:
+
+- config/meta merge must prefer removedAt and provider/runtime metadata consistently;
+- provider id changes should affect dispatch capability and diagnostics, not churn agenda fingerprint unless actionable work semantics changed;
+- unknown provider id fails closed for provider-specific direct delivery;
+- removedAt from meta must remove the member from active sync even if config still lists it;
+- lead-like detection must not accidentally classify provider name `codex` as a teammate name;
+- provider inference from model is compatibility only and should be diagnostic when it affects dispatch.
+
+Tests:
+
+- meta removedAt overrides active config member for sync eligibility;
+- providerId from meta/config enables OpenCode targeted recovery only for the matching member;
+- unknown provider keeps agenda status readable but blocks provider-specific direct delivery;
+- provider/model-only changes do not churn fingerprint unless a product decision says otherwise;
+- lead-like and reserved names cannot become secondary member sync targets.
+
+### 4.77 Outbox Claimed Rows Need A Lease Recovery Contract
+
+Current files:
+
+```text
+src/features/member-work-sync/main/infrastructure/JsonMemberWorkSyncStore.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+test/features/member-work-sync/main/JsonMemberWorkSyncStore.test.ts
+test/features/member-work-sync/core/MemberWorkSyncUseCases.test.ts
+```
+
+Source-audit finding:
+
+```text
+claimDue() moves pending or failed_retryable rows to claimed and increments attemptGeneration.
+markDelivered(), markSuperseded(), and markFailed() only update rows that match the claimed attemptGeneration.
+There is no obvious due-path recovery for rows that stay claimed after a process crash or killed dispatcher.
+```
+
+This is a classic queue lease problem. `claimed` cannot mean "owned forever". It must be a short lease. Otherwise a crash between claim and mark can permanently hide a valid nudge.
+
+Rules:
+
+- claimed rows need a bounded claim lease such as `claimedAt + claimStaleMs`;
+- stale claimed rows are returned to `failed_retryable` or `pending` with a diagnostic, not delivered blindly;
+- recovery must preserve `attemptGeneration` monotonicity so late writes from the old dispatcher are ignored;
+- terminal rows remain terminal and are never revived by stale-claim recovery;
+- recovery runs inside the same team/index lock order as normal claim, so it cannot race with active dispatch;
+- tests must prove crash recovery does not create duplicate inbox rows when the old attempt actually delivered just before restart.
+
+Tests:
+
+- stale claimed row becomes claimable again after lease expiry;
+- non-stale claimed row is not claimed by a second dispatcher;
+- late `markDelivered` from an older attemptGeneration is ignored after recovery;
+- stale claimed row with an existing inbox messageId remains idempotent through sink/outbox validation;
+- terminal delivered/superseded/failed_terminal rows are not revived.
+
+### 4.78 Runtime Turn-Settled Poison Payloads Need Backoff Or Quarantine
+
+Current files:
+
+```text
+src/features/member-work-sync/core/application/RuntimeTurnSettledIngestor.ts
+src/features/member-work-sync/main/infrastructure/FileRuntimeTurnSettledEventStore.ts
+test/features/member-work-sync/main/RuntimeTurnSettledIngestor.test.ts
+test/features/member-work-sync/main/FileRuntimeTurnSettledEventStore.test.ts
+```
+
+Source-audit finding:
+
+```text
+claimPending() moves incoming files to processing.
+Invalid normalizer results are marked invalid.
+If the ingestor catches an unexpected exception while processing a claimed payload, it logs and leaves the file in processing.
+The file store later recovers stale processing files back to incoming.
+```
+
+That is good for transient failure, but dangerous for poison payloads that repeatedly crash target resolution or enqueue. They can create endless drain churn.
+
+Rules:
+
+- unexpected processing failures should keep retry semantics, but with bounded attempts recorded in metadata;
+- after a small retry budget, payload moves to invalid with reason `processing_failed_repeatedly`;
+- retry count must survive app restart;
+- poison payload quarantine must not block later payloads in the same drain batch;
+- drain summary should distinguish `failed_transient` from `invalid_quarantined`;
+- OpenCode non-terminal outcomes that are intentionally ignored remain processed, not retried.
+
+Tests:
+
+- one transient exception is retried after stale processing recovery;
+- repeated exception exceeds budget and quarantines the file;
+- poison file does not prevent a later valid file from enqueueing;
+- invalid/quarantined metadata includes provider, sourceId if known, and concise reason;
+- ignored OpenCode timeout/stream_unavailable does not count as poison.
+
+### 4.79 IPC And HTTP Boundaries Must Validate Member Work Sync Requests
+
+Current files:
+
+```text
+src/features/member-work-sync/main/adapters/input/registerMemberWorkSyncIpc.ts
+src/features/member-work-sync/preload/index.ts
+src/main/http/teams.ts
+src/renderer/api/httpClient.ts
+test/features/member-work-sync/main/registerMemberWorkSyncIpc.test.ts
+test/renderer/api/httpClient.memberWorkSync.test.ts
+```
+
+Source-audit finding:
+
+```text
+Electron IPC handlers forward typed requests directly to the feature.
+HTTP routes validate teamName and some report fields, but browser-mode report currently posts the full request while the HTTP route normalizes source to "mcp".
+Renderer/preload code relies on TypeScript shape, not runtime validation.
+```
+
+Renderer and HTTP inputs are not domain-trusted. This is a Clean Architecture boundary: adapter validates and normalizes, application use cases receive a safe command.
+
+Rules:
+
+- shared request validators should live at the contract/input-adapter boundary, not inside domain policy;
+- IPC and HTTP should normalize teamName/memberName/state/fingerprint/taskIds consistently;
+- path-like member names may be accepted as display names only after canonical member validation and must never be used as raw filesystem paths;
+- report `source` must preserve real provenance: MCP tool report, app UI report, replay, or test;
+- browser-mode HTTP route must not label app-originated reports as MCP unless it is actually an MCP tool endpoint;
+- invalid IPC/HTTP input returns a structured error and never creates status, pending report, or outbox storage.
+
+Tests:
+
+- IPC rejects missing/blank teamName or memberName before calling the feature;
+- HTTP report rejects invalid state, empty fingerprint, non-array taskIds, and oversized note;
+- browser-mode report preserves or explicitly maps source provenance in a tested way;
+- encoded slash in memberName is treated consistently across HTTP client and route;
+- invalid requests do not create `.member-work-sync` files.
+
+### 4.80 Phase 2 Readiness Must Not Be Trained By Manual Status Reads
+
+Current files:
+
+```text
+src/features/member-work-sync/core/application/MemberWorkSyncReconciler.ts
+src/features/member-work-sync/main/infrastructure/JsonMemberWorkSyncStore.ts
+src/features/member-work-sync/core/domain/MemberWorkSyncPhase2Readiness.ts
+src/features/member-work-sync/renderer/hooks/useMemberWorkSyncStatus.ts
+test/features/member-work-sync/core/MemberWorkSyncPhase2Readiness.test.ts
+test/features/member-work-sync/main/JsonMemberWorkSyncStore.test.ts
+```
+
+Source-audit finding:
+
+```text
+MemberWorkSyncReconciler writes status for both request and queue contexts.
+JsonMemberWorkSyncStore creates status_evaluated metric events using evaluatedAt.
+Renderer status panel calls getStatus when opened.
+```
+
+If every manual status read trains readiness metrics, a user opening diagnostics repeatedly can move Phase 2 from shadow to active readiness. That is a hidden feedback loop.
+
+Rules:
+
+- metrics that unlock active nudges should be based on queue/runtime/team-change evaluations, not arbitrary UI reads;
+- request-based reads may update status for visibility, but should either not count toward readiness or carry a separate metric kind;
+- `refreshStatus` from UI must remain diagnostic and never directly dispatch a nudge;
+- readiness should expose how many events were automation-derived versus manual/request-derived;
+- test clocks should prove evaluatedAt changes alone do not create readiness.
+
+Tests:
+
+- repeated `getStatus`/request reconciles do not satisfy active readiness thresholds by themselves;
+- queue reconciles still count toward readiness;
+- manual refresh writes status but does not plan outbox;
+- mixed manual and queue events report clear readiness diagnostics;
+- opening `MemberWorkSyncStatusPanel` cannot turn on generic nudges.
+
+### 4.81 Watchdog Cooldown Fail-Closed Behavior Must Be Bounded And Visible
+
+Current files:
+
+```text
+src/features/member-work-sync/main/adapters/output/TeamTaskStallJournalWorkSyncCooldown.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+test/features/member-work-sync/main/TeamTaskStallJournalWorkSyncCooldown.test.ts
+test/features/member-work-sync/core/MemberWorkSyncUseCases.test.ts
+```
+
+Source-audit finding:
+
+```text
+Missing stall journal returns false.
+Malformed or unreadable stall journal returns true, suppressing work-sync nudges.
+Dispatcher treats watchdog_cooldown_active as retryable.
+```
+
+Failing closed is safer than spamming, but an unreadable journal can suppress sync forever if the retry path keeps seeing the same parse failure.
+
+Rules:
+
+- missing journal means no cooldown;
+- malformed/unreadable journal can suppress temporarily, but must expose a diagnostic reason;
+- repeated unreadable journal should not hide all work-sync forever without artifact visibility;
+- retry backoff for `watchdog_cooldown_active` should be bounded and observable;
+- watchdog cooldown is task-scoped: unrelated tasks for the same member should not be suppressed if taskIds do not overlap.
+
+Tests:
+
+- ENOENT returns no cooldown;
+- malformed JSON returns cooldown with diagnostic/audit reason;
+- unrelated task IDs do not trigger cooldown;
+- expired alertedAt does not suppress;
+- dispatcher retry after cooldown keeps outbox recoverable and does not duplicate nudge.
+
+### 4.82 Main-Process Event Fanout Must Stay Idempotent
+
+Current files:
+
+```text
+src/main/index.ts
+src/features/member-work-sync/main/composition/createMemberWorkSyncFeature.ts
+src/features/member-work-sync/main/infrastructure/MemberWorkSyncEventQueue.ts
+test/features/member-work-sync/main/createMemberWorkSyncFeature.test.ts
+test/features/member-work-sync/main/MemberWorkSyncEventQueue.test.ts
+```
+
+Source-audit finding:
+
+```text
+memberWorkSyncFeature.noteTeamChange() is called from file watcher forwarding and from TeamProvisioningService/teamLogSourceTracker emitters.
+The event queue coalesces by team/member and trigger reason.
+```
+
+Duplicate event delivery is expected in Electron main. The plan must not rely on "exactly once" event fanout.
+
+Rules:
+
+- noteTeamChange is at-least-once, not exactly-once;
+- duplicate events must coalesce into one reconcile per team/member window;
+- multiple trigger reasons can be preserved for diagnostics without changing agenda fingerprint;
+- event fanout must never call dispatcher directly;
+- feature disposal must remove or ignore stale fanout safely;
+- tests should simulate duplicate file watcher plus service-emitter events for the same task/inbox change.
+
+Tests:
+
+- duplicate inbox event enqueues one member reconcile and preserves trigger reasons;
+- duplicate task event does not create duplicate outbox rows;
+- event order `task then inbox` and `inbox then task` converges to the same agenda/outbox state;
+- post-dispose noteTeamChange is ignored or is a safe no-op;
+- repeated renderer broadcast does not imply repeated work-sync dispatch.
+
+### 4.83 Store Index Repair Must Not Drop Members With Bad Metadata Silently
+
+Current files:
+
+```text
+src/features/member-work-sync/main/infrastructure/JsonMemberWorkSyncStore.ts
+src/features/member-work-sync/main/infrastructure/MemberWorkSyncStorePaths.ts
+src/main/services/team/TeamMemberStoragePaths.ts
+test/features/member-work-sync/main/JsonMemberWorkSyncStore.test.ts
+```
+
+Source-audit finding:
+
+```text
+Index repair scans per-member storage by reading members/<memberKey>/member.meta.json.
+Malformed member storage dirs are ignored during repair.
+```
+
+Ignoring malformed member dirs is safe for path traversal, but it can hide existing outbox/report/status rows if the meta file is corrupt. The repair path must make that degradation visible and avoid destructive cleanup.
+
+Rules:
+
+- malformed member meta during repair is diagnostic, not destructive;
+- repair must not delete member feature files just because meta is unreadable;
+- indexes can skip unsafe dirs, but artifact/debug output should list skipped member keys;
+- legacy store fallback must not create new raw member dirs when meta is corrupt;
+- canonical member meta repair belongs to `TeamMemberStoragePaths`/team storage, not member-work-sync domain code.
+
+Tests:
+
+- corrupt member.meta leaves member feature files untouched;
+- repair emits audit/log diagnostic for skipped member storage;
+- valid member dirs still repair indexes when another dir is malformed;
+- legacy fallback does not create unsafe raw member directory;
+- metrics/outbox repair remains deterministic with mixed valid and malformed member dirs.
+
+### 4.84 Public Status Surfaces Must Separate Operator Diagnostics From User State
+
+Current files:
+
+```text
+src/features/member-work-sync/renderer/adapters/memberWorkSyncStatusViewModel.ts
+src/features/member-work-sync/renderer/ui/MemberWorkSyncStatusPanel.tsx
+src/renderer/components/team/messages/MessagesPanel.tsx
+src/renderer/components/team/activity/ActivityItem.tsx
+test/features/member-work-sync/renderer/memberWorkSyncStatusViewModel.test.ts
+test/renderer/components/team/messages/MessagesPanel.test.tsx
+```
+
+Source-audit finding:
+
+```text
+MemberWorkSyncStatusPanel can expose detailed status.
+MessagesPanel hides work-sync nudges from normal Messages.
+ActivityItem still has automation/work-sync rows for audit/activity views.
+```
+
+This split is correct. The risk is product semantics drift: a user-facing "Needs sync" badge can look like a broken agent, while a hidden automation row can look like missing history.
+
+Rules:
+
+- normal Messages feed hides `member_work_sync_nudge` and task-stall automation by default;
+- activity/debug views may show automation, but with clear control-plane copy;
+- member card/status surfaces should explain "work agenda sync" without implying user message failure;
+- status badge must not be used as a delivery proof source;
+- diagnostics panels can show fingerprint/token/outbox state, but not full prompts or secrets;
+- screenshots and tests should cover both normal-user and diagnostics modes.
+
+Tests:
+
+- normal Messages excludes work-sync nudges;
+- Activity/audit view can show work-sync rows with automation labeling;
+- status view model maps inactive/unknown/needs_sync without scary false-error copy;
+- hidden automation row still appears in diagnostics/raw inbox;
+- status panel never triggers dispatch by rendering.
+
+### 4.85 Protocol Proof Missing Is A Recovery Signal, Not A Success Proof
+
+Current files:
+
+```text
+src/main/services/team/TeamProvisioningService.ts
+src/main/services/team/runtime/RuntimeDiagnosticClassifier.ts
+src/main/services/team/opencode/delivery/OpenCodeRuntimeDeliveryDiagnostics.ts
+src/main/services/team/opencode/delivery/OpenCodeRuntimeDeliveryAdvisoryPolicy.ts
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryRepairPolicy.ts
+src/features/member-work-sync/main/adapters/input/MemberWorkSyncTeamChangeRouter.ts
+src/features/member-work-sync/main/infrastructure/MemberWorkSyncEventQueue.ts
+test/main/services/team/OpenCodeRuntimeDeliveryAdvisoryPolicy.test.ts
+test/main/services/team/OpenCodeRuntimeDeliveryDiagnostics.test.ts
+test/main/services/team/TeamProvisioningService.test.ts
+```
+
+Source-audit finding:
+
+```text
+getOpenCodeDeliveryPendingReason() can produce non_visible_tool_without_task_progress.
+RuntimeDiagnosticClassifier maps this class to protocol_proof_missing.
+OpenCodeRuntimeDeliveryAdvisoryPolicy delays generic proof-missing warnings and suppresses them after superseding proof.
+OpenCodePromptDeliveryRepairPolicy uses progress_proof_required for non-visible tool responses without known progress proof.
+```
+
+The screenshot class `OpenCode proof missing` is therefore not a completed delivery. It is a recovery condition.
+
+Rules:
+
+- `protocol_proof_missing` must never mark an inbox row as read;
+- `protocol_proof_missing` must never mark a delivery ledger record as accepted proof;
+- `protocol_proof_missing` must never satisfy `member_work_sync_report`;
+- `protocol_proof_missing` may enqueue a recovery signal only after the existing proof grace window and only with exact message identity;
+- proof-missing recovery must stay level-triggered and idempotent.
+
+Required identity:
+
+```ts
+type OpenCodeProofMissingRecoveryIdentity = {
+ teamName: string
+ memberName: string
+ originalMessageId: string
+ relayOfMessageId?: string
+ taskRefs: readonly string[]
+ reasonCode: 'protocol_proof_missing'
+ reasonToken:
+ | 'non_visible_tool_without_task_progress'
+ | 'visible_reply_still_required'
+ | 'responded_non_visible_tool'
+ | 'progress_proof_required'
+ runId?: string
+ attempt?: number
+}
+```
+
+Implementation guidance:
+
+- Add a narrow application-service adapter from runtime advisory state to member-work-sync event input. Do not place recovery logic in renderer code.
+- Use the original delivery `messageId` and `relayOfMessageId`. Do not create a new logical delivery.
+- Include `taskRefs` when available. If task refs are missing, do not broad-ping the whole team.
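+- Use a deterministic coalescing key. `identity.taskRefs` must already be in a canonical (sorted, de-duplicated) order before joining, otherwise the same refs in a different order produce a different key: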
+
+```ts
+const key = [
+ 'opencode-proof-missing',
+ identity.teamName,
+ identity.memberName,
+ identity.originalMessageId,
+ identity.reasonToken,
+ identity.taskRefs.join(','),
+].join(':')
+```
+
+- If a later `task_add_comment`, `task_complete`, `task_set_status`, `write`, `edit`, or visible `message_send` proof supersedes the diagnostic, cancel or suppress any queued recovery for the same key.
+- Keep progress proof strict. Do not count `read`, `bash`, `Thinking`, failed `message_send`, plain assistant fallback text, or MCP error output as proof.
+- Do not call OpenCode delivery directly from the advisory policy. The advisory policy stays pure and returns derived state only.
+
+Tests:
+
+- proof-missing advisory enqueues one recovery event but leaves ledger pending and inbox unread;
+- repeated advisory evaluation with the same message id does not duplicate recovery events;
+- a later visible reply suppresses advisory and prevents queued recovery from sending;
+- a later task progress proof suppresses advisory and prevents queued recovery from sending;
+- proof-missing without exact `messageId` or `memberName` logs a diagnostic and does not enqueue a broad nudge;
+- `read` plus `bash` plus failed `message_send` remains proof-missing.
+
+### 4.86 Advisory Cache And Recovery Signals Must Clear Together
+
+Current files:
+
+```text
+src/main/services/team/TeamProvisioningService.ts
+src/main/services/team/TeamMemberRuntimeAdvisoryService.ts
+src/main/services/team/TeamDataService.ts
+src/main/index.ts
+test/main/services/team/TeamMemberRuntimeAdvisoryService.test.ts
+test/main/services/team/TeamProvisioningService.test.ts
+```
+
+Source-audit finding:
+
+```text
+TeamMemberRuntimeAdvisoryService has batch caching.
+Runtime proof writes and member advisory changes invalidate derived service state.
+OpenCodeRuntimeDeliveryAdvisoryPolicy already has superseding-proof semantics.
+```
+
+The fragile part is stale cache: a valid proof can be recorded while the UI still shows `OpenCode proof missing`.
+
+Rules:
+
+- user-visible advisory state is derived state, not a second source of truth;
+- recovery queue state must not keep an advisory alive after the delivery ledger or task board contains valid proof;
+- cache invalidation failures must be diagnostics, not state transitions.
+
+Implementation guidance:
+
+- Treat `member-advisory` invalidation as part of the write boundary for visible reply and task progress proof.
+- When a recovery event is coalesced or canceled, emit a narrow advisory invalidation for the affected team/member.
+- If invalidation fails, log a developer diagnostic with the recovery key and continue. Do not retry by sending another agent message.
+- Keep cache values immutable from callers. Return cloned arrays/maps if the current service shares collections.
+- Keep `OpenCode proof missing` badge derived from current advisory evaluation. Do not persist the badge as durable recovery state.
+
+Tests:
+
+- advisory badge disappears after task progress proof is recorded;
+- advisory badge disappears after visible reply proof is recorded;
+- stale batch cache is invalidated for the affected member without refreshing unrelated teams;
+- failed invalidation logs diagnostics and does not create an extra work-sync nudge.
+
+### 4.87 Proof-Missing Recovery Must Not Fight Prompt Repair
+
+Current files:
+
+```text
+src/main/services/team/opencode/delivery/OpenCodePromptDeliveryRepairPolicy.ts
+src/main/services/team/TeamProvisioningService.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeOutboxPlanner.ts
+```
+
+Source-audit finding:
+
+```text
+Prompt repair already has a control-message path for progress_proof_required.
+Member-work-sync already has queue, cooldown, foreground unread checks, busy checks, and outbox dedupe.
+Both can observe the same proof-missing condition.
+```
+
+Rules:
+
+- delivery repair owns original delivery proof;
+- member-work-sync owns board/actionable-state reconciliation;
+- they may observe the same failure, but only one may send a nudge for a given logical message inside the cooldown window.
+
+Implementation guidance:
+
+- Before sending a work-sync proof-missing recovery, check whether the delivery ledger has an immediate retry scheduled or recently sent for the same `messageId`.
+- Before sending a delivery retry, check whether the work-sync outbox has a proof-missing recovery recently sent for the same `messageId`.
+- Prefer delivery repair for direct user-to-member messages where visible reply is required.
+- Prefer member-work-sync for task-scoped cases where the missing proof is task progress and the member has actionable board work.
+- If both are eligible, choose delivery repair first and let work-sync re-evaluate after cooldown.
+
+Example arbitration result:
+
+```ts
+type ProofMissingRecoveryDecision =
+ | { kind: 'send_delivery_repair'; messageId: string }
+ | { kind: 'send_work_sync_recovery'; messageId: string; taskRefs: readonly string[] }
+ | { kind: 'suppress_recent_recovery'; messageId: string; recoveryKey: string }
+ | { kind: 'wait_for_grace'; messageId: string; until: string }
+```
+
+Tests:
+
+- existing delivery retry suppresses work-sync recovery for the same message;
+- existing work-sync recovery suppresses duplicate delivery repair for the same message until cooldown expires;
+- task-scoped proof missing with actionable board work routes to work-sync when no delivery retry is active;
+- direct visible-reply proof missing routes to delivery repair when no board work is involved.
+
+### 4.88 Diagnostic Classifier Must Not Overmatch Protocol Proof Missing
+
+Current files:
+
+```text
+src/main/services/team/runtime/RuntimeDiagnosticClassifier.ts
+src/main/services/team/opencode/delivery/OpenCodeRuntimeDeliveryDiagnostics.ts
+test/main/services/team/RuntimeDiagnosticClassifier.test.ts
+test/main/services/team/OpenCodeRuntimeDeliveryDiagnostics.test.ts
+```
+
+Source-audit finding:
+
+```text
+RuntimeDiagnosticClassifier maps raw diagnostics into normalized reason codes.
+The same renderer surfaces quota/auth/provider errors and protocol proof errors.
+```
+
+Rules:
+
+- `protocol_proof_missing` is a behavior/protocol state, not provider auth, quota, network, or process health;
+- auth/quota/network errors keep higher-priority classifications;
+- classifier output must be safe to show in diagnostics and must not include secrets.
+
+Implementation guidance:
+
+```ts
+const precedence = [
+ 'quota_exhausted',
+ 'auth_error',
+ 'filesystem_error',
+ 'network_error',
+ 'provider_overloaded',
+ 'protocol_proof_missing',
+ 'backend_error',
+ 'unknown'
+] as const
+```
+
+- Keep proof-missing tokens narrow:
+ - `non_visible_tool_without_task_progress`;
+ - `visible_reply_still_required`;
+ - `responded_non_visible_tool`;
+ - `progress_proof_required`.
+- Do not classify arbitrary `not connected` strings as proof missing. With the current shared `MemberRuntimeAdvisory.reasonCode` union, keep those as `backend_error` or `network_error` unless a dedicated MCP reason code is added end-to-end in shared types, renderer copy, tests, and migrations.
+- `classifyRuntimeDiagnostic()` currently has rule priorities, but single-message classification depends on first matching rule order. If proof-missing rules are touched, either select the highest priority match for a single message or add explicit precedence tests proving the array order is intentional.
+- Redact API keys, bearer tokens, and full raw MCP payloads before creating user-facing advisory copy.
+
+Tests:
+
+- `message_send Not connected` does not classify as proof missing;
+- `OpenCode used tools, but did not create visible reply or task progress proof` classifies as proof missing;
+- quota and auth strings win over proof-missing substrings if both appear;
+- `opencode bridge command timed out` keeps the intended bridge/backend classification even though it contains `timed out`;
+- redaction removes token-like substrings from advisory diagnostics.
+
+### 4.89 Inbox Nudge Sink Must Validate Existing Rows, Not Just Message IDs
+
+Current files:
+
+```text
+src/features/member-work-sync/main/adapters/output/TeamInboxMemberWorkSyncNudgeSink.ts
+src/features/member-work-sync/core/domain/MemberWorkSyncNudge.ts
+src/features/member-work-sync/main/infrastructure/JsonMemberWorkSyncStore.ts
+test/features/member-work-sync/main/TeamInboxMemberWorkSyncNudgeSink.test.ts
+test/features/member-work-sync/main/JsonMemberWorkSyncStore.test.ts
+```
+
+Source-audit finding:
+
+```text
+TeamInboxMemberWorkSyncNudgeSink.insertIfAbsent receives payloadHash.
+If an inbox row with the same messageId already exists, the sink returns inserted=false without validating that the existing row matches the requested payload.
+JsonMemberWorkSyncStore.ensurePending detects outbox payload conflicts, but the inbox sink is still the final durable write boundary.
+```
+
+Rules:
+
+- same `messageId` plus different payload must be reported as a conflict, not silently treated as an existing nudge;
+- outbox `delivered` must not be marked for an inbox row with mismatched payload;
+- payload verification must be deterministic and independent from presentation-only fields;
+- this check belongs in the main adapter/write boundary, not renderer code.
+
+Implementation options:
+
+1. Store a durable `workSyncPayloadHash` field on the inbox row.
+ `🎯 9 🛡️ 9 🧠 4`, roughly `70-150 LOC`.
+2. Recompute hash from the existing inbox row shape and compare to requested payload hash.
+ `🎯 7 🛡️ 7 🧠 5`, roughly `90-190 LOC`, more brittle because row normalization can drift.
+3. Rely only on outbox conflict detection.
+ `🎯 5 🛡️ 5 🧠 2`, roughly `0-20 LOC`, not enough because manual/legacy rows can bypass outbox history.
+
+Recommended:
+
+Use option 1. It is the clean write-boundary invariant.
+
+Tests:
+
+- existing inbox row with same messageId and same payload hash returns existing;
+- existing inbox row with same messageId and different payload hash returns conflict;
+- conflict leaves outbox retry/terminal state explicit and does not mark delivered;
+- legacy row without hash is either validated by recompute or fails closed with diagnostic, but never silently delivered.
+
+### 4.90 OpenCode Inbox Relay In-Flight Coalescing Must Be Per Target Message
+
+Current files:
+
+```text
+src/main/services/team/TeamProvisioningService.ts
+test/main/services/team/TeamProvisioningServiceRelay.test.ts
+```
+
+Source-audit finding:
+
+```text
+relayOpenCodeMemberInboxMessages() uses one in-flight map key per team/member.
+When an existing relay is running and a later wake asks for onlyMessageId, the code only returns early if that target row is already read or missing.
+If the target exists and is still unread, a second relay work item can be started for the same team/member.
+```
+
+Rules:
+
+- one OpenCode member must not receive two overlapping inbox relay loops;
+- `onlyMessageId` wake should either attach to the running relay, queue behind it, or return a retryable queued-behind result;
+- never overwrite the in-flight map with a newer promise while the older promise is still running;
+- relay result must distinguish `already_read`, `queued_behind_active_relay`, `missing`, `accepted_pending`, and `failed_terminal`.
+
+Implementation options:
+
+1. Replace the per-member in-flight promise with a tiny per-member relay queue.
+ `🎯 9 🛡️ 9 🧠 5`, roughly `120-260 LOC`.
+2. Keep the promise map, but if `onlyMessageId` is unread while another relay is active, return `queuedBehindMessageId` and schedule a short wake.
+ `🎯 8 🛡️ 8 🧠 3`, roughly `60-140 LOC`.
+3. Allow overlapping relays and rely on prompt ledger idempotency.
+ `🎯 4 🛡️ 4 🧠 1`, roughly `0 LOC`, too risky for duplicate OpenCode prompts.
+
+Recommended:
+
+Use option 2 for this hardening pass, unless duplicate-relay tests expose a real need for option 1.
+
+Tests:
+
+- two `onlyMessageId` wakes during an active relay do not create two prompt deliveries;
+- unread target behind an active relay returns queued/retryable state and is delivered by a later wake;
+- target already read returns `already_read` (treated as delivered) without scheduling a new prompt;
+- missing target returns terminal missing diagnostics;
+- result diagnostics include the active relay key and target message id.
+
+### 4.91 Work-Sync Outbox Delivery Is Inbox Delivery, Not Runtime Acceptance
+
+Current files:
+
+```text
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+src/features/member-work-sync/main/adapters/output/TeamInboxMemberWorkSyncNudgeSink.ts
+src/main/index.ts
+src/main/services/team/TeamProvisioningService.ts
+```
+
+Source-audit finding:
+
+```text
+Regular agenda-sync nudges are marked delivered after inbox insertion.
+OpenCode wake is scheduled after that through nudgeDeliveryWake.
+Review-pickup nudges have a provider-specific delivery outcome, but generic agenda-sync nudges do not.
+```
+
+Rules:
+
+- outbox `delivered` means durable inbox row inserted, not necessarily runtime prompt accepted;
+- runtime acceptance/proof stays in OpenCode prompt delivery ledger and advisory state;
+- latency diagnostics must show `inbox_inserted_at`, `wake_scheduled_at`, `relay_started_at`, `prompt_accepted_at`, and `proof_at` when available;
+- UI copy must not imply an OpenCode agent saw the nudge just because outbox status is delivered.
+
+Implementation guidance:
+
+- Keep current durable inbox boundary. Do not block generic outbox completion on model runtime response.
+- Add diagnostics fields rather than changing semantics:
+
+```ts
+type WorkSyncNudgeDeliveryTimeline = {
+ inboxInsertedAt?: string
+ wakeScheduledAt?: string
+ relayStartedAt?: string
+ promptAcceptedAt?: string
+ proofObservedAt?: string
+ terminalReason?: string
+}
+```
+
+- If wake scheduling fails, keep inbox row durable and log `nudge_wake_failed`; do not flip outbox back to pending unless a deliberate re-wake path owns that retry.
+
+Tests:
+
+- generic agenda-sync outbox delivered does not mark OpenCode prompt ledger as accepted;
+- wake failure keeps inbox row and records diagnostic;
+- delivery latency timeline shows which stage caused a slow start;
+- review-pickup provider delivery outcome remains separate from generic agenda-sync inbox delivery.
+
+### 4.92 Busy Signal Must Not Suppress The Same Recovery Forever
+
+Current files:
+
+```text
+src/main/services/team/TeamProvisioningService.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+test/features/member-work-sync/main/createMemberWorkSyncFeature.test.ts
+test/main/services/team/TeamProvisioningServiceRelay.test.ts
+```
+
+Source-audit finding:
+
+```text
+getOpenCodeMemberDeliveryBusyStatus() suppresses work-sync when the member has unread foreground inbox messages, recent foreground messages, inactive lane, or active prompt ledger.
+This is correct for broad sync nudges, but proof-missing recovery for the same logical message can be accidentally suppressed by the very unread row it is trying to repair.
+```
+
+Rules:
+
+- generic member-work-sync nudges must stay suppressed by unread foreground messages;
+- same-message delivery repair should not go through generic work-sync busy suppression;
+- a proof-missing task recovery may bypass foreground suppression only when it targets the exact same `messageId` and only through the delivery-repair path;
+- active prompt ledger still blocks duplicate prompt sends.
+
+Implementation guidance:
+
+- Add an explicit recovery context to busy checks only if needed:
+
+```ts
+type BusyRecoveryContext =
+ | { kind: 'generic_work_sync' }
+ | { kind: 'delivery_repair'; originalMessageId: string }
+ | { kind: 'proof_missing_task_recovery'; originalMessageId: string; taskIds: string[] }
+```
+
+- Default remains `generic_work_sync`.
+- Do not use a broad boolean like `ignoreForeground`.
+- Tests must prove unrelated unread foreground messages still suppress recovery.
+
+Tests:
+
+- generic work-sync nudge is suppressed by unread user/task foreground message;
+- same-message delivery repair is not suppressed by its own unread row;
+- unrelated unread foreground row still suppresses work-sync and proof-missing task recovery;
+- active OpenCode prompt ledger suppresses all duplicate prompt delivery.
+
+### 4.93 Runtime Advisory Reads Must Stay Side-Effect Free
+
+Current files:
+
+```text
+src/main/services/team/TeamMemberRuntimeAdvisoryService.ts
+src/main/services/team/TeamProvisioningService.ts
+src/main/services/team/TeamDataService.ts
+src/main/services/team/TeamDataWorkerClient.ts
+src/main/index.ts
+```
+
+Source-audit finding:
+
+```text
+TeamMemberRuntimeAdvisoryService is currently a query/cache service.
+It reads logs, prompt ledgers, and proof indexes, then returns a MemberRuntimeAdvisory.
+TeamProvisioningService owns advisory side effects today: invalidation, team-change events, notifications, and deferred advisory review timers.
+```
+
+This boundary is important.
+
+Rules:
+
+- polling member cards, task panels, or worker snapshots must never enqueue work-sync nudges;
+- `getMemberAdvisory()` and `getMemberAdvisories()` remain pure read/query operations;
+- proof-missing recovery enqueue belongs to a write-side lifecycle point, for example prompt delivery watchdog, delivery ledger transition, or an explicit member-work-sync command handler;
+- cache invalidation can happen from side-effect code, but cache refresh itself must not trigger delivery;
+- tests must prove repeated UI/advisory reads do not write inbox rows, outbox rows, prompt ledgers, or notifications.
+
+Bad implementation:
+
+```ts
+// Bad: a status panel read can send a nudge.
+const advisory = await advisoryService.getMemberAdvisory(teamName, memberName)
+if (advisory?.reasonCode === 'protocol_proof_missing') {
+ await memberWorkSync.enqueue(...)
+}
+```
+
+Recommended implementation:
+
+```ts
+// Good: command/write-side transition emits a recovery signal once.
+await promptDeliveryLedger.markResponseObserved(...)
+await proofMissingRecoveryScheduler.scheduleIfNeeded({
+ teamName,
+ memberName,
+ inboxMessageId,
+ reasonCode: 'protocol_proof_missing',
+})
+advisoryInvalidator.invalidateMember(teamName, memberName)
+```
+
+Implementation options:
+
+1. Keep recovery scheduling in `TeamProvisioningService` delivery lifecycle and call a member-work-sync application port.
+ `🎯 9 🛡️ 9 🧠 4`, roughly `90-180 LOC`.
+2. Add side effects to `TeamMemberRuntimeAdvisoryService`.
+ `🎯 3 🛡️ 3 🧠 2`, roughly `40-100 LOC`, violates query/write separation and can spam from UI polling.
+3. Let renderer detect advisory warnings and call a recovery endpoint.
+ `🎯 2 🛡️ 2 🧠 3`, roughly `120-240 LOC`, wrong process boundary and easy to duplicate across windows.
+
+Recommended:
+
+Use option 1. It preserves SRP: advisory service derives state, delivery lifecycle owns recovery scheduling.
+
+Tests:
+
+- repeated `TeamDataService` snapshot reads do not enqueue recovery;
+- repeated worker `invalidateMemberRuntimeAdvisory` refreshes do not enqueue recovery;
+- delivery watchdog transition enqueues at most one recovery signal;
+- proof observed after scheduling cancels/suppresses the queued recovery without relying on a renderer refresh.
+
+### 4.94 Proof-Missing Recovery Needs An Explicit Trigger Contract
+
+Current files:
+
+```text
+src/features/member-work-sync/main/infrastructure/MemberWorkSyncEventQueue.ts
+src/features/member-work-sync/main/composition/createMemberWorkSyncFeature.ts
+src/features/member-work-sync/main/adapters/input/MemberWorkSyncTeamChangeRouter.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeActivationPolicy.ts
+src/features/member-work-sync/core/application/MemberWorkSyncTargetedRecoveryPolicy.ts
+```
+
+Source-audit finding:
+
+```text
+MemberWorkSyncTriggerReason currently includes startup_scan, config_changed, task_changed, inbox_changed, member_spawned, tool_finished, runtime_activity, turn_settled, manual_refresh.
+No trigger explicitly means "repair this exact proof-missing delivery".
+Targeted recovery exists, but it is currently status-derived and broad: OpenCode or lead in needs_sync with shadow wouldNudge.
+```
+
+Rules:
+
+- proof-missing recovery must be targeted, not a broad status refresh;
+- trigger diagnostics must show why a nudge exists: task change, turn settled, proof missing, manual refresh, etc;
+- broad `runtime_activity` should not hide proof-missing behavior because it has different timing and coalescing semantics;
+- if a new trigger reason is added, update the union, default run-after, max coalesce, audit metadata, composition wiring, and tests in one cut.
+
+Implementation options:
+
+1. Add explicit trigger reason `proof_missing_recovery` with short run-after and normal coalescing.
+ `🎯 9 🛡️ 9 🧠 4`, roughly `70-160 LOC`.
+2. Reuse `runtime_activity` and pass proof-missing details in metadata.
+ `🎯 6 🛡️ 6 🧠 2`, roughly `25-80 LOC`, cheaper but diagnostics and timing become ambiguous.
+3. Bypass the queue and write outbox rows directly from delivery code.
+ `🎯 4 🛡️ 4 🧠 3`, roughly `60-130 LOC`, violates clean architecture and skips coalescing/rate-limit policy.
+
+Recommended:
+
+Use option 1 if proof-missing recovery is implemented in this phase. If implementation scope must stay smaller, do not add recovery yet. Do not ship option 2 as a silent shortcut.
+
+Code shape:
+
+```ts
+export type MemberWorkSyncTriggerReason =
+ | 'startup_scan'
+ | 'config_changed'
+ | 'task_changed'
+ | 'inbox_changed'
+ | 'member_spawned'
+ | 'tool_finished'
+ | 'runtime_activity'
+ | 'turn_settled'
+ | 'proof_missing_recovery'
+ | 'manual_refresh'
+
+function defaultRunAfterMs(reason: MemberWorkSyncTriggerReason): number {
+ switch (reason) {
+ case 'proof_missing_recovery':
+ return 5_000
+ // existing cases unchanged
+ }
+}
+```
+
+Tests:
+
+- `proof_missing_recovery` has explicit run-after and max-coalesce values;
+- multiple proof-missing events for the same team/member/message coalesce;
+- broad startup/member-spawn scans keep readiness-gated timing;
+- audit rows include `proof_missing_recovery`, original message id, and task refs when known.
+
+### 4.95 Outbox Store Needs A Logical Recovery Lookup
+
+Current files:
+
+```text
+src/features/member-work-sync/core/application/ports.ts
+src/features/member-work-sync/main/infrastructure/JsonMemberWorkSyncStore.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeDispatcher.ts
+src/features/member-work-sync/core/application/MemberWorkSyncNudgeOutboxPlanner.ts
+```
+
+Source-audit finding:
+
+```text
+MemberWorkSyncOutboxStorePort can count recent delivered rows and find delivered review-pickup request ids.
+It cannot currently answer "did we already schedule recovery for this exact original delivery message?".
+```
+
+Rules:
+
+- delivery repair and work-sync arbitration should query the outbox through a port, not scan JSON files inside `TeamProvisioningService`;
+- lookup key must be logical and stable, not a display string;
+- lookup must distinguish delivered inbox rows from accepted OpenCode runtime prompts;
+- the query must include team, member, intent, original inbox message id, and optional task ids.
+
+Recommended port extension:
+
+```ts
+export interface MemberWorkSyncOutboxStorePort {
+ // existing methods...
+ findRecentRecoveryByIntent?(input: {
+ teamName: string
+ memberName: string
+ intentKey: string
+ sinceIso: string
+ }): Promise<{
+ id: string
+ status: MemberWorkSyncOutboxStatus
+ deliveredMessageId?: string
+ payloadHash: string
+ updatedAt: string
+ } | null>
+}
+```
+
+Implementation options:
+
+1. Add optional outbox lookup port and implement it in `JsonMemberWorkSyncStore`.
+ `🎯 9 🛡️ 9 🧠 4`, roughly `90-190 LOC`.
+2. Reuse `countRecentDelivered()` with a special `workSyncIntentKey`.
+ `🎯 6 🛡️ 6 🧠 2`, roughly `20-70 LOC`, cannot inspect pending/claimed rows and can miss active recovery.
+3. Let `TeamProvisioningService` inspect store files directly.
+ `🎯 2 🛡️ 2 🧠 3`, roughly `50-120 LOC`, breaks port boundaries.
+
+Recommended:
+
+Use option 1. Keep the port optional during migration if needed, but tests should exercise the real JSON store implementation.
+
+Tests:
+
+- pending, claimed, delivered, retryable, and superseded recovery rows are found by logical key;
+- unrelated task ids or original message ids do not match;
+- stale rows outside cooldown do not suppress new recovery;
+- corrupt store rows are ignored with diagnostics, not treated as delivered.
+
+### 4.96 Inbox Payload Hash Is A Backward-Compatible Schema Change
+
+Current files:
+
+```text
+src/shared/types/team.ts
+src/main/services/team/TeamInboxWriter.ts
+src/main/services/team/TeamInboxReader.ts
+src/features/member-work-sync/main/adapters/output/TeamInboxMemberWorkSyncNudgeSink.ts
+src/renderer
+```
+
+Source-audit finding:
+
+```text
+MemberWorkSyncInboxNudgePort receives payloadHash, but TeamInboxWriter does not persist it and TeamInboxReader does not materialize it.
+The sink can only compare messageId today.
+```
+
+Rules:
+
+- add a field scoped to work-sync automation, for example `workSyncPayloadHash?: string`;
+- do not overload `messageId`;
+- do not require the field on old inbox rows;
+- do not expose hash noise in normal Messages UI;
+- renderer must tolerate the optional field without treating rows as changed visible content.
+
+Recommended schema shape:
+
+```ts
+export interface InboxMessage {
+ // existing fields...
+ workSyncIntent?: 'agenda_sync' | 'review_pickup'
+ workSyncIntentKey?: string
+ workSyncReviewRequestEventIds?: string[]
+ workSyncPayloadHash?: string
+}
+```
+
+Compatibility rule:
+
+```text
+existing row has hash equal to requested hash -> existing ok
+existing row has hash different from requested hash -> conflict
+existing row lacks hash and has messageKind=member_work_sync_nudge -> recompute from canonical row if possible, else conflict
+existing row lacks hash and is not work-sync -> conflict for this sink
+```
+
+Tests:
+
+- writer persists `workSyncPayloadHash`;
+- reader returns `workSyncPayloadHash`;
+- old rows without the field still render and sort;
+- sink detects same-id/different-payload conflict;
+- normal Messages filtering still hides work-sync automation.
+
+### 4.97 Audit Events Must Be Extended Atomically
+
+Current files:
+
+```text
+src/features/member-work-sync/core/application/ports.ts
+src/features/member-work-sync/core/application/MemberWorkSyncAudit.ts
+src/features/member-work-sync/main/infrastructure/FileMemberWorkSyncAuditJournal.ts
+test/features/member-work-sync
+```
+
+Source-audit finding:
+
+```text
+MemberWorkSyncAuditEventName is a string union, not free-form.
+Adding proof-missing or recovery events requires updating the union and any mapper that produces audit event names.
+```
+
+Rules:
+
+- no string casts to bypass `MemberWorkSyncAuditEventName`;
+- every new recovery/audit event gets one canonical event name;
+- audit append failure remains non-blocking;
+- audit rows must contain enough metadata for debugging without secrets: original message id, intent key, trigger reason, provider id, and task refs.
+
+Suggested event names:
+
+```ts
+type MemberWorkSyncAuditEventName =
+ | 'proof_missing_recovery_scheduled'
+ | 'proof_missing_recovery_coalesced'
+ | 'proof_missing_recovery_suppressed'
+ | 'proof_missing_recovery_conflict'
+ // existing names
+```
+
+Tests:
+
+- each new event name can be appended by the file journal;
+- `reasonToAuditEvent()` maps new skip/retry reasons without falling back to unrelated events;
+- audit append failure does not mark outbox delivered or failed;
+- diagnostics include identity but redact prompt text and secrets.
+
+---
+
+## 5. What Not To Do
+
+Do not solve this by changing `workIntervals`.
+
+Bad variant:
+
+```text
+start workIntervals only after task_start/tool activity
+```
+
+Score:
+
+`🎯 5 🛡️ 4 🧠 6`, roughly `250-550 LOC`, high regression risk.
+
+Why not:
+
+- breaks existing task status duration semantics;
+- creates provider-specific timing gaps;
+- makes old tasks inconsistent;
+- weakens change scoping based on persisted intervals;
+- does not fix OpenCode delivery delay or missing task logs.
+
+Do not ping agents just because UI shows an old `workInterval`.
+
+Bad variant:
+
+```text
+if task has been in progress for N minutes, send another message immediately
+```
+
+Score:
+
+`🎯 4 🛡️ 4 🧠 3`, roughly `100-250 LOC`, spam risk.
+
+Why not:
+
+- duplicates watchdog;
+- interrupts active work;
+- can create loops when delivery is already in flight;
+- confuses foreground unread assignment handling.
+
+Do not make task logs depend on broad member-level transcript fallback without session bounds.
+
+Bad variant:
+
+```text
+if task stream is empty, include recent member OpenCode logs
+```
+
+Score:
+
+`🎯 6 🛡️ 5 🧠 3`, roughly `80-180 LOC`, attribution risk.
+
+Why not:
+
+- can pull another task's work into the selected task;
+- gets worse when a member has multiple recreated sessions;
+- hides the real missing session lookup problem.
+
+Do not fix the Changes panel by treating every OpenCode `write`/`edit` row as an authoritative diff.
+
+Bad variant:
+
+```text
+Task Log Stream write/edit tool row -> synthetic file change summary
+```
+
+Score:
+
+`🎯 5 🛡️ 4 🧠 4`, roughly `120-260 LOC`, audit risk.
+
+Why not:
+
+- tool input can be truncated, malformed, failed, or retried;
+- a successful tool row is not the same as persisted file diff;
+- duplicates and partial writes can create false review data;
+- existing `ChangeExtractorService` and task-change ledger are the correct authority for file changes.
+
+Do not add accept-fast transport knobs into `payloadHash` accidentally.
+
+Bad variant:
+
+```text
+payloadHash = stableHash(full OpenCodeSendMessageCommandBody)
+```
+
+after adding:
+
+```text
+settlementMode
+observationTimeoutMs
+runtimePromptMessageId
+debug flags
+```
+
+Score:
+
+`🎯 6 🛡️ 4 🧠 3`, roughly `20-80 LOC`, idempotency regression risk.
+
+Why not:
+
+- commandStatus can report payload mismatch for the same logical delivery;
+- existing ledger record can become failed_terminal due to metadata-only differences;
+- retries can create a new logical attempt instead of recovering the accepted one.
+
+---
+
+## 6. Recommended Phases
+
+### Phase 0 - Baseline, Diagnostics, And Guardrails
+
+Score:
+
+`🎯 10 🛡️ 10 🧠 3`, roughly `120-220 LOC`.
+
+Goal:
+
+Make the current behavior measurable before behavior changes.
+
+#### Phase 0.1 Confirm Existing Invariants
+
+Add or update tests that lock the `workIntervals` meaning:
+
+- task created as `in_progress` opens a work interval at `createdAt`;
+- `task_start` on an already `in_progress` task does not rewrite the first interval;
+- `updateStatus(in_progress)` from non-progress opens a new interval;
+- leaving `in_progress` closes the active interval;
+- `workIntervals` do not depend on provider.
+
+Candidate tests:
+
+- `test/main/services/team/TeamTaskWriter.test.ts`
+- `test/main/services/team/TeamTaskActivityIntervalService.test.ts`
+- `test/shared/utils/taskWorkDuration.test.ts`
+
+Acceptance:
+
+```text
+workIntervals remain a board/status-time contract
+```
+
+#### Phase 0.2 Add Timing Breadcrumbs For OpenCode Delivery
+
+Add machine-readable diagnostics where they are missing, not user-visible noise.
+
+Useful timestamps:
+
+- `delivery_attempt_started_at`
+- `session_record_loaded_at`
+- `stale_session_detected_at`
+- `session_recreate_started_at`
+- `session_recreate_finished_at`
+- `mcp_ready_check_started_at`
+- `mcp_ready_check_finished_at`
+- `prompt_async_started_at`
+- `prompt_async_accepted_at`
+- `turn_settled_wait_started_at`
+- `turn_settled_wait_finished_at`
+- `post_prompt_reconcile_started_at`
+- `post_prompt_reconcile_finished_at`
+- `command_outcome_written_at`
+
+Store them as structured diagnostics in existing ledgers/outcomes, not as long human strings.
+
+Important:
+
+- do not put API keys or prompt bodies in diagnostics;
+- do not add noisy UI messages;
+- do not block delivery on diagnostics write failure.
+
+Candidate files:
+
+- `src/main/services/team/opencode/delivery/OpenCodePromptDeliveryLedger.ts`
+- `src/main/services/team/opencode/bridge/OpenCodeReadinessBridge.ts`
+- `src/main/services/team/TeamProvisioningService.ts`
+- `agent_teams_orchestrator/src/services/opencode/OpenCodeBridgeCommandHandler.ts`
+- `agent_teams_orchestrator/src/services/opencode/OpenCodeCommandOutcomeStore.ts`
+
+Add diagnostics as structured codes where possible. Avoid human-only prose, because later phases need to classify outcomes by code:
+
+```text
+opencode_send_session_recreated_after_stale_record
+opencode_send_session_recreated_after_mcp_failure
+opencode_send_mcp_reattached
+opencode_send_prompt_endpoint_accepted
+opencode_send_turn_observation_timeout
+opencode_send_reconcile_after_accept_failed
+```
+
+#### Phase 0.3 Debugging Script Or Runbook Snippet
+
+Extend `docs/team-management/member-work-sync-debugging.md` with a short "OpenCode delayed start" section:
+
+```bash
+jq '.[] | select(.memberName=="jack")' \
+ ~/.claude/teams/<team-name>/.opencode-runtime/lanes/*/opencode-prompt-delivery-ledger.json
+```
+
+Include where to check:
+
+- delivery ledger;
+- member-work-sync journal;
+- launch state;
+- OpenCode lane registry;
+- task history timestamps;
+- runtime turn-settled spool.
+
+Acceptance:
+
+An engineer can prove whether delay came from:
+
+- no prompt acceptance;
+- stale session repair;
+- MCP repair;
+- provider slow response;
+- task log lookup gap;
+- actual model idle/stall.
+
+---
+
+### Phase 1 - UI Semantics Without Data Model Change
+
+Score:
+
+`🎯 10 🛡️ 9 🧠 2`, roughly `40-120 LOC`.
+
+Goal:
+
+Stop implying that `workIntervals` equals active model execution.
+
+#### Phase 1.1 Rename The Visible Label
+
+Change user-facing copy:
+
+```text
+Work time
+```
+
+to one of:
+
+```text
+In progress time
+```
+
+or:
+
+```text
+Time in progress
+```
+
+Recommended:
+
+```text
+In progress time
+```
+
+Why:
+
+- short;
+- accurate;
+- provider-neutral;
+- does not promise active runtime execution.
+
+Candidate file:
+
+- `src/renderer/components/team/dialogs/TaskDetailDialog.tsx`
+
+Known existing test:
+
+- `test/renderer/components/team/dialogs/TaskDetailDialog.test.tsx`
+
+Update test assertion from:
+
+```text
+Work time 5m 00s
+```
+
+to:
+
+```text
+In progress time 5m 00s
+```
+
+#### Phase 1.2 Optional Tooltip
+
+If the component already has a tooltip pattern nearby, add:
+
+```text
+Time since this task entered In Progress. It can include delivery, waiting, and review coordination time.
+```
+
+Do not add a new tooltip framework or large UI component just for this.
+
+#### Phase 1.3 Do Not Rename Storage Fields
+
+Do not rename:
+
+- `workIntervals`;
+- `taskWorkDuration`;
+- ledger fields using `workIntervals`;
+- change scoping reasons.
+
+Storage and code can keep the historical name. The user-facing label is the mismatch.
+
+Acceptance:
+
+- no migration;
+- no task JSON rewrite;
+- tests pass;
+- UI no longer claims "active work".
+
+---
+
+### Phase 2 - Session-Evidence Based OpenCode Task Logs
+
+Score:
+
+`🎯 9 🛡️ 8 🧠 6`, roughly `450-750 LOC` across both repos.
+
+Goal:
+
+Task Log Stream should find OpenCode logs from the actual session that handled the task/delivery, especially after session recreate.
+
+Current likely gap:
+
+```text
+OpenCodeTaskLogStreamSource
+-> runtimeBridge.getOpenCodeTranscript({ teamId, memberName, laneId? })
+-> current lane/session transcript only
+```
+
+But delivery ledger can show:
+
+```text
+runtimeSessionId: "ses_..."
+```
+
+which may not be the current lane registry session after recreate.
+
+#### Phase 2.1 Add Session-ID Transcript Lookup In Orchestrator
+
+Extend runtime transcript CLI to support explicit OpenCode session lookup.
+
+Candidate file:
+
+- `agent_teams_orchestrator/src/cli/handlers/runtime.ts`
+
+Current behavior found:
+
+```text
+runtime transcript is implemented for OpenCode
+but explicit --session is not accepted
+```
+
+Add parameters:
+
+```text
+--session-id
+--team-id
+--member
+--lane
+--limit
+```
+
+Rules:
+
+- `--session-id` must match the OpenCode session ID format, for example it starts with `ses_`;
+- if `--session-id` is present, it takes precedence over lane/current lookup;
+- still require team/member context when available for diagnostics and redaction;
+- do not allow arbitrary file paths from CLI input;
+- do not bypass existing auth/profile boundaries.
+
+Example command:
+
+```bash
+agent_teams_orchestrator runtime transcript \
+ --provider opencode \
+ --team-id comet-hub \
+ --member jack \
+ --session-id ses_1ddc19603ffe71Lo7UYO5AhHMr \
+ --limit 300
+```
+
+Expected output shape should remain compatible with existing `getOpenCodeTranscript()`:
+
+```ts
+interface OpenCodeTranscriptResult {
+ sessionId?: string;
+ logProjection?: {
+ messages: OpenCodeRuntimeTranscriptLogMessage[];
+ };
+ diagnostics?: string[];
+}
+```
+
+Do not create a new renderer-facing schema in this phase.
+
+#### Phase 2.1.1 Do Not Guess Unknown Historical Sessions
+
+Exact `--session-id` support should be conservative:
+
+```text
+find stored session record where:
+ record.teamId === teamId
+ record.memberName === memberName
+ record.opencodeSessionId === sessionId
+ optional laneId matches when provided
+```
+
+If not found:
+
+```json
+{
+ "diagnostics": ["opencode_transcript_session_not_found"]
+}
+```
+
+Do not recursively search all OpenCode storage for an arbitrary `ses_*` in this phase.
+
+Reason:
+
+- exact session lookup is a correctness fix, not a forensic scanner;
+- broad scans can be slow and can cross team/member boundaries;
+- Phase 2 should stay low-risk and bounded.
+
+#### Phase 2.2 Extend Bridge Port In `claude_team`
+
+Candidate file:
+
+- `src/main/services/runtime/ClaudeMultimodelBridgeService.ts`
+
+Extend:
+
+```ts
+getOpenCodeTranscript(binaryPath, {
+ teamId,
+ memberName,
+ laneId,
+ sessionId,
+ limit,
+ timeoutMs,
+})
+```
+
+Rules:
+
+- append `--session-id` only when provided;
+- keep old lane/member behavior unchanged;
+- tests must verify CLI args for both lane and session lookup;
+- do not force every caller to pass session ID.
+
+Tests:
+
+- `test/main/services/runtime/ClaudeMultimodelBridgeService.test.ts`
+
+Important bridge details:
+
+- keep output temp-dir cleanup in `finally`;
+- keep `--projection-only` default for task logs;
+- do not increase default transcript timeout globally;
+- if `sessionId` and `laneId` are both present, pass both so orchestrator can validate;
+- if the orchestrator returns a transcript whose `sessionId` does not equal the requested `sessionId`, treat it as a transcript miss and keep fallback behavior;
+- for exact-session lookup, validate `transcript.sessionId`, `transcript.logProjection.sessionId`, and every projected message `sessionId` when present;
+- if session IDs disagree, reject that exact candidate with a developer diagnostic instead of mixing messages;
+- keep parity between both transcript surfaces if both remain supported:
+ - CLI `runtime transcript`;
+ - bridge command `opencode.getRuntimeTranscript`.
+
+#### Phase 2.3 Use Delivery Ledger Session Evidence In Task Logs
+
+Candidate file:
+
+- `src/main/services/team/taskLogs/stream/OpenCodeTaskLogStreamSource.ts`
+
+Candidate evidence sources:
+
+- task `sourceMessageId`;
+- OpenCode prompt delivery ledger records;
+- runtime turn-settled records if they contain `runtimeSessionId`;
+- task activity records with OpenCode attribution;
+- current lane fallback.
+
+Lookup order after source audit:
+
+```text
+1. Attribution records with sessionId, fetched by exact sessionId.
+2. Delivery ledger records whose taskRefs/sourceMessageId match this task, fetched by exact runtimeSessionId.
+3. Runtime-turn-settled events for same team/member/session/time window, if already indexed.
+4. Current lane/member transcript with existing marker/time-window logic.
+```
+
+Important:
+
+- session lookup should be bounded, for example max 2-3 session IDs per task load;
+- exact session lookups should use bounded concurrency `2`, not unlimited `Promise.all`;
+- do not scan every historical OpenCode session;
+- dedupe transcripts by `sessionId`;
+- dedupe projected messages by stable source ID;
+- sort by timestamp after merge;
+- preserve task marker window logic;
+- include session evidence in the task-log cache key;
+- exact session candidates must be fetched before comparing `transcript.sessionId` to an attribution record;
+- if exact session fetch fails, record a miss reason and continue to the next candidate;
+- message dedupe must include session identity, for example `sessionId + uuid`;
+- when `uuid` is absent or not stable, prefer `sessionId + sourceToolUseID/sourceToolAssistantUUID + timestamp` over tool-name-only signatures;
+- never use native tool name/input alone to dedupe OpenCode rows across different sessions.
+
+#### Phase 2.3.1 Fix The Existing Attribution Path First
+
+Before adding delivery-ledger evidence, fix the current attribution loop because it already has a same-member/multiple-session bug.
+
+Source-audit confirmation:
+
+```text
+current transcriptCache key = normalized member name only
+current fetch = getOpenCodeTranscript({ teamId, memberName, limit })
+current session filter = compare record.sessionId after fetching current transcript
+current segment id = opencode-attributed:<teamName>:<taskId>:<memberName>
+```
+
+That means a recreated old session can be skipped before exact lookup is attempted, and two sessions for the same member can collapse into one segment.
+
+Required changes:
+
+```text
+transcript cache key: memberName -> memberName + laneId + sessionId/current
+projection group key: participantKey -> participantKey + sessionId/current
+segment id: include sessionId when known
+actor.sessionId: exact transcript/session evidence, not first arbitrary message
+message identity key: include sessionId before uuid/sourceToolUseID/sourceToolAssistantUUID
+```
+
+Pseudo-code:
+
+```ts
+const transcriptKey = buildTranscriptCacheKey({
+ memberName,
+ laneId: record.laneId,
+ sessionId: record.sessionId,
+});
+
+const transcript = await getOrFetchTranscript(transcriptKey, () =>
+ runtimeBridge.getOpenCodeTranscript(binaryPath, {
+ teamId: teamName,
+ memberName,
+ laneId: record.laneId,
+ sessionId: record.sessionId,
+ limit: ATTRIBUTED_TRANSCRIPT_LIMIT,
+ })
+);
+
+if (record.sessionId && transcript?.sessionId !== record.sessionId) {
+ recordMiss("exact_session_mismatch");
+ continue;
+}
+
+const projectionKey = buildProjectionGroupKey({
+ memberName,
+ sessionId: transcript.sessionId ?? record.sessionId,
+});
+```
+
+Projection merge rule:
+
+```ts
+function buildOpenCodeProjectedMessageKey(message: ParsedMessage): string {
+ const session = message.sessionId?.trim() || "unknown-session";
+ const id =
+ message.uuid?.trim() ||
+ message.sourceToolUseID?.trim() ||
+ message.sourceToolAssistantUUID?.trim() ||
+ `${message.timestamp.toISOString()}:${message.type}`;
+
+ return `${session}:${id}`;
+}
+```
+
+This is intentionally stricter than generic transcript merge. The same member can have several live or recently recreated OpenCode sessions, and a tool name/input signature is not enough provenance.
+
+User-facing participant filters can still show a single `bob`. Internal segments should remain session-specific:
+
+```text
+opencode-attributed:<teamName>:<taskId>:bob:<sessionId>
+```
+
+Acceptance:
+
+- two attribution records for the same member but different sessions produce separate safe segments unless message IDs overlap;
+- a current-lane transcript cannot silently hide an exact historical attribution record;
+- renderer still shows simple member filters.
+
+Pseudo-code for the session-evidence lookup (Phase 2.3), applied after the attribution fix above:
+
+```ts
+const attributionRecords = await attributionStore.readTaskRecords(teamName, task.id);
+const sessionEvidence = await sessionEvidenceSource.findOpenCodeTaskTranscriptCandidates({
+ teamName,
+ taskId: task.id,
+ owner: task.owner,
+ sourceMessageId: task.sourceMessageId,
+ workIntervals: task.workIntervals,
+ attributionRecords,
+});
+
+const cacheKey = buildCacheKey(task, attributionRecords, sessionEvidence);
+
+for (const candidate of sessionEvidence.slice(0, 3)) {
+ const transcript = await runtimeBridge.getOpenCodeTranscript(binaryPath, {
+ teamId,
+ memberName: candidate.memberName,
+ sessionId: candidate.sessionId,
+ laneId: candidate.laneId,
+ limit,
+ timeoutMs,
+ });
+ mergeIfTaskScoped(transcript, candidate);
+}
+```
+
+Better architecture:
+
+Create a narrow adapter/source instead of embedding all ledger parsing in `OpenCodeTaskLogStreamSource`:
+
+```text
+TaskLogOpenCodeSessionEvidenceSource
+```
+
+Responsibilities:
+
+- read OpenCode delivery ledger;
+- return small candidate list;
+- no transcript parsing;
+- no renderer DTOs;
+- no task log rendering decisions.
+
+Suggested candidate type:
+
+```ts
+interface OpenCodeTaskTranscriptCandidate {
+ sessionId: string;
+ memberName: string;
+ laneId?: string;
+ source: "attribution" | "delivery_ledger" | "turn_settled";
+ taskId: string;
+ since?: string;
+ until?: string;
+ startMessageUuid?: string;
+ endMessageUuid?: string;
+ confidence: "exact" | "bounded_window";
+}
+```
+
+Candidate source rules:
+
+- exact candidates sort before bounded window candidates;
+- latest accepted delivery for the task sorts before older retry attempts;
+- records with `failed_terminal` and no `acceptedAt` are ignored;
+- records with `runtimeSessionId` but no task correlation are ignored;
+- maximum candidate count defaults to `3`;
+- every miss reason is diagnostic-only.
+
+This keeps SRP:
+
+- evidence source finds session candidates;
+- runtime bridge fetches transcript;
+- task log stream source projects and filters messages.
+
+#### Phase 2.4 Diagnostics
+
+Add developer diagnostics to stream metadata/logs:
+
+```ts
+{
+ opencodeSessionEvidenceCount: 2,
+ opencodeSessionTranscriptHits: 1,
+ opencodeCurrentLaneFallbackUsed: true,
+ opencodeTranscriptMissReasons: [
+ "session_transcript_empty",
+ "no_runtime_session_id_for_task"
+ ]
+}
+```
+
+Do not show these as regular user-visible task log rows.
+
+#### Phase 2.4.1 Merge And Cache Safety In `BoardTaskLogStreamService`
+
+Before relying on exact OpenCode session fallback, verify the higher-level stream service does not erase it.
+
+Required checks:
+
+```text
+OpenCode fallback segment id includes session identity
+BoardTaskLogStreamService merge keeps distinct session segments
+source-level OpenCode cache invalidates when evidence candidate identity changes
+layout cache does not hide newly available fallback evidence
+```
+
+Suggested helper:
+
+```ts
+function buildOpenCodeTaskLogSegmentId(input: {
+ teamName: string;
+ taskId: string;
+ memberName: string;
+ sessionId?: string | null;
+ source: "attributed" | "delivery_ledger" | "heuristic";
+}): string {
+ const sessionPart = input.sessionId?.trim() || "current";
+ return [
+ "opencode",
+ input.source,
+ input.teamName,
+ input.taskId,
+ input.memberName,
+ sessionPart,
+ ].join(":");
+}
+```
+
+Do not use participant name alone as the segment identity.
+
+Cache rules:
+
+- source cache key includes attribution key plus session evidence key;
+- session evidence key is stable and compact, for example sorted `source/member/sessionId/laneId`;
+- if exact evidence is absent, current fallback cache behavior remains;
+- cache TTL stays short and does not become a correctness dependency;
+- cache miss diagnostics stay developer-only;
+- `shouldMergeRuntimeFallback()` must consider whether existing execution records cover the same OpenCode owner/session before suppressing fallback.
+
+#### Phase 2.5 Tests
+
+Unit:
+
+- ledger evidence returns runtimeSessionId matching task/message/member;
+- session candidates are deduped and bounded;
+- foreign member/task evidence ignored;
+- missing ledger falls back cleanly;
+- `getOpenCodeTranscript()` passes `--session-id`;
+- cache key changes when session evidence appears;
+- exact attribution session is fetched even if current member transcript has a different session;
+- two exact sessions for one member do not share a transcript cache entry;
+- two exact sessions for one member do not collapse into one segment with the wrong actor session;
+- session not found returns null/fallback without throwing user-visible errors;
+- exact transcript with mismatched top-level/projection/message session IDs is rejected as a candidate;
+- BoardTaskLogStreamService merge does not drop two OpenCode fallback segments for the same participant with different session IDs;
+- BoardTaskLogStreamService does not suppress OpenCode fallback because of unrelated execution records;
+- OpenCode fallback cache invalidates when session evidence changes from empty/current to exact session.
+
+Integration:
+
+- OpenCode task stream uses session transcript first when current lane transcript is empty;
+- native tools from session transcript render correctly;
+- wrong session transcript does not leak into task;
+- duplicate retry markers do not duplicate native rows.
+
+Suggested commands:
+
+```bash
+pnpm vitest run \
+ test/main/services/runtime/ClaudeMultimodelBridgeService.test.ts \
+ test/main/services/team/OpenCodeTaskLogStreamSource.test.ts \
+ test/main/services/team/OpenCodeTaskLogStreamSource.fixture-e2e.test.ts \
+ test/main/services/team/BoardTaskLogStreamIntegration.test.ts \
+ test/renderer/components/team/taskLogs/TaskLogStreamSection.opencode-fixture-e2e.test.tsx
+```
+
+In orchestrator:
+
+```bash
+bun test src/cli/handlers/runtime.test.ts src/services/opencode/OpenCodeSessionBridge.test.ts
+bun run build
+```
+
+Acceptance:
+
+- a recreated OpenCode session can still supply task logs;
+- task logs appear from the actual prompt/session evidence, not only the current lane;
+- no unrelated member logs are pulled in.
+
+---
+
+### Phase 3 - Accept-Fast OpenCode Delivery With Async Durable Observation
+
+Score:
+
+`🎯 8 🛡️ 7 🧠 8`, roughly `550-950 LOC` across both repos.
+
+Goal:
+
+Do not make the app wait for a full OpenCode turn/reconcile inside the user-facing send command before treating prompt delivery as accepted.
+
+Current high-level flow:
+
+```text
+claude_team sendOpenCodeTeamMessage timeout ~45s
+-> orchestrator opencode.sendMessage
+-> ensure MCP/session
+-> promptAsyncWithTurnSettled
+-> wait for SSE settle
+-> reconcile
+-> write outcome
+-> return
+```
+
+This is robust for evidence, but too slow for delivery acceptance when OpenCode is repairing state or the model response takes longer.
+
+Recommended flow:
+
+```text
+ensure session/MCP before prompt
+-> prompt_async accepted
+-> return accepted quickly with runtimePromptMessageId/sessionId
+-> durable observer/outcome continues in orchestrator-owned bounded background or follow-up command
+-> claude_team ledger observes outcome/status later
+-> member-work-sync reconcile wakes from turn-settled event when available
+```
+
+Important:
+
+This phase must not lose the safety work already done in OpenCode turn-settled.
+
+#### Phase 3.1 Split Acceptance From Observation
+
+Define two separate concepts:
+
+```ts
+type OpenCodePromptAcceptance = {
+ accepted: true;
+ runtimeSessionId: string;
+ runtimePromptMessageId: string;
+ deliveryAttemptId: string;
+ acceptedAt: string;
+};
+
+type OpenCodeTurnObservation = {
+ outcome: "success" | "error" | "timeout" | "stream_unavailable";
+ observedAt: string;
+ diagnostics: string[];
+};
+```
+
+Acceptance means:
+
+```text
+OpenCode endpoint accepted prompt_async for the intended session.
+```
+
+It does not mean:
+
+```text
+agent replied visibly
+agent used tools
+task started
+message_send succeeded
+```
+
+Those remain ledger/watchdog/member-work-sync concerns.
+
+#### Phase 3.1.1 Add Status Vocabulary Without Lying
+
+Current orchestrator outcome statuses include:
+
+```text
+received
+preconditions_checked
+session_resolved
+mcp_ready
+prompt_submitting
+prompt_accepted
+turn_observed
+reconciled
+failed_before_accept
+failed_after_accept
+```
+
+Do not use `reconciled` for accept-fast return.
+
+Recommended addition:
+
+```ts
+type OpenCodeCommandOutcomeStatus =
+ | ExistingStatus
+ | "acceptance_returned"
+ | "observation_pending"
+ | "observation_completed";
+```
+
+Minimum acceptable alternative:
+
+```text
+keep status = prompt_accepted, set completedAt, and add explicit acceptedButObservationPending flag
+```
+
+The first option is cleaner:
+
+`🎯 8 🛡️ 8 🧠 5`, roughly `80-160 LOC`.
+
+Required status rules:
+
+- `prompt_accepted`, `acceptance_returned`, `observation_pending` all imply `accepted=true`;
+- the same accepted statuses also imply `safeToRetry=false`;
+- `completedAt` can be set when the command returned to the app, but observation status remains pending;
+- prune must eventually remove old accepted/pending outcomes after a safe retention period;
+- `commandStatus` must expose accepted runtime prompt identity.
+
+#### Phase 3.2 Keep Pre-Prompt Repair Synchronous
+
+Before returning accepted, still do synchronous repair:
+
+- load session record;
+- if stale, recreate once;
+- ensure Agent Teams MCP ready;
+- if MCP unavailable, recreate once;
+- if still unavailable, reject before prompt.
+
+Reason:
+
+Returning accepted when MCP is definitely broken recreates known `Not connected` failures.
+
+Do not make pre-prompt repair asynchronous.
+
+Also preserve these current gates from `TeamProvisioningService.deliverOpenCodeMemberMessage()`:
+
+- runtime lane must be active or recoverable;
+- lane must have runtime evidence on disk;
+- bootstrap must be confirmed for secondary OpenCode lanes;
+- stopped teams must stop/cleanup lanes rather than deliver;
+- active delivery for the same member/lane blocks a new prompt until proof or terminal state.
+
+#### Phase 3.2.1 Preserve Message Proof Context
+
+Accept-fast is not a proof shortcut. The app still needs to know what kind of response would satisfy the original delivery.
+
+When writing acceptance and observation records, preserve:
+
+```text
+messageKind
+source
+actionMode
+taskRefs
+replyRecipient
+relayOfMessageId
+workSyncIntent when applicable
+```
+
+This is necessary because proof requirements differ:
+
+```text
+normal direct message -> visible message_send or strict plain-text materialization proof
+task-linked runtime delivery -> visible/tool proof with correct taskRefs/actionMode
+peer relay -> relayOfMessageId and recipient correctness
+work-sync nudge -> member_work_sync_report or concrete board progress can be enough
+task-stall remediation -> progress/blocker/comment proof, not generic ack
+```
+
+Do not let `runtimePromptMessageId` become the only correlation key. It identifies the OpenCode prompt, but it does not encode delivery semantics.
+
+Implementation guidance:
+
+```ts
+type RuntimeDeliveryObservationContext = {
+ appMessageId: string;
+ relayOfMessageId?: string;
+ messageKind?: string;
+ source?: string;
+ actionMode?: "ask" | "do" | "delegate";
+ taskRefs: string[];
+ replyRecipient?: string;
+ runtimeSessionId?: string;
+ runtimePromptMessageId?: string;
+ workSyncIntent?: string;
+};
+```
+
+Rules:
+
+- store this context on the ledger when the prompt is accepted;
+- pass this context into observe/proof helpers;
+- do not re-derive taskRefs from assistant output when the original input already had exact refs;
+- do not mark a record read/responded from a proof valid only for a different message kind.
+
+#### Phase 3.3 Return Immediately After `prompt_async` Acceptance
+
+Add an opt-in command mode rather than mutating every OpenCode prompt caller.
+
+Candidate API:
+
+```ts
+opencode.sendMessageV2
+```
+
+or existing:
+
+```ts
+opencode.sendMessage
+```
+
+with field:
+
+```ts
+settlementMode: "acceptance" | "observed"
+```
+
+Recommended:
+
+Use an explicit field on the existing command to minimize new CLI surface:
+
+```ts
+settlementMode?: "observed" | "acceptance"
+```
+
+Default:
+
+```text
+observed
+```
+
+until all current callers are audited.
+
+For `claude_team` OpenCode teammate delivery, pass:
+
+```text
+acceptance
+```
+
+Do not switch launch/bootstrap prompts to acceptance mode in the same cut. They have different readiness semantics and should stay on existing observed behavior unless separately audited.
+
+#### Phase 3.3.1 Rollout Matrix For `settlementMode`
+
+Keep the rollout explicit. Do not change all OpenCode prompt calls at once.
+
+| Call site | Initial mode | Why |
+| --- | --- | --- |
+| Secondary teammate normal delivery from `TeamProvisioningService.deliverOpenCodeMemberMessage()` | `acceptance` after Cut 3 tests | This is the user-facing path suffering from long send latency. |
+| Observe/retry commands | not applicable | They should inspect existing runtime prompt identity, not create new prompts. |
+| Launch/bootstrap prompts | `observed` | Bootstrap readiness depends on actual tool/checkin proof. |
+| `noReply` context injection | existing no-observe path | It must not attach turn-settled observer. |
+| Manual maintenance/context prompts | `observed` unless audited | Safety over latency. |
+| Live smoke/model matrix prompts | explicit per test | Avoid masking provider/model behavior differences. |
+
+Rules:
+
+- default contract remains `observed` until all non-delivery call sites are audited;
+- `claude_team` may pass `acceptance` only for normal OpenCode teammate delivery after pre-prompt MCP/session repair;
+- renderer cannot choose settlement mode;
+- settlement mode is transport behavior, not delivery proof policy;
+- settlement mode should be excluded from logical payload hash unless explicitly proven otherwise.
+
+#### Phase 3.4 Durable Observer Continuation
+
+The hardest part:
+
+The orchestrator command process is short-lived. A simple in-memory `setTimeout` or background Promise can die when the process exits.
+
+Acceptable designs:
+
+##### Option A - Command Outcome Poller In `claude_team`
+
+`🎯 8 🛡️ 8 🧠 6`, roughly `350-650 LOC`.
+
+After prompt acceptance, `claude_team` schedules a follow-up `opencode.commandStatus` or `opencode.observeDelivery` command.
+
+Pros:
+
+- no daemon requirement;
+- works with current short-lived CLI model;
+- app owns retries and timeouts;
+- easy to test with fake bridge command responses.
+
+Cons:
+
+- extra command call;
+- slightly more app-side orchestration.
+
+##### Option B - Orchestrator Writes Pending Outcome And Same Command Polls With Small Budget
+
+`🎯 7 🛡️ 7 🧠 5`, roughly `250-500 LOC`.
+
+The command returns accepted but also stores a pending outcome that can later be recovered by `commandStatus`.
+
+Pros:
+
+- reuses existing outcome store;
+- smaller diff.
+
+Cons:
+
+- if the command exits before observation, no one observes unless a later status command triggers it;
+- can become lazy observation rather than active observation.
+
+##### Option C - Long-Lived Orchestrator Sidecar Observer
+
+`🎯 6 🛡️ 8 🧠 8`, roughly `700-1200 LOC`.
+
+Run a persistent observer service for OpenCode sessions.
+
+Pros:
+
+- cleanest runtime telemetry long term;
+- fewer repeated CLI invocations.
+
+Cons:
+
+- larger operational surface;
+- process lifecycle risks;
+- likely too much for this hardening pass.
+
+Recommended for this phase:
+
+```text
+Option A - app-owned follow-up observer command
+```
+
+It fits the existing `OpenCodeReadinessBridge` and avoids daemon lifecycle risk.
+
+#### Phase 3.4.1 Follow-Up Observer Must Reuse Existing Observe Path First
+
+Current orchestrator already has:
+
+```text
+opencode.observeMessageDelivery
+```
+
+and `TeamProvisioningService` already calls `adapter.observeMessageDelivery()` for non-pending records.
+
+Phase 3 should prefer extending this path before creating a brand-new observer command.
+
+Required extensions:
+
+- accept `runtimePromptMessageId` when available;
+- accept `runtimeSessionId` when available;
+- accept `deliveryAttemptId` and `payloadHash` when available, for command/outcome matching only;
+- accept `prePromptCursor`;
+- return structured observation reason;
+- preserve existing responseObservation shape.
+
+Do not create parallel observation logic that ignores `observeOpenCodeDeliveryResponse()`.
+
+Recommended contract change:
+
+```ts
+interface OpenCodeObserveMessageDeliveryCommandBody {
+ runId?: string;
+ laneId: string;
+ teamId: string;
+ teamName: string;
+ projectPath: string;
+ memberName: string;
+ messageId: string;
+ runtimeSessionId?: string;
+ runtimePromptMessageId?: string;
+ deliveryAttemptId?: string;
+ payloadHash?: string;
+ prePromptCursor?: string | null;
+}
+```
+
+Orchestrator validation rules:
+
+```text
+if runtimeSessionId provided:
+ resolve stored session by exact opencodeSessionId
+ require same team/member/lane when lane is known
+ if not found -> session_stale / exact_session_not_found
+
+if runtimePromptMessageId provided:
+ observe around that exact prompt id first
+ verify it is a user message in the same session
+ do not fall back to another prompt id silently
+
+if no runtimePromptMessageId:
+ use current prePromptCursor behavior for old ledger compatibility
+```
+
+App-side adapter changes:
+
+- extend `OpenCodeTeamRuntimeMessageInput` or observe-specific input with `runtimeSessionId` and `runtimePromptMessageId`;
+- pass values from `OpenCodePromptDeliveryLedgerRecord.runtimeSessionId` and `lastRuntimePromptMessageId`;
+- keep old records working when these fields are absent;
+- do not expose these IDs in renderer copy, only diagnostics.
+
+Tests must include:
+
+- accepted prompt in session A, current lane now session B, observe still reads session A;
+- exact session not found returns `session_stale`/`reconcile_failed` and does not inspect session B;
+- runtimePromptMessageId mismatch does not count an unrelated prompt as delivered;
+- old ledger with only `prePromptCursor` still uses fallback.
+
+#### Phase 3.5 Idempotency Requirements
+
+Never reuse these concepts incorrectly:
+
+- `messageId` - app-level logical delivery ID, stable across retry;
+- `deliveryAttemptId` - attempt ID, one per attempt;
+- `runtimePromptMessageId` - OpenCode prompt_async message ID, one per accepted runtime prompt;
+- `relayOfMessageId` - correlation to original app message.
+
+Rules:
+
+- if `prompt_async` accepted, do not issue another prompt for the same attempt;
+- if follow-up observation times out, ledger may stay pending/unknown but must not blindly duplicate;
+- retry creates a new `deliveryAttemptId` and a new `runtimePromptMessageId`;
+- the ledger must keep all accepted runtime prompt IDs for correlation;
+- if a late visible reply arrives from an older prompt, correlation accepts the first valid proof and suppresses the duplicate warning;
+- `payloadHash` mismatch must remain terminal/precondition-style, not "retry with new payload" under the same ledger ID;
+- `deliveryAttemptId` remains the idempotency key for commandStatus recovery;
+- `runtimePromptMessageId` must never be used as the app inbox `messageId`.
+
+#### Phase 3.6 Bridge Timeout Changes
+
+Current app-side `DEFAULT_SEND_TIMEOUT_MS` is around 45 seconds.
+
+With accept-fast:
+
+- app send timeout can be lower for acceptance path, for example 15-25 seconds;
+- observation timeout can be separate, for example 45-90 seconds;
+- never use a single timeout for both acceptance and completion.
+
+Candidate constants:
+
+```ts
+const OPENCODE_PROMPT_ACCEPTANCE_TIMEOUT_MS = 20_000;
+const OPENCODE_TURN_OBSERVATION_TIMEOUT_MS = 75_000;
+```
+
+Do not tune these by gut feeling. Add metrics from Phase 0 first.
+
+Crucial:
+
+- lowering acceptance timeout must not break stale/MCP repair path;
+- if pre-prompt repair commonly takes longer than 20s, keep send timeout higher until repair is separately optimized;
+- use measured `mcp_ready_check` and `session_recreate` timings from Phase 0.
+
+#### Phase 3.7 Interaction With Existing Watchdog
+
+Existing delivery watchdog remains responsible for:
+
+- no visible reply;
+- no task progress proof;
+- tool error such as `Not connected`;
+- retrying same logical message when safe.
+
+Accept-fast changes only:
+
+```text
+prompt_async endpoint acceptance is detected sooner
+```
+
+It must not:
+
+- mark inbox read earlier;
+- mark delivery responded;
+- suppress watchdog proof checks;
+- create extra pings in member-work-sync.
+
+Specific watchdog rules:
+
+- records with `acceptedAt` and `runtimePromptMessageId` should not become `acceptanceUnknown`;
+- observation timeout after acceptance should schedule observation/proof follow-up, not immediate prompt retry;
+- prompt retry is allowed only when current ledger policy says retryable and no accepted prompt is still awaiting proof;
+- foreground unread assignment still suppresses member-work-sync nudges.
+
+#### Phase 3.8 Tests
+
+Orchestrator:
+
+- healthy MCP returns accepted quickly after `prompt_async`;
+- stale session recreate happens before prompt;
+- MCP attach failure rejects before prompt;
+- no-reply command cannot use observed wrapper;
+- same-session `session.error` during submitting is buffered until acceptance;
+- premature SSE EOF returns `stream_unavailable` diagnostics;
+- commandStatus/follow-up observer maps accepted prompt to final outcome;
+- observeMessageDelivery uses `runtimeSessionId` + `runtimePromptMessageId` even if the current lane record now points elsewhere;
+- observeMessageDelivery returns session-stale diagnostics instead of inspecting a different session when exact session lookup fails;
+- no double `promptAsync` for one accepted attempt.
+
+`claude_team`:
+
+- `deliverOpenCodeMemberMessage()` records accepted prompt without waiting for visible reply;
+- ledger stores `lastRuntimePromptMessageId` and appends accepted runtime prompt IDs without duplicating them on commandStatus recovery;
+- ledger remains pending until proof;
+- watchdog observes accepted prompt and does not duplicate immediately;
+- watchdog passes exact runtime session/prompt identity into observe path when available;
+- response proof updates same ledger record;
+- timeout after acceptance produces pending/needs_observation, not failed terminal;
+- member-work-sync gets turn-settled event and only enqueues reconcile;
+- foreground unread assignment still suppresses duplicate work-sync nudge;
+- active member delivery queue is not unblocked by acceptance alone;
+- commandStatus precondition mismatch is still rejected;
+- outcome store does not leak accepted pending outcomes forever;
+- accepted prompt with later visible proof clears advisory banner.
+
+Suggested commands:
+
+```bash
+pnpm vitest run \
+ test/main/services/team/OpenCodeReadinessBridge.test.ts \
+ test/main/services/team/TeamProvisioningService.test.ts \
+ test/main/services/team/OpenCodePromptDeliveryLedger.test.ts \
+ test/features/member-work-sync/main/createMemberWorkSyncFeature.test.ts
+```
+
+```bash
+bun test \
+ src/services/opencode/OpenCodeBridgeCommandHandler.test.ts \
+ src/services/opencode/OpenCodeSessionBridge.test.ts \
+ src/services/opencode/OpenCodeTurnSettledObserver.test.ts \
+ src/services/opencode/OpenCodeCommandOutcomeStore.test.ts
+```
+
+Acceptance:
+
+- initial prompt acceptance is not delayed by full model turn;
+- accepted prompt is never duplicated by the same attempt;
+- visible reply/task progress still controls final delivery state;
+- member-work-sync and watchdog do not conflict.
+
+---
+
+### Phase 4 - Retry, Recreate, And Live Validation
+
+Score:
+
+`🎯 8 🛡️ 8 🧠 5`, roughly `180-350 LOC` plus live test scripts/results.
+
+Goal:
+
+After Phase 3 changes acceptance behavior, tune retry/recreate policies based on real evidence.
+
+#### Phase 4.1 Reclassify Timeouts
+
+Separate:
+
+```text
+acceptance_timeout
+observation_timeout
+response_proof_timeout
+mcp_unavailable
+session_recreate_failed
+provider_error
+accepted_observation_pending
+accepted_response_proof_missing
+```
+
+Do not collapse all of these into:
+
+```text
+OpenCode bridge command timed out
+```
+
+Why:
+
+- acceptance timeout may mean prompt was never accepted;
+- observation timeout may mean prompt accepted but model still running;
+- response proof timeout may mean model answered plain text or wrong tool;
+- MCP unavailable is actionable repair path;
+- provider error should not become a sync-nudge problem;
+- accepted observation pending should not create a duplicate prompt;
+- accepted response proof missing should use existing proof/watchdog logic.
+
+#### Phase 4.2 Retry Delay Policy
+
+Current retry delay is around 15 seconds.
+
+With accept-fast:
+
+- if prompt accepted, do not retry too quickly;
+- if prompt not accepted due to a repair failure, retry can stay relatively short;
+- if MCP unavailable after recreate, retry should allow session recovery time;
+- if provider error, retry policy should distinguish rate/credit/model unavailable from prompt failure.
+
+Candidate policy:
+
+```ts
+function getOpenCodeDeliveryRetryDelay(reason: DeliveryFailureReason): number {
+ switch (reason) {
+ case "acceptance_timeout":
+ return 10_000;
+ case "observation_timeout_after_acceptance":
+ return 60_000;
+ case "mcp_unavailable":
+ return 30_000;
+ case "provider_error":
+ return 90_000;
+ default:
+ return 15_000;
+ }
+}
+```
+
+Only implement after tests cover idempotency.
+
+#### Phase 4.3 Live Smoke Matrix
+
+Keep live tests narrow. Do not run a full expensive model matrix by default.
+
+Recommended live scenarios:
+
+1. Cheap OpenCode model, direct task assignment.
+2. OpenCode stale session recreate before prompt.
+3. OpenCode MCP missing/reattach before prompt.
+4. OpenCode task log lookup after session recreate.
+5. Member-work-sync turn-settled wakeup after accepted prompt.
+6. Active queue scenario: send two messages to one OpenCode member and prove the second does not prompt until the first has proof or terminal state.
+7. Recreated-session task logs: force/reuse a stale session, deliver a task, and verify task logs load from the accepted runtime session.
+
+Live command examples should be added to a runbook, not hardcoded into unit tests.
+
+Example env gates:
+
+```bash
+OPENCODE_E2E=1 \
+OPENCODE_DELIVERY_ACCEPT_FAST_LIVE=1 \
+pnpm vitest run test/main/services/team/OpenCodeAcceptFastDelivery.live-e2e.test.ts
+```
+
+Do not require paid model credentials for normal CI.
+
+#### Phase 4.4 Production Diagnostics
+
+Add a compact diagnostic summary visible in developer details:
+
+```text
+OpenCode delivery:
+- prompt accepted after 6.4s
+- session recreated once
+- MCP reattached
+- observation settled after 31.2s
+- visible reply proof received after 33.7s
+```
+
+Do not show this as a warning if final delivery succeeded.
+
+Acceptance:
+
+- delayed-start cases can be explained from artifacts;
+- successful replies do not show stale warning banners;
+- failed cases identify the failed layer.
+
+---
+
+## 7. Highest-Risk Implementation Details
+
+This section is the extra caution layer before coding. These are the places most likely to create subtle bugs.
+
+### 7.1 Exact Session Identity Must Flow End-To-End
+
+If any layer drops `runtimeSessionId` or `runtimePromptMessageId`, the system can fall back to current lane state and reintroduce the old bug.
+
+Required flow:
+
+```text
+orchestrator prompt_async accepted
+-> OpenCodeSendMessageCommandData.sessionId
+-> OpenCodeSendMessageCommandData.runtimePromptMessageId
+-> OpenCodeTeamRuntimeAdapter result
+-> OpenCodePromptDeliveryLedger runtime fields
+-> observeMessageDelivery exact session/prompt
+-> task-log session evidence
+```
+
+If a field is missing:
+
+- old ledger compatibility is allowed;
+- new accepted attempts should emit a diagnostic;
+- do not guess with current lane if exact evidence was expected and mismatched.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 7`, roughly `160-280 LOC`.
+
+### 7.2 Queue Slot Semantics Must Stay Completion-Based
+
+The active delivery slot must not be released by `accepted=true`.
+
+Safe state transitions:
+
+```text
+pending -> accepted
+accepted -> responded
+accepted -> unanswered
+accepted -> failed_retryable
+accepted -> failed_terminal
+```
+
+Unsafe transition:
+
+```text
+accepted -> terminal just because endpoint accepted
+```
+
+The active slot should be released only by:
+
+- read-commit allowed proof;
+- terminal failure;
+- existing queue/retry policy after proof checks.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 6`, roughly `80-160 LOC`.
+
+### 7.3 Observation Must Be Read-Only Unless It Updates The Same Ledger Record
+
+Observation should not create a new logical delivery. It should only enrich the existing ledger record.
+
+Rules:
+
+- `applyObservation()` should not increment attempts;
+- `applyObservation()` should not replace `inboxMessageId`;
+- observation timeout after acceptance should not immediately call `sendMessageToMember()`;
+- visible proof/materialization must run after observation result, not before returning stale pending state.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 5`, roughly `60-120 LOC`.
+
+### 7.4 Task Log Source Must Stay Conservative
+
+Task logs are user-facing audit evidence. Incorrect logs are worse than empty logs.
+
+Required filter order:
+
+```text
+team match
+member match
+session match when exact
+task marker or task window match
+time/window bounds
+dedupe
+sort
+render
+```
+
+Never include native tools from a session solely because the member owns the task. There must be an anchor:
+
+- task marker;
+- attribution bounds;
+- accepted delivery prompt window;
+- explicit source message/task reference.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 6`, roughly `140-260 LOC`.
+
+### 7.5 Member-Work-Sync And Watchdog Must Not Become Two Retry Loops
+
+Phase 3 can create more "accepted but no proof yet" states. That must not cause both systems to nudge at the same time.
+
+Rules:
+
+- turn-settled event only enqueues member-work-sync reconcile;
+- delivery watchdog owns response proof retry;
+- task-stall watchdog owns semantic task progress stalls;
+- member-work-sync nudge is suppressed by foreground unread actionable messages and watchdog cooldowns;
+- delivery retry is not triggered by member-work-sync alone.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 7`, roughly `120-220 LOC`.
+
+### 7.6 Timeout Taxonomy Must Be Visible In Artifacts
+
+Without a clear taxonomy, future debugging will again collapse into "OpenCode timed out".
+
+Every timeout should be classified as one of:
+
+```text
+pre_prompt_repair_timeout
+prompt_acceptance_timeout
+turn_observation_timeout
+response_proof_timeout
+command_status_timeout
+transcript_lookup_timeout
+```
+
+Diagnostics should be machine-readable and short. User-facing warnings should be based on final proof state, not raw timeout class.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 4`, roughly `80-160 LOC`.
+
+### 7.7 Proof Contract Drift Is A Bigger Risk Than Timeout Tuning
+
+The most dangerous accidental simplification is:
+
+```text
+OpenCode prompt accepted + assistant produced any output = delivery succeeded
+```
+
+That is false.
+
+Current behavior intentionally distinguishes:
+
+- visible reply to user;
+- peer relay;
+- task-linked progress proof;
+- plain-text fallback after tool failure;
+- work-sync lease/report;
+- task-stall remediation progress.
+
+If Phase 3 loses `messageKind`, `taskRefs`, `relayOfMessageId`, or `actionMode`, later observation can clear the wrong delivery.
+
+Required guard:
+
+```text
+read/responded commit must stay in TeamProvisioningService proof helpers
+```
+
+Do not move final proof semantics into orchestrator. The orchestrator can observe runtime events, but it should not decide app-level read/responded state.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 7`, roughly `120-240 LOC`.
+
+### 7.8 Task Log Cache Can Make A Fixed Session Lookup Look Broken
+
+Even if exact session transcript lookup works, the user can still see stale empty logs if cache keys do not include evidence identity.
+
+Fragile cache layers:
+
+```text
+OpenCodeTaskLogStreamSource short cache
+BoardTaskLogStreamService layout cache
+runtime transcript bridge temp-output path
+renderer query/cache layer
+```
+
+Rules:
+
+- exact session evidence changes the OpenCode source cache key;
+- fallback segment IDs include session identity;
+- layout merge must not drop fallback segments with distinct session IDs;
+- fallback suppression must be scoped to the same task owner/session/provider, not any execution record;
+- no long TTL should be introduced for OpenCode exact evidence;
+- diagnostics should say whether the empty state came from no evidence, transcript miss, projection miss, or cache hit.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 6`, roughly `100-220 LOC`.
+
+### 7.9 Sync Control Plane Must Stay Mostly Invisible To Users
+
+Member-work-sync is a control plane, not conversation content. If Phase 3 or 4 starts surfacing every sync nudge, users will see "automation spam" even when the system is behaving correctly.
+
+Rules:
+
+- normal Messages feed hides `member_work_sync_nudge` by default;
+- task/activity/debug surfaces can expose sync details when explicitly requested;
+- work-sync nudge delivery does not count as user-visible response proof for unrelated messages;
+- sync nudge rate limits and watchdog cooldowns remain centralized in member-work-sync services.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 4`, roughly `40-90 LOC`.
+
+### 7.10 Timeout Recovery Can Lose The Only Exact Prompt Anchor
+
+The timeout-recovery path is where exact runtime identity matters most. If bridge timeout recovery synthesizes an accepted response but drops `runtimePromptMessageId`, later observe and task-log lookup regress to current-session heuristics.
+
+Rules:
+
+- `runtimePromptMessageId` is part of the acceptance contract;
+- `commandStatus` recovery must preserve it even when `sendMessageData` is absent;
+- app adapter result must expose it;
+- ledger dedupes repeated recovery for the same `deliveryAttemptId + runtimePromptMessageId`.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 5`, roughly `70-140 LOC`.
+
+### 7.11 Turn-Settled Event Schema Drift Can Disable OpenCode Work-Sync Silently
+
+Member-work-sync uses a payload normalizer with strict provider/source/event fields. A harmless-looking orchestrator schema rename can stop OpenCode turn-settled events from being consumed.
+
+Rules:
+
+- keep `source = agent-teams-orchestrator-opencode`;
+- keep `eventName = runtime_turn_settled`;
+- keep `hookEventName = Stop`;
+- keep `runtimePromptMessageId` mapping to `threadId`;
+- version any breaking schema change and migrate normalizer in the same cut.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 4`, roughly `40-90 LOC`.
+
+### 7.12 Advisory Classification Must Not Turn Lag Into Error
+
+Accept-fast increases the chance of a period where prompt acceptance is known but proof is not observed yet. That period is expected and should not become an immediate user-facing error.
+
+Rules:
+
+- observation lag after acceptance is `checking`/pending, not hard failure;
+- hard tool/session errors still surface according to existing policy;
+- superseding visible reply/task progress clears stale advisory candidates;
+- renderer should not show a warning after successful proof;
+- backend proof paths must invalidate `TeamMemberRuntimeAdvisoryService` cache for the affected member;
+- tests should not pass only because the 30 second advisory cache expires.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 5`, roughly `60-130 LOC`.
+
+### 7.12.1 Advisory Cache Can Preserve A Correctly-Suppressed Warning
+
+Even if `OpenCodeRuntimeDeliveryAdvisoryPolicy` is correct, `TeamMemberRuntimeAdvisoryService` can return an old cached warning for up to 30 seconds.
+
+This is a backend cache consistency problem, not a renderer problem.
+
+Rules:
+
+- proof/materialization paths call `TeamDataService.invalidateMemberRuntimeAdvisory(teamName, memberName)`;
+- if proof path only knows the team, call `invalidateTeamRuntimeAdvisories(teamName)` rather than guessing a member;
+- do not add renderer-side "hide warning after new message" heuristics;
+- do not shorten the TTL as the primary fix, because that increases IO and still leaves a stale window.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 4`, roughly `50-110 LOC`.
+
+### 7.13 Payload Hash Drift Can Break Recovery And Mark Real Messages Failed
+
+Adding optional transport fields to the send command can accidentally change `payloadHash`. That can make commandStatus recovery reject the true accepted command or make the ledger fail an existing logical message as a payload mismatch.
+
+Rules:
+
+- treat app ledger hash and bridge command hash as separate contracts;
+- freeze canonical bridge hash input before adding `settlementMode`;
+- exclude response-only fields from bridge send hash;
+- keep app ledger hash limited to logical/user-visible delivery payload;
+- add tests for unchanged hashes when only transport/observation knobs differ;
+- add tests that real text/taskRefs/actionMode changes still change app ledger hash.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 4`, roughly `60-120 LOC`.
+
+### 7.14 Runtime Store Compatibility Can Fail Before Feature Logic Runs
+
+`VersionedJsonStore` validates on every locked update. If schema normalization is wrong, delivery code may fail before reaching the new acceptance/observe logic.
+
+Rules:
+
+- old ledger records without new fields must parse;
+- update paths should normalize missing arrays/nulls;
+- future-schema records are still quarantined, but records that only lack new optional fields are not;
+- compatibility tests must use realistic old ledger JSON, not only constructed TS objects.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 5`, roughly `80-160 LOC`.
+
+### 7.15 Acceptance Unknown Must Not Be Upgraded By Optimism
+
+Bridge timeout plus failed commandStatus is not prompt acceptance. It is `acceptanceUnknown`.
+
+Rules:
+
+- only endpoint acceptance or strict commandStatus evidence can clear `acceptanceUnknown`;
+- do not synthesize `runtimePromptMessageId`;
+- retry/observe policy for unknown acceptance remains conservative;
+- exact observe is optional only after real prompt identity exists.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 5`, roughly `70-140 LOC`.
+
+### 7.16 Coarse Board Progress Can Suppress Warnings But Cannot Prove Every Delivery
+
+The proof reader can see board progress after a prompt by reading task comments/history. That is useful, but it is not equivalent to a visible reply.
+
+Rules:
+
+- board progress can suppress stale advisory candidates for the same member/task;
+- board progress cannot satisfy peer relay recipient correctness;
+- board progress cannot mark a normal direct message read unless existing read-commit policy allows it;
+- weak start-only comments stay weak and should not become response proof;
+- work-sync/task-stall can define their own board-progress proof contract, but it must not leak into normal delivery.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 5`, roughly `80-160 LOC`.
+
+### 7.17 Invalidation Must Cross The Main/Worker Boundary
+
+Runtime advisory data can be served by both in-process `TeamDataService` and the team-data worker. Updating only one cache leaves the UI stale depending on which path serves the next snapshot.
+
+Rules:
+
+- use the existing `setMemberRuntimeAdvisoryInvalidator` boundary for proof paths;
+- invalidate both `teamDataService` and `TeamDataWorkerClient`;
+- if extracted async observer cannot access that callback, pass an invalidation port into it;
+- do not import renderer or IPC code into proof/domain logic.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 3`, roughly `30-80 LOC`.
+
+### 7.18 Member-Scoped Invalidation Can Be Silently Dropped
+
+`TeamDataWorkerClient` intentionally validates names before posting best-effort worker messages:
+
+```ts
+if (!SAFE_NAME_RE.test(teamName)) return;
+if (memberName !== undefined && !SAFE_NAME_RE.test(memberName)) return;
+```
+
+That is correct at the worker IPC boundary, but it creates a subtle stale-cache risk for advisory invalidation if a member name is not worker-safe.
+
+Rules:
+
+- invalidation port should canonicalize member names using the same configured member name used by team snapshots;
+- if canonical member invalidation cannot be proven worker-safe, call team-scoped invalidation as a conservative fallback;
+- proof paths must never treat a failed best-effort worker invalidation as delivery failure;
+- add a regression test that unsafe member-scoped invalidation does not leave stale worker advisory state.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 3`, roughly `30-70 LOC`.
+
+### 7.19 Message Kind Drift Can Break Store Validation Or Filtering
+
+`InboxMessageKind` is used by shared types, inbox persistence, OpenCode bridge DTOs, prompt delivery ledger, and renderer filtering. These whitelists are not generated from one source.
+
+Rules:
+
+- keep message kind parity tests across shared type literals, inbox reader, OpenCode ledger validator, bridge contract, and renderer automation filters;
+- never depend on a message kind for proof policy until it round-trips through stored inbox and ledger fixtures;
+- if a kind is unsupported for OpenCode delivery, fail before ledger write with a structured diagnostic.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 3`, roughly `30-80 LOC`.
+
+### 7.20 Retry Prompt Text Can Accidentally Change Logical Idempotency
+
+Retry control text is prepended to the OpenCode prompt, but it is not the user's original message. If it enters `hashOpenCodePromptDeliveryPayload()`, every retry can look like a different payload and force a terminal mismatch.
+
+Rules:
+
+- compute app ledger hash before adding repair control text;
+- keep retry control text out of inbox row text and out of app logical hash;
+- include actual prompt text only in bridge command hash, scoped to one concrete command attempt;
+- assert that retry control text changes do not create app ledger payload mismatch.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 3`, roughly `30-70 LOC`.
+
+### 7.21 Tool-Error Retries Can Loop If MCP Repair Is Not A Gate
+
+When the model hits `message_send` `Not connected`, a retry prompt alone is not enough. It can produce another tool error or another plain assistant fallback in the same broken runtime.
+
+Rules:
+
+- every retry prompt goes through MCP/session readiness repair before prompt submission;
+- failed readiness repair schedules retry or surfaces transport diagnostics, but does not send another prompt;
+- plain assistant fallback is materialized only when existing semantic/recipient gates pass;
+- work-sync and normal delivery continue to use separate proof contracts.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 5`, roughly `80-180 LOC`.
+
+### 7.22 TaskRefs-Only Reply Recovery Can Produce False Positives
+
+TaskRefs-only visible reply recovery is deliberately weaker than `relayOfMessageId`. It can be useful when OpenCode missed correlation metadata, but it can also match an older or unrelated task status message by the same member.
+
+Rules:
+
+- use taskRefs-only recovery after exact relay/message-id recovery fails;
+- require semantic sufficiency and expected sender;
+- keep source-missing compatibility diagnostic-only but visible in developer logs/details;
+- do not mark read/responded when taskRefs-only candidates are ambiguous;
+- add tests with two candidate replies for the same task and different destinations.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 4`, roughly `50-110 LOC`.
+
+### 7.23 Work-Sync Inbox Payload Conflict Can Become A Silent Stale Wake
+
+The work-sync outbox already treats `payloadHash` as part of the idempotency contract, but the inbox sink can still dedupe by `messageId` only.
+
+Rules:
+
+- preserve outbox payload conflict detection as the first line of defense;
+- make inbox sink either compare `payloadHash` metadata or prove equivalence from stable payload fields before returning `inserted=false`;
+- if the existing inbox row cannot be proven equivalent, return `conflict=true`;
+- do not schedule `member_work_sync_nudge_existing` delivery wake after a conflict;
+- include conflict diagnostics in work-sync audit, not in the normal Messages feed.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 4`, roughly `50-120 LOC`.
+
+### 7.24 Work-Sync Can Be Mistaken For A Fast Assignment Wake
+
+Because work-sync has phase2 readiness, rate limits, lifecycle checks, busy checks, and foreground-unread suppression, using it as the primary way to wake an OpenCode member for a new task will create unpredictable delay.
+
+Rules:
+
+- normal task assignment delivery remains the primary wake path;
+- delivery watchdog handles "accepted but no proof" and `message_send` tool errors;
+- work-sync handles board-state reconciliation after delivery/turn activity;
+- a skipped work-sync nudge must not be interpreted as proof that normal delivery succeeded;
+- launch/bootstrap suppression must not suppress normal bootstrap/delivery prompts.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 5`, roughly `70-160 LOC`.
+
+### 7.25 Correct Task Logs Can Still Look Late Without Narrow Refresh Events
+
+Exact-session task log lookup can be correct but invisible in the UI if no task-scoped refresh event fires after evidence is written.
+
+Rules:
+
+- emit `task-log-change` with `taskSignalKind: "log"` when task-log evidence changes;
+- keep event fanout per taskRef, not per native tool row;
+- keep renderer loading lazy for hidden panels;
+- summary badge and opened stream must share the same refresh trigger;
+- do not use broad `refreshTeamData` as the primary way to refresh task logs.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 4`, roughly `60-140 LOC`.
+
+### 7.26 Accept-Fast Can Accidentally Drain The Inbox Queue
+
+If `accepted=true` is treated like "delivered and done", the relay loop can push the next unread message into the same OpenCode member before the first prompt has produced proof.
+
+Rules:
+
+- accepted pending prompt keeps the active ledger slot;
+- relay loop stops after accepted pending delivery;
+- later unread messages wait for proof, terminal failure, or existing retry policy;
+- member-work-sync nudge wake cannot bypass this queue;
+- queue behavior is tested with mixed foreground and work-sync inbox rows.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 5`, roughly `70-150 LOC`.
+
+### 7.27 Old Orchestrator Can Silently Disable Exact Observation
+
+Accept-fast spans two repos. A user can point `CLAUDE_DEV_RUNTIME_ROOT` at an older orchestrator build that does not know the new exact prompt/session fields.
+
+Rules:
+
+- use explicit OpenCode bridge capability detection;
+- if capability is missing, run observed mode and log why;
+- never infer accept-fast support from a successful generic bridge command;
+- missing exact fields after an acceptance-mode response become `acceptanceUnknown`;
+- tests must cover old response shapes and unsupported command errors.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 5`, roughly `80-180 LOC`.
+
+### 7.28 Lane Registry Lock Timeout Can Corrupt Diagnosis If It Owns Evidence
+
+`lanes.json` is a shared file-lock boundary. It can be temporarily unavailable while the OpenCode runtime is still alive and the exact session evidence is valid.
+
+Rules:
+
+- store accepted prompt identity outside `lanes.json`;
+- do not downgrade accepted prompt to failed because lane diagnostics failed after acceptance;
+- exact task-log lookup reads stored session evidence before current lane fallback;
+- lane registry failures that occur before the first runtime evidence must block safely;
+- avoid unbounded lane-index writes from event fanout.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 5`, roughly `90-220 LOC`.
+
+### 7.29 Runtime Delivery Dedupe Can Hide The Proof Message ID
+
+`TeamInboxWriter.sendMessage()` can return an existing message ID for duplicate `runtime_delivery` rows. If proof code keeps using the attempted message ID, advisory clearing and ledger correlation can drift.
+
+Rules:
+
+- always propagate the returned inbox message ID from `sendMessage()`;
+- ledger proof should correlate against returned existing ID when deduped;
+- do not use dedupe as proof without ledger validation;
+- keep dedupe scoped to same `relayOfMessageId`;
+- non-runtime control messages do not use runtime-delivery dedupe.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 4`, roughly `50-130 LOC`.
+
+### 7.30 Bridge Idempotency Can Drift From App Delivery Idempotency
+
+The app delivery ledger hash and the bridge command idempotency key solve different problems. Accept-fast transport fields can accidentally change the bridge command body while the logical user message is still the same.
+
+Rules:
+
+- persist settlement mode on the delivery record;
+- keep app logical payload hash independent from retry-control text and observation tuning;
+- keep bridge requestHash stable for a single delivery attempt;
+- store original bridge requestId for timeout recovery;
+- require commandStatus recovery to echo exact prompt identity before upgrading to accepted.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 5`, roughly `80-180 LOC`.
+
+### 7.31 Runtime `message_send` Conflicts Can Be Misread As MCP Failure
+
+Runtime delivery conflicts are agent-to-app idempotency conflicts. They are not OpenCode MCP readiness failures. Misclassifying them can make the watchdog repair the wrong layer.
+
+Rules:
+
+- keep `idempotency_conflict`, `destination_write_failed`, and `mcp_not_connected` as separate failure reasons;
+- runtime delivery journal conflicts do not trigger OpenCode MCP reattach by themselves;
+- prompt delivery repair can mention a previous message_send conflict only as payload guidance;
+- visible proof requires verified destination write, not transcript-only message_send attempt;
+- if retry asks the model to resend, specify whether to reuse exact payload or create a new idempotency key.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 5`, roughly `70-160 LOC`.
+
+### 7.32 User-Visible Reply Can Be Stored Where Proof Reader Does Not Look
+
+If `RuntimeDeliveryService` writes a direct user reply to `sentMessages.json` but `OpenCodeRuntimeDeliveryProofReader` only scans inbox rows, the UI can show a valid reply while advisory/watchdog still thinks proof is missing.
+
+Rules:
+
+- proof reader must cover `user_sent_messages`, `member_inbox`, and cross-team destinations according to the runtime delivery destination kind;
+- source-string compatibility is not enough without matching destination location or relay metadata;
+- do not accept arbitrary sent messages from the lead as OpenCode member proof;
+- tests must cover direct user reply and lead-recipient fallback separately.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 5`, roughly `100-220 LOC`.
+
+### 7.33 Concurrent Sent Message Writes Can Drop Runtime Proof
+
+`sentMessages.json` currently has a simpler append path than inbox files. Concurrent OpenCode members can both write direct user replies and race.
+
+Rules:
+
+- add a locked append/verify path for sent messages before relying on it for proof;
+- dedupe by destination message ID under lock;
+- keep MAX_MESSAGES trimming deterministic and proof-safe;
+- do not change normal lead-process rendering semantics while adding the lock;
+- test concurrent direct user runtime replies.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 4`, roughly `70-160 LOC`.
+
+### 7.34 Runtime TaskRefs Can Be Silently Dropped
+
+The runtime delivery normalizer currently filters taskRefs to strings. If OpenCode sends structured taskRefs, task context can disappear without an error.
+
+Rules:
+
+- make taskRefs input schema explicit in MCP prompt and app normalizer;
+- reject or preserve invalid shapes, never silently drop all context;
+- keep hash/proof/task-log matching aligned with the selected shape;
+- test prompt artifacts and runtime delivery normalizer together.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 4`, roughly `50-120 LOC`.
+
+### 7.35 Unknown Secondary Runtime Can Fall Back To Primary Lane
+
+If lane metadata is missing, a secondary OpenCode member control call can accidentally resolve as `primary`. That is a correctness risk for delivery journals, task-log evidence, and heartbeats.
+
+Rules:
+
+- fail closed for unresolved non-lead secondary members;
+- allow primary fallback only for true primary OpenCode runtime;
+- use exact committed session evidence when available;
+- keep message delivery, task event, and heartbeat lane resolution identical.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 5`, roughly `90-220 LOC`.
+
+### 7.36 Work-Sync Can Jump Ahead If Relay Priority Is Misread
+
+The code sorts OpenCode inbox relay candidates ascending by numeric priority. Work-sync currently has a larger number because it should run later, not sooner.
+
+Rules:
+
+- document sort direction next to `getOpenCodeInboxRelayPriority()`;
+- test foreground task assignment before work-sync, including older work-sync messages;
+- keep `onlyMessageId` as an explicit override only;
+- preserve accepted-pending queue stop behavior;
+- keep busy-status diagnostics separate from UI hidden-row filtering.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 3`, roughly `30-90 LOC`.
+
+### 7.37 Hidden Automation Rows Can Become Undebuggable
+
+Hiding work-sync and task-stall rows from Messages is good UX, but hiding them too early can remove the only visible clue that an automation path fired.
+
+Rules:
+
+- keep durable inbox rows intact and unread until the delivery/proof path consumes them;
+- keep diagnostic/audit views able to opt into automation rows;
+- ensure `TeamInboxReader` preserves automation messageKind values;
+- do not use UI-filtered feeds for delivery, watchdog, or prompt ledger rebuild;
+- add tests where hidden work-sync is delivered to OpenCode even though Messages does not show it.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 4`, roughly `60-140 LOC`.
+
+### 7.38 File-Change Backfill Can Be Broken By Delivery Context Drift
+
+OpenCode Changes review depends on `ChangeExtractorService` and task ledger backfill, not on task-log native tool rows. Accept-fast and retry changes can accidentally alter or remove the delivery context that backfill needs.
+
+Rules:
+
+- keep delivery context hash stable for logical delivery identity;
+- pass exact session/member/lane/task evidence into backfill;
+- do not cache negative backfill when a delivery context exists or appears later;
+- keep metadata-only evidence as manual-review, not as "no changes";
+- verify current evidence contract before caching duplicates-only results.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 5`, roughly `120-260 LOC`.
+
+### 7.39 Runtime Store Recovery Can Lose Canonical Evidence
+
+Manifest recovery is useful, but prompt/runtime delivery evidence is not disposable. A broad cleanup after corruption can make a real visible reply look unproven.
+
+Rules:
+
+- distinguish diagnostic-only stores from delivery ledgers;
+- never drop prompt/runtime delivery ledgers without quarantine and rebuild status;
+- rebuild delivery stores from canonical destination writes, not provider session guesses alone;
+- do not let provider rebuild overwrite newer canonical destination evidence;
+- include recovery action and source in artifacts.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 5`, roughly `100-240 LOC`.
+
+### 7.40 Stale OpenCode Runtime Can Write After Team Stop Or Relaunch
+
+A stopped team can still have a stale `opencode serve` process briefly alive. A restarted team can also have old session callbacks arriving after a new run is current.
+
+Rules:
+
+- validate current run/lane/tombstone immediately before every runtime-originated durable write;
+- reject stale evidence as stale runtime evidence, not as generic provider failure;
+- do not let stale delivery clear advisory, mark inbox read, update liveness, or emit task-log refresh;
+- stop/relaunch cleanup must not delete delivery ledgers before they are captured in artifacts;
+- tests must cover app restart with orphaned lane evidence and stale runtime process.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 5`, roughly `120-280 LOC`.
+
+### 7.41 Destination Write Succeeds But UI Warning Stays Stale
+
+Runtime delivery can correctly write a visible reply while renderer cache or member advisory still shows "delivery is being checked".
+
+Rules:
+
+- destination write emits the same event family as normal user-facing writes;
+- advisory invalidation follows proof-capable writes;
+- hidden automation rows still invalidate diagnostic/advisory caches;
+- worker-cache invalidation failure is diagnostic-only and does not block the durable write;
+- tests must assert both durable store state and renderer/event fanout.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 4`, roughly `70-180 LOC`.
+
+### 7.42 Rebuild Can Turn Ambiguous Destination Rows Into False Success
+
+After corruption recovery, it is tempting to rebuild prompt ledger state from any matching visible row. That can hide a real transport failure or clear the wrong delivery.
+
+Rules:
+
+- strict relay/source/destination proof is required for visible proof;
+- exact runtime prompt identity is required for transport accepted state;
+- ambiguous destination matches remain ambiguous;
+- hidden automation rows only rebuild automation-intent deliveries;
+- stale run rows are ignored for current run rebuild.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 5`, roughly `90-220 LOC`.
+
+### 7.43 Work-Sync Timing Can Create Either Long Delays Or Spam
+
+The member-work-sync queue has fast trigger defaults, but a broad quiet-window override or scheduled-only dispatch path can turn normal assignment wakeup into a minute-scale delay. The opposite mistake is sending work-sync while foreground delivery is still pending.
+
+Rules:
+
+- keep trigger-specific timing explicit;
+- do not let startup/member-spawn scans dispatch nudges before launch readiness;
+- foreground unread or accepted-pending delivery suppresses generic work-sync;
+- scheduled dispatcher recovers due outbox rows but does not replace direct delivery wake;
+- diagnostics must show why a nudge is queued, delayed, skipped, or rate-limited.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 5`, roughly `120-260 LOC`.
+
+### 7.44 Slow Delivery Has No Single Correlation Timeline
+
+Without a correlated phase timeline, every slow run looks like "OpenCode is slow" even when the delay is actually queue timing, relay busy state, MCP repair, provider latency, or task-log projection.
+
+Rules:
+
+- add shared correlation fields to existing ledgers, not a cross-layer mega-log;
+- include phase timestamps from task assignment to first tool/proof;
+- keep timeline developer-only;
+- live tests should print the timeline on slow pass and failure.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 4`, roughly `80-200 LOC`.
+
+### 7.45 UI Can Show "Working On" While Runtime Is Failed
+
+Task ownership and runtime liveness are different facts. If the card prioritizes task label over launch failure, users believe the agent is working while it cannot receive prompts.
+
+Rules:
+
+- runtime health/advisory status outranks task labels visually;
+- task label remains context, not liveness proof;
+- stale spawn snapshot after stop/offline must not resurrect working status;
+- hover/detail surfaces should separate assignment, runtime, lane/session, and worktree facts.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 3`, roughly `50-130 LOC`.
+
+### 7.46 Transcript-Only Plain Text After Tool Error Can Look Like Success
+
+OpenCode may write useful text in its transcript after `message_send` fails. That text is not a delivered app message unless it lands in the durable destination store.
+
+Rules:
+
+- never clear delivery advisory from transcript-only fallback text;
+- retry/repair should preserve the user's logical message id;
+- task changes can be visible while reply proof remains missing;
+- do not synthesize app-visible replies from transcript text.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 4`, roughly `80-180 LOC`.
+
+### 7.47 Agenda Fingerprint Churn Can Cause Nudge Storms
+
+If volatile presentation data enters the agenda fingerprint, the system can invalidate valid reports on every refresh and repeatedly schedule sync nudges.
+
+Rules:
+
+- keep fingerprint payload semantic and minimal;
+- add regression tests before adding any new field to `AgendaFingerprintPayload`;
+- treat `sourceRevision` as dangerous until its semantics are documented and tested;
+- report token invalidation must happen because actionable work changed, not because UI state changed.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 4`, roughly `70-160 LOC`.
+
+### 7.48 Stale Report Token Can Suppress Real Work
+
+Offline/pending report replay can be useful, but accepting stale `caught_up` or `still_working` after the board changed would hide real work from the reconciler.
+
+Rules:
+
+- re-read current agenda on every report and replay;
+- reject stale fingerprint/token before applying leases;
+- rejected reports can be stored as diagnostics only;
+- pending replay never marks a member caught up unless current agenda is empty.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 5`, roughly `90-220 LOC`.
+
+### 7.49 Turn-Settled Event Can Be Lost Or Routed To Wrong Member
+
+Runtime hooks and observers are external edges. A crash between write and drain, duplicate file, stale provider payload, or wrong transcript match can silently break work-sync.
+
+Rules:
+
+- file state transitions are incoming -> processing -> processed/invalid;
+- stale processing recovery is tested;
+- provider mismatch and removed members are rejected;
+- duplicate events are harmless at queue/outbox level;
+- malformed files are quarantined, not retried forever.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 5`, roughly `120-280 LOC`.
+
+### 7.50 Task Impact Routing Can Miss The Real Owner Or Ping Everyone
+
+Task-change routing is a tradeoff between narrow correctness and safe fallback. A bad resolver can either miss a member who needs a wakeup or enqueue the entire team too often.
+
+Rules:
+
+- owner/reviewer/lead/dependency resolution has direct unit coverage;
+- unknown task ID fallback is diagnostic and rate-limited downstream;
+- removed members are filtered;
+- resolver exceptions fall back to scan, not silent drop;
+- team-wide fallback must not bypass nudge readiness/cooldown gates.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 5`, roughly `90-220 LOC`.
+
+### 7.51 Busy Signal Can Suppress Nudges Forever
+
+If a tool finish/reset event is missed, an in-memory active-tool busy flag can suppress sync nudges longer than intended.
+
+Rules:
+
+- busy is advisory, not authoritative;
+- every busy reason has a bounded retryAfter;
+- active tool state has a reset/drop path;
+- foreground delivery ignores generic work-sync busy;
+- busy-signal failure delays briefly and logs diagnostics.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 4`, roughly `60-160 LOC`.
+
+### 7.52 Outbox Dispatch Without Revalidation Can Deliver Stale Nudges
+
+Planning and dispatch happen at different times. If dispatch trusts the planned row without reloading agenda and metrics, stale work-sync messages can wake agents after they already reported or completed work.
+
+Rules:
+
+- dispatch revalidates agenda, lifecycle, phase2 activation, busy, rate limit, and watchdog cooldown;
+- stale outbox items are superseded;
+- retryable blockers receive bounded nextAttemptAt;
+- delivered review-pickup event IDs prevent repeat delivery for the same request.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 5`, roughly `120-260 LOC`.
+
+### 7.53 Existing Inbox MessageId Can Hide Payload Drift
+
+Message ID idempotency is useful, but returning success for an existing row without checking payload shape can hide a stale or corrupted hidden automation message.
+
+Rules:
+
+- sink stays behind outbox payloadHash validation;
+- future sink reuse must compare payloadHash or messageKind/source/taskRefs;
+- existing row ambiguity is conflict, not delivered;
+- hidden automation rows remain debug-readable.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 4`, roughly `70-160 LOC`.
+
+### 7.54 Broad Queue Quiet Window Can Reintroduce Minute-Scale Starts
+
+The event queue supports fast trigger defaults, but a broad `queueQuietWindowMs` override can delay `turn_settled` and `tool_finished` unless per-trigger timing is explicit.
+
+Rules:
+
+- do not use broad quietWindow as production tuning for all triggers;
+- preserve fast trigger defaults or explicit triggerTiming;
+- maxCoalesceWait is tested for each trigger family;
+- diagnostics expose the timing decision.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 4`, roughly `60-150 LOC`.
+
+### 7.55 Targeted Recovery Can Accidentally Become Global Early Nudging
+
+OpenCode targeted recovery exists because OpenCode has a provider-specific runtime delivery path. Expanding that bypass to all providers would skip shadow-readiness safety.
+
+Rules:
+
+- targeted recovery stays provider-specific;
+- strict review pickup is the only cross-provider early exception;
+- non-OpenCode secondary members wait for phase2 readiness unless explicitly covered by a new provider adapter and tests;
+- dispatch-time safety checks still apply.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 5`, roughly `80-200 LOC`.
+
+### 7.56 Status Read Refresh Can Become Hidden Work
+
+Refreshing stale work-sync status on read is useful, but a renderer poll should not become a hidden delivery loop.
+
+Rules:
+
+- stale read enqueues reconcile only;
+- queue coalesces repeated reads;
+- stale read never writes inbox or sends prompts directly;
+- inactive team checks remain authoritative.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 4`, roughly `60-140 LOC`.
+
+### 7.57 Scheduled Dispatcher Can Mask Fresh Delivery Bugs
+
+If fresh assignment wakeups rely on the periodic dispatcher, users can see minute-scale delays and the root foreground delivery bug stays hidden.
+
+Rules:
+
+- scheduler is recovery-only;
+- fresh assignment uses foreground delivery and event queue;
+- slow-start diagnostics identify queue vs scheduler path;
+- scheduler only scans lifecycle-active teams.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 4`, roughly `50-130 LOC`.
+
+### 7.58 Provider Hook Wiring Can Look Enabled But Emit Nothing
+
+Claude, Codex, and OpenCode do not share one installation path. A green Claude Stop hook test does not prove Codex env injection or OpenCode bridge env injection.
+
+Rules:
+
+- test each provider's actual emitter path;
+- log install mode and spool root diagnostics;
+- missing env should degrade to no turn-settled signal, not failed foreground delivery.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 4`, roughly `80-180 LOC`.
+
+### 7.59 Normalizer Drift Can Route Wrong Provider Events
+
+If provider/source validation becomes loose, a malformed runtime event can enqueue reconcile for the wrong member or provider.
+
+Rules:
+
+- provider and source strings are contract fields;
+- claimed-provider source mismatch fails closed;
+- duplicate identity is stable and tested.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 4`, roughly `60-140 LOC`.
+
+### 7.60 Drain Scheduler Failure Can Silently Stop Reconcile
+
+If the drain scheduler stops after one exception, live agents can keep finishing turns while the app never sees events.
+
+Rules:
+
+- scheduler failure is warning-only;
+- next tick still runs;
+- stale processing files are recovered or quarantined;
+- no drain path writes inbox directly.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 4`, roughly `70-160 LOC`.
+
+### 7.61 Audit Journal Loss Can Hide Diagnostics But Must Not Change State
+
+The audit journal is rotated, truncated, and warning-only. Treating it as canonical proof would create hidden data-loss bugs.
+
+Rules:
+
+- audit is diagnostics only;
+- canonical state remains in stores/ledgers/outbox/inbox/runtime event store;
+- audit append failure cannot fail business flows.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 3`, roughly `30-90 LOC`.
+
+### 7.62 Sparse Metrics Can Accidentally Enable Or Disable Phase 2
+
+Shadow readiness can become misleading if empty, corrupt, or truncated metrics are interpreted as healthy.
+
+Rules:
+
+- empty metrics remain collecting/not-ready;
+- corrupt/truncated data is conservative;
+- high nudge, churn, or rejection rates block readiness.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 4`, roughly `70-150 LOC`.
+
+### 7.63 Raw Member Names Can Corrupt Per-Member Stores
+
+Directly joining member names into storage paths can split state, collide with reserved names, or write outside the intended member directory.
+
+Rules:
+
+- use `MemberWorkSyncStorePaths` and `TeamMemberStoragePaths`;
+- store canonical key and display name separately;
+- path uncertainty fails closed before nudge dispatch.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 3`, roughly `40-110 LOC`.
+
+### 7.64 Event Detail Shape Drift Can Drop Sync Triggers
+
+If `TeamChangeEvent.detail` changes shape, router parsing can silently miss inbox, tool, task, or turn-settled events.
+
+Rules:
+
+- parse detail defensively;
+- cover every emitted detail shape with tests;
+- fall back to team-wide only for durable task changes where the exact target is unknown.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 4`, roughly `80-180 LOC`.
+
+### 7.65 Wake Failure Can Make Delivered Nudges Look Idle
+
+The inbox row can be inserted and outbox marked delivered, while the runtime wake fails and the agent does not process it until a later trigger.
+
+Rules:
+
+- keep inbox insert as durable boundary;
+- make wake failure visible in diagnostics;
+- allow safe re-wake without duplicate inbox insert.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 4`, roughly `70-160 LOC`.
+
+### 7.66 Pending Report Replay Can Become Infinite Startup Work
+
+Transient reporter failures leave intents pending. Without retry bounds or stale cutoff, every startup can keep replaying the same bad intent.
+
+Rules:
+
+- permanent validation failures are marked processed;
+- transient failures are bounded and diagnostic;
+- replay always validates against current agenda.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 5`, roughly `90-220 LOC`.
+
+### 7.67 Review-Pickup Escalation Can Double Notify Lead
+
+Plan-time and dispatch-time review-pickup failures can both escalate the same review request unless keyed idempotently.
+
+Rules:
+
+- escalation key includes review request IDs and reason class;
+- one unchanged review request produces one escalation;
+- generic agenda sync never uses review-pickup bypass.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 5`, roughly `90-220 LOC`.
+
+### 7.68 Disposed Feature Instances Can Dispatch Stale Nudges
+
+If timers or running queue work survive feature disposal, old dependencies can write inbox rows after app reload or test teardown.
+
+Rules:
+
+- composition owns every timer;
+- dispose is idempotent;
+- stale in-flight work still passes lifecycle/revalidation before writing.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 4`, roughly `70-180 LOC`.
+
+### 7.69 Provider Identity Drift Can Pick The Wrong Delivery Path
+
+Agenda items can be correct while provider id is stale, inferred incorrectly, or hidden by config/meta merge. That can route OpenCode-specific remediation to the wrong path or block it unexpectedly.
+
+Rules:
+
+- provider identity is dispatch metadata, not agenda fingerprint by default;
+- removedAt and provider metadata merge rules are explicit;
+- unknown provider fails closed for provider-specific direct delivery.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 4`, roughly `70-160 LOC`.
+
+### 7.70 Claimed Outbox Rows Can Get Stuck Forever After Crash
+
+A dispatcher can claim a pending nudge and crash before marking delivered, superseded, or failed. Without a claim lease, that row is no longer due and the agent never receives the nudge.
+
+Rules:
+
+- claimed status has a lease, not permanent ownership;
+- stale claims are recovered under lock;
+- late writes from old attempts are ignored by attemptGeneration.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 5`, roughly `100-240 LOC`.
+
+### 7.71 Poison Turn-Settled Payload Can Churn The Drain Forever
+
+Payloads that repeatedly throw after being claimed can bounce from processing back to incoming forever.
+
+Rules:
+
+- processing retries are counted durably;
+- repeated failures quarantine;
+- one poison file cannot block later files.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 5`, roughly `100-240 LOC`.
+
+### 7.72 IPC Or HTTP Source Drift Can Trust The Wrong Actor
+
+Typed renderer calls and HTTP route payloads are not runtime-trusted. If report source is mislabeled, app-originated reports can look like MCP tool reports.
+
+Rules:
+
+- validate and normalize at IPC/HTTP adapters;
+- preserve report provenance;
+- invalid input cannot create sync storage.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 4`, roughly `80-180 LOC`.
+
+### 7.73 Manual Status Reads Can Train Phase 2 Readiness
+
+Opening diagnostics repeatedly can create `status_evaluated` metrics if request-triggered reconciles are counted exactly like queue/runtime reconciles.
+
+Rules:
+
+- separate request/manual metrics from automation readiness metrics;
+- UI refresh never dispatches;
+- readiness diagnostics show event provenance.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 4`, roughly `80-180 LOC`.
+
+### 7.74 Corrupt Stall Journal Can Suppress Work-Sync Forever
+
+Fail-closed watchdog cooldown is safe against spam, but corrupt/unreadable journal data can become an invisible permanent suppressor.
+
+Rules:
+
+- corrupt journal suppresses temporarily and visibly;
+- missing journal does not suppress;
+- cooldown remains task-scoped.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 3`, roughly `50-130 LOC`.
+
+### 7.75 Duplicate Main-Process Events Can Become Duplicate Nudges
+
+File watcher, provisioning service, and log source events can all notify the same logical change.
+
+Rules:
+
+- event fanout is at-least-once;
+- queue/outbox dedupe is the correctness boundary;
+- event order must converge to the same state.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 4`, roughly `70-170 LOC`.
+
+### 7.76 Index Repair Can Hide Corrupt Member Metadata
+
+Repair currently depends on member meta files to discover member storage. Corrupt meta can hide status/outbox/report rows during repair.
+
+Rules:
+
+- skipped member storage is diagnostic;
+- repair never deletes hidden member feature data;
+- canonical meta repair stays outside work-sync domain.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 5`, roughly `90-220 LOC`.
+
+### 7.77 User-Facing Status Can Confuse Control-Plane State With Agent Failure
+
+Hidden automation is good, but a visible "Needs sync" badge or audit row can look like a failed message if copy is not strict.
+
+Rules:
+
+- normal Messages hides control-plane rows;
+- diagnostics show them with control-plane labels;
+- status UI never becomes a proof source.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 3`, roughly `40-120 LOC`.
+
+### 7.78 Proof-Missing Can Become False Success Or Duplicate Prompt
+
+`OpenCode proof missing` means the runtime did something, but the app still lacks required visible/progress proof. Treating it as success hides broken delivery. Treating it as a fresh message creates duplicate prompts.
+
+Rules:
+
+- proof-missing is a recovery signal only;
+- original message identity is preserved;
+- recovery uses queue/outbox dedupe;
+- ledger remains pending until real proof exists.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 5`, roughly `120-280 LOC`.
+
+### 7.79 Advisory Cache Can Keep Warning After Proof
+
+A valid task comment or visible reply can arrive while a cached advisory still says proof is missing.
+
+Rules:
+
+- advisory cache is invalidated on proof writes;
+- advisory badge is derived, not persisted;
+- invalidation failure logs diagnostics and does not send another nudge.
+
+Risk:
+
+`🎯 8 🛡️ 8 🧠 4`, roughly `70-170 LOC`.
+
+### 7.80 Prompt Repair And Work-Sync Can Double-Nudge
+
+The delivery repair path and member-work-sync path can both see `progress_proof_required`.
+
+Rules:
+
+- one recovery channel wins per logical message and cooldown window;
+- delivery repair is preferred for direct visible replies;
+- work-sync is preferred for task-scoped board progress;
+- both paths consult shared recent-recovery state or deterministic keys.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 5`, roughly `120-260 LOC`.
+
+### 7.81 Diagnostic Classifier Can Overmatch Protocol Proof
+
+Broad text matching can classify `message_send Not connected` as proof missing instead of backend/runtime connectivity, or classify quota/auth failures as model behavior issues.
+
+Rules:
+
+- classifier precedence is explicit;
+- proof-missing tokens are narrow;
+- auth/quota/runtime errors win over proof-missing;
+- user-facing diagnostics are redacted.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 4`, roughly `60-150 LOC`.
+
+### 7.82 Inbox MessageId Collision Can Hide Payload Drift
+
+The member-work-sync inbox sink currently receives `payloadHash`, but an existing inbox row with the same `messageId` can be treated as existing without proving payload equality.
+
+Rules:
+
+- same id with different payload is a conflict;
+- conflict does not mark outbox delivered;
+- legacy rows without hash fail closed or are recomputed explicitly.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 4`, roughly `70-150 LOC`.
+
+### 7.83 Overlapping OpenCode Inbox Relays Can Duplicate Prompts
+
+A second `onlyMessageId` wake can arrive while a member relay is already running. If it starts a second relay loop, prompt ledger idempotency becomes the last line of defense.
+
+Rules:
+
+- one member has one active relay loop;
+- target-message wakes coalesce or queue behind active relay;
+- diagnostics explain queued-behind vs missing vs already-read.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 4`, roughly `60-180 LOC`.
+
+### 7.84 Outbox Delivered Can Be Mistaken For Runtime Accepted
+
+For generic agenda-sync nudges, outbox delivery means inbox row insertion. It does not prove OpenCode accepted the prompt.
+
+Rules:
+
+- keep durable inbox delivery and runtime acceptance separate;
+- latency timeline exposes every stage;
+- UI copy does not imply the agent saw the message until runtime proof exists.
+
+Risk:
+
+`🎯 9 🛡️ 8 🧠 3`, roughly `50-130 LOC`.
+
+### 7.85 Busy Suppression Can Block Its Own Repair
+
+Unread foreground messages correctly suppress generic work-sync nudges, but the same unread message can also be the delivery that needs proof repair.
+
+Rules:
+
+- generic work-sync remains suppressed by unread foreground;
+- same-message delivery repair uses delivery path, not generic sync bypass;
+- unrelated unread messages still suppress proof-missing task recovery.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 5`, roughly `90-220 LOC`.
+
+### 7.86 Read-Path Recovery Can Spam Agents
+
+If advisory reads enqueue recovery, every member card refresh, worker snapshot, and renderer poll can become a write.
+
+Rules:
+
+- read/query services stay side-effect free;
+- only delivery lifecycle, event queue, or explicit command handlers schedule recovery;
+- tests assert no inbox/outbox writes during repeated advisory reads.
+
+Risk:
+
+`🎯 10 🛡️ 10 🧠 4`, roughly `80-180 LOC`.
+
+### 7.87 Ambiguous Trigger Reason Can Hide Recovery Bugs
+
+Reusing `runtime_activity` for proof-missing recovery makes timing, coalescing, and audit diagnostics ambiguous.
+
+Rules:
+
+- add a dedicated trigger reason if proof-missing recovery is in scope;
+- update default timing, coalescing, audit, and composition together;
+- keep broad runtime activity behavior unchanged.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 4`, roughly `70-160 LOC`.
+
+### 7.88 Outbox Arbitration Can Bypass Ports
+
+It is tempting to inspect member-work-sync JSON files from `TeamProvisioningService` to find recent recovery. That creates tight coupling and hidden storage assumptions.
+
+Rules:
+
+- recovery lookup goes through `MemberWorkSyncOutboxStorePort`;
+- storage-specific scans stay inside `JsonMemberWorkSyncStore`;
+- port query returns logical state, not raw JSON rows.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 4`, roughly `90-190 LOC`.
+
+### 7.89 Optional Inbox Hash Can Break Old Rows
+
+Persisting `workSyncPayloadHash` is necessary for idempotency, but old inbox rows will not have it.
+
+Rules:
+
+- optional field only;
+- reader and renderer tolerate missing hash;
+- sink compatibility is explicit and tested;
+- missing hash never silently confirms a changed work-sync payload.
+
+Risk:
+
+`🎯 9 🛡️ 9 🧠 4`, roughly `90-180 LOC`.
+
+### 7.90 Audit Union Drift Can Hide Recovery Diagnostics
+
+Adding recovery code without extending `MemberWorkSyncAuditEventName` correctly can lead to casts, generic skip events, or missing diagnostics.
+
+Rules:
+
+- event union, reason mapper, journal tests, and diagnostics update in the same cut;
+- no `as MemberWorkSyncAuditEventName` for new event names;
+- audit failure remains non-blocking.
+
+Risk:
+
+`🎯 8 🛡️ 9 🧠 3`, roughly `40-100 LOC`.
+
+---
+
+## 8. Implementation Sequence
+
+### Cut 1 - Documentation And UI Copy
+
+Safe first commit.
+
+Tasks:
+
+1. Add this plan.
+2. Add `workIntervals` invariant tests if missing.
+3. Rename visible label to `In progress time`.
+4. Update renderer tests.
+
+Commit:
+
+```text
+docs: plan opencode delivery hardening phases
+```
+
+or, if UI copy is included:
+
+```text
+fix(team): clarify task in-progress duration label
+```
+
+### Cut 2 - OpenCode Transcript Session Lookup
+
+Medium-risk, mostly read-only behavior.
+
+Tasks:
+
+1. Add orchestrator CLI `--session-id` with strict team/member/lane validation.
+2. Add orchestrator tests for exact session hit, team/member mismatch, lane mismatch, and missing session.
+3. Extend `ClaudeMultimodelBridgeService.getOpenCodeTranscript` with optional `sessionId`.
+4. Add bridge tests that verify CLI args and temp output cleanup.
+5. Fix `OpenCodeTaskLogStreamSource` attributed path cache/group keys to include session before adding new evidence.
+6. Add tests for two sessions owned by the same member.
+7. Add `TaskLogOpenCodeSessionEvidenceSource` as a narrow ledger-to-candidate adapter.
+8. Query bounded exact session candidates in `OpenCodeTaskLogStreamSource`.
+9. Add diagnostics and fixture tests.
+10. Update OpenCode fallback segment IDs to include session identity.
+11. Add `BoardTaskLogStreamService` merge/cache tests so exact-session fallback is not dropped by segment dedupe or stale layout cache.
+12. Add session-aware projected-message dedupe tests using `sessionId + uuid/sourceToolUseID`.
+13. Add a regression where an unrelated primary `execution` record does not suppress exact OpenCode fallback.
+14. Emit narrow `task-log-change` signals when exact OpenCode session evidence starts referencing a task.
+15. Add renderer tests that opened stream and badge count reload from that signal without a full team-data refresh.
+
+Commit:
+
+```text
+fix(team): load opencode task logs from delivery session evidence
+```
+
+### Cut 3 - Acceptance/Observation Split
+
+High-risk, needs focused tests.
+
+Tasks:
+
+1. First fix OpenCode turn-settled observer blockers from Section 4.8.
+2. Add runtime prompt identity fields to orchestrator send response and command outcome.
+3. Add ledger optional runtime prompt identity fields and migration-safe parsing.
+4. Add exact observe fields to OpenCode bridge contract and adapter, but keep observed behavior unchanged.
+5. Update watchdog observe calls to pass exact session/prompt identity when present.
+6. Add tests proving exact observe reads the accepted session even after lane/session changes.
+7. Add `settlementMode: "acceptance"` path with default still `observed`.
+8. Keep pre-prompt MCP/session repair synchronous.
+9. Return accepted after `prompt_async` only in acceptance mode.
+10. Update `OpenCodeReadinessBridge` timeout recovery to preserve accepted runtime prompt identity.
+11. Update ledger states and watchdog handling without releasing the active slot on acceptance alone.
+12. Add idempotency and queued-behind tests.
+13. Preserve proof context (`messageKind`, `taskRefs`, `relayOfMessageId`, `actionMode`, `workSyncIntent`) in ledger/observe paths.
+14. Add tests that work-sync proof cannot clear a normal delivery and normal plain-text fallback remains strict.
+15. Preserve turn-settled spool schema consumed by member-work-sync normalizer.
+16. Verify advisory classification does not surface ordinary post-acceptance observation lag as an error.
+17. Freeze/explicitly test canonical `payloadHash` shape before adding accept-fast transport fields.
+18. Add schema compatibility tests for old ledger records missing runtime prompt identity fields.
+19. Keep `acceptanceUnknown` distinct from accepted unless commandStatus/observe proves exact prompt acceptance.
+20. Add message-kind parity tests across shared types, inbox reader, bridge contract, ledger validator, and renderer filters.
+21. Add tests that retry control text does not change the app ledger logical payload hash.
+22. Ensure retry prompt path runs MCP/session readiness repair before sending any repair control prompt.
+23. Tighten taskRefs-only visible reply recovery tests so ambiguous candidates do not commit read/responded.
+24. Add work-sync inbox idempotency tests so same messageId with changed payloadHash cannot schedule a stale wake.
+25. Add delivery/work-sync separation tests so foreground task assignment delivery does not depend on work-sync phase2 activation.
+26. Add relay queue tests proving accepted-pending delivery stops the unread loop and keeps later foreground/work-sync messages queued.
+27. Add OpenCode bridge capability detection before enabling acceptance mode, with old-orchestrator fallback tests.
+28. Add lane-registry lock failure tests proving accepted exact evidence survives `lanes.json` timeout.
+29. Add task-log tests proving exact session evidence does not depend on current lane registry success.
+30. Add runtime-delivery inbox dedupe tests proving returned existing messageId is used for proof/advisory clearing.
+31. Add `opencodeDeliveryAcceptanceContractVersion` or equivalent explicit bridge contract marker.
+32. Persist settlement mode/original bridge request ID on the app delivery ledger before command execution.
+33. Add bridge idempotency tests for same logical delivery, changed settlement mode, timeout recovery, and missing echoed idempotencyKey.
+34. Add runtime delivery journal tests separating idempotency conflict, destination write failure, and MCP-not-connected failure reasons.
+35. Add proof tests showing committed runtime delivery clears prompt advisory only through verified visible correlation.
+36. Add proof reader tests for direct user replies stored in `sentMessages.json`, member inbox replies, and cross-team runtime locations.
+37. Add regression proving unrelated `lead_process` sent message cannot satisfy OpenCode member proof.
+38. Add locked sent-message append/verify tests for concurrent direct OpenCode replies to user.
+39. Add trim-boundary tests proving the just-committed runtime delivery proof row is preserved.
+40. Add runtime delivery taskRefs schema tests and prompt artifact checks so refs are never silently dropped.
+41. Add runtime control lane-resolution tests so secondary member calls never fall back blindly to primary.
+42. Add OpenCode inbox relay priority tests so foreground messages beat work-sync nudges and accepted-pending foreground delivery stops the loop.
+43. Add busy-status tests proving work-sync scheduling is suppressed by unread/recent foreground messages without changing UI filtering.
+44. Add durable automation visibility tests so hidden work-sync/task-stall rows remain readable in diagnostics and delivery paths.
+45. Preserve `task_stall_remediation` and future automation message kinds through `TeamInboxReader` or reject unsupported kinds before write.
+46. Add OpenCode Changes backfill tests for delivery context hash stability, negative-cache invalidation, and current-contract duplicates-only caching.
+47. Add tests proving Task Log Stream native tool rows do not by themselves create reviewable file-change ledger entries.
+48. Add manifest recovery tests distinguishing diagnostic-only stores from prompt/runtime delivery ledgers.
+49. Add corruption/quarantine tests proving prompt delivery ledger evidence is not silently dropped or rewritten as primary lane.
+50. Update artifact/debug checklist so failures include relay priority, hidden automation rows, backfill context hash, and manifest recovery action.
+51. Add stopped/tombstoned runtime evidence tests for delivery, task event, heartbeat, bridge result, and relaunch-old-run callbacks.
+52. Add stale-runtime post-stop tests proving no sent message, inbox row, task attribution, advisory clear, or task-log refresh is written.
+53. Add cache/advisory invalidation tests for direct user reply, member inbox reply, hidden automation row, and task event.
+54. Add renderer event fanout tests proving `lead-message`, `inbox`, `member-advisory`, and `task-log-change` refresh the intended surfaces only.
+55. Add conservative rebuild tests proving strict destination proof can clear advisory but cannot invent accepted prompt transport state.
+56. Add ambiguous destination rebuild tests so multiple plausible replies remain pending/diagnostic instead of guessed.
+57. Add stale-run rebuild tests so destination rows from old runs cannot satisfy current run proof.
+58. Add artifact/debug checklist so stale evidence includes team/run/lane/evidenceKind/tombstone reason and cache invalidation result.
+59. Add member-work-sync trigger timing tests proving `turn_settled`/`tool_finished` stay fast and startup/member-spawn scans stay readiness-gated.
+60. Add work-sync foreground suppression tests proving unread/accepted-pending OpenCode delivery delays generic sync nudges without dropping them.
+61. Add scheduled nudge dispatcher recovery tests for due outbox rows after app restart.
+62. Add delivery latency timeline builder and tests using existing ledgers/audit journals as sources.
+63. Add live/safe E2E diagnostics that print phase timings on slow OpenCode assignment runs.
+64. Add member card/status tests where runtime failure/advisory outranks task "working on" while preserving task context.
+65. Add transcript-only plain-text fallback tests so `message_send Not connected` does not clear proof or synthesize a reply.
+66. Add repair-policy tests that MCP readiness repair precedes retry after tool-error fallback.
+67. Add agenda fingerprint stability tests for reorder, generatedAt, presentation-only changes, dependency/review semantics, and future `sourceRevision` behavior.
+68. Add report token and pending replay tests proving stale fingerprints/tokens cannot suppress current actionable work.
+69. Add runtime turn-settled spool crash-recovery, invalid/quarantine, provider-mismatch, and duplicate-source tests.
+70. Add task impact resolver tests for owner, reviewer, lead clarification, broken dependencies, dependent owners, unknown task fallback, and removed members.
+71. Add busy signal tests proving active/recent tool activity is time-bounded, resettable, diagnostic, and advisory-only.
+72. Add implementation diagnostics so queue fallback, report rejection, busy suppression, and turn-settled resolution are visible in audit/debug artifacts.
+73. Add outbox planner/dispatcher tests proving plan-time rows are always revalidated at claim-time before inbox writes.
+74. Add sink/outbox payload drift tests so existing messageId cannot mask changed text/taskRefs/messageKind/source.
+75. Add targeted recovery tests proving OpenCode and lead bypasses do not become broad non-OpenCode early nudges.
+76. Add queue timing tests for default fast triggers, broad quietWindow hazards, follow-up rerun timing, and diagnostics.
+77. Add stale status read-refresh tests proving renderer polling coalesces and never writes inbox or prompts directly.
+78. Add scheduled dispatcher recovery tests separating fresh assignment wake from periodic due-row recovery.
+79. Add slow-start artifact fields for nudge origin: `foreground_delivery`, `event_queue`, `scheduled_dispatcher`, or `manual_refresh`.
+80. Add provider-specific turn-settled wiring tests for Claude Stop hook settings, Codex provisioning env, and OpenCode bridge env.
+81. Add workspace trust/preflight tests proving `AGENT_TEAMS_RUNTIME_TURN_SETTLED_SPOOL_ROOT` survives allowed env filtering.
+82. Add normalizer strictness tests for provider/source mismatch, duplicate `sourceId`, malformed payload quarantine, and no lead fallback.
+83. Add drain scheduler tests for first-run timing, non-overlap, exception recovery, stale processing recovery, and invalid payload quarantine.
+84. Add audit journal tests proving append failure and rotation do not affect canonical sync state or dispatch decisions.
+85. Add Phase 2 readiness tests for empty, sparse, corrupt, high-nudge, high-churn, and high-rejection metrics.
+86. Add member path safety tests proving sync stores use canonical member keys and reject reserved/removed/unsafe identities.
+87. Add live artifact fields for provider emitter mode, spool root present/missing, last drain result, last normalizer result, and readiness blocking reasons.
+88. Add router detail-shape tests for inbox, lead-message, tool-activity, member-turn-settled, task file paths, malformed JSON, and resolver fallback.
+89. Add nudge wake tests proving wake failure is diagnostic/recoverable without turning delivered inbox rows back into retrying duplicates.
+90. Add pending report replay tests for bounded transient failures, stale cutoff, deterministic ordering, removed members, and current-agenda validation.
+91. Add review-pickup escalation idempotency tests keyed by review request IDs, agenda fingerprint or intent key, and reason class.
+92. Add composition lifecycle tests proving dispose clears timers, queue follow-ups, scheduler ticks, and old feature instances.
+93. Add agenda source provider-identity merge tests for config/meta precedence, removedAt, provider inference, and unknown-provider direct-delivery gating.
+94. Add outbox claim lease tests proving stale claimed rows recover, active claims do not double dispatch, and late old-attempt writes are ignored.
+95. Add poison turn-settled payload tests proving repeated processing exceptions quarantine after bounded retries and do not block later files.
+96. Add IPC/HTTP boundary tests for member-work-sync status/report validation, source provenance, encoded member names, and no storage writes on invalid input.
+97. Add Phase 2 metric provenance tests proving manual/request status reads do not unlock active nudges without queue/runtime observations.
+98. Add watchdog cooldown adapter tests for missing, corrupt, expired, unrelated, and matching task journal rows.
+99. Add main-process fanout/coalescing tests proving duplicate task/inbox/runtime events converge to one outbox row.
+100. Add store repair diagnostics tests proving corrupt member metadata does not delete hidden sync data and valid members still repair.
+101. Add user-surface tests proving work-sync automation stays hidden in normal Messages but visible in audit/debug surfaces.
+102. Add proof-missing recovery adapter tests proving `protocol_proof_missing` enqueues one coalesced recovery signal without marking inbox read or ledger proven.
+103. Add advisory cache invalidation tests proving later visible/task proof clears `OpenCode proof missing` and suppresses queued recovery.
+104. Add delivery-repair vs work-sync arbitration tests proving one recovery nudge wins per message/cooldown window.
+105. Add diagnostic classifier precedence tests proving `message_send Not connected` stays backend/runtime connectivity and quota/auth errors outrank proof-missing tokens.
+106. Add inbox nudge sink payload-hash tests so same messageId cannot hide changed work-sync payload.
+107. Add OpenCode relay in-flight tests proving concurrent `onlyMessageId` wakes serialize or return queued-behind without duplicate prompts.
+108. Add work-sync delivery timeline tests separating durable inbox insertion from OpenCode runtime prompt acceptance.
+109. Add busy-signal recovery-context tests proving same-message delivery repair is not suppressed by its own unread row, while unrelated unread foreground still suppresses generic nudges.
+110. Add classifier implementation tests proving single-message classification uses explicit precedence or intentionally tested rule order.
+111. Add advisory read-path purity tests proving `TeamMemberRuntimeAdvisoryService`, `TeamDataService`, and worker snapshot reads never enqueue inbox/outbox recovery.
+112. Add explicit `proof_missing_recovery` trigger tests if that trigger is introduced, including timing, coalescing, audit metadata, and composition wiring.
+113. Add outbox logical recovery lookup tests through `MemberWorkSyncOutboxStorePort`, not file-path scans from provisioning code.
+114. Add backward-compatible inbox schema tests for optional `workSyncPayloadHash` across writer, reader, sink, renderer filtering, and legacy rows.
+115. Add member-work-sync audit event tests for proof-missing recovery schedule/coalesce/suppress/conflict events without type casts.
+
+Commit:
+
+```text
+fix(opencode): split prompt acceptance from turn observation
+```
+
+### Cut 4 - Retry Classification And Live Validation
+
+Only after Cut 3 is stable.
+
+Tasks:
+
+1. Add failure reason taxonomy.
+2. Tune retry delays by reason.
+3. Add live smoke tests gated by env.
+4. Save results under docs or test-results; do not commit them as tracked fixtures unless sanitized.
+
+Commit:
+
+```text
+test(opencode): add delivery acceptance live smoke coverage
+```
+
+or:
+
+```text
+fix(opencode): classify delivery retry reasons
+```
+
+---
+
+## 9. Detailed Risk Register
+
+### Risk 1 - Duplicate OpenCode Prompt
+
+Severity:
+
+`P1`
+
+How it happens:
+
+- app times out before command returns;
+- command actually accepted prompt;
+- watchdog retries same logical message;
+- old prompt and new prompt both produce replies.
+
+Mitigation:
+
+- persist accepted runtime prompt immediately;
+- recover via commandStatus before retry;
+- never retry same attempt after accepted runtime prompt;
+- correlate visible reply by `relayOfMessageId` and `runtimePromptMessageId`.
+
+Tests:
+
+- bridge timeout after accepted prompt recovers accepted outcome;
+- retry does not call prompt again for same attempt;
+- late visible proof resolves original ledger.
+
+### Risk 2 - False Task Logs From Wrong Session
+
+Severity:
+
+`P1`
+
+How it happens:
+
+- member has multiple OpenCode sessions;
+- current lane points to newer idle session;
+- task was handled by recreated previous session;
+- fallback pulls unrelated current session logs.
+
+Mitigation:
+
+- prefer exact runtimeSessionId from ledger;
+- bound by task owner/member and time window;
+- use task markers as anchors;
+- dedupe and sort;
+- keep current fallback only after exact evidence fails.
+
+Tests:
+
+- two session transcripts, only one has task marker;
+- wrong member session ignored;
+- current lane fallback does not override exact session.
+
+### Risk 3 - Work Sync And Watchdog Double Nudge
+
+Severity:
+
+`P2`
+
+How it happens:
+
+- OpenCode turn-settled enqueues reconcile;
+- member-work-sync plans nudge;
+- task-stall watchdog also nudges same task/member.
+
+Mitigation:
+
+- keep existing watchdog cooldown port;
+- member-work-sync dispatcher revalidates cooldown before delivery;
+- watchdog should see recent work-sync nudge and skip if appropriate;
+- turn-settled event never sends directly.
+
+Tests:
+
+- recent watchdog alert blocks sync nudge;
+- recent sync nudge blocks a duplicate sync nudge but does not block a real semantic stall alert after the threshold;
+- OpenCode accepted prompt with foreground unread assignment does not create extra sync nudge.
+
+### Risk 4 - UI Shows Warning After Success
+
+Severity:
+
+`P2`
+
+How it happens:
+
+- advisory banner is based on pending/unknown ledger state;
+- visible reply arrives;
+- banner state is not cleared promptly.
+
+Mitigation:
+
+- clear advisory when ledger gets visible proof or task progress proof;
+- treat observation timeout after accepted prompt as developer detail, not user warning, if proof later arrives;
+- explicitly invalidate member runtime advisory cache from the proof write path;
+- keep the renderer passive: it displays snapshot state but does not decide proof.
+
+Tests:
+
+- cached warning exists, visible reply proof arrives, next snapshot has no warning without waiting for TTL;
+- proof for one member does not clear another member's hard runtime warning;
+- observation timeout followed by task progress proof does not leave a stale banner;
+- renderer state subscribes to proof update;
+- pending advisory disappears after visible reply;
+- "Saved" appears on a separate line as previously requested;
+- no warning remains after successful OpenCode reply.
+
+### Risk 5 - Accept-Fast Hides MCP Not Connected
+
+Severity:
+
+`P1`
+
+How it happens:
+
+- prompt accepted but MCP was not actually usable;
+- agent attempts `agent-teams_message_send`;
+- tool returns `Not connected`;
+- app thinks delivery accepted and does not repair.
+
+Mitigation:
+
+- keep pre-prompt `ensureSessionAppMcpReady` synchronous;
+- observe tool errors as response proof failure;
+- watchdog retry goes through MCP repair gate again;
+- do not mark message read until proof.
+
+Tests:
+
+- MCP unavailable before prompt rejects/recreates before acceptance;
+- tool error after acceptance keeps ledger pending/failed proof;
+- retry re-checks MCP before prompt.
+
+### Risk 6 - Session Observer Hangs Or Burns CPU
+
+Severity:
+
+`P2`
+
+Mitigation:
+
+- bounded timeout;
+- abort controller;
+- no unbounded SSE reader in Electron main;
+- no infinite reconnect loop in v1;
+- diagnostics on `stream_unavailable`;
+- test premature EOF and timeout.
+
+### Risk 7 - Wrong Proof Clears The Wrong Delivery
+
+Severity:
+
+`P1`
+
+How it happens:
+
+- work-sync nudge produces a valid board-sync report;
+- a normal OpenCode delivery for the same member is still pending;
+- generic observation logic treats any valid member activity as response proof.
+
+Mitigation:
+
+- keep proof context on the ledger record;
+- require message kind/taskRefs/relay correlation before read/responded commit;
+- keep final proof decisions in `TeamProvisioningService`;
+- test normal delivery and work-sync in the same member/lane.
+
+Tests:
+
+- work-sync report does not mark normal delivery responded;
+- normal visible reply does not satisfy a different task's delivery without matching refs;
+- plain assistant output after tool error is not accepted unless materialized/semantically sufficient.
+
+### Risk 8 - Fixed OpenCode Logs Still Look Empty Due To Cache/Merge
+
+Severity:
+
+`P2`
+
+How it happens:
+
+- exact session evidence becomes available;
+- OpenCode source cache key does not include it;
+- fallback segment ID collides with an older same-member segment;
+- UI still shows empty or only MCP markers.
+
+Mitigation:
+
+- include evidence identity in source cache key;
+- include session ID in fallback segment IDs;
+- add merge tests at `BoardTaskLogStreamService` level;
+- expose developer diagnostics for cache hit/miss reason.
+
+Tests:
+
+- exact session evidence after previous empty cache render produces native tools;
+- same member with two sessions keeps distinct safe fallback segments;
+- duplicate rows are deduped by source/tool signature, not participant-only segment ID.
+
+### Risk 9 - Too Much Live Test Load
+
+Severity:
+
+`P2`
+
+Mitigation:
+
+- live tests opt-in only;
+- cheap models by default;
+- no model matrix in this phase;
+- cleanup only smoke-owned teams/processes;
+- no broad `killall opencode`.
+
+---
+
+## 10. Clean Architecture Placement
+
+### 10.1 `claude_team`
+
+Use feature architecture for new policy/state.
+
+Do not place new business policy in renderer.
+
+Recommended additions:
+
+```text
+src/main/services/team/taskLogs/stream/
+ TaskLogOpenCodeSessionEvidenceSource.ts
+
+src/main/services/team/opencode/delivery/
+ OpenCodeDeliveryFailureReason.ts
+ OpenCodeDeliveryProofContext.ts
+```
+
+If the acceptance/observation split becomes large, prefer moving new OpenCode delivery use cases into a feature-style structure later:
+
+```text
+src/features/opencode-delivery/
+ contracts/
+ core/domain/
+ core/application/
+ main/adapters/output/
+ main/infrastructure/
+```
+
+But for this pass, avoid a broad migration. Keep changes narrow around existing OpenCode delivery services.
+
+Architecture standard mapping:
+
+```text
+domain policy:
+ proof classification, failure reason taxonomy, session candidate ordering
+
+application service:
+ delivery queue ownership, retry/retry-safe decisions, advisory invalidation orchestration
+
+output adapters:
+ orchestrator bridge, ledger stores, attribution store, runtime transcript reader
+
+renderer:
+ read-only presentation of backend state
+```
+
+Do not let a convenience helper cross these boundaries. For example, `TaskLogOpenCodeSessionEvidenceSource` may read ledger/attribution stores and return candidates, but it must not build renderer chunks. `OpenCodeTaskLogStreamSource` may project transcripts into stream segments, but it must not decide whether a delivery is responded/read.
+
+Contract boundary:
+
+- `OpenCodeReadinessBridge` is an output adapter to the orchestrator bridge.
+- `OpenCodeTeamRuntimeAdapter` maps app runtime DTOs to bridge command DTOs.
+- `TeamProvisioningService` remains the application service that owns delivery queue semantics.
+- `OpenCodeDeliveryProofContext` is a small domain/application DTO, not renderer state and not orchestrator policy.
+- New evidence readers should be ports/adapters, not helper functions embedded in renderer or task log components.
+- `ChangeExtractorService` remains the authority for file-change summaries; Task Log Stream should not mutate or synthesize change ledgers.
+
+SOLID guardrail:
+
+```text
+OpenCodeTaskLogStreamSource should not read raw ledger files directly if that makes it both evidence collector and projector.
+OpenCodeDeliveryProofContext should describe required proof, but proof decisions stay in one application service.
+Task-log projection and file-change extraction change for different reasons and should stay separate.
+```
+
+Keep evidence collection behind `TaskLogOpenCodeSessionEvidenceSource` so task-log projection can be tested separately from ledger discovery.
+
+### 10.1.1 Member-Work-Sync Provider Emitter Boundary
+
+The provider-specific "turn settled" emitters are input/infrastructure adapters, not domain policy.
+
+The current boundary should stay as follows:
+
+```text
+provider runtime:
+ Claude Stop hook, Codex native orchestrator event, OpenCode bridge event
+
+main/infrastructure:
+ hook/env installer, spool paths, payload normalizers, runtime event store
+
+core/application:
+ ingest normalized RuntimeTurnSettledEvent, enqueue/reconcile member work state
+
+core/domain:
+ agenda fingerprint, report validation, readiness, nudge activation policy
+```
+
+Rules:
+
+- `ActionableWorkAgenda`, report validation, readiness, and nudge policy must not inspect provider-specific raw payloads;
+- provider-specific normalizers convert raw payloads into one shared `RuntimeTurnSettledEvent`;
+- adding a future provider should add a normalizer/emitter adapter and tests, not fork member-work-sync core use cases;
+- OpenCode-specific remediation remains an output-port behavior at dispatch/targeting time, not a different agenda model;
+- provider wiring diagnostics belong in main/infrastructure or artifacts, not renderer state.
+
+### 10.2 `agent_teams_orchestrator`
+
+Provider-specific OpenCode protocol remains here:
+
+```text
+src/services/opencode/
+ OpenCodeBridgeCommandHandler.ts
+ OpenCodeSessionBridge.ts
+ OpenCodeTurnSettledObserver.ts
+ OpenCodeCommandOutcomeStore.ts
+```
+
+The orchestrator may know:
+
+- OpenCode host/session;
+- SSE events;
+- prompt_async;
+- MCP readiness on OpenCode host;
+- command outcome storage.
+
+The orchestrator must not know:
+
+- task agenda fingerprint policy;
+- whether to nudge a member;
+- task-stall semantic policy;
+- renderer warning UI behavior.
+
+Contract changes here should be additive:
+
+- add optional fields first;
+- keep old callers valid;
+- reject contradictory exact identity fields with structured diagnostics;
+- preserve schema version compatibility unless a breaking change is unavoidable.
+
+### 10.3 Renderer
+
+Renderer changes should be limited to:
+
+- label copy;
+- clearing advisory state when backend says proof arrived;
+- optional developer details display;
+- keeping member-work-sync nudges hidden from the normal Messages feed by default.
+
+Renderer must not:
+
+- infer OpenCode delivery status from raw transcript;
+- run retry policy;
+- synthesize task progress;
+- show WORK SYNC control messages in the main conversation unless an explicit debug/audit view asks for them.
+
+---
+
+## 11. Verification Matrix
+
+### Unit Tests
+
+`claude_team`:
+
+```bash
+pnpm vitest run \
+ test/main/services/team/TeamTaskWriter.test.ts \
+ test/main/services/team/TeamTaskActivityIntervalService.test.ts \
+ test/shared/utils/taskWorkDuration.test.ts
+```
+
+```bash
+pnpm vitest run \
+ test/main/services/runtime/ClaudeMultimodelBridgeService.test.ts \
+ test/main/services/team/OpenCodeReadinessBridge.test.ts \
+ test/main/services/team/OpenCodeBridgeCommandContract.test.ts \
+ test/main/services/team/BoardTaskLogStreamService.test.ts \
+ test/main/services/team/OpenCodeTaskLogStreamSource.test.ts \
+ test/main/services/team/TaskLogOpenCodeSessionEvidenceSource.test.ts \
+ test/main/services/team/OpenCodePromptDeliveryLedger.test.ts \
+ test/main/services/team/RuntimeDeliveryService.test.ts \
+ test/main/services/team/OpenCodeRuntimeDeliveryAdvisoryPolicy.test.ts \
+ test/main/services/team/OpenCodeRuntimeDeliveryDiagnostics.test.ts \
+ test/main/services/team/RuntimeDiagnosticClassifier.test.ts \
+ test/main/services/team/TeamMemberRuntimeAdvisoryService.test.ts \
+ test/main/services/team/ChangeExtractorService.test.ts \
+ test/main/services/team/TaskChangeLedgerReader.test.ts \
+ test/main/services/team/RuntimeStoreManifest.test.ts \
+ test/main/services/team/OpenCodeRuntimeManifestEvidenceReader.test.ts \
+ test/main/services/team/RuntimeRunTombstoneStore.test.ts \
+ test/main/services/team/OpenCodeRuntimeDeliveryProofReader.test.ts \
+ test/main/services/team/TeamMessageFeedService.test.ts \
+ test/main/services/team/TeamInboxReader.test.ts
+```
+
+Add or extend tests for:
+
+- `OpenCodeRuntimeDeliveryProofReader` if task-progress proof rules need direct coverage;
+- app ledger hash vs bridge command hash stability;
+- runtime advisory invalidation across `TeamDataService` and `TeamDataWorkerClient`.
+- unsafe member-name advisory invalidation falls back to team-scoped invalidation instead of leaving the worker cache stale;
+- OpenCode task-log projection dedupes by `sessionId + source id`, not by member name or tool signature alone;
+- `BoardTaskLogStreamService.shouldMergeRuntimeFallback()` does not suppress exact OpenCode fallback because of an unrelated execution record.
+- exact OpenCode session evidence emits a narrow task-log signal for every affected taskRef;
+- `TaskLogStreamSection` and `TaskLogsPanel` reload stream/count from that signal without requiring full team refresh;
+- message kind parity across `InboxMessageKind`, `TeamInboxReader`, OpenCode ledger validation, bridge command DTO, and renderer filtering;
+- retry control text does not change `hashOpenCodePromptDeliveryPayload()`;
+- `message_send` tool-error retry path invokes MCP/session readiness repair before sending another prompt.
+- taskRefs-only visible reply recovery does not commit read/responded when multiple plausible candidates exist.
+- work-sync inbox nudge sink treats same messageId plus different payloadHash as conflict, or proves outbox rejects it before sink;
+- normal task assignment delivery does not wait on member-work-sync phase2 activation or nudge planning.
+- OpenCode inbox relay stops after accepted-pending delivery and does not drain later unread messages for the same member.
+- OpenCode bridge capability detection falls back safely with an old orchestrator response shape.
+- `lanes.json` lock timeout after prompt acceptance does not delete or downgrade exact delivery evidence.
+- exact session task-log lookup works when current lane registry points at a newer session.
+- runtime-delivery inbox dedupe returns existing messageId and downstream proof/advisory code uses that ID.
+- bridge idempotency remains stable for one delivery attempt and timeout recovery requires exact echoed identity.
+- runtime delivery journal conflicts are tested separately from MCP readiness failures.
+- visible proof reader covers the actual runtime delivery destination stores, including direct user replies in sent messages.
+- `protocol_proof_missing` is recovery-only and never marks inbox read, ledger proven, or member-work-sync reported.
+- proof-missing advisory refreshes coalesce to one recovery key and are canceled by later visible/task proof.
+- delivery repair and member-work-sync arbitration sends at most one nudge for the same logical message per cooldown window.
+- `message_send Not connected` does not classify as proof missing and keeps the current backend/network taxonomy unless a dedicated MCP reason code is added end-to-end.
+- auth/quota/provider diagnostics outrank protocol proof-missing diagnostics.
+- single-message runtime diagnostic classification uses explicit priority or tests the intended rule order.
+- existing inbox nudge rows with same messageId and different payloadHash are conflicts, not delivered existing nudges.
+- concurrent `onlyMessageId` OpenCode wake calls serialize, coalesce, or return queued-behind without duplicate prompt attempts.
+- generic work-sync outbox delivered is verified as inbox-inserted only, not runtime-accepted.
+- same-message delivery repair is not suppressed by its own unread row, while unrelated foreground unread messages still suppress generic work-sync.
+- concurrent direct user reply writes to `sentMessages.json` preserve all committed proof rows.
+- runtime delivery taskRefs schema is explicit and invalid shapes cannot be silently dropped.
+- unresolved secondary OpenCode runtime control calls fail closed instead of falling back to primary lane.
+- OpenCode relay priority keeps foreground inbox messages ahead of `member_work_sync_nudge`.
+- hidden automation rows remain durable and available through diagnostics while normal Messages stays clean.
+- `task_stall_remediation` and `member_work_sync_nudge` survive inbox reader normalization.
+- OpenCode file-change backfill preserves delivery context hash and does not reuse stale negative cache.
+- OpenCode metadata-only evidence is rendered as manual review/unavailable, not as no changes.
+- runtime store manifest recovery does not drop or downgrade prompt/runtime delivery ledgers.
+- stopped/tombstoned OpenCode runtime evidence cannot write sent messages, inbox rows, task attribution, liveness, or advisory-clearing proof.
+- stale old-run callbacks after relaunch are diagnostic-only and cannot affect current run UI state.
+- direct user reply destination write emits feed refresh and member-advisory invalidation.
+- member inbox runtime reply emits inbox refresh and invalidates the owner advisory.
+- hidden automation writes remain hidden in normal Messages but still invalidate diagnostic/advisory state.
+- conservative ledger rebuild can use strict visible proof but cannot invent prompt acceptance.
+- ambiguous or stale-run rebuild candidates remain pending/diagnostic instead of guessed.
+- member-work-sync fast triggers remain fast and readiness-gated triggers cannot dispatch during launch bootstrap.
+- foreground unread and accepted-pending OpenCode deliveries suppress generic work-sync without dropping outbox recovery.
+- latency timeline can identify whether delay came from queue, relay, MCP repair, prompt acceptance, model/tool execution, proof, or task-log projection.
+- runtime failure/advisory status outranks task "working on" in member card and hover surfaces.
+- transcript-only plain text after `message_send Not connected` remains proof-missing until a real destination write or task progress proof appears.
+- agenda fingerprint remains stable across generatedAt, task array order, and presentation-only changes.
+- report token and pending replay reject stale fingerprint/token reports without extending old leases.
+- runtime turn-settled spool recovers stale processing files and quarantines invalid payloads.
+- target resolver rejects provider mismatch, removed member, reserved member, and deleted team.
+- task impact resolver keeps owner/reviewer/lead/dependency routing narrow and uses diagnostic team-wide fallback only when uncertain.
+- busy signal is bounded, resettable, and cannot block normal foreground delivery.
+- outbox dispatch revalidates agenda, lifecycle, activation, busy, rate limit, and watchdog cooldown at claim-time.
+- inbox nudge sink is tested behind outbox payloadHash validation and cannot hide payload drift.
+- targeted recovery remains OpenCode/lead-specific and does not bypass phase2 for arbitrary providers.
+- event queue fast triggers stay fast even when coalescing and scheduler recovery are present.
+- stale status reads enqueue coalesced refresh only and never become direct delivery.
+- scheduled dispatcher is tested as recovery for due rows, not the fresh assignment wake path.
+- Claude Stop hook, Codex provisioning env, and OpenCode bridge env are tested through their real provider wiring paths.
+- normalizers reject provider/source drift instead of routing to another provider or defaulting to lead.
+- runtime turn-settled drain scheduler recovers from exceptions, stale processing files, and invalid payloads without blocking the app.
+- member-work-sync audit journal failures do not alter canonical store, outbox, inbox, or delivery state.
+- Phase 2 readiness remains conservative under empty, sparse, corrupt, or truncated metrics.
+- member-work-sync paths are derived from canonical member storage, not raw member-name joins.
+- router detail parsing covers every current team-change detail shape and malformed detail cannot enqueue the wrong member.
+- nudge wake failure is visible and re-wakeable without duplicate inbox rows.
+- pending report replay is bounded, ordered, and validates against current agenda every time.
+- review-pickup escalation is idempotent for unchanged review request events.
+- feature dispose clears timers and prevents stale feature instances from dispatching.
+- agenda source provider identity is tested separately from agenda fingerprint stability.
+- outbox claimed rows recover after lease expiry and never double-dispatch through old attempts.
+- poison runtime turn-settled payloads are quarantined after bounded retries.
+- IPC and HTTP member-work-sync requests are runtime-validated and preserve report provenance.
+- manual status reads do not train Phase 2 active readiness by themselves.
+- watchdog cooldown suppression is task-scoped, bounded, and diagnostic on corrupt journal data.
+- duplicate main-process team-change fanout coalesces before outbox writes.
+- store repair diagnostics expose corrupt member metadata without deleting hidden sync data.
+- user-facing status copy separates control-plane sync from delivery failure.
+
+```bash
+pnpm vitest run \
+ test/features/member-work-sync/core/ActionableWorkAgenda.test.ts \
+ test/features/member-work-sync/core/MemberWorkSyncReportValidator.test.ts \
+ test/features/member-work-sync/main/HmacMemberWorkSyncReportTokenAdapter.test.ts \
+ test/features/member-work-sync/main/JsonMemberWorkSyncStore.test.ts \
+ test/features/member-work-sync/main/RuntimeTurnSettledIngestor.test.ts \
+ test/features/member-work-sync/main/FileRuntimeTurnSettledEventStore.test.ts \
+ test/features/member-work-sync/main/TeamRuntimeTurnSettledTargetResolver.test.ts \
+ test/features/member-work-sync/main/MemberWorkSyncTaskImpactResolver.test.ts \
+ test/features/member-work-sync/main/MemberWorkSyncTeamChangeRouter.test.ts \
+ test/features/member-work-sync/main/MemberWorkSyncToolActivityBusySignal.test.ts \
+ test/features/member-work-sync/main/TeamInboxMemberWorkSyncNudgeSink.test.ts \
+ test/features/member-work-sync/main/MemberWorkSyncNudgeDispatchScheduler.test.ts \
+ test/features/member-work-sync/main/MemberWorkSyncEventQueue.test.ts \
+ test/features/member-work-sync/core/application/MemberWorkSyncNudgeActivationPolicy.test.ts \
+ test/features/member-work-sync/core/application/MemberWorkSyncTargetedRecoveryPolicy.test.ts \
+ test/features/member-work-sync/core/MemberWorkSyncUseCases.test.ts
+```
+
+```bash
+pnpm vitest run \
+ test/features/member-work-sync/main/MemberWorkSyncEventQueue.test.ts \
+ test/features/member-work-sync/main/MemberWorkSyncNudgeDispatchScheduler.test.ts \
+ test/features/member-work-sync/main/MemberWorkSyncNudgeDispatcher.test.ts \
+ test/features/member-work-sync/main/MemberWorkSyncNudgeOutboxPlanner.test.ts \
+ test/shared/utils/teamInternalControlMessages.test.ts \
+ test/renderer/utils/teamMessageFiltering.test.ts \
+ test/features/member-work-sync/main/createMemberWorkSyncFeature.test.ts
+```
+
+```bash
+pnpm vitest run \
+ test/features/member-work-sync/main/RuntimeTurnSettledHookSettings.test.ts \
+ test/features/member-work-sync/main/CodexNativeTurnSettledPayloadNormalizer.test.ts \
+ test/features/member-work-sync/main/OpenCodeTurnSettledPayloadNormalizer.test.ts \
+ test/features/member-work-sync/main/RuntimeTurnSettledIngestor.test.ts \
+ test/features/member-work-sync/main/FileMemberWorkSyncAuditJournal.test.ts \
+ test/features/member-work-sync/core/MemberWorkSyncPhase2Readiness.test.ts \
+ test/features/member-work-sync/main/JsonMemberWorkSyncStore.test.ts \
+ test/features/member-work-sync/main/MemberWorkSyncTeamChangeRouter.test.ts \
+ test/features/member-work-sync/main/MemberWorkSyncTaskImpactResolver.test.ts \
+ test/features/member-work-sync/main/adapters/output/TeamTaskAgendaSource.test.ts \
+ test/features/member-work-sync/main/registerMemberWorkSyncIpc.test.ts \
+ test/features/member-work-sync/main/TeamTaskStallJournalWorkSyncCooldown.test.ts \
+ test/features/member-work-sync/renderer/memberWorkSyncStatusViewModel.test.ts \
+ test/renderer/api/httpClient.memberWorkSync.test.ts \
+ test/preload/electronApiMemberWorkSync.test.ts \
+ test/main/services/team/TeamProvisioningServicePrepare.test.ts \
+ test/features/member-work-sync/main/createMemberWorkSyncFeature.test.ts \
+ test/main/services/team/TeamMemberStoragePaths.test.ts
+```
+
+`agent_teams_orchestrator`:
+
+```bash
+bun test \
+ src/services/opencode/OpenCodeBridgeCommandHandler.test.ts \
+ src/services/opencode/OpenCodeSessionBridge.test.ts \
+ src/services/opencode/OpenCodeTurnSettledObserver.test.ts \
+ src/services/opencode/OpenCodeRuntimeTurnSettledEmitter.test.ts \
+ src/services/opencode/OpenCodeTurnSettledEmissionCoordinator.test.ts \
+ src/services/opencode/OpenCodeCommandOutcomeStore.test.ts
+```
+
+### Integration Tests
+
+```bash
+pnpm vitest run \
+ test/main/services/team/BoardTaskLogStreamIntegration.test.ts \
+ test/main/services/team/TeamAgentLaunchMatrix.safe-e2e.test.ts \
+ test/renderer/components/team/taskLogs/TaskLogStreamSection.opencode-fixture-e2e.test.tsx \
+ test/renderer/components/team/dialogs/TaskDetailDialog.test.tsx \
+ test/renderer/store/teamChangeThrottle.test.ts
+```
+
+### Typecheck And Build
+
+```bash
+pnpm typecheck --pretty false
+```
+
+```bash
+cd /Users/belief/dev/projects/claude/agent_teams_orchestrator
+bun run build
+```
+
+### Live E2E
+
+Run only after unit/integration tests are green.
+
+```bash
+OPENCODE_E2E=1 \
+OPENCODE_DELIVERY_ACCEPT_FAST_LIVE=1 \
+pnpm vitest run test/main/services/team/OpenCodeAcceptFastDelivery.live-e2e.test.ts
+```
+
+Expected live assertions:
+
+- prompt acceptance timestamp appears before full assistant completion;
+- task_start/tool logs are visible by exact session;
+- no duplicate logical delivery;
+- member-work-sync journal shows reconcile wakeup, not direct spam;
+- advisory clears after visible proof.
+- work-sync nudge is not visible in normal Messages if filtered by existing UI policy.
+- no `Not connected` tool error occurs after pre-prompt MCP-ready gate in the happy path.
+- normal delivery proof and work-sync proof do not satisfy each other.
+- same-member different-session OpenCode task logs do not collapse into one segment.
+- a new OpenCode task assignment is delivered through normal delivery without waiting for member-work-sync phase2 activation.
+- live report includes the detected OpenCode bridge capability snapshot.
+- if a lane registry diagnostic write fails after acceptance in a fault-injected run, accepted prompt identity remains observable.
+- direct OpenCode reply to user appears in Messages and clears advisory through the same proof reader.
+- stopping a team before a stale OpenCode callback arrives produces no visible reply and no advisory clear.
+- relaunching a team then receiving an old-run callback leaves the new run unaffected.
+- advisory/banner disappears after a valid visible reply without requiring a manual full refresh.
+- slow-pass report includes phase timings for assignment, inbox, relay, MCP readiness, prompt accepted, first tool, task_start, visible proof, and work-sync decision.
+- `message_send Not connected` live/fault-injected run is retried through MCP repair and never marked successful from transcript-only text.
+
+---
+
+## 12. Implementation Checklists By Fragile Area
+
+### 12.1 `workIntervals` Checklist
+
+- no storage migration;
+- no provider conditional;
+- no change to task create/status interval logic;
+- tests assert status-time semantics;
+- UI copy is the only user-facing change around this metric.
+
+### 12.2 Task Log Session Evidence Checklist
+
+- exact `sessionId` lookup exists in orchestrator CLI;
+- bridge passes `--session-id`;
+- task log source uses exact session candidates before current lane fallback;
+- cache key includes evidence;
+- fallback segment IDs include session identity;
+- BoardTaskLogStreamService merge keeps distinct same-member sessions;
+- exact-session evidence writes emit narrow task-log refresh events;
+- task-log badge count and opened stream reload from the same signal;
+- candidate count bounded;
+- foreign team/member/task ignored;
+- missing exact session is diagnostic, not fatal.
+
+### 12.3 Delivery Acceptance Checklist
+
+- pre-prompt repair remains synchronous;
+- active member delivery queue remains serialized;
+- outcome store status/rank/safeToRetry updated;
+- commandStatus recovery still strict;
+- ledger records accepted runtime prompt identity;
+- ledger/observation keep proof context (`messageKind`, `taskRefs`, `relayOfMessageId`, `actionMode`);
+- commandStatus timeout recovery preserves runtime prompt identity when synthesizing accepted response;
+- app ledger payload hash and bridge command payload hash are tested as separate contracts;
+- payload hashes are stable across transport-only accept-fast fields and change for real payload changes;
+- old ledger schema-1 records missing new prompt identity fields still parse and update safely;
+- acceptanceUnknown is not upgraded to accepted without strict acceptance evidence;
+- settlement mode is persisted before first send and not recomputed differently during retry;
+- original bridge request ID is stored for commandStatus recovery;
+- bridge command result echo of `idempotencyKey` is required before state mutation;
+- timeout recovery that lacks exact prompt/session identity remains unknown, not accepted;
+- observation timeout after acceptance does not immediately duplicate prompt;
+- relay loop stops after accepted pending delivery and keeps the same member serialized;
+- watchdog proof logic remains authoritative;
+- `taskProgressAt` can suppress advisory but cannot bypass normal delivery read-commit policy;
+- member-work-sync receives only wakeup signals;
+- work-sync proof cannot clear a normal delivery record;
+- turn-settled spool payload still normalizes through member-work-sync.
+
+### 12.4 UI Advisory Checklist
+
+- success proof clears warning;
+- proof write path invalidates member runtime advisory cache;
+- invalidation reaches both in-process `TeamDataService` and `TeamDataWorkerClient`;
+- observation timeout after accepted prompt is not shown as error if proof arrives;
+- "Saved" remains separate from warning copy if both are visible;
+- work-sync automation messages stay hidden from normal Messages if current filtering requires that.
+
+### 12.5 Member-Work-Sync Boundary Checklist
+
+- first assignment wake is normal delivery, not work-sync;
+- work-sync reconcile can be triggered by turn-settled/task/inbox events but still goes through activation policy;
+- foreground unread assignment suppresses duplicate sync nudge;
+- phase2 metrics can block generic sync nudges without blocking normal delivery;
+- outbox payloadHash conflict is tested;
+- inbox sink either stores/compares payloadHash or proves payload equivalence before returning existing;
+- existing nudge wake is not scheduled after payload conflict;
+- work-sync audit records conflict/cooldown/suppression reasons for debugging.
+
+### 12.6 Cross-Repo And Lane Registry Checklist
+
+- OpenCode bridge capability is detected before acceptance mode is used;
+- delivery acceptance support is represented by explicit contract version, not generic command presence alone;
+- missing capability falls back to observed mode with diagnostic, not guessed accept-fast;
+- acceptance-mode response without exact runtime prompt identity remains `acceptanceUnknown`;
+- old orchestrator response fixtures are covered by tests;
+- accepted prompt identity is persisted outside `lanes.json`;
+- lane registry read/write failure after acceptance is diagnostic-only for the accepted prompt;
+- lane registry failure before first runtime evidence blocks delivery safely;
+- no transcript read, task-log attribution write, renderer event emit, or OpenCode network call happens while holding the lane index lock;
+- exact session evidence lookup precedes current lane lookup;
+- stale lane cleanup cannot delete exact delivery evidence early.
+
+### 12.7 Runtime Delivery Dedupe Checklist
+
+- runtime-delivery dedupe remains scoped to same `relayOfMessageId`;
+- deduped inbox write returns the existing `messageId`;
+- ledger proof and advisory clearing use the returned message ID;
+- dedupe never applies to work-sync, task-stall, or system notification rows;
+- identical text without `source="runtime_delivery"` and exact relay proof is not enough;
+- taskRef merge after dedupe is tested and does not widen proof semantics.
+
+### 12.8 Runtime Delivery Journal Checklist
+
+- `RuntimeDeliveryService` remains the only path that writes OpenCode runtime `message_send` destinations;
+- destination write is verified before journal commit;
+- duplicate identical idempotency key returns existing committed location;
+- same idempotency key with different payload hash returns conflict;
+- conflict is not mapped to an MCP "not connected" failure;
+- committed runtime delivery can feed visible proof correlation but cannot directly mark arbitrary prompt deliveries responded;
+- runtime delivery journal reconciliation emits diagnostics only and never re-prompts OpenCode;
+- destination change events stay scoped to the actual destination.
+
+### 12.9 Visible Proof Store Parity Checklist
+
+- proof reader scans or resolves every destination kind written by runtime delivery ports;
+- direct user replies stored in sent messages can clear advisory through strict proof;
+- member inbox replies remain inbox-scoped;
+- cross-team replies remain cross-team scoped;
+- source string mismatch is diagnostic unless committed runtime delivery location proves the same message;
+- unrelated lead/process messages cannot satisfy OpenCode member delivery proof.
+
+### 12.10 Sent Messages Store Checklist
+
+- sent-message append path is locked or otherwise concurrency-safe;
+- duplicate destination message ID is detected under lock;
+- append verifies the row after write;
+- trim keeps the just-written row;
+- read normalizer preserves fields needed by runtime proof;
+- normal live lead message overlay tests stay green.
+
+### 12.11 Runtime TaskRefs Contract Checklist
+
+- MCP prompt/tool schema and app normalizer agree on taskRefs shape;
+- invalid taskRefs fail loudly or are preserved through a documented normalizer;
+- string refs have defined taskId/displayId semantics;
+- structured refs hash deterministically if supported;
+- proof reader and task-log evidence use the same normalized refs;
+- prompt artifact tests assert the documented schema.
+
+### 12.12 Runtime Control Lane Resolution Checklist
+
+- non-lead secondary member control calls require member-owned lane or exact session evidence;
+- true primary OpenCode runtime remains supported;
+- message delivery, task event, and heartbeat share the same fail-closed resolver;
+- stale launch-state and missing lane registry are covered by tests;
+- rejection diagnostics include enough member/run/session context for artifact debugging;
+- no destination write happens before lane/evidence validation.
+
+### 12.13 OpenCode Inbox Relay Priority Checklist
+
+- priority sort direction is documented in code and tests;
+- normal foreground unread rows sort before `member_work_sync_nudge`;
+- system notifications do not accidentally outrank user/task foreground rows;
+- accepted-pending foreground delivery leaves later rows unread and queued;
+- `onlyMessageId` is treated as a controlled exact override, not broad scheduling;
+- busy-status diagnostics include active message kind and message id;
+- work-sync scheduler tests assert it backs off when foreground work is unread or recent.
+
+### 12.14 Automation Hiding Checklist
+
+- hidden automation rows are not deleted or marked read by UI filtering;
+- raw inbox diagnostics can show hidden automation rows;
+- `TeamInboxReader` preserves all supported `InboxMessageKind` values;
+- `TeamMessageFeedService` and renderer filtering have separate tests;
+- delivery, watchdog, prompt ledger rebuild, and work-sync never use UI-filtered messages as source of truth;
+- normal Messages and counts hide work-sync by default;
+- debug/audit views can opt into automation rows without changing durable state.
+
+### 12.15 OpenCode File-Change Backfill Checklist
+
+- delivery context file/hash contains the exact fields backfill needs;
+- retry-control text does not change delivery context hash;
+- negative backfill cache is invalidated when delivery context appears;
+- current-contract duplicates-only evidence is cacheable and old-contract duplicates-only evidence is not;
+- metadata-only OpenCode evidence remains manual-review/unavailable;
+- task-log native tool projection cannot synthesize reviewable file-change ledger entries;
+- summary-only requests wait for bounded backfill when delivery context exists;
+- backfill diagnostics include task/member/session/lane/context hash.
+
+### 12.16 Runtime Store Recovery Checklist
+
+- runtime diagnostics store can be dropped without touching delivery evidence;
+- prompt delivery ledger and runtime delivery journal are quarantined/rebuilt, not silently deleted;
+- canonical destination writes win over provider session rebuild data;
+- readiness-blocking launch store corruption blocks new delivery but keeps existing evidence available for proof/debug;
+- secondary lane recovery never writes evidence into primary lane;
+- artifact packs include manifest recovery action, source, and quarantine path;
+- recovery tests cover corrupted ledger, stale provider session, and existing committed destination row.
+
+### 12.17 Stopped Runtime Evidence Checklist
+
+- every OpenCode runtime write path calls the same fail-closed evidence gate before writing;
+- evidence gate receives teamName, runId, laneId, and evidenceKind;
+- stopped pure team rejects runtime delivery before sent-message/inbox write;
+- stopped mixed secondary lane rejects task event and heartbeat before attribution/liveness write;
+- stale old-run callback after relaunch cannot clear current warning or mark current delivery responded;
+- tombstone rejection is recorded as stale evidence with reason, not provider error;
+- stop/relaunch cleanup preserves ledgers/artifacts needed for debugging;
+- orphaned stale OpenCode process cleanup remains team/run/lane scoped.
+
+### 12.18 Cache And Advisory Invalidation Checklist
+
+- direct user reply emits `lead-message` refresh and member-advisory invalidation;
+- member inbox reply emits `inbox` refresh and member-advisory invalidation;
+- task event/attribution emits narrow `task-log-change`;
+- hidden automation write invalidates diagnostics/advisory where needed without showing normal Messages rows;
+- unsafe member name falls back to team-scoped advisory invalidation;
+- worker unavailable/invalidation failure is diagnostic-only after durable write;
+- tests assert both durable store state and renderer refresh behavior.
+
+### 12.19 Conservative Ledger Rebuild Checklist
+
+- visible proof rebuild requires strict relay/source/destination evidence;
+- prompt transport acceptance rebuild requires exact runtime prompt identity or command outcome proof;
+- ambiguous candidates remain pending/diagnostic;
+- hidden automation rows only rebuild automation-intent deliveries;
+- stale run rows cannot rebuild current run state;
+- rebuild preserves messageKind, source, relayOfMessageId, taskRefs, destination kind, and destination message ID;
+- rebuild never marks inbox read or mutates user-visible rows.
+
+### 12.20 Member-Work-Sync Timing Checklist
+
+- trigger-specific defaults are documented in tests;
+- broad `queueQuietWindowMs` cannot silently delay `turn_settled` and `tool_finished` production paths;
+- startup/member-spawn scans can materialize status without dispatching nudges before launch readiness;
+- `canDispatchNudges` is checked before dispatch and is effectively re-checked through dispatch-time revalidation;
+- foreground unread delivery and accepted-pending OpenCode delivery suppress generic work-sync;
+- scheduled dispatcher recovers due outbox rows after restart but is not the primary fresh-assignment path;
+- queue diagnostics expose trigger reasons, runAt, maxRunAt, queued age, running age, and rerunRequested.
+
+### 12.21 Delivery Latency Timeline Checklist
+
+- timeline is derived from existing ledgers/audit journals where possible;
+- every phase uses shared correlation IDs instead of text matching;
+- missing phases are explicit diagnostics, not silent gaps;
+- slow pass report distinguishes queue delay, relay busy wait, MCP repair, prompt acceptance, model/tool execution, proof wait, task-log projection, and work-sync decision;
+- timeline rows are developer/audit diagnostics, not normal Messages rows;
+- live E2E prints timeline on failure or threshold breach.
+
+### 12.22 Member Status Presentation Checklist
+
+- runtime failure/advisory/bootstrap state has higher priority than task labels;
+- task assignment remains visible as context, not liveness proof;
+- `registered_only` and runtime-process-without-bootstrap are not shown as online/working;
+- stale spawn-status fetch after stopped/offline is ignored;
+- hover/detail separates task, runtime diagnostic, lane/session, and worktree facts;
+- tests cover failed OpenCode secondary with assigned task.
+
+### 12.23 Tool-Error Plain Text Fallback Checklist
+
+- transcript-only assistant text after `message_send` failure is not visible proof;
+- MCP/session readiness repair runs before retry prompt;
+- task/file progress can be shown separately from reply delivery proof;
+- app never synthesizes user-visible reply from transcript-only text;
+- idempotency conflict, destination write failure, MCP not connected, and missing tool stay distinct;
+- later real runtime destination write clears advisory through normal proof reader.
+
+### 12.24 Agenda Fingerprint Stability Checklist
+
+- fingerprint payload contains only actionable work semantics;
+- `generatedAt`, UI row order, unread counts, duration labels, and cache revisions are excluded;
+- item order is canonical and independent from task array order;
+- evidence arrays are sorted before hashing;
+- dependency/review/owner/status changes intentionally change fingerprint;
+- unrelated task changes for other members do not change this member fingerprint unless they affect dependencies, review, or lead clarification;
+- any future `sourceRevision` addition includes a written semantic contract and regression tests.
+
+### 12.25 Report Token And Replay Checklist
+
+- token binds teamName, memberName, agendaFingerprint, and expiry;
+- reporter always reloads current agenda before validation;
+- `caught_up` requires current empty agenda;
+- `still_working` and `blocked` require current fingerprint;
+- `blocked` requires current blocker evidence;
+- pending replay goes through the same reporter/validator as live reports;
+- stale/expired/foreign reports are diagnostic-only and cannot extend leases or clear `needs_sync`;
+- member inactive/team inactive replay is marked superseded rather than accepted.
+
+### 12.26 Runtime Turn-Settled Spool Checklist
+
+- incoming event files are not claimable until fully written;
+- claim path moves incoming files to processing before reading;
+- stale processing recovery is bounded and ignores `.meta.json`;
+- invalid/oversized/unsupported-provider files go to invalid with reason;
+- non-terminal OpenCode outcomes are processed as ignored and do not enqueue reconcile;
+- provider-owned events require explicit team/member and configured provider match;
+- Claude transcript/session matching rejects wrong provider, deleted team, removed member, and reserved member;
+- duplicate source events are harmless at queue/outbox level.
+
+### 12.27 Task Impact Routing Checklist
+
+- owner changes enqueue only active owner unless fallback is required;
+- review changes enqueue current-cycle reviewer and lead for self-review/missing reviewer;
+- lead clarification and broken dependencies enqueue lead;
+- dependent task owners are enqueued when their blocker changes;
+- unknown/missing task ID fallback is diagnostic and still passes through readiness/cooldown;
+- file-path detail parsing accepts only task JSON names, not arbitrary paths;
+- resolver failure falls back to team scan instead of dropping the event.
+
+### 12.28 Busy Signal Checklist
+
+- busy signal is advisory-only and cannot block normal foreground delivery;
+- active tool state can be cleared by finish, reset, offline, or bounded stale cleanup;
+- recent-finish grace is short and tested;
+- busy-signal errors return bounded retryAfter and diagnostics;
+- reset can clear one member or whole team;
+- future persisted busy state must be team/run/member scoped and TTL-bound.
+
+### 12.29 Nudge Outbox Revalidation Checklist
+
+- planner writes durable intent only after current status and activation checks;
+- dispatcher reloads current agenda before inbox write;
+- dispatcher supersedes stale fingerprint or empty agenda;
+- dispatcher re-checks lifecycle, phase2 activation, rate limit, busy signal, and watchdog cooldown;
+- retryable failures include bounded `nextAttemptAt`;
+- terminal failures are not revived without new fingerprint or supported intent key;
+- review-pickup delivery is tracked by reviewRequestEventId, not only agenda fingerprint.
+
+### 12.30 Inbox Nudge Sink Checklist
+
+- production calls sink only through outbox dispatcher;
+- outbox payloadHash conflict blocks sink call;
+- existing messageId path validates payload equivalence at the sink or fails closed;
+- writer-returned messageId is recorded consistently;
+- hidden automation row stays durable and debug-readable;
+- direct sink reuse cannot bypass payloadHash or messageKind/source/taskRefs validation.
+
+### 12.31 Targeted Recovery Checklist
+
+- OpenCode targeted recovery requires providerId `opencode`;
+- lead targeted recovery requires canonical lead-like member identity;
+- Codex/Anthropic/Gemini secondary agents stay behind phase2 readiness unless strict review pickup;
+- strict review pickup requires reviewRequestEventId and non-ambiguous evidence;
+- targeted recovery still goes through dispatch-time lifecycle, busy, cooldown, rate limit, and inbox checks;
+- tests cover both activation policy and dispatcher behavior.
+
+### 12.32 Queue And Scheduler Timing Checklist
+
+- default trigger timings remain documented by tests;
+- `turn_settled` and `tool_finished` remain fast;
+- broad `queueQuietWindowMs` is not used to tune production fast triggers without explicit triggerTiming;
+- coalescing preserves earlier/urgent runAt;
+- running-item follow-up keeps urgent reasons and schedules quickly;
+- scheduled dispatcher recovers due outbox rows and does not replace fresh assignment delivery;
+- diagnostics expose nudge origin and queue timing.
+
+### 12.33 Stale Status Read Refresh Checklist
+
+- stale read enqueues reconcile only;
+- repeated reads coalesce while queued or running;
+- stale read cannot write inbox, call OpenCode, or dispatch a nudge directly;
+- inactive/stopped team remains inactive after refresh;
+- diagnostics separate stale refresh enqueue from actual nudge delivery;
+- renderer polling is not required for correctness.
+
+### 12.34 Runtime Turn-Settled Provider Wiring Checklist
+
+- Claude Stop hook settings are tested independently from Codex/OpenCode env wiring;
+- Codex primary and secondary teammate provisioning include the spool env;
+- OpenCode desktop bridge and live harness include the spool env;
+- workspace trust/preflight does not strip the spool env;
+- missing env degrades to no turn-settled event, not launch or delivery failure;
+- diagnostics identify provider, install mode, and spool root presence.
+
+### 12.35 Normalizer Strictness Checklist
+
+- provider and source strings are explicit contract fields;
+- source mismatch for a claimed provider fails closed;
+- unsupported providers do not fall back to `lead` or any configured member;
+- duplicate logical event produces stable `sourceId`;
+- different session/turn/runtime prompt identity produces distinct `sourceId`;
+- invalid payloads are quarantined and do not enqueue reconcile.
+
+### 12.36 Runtime Turn-Settled Drain Scheduler Checklist
+
+- first drain runs promptly after composition;
+- scheduler runs are non-overlapping;
+- exceptions log warning and do not stop future ticks;
+- claim/read batches are bounded;
+- stale processing files are recovered or quarantined;
+- drain only ingests/enqueues and never writes inbox nudges directly.
+
+### 12.37 Audit Journal Checklist
+
+- audit append failure is warning-only;
+- audit rotation/truncation does not alter canonical sync state;
+- audit entries are never parsed as proof;
+- artifact packs may include audit snippets as diagnostics only;
+- audit payloads are sanitized and bounded;
+- no full prompt, auth value, API key, or unbounded transcript is written.
+
+### 12.38 Phase 2 Metrics Checklist
+
+- empty metrics remain collecting/not-ready;
+- sparse metrics cannot enable active nudges;
+- corrupt or truncated metric data is conservative;
+- high nudge rate, fingerprint churn, or report rejection blocks readiness;
+- readiness reasons are user/debug explainable;
+- audit journal rotation does not affect metrics readiness.
+
+### 12.39 Member Work Sync Path Safety Checklist
+
+- per-member sync files use `MemberWorkSyncStorePaths`;
+- raw `memberName` is not joined into filesystem paths;
+- canonical key and display name are separated;
+- reserved identities cannot create teammate sync stores;
+- removed member storage is diagnostic-only unless the member is active again by canonical identity;
+- path uncertainty fails closed before nudge dispatch.
+
+### 12.40 Team Change Router Detail Checklist
+
+- every emitted `TeamChangeEvent.detail` shape has a router test;
+- malformed JSON detail never throws;
+- unknown inbox recipient cannot create a member directory;
+- durable task changes fall back safely when exact task impact cannot be resolved;
+- materializer failure for one member does not block queueing other members;
+- team-wide fallback remains diagnostic and downstream-gated.
+
+### 12.41 Nudge Delivery Wake Checklist
+
+- inbox insert remains the durable generic nudge boundary;
+- wake failure after insert records diagnostic/audit data;
+- wake failure does not mark delivered outbox rows retryable by default;
+- safe re-wake path does not duplicate inbox rows;
+- existing-nudge wake still respects foreground, busy, cooldown, and provider delivery gates;
+- latency timeline shows whether delay happened before or after durable inbox insert.
+
+### 12.42 Pending Report Replay Checklist
+
+- replay uses current agenda and current token validation;
+- accepted/rejected/superseded outcomes are marked processed;
+- transient reporter failures are bounded and diagnosable;
+- removed member and inactive team intents are superseded;
+- replay has deterministic order and per-run limits;
+- replay summary distinguishes processed from still-pending transient failures.
+
+### 12.43 Review Pickup Escalation Checklist
+
+- escalation key includes review request IDs and reason class;
+- same unchanged review request escalates once;
+- new review request event can escalate again;
+- generic agenda sync cannot use review-pickup bypass;
+- delivery failure chooses retry or escalation for one attempt, not both;
+- delivered review-pickup event IDs suppress duplicate direct nudges.
+
+### 12.44 Feature Composition Lifecycle Checklist
+
+- composition owns every scheduler, queue, and runtime-drain timer;
+- dispose is idempotent;
+- dispose prevents queued follow-up work after in-flight reconcile completes;
+- stale feature instance cannot receive or dispatch new events;
+- live tests dispose feature before deleting temp teams;
+- feature recreation does not reuse stale busy signal or queue state.
+
+### 12.45 Agenda Source Runtime Identity Checklist
+
+- config/member-meta merge precedence is documented and tested;
+- meta `removedAt` removes a member from active sync;
+- provider id affects provider-specific dispatch but not agenda fingerprint by default;
+- unknown provider fails closed for provider-specific direct delivery;
+- provider inference from model is diagnostic when it affects behavior;
+- lead-like and reserved identities cannot become secondary sync targets.
+
+### 12.46 Outbox Claim Lease Checklist
+
+- claimed outbox rows have an explicit stale threshold;
+- stale claimed rows recover under the same locks as normal claim;
+- old attemptGeneration writes are ignored after recovery;
+- terminal rows are never revived;
+- recovery path is idempotent across app restart.
+
+### 12.47 Poison Turn-Settled Payload Checklist
+
+- unexpected processing failures increment durable retry metadata;
+- repeated failures quarantine with a stable reason;
+- poison payload does not block other payloads in the batch;
+- invalid and ignored outcomes remain separate in summaries;
+- diagnostics include provider/source identity when available.
+
+### 12.48 IPC And HTTP Boundary Checklist
+
+- IPC handlers validate request shape at runtime;
+- HTTP routes and IPC normalize team/member/report fields consistently;
+- report source provenance is not overwritten incorrectly;
+- invalid input cannot create status, pending report, outbox, or member storage files;
+- browser-mode client route mapping has parity tests with Electron bridge behavior.
+
+### 12.49 Phase 2 Metric Provenance Checklist
+
+- manual/request status reads do not satisfy active readiness thresholds alone;
+- queue/runtime/team-change reconciles are the primary readiness signal;
+- UI refresh writes no outbox rows directly;
+- readiness diagnostics include event provenance counts;
+- evaluatedAt churn alone cannot unlock nudges.
+
+### 12.50 Watchdog Cooldown Checklist
+
+- missing stall journal does not suppress sync;
+- corrupt or unreadable journal suppression is visible and bounded;
+- cooldown is scoped to overlapping task IDs;
+- expired alerts do not suppress;
+- retry after cooldown remains idempotent.
+
+### 12.51 Main-Process Fanout Checklist
+
+- team-change fanout is treated as at-least-once;
+- duplicate events coalesce before outbox planning;
+- trigger reasons are diagnostic and do not churn fingerprints;
+- event fanout never dispatches inbox nudges directly;
+- disposed features ignore late fanout safely.
+
+### 12.52 Store Repair Diagnostics Checklist
+
+- corrupt member meta during repair is diagnostic;
+- repair never deletes member feature data because discovery failed;
+- valid members still repair when one member dir is malformed;
+- legacy fallback does not create unsafe raw member dirs;
+- repair output is deterministic.
+
+### 12.53 User Status Surface Checklist
+
+- normal Messages hides work-sync automation by default;
+- activity/debug views label work-sync rows as control-plane automation;
+- member status copy does not imply user-message delivery failure;
+- diagnostics expose fingerprint/token/outbox state without secrets;
+- rendering a status panel cannot trigger dispatch.
+
+### 12.54 Protocol Proof Missing Checklist
+
+- proof-missing never marks inbox read;
+- proof-missing never marks delivery ledger proven;
+- proof-missing never satisfies `member_work_sync_report`;
+- recovery identity includes exact team, member, original message id, and task refs when available;
+- missing identity fails closed with diagnostics;
+- repeated advisory evaluation coalesces to one recovery key.
+
+### 12.55 Advisory Cache Clearing Checklist
+
+- visible reply proof invalidates member advisory cache;
+- task progress proof invalidates member advisory cache;
+- canceled recovery invalidates affected member advisory cache;
+- stale cache cannot keep `OpenCode proof missing` visible after current proof exists;
+- invalidation failure is logged and does not send another nudge.
+
+### 12.56 Delivery Repair And Work-Sync Arbitration Checklist
+
+- delivery repair and work-sync share a recent-recovery guard or compatible deterministic keys;
+- direct visible-reply failures prefer delivery repair;
+- task-scoped progress-proof failures prefer work-sync when no delivery retry is active;
+- only one nudge is sent per message/cooldown window;
+- both paths preserve original `messageId`, `relayOfMessageId`, and `taskRefs`.
+
+### 12.57 Diagnostic Classifier Checklist
+
+- classifier precedence is explicit and tested;
+- `Not connected` is not proof missing;
+- auth/quota/runtime failures outrank proof missing;
+- proof-missing tokens are narrow;
+- user-facing advisory diagnostics redact secrets and large raw payloads.
+
+### 12.58 Inbox Nudge Payload Integrity Checklist
+
+- inbox nudge sink validates existing row payload hash;
+- same messageId with different payload fails closed;
+- legacy rows without hash are handled by explicit compatibility rule;
+- conflict does not mark outbox delivered;
+- hash excludes presentation-only fields that should not change delivery semantics.
+
+### 12.59 OpenCode Relay In-Flight Checklist
+
+- one team/member has one active relay loop;
+- `onlyMessageId` wake during active relay cannot start overlapping prompt delivery;
+- queued-behind result is retryable and diagnostic;
+- already-read and missing target rows are distinct outcomes;
+- active relay map is not overwritten by a newer promise.
+
+### 12.60 Work-Sync Delivery Timeline Checklist
+
+- outbox delivered means inbox inserted;
+- runtime prompt acceptance is tracked separately through OpenCode ledger;
+- timeline captures inbox insert, wake schedule, relay start, prompt accept, response proof, and terminal reason;
+- wake failure does not duplicate inbox row;
+- user-facing copy does not imply runtime acceptance from outbox delivery alone.
+
+### 12.61 Busy Recovery Context Checklist
+
+- default busy context remains generic work-sync;
+- same-message delivery repair does not use broad foreground bypass;
+- unrelated unread foreground messages still suppress generic and task recovery nudges;
+- active prompt ledger suppresses duplicate sends;
+- tests cover foreground unread, foreground recent, active ledger, lane missing, and same-message repair.
+
+### 12.62 Advisory Read-Path Purity Checklist
+
+- `TeamMemberRuntimeAdvisoryService` remains query/cache only;
+- `TeamDataService` snapshot reads never enqueue recovery;
+- renderer polling cannot write inbox/outbox rows;
+- worker advisory refresh cannot send notifications or prompts;
+- recovery scheduling happens only from delivery lifecycle or explicit member-work-sync command paths.
+
+### 12.63 Proof-Missing Trigger Contract Checklist
+
+- trigger reason is explicit if proof-missing recovery is implemented;
+- default run-after and max coalesce are defined;
+- composition passes trigger timing when customized;
+- audit metadata includes original message id and task refs;
+- broad `runtime_activity` behavior remains unchanged.
+
+### 12.64 Outbox Recovery Lookup Checklist
+
+- arbitration uses `MemberWorkSyncOutboxStorePort`;
+- JSON store owns storage scans;
+- lookup distinguishes pending, claimed, delivered, retryable, superseded, and stale rows;
+- logical key includes team, member, intent key, original message id, and task ids when available;
+- no provisioning code reads member-work-sync store files directly.
+
+### 12.65 Work-Sync Inbox Schema Checklist
+
+- `workSyncPayloadHash` is optional and backward-compatible;
+- writer persists it only for work-sync automation;
+- reader materializes it without changing normal message identity;
+- renderer hides it from normal Messages UI;
+- sink treats same messageId with different hash as conflict.
+
+### 12.66 Audit Event Extension Checklist
+
+- new event names are added to `MemberWorkSyncAuditEventName`;
+- `reasonToAuditEvent()` has explicit mappings for new recovery reasons;
+- no string casts are used to bypass the event union;
+- audit rows include diagnostic identity without secrets;
+- audit failure does not change dispatch correctness.
+
+---
+
+## 13. Rollback Strategy
+
+Phase 1 rollback:
+
+- revert copy/tests only.
+
+Phase 2 rollback:
+
+- remove session-id lookup;
+- keep old lane/current fallback;
+- no data migration needed.
+
+Phase 3 rollback:
+
+- default `settlementMode` back to `observed`;
+- keep acceptance fields in ledger as ignored optional data;
+- no task data migration.
+
+Phase 4 rollback:
+
+- restore previous retry delays/reason mapping.
+
+Proof-missing recovery rollback:
+
+- disable new scheduling entrypoint first, not advisory reads;
+- keep optional inbox `workSyncPayloadHash` as ignored compatible data;
+- keep audit rows and outbox rows as diagnostics;
+- do not delete inbox rows or ledgers.
+
+Never roll back by deleting ledgers, outcome stores, runtime session stores, or task JSON.
+
+---
+
+## 14. Definition Of Done
+
+This hardening is complete when:
+
+- `workIntervals` remain unchanged and are tested as status-time.
+- UI label no longer implies active execution.
+- OpenCode task logs can load from exact runtime session evidence.
+- OpenCode delivery acceptance no longer waits for full turn completion in the app-facing path.
+- Accepted prompts are never duplicated by one attempt.
+- Watchdog and member-work-sync remain separated.
+- Successful OpenCode replies clear warnings.
+- Accept-fast is gated by explicit orchestrator capability.
+- Lane registry failures do not erase accepted exact prompt/session evidence.
+- Runtime-delivery dedupe returns existing message IDs without weakening proof rules.
+- Runtime `message_send` idempotency conflicts remain separate from MCP readiness repair.
+- Proof reader sees the same destination stores that runtime delivery writes.
+- Direct user sent-message writes are concurrency-safe before they are used as proof.
+- Runtime delivery taskRefs are preserved or rejected explicitly.
+- Secondary OpenCode runtime control calls cannot write under the wrong lane.
+- Foreground OpenCode inbox rows cannot be delayed behind hidden work-sync automation.
+- Work-sync/task-stall automation is hidden from normal Messages without losing durable diagnostics or delivery state.
+- OpenCode Changes backfill remains driven by task-change ledger evidence, not task-log native rows.
+- Runtime store recovery cannot silently drop or downgrade prompt/runtime delivery ledgers.
+- Stopped/tombstoned OpenCode runtime callbacks cannot write visible state or clear current-run advisories.
+- Destination writes reliably invalidate message feed, task-log, and member-advisory caches without making cache refresh a correctness dependency.
+- Ledger rebuild is conservative: strict proof can clear warnings, but missing prompt identity cannot be upgraded to accepted transport.
+- Member-work-sync fast triggers remain low-latency while launch/startup scans stay readiness-gated.
+- A delivery latency timeline can explain slow OpenCode starts without conflating queue, relay, MCP, model, proof, and UI cache delays.
+- Member status surfaces cannot show a failed/unbootstrapped OpenCode teammate as simply "working on".
+- Transcript-only plain text after OpenCode tool error cannot clear delivery proof or synthesize a user-visible reply.
+- Agenda fingerprints do not churn on presentation-only changes, and reports/tokens are accepted only for the current fingerprint.
+- Runtime turn-settled events survive app restarts, route only to the configured active member/provider, and duplicate safely.
+- Task impact routing is narrow for known task changes and diagnostic/rate-limited for team-wide fallback.
+- Busy signal remains bounded advisory state and cannot suppress foreground delivery or nudges indefinitely.
+- Nudge outbox dispatch revalidates current agenda and safety gates immediately before inbox write.
+- Inbox nudge idempotency cannot hide changed payload, message kind, source, or task refs.
+- Targeted recovery remains provider-specific and does not become a global phase2 bypass.
+- Queue fast triggers, stale-read refresh, and scheduler recovery are separated and explainable in diagnostics.
+- Runtime turn-settled install paths are tested per provider: Claude Stop hook, Codex provisioning env, and OpenCode bridge env.
+- Runtime turn-settled normalizers reject provider/source drift and cannot route malformed events to the wrong member.
+- Runtime turn-settled drain is non-blocking, recovers after failure, and never writes inbox nudges directly.
+- Member-work-sync audit journal is diagnostic-only and cannot change canonical sync or delivery state.
+- Phase 2 readiness remains conservative under empty, sparse, corrupt, or truncated metrics.
+- Member-work-sync per-member storage uses canonical member paths and rejects unsafe/reserved identities before dispatch.
+- Team-change routing cannot silently drop current event detail shapes or enqueue unsafe/raw member names.
+- Nudge wake failures are visible and recoverable without duplicating durable inbox rows.
+- Pending report replay is bounded, current-agenda validated, and cannot loop stale intents forever.
+- Review-pickup escalation is idempotent for unchanged review request events.
+- Feature composition owns and disposes all work-sync timers, queues, and drain schedulers.
+- Provider/runtime identity merge rules are tested separately from agenda fingerprint semantics.
+- Claimed outbox rows recover after crash without duplicate dispatch.
+- Poison runtime turn-settled payloads quarantine after bounded retries.
+- IPC/HTTP adapters validate requests and preserve report provenance.
+- Phase 2 readiness cannot be trained by manual status reads alone.
+- Watchdog cooldown failures are bounded, task-scoped, and visible.
+- Duplicate main-process events coalesce before dispatch.
+- Store repair never deletes hidden sync data because member metadata is malformed.
+- User status surfaces separate control-plane sync from agent/message failure.
+- `OpenCode proof missing` remains a warning/recovery condition and never becomes success proof.
+- Proof-missing recovery preserves original message identity and coalesces duplicate advisory evaluations.
+- Later visible reply or task progress proof clears proof-missing advisory and suppresses queued recovery.
+- Delivery repair and work-sync cannot both nudge the same logical message inside the cooldown window.
+- Diagnostic classification distinguishes runtime connectivity, auth/quota/provider failures, and protocol proof missing using explicit precedence.
+- Inbox nudge sink detects messageId/payloadHash drift before marking outbox delivered.
+- OpenCode inbox relay cannot run overlapping prompt loops for the same team/member.
+- Generic work-sync outbox delivered remains an inbox durability state and is not confused with OpenCode prompt acceptance.
+- Busy suppression cannot block same-message delivery repair forever while still protecting unrelated foreground messages.
+- Runtime advisory reads are side-effect free and cannot schedule recovery from UI polling.
+- Proof-missing recovery uses an explicit trigger and port contract, or remains deliberately out of scope.
+- Recovery arbitration uses member-work-sync ports instead of provisioning-code storage scans.
+- Work-sync payload hashes are persisted backward-compatibly and hidden from normal user-facing message content.
+- Recovery audit events are typed, tested, and non-blocking.
+- Live smoke proves a task assignment reaches OpenCode, starts work, produces task logs, and settles member-work-sync without duplicate nudges.
+
+---
+
+## 15. Practical Expected Impact
+
+Phase 1:
+
+- no speed change;
+- less user confusion.
+
+Phase 2:
+
+- task logs should appear correctly even after OpenCode session recreate;
+- debugging delayed starts becomes much clearer.
+
+Phase 3:
+
+- normal ready-session OpenCode assignment should be accepted in roughly `1-5s`;
+- stale/MCP repair path should no longer wait for full model completion before app acceptance;
+- observed start may still depend on model/provider latency, but app state will distinguish "accepted and running" from "not accepted".
+
+Phase 4:
+
+- fewer false retries;
+- fewer confusing warnings;
+- better separation between provider errors, MCP errors, and slow model turns.
+
+---
+
+## 16. Final Recommendation
+
+Proceed in order:
+
+1. Keep `workIntervals` unchanged.
+2. Make the UI label honest.
+3. Fix OpenCode task logs by exact session evidence.
+4. Split OpenCode delivery acceptance from turn observation.
+5. Tune retries only after acceptance/observation is covered by tests.
+
+Do not jump straight to Phase 3 without Phase 2. Without correct session-based logs, debugging accept-fast behavior will be too ambiguous.
diff --git a/docs/team-management/research-messaging.md b/docs/team-management/research-messaging.md
index ea5789e1..a5005fdf 100644
--- a/docs/team-management/research-messaging.md
+++ b/docs/team-management/research-messaging.md
@@ -1,43 +1,43 @@
-# Research: Подходы к отправке сообщений тиммейтам
+# Research: Teammate Message Delivery Approaches
-## Сравнение 3 подходов
+## Comparison of 3 Approaches
-| Критерий | Inbox-файлы | Agent SDK | CLI subprocess |
-|----------|:-----------:|:---------:|:--------------:|
-| Скорость | ~5ms | ~12с | 10-15с |
-| Стоимость | $0 | $0.01-0.08/msg | токены |
-| Работает с запущенными | **YES** | NO | NO |
-| Прерывает mid-turn | NO | NO | NO |
-| Требует API ключ | NO | YES | NO |
-| Расход памяти | 0 | 0 | 100-320MB |
+| Criterion | Inbox files | Agent SDK | CLI subprocess |
+| --------- | :---------: | :-------: | :------------: |
+| Speed | ~5ms | ~12s | 10-15s |
+| Cost | $0 | $0.01-0.08/msg | tokens |
+| Works with running teammates | **YES** | NO | NO |
+| Interrupts mid-turn | NO | NO | NO |
+| Requires API key | NO | YES | NO |
+| Memory usage | 0 | 0 | 100-320MB |
---
-## 1. Inbox-файлы (ВЫБРАНО)
+## 1. Inbox Files (Chosen)
-### Как работает
+### How It Works
-Прямая запись JSON в файл `~/.claude/teams/{team}/inboxes/{member}.json`. Claude Code мониторит эти файлы через fs.watch и доставляет сообщения агентам между turns.
+The app writes JSON directly to `~/.claude/teams/{team}/inboxes/{member}.json`. Claude Code watches these files through fs.watch and delivers messages to agents between turns.
-### Плюсы
+### Pros
-- **Мгновенная запись** (~5ms)
-- **$0** — никаких API вызовов
-- **Единственный** способ общаться с запущенными тиммейтами
-- Работает с idle и active агентами (но доставка между turns)
+- **Instant write** (~5ms)
+- **$0** - no API calls
+- **Only** way to communicate with already-running teammates
+- Works with idle and active agents, although delivery still happens between turns
-### Минусы
+### Cons
-- Race condition при одновременной записи (см. [research-inbox.md](./research-inbox.md))
-- Формат недокументирован (internal API)
-- Доставка между turns, не real-time
+- Race condition during concurrent writes (see [research-inbox.md](./research-inbox.md))
+- Undocumented format (internal API)
+- Delivery happens between turns, not in real time
-### Формат сообщения
+### Message Format
```json
{
"from": "user",
- "text": "Не трогай файл auth.ts, я его сам изменю",
+ "text": "Do not touch auth.ts, I will change it myself",
"timestamp": "2026-02-17T15:30:00.000Z",
"read": false,
"summary": "Do not modify auth.ts",
@@ -47,9 +47,9 @@
---
-## 2. Agent SDK (ОТВЕРГНУТ)
+## 2. Agent SDK (Rejected)
-### Как работает
+### How It Works
```typescript
import Anthropic from '@anthropic-ai/sdk';
@@ -57,156 +57,159 @@ const client = new Anthropic();
const response = await client.messages.create({
model: 'claude-opus-4-7',
messages: [{ role: 'user', content: 'Send message to teammate...' }],
- tools: [/* SendMessage, TaskUpdate, etc. */]
+ tools: [/* SendMessage, TaskUpdate, etc. */],
});
```
-### Почему отвергнут
+### Why It Was Rejected
-1. **Создаёт НОВУЮ сессию** — не подключается к работающему тиммейту. SendMessage и TaskCreate — это инструменты модели, не программные вызовы
-2. **~12 секунд** на каждый вызов (полный API round-trip)
-3. **Стоит токены** — $0.01-0.08 за сообщение
-4. **Нужен API ключ** — отдельная оплата, а не подписка Claude
+1. **Creates a new session** - does not attach to a running teammate. SendMessage and TaskCreate are model tools, not programmatic calls.
+2. **~12 seconds** per call because of the full API round trip.
+3. **Costs tokens** - $0.01-0.08 per message.
+4. **Requires an API key** - separate billing, not a Claude subscription.
-### Когда может пригодиться
+### When It May Be Useful
-- Создание новых команд программно
-- Автоматизация workflow (вне real-time UI)
+- Creating new teams programmatically.
+- Workflow automation outside the real-time UI path.
---
-## 3. CLI subprocess (ОТВЕРГНУТ)
+## 3. CLI Subprocess (Rejected)
-### Как работает
+### How It Works
```bash
claude --message "Send message to teammate-1: stop working on X"
```
-### Почему отвергнут
+### Why It Was Rejected
-1. **Новый процесс** — не инжектится в работающего тиммейта
-2. **10-15 секунд** холодный старт
-3. **100-320MB памяти** на процесс
-4. Каждый вызов стоит токены
+1. **New process** - does not inject into a running teammate.
+2. **10-15 second** cold start.
+3. **100-320MB** of memory per process.
+4. Each call costs tokens.
---
-## Архитектура доставки (обновлено 2026-03-23)
+## Delivery Architecture (Updated 2026-03-23)
-### Два разных механизма: лид vs тиммейты
+### Two Different Mechanisms: Lead vs Teammates
-**Лид** читает ТОЛЬКО stdin (stream-json). Для доставки сообщений лиду используется `relayLeadInboxMessages()` — конвертирует inbox-записи в stream-json на stdin. Без relay лид не видит inbox.
+**Lead** reads ONLY stdin (stream-json). Messages to the lead are delivered with `relayLeadInboxMessages()`, which converts inbox entries into stream-json on stdin. Without relay, the lead does not see inbox messages.
-**Тиммейты** — полноценные независимые Claude Code процессы. Каждый мониторит свой inbox файл через fs.watch и читает сообщения напрямую. Relay через лида НЕ нужен.
+**Teammates** are fully independent Claude Code processes. Each teammate watches its own inbox file through fs.watch and reads messages directly. Relay through the lead is not needed.
-### Поток сообщений: Юзер → Тиммейт
+### Message Flow: User -> Teammate
-```
-User → [UI] → TeamInboxWriter → inboxes/{member}.json (read: false)
- ↓
- Teammate CLI (fs.watch) → читает → обрабатывает
- ↓
- Teammate → inboxes/user.json (ответ)
- ↓
- [UI] ← TeamInboxReader ← читает user.json
+```text
+User -> [UI] -> TeamInboxWriter -> inboxes/{member}.json (read: false)
+ |
+ Teammate CLI (fs.watch) -> reads -> handles
+ |
+ Teammate -> inboxes/user.json (response)
+ |
+ [UI] <- TeamInboxReader <- reads user.json
```
-Лид в этой цепочке НЕ участвует. Сообщение доставляется напрямую.
+The lead is not part of this path. The message is delivered directly.
-### Поток сообщений: Юзер → Лид
+### Message Flow: User -> Lead
-```
-User → [UI] → stdin (stream-json) → Lead CLI
- ↓
-Lead → sentMessages.json / liveLeadProcessMessages
- ↓
- [UI] ← читает и отображает
+```text
+User -> [UI] -> stdin (stream-json) -> Lead CLI
+ |
+Lead -> sentMessages.json / liveLeadProcessMessages
+ |
+ [UI] <- reads and renders
```
-Для лида дополнительно работает `relayLeadInboxMessages()` при изменении `inboxes/{lead}.json`.
+For the lead, `relayLeadInboxMessages()` additionally runs when `inboxes/{lead}.json` changes.
-### Ответы тиммейтов
+### Teammate Responses
-Тиммейт отвечает юзеру через `SendMessage(to="user")`, что записывается в `inboxes/user.json`. UI читает этот файл через `TeamInboxReader.getMessages()` (читает ВСЕ inbox файлы в директории).
+A teammate responds to the user through `SendMessage(to="user")`, which writes to `inboxes/user.json`. The UI reads this file through `TeamInboxReader.getMessages()`, which reads all inbox files in the directory.
-Сообщения в `user.json` могут не содержать `messageId` — `TeamInboxReader` генерирует детерминированный ID из sha256(from + timestamp + text).
+Messages in `user.json` may not contain `messageId`; `TeamInboxReader` generates a deterministic ID from sha256(from + timestamp + text).
-### from: "user" — подтверждено работает
+### from: "user" Is Confirmed To Work
-`from: "user"` работает корректно (подтверждено эмпирически 2026-03-23):
-- Тиммейт получает сообщение
-- Тиммейт корректно определяет что это от юзера
-- Тиммейт отвечает в `inboxes/user.json`
-- Fallback на `from: "team-lead"` не нужен
+`from: "user"` works correctly, confirmed empirically on 2026-03-23:
-### Почему relay через лида был ОТКЛЮЧЁН (2026-03-23)
+- Teammate receives the message.
+- Teammate correctly identifies that it came from the user.
+- Teammate responds in `inboxes/user.json`.
+- Fallback to `from: "team-lead"` is not needed.
-Ранее при отправке DM тиммейту, помимо записи в inbox, вызывался `relayMemberInboxMessages()` — инструкция лиду переслать сообщение через `SendMessage(to=member)`. Это вызывало 3 бага:
+### Why Relay Through the Lead Was Disabled (2026-03-23)
-1. **Лид отвечал вместо тиммейта** — LLM интерпретировал relay-инструкцию как обращение к себе и отвечал юзеру напрямую
-2. **Дубликаты сообщений** — `markInboxMessagesRead()` записывал в файл → FileWatcher срабатывал → relay запускался повторно → цикл
-3. **Тиммейт не отвечал юзеру** — relay-промпт содержал "Do NOT send to user", что тиммейт тоже видел через лида
+Previously, when sending a DM to a teammate, the app called `relayMemberInboxMessages()` in addition to writing to the inbox. This instructed the lead to forward the message through `SendMessage(to=member)`. It caused 3 bugs:
-Relay отключён в `teams.ts` (handleSendMessage) и `index.ts` (FileWatcher). Код закомментирован, не удалён. Relay для лида (`relayLeadInboxMessages`) не затронут.
+1. **Lead replied instead of the teammate** - the LLM interpreted the relay instruction as addressed to itself and answered the user directly.
+2. **Duplicate messages** - `markInboxMessagesRead()` wrote to the file, triggering FileWatcher, which re-ran relay and created a loop.
+3. **Teammate did not reply to the user** - the relay prompt contained "Do NOT send to user", which the teammate also saw through the lead.
+
+Relay is disabled in `teams.ts` (`handleSendMessage`) and `index.ts` (FileWatcher). The code is commented out, not deleted. Lead relay (`relayLeadInboxMessages`) is unaffected.
---
-## Доставка: Timing и ограничения
+## Delivery: Timing and Constraints
-### Цикл тиммейта
+### Teammate Turn Cycle
-```
+```text
Turn N:
- 1. Читает inbox → видит новые (read: false)
- 2. Обрабатывает сообщения/задачи
- 3. Вызывает инструменты
+ 1. Reads inbox -> sees new messages with read: false
+ 2. Handles messages/tasks
+ 3. Calls tools
4. Reasoning
5. Output
- → idle_notification → IDLE
+ -> idle_notification -> IDLE
-... ожидание ...
+... wait ...
Turn N+1:
- 1. Пробуждение (новое сообщение в inbox / назначение задачи)
- 2. Читает inbox → видит новые
+ 1. Wake-up (new inbox message / assigned task)
+ 2. Reads inbox -> sees new messages
...
```
-### Задержка
+### Delay
-- **Idle agent**: получит при следующем пробуждении (доли секунды если inbox-change triggers)
-- **Active agent (mid-turn)**: получит только после завершения текущего turn (1-30 секунд)
+- **Idle agent**: receives the message on the next wake-up, usually within a fraction of a second if the inbox change triggers the wake-up.
+- **Active agent (mid-turn)**: receives the message only after the current turn completes, usually 1-30 seconds.
-### Нельзя прервать
+### No Mid-Turn Interrupt
-Если агент уже вызвал Edit/Bash — инструмент выполнится. Наше сообщение придёт ПОСЛЕ.
+If an agent has already called Edit/Bash, the tool will complete. Our message arrives after that.
-**Пример**:
-```
-17:12:30 — Agent начинает Edit на auth.ts
-17:12:31 — Мы шлём "Не трогай auth.ts"
-17:12:32 — Agent завершает Edit (auth.ts изменён)
-17:12:33 — Agent читает inbox, видит наше сообщение
-→ Поздно, файл уже изменён
+**Example**:
+
+```text
+17:12:30 - Agent starts Edit on auth.ts
+17:12:31 - We send "Do not touch auth.ts"
+17:12:32 - Agent completes Edit (auth.ts changed)
+17:12:33 - Agent reads inbox and sees our message
+-> Too late, the file was already changed
```
-### Hard Interrupt (будущее)
+### Hard Interrupt (Future)
-Возможные подходы:
-1. **kill -SIGINT** процесса тиммейта (жёсткое прерывание, потеря контекста)
-2. **Файловый flag** `.interrupt-{member}` (нужна поддержка в Claude Code)
-3. **API от Anthropic** (если появится)
+Possible approaches:
-Текущее решение: задержка приемлема, hard interrupt — в будущем.
+1. **kill -SIGINT** the teammate process: hard interrupt, context loss.
+2. **File flag** `.interrupt-{member}`: needs Claude Code support.
+3. **Anthropic API**: if it becomes available.
+
+Current decision: the delay is acceptable; hard interrupt is future work.
---
-## Финальное решение
+## Final Decision
-### messageId — обязателен в каждом исходящем сообщении
+### messageId Is Required In Every Outgoing Message
-Каждое исходящее сообщение включает `messageId: crypto.randomUUID()`:
+Every outgoing message includes `messageId: crypto.randomUUID()`:
```json
{
@@ -219,18 +222,18 @@ Turn N+1:
}
```
-### Verify: проверка сразу после записи
+### Verify Immediately After Write
-- После atomic write читаем inbox и ищем наш `messageId`
-- Если не найден — потеря обнаружена → warning в UI (не silent fail)
-- Не автоматический retry на MVP
+- After atomic write, read the inbox and look for our `messageId`.
+- If it is missing, message loss has been detected: show a warning in the UI instead of failing silently.
+- No automatic retry in MVP.
-### 3 состояния offline-участника
+### 3 States For Offline Members
-| Состояние | Условие | Отображение |
-|-----------|---------|-------------|
-| `ACTIVE` | idle < 5 минут | Зелёный dot |
-| `IDLE` | idle > 5 минут | Жёлтый dot |
-| `TERMINATED` | Получен `shutdown_response` с `approve: true` | Серый dot, "Завершён" |
+| State | Condition | Display |
+| ----- | --------- | ------- |
+| `ACTIVE` | idle < 5 minutes | Green dot |
+| `IDLE` | idle > 5 minutes | Yellow dot |
+| `TERMINATED` | Received `shutdown_response` with `approve: true` | Gray dot, "Terminated" |
-Определение состояния по timestamp последнего события в inbox (idle_notification, любое сообщение). TERMINATED — исключительно по явному `shutdown_response`.
+State is determined from the timestamp of the latest event in the inbox (`idle_notification` or any message). `TERMINATED` is based only on an explicit `shutdown_response`.
diff --git a/docs/team-management/tmux-vs-process-runtime-rationale.md b/docs/team-management/tmux-vs-process-runtime-rationale.md
new file mode 100644
index 00000000..d932cdbd
--- /dev/null
+++ b/docs/team-management/tmux-vs-process-runtime-rationale.md
@@ -0,0 +1,237 @@
+# Runtime backend rationale: process by default, tmux as debug/manual mode
+
+Date: 2026-05-13
+
+Status: informational note, not a normative architecture spec.
+
+This document captures the reasoning discussed during launch-runtime stabilization work. It may contain small inaccuracies or outdated details, especially about third-party projects. Treat it as context and rationale, not as the source of truth. Current implementation, tests, and upstream project docs remain authoritative.
+
+## Short version
+
+We intentionally moved the desktop app toward **process backend by default** for app-launched teammates, while keeping **tmux as an explicit debug/manual mode**.
+
+The reason is not that tmux is bad. The reason is that our product is not primarily a terminal multiplexer. It is an app-owned team runtime with UI state, launch diagnostics, restart/retry controls, provider auth handling, bootstrap proofs, notifications, and artifact packs.
+
+For that product shape, the default runtime should be controlled by the app, not by a human attaching to panes.
+
+## What tmux gives
+
+tmux is useful when the product expects live terminal sessions:
+
+- A human can attach to a pane and see exactly what the CLI sees.
+- If the CLI asks for input, the user can manually press Enter or answer prompts.
+- Panes can survive some app restarts.
+- TTY behavior is closer to running the CLI manually.
+- Debugging auth/login/TTY problems is easier because the terminal is visible.
+
+This is why tmux is a natural default for terminal-first systems.
+
+## Why not tmux like gastown/gascity
+
+Based on the external-project research snapshot from this thread, `gastown` and `gascity` appear to be more terminal/session-oriented. This is an interpretation of their public docs/issues at the time of research, not a maintained compatibility claim:
+
+- Their interaction model leans heavily on attachable sessions.
+- Their session layer historically expects pane-like targets and terminal observation.
+- In `gascity`, tmux appears as a default provider in session configuration.
+- They use tmux because their flow values live interactive sessions, attach/revive/nudge, and human terminal control.
+
+That is a valid design for a terminal-first product.
+
+It is not automatically the best default for us because our desktop app has different ownership boundaries:
+
+- We need reliable UI state for each member.
+- We need deterministic launch success/failure state.
+- We need structured diagnostics, not only "look at the pane".
+- We need restart/retry/cleanup to be owned by the app.
+- We need provider auth and tool approval to be modeled explicitly.
+- We need headless teammate behavior to work without a terminal being open.
+
+tmux also has known operational costs in this class of products:
+
+- zombie sessions;
+- broken pane targets;
+- socket/version split-brain after upgrades;
+- platform limitations, especially Windows;
+- ambiguity between "pane exists" and "agent is actually ready";
+- harder cleanup when app state and terminal state diverge.
+
+So the difference is product shape:
+
+- `gastown/gascity`: terminal/session-first, so tmux default is understandable.
+- `claude_team`: desktop/app-owned lifecycle-first, so process default is more aligned.
+
+## What process backend gives us
+
+The process backend lets the app own the lifecycle:
+
+- Runtime identity is represented as process metadata, not only pane id.
+- `backendType: process` and `tmuxPaneId: process:` preserve compatibility with older shapes while making the backend explicit.
+- Launch state can distinguish `spawned`, `bootstrap_submitted`, `bootstrap_confirmed`, `failed_to_start`, `bootstrap_stalled`, and provider failures.
+- Diagnostics can be surfaced in member cards, notifications, launch summaries, and artifact packs.
+- Restart and cleanup can target launch-owned processes instead of broad terminal state.
+- App-managed bootstrap can avoid relying on the model to manually discover and call setup tools.
+
+This is a better foundation for stable desktop launches than treating a pane as the primary runtime truth.
+
+## Interactive prompts are still real
+
+The main argument for tmux is valid: real CLIs sometimes ask interactive questions.
+
+Examples:
+
+- "Press Enter to continue"
+- "Do you want to proceed? [y/N]"
+- "Enter API key"
+- "Please login"
+- OAuth token expired
+- provider quota or key limit prompt
+- tool approval prompt
+
+Our answer should not be "ignore all interaction". The correct answer is to split interaction into categories.
+
+## How our architecture should handle interaction
+
+### Structured approvals
+
+Tool approvals should use a structured protocol:
+
+- CLI emits a `control_request`;
+- app shows an approval UI or notification;
+- app sends `control_response` through the owned channel;
+- decision is persisted in runtime state.
+
+This is better than asking the user to attach to tmux and press a key manually.
+
+### Auth and login prompts
+
+Auth/login prompts should usually be handled before launch:
+
+- preflight provider auth;
+- validate subscription/API-key mode;
+- validate required settings/env;
+- fail fast with actionable UI if auth is missing or expired.
+
+Hidden teammate processes should not block waiting for a browser login or secret input.
+
+### Safe known prompts
+
+Some prompts can be handled through an allowlisted interactive prompt gate:
+
+- exact "Press Enter to continue" style prompt;
+- exact yes/no confirmation where the action is known and safe;
+- one prompt at a time per process;
+- time out if the user does not respond;
+- event recorded in diagnostics/artifact pack.
+
+For a lead process, the desktop app already owns `child.stdin`, so writing a newline is technically possible.
+
+For the teammate process backend, the desktop app may not directly own the child handle. The robust design is:
+
+- detect prompt in process backend/orchestrator;
+- surface structured prompt state to desktop;
+- user chooses action in UI;
+- the runtime owner writes to the teammate stdin;
+- event is persisted.
+
+Do not blindly write to arbitrary process stdin by PID.
+
+### Unknown prompts
+
+Unknown prompts should not be answered automatically.
+
+Correct behavior:
+
+- mark the member as waiting/blocked with a diagnostic;
+- show the relevant output excerpt;
+- suggest fixing auth/settings or using tmux debug mode;
+- avoid sending random newline/yes/no input.
+
+This prevents dangerous accidental confirmation and avoids hiding provider setup bugs.
+
+## Why tmux remains useful
+
+tmux should stay available as an explicit mode:
+
+```bash
+CLAUDE_TEAM_TEAMMATE_MODE=tmux pnpm dev
+```
+
+or via extra CLI args:
+
+```bash
+--teammate-mode tmux
+```
+
+Use it for:
+
+- debugging unknown TTY behavior;
+- reproducing provider CLI prompts manually;
+- investigating strange live CLI output;
+- cases where human terminal control matters more than app-owned lifecycle.
+
+tmux is an escape hatch, not the production default.
+
+## Why not full arbitrary terminal emulation
+
+Trying to support all possible interactive terminal behavior inside the process backend would be risky.
+
+Problems:
+
+- prompts are provider-specific and change over time;
+- pressing Enter may be safe in one context and dangerous in another;
+- stdin might be structured JSON, not text;
+- a newline can land during an active model turn;
+- secrets should not be requested through generic stdin;
+- the app can accidentally mask auth or provider integration failures.
+
+The safer contract is:
+
+- app-managed launch should be non-interactive by default;
+- known safe prompts may be handled through structured UI;
+- auth/setup should be preflighted;
+- unknown TTY behavior needs tmux/manual debug mode.
+
+## Current strategic choice
+
+Recommended runtime policy:
+
+1. Production default: process backend.
+2. Provider setup: preflight and actionable diagnostics.
+3. Tool approvals: structured app UI.
+4. Known safe prompts: bounded interactive prompt gate.
+5. Unknown prompts: fail/block visibly with diagnostics.
+6. Debug/manual: explicit tmux mode.
+
+This keeps the app in control of lifecycle state while preserving tmux where it is genuinely useful.
+
+## Tradeoff summary
+
+### Process default + tmux debug mode
+
+- Confidence: 9.3/10
+- Reliability: 9/10
+- Complexity: 6/10
+
+Best fit for desktop/app-owned agent teams. Requires strong diagnostics and provider preflight.
+
+### tmux default + process fallback
+
+- Confidence: 6.5/10
+- Reliability: 6.5/10
+- Complexity: 4/10
+
+Good for terminal-first workflows. Less aligned with deterministic app-owned launch state.
+
+### Fully abstract runtime providers
+
+- Confidence: 7/10
+- Reliability: 7.5/10
+- Complexity: 9/10
+
+Potentially useful later, but too broad as a launch-stability fix.
+
+## Bottom line
+
+We did not reject tmux entirely. We rejected tmux as the default runtime truth for app-launched teams.
+
+The desktop product should make teammate launch reliable through app-owned process lifecycle, structured evidence, diagnostics, and controlled recovery. tmux remains valuable for debug/manual sessions, especially when an unknown CLI prompt requires a real terminal.
diff --git a/docs/team-management/workspace-trust-host-preflight-plan.md b/docs/team-management/workspace-trust-host-preflight-plan.md
new file mode 100644
index 00000000..930d66f5
--- /dev/null
+++ b/docs/team-management/workspace-trust-host-preflight-plan.md
@@ -0,0 +1,4227 @@
+# Workspace Trust Host-Preflight Plan
+
+## Goal
+
+Make team launch work in newly selected workspaces without forcing the user to open Claude Code or Codex manually, while keeping the runtime safe, testable, and easy to extend.
+
+Chosen approach: **Host-preflight + runtime contract**.
+
+Rating:
+
+- Option: Host-preflight + runtime contract
+- Confidence: 8/10
+- Reliability: 9/10
+- Complexity: 8/10
+- Estimated change size: 950-1450 lines in the desktop app, plus 50-180 lines if the orchestrator contract is hardened in the sibling runtime repo.
+
+This plan intentionally avoids changing process launch semantics, permission mode, cleanup, tmux lifecycle, or provider auth. It only prepares exact user-selected workspaces for provider trust gates and improves recovery when a trust gate still blocks launch.
+
+Important 2026-05-13 update: the safest Claude preflight is no longer plain `claude`. Use a protected interactive Claude command that still shows the workspace trust prompt but suppresses project MCP, project/local settings, hooks, and built-in tools as much as Claude Code allows.
+
+## Problem Summary
+
+Recent failures show this class of launch error:
+
+```text
+Teammate "alice-reviewer" cannot start in headless process runtime because workspace trust is not accepted for "[path]".
+Open that workspace once interactively and accept trust, then launch the team again.
+```
+
+The current UI explains the failure, but the product goal is stronger:
+
+- The user selects a project in the frontend.
+- The app should prepare that exact project for team launch.
+- The user should not need to manually open the workspace in Claude Code or Codex.
+- The fix must not weaken provider security globally or trust arbitrary paths.
+
+Important finding: this is not just a Codex issue. The `agent_teams_orchestrator` process runtime checks Claude Code workspace trust before spawning headless teammates, including Codex teammates. So a Codex teammate can fail because the Claude/orchestrator workspace trust state is missing.
+
+## Prototype Findings
+
+Local versions used during compatibility checks:
+
+- Claude Code: `2.1.119`
+- Codex CLI: `0.125.0`
+- node-pty: `1.1.0`
+- Platform: macOS arm64
+
+### Claude Findings
+
+Fresh workspace with seeded Claude profile:
+
+```text
+Quick safety check: Is this a project you created or one you trust?
+Yes, I trust this folder
+Enter to confirm
+```
+
+After pressing Enter, Claude writes:
+
+```json
+{
+ "projects": {
+ "/private/.../workspace": {
+ "hasTrustDialogAccepted": true,
+ "projectOnboardingSeenCount": 1
+ }
+ }
+}
+```
+
+Second launch in the same folder skips the trust prompt.
+
+Additional observed Claude startup chains:
+
+```text
+trust -> main
+trust -> bypass permissions -> main
+trust -> custom API key confirmation -> main
+empty profile onboarding -> no trust yet
+```
+
+Important detail: `hasCompletedProjectOnboarding` can remain `false`, but `hasTrustDialogAccepted: true` is enough for the existing `isPathTrusted()` gate.
+
+### Claude Protected Preflight Findings
+
+Local `claude --help` for `2.1.119` exposes these important flags:
+
+- `--bare` - minimal mode that skips hooks, LSP, plugin sync, attribution, auto-memory, background prefetches, keychain reads, and CLAUDE.md auto-discovery.
+- `--strict-mcp-config` plus `--mcp-config` - only load MCP servers from the provided config.
+- `--setting-sources user` - load user settings without project/local settings.
+- `--settings '{"disableAllHooks":true}'` - explicitly disables hooks for this session.
+- `--tools ""` - disables built-in tools for this session.
+- `-p`/`--print` is not suitable because help says workspace trust is skipped in print mode.
+- `doctor` is not suitable because help says workspace trust is skipped and `.mcp.json` stdio servers can be spawned for health checks.
+
+PTY smoke without pressing Enter confirmed these variants still show the workspace trust prompt in a new folder:
+
+```text
+claude
+claude --bare
+claude --bare --strict-mcp-config --mcp-config <empty-mcp.json>
+claude --strict-mcp-config --mcp-config <empty-mcp.json> --setting-sources user --settings '{"disableAllHooks":true}'
+claude --bare --strict-mcp-config --mcp-config <empty-mcp.json> --setting-sources user --settings '{"disableAllHooks":true}' --tools ""
+```
+
+The empty MCP config must be:
+
+```json
+{"mcpServers":{}}
+```
+
+Plain `{}` is rejected by Claude Code as invalid MCP config.
+
+Recommended candidate command for v1:
+
+```text
+claude --bare --strict-mcp-config --mcp-config <empty-mcp.json> --setting-sources user --settings '{"disableAllHooks":true}' --tools ""
+```
+
+This still needs one final smoke before default-on: press Enter on the trust prompt in a temp workspace and verify `hasTrustDialogAccepted: true` is persisted in the same state file the orchestrator reads.
+
+### External Research Update
+
+Relevant external facts checked on 2026-05-13:
+
+- Official Claude Code CLI reference documents `--bare`, `--strict-mcp-config`, `--mcp-config`, `--setting-sources`, `--settings`, `--tools`, and warns that `-p` skips workspace trust: [Claude Code CLI reference](https://code.claude.com/docs/en/cli-reference).
+- Official Claude Code settings and hooks docs document `disableAllHooks`, managed/user/project/local setting sources, and hook disable behavior: [Claude Code settings](https://code.claude.com/docs/en/settings), [Claude Code hooks](https://code.claude.com/docs/en/hooks).
+- Official Claude Code security docs say first-time codebase runs and new MCP servers require trust verification, and that MCP configuration can live in source-controlled project settings: [Claude Code security](https://docs.anthropic.com/en/docs/claude-code/security).
+- Recent security research around "TrustFall" argues that accepting folder trust can enable project-defined MCP execution in agentic CLIs. We should not rely on full normal Claude startup for preflight if a protected interactive command works: [Adversa AI TrustFall report](https://adversa.ai/blog/trustfall-coding-agent-security-flaw-rce-claude-cursor-gemini-cli-copilot/).
+- A recent Codex issue reports that `-c projects."<path>".trust_level="trusted"` behavior may not be purely ephemeral in affected versions. Treat Codex native config overrides as app-scoped intent, not a hard guarantee that no user config changes can happen: [openai/codex issue #18475](https://github.com/openai/codex/issues/18475).
+- Sibling runtime check: Claude CLI uses `-c` for `--continue`, while Codex native exec uses `configOverrides: string[]` and turns those into `-c` only when spawning the Codex binary. Therefore desktop must not append Codex `-c` pairs to Claude/orchestrator launch argv.
+
+### Codex Findings
+
+Codex TUI with real profile in a new workspace:
+
+```text
+Update available
+Skip
+Do you trust the contents of this directory?
+Yes, continue
+```
+
+The order matters. After skipping the update prompt, the workspace trust prompt appears. This matches GasCity's tested sequence:
+
+```text
+Down, Enter, Enter
+```
+
+Codex direct CLI with per-launch override:
+
+```bash
+codex \
+ -c 'projects."/path".trust_level="trusted"' \
+ -c 'projects."/realpath".trust_level="trusted"'
+```
+
+The trust prompt is skipped for that direct Codex launch. Path keys with spaces, brackets, and quotes work when serialized as quoted TOML keys.
+
+Important refinement: the reusable value is the dotted config override string, not necessarily the CLI `-c` flag. In Agent Teams deterministic launch, pass those values to the sibling runtime through a typed settings/runtime contract, and let Codex native exec convert them to `-c` only at the direct Codex binary boundary.
+
+Codex with an isolated `CODEX_HOME` and no auth shows an auth picker first, not a trust prompt. This must be handled as provider auth required, not workspace trust.
+
+### GasCity Prior Art
+
+GasCity has a battle-tested startup dialog sequence in `internal/runtime/dialog.go`:
+
+1. Claude resume selector - `Down`, `Enter`
+2. Codex update dialog - `Down`, `Enter`
+3. Workspace trust dialog - `Enter`
+4. Bypass permissions warning - `Down`, `Enter`
+5. Claude custom API key confirmation - `Up`, `Enter`
+6. Rate limit dialog
+
+Relevant behavior from GasCity tests:
+
+- Detects Claude trust: `Quick safety check`, `trust this folder`
+- Detects Codex trust: `Do you trust the contents of this directory?`
+- Detects Gemini trust: `Do you trust the files in this folder?`
+- Peeks deep enough to catch a late trust dialog below prompt text
+- Handles stale update snapshots before moving to trust
+- Waits a short grace period after an apparent prompt because a dialog can arrive in the next terminal snapshot
+
+We should copy the state-machine idea, not the tmux dependency.
+
+### Other Project Lessons
+
+GasTown older implementation:
+
+- polls tmux pane content after startup
+- accepts workspace trust before bypass permission warning
+- checks trust text before prompt detection because Codex trust screens can contain a leading `>` line
+- has a blind dismiss helper, but only for remediation of already-stalled sessions
+
+What to copy:
+
+- trust-before-prompt matching order
+- polling with timeout
+- idempotent no-dialog behavior
+
+What not to copy into v1:
+
+- blind key sequences on a healthy launch
+- tmux dependency
+- generic prompt suffix readiness
+
+Overstory newer implementation:
+
+- provider runtime owns `detectReady(content)` and returns `dialog`, `ready`, or `loading`
+- `waitForTuiReady()` calls a provider callback instead of hardcoding Claude only
+- tracks handled dialog actions so trust `Enter` is not sent repeatedly
+- retries typed dialogs like bypass confirmation after a delay if the dialog persists
+- declares Claude ready only after prompt marker plus status bar marker
+
+What to copy:
+
+- provider-owned detection callback shape
+- handled-action memory
+- two-signal readiness
+- dead-session/exit awareness
+
+What to adapt:
+
+- our PTY engine should use `PtyProcessPort`, not tmux
+- our v1 preflight can stop after trust persistence, not full TUI readiness
+
+## Second-Pass Codebase Findings
+
+This section records the high-risk integration findings from the current app codebase.
+
+### Deterministic Path Shape
+
+The legacy deterministic paths are in `TeamProvisioningService._createTeamInner` and
+`TeamProvisioningService._launchTeamInner`.
+
+Important order today:
+
+1. Acquire per-team lock.
+2. Normalize `config.json` and update project path.
+3. Resolve Claude binary.
+4. Build provider-aware env.
+5. Materialize effective member specs.
+6. Resolve OpenCode member workspaces.
+7. Build cross-provider args.
+8. Build `providerArgsByProvider`.
+9. Resolve and validate launch identity.
+10. Create `ProvisioningRun`.
+11. Build bootstrap spec, prompt file, MCP config.
+12. Build final launch args.
+13. Spawn CLI process.
+
+The trust work cannot be inserted as a single block without risk. It needs three phases:
+
+- **Early settings-only plan phase** immediately after `buildProvisioningEnv`, because Codex provider settings can affect default model resolution inside `materializeEffectiveTeamMemberSpecs()`.
+- **Full plan phase** before launch identity validation, because Codex runtime-trust settings must affect `readRuntimeProviderLaunchFacts()` and final runtime settings.
+- **Execute phase** after `ProvisioningRun` exists, because Claude PTY warmup can take seconds and should report progress/diagnostics.
+
+### Provider Args Risk
+
+Codex provider settings/args are used in several places:
+
+- Primary provider args from `buildProvisioningEnv`.
+- Cross-provider args from `buildCrossProviderMemberArgs`.
+- `providerArgsByProvider` passed into `resolveAndValidateLaunchIdentity`.
+- Final `launchArgs`.
+- Flattened cross-provider member args pushed after primary runtime args.
+
+Risk: if Codex is a **secondary teammate** under an Anthropic lead, adding trust only to primary provider args will not reach the spawned Codex teammate. The opposite mistake is worse: appending Codex native `-c` to Claude launch argv makes Claude interpret `-c` as `--continue`.
+
+Rule: apply Codex trust intent only through surfaces that are known to carry Codex settings or Codex native config overrides:
+
+- `providerArgsByProvider.get('codex')`
+- primary `runtimeArgsPlan.providerArgs` when lead provider is Codex
+- flattened cross-provider settings when Codex is a cross-provider teammate
+- sibling runtime Codex native `configOverrides` after validating the override values
+- any future one-shot runtime probe that takes Codex provider settings
+
+### Progress And Artifact Risk
+
+Before `ProvisioningRun` exists, failures can throw without launch progress/artifact context. After `ProvisioningRun` exists, the service can:
+
+- update progress
+- append provisioning trace lines
+- include diagnostics in launch failure artifacts
+- clean up generated bootstrap files if a later step fails
+
+Rule: the plan phase must not throw for normal trust problems. It should return diagnostics and arg patches. The execute phase should also prefer returning diagnostics over throwing, except for deterministic local errors such as an invalid cwd.
+
+### PTY Pattern Already Exists
+
+The app already has `ClaudeDoctorProbe` and `PtyTerminalService` patterns:
+
+- `node-pty` is an optional native dependency.
+- imports must be graceful.
+- PTY startup can throw and must become a diagnostic, not app crash.
+- output should be bounded.
+- `pty.kill()` is best effort.
+
+The workspace trust PTY adapter should reuse those patterns rather than adding direct `require('node-pty')` calls in the launch service.
+
+### OpenCode Boundary
+
+`_createTeamInner` and `_launchTeamInner` do not handle pure OpenCode teams routed through the runtime adapter. Mixed OpenCode secondary lanes are handled later. V1 should target the legacy deterministic create/launch paths and exact workspaces in `allEffectiveMemberSpecs`.
+
+Do not pull OpenCode runtime adapter launch into this change unless a separate OpenCode trust gate appears. That keeps the blast radius small.
+
+## Third-Pass Integration Findings
+
+This section records additional constraints found after reading the current app and sibling runtime code more closely.
+
+### Service Injection Constraint
+
+`TeamProvisioningService` has a large positional constructor used by many tests. Adding another constructor parameter is technically possible, but it increases test churn and makes dependency order more brittle.
+
+Safer integration:
+
+- add a private `workspaceTrustCoordinator` field or lazy getter
+- add `setWorkspaceTrustCoordinator(coordinator: WorkspaceTrustCoordinator | null): void`
+- follow the existing setter pattern used by `setRuntimeAdapterRegistry`, `setControlApiBaseUrlResolver`, and runtime turn-settled providers
+- keep constructor signature unchanged
+
+Rating: 🎯 9 🛡️ 9 🧠 3 (🎯 confidence, 🛡️ reliability, 🧠 complexity, each out of 10), ~20-35 LOC.
+
+Avoid in v1:
+
+- converting the whole service constructor to an options object
+- threading the coordinator through every existing test constructor call
+- importing `node-pty` from `TeamProvisioningService`
+
+### Progress Schema Constraint
+
+`TeamLaunchDiagnosticItem.code` is a fixed TypeScript union. Adding `workspace_trust_preflight` there is a schema change and should come with renderer tests.
+
+V1 should avoid that surface:
+
+- use `progress.message` for temporary live status
+- use `progress.warnings` for short non-blocking warning strings
+- store structured trust preflight data on `run.workspaceTrustDiagnostics`
+- copy structured trust preflight data into artifact `flags.workspaceTrustPreflight`
+
+Only add UI diagnostic rows later if needed, with:
+
+- `TeamLaunchDiagnosticItem.code` union extension
+- renderer copy-diagnostics test
+- member card/detail diagnostic rendering test
+
+Rating: 🎯 9 🛡️ 10 🧠 3, ~25-45 LOC for v1 artifact-only diagnostics.
+
+### Artifact Pack Constraint
+
+`writeLaunchFailureArtifactPackBestEffort()` already has a flexible `flags` object in the manifest. That is the safest place to put structured workspace trust preflight data in v1.
+
+Add:
+
+```ts
+flags: {
+ ...existingFlags,
+ workspaceTrustPreflight: run.workspaceTrustDiagnostics ?? null,
+}
+```
+
+Do not add raw terminal transcripts here. Store bounded, redacted facts only:
+
+- provider
+- workspace path or hash according to existing path exposure policy
+- status
+- matched dialog ids
+- actions
+- elapsedMs
+- error code
+
+### Runtime Security Constraint
+
+The sibling `agent_teams_orchestrator` uses Claude workspace trust as a security gate for more than teammate spawn:
+
+- headless teammate process startup checks `isPathTrusted(workingDir)`
+- hooks are skipped until workspace trust is accepted
+- auth helpers are guarded by `checkHasTrustDialogAccepted()`
+- MCP headers helpers are guarded by trust
+
+So the host must not disable the runtime gate. The host should prepare trust through the provider-owned flow, then let the runtime keep enforcing trust.
+
+This confirms the v1 rule: no global bypass flag and no direct config write as the normal path.
+
+### Provider Args Constraint
+
+`buildTeamRuntimeLaunchArgsPlan()` reads `providerArgs` from `envResolution.providerArgs`.
+
+Safer than changing the method signature:
+
+```ts
+const envResolutionForLaunch = {
+ ...provisioningEnv,
+ providerArgs: providerArgsForLaunch,
+}
+```
+
+Then pass `envResolutionForLaunch` into `buildTeamRuntimeLaunchArgsPlan()`.
+
+`mergeJsonSettingsArgs()` only merges `--settings` JSON args. For deterministic Agent Teams launch, Codex trust should normally be represented as JSON settings that the sibling runtime consumes, not as direct `-c` pairs on the Claude CLI command.
+
+Arg ordering rule:
+
+- final launch currently pushes extra args before provider args
+- app-managed Codex trust settings should live in app-managed provider settings so `mergeJsonSettingsArgs()` can combine them with existing Codex settings
+- direct Codex CLI `-c` is allowed only at the Codex binary boundary, not at the Claude launch boundary
+- if Codex native config override precedence changes, switch the sibling runtime adapter to explicit validated merge of user `projects` entries plus app-owned workspace keys
+
+Probe ordering rule:
+
+- `readRuntimeProviderLaunchFacts()` receives only provider args, not user extra args
+- therefore Codex trust settings must be patched into `providerArgsByProvider` before launch identity validation
+- do not rely on final `launchArgs` for provider facts
+
+### Runtime Path Matching Constraint
+
+The runtime `isPathTrusted(dir)` walks parent directories from `resolve(dir)`. The host `ClaudeStateProbe` should mirror this:
+
+- check exact resolved cwd
+- check exact realpath cwd
+- check parent directories up to root
+- treat a trusted parent as trusted for a child
+- do not write trust to a parent unless the provider itself chooses that key
+
+This lets us skip PTY when the user already trusted a repository root and then launches a subdirectory.
+
+## Fourth-Pass Failure Scope Findings
+
+The most dangerous integration bug is not prompt matching. It is inserting preflight after `ProvisioningRun` is created but outside the existing cleanup scopes.
+
+Current launch shape:
+
+- `run` is inserted into `runs` and `provisioningRunByTeam`
+- progress trace is initialized
+- persisted launch state is cleared
+- bootstrap files are written inside a local `try/catch`
+- spawn is inside another local `try/catch`
+- `cleanupRun()` writes artifacts and removes run tracking, but does not restore the prelaunch config backup
+
+If workspace trust `execute()` throws in the wrong place, the service can leave:
+
+- `provisioningRunByTeam` pointing at a dead run
+- normalized config not restored
+- Anthropic helper material not cleaned
+- progress retained without a failure artifact
+- stale launch state cleared without a corresponding launch attempt
+
+### Pre-Spawn Failure Helper
+
+Add a small helper for failures after `run` exists and before `spawnCli()` succeeds:
+
+```ts
+private async failDeterministicRunBeforeSpawn(
+ run: ProvisioningRun,
+ input: {
+ mode: 'create' | 'launch'
+ message: string
+ error: string
+ provisioningEnv: ProvisioningEnvResolution
+ cleanupPolicy: DeterministicPreSpawnCleanupPolicy
+ }
+): Promise<never>
+```
+
+Responsibilities:
+
+1. assign bounded `run.workspaceTrustDiagnostics` if the failure came from preflight
+2. `updateProgress(run, 'failed', input.message, { error: input.error, warnings: run.progress.warnings })`
+3. `run.onProgress(run.progress)`
+4. cleanup Anthropic helper material if present
+5. apply the typed create/launch cleanup policy
+6. call `cleanupRun(run)` so artifacts and retained progress are handled consistently
+7. throw an `Error` with the same message
+
+The diagnostics assignment must happen before `cleanupRun(run)`, because the failure artifact writer is idempotent per run and may run during cleanup.
+
+Use this helper for workspace trust `blocked` results and unexpected preflight exceptions after run creation.
+
+Do not manually duplicate `this.runs.delete(...)` and `this.provisioningRunByTeam.delete(...)` in the new preflight path. The existing code already has several manual cleanup branches, and adding one more is how subtle lifecycle drift happens.
+
+```mermaid
+flowchart TD
+ A["ProvisioningRun created"] --> B["WorkspaceTrustCoordinator.execute"]
+ B -->|"ok"| C["clearPersistedLaunchState"]
+ B -->|"soft_failed"| D["merge warning"]
+ D --> C
+ C --> K["launchStateClearedForRun = true"]
+ B -->|"blocked or unexpected error"| E["failDeterministicRunBeforeSpawn"]
+ E --> F["assign diagnostics then updateProgress failed"]
+ F --> G["cleanup helper material"]
+ G --> H["apply create/launch cleanup policy"]
+ H --> I["cleanupRun, guarded by launchStateClearedForRun"]
+ I --> J["artifact flags.workspaceTrustPreflight"]
+```
+
+### Exact Execute Placement
+
+Recommended v1 placement:
+
+1. create `run`
+2. insert `run` into maps
+3. initialize provisioning trace and emit initial progress
+4. run `WorkspaceTrustCoordinator.execute()` inside a guarded pre-spawn block
+5. if blocked, call `failDeterministicRunBeforeSpawn(...)`
+6. if soft-failed, merge warning and continue
+7. then clear persisted launch state and continue current bootstrap flow
+
+Reason for running before `clearPersistedLaunchState`: if preflight blocks before any runtime launch starts, the app should not erase the previous launch snapshot as if a new launch had begun.
+
+Required guard: initialize `run.launchStateClearedForRun = false` and set it to `true` only after `clearPersistedLaunchState()` succeeds. `cleanupRun()` must use this flag before persisting failed launch snapshots or finalizing unconfirmed bootstrap members for a launch run. Otherwise a preflight-blocked relaunch can still overwrite the previous launch snapshot even though no runtime process was spawned.
+
+Create mode detail: run the helper before team meta/tasks directories are created. That keeps create cleanup minimal and avoids deleting any pre-existing unrelated team data if a future branch changes existence checks.
+
+Launch mode detail: run the helper after `normalizeTeamConfigForLaunch()` and `updateConfigProjectPath()` because the launch flow already mutates config before run creation. A blocked launch preflight must restore the prelaunch config through the typed cleanup policy.
+
+If later product behavior wants clearing earlier, it should be a deliberate change with a test that covers stale launch state UI.
+
+### Preflight Env Constraint
+
+Claude PTY preflight should not use the final runtime env blindly.
+
+Use a derived env:
+
+```ts
+const trustPreflightEnv = buildWorkspaceTrustPreflightEnv(shellEnv)
+```
+
+Keep:
+
+- `HOME`
+- `USERPROFILE`
+- `PATH`
+- `SHELL` / `COMSPEC`
+- `TERM`
+- `CLAUDE_CONFIG_DIR`
+- normal user auth env already present in the shell
+
+Remove:
+
+- `CLAUDE_ENABLE_DETERMINISTIC_TEAM_BOOTSTRAP`
+- `CLAUDE_TEAM_CONTROL_URL`
+- app-managed Anthropic helper env vars
+- runtime turn-settled hook env vars
+- transient team launch nonce/env vars
+
+Reason: preflight is not a team runtime. It should only let Claude Code show and persist workspace trust. It should not execute app-managed helper paths or runtime hooks before trust is established.
+
+Rating: 🎯 8 🛡️ 9 🧠 5, ~40-80 LOC with tests.
+
+## Fifth-Pass Reliability Findings
+
+This pass focuses on the parts that can still fail after the high-level design is correct.
+
+### Concurrency Scope
+
+`TeamProvisioningService.withTeamLock()` serializes launches per team name, not per workspace. Two different teams can launch against the same selected workspace at the same time.
+
+Add `WorkspaceTrustLockRegistry` in the workspace-trust feature:
+
+- lock key: `${provider}:${normalizedRealpath}`
+- in-process promise chaining for normal app usage
+- optional file lock using the existing `withFileLock` helper for cross-window or duplicate app instances
+- acquire timeout: 5 seconds for plan-free locks, 20 seconds for active PTY preflight
+- stale timeout: 30 seconds
+- waiting for a lock should be cancellable
+
+Behavior:
+
+- if another launch is already preparing the same workspace, wait for it
+- after lock wait, re-run the state probe before spawning PTY
+- if the other launch already accepted trust, return `already_trusted_after_wait`
+- if lock acquisition times out, return `soft_failed` and let runtime classification handle any remaining trust issue
+
+Rating: 🎯 9 🛡️ 9 🧠 5, ~70-120 LOC.
+
+### Cancellation Contract
+
+Preflight must be cancellable because it runs after `ProvisioningRun` exists.
+
+Add to `WorkspaceTrustExecutionPlan`:
+
+```ts
+isCancelled(): boolean
+onProgress(event: WorkspaceTrustProgressEvent): void
+```
+
+Rules:
+
+- check cancellation before acquiring lock
+- check before spawning PTY
+- check after every terminal snapshot
+- check before every key action
+- if cancelled, kill PTY and return `cancelled`
+- `TeamProvisioningService` maps `cancelled` to the existing launch cancellation path, not a trust failure
+
+Do not use a long uninterruptible `Promise.race` around the whole preflight. The engine should poll in small intervals and notice cancellation.
+
+### Claude PTY Command Shape
+
+Default Claude preflight command should be protected, not just boring:
+
+```text
+claude --bare --strict-mcp-config --mcp-config <empty-mcp.json> --setting-sources user --settings '{"disableAllHooks":true}' --tools ""
+```
+
+Do not pass:
+
+- team bootstrap args
+- `--team-bootstrap-spec`
+- the team launch `--mcp-config` value (the protected command passes only the empty preflight MCP config)
+- `--dangerously-skip-permissions`
+- user `extraCliArgs`
+- model/effort args
+
+Reason: the goal is only to let Claude Code persist workspace trust for the selected cwd. Passing launch args can trigger extra provider setup, hooks, MCP, permission prompts, or model-specific behavior.
+
+The protected command also reduces the risk that accepting trust immediately loads project MCP, project/local settings, hooks, or tools before the preflight can kill the PTY. The dialog engine may still know about bypass/custom API prompts because users can have global settings that surface them, but the strategy should not intentionally create those prompts.
+
+Command fallback order:
+
+1. Protected modern command - 🎯 9 🛡️ 9 🧠 6, ~80-140 LOC including flag detection and temp MCP cleanup. Chosen for v1 if the final persistence smoke passes.
+2. Strict MCP/settings command without `--bare` - 🎯 8 🛡️ 8 🧠 5, ~60-110 LOC. Fallback for Claude builds that do not support `--bare`.
+3. Plain `claude` PTY auto-accept - 🎯 8 🛡️ 5 🧠 4, ~40-80 LOC. Do not default now because it can load more project/user startup behavior after trust.
+
+Flag compatibility rule:
+
+- Discover support with cached `claude --help` text or optimistic spawn diagnostics.
+- If protected flags are unsupported, return `preflight_unavailable_or_unprotected` as a soft failure unless an explicit experimental env flag allows the lower-safety fallback.
+- Do not silently downgrade to plain `claude` in production defaults.
+
+### Post-Trust Exit Rule
+
+After a trust action, success probing outranks dialog clearing.
+
+Flow:
+
+1. detect workspace trust prompt
+2. send allowlisted `Enter`
+3. wait a short settle delay
+4. probe Claude state
+5. if trust is persisted, kill PTY immediately and return success
+
+Do not wait for full TUI readiness after trust is persisted.
+
+Reason: after trust is accepted, Claude may begin loading project config, MCP, hooks, or auth helpers. The selected workspace is now trusted, but preflight should still minimize side effects by exiting as soon as the trust bit is durable.
+
+### Path Canonicalization Contract
+
+Use two path forms everywhere:
+
+- `displayCwd`: what the user selected and what diagnostics can show
+- `configKeyCwd`: path normalized like the runtime config key
+
+Rules:
+
+- `realCwd` uses `fs.promises.realpath` when available
+- `configKeyCwd` uses path normalize plus backslash-to-forward-slash, matching sibling runtime `normalizePathForConfigKey`
+- comparison key uses `normalizePathForComparison` so differences in Windows drive-letter case and path separators do not create duplicate locks
+- diagnostics include both display and real path when existing launch diagnostics already expose paths
+- do not lowercase POSIX paths
+- do not trust parent by writing parent key; only treat a persisted trusted parent as covering the child
+
+Tests:
+
+- macOS `/var` and `/private/var`
+- symlinked workspace
+- Windows `C:\Repo` and `c:/repo`
+- UNC path
+- path with trailing slash
+- deleted workspace during plan
+
+### Feature Flag Semantics
+
+Use a single parsed config object:
+
+```ts
+type WorkspaceTrustFeatureFlags = {
+ enabled: boolean
+ claudePty: boolean
+ codexArgs: boolean
+ retry: boolean
+ fileLock: boolean
+}
+```
+
+Parsing rules:
+
+- only `'0'`, `'false'`, and `'off'` disable
+- only `'1'`, `'true'`, and `'on'` enable explicit experimental flags
+- malformed values fall back to default and emit a diagnostic once
+- include effective flags in artifact `flags.workspaceTrustPreflight.featureFlags`
+
+Default v1:
+
+- `enabled: true`
+- `claudePty: true`
+- `codexArgs: true`
+- `retry: false`
+- `fileLock: true` if lock path can be created, otherwise degrade to in-process lock
+
+### Diagnostics Redaction
+
+PTY raw output can contain account names, emails, org names, or snippets of provider setup text.
+
+V1 diagnostics should store structured facts by default:
+
+- status
+- provider
+- workspace id/source
+- matched rule ids
+- action names
+- elapsedMs
+- bounded error code/message
+
+Only store `rawTail` when all of the following hold:
+
+- preflight fails
+- feature flag `AGENT_TEAMS_WORKSPACE_TRUST_DEBUG=1` is set
+- the tail has passed the same secret redaction used by launch failure artifacts
+
+Even then, cap raw tail to 8 KiB.
+
+## Sixth-Pass Lifecycle Findings
+
+This pass focuses on integration bugs that can happen even when prompt handling is correct.
+
+### Launch Cancellation State
+
+`ProvisioningRun` is created with progress state `validating`, but `cancelProvisioning()` only allows cancellation in `spawning`, `configuring`, `assembling`, `finalizing`, or `verifying`.
+
+If Claude PTY preflight runs for several seconds while progress remains `validating`, the UI can show an active run that cannot be cancelled through the existing cancel API.
+
+Rule:
+
+- before `WorkspaceTrustCoordinator.execute()`, update progress to cancellable state `spawning` with message `Preparing workspace trust`
+- after execute returns `ok` or `soft_failed`, continue current flow
+- if execute returns `cancelled`, do not call `failDeterministicRunBeforeSpawn(...)` as a failure; follow existing cancellation semantics
+- after execute returns, check `run.cancelRequested`, `run.processKilled`, `stopAllTeamsGeneration`, and whether the run is still current before continuing to `clearPersistedLaunchState`
+
+Rating: 🎯 9 🛡️ 9 🧠 4, ~25-45 LOC plus tests.
+
+### Stop/Shutdown Race
+
+`stopTeam()` can call `cleanupRun(run)` while `_createTeamInner()` or `_launchTeamInner()` is still inside the preflight await. That means the code after `execute()` must tolerate a run that was already cleaned up by user stop or app shutdown.
+
+Add a stale-run guard:
+
+```ts
+private isLaunchRunStillCurrent(run: ProvisioningRun): boolean {
+ return this.runs.get(run.runId) === run &&
+ this.provisioningRunByTeam.get(run.teamName) === run.runId &&
+ !run.cancelRequested &&
+ !run.processKilled
+}
+```
+
+Use it after trust preflight and before every subsequent pre-spawn block.
+
+If the run is stale:
+
+- kill any preflight PTY through coordinator cancellation
+- do not restore config twice
+- do not write a second failure artifact
+- throw a cancellation-shaped error only after the existing stop cleanup has already updated progress
+
+### Main Composition Boundary
+
+`TeamProvisioningService` is constructed in `src/main/index.ts` and then configured through setter methods.
+
+V1 wiring:
+
+```ts
+teamProvisioningService = new TeamProvisioningService()
+teamProvisioningService.setWorkspaceTrustCoordinator(createWorkspaceTrustCoordinator(...))
+```
+
+Rules:
+
+- expose main-only composition from `src/features/workspace-trust/main`
+- keep `node-pty`, filesystem config probes, temp MCP files, and file locks inside main adapters
+- keep root `src/features/workspace-trust/index.ts` free of Electron/native imports
+- tests can inject fake coordinator without loading `node-pty`
+
+### Coordinator Shutdown Ownership
+
+The coordinator owns transient PTY sessions. The service owns team launch cancellation.
+
+Contract:
+
+- `WorkspaceTrustCoordinator.execute()` receives `isCancelled`
+- `stopTeam()` and `stopAllTeams()` set existing run cancellation flags
+- coordinator checks cancellation in small polling intervals and kills active PTY
+- optional `dispose()` kills any sessions not tied to a current run
+- `TeamProvisioningService.stopAllTeams()` does not need a new broad process killer for preflight; it should only trigger cancellation and let the coordinator clean its own sessions
+
+This avoids adding preflight Claude PIDs to generic team runtime cleanup, where it would be easy to kill unrelated user Claude sessions.
+
+### Provider State Probe Races
+
+Claude may write `.claude.json` while `ClaudeStateProbe` reads it.
+
+Rules:
+
+- read with a small max byte limit
+- on JSON parse failure, retry 2-3 times with a short delay
+- never log the raw file
+- if still unreadable, return `unknown` and let the strategy decide soft failure
+- mirror runtime path matching, including parent directory trust
+
+This is especially important on Windows and OneDrive-style filesystems where atomic rename and file visibility can lag.
+
+### Claude Binary Resolution Gap
+
+A local prototype showed that `claude` was not on the shell `PATH`, even though `/Users/belief/.local/bin/claude` exists and works. The strategy must use `ClaudeBinaryResolver.resolve()` output, not a hardcoded `claude` command name.
+
+The PTY env can add `path.dirname(claudePath)` to PATH for child tools only if needed, but the spawned executable should be the resolved absolute path.
+
+### PTY Ownership Is Not ChildProcess Ownership
+
+Existing `transientProbeProcesses` tracks `child_process.spawn()` probes. `node-pty` returns `IPty`, not a `ChildProcess`, so it must not be stuffed into that set.
+
+Rules:
+
+- `NodePtyProcessAdapter` owns `IPty` lifecycle.
+- `WorkspaceTrustCoordinator` owns the active preflight session registry.
+- `TeamProvisioningService` cancels by setting run flags and passing `isCancelled`.
+- app shutdown can call optional coordinator `dispose()` through the service setter-owned dependency.
+- do not add preflight PTYs to global CLI process tracking, because those helpers are designed for normal child processes and can over-kill.
+
+### Shell Env And Keychain Boundary
+
+`buildEnrichedEnv()` only sets `CLAUDE_CONFIG_DIR` when the user configured a custom Claude base path. Setting it to the default path can break macOS Keychain namespace lookup.
+
+Preflight env rule:
+
+- start from the same enriched env family as runtime, using the resolved `claudePath`
+- preserve `HOME`, `USERPROFILE`, `USER`, `LOGNAME`, `PATH`, and custom `CLAUDE_CONFIG_DIR` if present
+- do not force `CLAUDE_CONFIG_DIR` to the default `~/.claude`
+- strip team runtime env and app-managed helper env after enrichment
+
+## Seventh-Pass Flow Integration Findings
+
+This pass found the biggest missing piece in the earlier plan: the app has two legacy deterministic flows, not one.
+
+### Create And Launch Must Both Be Covered
+
+`createTeam()` and `launchTeam()` have parallel legacy deterministic paths:
+
+- both build provider env
+- both materialize member specs
+- both resolve OpenCode member workspaces
+- both build cross-provider args
+- both validate launch identity
+- both create `ProvisioningRun`
+- both write bootstrap/MCP files
+- both spawn Claude CLI
+
+If preflight is added only to `_launchTeamInner`, the very first team creation for a new project can still hit the same trust gate.
+
+V1 scope should be:
+
+- legacy deterministic `createTeam`
+- legacy deterministic `launchTeam`
+- not pure OpenCode runtime adapter path
+- mixed OpenCode side lanes only insofar as their workspaces flow through legacy deterministic launch
+
+Top 3 integration options:
+
+1. Shared helper used by create and launch - 🎯 9 🛡️ 9 🧠 7, ~180-280 integration LOC. Chosen because it prevents drift between the two flows.
+2. Implement launch first, create later - 🎯 6 🛡️ 6 🧠 5, ~90-160 LOC. Lower initial work, but leaves first-run new-project failures.
+3. Inline duplicate preflight blocks in both flows - 🎯 5 🛡️ 5 🧠 4, ~120-220 LOC. Fast but likely to diverge during future launch fixes.
+
+Recommended helper shape:
+
+```ts
+private async prepareWorkspaceTrustForDeterministicRun(input: {
+ mode: 'create' | 'launch'
+ run: ProvisioningRun
+ request: TeamCreateRequest | TeamLaunchRequest
+ claudePath: string
+ shellEnv: NodeJS.ProcessEnv
+ stopAllGenerationAtStart: number
+ workspaceTrustPlan: WorkspaceTrustFullPlanResult
+ cleanupPolicy: DeterministicPreSpawnCleanupPolicy
+}): Promise
+```
+
+The helper owns:
+
+- progress transition to cancellable `spawning`
+- `WorkspaceTrustCoordinator.execute()`
+- stale-run guard after await
+- result mapping: `ok`, `soft_failed`, `blocked`, `cancelled`
+- calling the correct pre-spawn failure cleanup helper
+
+It must not own:
+
+- provider arg patch computation
+- bootstrap spec generation
+- MCP runtime config generation
+- process spawn
+
+### Different Cleanup Policies For Create And Launch
+
+Create and launch clean up different things.
+
+Launch cleanup:
+
+- restore prelaunch `config.json`
+- cleanup Anthropic helper material
+- remove generated bootstrap files if already present
+- preserve existing team data
+- do not clear previous launch state if preflight blocks before runtime launch
+
+Create cleanup:
+
+- there may be no previous team files yet
+- if preflight runs before team meta/tasks dirs are created, cleanup should only remove run tracking and Anthropic helper material
+- if a later shared helper is reused after meta dirs are written, it must delete the create-owned team dir/tasks dir exactly like current create spawn-failure cleanup
+- never call `restorePrelaunchConfig()` for create mode
+
+Do not use one boolean like `restorePrelaunchConfig`. Use a typed cleanup policy:
+
+```ts
+type DeterministicPreSpawnCleanupPolicy =
+ | { mode: 'launch'; restorePrelaunchConfig: true; cleanupCreatedTeamArtifacts?: false }
+ | { mode: 'create'; restorePrelaunchConfig?: false; cleanupCreatedTeamArtifacts: boolean }
+```
+
+### Early Provider Args Patch Before Default Model Resolution
+
+`materializeEffectiveTeamMemberSpecs()` can call `resolveProviderDefaultModel()` for non-Anthropic members that do not specify a model. That command receives `envResolution.providerArgs` before the full `WorkspaceTrustCoordinator.planFull()` placement.
+
+This matters for Codex because provider args already carry runtime auth intent, and the new trust settings should be consistently present for all provider fact/model probes.
+
+Use two planning phases:
+
+1. **Early settings-only phase**
+ - after `buildProvisioningEnv`
+ - before `materializeEffectiveTeamMemberSpecs`
+ - workspaces: `request.cwd` and `realpath(request.cwd)`
+ - providers: request lead provider plus explicit member provider ids
+ - output: Codex provider settings patches only
+ - no PTY, no locks, no config writes
+
+2. **Full workspace phase**
+ - after `resolveOpenCodeMemberWorkspacesForRuntime`
+ - workspaces: request cwd plus resolved member/worktree cwd values
+ - output: final Codex settings patches plus Claude execution plan
+ - dedupe early patches so the final settings contain one app-owned Codex trust override set
+
+Integration rule:
+
+- pass the early provider settings patches into `materializeEffectiveTeamMemberSpecs()` through an optional parameter, or refactor default model resolution to call a provider-arg resolver callback
+- do not let `materializeEffectiveTeamMemberSpecs()` import workspace-trust feature internals
+- keep the dependency direction as `TeamProvisioningService -> WorkspaceTrustCoordinator port`
+
+Rating: 🎯 8 🛡️ 9 🧠 7, ~80-140 LOC plus tests.
+
+### Progress State Options
+
+The preflight needs to be cancellable. Options:
+
+1. Set progress to existing `spawning` before PTY preflight - 🎯 9 🛡️ 8 🧠 3, ~15-30 LOC. Chosen for v1 because it uses existing cancel states and avoids schema changes.
+2. Add `validating` to cancellable states - 🎯 7 🛡️ 7 🧠 3, ~10-25 LOC. Could change semantics for other validation waits and existing tests.
+3. Add a new state like `preparing_workspace_trust` - 🎯 6 🛡️ 6 🧠 6, ~60-120 LOC. Cleaner UI but requires shared/renderer state updates.
+
+V1 uses option 1 with message `Preparing workspace trust`.
+
+### Artifact Flags Need Their Own Size Cap
+
+`writeTeamLaunchFailureArtifactPack()` redacts JSON recursively, but `flags` is otherwise flexible. Workspace trust diagnostics must self-limit before being assigned to `run.workspaceTrustDiagnostics`.
+
+Rules:
+
+- max strategy results: 20
+- max workspaces per strategy result: 20
+- max evidence strings per result: 5
+- max evidence string length: 600 chars
+- max raw tail: 8 KiB and only under debug/failure
+- no env values
+- no full provider config file content
+
+## Eighth-Pass Integration Hardening Findings
+
+This pass re-checked the exact current create/launch code paths instead of only the target architecture.
+
+### Default Model Resolution Must Use A Provider Args Resolver
+
+`materializeEffectiveTeamMemberSpecs()` calls `resolveProviderDefaultModel()` for non-Anthropic members without explicit models. It obtains secondary provider env through its local `getProvisioningEnv()` closure, which means a simple patch to the primary `provisioningEnv.providerArgs` is not enough.
+
+Recommended signature change:
+
+```ts
+private async materializeEffectiveTeamMemberSpecs(params: {
+ ...
+ providerArgsResolver?: (input: {
+ providerId: TeamProviderId
+ providerArgs: string[]
+ phase: 'default-model-resolution'
+ }) => string[]
+}): Promise
+```
+
+Rules:
+
+- default implementation returns the input args unchanged
+- workspace-trust integration passes a resolver created from `planArgsOnly()`
+- the resolver is pure and does not import workspace-trust internals into materialization
+- only provider args are patched; env objects and auth helper material are not mutated
+
+Why this matters: without this resolver, a Codex secondary member can fail during model probing before the full workspace plan has a chance to patch cross-provider launch args.
+
+Rating: 🎯 8 🛡️ 9 🧠 6, ~45-90 LOC plus tests.
+
+### Cross-Provider Arg Patching Must Happen After Env Resolution
+
+`buildCrossProviderMemberArgs()` builds secondary provider env internally and returns:
+
+- flattened inherited args
+- `providerArgsByProvider`
+- `envPatch`
+- `usesAnthropicApiKeyHelper`
+
+Do not push workspace-trust logic into this method. Instead:
+
+1. Let it return current provider args unchanged.
+2. Run `planFull()` using the resolved member workspaces and returned cross-provider provider args.
+3. Apply final patches to both `crossProviderMemberArgs.args` and `crossProviderMemberArgs.providerArgsByProvider`.
+4. Rebuild `providerArgsByProviderForLaunch` from patched primary and patched cross-provider args.
+
+This keeps `buildCrossProviderMemberArgs()` responsible for provider auth/env only, while workspace trust owns launch arg policy.
+
+Rating: 🎯 9 🛡️ 9 🧠 5, ~55-100 LOC plus tests.
+
+### Pre-Run Disk Artifacts Must Not Increase
+
+Current `buildProvisioningEnv()` can materialize Anthropic API-key helper files before `ProvisioningRun` exists. That is an existing lifecycle risk. The workspace-trust feature should not add any new pre-run disk artifacts.
+
+V1 rules:
+
+- `planArgsOnly()` writes nothing
+- `planFull()` writes nothing
+- temp empty MCP config is created only inside Claude strategy `execute()`, after `run` exists
+- if touching pre-run env failure paths, add best-effort helper cleanup in a separate, focused hardening commit with tests
+
+Do not mix a broad helper-material lifecycle refactor into the first workspace-trust PR unless tests prove the current leak can be fixed narrowly.
+
+Rating: 🎯 8 🛡️ 8 🧠 6, ~0 LOC for v1 scope control, ~40-80 LOC only if separately hardening.
+
+### Launch Config Mutation Window Is Intentional
+
+Launch mode normalizes `config.json` and updates `projectPath` before `ProvisioningRun` is created. Workspace trust execution will therefore happen after those launch mutations.
+
+Required behavior:
+
+- blocked launch preflight restores prelaunch config
+- cancelled launch preflight should follow existing stop cleanup without double restore
+- create mode must never call launch config restore
+- tests should assert restore calls by mode, not just final progress state
+
+This is why the cleanup policy must be typed by mode instead of a loose boolean.
+
+Rating: 🎯 8 🛡️ 9 🧠 5, ~35-70 LOC plus tests.
+
+### Naming Should Avoid Launch-Only Semantics
+
+Names like `failLaunchBeforeSpawn` are easy to misuse from create mode. Use deterministic-run terminology for shared helpers:
+
+- `prepareWorkspaceTrustForDeterministicRun`
+- `failDeterministicRunBeforeSpawn`
+- `DeterministicPreSpawnCleanupPolicy`
+- `WorkspaceTrustLaunchArgContext` can remain launch-arg-specific because it describes CLI args, not the workflow mode
+
+Rating: 🎯 9 🛡️ 8 🧠 2, ~10-25 LOC.
+
+## Ninth-Pass Runtime Contract Findings
+
+This pass checked the sibling orchestrator implementation, not just the desktop app.
+
+### Runtime Trust Key Is Not Always The Exact Cwd
+
+The sibling runtime implements `normalizePathForConfigKey(path)` as:
+
+- Node `path.normalize(...)`
+- then backslash-to-forward-slash conversion
+
+It also computes the primary project config key as:
+
+- canonical git root when the cwd is inside a git repo
+- otherwise the resolved original cwd
+
+`isPathTrusted(dir)` then walks from `resolve(dir)` to parents and returns true if any ancestor key has `hasTrustDialogAccepted`.
+
+Host probe rule:
+
+- check exact `request.cwd`
+- check `realpath(request.cwd)`
+- check canonical git root when cheap and available
+- check parent directories up to filesystem root
+- normalize keys exactly like runtime: path normalize, then backslash-to-forward-slash
+- never lowercase POSIX paths
+- on Windows, compare case-insensitively for dedupe/locks but preserve the serialized key casing
+
+Do not guess a parent key to write. Let Claude decide where to persist trust. The host only probes all keys the runtime might later accept.
+
+Tests to add:
+
+- selected subdirectory inside a trusted git root skips PTY
+- trust accepted from a git subdirectory is observed through the git-root key
+- symlinked repo path is checked through original path, realpath, and parent walk
+- Windows drive letter case dedupes lock keys but preserves config key text
+
+Rating: 🎯 8 🛡️ 9 🧠 6, ~50-100 LOC plus tests.
+
+### Home Directory Trust Is Not Persisted Reliably
+
+The sibling runtime comments state that when running from the home directory, trust can be session-only. Headless teammate startup uses `isPathTrusted(dir)`, which checks persisted config and does not consult the interactive session latch.
+
+V1 behavior:
+
+- detect `realpath(cwd) === realpath(home)` before Claude PTY
+- do not auto-trust home, root, or broad parent directories
+- return a blocked diagnostic like `workspace_trust_not_persistable_home`
+- keep the provider's exact runtime error if launch still proceeds through a soft-failure path
+
+Top 3 options:
+
+1. Block home-dir preflight with clear diagnostic - 🎯 9 🛡️ 10 🧠 3, ~20-40 LOC. Chosen for v1 because it avoids broad trust.
+2. Continue launch and rely on runtime classification - 🎯 6 🛡️ 8 🧠 2, ~5-15 LOC. Less invasive, but user sees the same failure after waiting.
+3. Write persisted trust for home directly - 🎯 3 🛡️ 2 🧠 5, ~60-120 LOC. Too broad and security-sensitive.
+
+### Provider Config Writes Need Provider Locks
+
+Claude's own `saveCurrentProjectConfig()` uses a file lock and auth-loss guard before writing `~/.claude.json`. This reinforces the v1 decision not to direct-write Claude trust files.
+
+If a future emergency fallback writes provider config directly, it must:
+
+- take the provider-compatible lock
+- re-read before merge
+- preserve auth and onboarding state
+- create backups according to provider conventions
+- use the same config-key normalization
+- be behind a separate feature flag
+
+Do not implement that fallback in the first PR.
+
+Rating: 🎯 7 🛡️ 6 🧠 8, ~140-260 LOC if ever implemented.
+
+### Competitor Lesson: Dialog Beats Ready
+
+Gastown polls tmux panes and intentionally checks trust dialog text before prompt detection because Codex trust screens can include a leading prompt-looking marker. Overstory tests the same class of precedence: trust/bypass dialogs must win over ready indicators.
+
+Adopt:
+
+- dialog phase has priority over prompt/ready phase
+- polling handles render races
+- tests replay multiple snapshots where prompt-looking text and dialog text coexist
+
+Do not adopt:
+
+- blind startup dismiss by default
+- tmux as the transport
+- "prompt suffix means ready" as a trust-preflight success condition
+
+Rating: 🎯 9 🛡️ 9 🧠 4, ~40-80 LOC in dialog-engine tests.
+
+### ProvisioningRun Needs Explicit Trust Fields
+
+`ProvisioningRun` currently has no workspace trust fields, and `TeamProvisioningProgress` should not grow a structured trust payload in v1.
+
+Add only main-process run fields:
+
+```ts
+launchStateClearedForRun: boolean
+workspaceTrustPlan?: WorkspaceTrustFullPlanResult | null
+workspaceTrustExecution?: WorkspaceTrustExecutionResult | null
+workspaceTrustDiagnostics?: WorkspaceTrustDiagnosticsManifest | null
+workspaceTrustRetryAttempted?: boolean
+```
+
+Rules:
+
+- these fields are not sent over IPC as progress
+- `launchStateClearedForRun` is a lifecycle guard, not user-facing diagnostics
+- `workspaceTrustDiagnostics` is already budgeted/redacted before assignment
+- `writeLaunchFailureArtifactPackBestEffort()` copies only `workspaceTrustDiagnostics` into `flags.workspaceTrustPreflight`
+- retained progress does not need these fields
+
+This keeps the renderer schema stable and makes the artifact pack the structured diagnostics surface.
+
+Rating: 🎯 9 🛡️ 9 🧠 4, ~25-50 LOC plus artifact tests.
+
+## Tenth-Pass Runtime Lane And Provider Args Findings
+
+This pass re-checked OpenCode routing and provider CLI arg construction. It found two places where a correct preflight feature could still miss real launches.
+
+### Pure OpenCode Runtime Adapter Is Out Of V1
+
+Pure OpenCode teams can be routed through the runtime adapter before the legacy deterministic create/launch flow. That path is a different runtime ownership model and should not call the workspace-trust coordinator in v1.
+
+V1 boundary:
+
+- legacy deterministic `createTeam` and `launchTeam`: use workspace trust preflight
+- mixed OpenCode secondary lanes inside a deterministic launch: include their resolved workspace paths
+- pure OpenCode runtime adapter launch: do not call workspace trust preflight
+- future OpenCode-native trust gate: add a separate strategy and adapter-owned contract
+
+Tests to add:
+
+- pure OpenCode runtime-adapter launch with a fake coordinator asserts coordinator was not called
+- mixed OpenCode side lane under a deterministic lead includes the generated side-lane cwd in `planFull()` workspace input
+- deterministic create and deterministic launch both pass `allEffectiveMemberSpecs`, not only filtered primary members, into workspace collection
+
+Top 3 options:
+
+1. Keep v1 scoped to deterministic paths and include mixed side-lane workspaces - 🎯 9 🛡️ 9 🧠 5, ~50-90 LOC plus tests. Chosen because it matches current ownership boundaries.
+2. Add preflight to pure OpenCode adapter now - 🎯 5 🛡️ 6 🧠 7, ~120-220 LOC. Too easy to mix runtime-adapter and desktop UX policy before a real OpenCode trust failure exists.
+3. Ignore OpenCode workspaces completely - 🎯 4 🛡️ 5 🧠 2, ~0-10 LOC. Simple but can miss mixed side-lane generated worktrees.
+
+### Workspace Collection Must Use All Effective Members
+
+The deterministic flow creates `allEffectiveMemberSpecs`, plans runtime lanes, then filters `effectiveMemberSpecs` to primary-lane members. Workspaces must be collected before that filter loses side-lane information.
+
+Rule:
+
+- use `allEffectiveMemberSpecs` for workspace collection and artifact diagnostics
+- use `effectiveMemberSpecs` for primary runtime bootstrap semantics exactly as today
+- include side-lane OpenCode workspaces only when they flow through this deterministic path
+- dedupe by comparison key, but retain `source` and `memberId` evidence for diagnostics
+
+This is a Clean Architecture boundary: workspace trust collection is launch preparation policy, not lane execution policy. It consumes the lane plan output but does not decide lane ownership.
+
+Rating: 🎯 9 🛡️ 9 🧠 5, ~35-70 LOC plus mixed-lane tests.
+
+### Provider CLI Args Accept Early Patches
+
+The current helper shape is:
+
+```ts
+function buildProviderCliCommandArgs(providerArgs: string[], args: string[]): string[] {
+ return mergeJsonSettingsArgs([...providerArgs, ...args])
+}
+```
+
+That means app-managed provider settings can be applied before provider fact commands such as `model list` and `runtime status`. This is good: Codex trust intent belongs in provider settings, not in final launch-only raw argv.
+
+Rules:
+
+- patch provider settings before default-model resolution
+- patch `providerArgsByProvider` before `resolveAndValidateLaunchIdentity`
+- keep command args like `model list` and `runtime status` after provider args
+- characterize that `mergeJsonSettingsArgs()` merges app-owned Codex workspace-trust settings with existing forced-login settings
+- do not move trust overrides into user `extraCliArgs`
+
+Rating: 🎯 9 🛡️ 9 🧠 4, ~30-60 LOC plus characterization tests.
+
+### Provider Presence Decides Codex Patches, Not Workspace Presence
+
+`planFull()` should always receive the deterministic workspace set because Claude/orchestrator trust applies to every teammate workspace. Codex trust settings, however, should be produced only when Codex is actually present in one of the provider surfaces.
+
+Provider detection inputs:
+
+- resolved lead provider id
+- requested member provider ids before materialization
+- materialized member provider ids after defaults
+- cross-provider args map from `buildCrossProviderMemberArgs()`
+- provider facts/default-model probe surface for Codex
+
+Rules:
+
+- Anthropic-only team: no Codex trust settings
+- Codex lead: patch primary provider settings and provider facts settings
+- Anthropic lead with Codex teammate: patch cross-provider Codex settings and default-model probe settings
+- OpenCode-only pure adapter: no v1 patch because this path is outside deterministic launch
+- unknown/future provider ids: ignore for Codex strategy and keep diagnostics non-throwing
+
+Rating: 🎯 9 🛡️ 9 🧠 5, ~35-75 LOC plus provider-matrix tests.
+
+### Codex Native Overrides Should Be Repeatable Dotted Values
+
+The earlier plan used one `-c projects={...}` blob. That is riskier than necessary because it replaces the whole `projects` value at the config override layer and creates awkward merging with user-provided project config.
+
+Use one override value per trusted path:
+
+```text
+projects."<workspace-path>".trust_level="trusted"
+projects."<workspace-realpath>".trust_level="trusted"
+```
+
+Builder contract:
+
+- output `CodexConfigOverride[]`, not one TOML table string
+- each item is a value that the sibling Codex native adapter can pass as a single `-c` flag only when spawning Codex
+- quoted key segment uses TOML basic-string escaping
+- preserve slash/backslash text after runtime-compatible normalization
+- dedupe exact config keys after canonicalization
+- append app-owned overrides after existing Codex native config overrides inside the sibling adapter where the CLI uses later-wins semantics
+- do not parse and rewrite arbitrary user Codex config overrides unless a future test proves CLI precedence changed
+
+Example output:
+
+```ts
+[
+ 'projects."/tmp/project".trust_level="trusted"',
+ 'projects."/private/tmp/project".trust_level="trusted"',
+]
+```
+
+Why this is safer:
+
+- it preserves unrelated `projects` entries
+- it aligns with Codex CLI `-c <key>=<value>` syntax at the Codex binary boundary
+- it matches the sibling runtime's native `configOverrides: string[]` contract
+- it makes order and rollback easier to test
+
+Tests to add:
+
+- quoted path segment with spaces, brackets, quotes, and backslashes
+- two repeated override values are preserved by the sibling Codex native adapter
+- forced-login `--settings` JSON remains separate from native Codex trust override values
+- app overrides are appended after existing sibling Codex native config overrides
+- no single `projects={...}` blob is produced by v1 code
+- no Codex native override is emitted as `-c` in Claude launch argv
+
+Top 3 Codex override encodings:
+
+1. Repeatable dotted override values passed through typed runtime contract to sibling `configOverrides` - 🎯 9 🛡️ 9 🧠 6, ~90-180 LOC across desktop and sibling runtime. Chosen because it avoids Claude `-c` ambiguity and uses the existing Codex native boundary.
+2. Direct Codex CLI `-c projects."<path>".trust_level="trusted"` from desktop - 🎯 6 🛡️ 6 🧠 4, ~45-90 LOC. Valid only for direct Codex binary surfaces, not deterministic Claude/orchestrator launch.
+3. Single `-c projects={...}` table blob - 🎯 6 🛡️ 5 🧠 5, ~50-100 LOC. Works in prototypes, but has more merge/clobber risk.
+
+## Eleventh-Pass Cleanup And Restart Findings
+
+This pass checked what happens if preflight blocks after `ProvisioningRun` exists but before the current launch flow calls `clearPersistedLaunchState()`.
+
+### `cleanupRun()` Needs A Launch-State Guard
+
+Current `cleanupRun()` treats any failed `run.isLaunch && !run.provisioningComplete && !run.cancelRequested` run as a launch that already entered runtime bootstrap cleanup. It finalizes unconfirmed members and persists a failed launch snapshot.
+
+That is correct after runtime launch has begun. It is wrong for a workspace-trust preflight that blocks before `clearPersistedLaunchState()`: the previous launch snapshot should remain untouched because no new runtime launch actually started.
+
+Add a narrow run flag:
+
+```ts
+interface ProvisioningRun {
+ launchStateClearedForRun: boolean
+}
+```
+
+Rules:
+
+- initialize as `false`
+- set to `true` immediately after `clearPersistedLaunchState()` succeeds
+- gate launch cleanup finalization on `run.launchStateClearedForRun === true`
+- failure artifacts should still be written for failed preflight runs
+- retained progress should still be stored
+- previous launch snapshot must not be overwritten when preflight blocks before clear
+
+Target cleanup condition:
+
+```ts
+const shouldFinalizeFailedLaunchSnapshot =
+ !hasNewerTrackedRun &&
+ run.isLaunch &&
+ run.launchStateClearedForRun &&
+ !run.provisioningComplete &&
+ !run.cancelRequested
+```
+
+This is a small lifecycle hardening that makes the planned preflight placement safe. Without it, the plan's "run before clearing persisted launch state" guarantee is false.
+
+Top 3 options:
+
+1. Add `launchStateClearedForRun` and gate only launch-state finalization - 🎯 9 🛡️ 10 🧠 3, ~20-40 LOC plus tests. Chosen because it is precise and preserves current post-clear behavior.
+2. Avoid `cleanupRun()` for preflight-blocked launches - 🎯 6 🛡️ 6 🧠 5, ~40-80 LOC. Risky because it duplicates run-map/progress/artifact cleanup.
+3. Move preflight after `clearPersistedLaunchState()` - 🎯 7 🛡️ 5 🧠 2, ~5-15 LOC. Simpler but loses the main safety property and can erase the previous launch snapshot on trust setup failures.
+
+### Preflight Failure Artifact Ordering
+
+`writeLaunchFailureArtifactPackBestEffort()` is idempotent per run. If workspace-trust diagnostics are assigned after the first artifact write, Copy diagnostics will miss the most useful facts.
+
+Ordering rule for `failDeterministicRunBeforeSpawn()`:
+
+1. assign `run.workspaceTrustDiagnostics`
+2. update progress to `failed`
+3. call `run.onProgress(...)`
+4. cleanup helper material and mode-specific disk artifacts
+5. call `cleanupRun(run)`
+
+Do not call `writeLaunchFailureArtifactPackBestEffort()` directly from the preflight helper unless the helper also owns the idempotence key and can prove `cleanupRun()` will not write first. The cleaner v1 path is to let `cleanupRun()` write exactly once after diagnostics are already on the run.
+
+Tests to add:
+
+- blocked launch preflight before `clearPersistedLaunchState()` writes artifact with `workspaceTrustPreflight`
+- blocked launch preflight before clear does not call `persistLaunchStateSnapshot`
+- blocked launch preflight after a simulated clear uses existing failed-launch cleanup behavior
+- failed artifact is written once even if the preflight helper and cleanup path both observe failure
+
+Rating: 🎯 9 🛡️ 9 🧠 4, ~35-70 LOC plus tests.
+
+### Direct Teammate Restart Is A Separate Workflow
+
+The code also has direct teammate restart paths:
+
+- `launchDirectTmuxMemberRestart(...)`
+- `launchDirectProcessMemberRestart(...)`
+
+Those paths can spawn provider runtimes with their own cwd after a team is already alive. They are not part of issue #100 first-launch/relaunch, and pulling them into the first PR would widen the blast radius.
+
+V1 rule:
+
+- do not wire workspace trust preflight into direct restart paths
+- keep direct restart failures classified through existing runtime diagnostics
+- make `WorkspaceTrustCoordinator` reusable so a later `prepareWorkspaceTrustForDirectRestart(...)` can call the same port
+- document direct restart as a phase 2 extension, not a hidden TODO inside launch integration
+
+Future direct restart helper should be tiny:
+
+```ts
+prepareWorkspaceTrustForDirectRestart({
+ teamName,
+ memberName,
+ providerId,
+ cwd,
+ shellEnv,
+ claudePath,
+})
+```
+
+Top 3 direct-restart options:
+
+1. Exclude from v1 but keep coordinator reusable - 🎯 9 🛡️ 9 🧠 3, ~0-15 LOC now. Chosen because launch reliability is the current bug.
+2. Add direct process restart preflight only - 🎯 6 🛡️ 7 🧠 6, ~80-140 LOC. Covers one restart path but creates tmux/process asymmetry.
+3. Add all restart preflight now - 🎯 5 🛡️ 6 🧠 8, ~150-260 LOC. Too much workflow risk for the first PR.
+
+### Post-Trust Provider Screens Must Not Become New Automation
+
+The sibling Claude startup flow shows trust before several other screens and side-effectful setup steps: MCP server approvals, external CLAUDE.md include warnings, telemetry/env initialization, Grove policy, and custom API key prompts.
+
+V1 rule:
+
+- after pressing workspace-trust `Enter`, probe persisted trust immediately
+- if trust is persisted, kill PTY and return success
+- do not automate MCP approvals, external include dialogs, Grove policy, or provider onboarding
+- rules for the bypass-permissions and custom API key screens exist only because those screens can appear before trust persistence is readable, or during compatibility smoke runs; they are not a reason to keep the PTY alive longer
+
+This mirrors the security intent in the sibling runtime: accept trust for the exact selected folder, then minimize additional provider startup side effects.
+
+Rating: 🎯 9 🛡️ 10 🧠 4, ~20-45 LOC in strategy/engine tests.
+
+### Pending-Key And No-Run Phase Invariant
+
+Both create and launch set a pending provisioning key before the run object exists. Early `planArgsOnly()` happens in that no-run window.
+
+Invariant:
+
+- no-run phase may compute pure arg patches and diagnostics
+- no-run phase must not spawn PTY
+- no-run phase must not write temp files
+- no-run phase must not create provider config sentinels
+- any no-run exception must be caught by the existing pending-key cleanup path
+
+This keeps the TOCTOU guard intact without introducing a second run lifecycle before `ProvisioningRun` exists.
+
+Rating: 🎯 9 🛡️ 9 🧠 3, ~10-25 LOC plus tests.
+
+## Twelfth-Pass Feature Boundary, Arg Dialect, And PTY Adapter Findings
+
+This pass focused on the remaining places where a clean plan could still become risky during implementation: feature imports, `node-pty` ownership, Codex/Claude argument dialects, duplicate fallback patches, and diagnostic redaction.
+
+### Feature Boundary Must Be Public And Narrow
+
+The workspace trust feature should be a feature module, not a hidden subfolder of `TeamProvisioningService`.
+
+Allowed imports from team launch code:
+
+```ts
+import {
+ createWorkspaceTrustCoordinator,
+ type WorkspaceTrustCoordinator,
+ type WorkspaceTrustFullPlanResult,
+} from '@features/workspace-trust/main'
+```
+
+Forbidden imports from team launch code:
+
+```ts
+// forbidden
+import { NodePtyProcessAdapter } from '@features/workspace-trust/main/adapters/output/NodePtyProcessAdapter'
+import { StartupDialogRules } from '@features/workspace-trust/main/infrastructure/StartupDialogRules'
+import { CodexConfigOverrideBuilder } from '@features/workspace-trust/core/domain/CodexConfigOverride'
+```
+
+Reason:
+
+- `TeamProvisioningService` should orchestrate launch lifecycle only.
+- prompt matching should change because provider startup changes, not because team launch flow changes.
+- Codex settings/runtime-contract syntax should change because Codex runtime syntax changes, not because launch cleanup changes.
+- `node-pty` optional native loading should stay behind a port and never leak into tests that only launch fake teams.
+
+Feature import boundary:
+
+```mermaid
+flowchart TD
+ TPS["TeamProvisioningService"] --> PUB["@features/workspace-trust/main public facade"]
+ PUB --> APP["core/application use cases"]
+ APP --> DOM["core/domain pure policy"]
+ APP --> PORTS["core/application ports"]
+ PUB --> COMP["main/composition"]
+ COMP --> ADAPT["main/adapters"]
+ ADAPT --> NPTY["optional node-pty"]
+ ADAPT --> FS["filesystem/provider state"]
+
+ TPS -. forbidden .-> ADAPT
+ TPS -. forbidden .-> DOM
+```
+
+Top 3 boundary options:
+
+1. Public feature facade only - 🎯 9 🛡️ 10 🧠 5, ~45-90 LOC of feature shell and exports. Chosen because it keeps launch lifecycle separate from provider automation.
+2. Local `team/workspaceTrust` folder with internal exports - 🎯 7 🛡️ 7 🧠 4, ~25-60 LOC. Faster, but makes future provider growth easier to couple into launch service.
+3. Direct imports from adapters/domain inside `TeamProvisioningService` - 🎯 4 🛡️ 4 🧠 2, ~10-25 LOC. Too easy to violate SRP and hard to test without native dependencies.
+
+### Do Not Reuse `PtyTerminalService`
+
+`PtyTerminalService` already handles app terminals and optional `node-pty`, but it is the wrong abstraction for trust preflight.
+
+Why not reuse it:
+
+- it is renderer-terminal-facing and tied to `BrowserWindow`/IPC behavior.
+- it owns interactive terminal sessions, not short provider startup probes.
+- it may keep session state that is useful for UI terminals but undesirable for headless preflight.
+- workspace trust preflight needs bounded raw-tail capture, allowlisted key actions, and immediate cleanup.
+
+Use a new adapter:
+
+```ts
+export class NodePtyProcessAdapter implements PtyProcessPort {
+ async spawn(input: PtySpawnInput): Promise<PtySessionPort> {
+ const pty = loadOptionalNodePty()
+ if (!pty.ok) {
+ return skippedPtySession('node_pty_unavailable', pty.error)
+ }
+ return spawnBoundedProviderProbe(pty.module, input)
+ }
+}
+```
+
+Rules:
+
+- adapter may use the same optional `require('node-pty')` pattern as terminal service.
+- adapter must not import `BrowserWindow`.
+- adapter must not register app terminal sessions.
+- missing native addon returns a `node_pty_unavailable` diagnostic, not a thrown launch crash.
+- test core with fake `PtyProcessPort`; test adapter with one small optional-load unit.
+
+Top 3 PTY ownership options:
+
+1. Dedicated `NodePtyProcessAdapter` behind `PtyProcessPort` - 🎯 9 🛡️ 9 🧠 5, ~80-150 LOC. Chosen because it isolates native/process lifecycle.
+2. Wrap `PtyTerminalService` from workspace trust feature - 🎯 6 🛡️ 6 🧠 4, ~40-80 LOC. Reuses code but leaks renderer terminal semantics into launch preflight.
+3. Spawn normal `child_process` and hope prompts print enough - 🎯 4 🛡️ 4 🧠 3, ~30-60 LOC. Not reliable for TUIs and misses the core issue.
+
+### Launch Arg Patches Need Dialect, Owner, And Dedupe
+
+The earlier type was too raw:
+
+```ts
+type WorkspaceTrustLaunchArgPatch = {
+ provider: WorkspaceTrustProvider
+ args: string[]
+ appliesTo: 'primary' | 'cross_provider' | 'provider_facts' | 'default_model_probe'
+ reason: string
+}
+```
+
+That is not enough to prevent mistakes. A patch must say which CLI dialect consumes it and where it is safe to apply.
+
+Safer contract:
+
+```ts
+export type WorkspaceTrustLaunchArgPatch = {
+ id: string
+ owner: 'workspace-trust'
+ targetProvider: WorkspaceTrustProvider
+ targetSurface:
+ | 'primary_provider_args'
+ | 'cross_provider_member_args'
+ | 'provider_facts_probe'
+ | 'default_model_probe'
+ dialect:
+ | 'codex-native-config-override'
+ | 'claude-codex-runtime-settings'
+ | 'codex-direct-cli-config'
+ args: string[]
+ dedupeKey: string
+ sourceWorkspaceIds: string[]
+ reason: string
+}
+```
+
+Application rules:
+
+- `claude-codex-runtime-settings` is the v1 desktop-to-sibling contract for deterministic Agent Teams launch.
+- `codex-native-config-override` may only be applied inside the sibling Codex native adapter before spawning the Codex binary.
+- `codex-direct-cli-config` is allowed only for future direct Codex CLI probes, not for Claude/orchestrator launch.
+- no Codex dialect may append `-c` to pure Anthropic Claude CLI args.
+- unknown target surface returns a skipped diagnostic, not a best-effort append.
+- patch applier is pure: input args in, derived args out, no mutation.
+- exact app-owned override sequence is not appended twice.
+- never remove arbitrary user-provided Codex config overrides.
+
+This is the narrowest way to support fallback/future phases without mixing provider syntax into launch orchestration.
+
+Top 3 arg patch models:
+
+1. Typed patch with dialect, owner, target surface, and dedupe key - 🎯 9 🛡️ 10 🧠 5, ~90-160 LOC plus tests. Chosen because it prevents wrong-provider args and duplicate fallback patches.
+2. Raw `args: string[]` append helper - 🎯 6 🛡️ 5 🧠 2, ~25-50 LOC. Simple but fragile during retries and cross-provider launch.
+3. Full parser/merger for all provider args - 🎯 6 🛡️ 7 🧠 9, ~250-450 LOC. Too broad for v1 and likely to create parser bugs.
+
+### Codex Arg Dialect Boundary
+
+There are at least three Codex-related launch surfaces in this repo family:
+
+- direct Codex binary style, where `-c key=value` is native to Codex.
+- Claude/orchestrator multimodel style, where Codex preferences are passed through Claude `--settings` JSON and later consumed by runtime code.
+- sibling Codex native executor style, where `configOverrides: string[]` is converted to direct Codex `-c` only at the Codex binary boundary.
+
+Do not assume every place that mentions Codex can consume the same argument shape.
+
+V1 rule:
+
+- desktop deterministic launch emits Codex trust as `--settings` JSON, not direct `-c`.
+- sibling runtime validates the settings payload and appends matching values to Codex native `configOverrides`.
+- do not append Codex native `-c` to primary Anthropic provider args or any Claude CLI argv.
+- characterize `buildInheritedCliFlags` in the sibling runtime to prove app-owned settings survive Anthropic lead -> Codex teammate.
+- characterize desktop `buildProviderCliCommandArgs(...)` so provider facts/default model probes keep existing order and merge app trust settings.
+- if a Codex surface cannot be proven, emit `codex_trust_settings_surface_unknown` diagnostic and leave current behavior intact.
+
+Required tests:
+
+- Anthropic-only team receives no Codex trust settings.
+- Codex lead receives app-owned Codex workspace-trust settings.
+- Anthropic lead with Codex teammate receives app-owned settings that `buildInheritedCliFlags` preserves for the Codex teammate.
+- default model probe for Codex member receives merged app-owned settings.
+- provider facts probe for Codex receives merged app-owned settings.
+- pure Claude primary provider args do not receive Codex native `-c`.
+- repeated application of the same patch does not duplicate override values.
+- existing sibling Codex native `configOverrides` remain in place.
+
+### Borrow From GasCity, But Do Not Copy Its Coupling
+
+What to borrow:
+
+- PTY automation is valid for real TUI startup screens.
+- update dialogs and trust dialogs can appear in sequence.
+- stale terminal text must not cause repeated key presses.
+- tests should be snapshot-driven and replayable.
+
+What to improve:
+
+- use provider state probe as the success condition, not only screen text.
+- kill provider PTY as soon as trust state persists.
+- use a protected Claude command so project MCP/hooks/tools are not loaded.
+- keep PTY automation in desktop host, not in the headless runtime executor.
+- keep fallback as typed diagnostics, not blind extra key presses.
+
+Rating: 🎯 9 🛡️ 9 🧠 5, mostly tests and architecture boundaries.
+
+### Diagnostic Redaction Should Reuse Existing Launch Artifact Policy
+
+Workspace trust should not invent a second redaction system.
+
+Rules:
+
+- structured diagnostics by default.
+- raw PTY tail only when debug flag is on and status is failed/blocked.
+- raw tail must pass through the same redaction helper used for launch artifacts or a shared redactor extracted from it.
+- diagnostic budget is applied before assigning to `run.workspaceTrustDiagnostics`.
+- artifact manifest stores effective feature flags, omitted counts, statuses, and evidence tokens, not full config files or env.
+
+Top 3 redaction options:
+
+1. Extract/reuse launch artifact redaction helper - 🎯 9 🛡️ 9 🧠 4, ~40-90 LOC. Chosen because diagnostics stay consistent.
+2. New workspace-trust-only redactor - 🎯 7 🛡️ 6 🧠 4, ~40-80 LOC. Easier locally, but policy drift is likely.
+3. Do not include any PTY tail ever - 🎯 7 🛡️ 10 🧠 1, ~5-10 LOC. Safest but weak for field debugging.
+
+## Thirteenth-Pass Sibling Runtime Contract And Launch Lifecycle Findings
+
+This pass rechecked the sibling runtime and the current `TeamProvisioningService` integration points. It changes the Codex part of the plan: direct Codex `-c` is valid, but not on Claude/orchestrator argv.
+
+### Claude `-c` Collision Is A Hard Boundary
+
+Sibling runtime facts:
+
+- `src/main.tsx` defines Claude `-c` as `--continue`.
+- sibling Codex native `execRunner` accepts `configOverrides: string[]` and converts each value into direct Codex `-c`.
+- `buildInheritedCliFlags()` propagates `--settings` inline JSON, but it does not propagate arbitrary Codex `-c` pairs.
+- cross-provider inherited flags strip model/effort only; settings survive provider boundary.
+
+Therefore:
+
+- desktop must not append `-c projects...` to Claude launch args.
+- desktop should send app-owned Codex workspace trust as inline settings JSON.
+- sibling runtime should validate that settings payload and append values to Codex native `configOverrides`.
+- direct Codex `-c` remains valid only for direct Codex PTY/probe/future direct binary surfaces.
+
+Suggested settings shape:
+
+```json
+{
+ "codex": {
+ "agent_teams_workspace_trust": {
+ "config_overrides": [
+ "projects.\"/path\".trust_level=\"trusted\"",
+ "projects.\"/private/path\".trust_level=\"trusted\""
+ ]
+ }
+ }
+}
+```
+
+Sibling validation rules:
+
+- accept only an object at `codex.agent_teams_workspace_trust`.
+- accept only a bounded string array at `config_overrides`.
+- accept only override values matching `projects."<path>".trust_level="trusted"`, where `<path>` is a non-empty workspace path.
+- reject values containing NUL, newline, unrelated keys, or non-string entries.
+- bound count and total bytes.
+- append after existing `buildCodexNativeMcpConfigOverrides()` values.
+- log structured diagnostic if settings are malformed, then skip trust overrides.
+
+Top 3 Codex contract options:
+
+1. Inline settings contract -> sibling `configOverrides` - 🎯 9 🛡️ 9 🧠 6, ~120-220 LOC across both repos. Chosen because it survives teammate inheritance and avoids Claude `-c`.
+2. New env var with JSON override values - 🎯 7 🛡️ 7 🧠 5, ~90-160 LOC. Simpler to parse, but tmux/env forwarding and security naming need more care.
+3. Desktop direct `-c` append - 🎯 4 🛡️ 3 🧠 3, ~40-80 LOC. Invalid for Claude launch because `-c` means continue.
+
+### Launch Lifecycle Has Four Trust-Safe Windows
+
+Current create path:
+
+1. pending key
+2. cwd + Claude binary
+3. provisioning env
+4. materialization/default model probes
+5. worktree resolution
+6. runtime lane and cross-provider args
+7. launch identity validation
+8. create `ProvisioningRun`
+9. clear persisted launch state
+10. write team meta, members, bootstrap spec, prompt, MCP config
+11. validate MCP runtime
+12. spawn
+
+Current launch path:
+
+1. pending key
+2. read config
+3. compatibility repair
+4. normalize config and backup/restore boundary
+5. update project path
+6. cwd + Claude binary
+7. provisioning env
+8. materialization/default model probes
+9. worktree resolution
+10. runtime lane and cross-provider args
+11. launch identity validation
+12. create `ProvisioningRun`
+13. clear persisted launch state
+14. publish mixed secondary lane status
+15. write bootstrap/MCP files
+16. write meta/members
+17. spawn
+
+Trust-safe windows:
+
+- `planEarly` (the coordinator's `planArgsOnly()` stage): after provisioning env, before materialization. Pure settings patches only.
+- `planFull`: after `allEffectiveMemberSpecs` and cross-provider args, before launch identity validation. Pure settings patches only.
+- `executePreflight`: after `ProvisioningRun`, before `clearPersistedLaunchState`. Claude PTY only, cancellable.
+- `spawn`: after clear/write/validate. No new trust side effects.
+
+Do not move Claude PTY before run creation: progress/cancel/artifacts would be weak. Do not move it after `clearPersistedLaunchState`: a blocked trust setup could wipe the previous launch snapshot.
+
+Top 3 placement options:
+
+1. Pure plans before validation, PTY execute after run and before clear - 🎯 9 🛡️ 10 🧠 7, ~160-260 LOC integration. Chosen because it respects both provider probes and launch snapshot safety.
+2. PTY before `ProvisioningRun` - 🎯 6 🛡️ 5 🧠 4, ~80-150 LOC. Less lifecycle state, but poor progress/cancel/artifacts.
+3. PTY after `clearPersistedLaunchState` - 🎯 7 🛡️ 5 🧠 3, ~60-120 LOC. Simpler, but can erase previous launch state before trust is ready.
+
+### Create And Launch Need Different Failure Policies
+
+Create failures before metadata writes should not delete unrelated existing files. Launch failures after config normalization must restore prelaunch config.
+
+Typed cleanup policy:
+
+```ts
+type WorkspaceTrustPreSpawnFailurePolicy =
+ | {
+ mode: 'create'
+ teamName: string
+ createdTeamDirectories: false
+ cleanupAnthropicHelper: boolean
+ }
+ | {
+ mode: 'launch'
+ teamName: string
+ restorePrelaunchConfig: true
+ launchStateClearedForRun: boolean
+ }
+```
+
+Rules:
+
+- `create` preflight block before meta writes removes run tracking and helper material only.
+- `create` preflight block must not `rm -rf` team/task directories unless this run created them.
+- `launch` preflight block restores prelaunch config because normalization/projectPath update already happened.
+- `launch` preflight block before clear must not persist a new failed launch snapshot.
+- both modes assign workspace trust diagnostics before `cleanupRun()`.
+
+Rating: 🎯 9 🛡️ 10 🧠 6, ~70-130 LOC plus tests.
+
+### Sibling Runtime Changes Are Now Explicit V1 Scope
+
+Old assumption: desktop can solve Codex with launch args only.
+
+Updated assumption: desktop solves Claude/orchestrator trust alone, but Codex-native trust needs a small sibling runtime contract if we want zero user friction for Codex-native turns.
+
+V1 sibling scope:
+
+- add `getCodexWorkspaceTrustConfigOverrides(settingsInline?)`.
+- validate and bound settings payload.
+- append validated values to `runExec({ configOverrides })` in `turnExecutor`.
+- add tests in sibling runtime for valid, malformed, over-limit, and inherited settings.
+- keep runtime `isPathTrusted()` gate unchanged.
+
+Out of v1 sibling scope:
+
+- modifying Claude trust persistence.
+- accepting trust in headless runtime.
+- changing `isPathTrusted()`.
+- global Codex config writes.
+
+Rating: 🎯 9 🛡️ 9 🧠 6, ~50-120 sibling LOC plus tests.
+
+## Non-Goals
+
+Do not do these in v1:
+
+- Do not add tmux as a requirement.
+- Do not auto-edit Claude trust files as the primary path.
+- Do not globally trust parent directories, home directories, or recent projects.
+- Do not scan project files to decide trust.
+- Do not mutate provider auth state.
+- Do not accept unknown prompts.
+- Do not change teammate permission mode.
+- Do not change process cleanup or lifecycle.
+- Do not treat Codex auth picker as a trust prompt.
+- Do not hide the exact provider error if preflight fails.
+
+## Architecture
+
+The desktop app owns UX policy. The runtime owns process execution.
+
+```mermaid
+flowchart TD
+ UI["Frontend project selection"] --> TPS["TeamProvisioningService"]
+ TPS --> WTC["WorkspaceTrustCoordinator"]
+ WTC --> PLAN["WorkspaceTrustPlan"]
+ PLAN --> CWS["ClaudePtyWorkspaceTrustStrategy"]
+ PLAN --> CXS["CodexLaunchArgsTrustStrategy"]
+ CWS --> PDE["PtyDialogEngine"]
+ PDE --> NPA["NodePtyProcessAdapter"]
+ CXS --> ARGS["Codex provider args patch"]
+ WTC --> DIAG["TrustPreflightDiagnostics"]
+ TPS --> RUNTIME["agent_teams_orchestrator launch"]
+ ARGS --> RUNTIME
+ RUNTIME --> BOOT["Bootstrap events"]
+ BOOT --> RETRY["Optional one trust retry if workspace_trust_required"]
+ RETRY --> WTC
+```
+
+### Detailed Runtime Placement
+
+```mermaid
+flowchart TD
+ A["createTeam or launchTeam legacy deterministic path"] --> B["buildProvisioningEnv"]
+ B --> C["WorkspaceTrustCoordinator.planArgsOnly"]
+ C --> D["apply early Codex settings patches"]
+ D --> E["materializeEffectiveTeamMemberSpecs"]
+ E --> F["resolveOpenCodeMemberWorkspacesForRuntime"]
+ F --> LANE["plan runtime lanes, retain allEffectiveMemberSpecs"]
+ LANE --> G["buildCrossProviderMemberArgs for primary-lane members"]
+ G --> H["WorkspaceTrustCoordinator.planFull with allEffectiveMemberSpecs"]
+ H --> I["apply final provider settings patches"]
+ I --> J["resolveAndValidateLaunchIdentity"]
+ J --> K["create ProvisioningRun"]
+ K --> L["prepareWorkspaceTrustForDeterministicRun"]
+ L --> M["clearPersistedLaunchState"]
+ M --> N["build bootstrap spec and MCP config"]
+ N --> O["build final launch args"]
+ O --> P["spawnCli"]
+```
+
+The ordering is intentional:
+
+- early Codex settings patches must exist before default model resolution inside `materializeEffectiveTeamMemberSpecs`.
+- final Codex settings patches must exist before `resolveAndValidateLaunchIdentity`.
+- Claude PTY warmup should happen after `ProvisioningRun` exists so progress and diagnostics are retained.
+- Claude PTY warmup should happen before `clearPersistedLaunchState` so a blocked preflight does not erase the previous launch snapshot.
+- Bootstrap files and MCP config should be written after trust warmup, so preflight failures do not leave unnecessary temporary launch files.
+- create and launch must use the same helper so first-run project creation and later relaunches do not drift.
+
+### Critical Sequence
+
+```mermaid
+sequenceDiagram
+ participant TPS as TeamProvisioningService
+ participant WTC as WorkspaceTrustCoordinator
+ participant MAT as Member Materialization
+ participant VAL as Launch Identity Validation
+ participant RUN as ProvisioningRun
+ participant PTY as Claude PTY Strategy
+ participant RUNTIME as agent_teams_orchestrator
+
+ TPS->>WTC: planArgsOnly(request cwd, providers, provider settings)
+ WTC-->>TPS: early Codex settings patches, diagnostics
+ TPS->>MAT: materialize members with providerArgsResolver
+ MAT-->>TPS: effective members and default models
+ TPS->>WTC: planFull(all deterministic workspaces, cross-provider settings)
+ WTC-->>TPS: final settings patches and Claude execution plan
+ TPS->>VAL: validate with patched providerArgsByProvider
+ VAL-->>TPS: launch identity
+ TPS->>RUN: create run and emit progress
+ TPS->>WTC: execute(plan, isCancelled, onProgress)
+ WTC->>PTY: protected interactive command
+ PTY-->>WTC: accepted, already trusted, blocked, soft_failed, or cancelled
+ WTC-->>TPS: bounded diagnostics
+ alt blocked
+ TPS->>RUN: failDeterministicRunBeforeSpawn(policy)
+ else cancelled or stale
+ TPS->>RUN: existing cancellation cleanup, no spawn
+ else ok or soft_failed
+ TPS->>RUNTIME: spawn with patched settings/args
+ RUNTIME-->>TPS: bootstrap events or workspace_trust_required fallback
+ end
+```
+
+### Dependency Direction
+
+```mermaid
+flowchart LR
+ TPS["TeamProvisioningService"] --> WTC["WorkspaceTrustCoordinator port"]
+ WTC --> WTP["WorkspaceTrustPlanner"]
+ WTC --> CTS["ClaudeTrustStrategy port"]
+ WTC --> CXS["CodexTrustStrategy port"]
+ CTS --> PTE["PtyDialogEngine"]
+ CTS --> CSP["ClaudeStateProbe port"]
+ PTE --> PTP["PtyProcessPort"]
+ PTP --> NPA["NodePtyProcessAdapter"]
+ CSP --> FS["File system adapter"]
+ CXS --> CCB["CodexConfigOverrideBuilder"]
+```
+
+Allowed dependencies:
+
+- `TeamProvisioningService` depends only on coordinator types.
+- coordinator depends on strategy ports and pure domain helpers.
+- strategies depend on provider-specific ports.
+- adapters depend on filesystem, `node-pty`, and provider config syntax.
+
+Forbidden dependencies:
+
+- `TeamProvisioningService` -> `node-pty`
+- `TeamProvisioningService` -> prompt regexes
+- `PtyDialogEngine` -> team launch state
+- Codex strategy -> Claude state file
+- Claude strategy -> Codex config builder
+
+### Three-Stage Coordinator
+
+```ts
+export interface WorkspaceTrustCoordinator {
+ planArgsOnly(request: WorkspaceTrustArgsOnlyPlanRequest): Promise<WorkspaceTrustArgsOnlyPlanResult>
+ planFull(request: WorkspaceTrustFullPlanRequest): Promise<WorkspaceTrustFullPlanResult>
+ execute(plan: WorkspaceTrustExecutionPlan): Promise<WorkspaceTrustExecutionResult>
+}
+```
+
+`planArgsOnly()`:
+
+- pure or near-pure
+- runs after `buildProvisioningEnv`
+- sees `request.cwd`, requested providers, provider args, shell env, and `claudePath`
+- computes Codex settings patches needed by default model and provider fact probes
+- does not inspect member worktrees because they are not resolved yet
+- does not spawn PTY
+- does not write config or temp files
+- should not throw for provider trust setup problems
+
+`planFull()`:
+
+- pure or near-pure
+- computes unique workspaces
+- computes provider list
+- computes final Codex settings patches for primary and cross-provider launch args
+- computes Claude execution work
+- does not spawn PTY
+- does not write trust files
+- dedupes early Codex patches so the final launch receives one app-owned override set
+- should not throw for provider trust setup problems
+
+`execute()`:
+
+- runs side-effectful strategies
+- currently Claude PTY warmup
+- updates diagnostics
+- uses bounded timeouts
+- observes cancellation
+- returns `ok`, `soft_failed`, `blocked`, or `cancelled`
+
+This split keeps launch identity validation correct without forcing a long PTY warmup before the UI has a visible run.
+
+### Strategy Ratings After Code Review
+
+| Area | Rating | Reason |
+| --- | --- | --- |
+| Claude PTY warmup | 🎯 9 / 🛡️ 8 / 🧠 6 | Prototype confirmed it writes `hasTrustDialogAccepted`; risk is prompt matching and cleanup. |
+| Codex per-launch args | 🎯 8 / 🛡️ 8 / 🧠 5 | Best scoped intent, but recent Codex issue means we must verify/diagnose possible provider-side config mutation. |
+| Three-stage coordinator | 🎯 8 / 🛡️ 9 / 🧠 8 | Best fit for current create/launch order; risk is integration complexity. |
+| Direct `.claude.json` write fallback | 🎯 5 / 🛡️ 5 / 🧠 5 | Useful emergency lever but private format and file race risk. Keep out of v1. |
+| Codex PTY fallback | 🎯 7 / 🛡️ 7 / 🧠 6 | GasCity-proven for TUI, but not needed for issue #100 path if settings -> native override contract works. Keep engine-ready, do not default. |
+
+### Responsibility Boundaries
+
+`TeamProvisioningService`:
+
+- Knows when a team launch is about to start.
+- Knows requested members, providers, cwd, worktrees, and launch args.
+- Does not know prompt text or PTY details.
+
+`WorkspaceTrustCoordinator`:
+
+- Builds the trust plan for all launch workspaces.
+- Executes provider strategies.
+- Produces diagnostics and launch argument patches.
+- Owns idempotence and retry policy.
+
+`WorkspaceTrustStrategy`:
+
+- Provider-specific behavior behind one interface.
+- Claude strategy handles PTY warmup.
+- Codex strategy handles per-launch config override.
+- Future Gemini/OpenCode strategies can be added without modifying the coordinator.
+
+`PtyDialogEngine`:
+
+- Generic terminal snapshot state machine.
+- Owns prompt detection and key actions.
+- Does not know team launch, providers, or config files.
+
+`NodePtyProcessAdapter`:
+
+- Thin adapter around `node-pty`.
+- Handles process spawn, snapshot collection, key writes, timeout, and cleanup.
+
+`Runtime contract`:
+
+- Runtime receives prepared workspace and provider args.
+- Runtime still enforces trust gates.
+- Runtime reports typed trust failure if trust is still missing.
+
+## Clean Architecture Shape
+
+Suggested desktop app layout:
+
+```text
+src/features/workspace-trust/
+ index.ts
+ contracts/
+ index.ts
+ core/
+ domain/
+ WorkspaceTrustTypes.ts
+ WorkspaceTrustPolicy.ts
+ WorkspaceTrustPath.ts
+ CodexConfigOverride.ts
+ WorkspaceTrustDiagnosticsBudget.ts
+ application/
+ WorkspaceTrustCoordinator.ts
+ WorkspaceTrustPlanner.ts
+ WorkspaceTrustArgPatchApplier.ts
+ WorkspaceTrustDiagnostics.ts
+ WorkspaceTrustLocks.ts
+ ClaudePreflightCommand.ts
+ ports.ts
+ main/
+ index.ts
+ composition/
+ createWorkspaceTrustCoordinator.ts
+ adapters/
+ output/
+ ClaudeStateProbe.ts
+ NodePtyProcessAdapter.ts
+ FileWorkspaceTrustLockStore.ts
+ TempEmptyMcpConfigStore.ts
+ infrastructure/
+ PtyDialogEngine.ts
+ StartupDialogRules.ts
+ WorkspaceTrustFeatureFlags.ts
+```
+
+Small pure helpers that are only needed by `TeamProvisioningService` can live near the service, but the feature logic should use the canonical `src/features/` shape from `docs/FEATURE_ARCHITECTURE_STANDARD.md`.
+
+Top 3 placement options:
+
+1. `src/features/workspace-trust` with core/application/main layers - 🎯 9 🛡️ 9 🧠 8, ~700-1050 LOC. Best long-term architecture and easiest provider growth.
+2. `src/main/services/team/workspaceTrust` local module - 🎯 8 🛡️ 8 🧠 6, ~550-850 LOC. Less churn, but weaker boundary and easier to couple back into team service.
+3. Inline helpers inside `TeamProvisioningService` - 🎯 4 🛡️ 4 🧠 3, ~180-300 LOC. Fast, but violates SRP and makes prompt/PTY bugs likely.
+
+Chosen: option 1.
+
+SOLID mapping:
+
+- Single Responsibility - prompt rules, PTY IO, provider strategy, and launch orchestration are separate.
+- Open/Closed - new provider support adds a new strategy and rules, not changes in `TeamProvisioningService`.
+- Liskov - every strategy returns the same result contract and can be skipped safely.
+- Interface Segregation - Codex settings strategy does not depend on PTY; Claude PTY strategy does not depend on Codex config override syntax.
+- Dependency Inversion - coordinator depends on ports, not `node-pty` directly.
+
+## Core Types
+
+```ts
+export type WorkspaceTrustProvider = 'claude' | 'codex' | 'gemini' | 'opencode'
+
+export type WorkspaceTrustWorkspace = {
+ id: string
+ displayCwd: string
+ cwd: string
+ realCwd: string
+ configKeyCwd: string
+ gitRootConfigKey?: string
+ comparisonKey: string
+ source: 'team-root' | 'member-worktree' | 'member-cwd' | 'git-root'
+ memberId?: string
+ persistable: boolean
+ nonPersistableReason?: 'home_directory' | 'filesystem_root' | 'unavailable'
+}
+
+export type WorkspaceTrustRequest = {
+ teamName: string
+ launchId: string
+ mode: 'create' | 'launch'
+ providers: WorkspaceTrustProvider[]
+ workspaces: WorkspaceTrustWorkspace[]
+ shellEnv: NodeJS.ProcessEnv
+ trustPreflightEnv: NodeJS.ProcessEnv
+ claudePath?: string
+ codexPath?: string
+ policy: WorkspaceTrustPolicy
+ featureFlags: WorkspaceTrustFeatureFlags
+ isCancelled: () => boolean
+ onProgress?: (event: WorkspaceTrustProgressEvent) => void
+}
+
+export type WorkspaceTrustResult = {
+ ok: boolean
+ stage: 'args_only_plan' | 'full_plan' | 'execute'
+ provider: WorkspaceTrustProvider
+ workspaceIds: string[]
+ actions: WorkspaceTrustAction[]
+ launchArgPatches?: WorkspaceTrustLaunchArgPatch[]
+ diagnostics: WorkspaceTrustDiagnostic[]
+ error?: string
+}
+
+export type WorkspaceTrustExecutionStatus =
+ | 'ok'
+ | 'soft_failed'
+ | 'blocked'
+ | 'cancelled'
+
+export type WorkspaceTrustLaunchArgPatch = {
+ id: string
+ owner: 'workspace-trust'
+ targetProvider: WorkspaceTrustProvider
+ targetSurface:
+ | 'primary_provider_args'
+ | 'cross_provider_member_args'
+ | 'provider_facts_probe'
+ | 'default_model_probe'
+ dialect:
+ | 'codex-native-config-override'
+ | 'claude-codex-runtime-settings'
+ | 'codex-direct-cli-config'
+ args: string[]
+ dedupeKey: string
+ sourceWorkspaceIds: string[]
+ reason: string
+}
+
+export type WorkspaceTrustDiagnosticsManifest = {
+ attempt: number
+ featureFlags: WorkspaceTrustFeatureFlags
+ strategyResults: WorkspaceTrustDiagnosticStrategyResult[]
+ omittedCounts?: Record<string, number>
+}
+```
+
+The important design choice: strategies can either prepare state with side effects, or return typed launch argument patches. That keeps Codex native config overrides out of Claude argv, makes duplicate fallback patches testable, and keeps Claude PTY warmup isolated.
+
+## Ports And Adapters Contract
+
+Keep the core coordinator free from native dependencies and provider file formats.
+
+Core ports:
+
+```ts
+export interface WorkspaceTrustStrategy {
+ provider: WorkspaceTrustProvider
+ planArgsOnly?(request: WorkspaceTrustRequest): Promise<WorkspaceTrustResult>
+ planFull(request: WorkspaceTrustRequest): Promise<WorkspaceTrustResult>
+ execute?(request: WorkspaceTrustRequest): Promise<WorkspaceTrustResult>
+}
+
+export interface PtyProcessPort {
+ spawn(input: PtySpawnInput): Promise<PtySessionPort>
+}
+
+export interface PtySessionPort {
+ readSnapshot(timeoutMs: number): Promise
+ writeAction(action: PtyKeyAction): Promise
+ kill(): Promise
+}
+
+export interface ProviderStateProbe {
+ readTrustState(workspace: WorkspaceTrustWorkspace): Promise
+}
+
+export interface WorkspaceTrustLockPort {
+ withWorkspaceLock<T>(
+ key: string,
+ options: { timeoutMs: number; isCancelled: () => boolean },
+ fn: () => Promise<T>,
+ ): Promise<T>
+}
+```
+
+Adapter mapping:
+
+- `NodePtyProcessAdapter` implements `PtyProcessPort`.
+- `ClaudeStateProbe` implements `ProviderStateProbe`.
+- `WorkspaceTrustLockRegistry` implements `WorkspaceTrustLockPort`.
+- `CodexConfigOverrideBuilder` is a pure adapter for Codex native config override values. Only the sibling Codex native adapter turns those values into `-c` flags.
+- `TeamProvisioningService` only consumes `WorkspaceTrustCoordinator`.
+
+Testing rule: every provider strategy must be testable without a real provider binary. Real Claude/Codex checks belong in manual smoke scripts, not unit tests.
+
+### Dialog Detection Contract
+
+Use the Overstory shape, not a pile of regexes inside the launch service:
+
+```ts
+export type StartupReadinessState =
+ | { phase: 'dialog'; ruleId: string; actions: PtyKeyAction[]; retryPolicy: 'once' | 'typed_retry' }
+ | { phase: 'ready'; evidence: string[] }
+ | { phase: 'setup_required'; code: string; evidence: string[] }
+ | { phase: 'loading'; evidence?: string[] }
+```
+
+Provider strategy owns `detect(snapshot)`. The engine owns polling, action memory, retry delays, timeout, and cleanup.
+
+Action memory:
+
+- `Enter` trust actions are sent once per dialog rule unless a fresh, clearly new trust screen appears
+- typed actions like bypass confirmation may retry after a short delay if the same dialog persists
+- unknown screens never receive actions
+
+This is more robust than GasTown's simple sequential polling and avoids blind key sequences.
+
+## Fallback Ladder
+
+The product goal is "launch from frontend with no manual trust prompt". The engineering goal is "do that without silently trusting the wrong thing".
+
+### Claude Ladder
+
+1. Probe state.
+ - If exact `cwd`, `realCwd`, git root, or a parent key is already trusted, skip PTY.
+2. Non-persistable guard.
+ - If the workspace is home/root, return a clear blocked diagnostic instead of broad trust.
+3. PTY warmup.
+ - Use protected interactive Claude args, not normal startup.
+ - Accept only allowlisted workspace trust prompt.
+ - Handle known post-trust prompts like bypass permissions and custom API key.
+4. Verify state.
+ - Check `hasTrustDialogAccepted` through exact, realpath, git-root, and parent candidate keys after PTY action.
+5. Soft failure.
+ - If the PTY is unavailable, the warmup times out, or an unknown screen appears, continue launch with diagnostics unless the screen is known setup-required.
+6. Runtime fallback.
+ - If runtime still fails, keep `workspace_trust_required` classification and exact provider error.
+7. Optional later retry.
+ - Run only once and only after normal cleanup.
+
+Do not use direct `.claude.json` writes in v1. It is tempting because it is simple, but it bypasses the provider's own persistence path and risks config races.
+
+### Codex Ladder
+
+1. Build scoped native config override values.
+ - Add repeatable dotted `projects."<path>".trust_level="trusted"` values for exact workspace paths.
+2. Carry those values through app-owned Codex settings.
+ - Primary Codex launch settings.
+ - Secondary Codex teammate settings under another lead provider.
+ - Provider facts validation settings.
+ - Default model resolution probe settings.
+3. Apply direct `-c` only inside sibling Codex native exec.
+ - Validate settings payload.
+ - Append trusted project override values to `configOverrides`.
+ - Let `execRunner` turn values into Codex binary `-c` flags.
+4. Keep Codex PTY rules tested but inactive by default.
+ - Useful for future direct Codex TUI flows.
+5. Auth/setup fallback.
+ - If Codex shows auth picker, return `provider_auth_required`.
+ - Do not press Enter.
+
+Do not write `~/.codex/config.toml` in v1. Per-launch override is safer because it is scoped to the app launch and easy to roll back.
+
+### Fallback Ratings
+
+| Fallback | Use in v1 | Rating | Notes |
+| --- | --- | --- | --- |
+| Claude state probe | yes | 🎯 9 / 🛡️ 10 / 🧠 3 | read-only and cheap |
+| Non-persistable home/root guard | yes | 🎯 9 / 🛡️ 10 / 🧠 3 | avoids broad unsafe trust |
+| Claude PTY warmup | yes | 🎯 9 / 🛡️ 8 / 🧠 6 | provider-owned persistence, bounded risk |
+| Runtime typed/text failure | yes | 🎯 9 / 🛡️ 10 / 🧠 2 | already protected by current diagnostics |
+| Optional relaunch retry | later | 🎯 7 / 🛡️ 7 / 🧠 8 | useful but lifecycle-sensitive |
+| Direct Claude config write | no | 🎯 5 / 🛡️ 5 / 🧠 5 | future emergency flag only |
+| Codex settings -> native `configOverrides` | yes, with sentinel | 🎯 9 / 🛡️ 9 / 🧠 6 | safest deterministic-launch path because Claude argv never receives Codex `-c` |
+| Direct Codex per-launch dotted `-c` | future direct Codex only | 🎯 7 / 🛡️ 7 / 🧠 4 | valid at direct Codex binary boundary, unsafe on Claude argv |
+| Codex PTY fallback | later | 🎯 7 / 🛡️ 7 / 🧠 6 | useful if direct TUI path appears |
+| Codex global config write | no | 🎯 4 / 🛡️ 5 / 🧠 4 | unnecessary with `-c` |
+
+## Runtime Flow
+
+```mermaid
+sequenceDiagram
+ participant UI as User/UI
+ participant TPS as TeamProvisioningService
+ participant WTC as WorkspaceTrustCoordinator
+ participant CP as Claude PTY Strategy
+ participant CX as Codex Args Strategy
+ participant RT as Orchestrator Runtime
+
+ UI->>TPS: Launch team for selected project
+ TPS->>WTC: prepareWorkspaceTrust(request)
+ WTC->>CP: ensure Claude workspace trust for exact workspaces
+ CP->>CP: run protected node-pty warmup and accept allowlisted dialogs
+ WTC->>CX: build Codex workspace-trust settings
+ CX-->>WTC: typed settings patch
+ WTC-->>TPS: diagnostics + args patches
+ TPS->>RT: launch with patched args
+ RT-->>TPS: bootstrap events
+ alt optional Phase 4b workspace_trust_required retry
+ TPS->>WTC: retry preflight once
+ TPS->>RT: relaunch once
+ else success
+ RT-->>TPS: members joined
+ end
+```
+
+## Dialog State Machine
+
+The PTY engine should model startup as phases. It should not classify against a single ever-growing transcript.
+
+```mermaid
+stateDiagram-v2
+ [*] --> Start
+ Start --> ClaudeResume: resume selector
+ Start --> CodexUpdate: update available
+ Start --> WorkspaceTrust: trust prompt
+ Start --> Ready: ready prompt
+ Start --> AuthRequired: auth picker
+ Start --> ProviderSetupRequired: onboarding/theme
+ ClaudeResume --> CodexUpdate: Down + Enter
+ ClaudeResume --> WorkspaceTrust: Down + Enter
+ CodexUpdate --> WorkspaceTrust: Down + Enter
+ CodexUpdate --> Ready: Down + Enter
+ WorkspaceTrust --> BypassPermissions: Enter
+ WorkspaceTrust --> CustomApiKey: Enter
+ WorkspaceTrust --> Ready: Enter
+ BypassPermissions --> Ready: Down + Enter
+ CustomApiKey --> Ready: Up + Enter
+ Ready --> [*]
+ AuthRequired --> [*]
+ ProviderSetupRequired --> [*]
+```
+
+Important behavior:
+
+- After an action, the engine should wait for a fresh snapshot or action-settle delay.
+- Previously matched stale text must not trigger the same rule again forever.
+- A prompt-looking screen should get a short grace window before declaring ready.
+- Unknown text should never receive `Enter`.
+
+## Claude Implementation Plan
+
+### What We Need To Solve
+
+Claude/orchestrator trust is the workspace trust gate that blocks headless process teammates.
+
+Current runtime behavior in the sibling orchestrator:
+
+- `isPathTrusted(workingDir)` checks Claude global project trust.
+- If false, teammate spawn fails before the provider-specific process starts.
+- This affects Codex teammates too.
+
+Therefore, Claude workspace trust preflight must run for every workspace used by a team launch, even if no teammate uses Anthropic as its model provider.
+
+### Claude Strategy
+
+`ClaudePtyWorkspaceTrustStrategy` should:
+
+1. Resolve Claude binary with the same resolver used for launch.
+2. For each exact workspace:
+ - use `cwd`
+ - compute `realCwd`
+ - run a short PTY warmup in that cwd with the protected interactive Claude command
+ - detect allowlisted startup dialogs
+ - press only the required keys
+ - stop immediately once trust state is persisted or the workspace is already trusted
+3. Record diagnostics without secrets.
+
+The strategy should not send any user prompt to Claude. It starts interactive Claude only long enough to clear startup dialogs.
+
+The strategy should not pass runtime launch args. In particular, do not pass team bootstrap args, MCP config, user extra CLI args, model args, or `--dangerously-skip-permissions`.
+
+Protected command builder:
+
+```ts
+buildClaudeWorkspaceTrustPreflightArgs({
+ emptyMcpConfigPath,
+ supportsBare,
+ supportsStrictMcpConfig,
+ supportsSettingSources,
+ supportsTools,
+})
+```
+
+Expected modern args:
+
+```text
+--bare
+--strict-mcp-config
+--mcp-config <emptyMcpConfigPath>
+--setting-sources user
+--settings {"disableAllHooks":true}
+--tools ""
+```
+
+The temp empty MCP file is app-owned, contains `{"mcpServers":{}}`, and is removed in `finally`.
+
+### Claude Success Criteria
+
+Success if any is true:
+
+- Claude reaches a trusted normal prompt without showing trust.
+- Claude trust dialog was accepted and state now contains `hasTrustDialogAccepted: true` for `realCwd` or an ancestor.
+- The workspace was already trusted before preflight.
+- Another concurrent preflight accepted trust while this launch was waiting for the workspace lock.
+
+Do not require:
+
+- `hasCompletedProjectOnboarding: true`
+- a model call
+- a session to remain open
+- a successful plugin marketplace install
+
+### Claude Dialog Rules
+
+Required rules:
+
+```text
+Claude theme onboarding:
+ detect: Choose the text style that looks best with your terminal
+ action: none in v1
+ result: provider_setup_required
+
+Claude workspace trust:
+ detect: Quick safety check
+ detect: trust this folder
+ detect: Yes, I trust this folder
+ action: Enter
+ success check: state file has hasTrustDialogAccepted true
+
+Claude bypass permissions:
+ detect: Bypass Permissions mode
+ action: Down, Enter
+
+Claude custom API key:
+ detect: Detected a custom API key in your environment
+ detect: Do you want to use this API key?
+ action: Up, Enter
+
+Claude ready:
+ detect: prompt marker plus status/readiness marker
+ action: none
+```
+
+Do not treat `ClaudeCode v2.x` alone as ready. Prototype showed that string appears on splash/onboarding before the app is actually ready.
+
+Do not treat a single prompt marker as ready. GasTown's prompt-suffix exit is useful for detached tmux sessions, but Overstory's two-signal readiness is safer for a provider TUI. For our v1 Claude preflight, readiness is secondary anyway: trust persistence is the success condition.
+
+### Claude State Probe
+
+`ClaudeStateProbe` should read only the minimal state needed:
+
+- `$HOME/.claude.json`
+- `$CLAUDE_CONFIG_DIR/.claude.json` if present
+- only `projects` keys and trust booleans
+
+Do not log:
+
+- OAuth account data
+- API keys
+- MCP configs
+- full config JSON
+
+Path matching:
+
+- check exact `cwd`
+- check exact `realCwd`
+- check parent directories like orchestrator `isPathTrusted`
+- normalize macOS `/var` to `/private/var` by checking both original and realpath
+- preserve Windows drive paths if running on Windows
+- skip PTY if a trusted parent already covers the workspace
+
+### Claude Edge Cases
+
+| Case | Expected behavior |
+| --- | --- |
+| Claude binary missing | skip PTY, return `provider_binary_missing`, launch should fail with existing diagnostics |
+| `node-pty` unavailable | skip PTY, return `preflight_unavailable`, no crash |
+| Protected Claude flags unsupported | soft-fail by default; do not silently downgrade to plain `claude` |
+| Empty MCP config file invalid | treat as implementation error in tests; runtime returns soft-failed diagnostic |
+| Empty Claude profile shows onboarding | return `provider_setup_required`, do not auto-select theme in v1 |
+| Trust prompt appears | press Enter only if allowlisted prompt matches |
+| Bypass permissions appears after trust | probe trust state first; if persisted, kill PTY and return success |
+| Custom API key prompt appears after trust | probe trust state first; if persisted, kill PTY and return success |
+| Claude hangs on irrelevant output | timeout and return diagnostic |
+| Trust accepted but UI not ready | success if trust state is persisted |
+| Trust accepted then another dialog appears | success if trust state is persisted |
+| Workspace missing | do not run PTY, keep existing cwd-unavailable error |
+| Multiple team worktrees | run per unique realpath |
+| Concurrent launch same workspace | serialize by provider + realpath lock |
+
+### Claude Integration Detail
+
+Claude PTY preflight should run for all deterministic team launch workspaces, not only Anthropic launches.
+
+Reason: the sibling orchestrator checks Claude workspace trust before spawning headless process teammates. A Codex teammate can therefore fail before Codex starts.
+
+Workspace collection:
+
+- always include `request.cwd`
+- include `member.cwd` from `allEffectiveMemberSpecs` when present
+- include generated OpenCode worktree paths only when they flow through the legacy deterministic launch path
+- include `request.worktree` only if it resolves to an actual cwd path, not the worktree name string
+- dedupe by `realpath`
+- keep both display path and realpath in diagnostics so Windows/macOS path normalization bugs are visible
+- treat trusted parent directories as covering child workspaces, matching runtime `isPathTrusted`
+
+Placement:
+
+- collect workspaces after `resolveOpenCodeMemberWorkspacesForRuntime`
+- run Claude PTY after `ProvisioningRun` is created and before `clearPersistedLaunchState`
+- keep this preflight outside `buildProvisioningEnv`, because env resolution should not spawn UI processes
+- run under `failDeterministicRunBeforeSpawn(...)` cleanup if it blocks
+
+Progress:
+
+- start: `Preparing workspace trust`
+- action: `Accepted Claude workspace trust`
+- soft failure: warning diagnostic, continue launch
+- blocking setup: `Claude setup required` only if the runtime would otherwise be guaranteed to fail
+
+The safer v1 default is to continue launch on soft preflight failures and let the existing runtime failure path classify `workspace_trust_required`. This avoids converting recoverable launch paths into new pre-spawn hard failures.
+
+## Codex Implementation Plan
+
+### What We Need To Solve
+
+Codex has two separate trust-related surfaces:
+
+1. The orchestrator's Claude workspace trust gate before spawning headless teammates.
+2. Codex CLI's own workspace trust prompt in TUI/direct Codex flows.
+
+For issue #100 class failures, the first surface is the blocker. That is solved by Claude workspace preflight.
+
+For direct Codex launch and future Codex-native flows, use native config override values as the app-owned intent instead of directly writing global config. Recent Codex issue data means we must not promise that Codex itself will never persist a project trust entry.
+
+### Codex Strategy
+
+`CodexWorkspaceTrustSettingsStrategy` should:
+
+1. Compute trusted path keys for every workspace:
+ - original `cwd`
+ - `realpath(cwd)`
+ - normalized config key candidates that match Codex path syntax
+2. Build repeatable dotted native override values:
+
+```text
+projects."/path".trust_level="trusted"
+projects."/realpath".trust_level="trusted"
+```
+
+3. Return them as typed settings patches for Codex provider invocations.
+4. In the sibling runtime, validate the settings payload and append values to Codex native `configOverrides`.
+
+Do not write `~/.codex/config.toml` in v1.
+
+Do not claim "no config writes" in diagnostics. The correct claim is:
+
+- the desktop app does not directly write Codex global config
+- Codex native receives scoped config override values for the selected launch workspaces
+- current Codex behavior must be smoke-tested for whether it persists trust despite native config overrides
+- Claude/orchestrator launch argv never receives Codex native `-c`
+
+### Codex Config Override Builder
+
+The override builder must not concatenate unsafe raw path strings or emit a single `projects={...}` blob.
+
+Rules:
+
+- quote each path as a TOML basic-string dotted-key segment
+- support spaces
+- support quotes
+- support brackets
+- support backslashes
+- include original and realpath
+- deduplicate exact strings
+- return one override per path
+- do not parse or rewrite unrelated user Codex config overrides in v1
+
+Example:
+
+```ts
+buildCodexTrustedProjectOverrides([
+ '/tmp/project',
+ '/private/tmp/project',
+])
+```
+
+Output:
+
+```ts
+[
+ 'projects."/tmp/project".trust_level="trusted"',
+ 'projects."/private/tmp/project".trust_level="trusted"',
+]
+```
+
+### Codex PTY Fallback
+
+Codex PTY fallback is optional in v1, but the shared `PtyDialogEngine` should support it because:
+
+- GasCity has proven the update-to-trust order in real user environments.
+- Direct Codex TUI can show update before trust.
+- It is useful for future direct runtime flows.
+
+Codex rules:
+
+```text
+Codex update:
+ detect: Update available
+ detect: Skip until next version
+ detect: Press enter to continue
+ action: Down, Enter
+
+Codex workspace trust:
+ detect: Do you trust the contents of this directory?
+ detect: Working with untrusted contents
+ detect: Trusting the directory allows project-local config
+ action: Enter
+
+Codex auth picker:
+ detect: Sign in with ChatGPT
+ detect: Provide your own API key
+ action: none
+ result: provider_auth_required
+
+Codex ready:
+ detect: Ask Codex
+ detect: model/status line
+ action: none
+```
+
+### Codex Edge Cases
+
+| Case | Expected behavior |
+| --- | --- |
+| Codex auth picker appears | return `provider_auth_required`, do not press Enter |
+| Update prompt appears | press Down, Enter |
+| Trust appears after update | press Enter |
+| Stale update snapshot remains in buffer | state machine must move to next phase and reclassify fresh snapshots |
+| Per-launch native override values present | trust prompt should not appear |
+| Path has spaces/quotes | config override builder escapes TOML dotted-key segments correctly |
+| Codex `exec` does not show trust | no problem, override is harmless and scoped to launch |
+| Codex mutates `~/.codex/config.toml` after native override | record provider-side mutation diagnostic; keep rollback flag |
+| Global config write fails | app does not direct-write in v1; provider-side failures surface through Codex/runtime diagnostics |
+
+### Codex Config Mutation Sentinel
+
+Because a recent Codex issue reports unexpected persistence from project trust native overrides, add a small sentinel for manual smoke and optional debug diagnostics.
+
+Sentinel behavior:
+
+- before launch, read only `stat` plus a content hash of `~/.codex/config.toml` if the file exists
+- do not log file content
+- after a Codex launch failure or debug-enabled smoke, re-check stat/hash
+- if changed, record `codex_config_changed_during_trust_override`
+- never restore user config automatically
+
+This is not a blocker for issue #100 because Claude/orchestrator trust is the main gate there. It is a guardrail so we do not silently rely on a false "ephemeral override" assumption.
+
+Top 3 Codex trust options after the external finding and sibling code review:
+
+1. Desktop `--settings` contract -> sibling Codex native `configOverrides` plus mutation sentinel - 🎯 9 🛡️ 9 🧠 6, ~120-220 LOC across both repos. Best v1 because Claude argv never receives Codex `-c`.
+2. App-owned atomic write to `~/.codex/config.toml` with backup - 🎯 7 🛡️ 6 🧠 7, ~140-220 LOC. More deterministic but we own user config risk.
+3. Codex PTY accept trust - 🎯 7 🛡️ 7 🧠 6, ~120-200 LOC. Provider-owned persistence, but more prompt automation and update/auth handling.
+
+Chosen: option 1 for v1, with a feature flag to disable Codex trust settings if mutation behavior is worse than expected.
+
+### Codex Integration Detail
+
+Codex settings patch must be typed and applied immutably.
+
+Do not mutate `provisioningEnv.providerArgs` or `crossProviderMemberArgs` in place. Instead derive:
+
+- `providerArgsForLaunch`
+- `providerArgsByProviderForLaunch`
+- `crossProviderMemberArgsForLaunch`
+
+Consumers that must receive patched Codex settings:
+
+- `providerArgsByProvider.get('codex')` before `resolveAndValidateLaunchIdentity`
+- `runtimeArgsPlan.providerArgs` when the lead provider is Codex
+- `crossProviderMemberArgs.args` as merged `--settings` JSON when Codex is a secondary provider
+- launch snapshot and spawn context through the final merged args
+
+Surfaces that must not receive Codex native `-c`:
+
+- pure Anthropic primary provider args
+- Claude PTY trust preflight command args
+- runtime bootstrap args that are not provider CLI args
+- OpenCode pure runtime adapter args in v1
+
+Special cases:
+
+- Anthropic API-key helper strips path-based `--settings`; app-owned Codex trust settings must be inline JSON settings so they survive.
+- `mergeJsonSettingsArgs` should still run after adding patches, and must deep-merge app-owned Codex workspace-trust settings with forced-login settings.
+- Existing user Codex settings should not be blindly overwritten. App-owned workspace-trust keys should live under a dedicated namespace such as `codex.agent_teams_workspace_trust`.
+- Sibling runtime Codex native path already models `configOverrides` as repeatable `-c` values. Keep our desktop settings payload compatible with that contract instead of putting raw `-c` in Claude argv.
+- Every patch must carry `targetSurface`, `dialect`, `owner`, and `dedupeKey`.
+- Applying the same patch twice must be a no-op for exact app-owned dotted overrides.
+- If a target surface cannot prove that app-owned settings reach the Codex teammate, do not append the patch. Emit `codex_trust_settings_surface_unknown`.
+
+Required v1 hardening:
+
+- Pass Codex trust settings into default model resolution for non-primary Codex members. This is required because default model resolution can call provider runtime commands before final launch args exist.
+- Add a typed `CodexConfigOverride` model and typed `WorkspaceTrustLaunchArgPatch` from the start instead of raw string arrays.
+- Characterize current `buildInheritedCliFlags` behavior so Anthropic lead -> Codex teammate keeps inline app-owned settings intact.
+- Characterize current `buildProviderCliCommandArgs(...)` order for provider facts/default model probes.
+- Keep all Codex trust settings behind `AGENT_TEAMS_WORKSPACE_TRUST_CODEX_SETTINGS` so a runtime contract regression can be rolled back without disabling Claude PTY trust preflight.
+
+## PTY Dialog Engine
+
+The dialog engine is the most important reliability piece.
+
+It must be a phase-aware state machine, not a single regex over all accumulated output.
+
+Why:
+
+- Codex update output can remain visible after pressing Skip.
+- Claude/Codex TUI rendering can remove or collapse whitespace.
+- Dialogs can appear one frame after a prompt-looking screen.
+- Some startup screens include provider names before the actual prompt is ready.
+
+### Engine Inputs
+
+```ts
+export type TerminalSnapshot = {
+ raw: string
+ normalized: string
+ lines: string[]
+ elapsedMs: number
+}
+
+export type StartupDialogRule = {
+ id: string
+ phase: StartupDialogPhase
+ priority: number
+ match(snapshot: TerminalSnapshot): boolean
+ actions: PtyKeyAction[]
+ afterAction?: 'continue' | 'success' | 'fail'
+ successProbe?: StartupSuccessProbe
+}
+```
+
+### Normalization
+
+Use a shared normalizer:
+
+- strip ANSI escape sequences
+- convert `\r` to `\n`
+- keep both line-based text and compact text
+- compact text lowercases and removes whitespace for TUI-collapsed matching
+
+Example:
+
+```text
+Quick safety check
+QuickSafetyCheck
+Quicksafetycheck:
+```
+
+All must match the same trust rule.
+
+### Required Phase Order
+
+```text
+claude_resume
+codex_update
+workspace_trust
+bypass_permissions
+custom_api_key
+rate_limit
+ready
+```
+
+The phase order should be configurable by strategy, but shared rules can live in one registry.
+
+### Action Safety
+
+Only these actions are allowed:
+
+- `Enter`
+- `Down`
+- `Up`
+- `Escape` only for explicit abort paths
+
+Do not send text input.
+Do not send shell commands.
+Do not press Enter on unknown screens.
+
+Allowed exception:
+
+- typed confirmation can be modeled as `type:2` only for an exact Claude bypass confirmation screen that contains the full warning and both numbered choices
+- this must be behind its own rule id and tests, not a general text-entry action
+
+### Timeouts
+
+Suggested defaults:
+
+- per phase: 8 seconds
+- action settle delay: 200-500 ms
+- post-prompt grace: 100-250 ms
+- total PTY warmup cap per workspace: 15 seconds
+
+These should be constants with tests, not magic numbers in strategies.
+
+### Cleanup
+
+Always kill the PTY child at the end of preflight.
+
+On POSIX:
+
+- kill process group if possible
+- fall back to direct child kill
+
+On Windows:
+
+- use existing process tree kill helper if available
+
+Do not kill unrelated Claude, Codex, tmux, or OpenCode processes.
+
+### Snapshot Memory Model
+
+Use two bounded transcript buffers:
+
+- `rawTail`: last N bytes for diagnostics
+- `phaseWindow`: text observed since the previous accepted action
+
+Rule matching should use `phaseWindow`, not the full lifetime buffer. This is the GasCity lesson that matters most for Codex: after pressing Skip on an update prompt, stale update text can remain visible while the trust prompt appears below it.
+
+After every accepted action:
+
+- record the action and matched rule
+- clear `phaseWindow`
+- keep `rawTail`
+- wait for an action-settle delay
+- require a fresh snapshot timestamp before matching the same phase again
+
+This prevents loops like:
+
+```text
+see update -> Down Enter -> stale update still visible -> Down Enter forever
+```
+
+It also lets tests replay exact terminal snapshots without a real PTY.
+
+## Runtime Contract
+
+The runtime should remain the authority for process execution and bootstrap events.
+
+The host should prepare trust before launch and pass explicit settings/contracts. The runtime should still fail safely if trust is missing.
+
+### Host To Runtime
+
+The desktop app may pass:
+
+- prepared workspace cwd
+- existing provider launch args
+- app-owned Codex workspace-trust settings containing validated native config override values
+- a diagnostic marker like `AGENT_TEAMS_WORKSPACE_TRUST_PREFLIGHT=1`
+
+Avoid:
+
+- passing raw provider config JSON outside the typed settings contract
+- passing secrets
+- passing global trust flags
+- disabling runtime checks
+
+### Runtime To Host
+
+Runtime failure should be typed when possible:
+
+```json
+{
+ "code": "workspace_trust_required",
+ "provider": "claude",
+ "workspace": "/path",
+ "message": "workspace trust is not accepted"
+}
+```
+
+Current text classification already catches this class. Keep it as fallback even if typed events are added.
+
+## Precise Integration Plan In `TeamProvisioningService`
+
+This is the safest wiring plan after reviewing `_createTeamInner` and `_launchTeamInner`.
+
+1. Add an optional `workspaceTrustCoordinator` dependency to `TeamProvisioningService`.
+ - Keep the constructor unchanged.
+ - Default production value is created lazily by a private getter.
+ - Tests can inject a fake coordinator through `setWorkspaceTrustCoordinator(...)`.
+ - This follows existing service setter patterns and avoids churn across many `new TeamProvisioningService(...)` tests.
+ - The service should depend on the coordinator interface, not on `node-pty`, dialog rules, or Codex config override builders.
+
+2. Add optional fields to `ProvisioningRun`.
+ - `workspaceTrustPlan`
+ - `workspaceTrustExecution`
+ - `workspaceTrustRetryAttempted`
+ - `workspaceTrustDiagnostics`
+
+3. Insert early settings-only planning after `buildProvisioningEnv`.
+ - Inputs: `request.cwd`, resolved/requested provider ids, `providerArgs`, `shellEnv`, `claudePath`.
+ - Output: Codex settings patches for provider probes/default model resolution.
+ - No PTY, no locks, no config writes, no prompt actions.
+ - Pass the resulting provider arg patch resolver into `materializeEffectiveTeamMemberSpecs()` without importing workspace-trust internals there.
+
+4. Insert full `planFull()` after `buildCrossProviderMemberArgs`.
+ - Inputs: `request.cwd`, `allEffectiveMemberSpecs`, `effectiveMemberSpecs`, `resolvedProviderId`, `providerArgs`, `crossProviderMemberArgs`, `shellEnv`, `claudePath`.
+ - Output: unique workspaces, strategy plan, Codex settings patches, non-blocking diagnostics.
+ - No PTY, no config writes, no prompt actions.
+ - Workspace collection must use `allEffectiveMemberSpecs`; provider runtime/bootstrap behavior can still use filtered `effectiveMemberSpecs`.
+
+5. Derive patched settings/args immutably.
+ - `providerArgsForLaunch = applyProviderSettingPatches(providerArgs, plan.patches, resolvedProviderId)`
+ - `crossProviderMemberArgsForLaunch = applyCrossProviderSettingPatches(crossProviderMemberArgs, plan.patches)`
+ - `providerArgsByProviderForLaunch = buildProviderArgsByProvider(...)`
+ - Do not mutate `provisioningEnv`.
+ - Do not mutate the original `crossProviderMemberArgs`.
+
+6. Pass `providerArgsByProviderForLaunch` into `resolveAndValidateLaunchIdentity`.
+ - This is required because `readRuntimeProviderLaunchFacts()` receives provider args.
+ - Missing this step can leave Codex validation/probes using untrusted default args.
+
+7. Create `ProvisioningRun`, then run the shared execution helper.
+ - Before `execute()`, update progress to cancellable state `spawning` with message `Preparing workspace trust`.
+ - Emit progress before and after.
+ - Store diagnostics on `run`.
+ - Use `progress.warnings` for short live warnings.
+ - Do not add workspace trust entries to `progress.launchDiagnostics` in v1 because its `code` field is a fixed union.
+ - If execution returns `blocked`, fail with a clear setup message and restore prelaunch config through the existing cleanup path.
+ - If execution returns `soft_failed`, keep launching and let runtime classification handle any remaining trust failure.
+ - If execution returns `cancelled`, follow existing cancellation semantics and do not create a trust failure.
+ - After `execute()`, check the run is still current before continuing. User stop or shutdown may have cleaned it up while the PTY was running.
+ - Run this before `clearPersistedLaunchState`.
+ - Use a typed cleanup policy so create mode deletes only create-owned artifacts and launch mode restores prelaunch config.
+
+8. Use patched args in final launch args.
+ - Build `envResolutionForLaunch = { ...provisioningEnv, providerArgs: providerArgsForLaunch }`.
+ - `buildTeamRuntimeLaunchArgsPlan` should receive `envResolutionForLaunch`.
+ - `launchArgs.push(...runtimeArgsPlan.providerArgs)` should use patched primary args.
+ - `launchArgs.push(...crossProviderMemberArgsForLaunch.args)` should use patched secondary args.
+
+9. Copy diagnostics into failure artifacts.
+ - Add `workspaceTrustPreflight` under artifact manifest `flags`.
+ - Keep it bounded and redacted.
+ - Do not store full PTY transcripts.
+
+10. Keep preflight env separate from runtime env.
+ - Build `trustPreflightEnv` from `shellEnv`.
+ - Strip team runtime env and app-managed Anthropic helper env.
+ - Preserve `CLAUDE_CONFIG_DIR` and normal shell auth env.
+
+11. Keep temp preflight files out of run cleanup.
+ - Temp empty MCP config belongs to `ClaudePtyWorkspaceTrustStrategy`.
+ - The strategy removes it in `finally`.
+ - Do not store it in `run.mcpConfigPath`, because that field is for team runtime MCP config and existing cleanup removes it as launch-owned state.
+
+12. Use the same shared helper in create and launch.
+ - `_createTeamInner` and `_launchTeamInner` should call the same workspace-trust integration helper.
+ - The only difference should be the cleanup policy and user-facing progress wording.
+
+Preferred helper shape:
+
+```ts
+type WorkspaceTrustLaunchArgContext = {
+ primaryProviderId: TeamProviderId
+ primaryProviderArgs: string[]
+ crossProviderArgs: CrossProviderMemberArgsResult
+ providerArgsByProvider: Map<TeamProviderId, string[]>
+}
+
+type WorkspaceTrustLaunchArgContextWithPatches = WorkspaceTrustLaunchArgContext & {
+ diagnostics: WorkspaceTrustDiagnostic[]
+}
+```
+
+The helper should be pure and covered by tests. Most regressions here would be invisible until mixed-provider launch, so this logic should not live inline inside `_createTeamInner` or `_launchTeamInner`.
+
+## Retry Policy
+
+Recommendation after code review: ship preflight first, and ship automatic relaunch retry only as a separate sub-phase behind a feature flag.
+
+Reason: `handleDeterministicBootstrapEvent()` already owns failure cleanup, progress state, retained logs, artifact writing, member spawn statuses, and config restoration. Relaunching from inside that path is higher risk than preparing trust before the first launch.
+
+```mermaid
+flowchart TD
+ A["Launch requested"] --> B["Run trust preflight"]
+ B --> C["Launch runtime"]
+ C --> D{"Failure classification"}
+ D -->|"workspace_trust_required and retry not used"| E["Run preflight again"]
+ E --> F["Relaunch once"]
+ F --> G{"Still trust failure?"}
+ G -->|"yes"| H["Show Workspace trust required diagnostics"]
+ G -->|"no"| I["Continue"]
+ D -->|"other failure"| J["Keep original failure path"]
+```
+
+Rules:
+
+- Retry only once.
+- Retry disabled by default until Phase 4b smoke tests pass.
+- Retry only for `workspace_trust_required`.
+- Do not retry for auth, missing binary, permission denied, model errors, or cwd unavailable.
+- Preserve original error text in diagnostics.
+- Record both preflight attempts.
+- Do not relaunch directly inside `handleDeterministicBootstrapEvent`.
+- If retry is enabled later, clean up the failed run first, then call the normal launch entrypoint with a retry marker.
+
+## Diagnostics
+
+Add a compact diagnostics object to launch failure artifacts and progress trace.
+
+Example:
+
+```json
+{
+ "workspaceTrustPreflight": {
+ "attempt": 1,
+ "strategyResults": [
+ {
+ "provider": "claude",
+ "workspace": "/private/tmp/project",
+ "status": "accepted",
+ "matchedDialogs": ["workspace_trust"],
+ "actions": ["Enter"],
+ "elapsedMs": 532,
+ "lockWaitMs": 0,
+ "rawTailIncluded": false
+ },
+ {
+ "provider": "codex",
+ "status": "args_patch_added",
+ "pathsCount": 2
+ }
+ ]
+ }
+}
+```
+
+Redaction rules:
+
+- Do not log env values.
+- Do not log full `.claude.json`.
+- Do not log OAuth/account details.
+- Do not log API keys.
+- Log path and provider only if existing diagnostics already expose the workspace path.
+- Do not log PTY raw tail unless the launch failed and the debug flag is set.
+- Redact raw tail before writing artifact.
+
+### Progress Diagnostics Without UI Schema Change
+
+No renderer schema change is needed in v1.
+
+Use existing surfaces:
+
+- `progress.message` for high-level state:
+ - `Preparing workspace trust`
+ - `Workspace trust prepared`
+ - `Claude setup required`
+- `progress.warnings` for non-blocking trust preflight warnings.
+- artifact manifest `flags.workspaceTrustPreflight` for structured details.
+- launch failure classification remains `workspace_trust_required` when runtime still fails.
+
+User-facing rule:
+
+- If preflight succeeds, do not show extra UI noise.
+- If preflight soft-fails but launch succeeds, keep the warning in diagnostics only.
+- If launch fails with trust, show the existing `Workspace trust required` message and include preflight evidence in Copy diagnostics.
+
+Do not use `progress.launchDiagnostics` for trust preflight in v1 unless `TeamLaunchDiagnosticItem.code` is explicitly extended and renderer tests are added.
+
+## Integration Points
+
+### Desktop App
+
+Likely touch points:
+
+- `TeamProvisioningService`
+ - before `spawnCli(...)`
+ - after cwd, worktrees, provider mix, shell env, and provider args are resolved
+ - before final launch args are frozen
+- `TeamLaunchFailureArtifactPack`
+ - include trust preflight diagnostics
+ - keep `workspace_trust_required` classification first
+- Tests under:
+ - `test/main/services/team/`
+ - new `workspaceTrust` unit tests
+
+### Sibling Orchestrator Repo
+
+The sibling runtime already has:
+
+- `isPathTrusted(dir)`
+- `checkHasTrustDialogAccepted()`
+- headless process failure when trust is missing
+- Codex native `configOverrides`
+- hooks/auth helpers/MCP helper guards that depend on workspace trust
+
+Preferred contract changes:
+
+- Keep `isPathTrusted` gate.
+- Add typed failure metadata if not already available through events.
+- Accept Codex `configOverrides` from host without special casing.
+- Do not add PTY prompt handling inside the orchestrator in v1.
+
+This keeps the orchestrator as runtime, not UX policy owner.
+
+Security rule: do not weaken these runtime checks. The desktop host prepares the selected workspace, but the runtime still decides whether a given headless process is allowed to start.
+
+## Implementation Phases
+
+### Revised Low-Risk Implementation Order
+
+Do not start with the full preflight pipeline. Build in this order:
+
+1. Domain and pure helpers.
+2. Path canonicalization, feature flags, and lock registry.
+3. Dialog engine with fake terminal snapshots.
+4. Codex settings patching with pure integration helpers.
+5. Early settings-only plan integration for provider/default-model probes.
+6. Claude PTY strategy behind a fake `PtyProcessPort`.
+7. Shared create/launch `TeamProvisioningService` full plan integration.
+8. Shared create/launch execute integration behind feature flags.
+9. Protected Claude command smoke with real profile in temp workspaces.
+10. Codex mutation sentinel smoke with real profile or isolated `CODEX_HOME`.
+11. Enable default preflight.
+12. Add automatic relaunch retry only after preflight is stable.
+
+Top 3 implementation approaches:
+
+1. Three-stage coordinator: early settings, full plan, execute - 🎯 8 🛡️ 9 🧠 8, ~950-1450 LOC desktop plus 50-180 sibling runtime LOC. Best v1 choice given the create/launch and default-model gaps.
+2. Two-stage coordinator: full plan and execute only - 🎯 7 🛡️ 8 🧠 7, ~650-950 LOC. Simpler, but misses early default-model/provider probe arg consistency.
+3. Full preflight plus automatic retry in one PR - 🎯 6 🛡️ 6 🧠 9, ~1000-1400 LOC. More complete, but too much launch lifecycle risk for first rollout.
+
+### Phase 0 - Keep Current Diagnostics
+
+Already done or in progress:
+
+- classify `workspace_trust_required`
+- show `Workspace trust required` for deterministic bootstrap failure
+- preserve exact provider error as `error`
+
+Value:
+
+- users see the true cause even before auto-preflight exists
+- tests protect the current behavior
+
+### Phase 1 - Domain And Dialog Engine
+
+Add:
+
+- workspace trust domain types
+- path canonicalization helper
+- feature flag parser
+- workspace trust lock registry
+- Codex config override builder
+- terminal normalizer
+- startup dialog rules
+- PTY dialog engine with fake adapter tests
+
+No live process changes yet.
+
+Tests:
+
+- Claude trust compact text matches
+- Codex update matches
+- Codex stale update then trust sends `Down, Enter, Enter`
+- stale `rawTail` does not rematch update after phase transition
+- `phaseWindow` resets after accepted action
+- Bypass sends `Down`, then `Enter` with delay
+- Custom API key sends `Up`, then `Enter`
+- Unknown screen sends no keys
+- Claude `ClaudeCode v2.1.119` splash alone is not ready
+- Claude single prompt marker alone is not ready
+- Claude trust dialog takes precedence over prompt-looking `>` text
+- Claude trust dialog takes precedence over ready indicators and bypass text
+- Codex trust dialog takes precedence over prompt-looking update/banner text
+- Codex auth picker sends no keys
+- PTY unavailable returns skipped diagnostic
+- cancellation stops PTY and returns non-throwing diagnostic
+- Prompt-looking screen waits a grace period for a delayed dialog
+- Timeout returns a non-throwing diagnostic
+- Windows drive and UNC paths dedupe correctly
+- trusted parent path covers child workspace
+- lock wait re-probes state before spawning PTY
+- malformed feature flag value emits one diagnostic and uses default
+
+### Phase 2 - Claude PTY Strategy
+
+Add:
+
+- `ClaudePtyWorkspaceTrustStrategy`
+- `ClaudeStateProbe`
+- `ClaudePreflightCommandBuilder`
+- temporary empty MCP config writer
+- process cleanup
+- per-workspace lock
+- stripped `trustPreflightEnv`
+- diagnostics
+
+Run it behind a feature flag first:
+
+```text
+AGENT_TEAMS_WORKSPACE_TRUST_PREFLIGHT=1
+```
+
+Success criteria:
+
+- fresh workspace gets `hasTrustDialogAccepted`
+- repeated preflight is no-op
+- empty profile returns setup required
+- protected Claude command is used with no runtime launch args
+- `-p` and `doctor` are not used for workspace trust preflight
+- temp MCP config is `{"mcpServers":{}}`, not plain `{}`
+- project/local settings are not loaded in the protected command
+- built-in tools are disabled with `--tools ""`
+- trust action is followed by state probe and immediate PTY kill on success
+- follow-up bypass/custom API dialogs are ignored if trust has already persisted
+- preflight env strips app-managed team helper env
+- trusted parent path skips PTY for child workspace
+- selected home directory returns non-persistable-home blocked diagnostic
+- selected git subdirectory observes trust persisted at git-root key
+- symlinked git workspace observes trust through original path, realpath, and git-root candidates
+
+### Phase 3 - Codex Settings Strategy
+
+Add:
+
+- `CodexWorkspaceTrustSettingsStrategy`
+- `CodexConfigOverrideBuilder`
+- provider settings patch application
+- early settings-only plan for `request.cwd`
+- sibling runtime settings reader for `codex.agent_teams_workspace_trust.config_overrides`
+
+Success criteria:
+
+- settings contain repeatable dotted `projects."<path>".trust_level="trusted"` override values
+- overrides are represented as typed patches with `dialect: 'claude-codex-runtime-settings'` on desktop and `codex-native-config-override` in sibling runtime
+- original and realpath included
+- quoted paths are valid
+- app does not directly write global Codex config
+- existing Codex forced-login settings stay stable
+- Codex primary lead gets the trust settings
+- Anthropic lead with Codex teammate gets inherited trust settings via `--settings` JSON
+- Anthropic primary provider args do not get Codex native `-c`
+- `providerArgsByProvider.get('codex')` includes trust settings before launch identity validation
+- Anthropic args are unchanged when no Codex provider is present
+- applying early and full-plan patches does not duplicate exact app-owned overrides
+- duplicate path keys are removed
+- `mergeJsonSettingsArgs` deep-merges the trust settings with forced-login settings
+- `buildTeamRuntimeLaunchArgsPlan` receives a cloned env resolution with patched `providerArgs`
+- original `provisioningEnv.providerArgs` remains unchanged
+- user extra args before provider args cannot remove app-managed Codex trust settings
+- provider facts probes receive patched Codex settings even before final launch args exist
+- default model resolution receives patched Codex settings for non-Anthropic members without explicit model
+- sibling Codex native `turnExecutor` appends validated settings values to `configOverrides`
+- no v1 deterministic launch path emits Codex native `-c` into Claude argv
+- optional mutation sentinel records config hash changes without logging config content
+- no v1 code path emits a single `projects={...}` table blob
+
+### Phase 4a - Create/Launch Integration Without Retry
+
+Integrate coordinator into `TeamProvisioningService`.
+
+Flow:
+
+1. In create and launch, run early settings-only plan after `buildProvisioningEnv`.
+2. Materialize members/default models with early patched provider settings.
+3. Resolve OpenCode workspaces, retain `allEffectiveMemberSpecs`, and plan runtime lanes.
+4. Build cross-provider args for primary-lane members, then run full `WorkspaceTrustCoordinator.planFull()` with the all-effective workspace set.
+5. Apply final provider settings patches immutably.
+6. Resolve and validate launch identity with patched provider settings.
+7. Create `ProvisioningRun`.
+8. Set progress to cancellable `spawning` and run shared `prepareWorkspaceTrustForDeterministicRun()`.
+9. Clear persisted launch state only if execute did not block/cancel.
+10. Set `run.launchStateClearedForRun = true` only after clear succeeds.
+11. Launch runtime.
+12. On failure, artifact manifest includes `flags.workspaceTrustPreflight`.
+
+Guardrails:
+
+- feature flag default can be off for first internal smoke
+- if preflight fails, launch can still proceed unless the failure is deterministic setup-required
+- all failures are diagnostic, not crashes
+- no launch retry in this phase
+- no new `TeamLaunchDiagnosticItem.code` in this phase
+- blocked preflight uses typed create/launch cleanup policy
+- blocked launch preflight restores prelaunch config and writes failure artifact
+- blocked create preflight removes run tracking and create-owned helper material without deleting unrelated team data
+- blocked preflight does not clear the previous launch snapshot
+- blocked launch preflight does not persist a synthetic failed launch snapshot
+- soft-failed preflight clears persisted launch state only after deciding to continue into bootstrap
+- cancelled preflight maps to existing launch cancellation semantics
+- stopped/shutdown run after preflight does not continue into `clearPersistedLaunchState`
+- trust lock timeout is soft failure, not a launch crash
+
+### Phase 4b - Optional Trust Retry
+
+Add controlled retry only after Phase 4a passes manual smoke.
+
+Flow:
+
+1. Existing launch fails with `workspace_trust_required`.
+2. Current run completes its normal cleanup path.
+3. Service schedules one relaunch with `workspaceTrustRetryAttempted: true`.
+4. Coordinator runs preflight again.
+5. Relaunch once.
+
+Guardrails:
+
+- behind `AGENT_TEAMS_WORKSPACE_TRUST_RETRY=1`
+- no retry for non-trust failures
+- no retry if launch was cancelled
+- no retry if preflight returned `provider_setup_required`
+- preserve original failure in artifacts
+
+### Phase 5 - Default On And Cleanup
+
+After smoke tests:
+
+- enable by default
+- remove temporary feature flag if desired
+- update debugging runbook
+- add launch artifact examples
+
+## Test Plan
+
+### Unit Tests
+
+Dialog engine:
+
+- `matchesClaudeTrustDialogWithCollapsedWhitespace`
+- `matchesCodexTrustDialog`
+- `skipsCodexUpdateThenAcceptsTrust`
+- `doesNotPressEnterOnCodexAuthPicker`
+- `acceptsClaudeBypassAfterTrust`
+- `acceptsClaudeCustomApiKeyAfterTrust`
+- `stopsAfterTrustStatePersistsBeforeFollowUpDialog`
+- `usesProtectedClaudeCommandForTrustPreflight`
+- `doesNotUsePrintOrDoctorForTrustPreflight`
+- `writesValidEmptyMcpConfigShape`
+- `removesTempEmptyMcpConfigOnSuccessAndFailure`
+- `waitsGraceAfterPromptLookingSnapshot`
+- `timesOutOnIrrelevantSnapshots`
+- `cancelsBeforeWritingKeyAction`
+
+Codex config overrides:
+
+- simple absolute path
+- `/var` and `/private/var`
+- path with spaces
+- path with quotes
+- Windows drive path
+- Windows drive path case dedupe
+- UNC path
+- trailing slash dedupe
+- duplicate path dedupe
+- one dotted override per trusted path
+- no single `projects={...}` table blob
+- app-owned settings deep-merge with forced-login settings
+- direct Codex `-c` never appears in Claude launch argv
+- `WorkspaceTrustLaunchArgPatch` requires owner, dialect, target surface, and dedupe key
+- applying the same app-owned dotted override twice does not duplicate settings values
+- pure Anthropic primary provider args never receive Codex native `-c`
+- unknown Codex target surface emits `codex_trust_settings_surface_unknown`
+- `buildProviderCliCommandArgs(...)` preserves provider facts/default model probe order with merged settings
+- sibling `buildInheritedCliFlags` preserves app-owned settings for Anthropic lead -> Codex teammate
+- sibling Codex native settings reader validates and bounds `config_overrides`
+
+Claude state probe:
+
+- reads `$HOME/.claude.json`
+- reads `$CLAUDE_CONFIG_DIR/.claude.json`
+- trusts exact path
+- trusts parent path
+- trusts git root path for selected subdirectory
+- mirrors runtime config key normalization
+- detects non-persistable home directory
+- ignores missing state file
+- retries transient JSON parse failure during provider write
+- does not expose sensitive keys in diagnostics
+
+Coordinator:
+
+- dedupes workspaces
+- runs Claude strategy for Codex-only team because orchestrator gate needs it
+- returns Codex settings patch only when Codex provider is present
+- serializes concurrent preflights for same workspace
+- re-probes trust after waiting for same-workspace lock
+- lock timeout returns soft failure
+- cancellation while waiting for lock returns cancelled
+- retry only on `workspace_trust_required`
+- can be injected with `setWorkspaceTrustCoordinator`
+- dispose kills active PTYs without touching unrelated provider processes
+- plan diagnostics do not throw for PTY/provider setup problems
+- feature flag parser accepts on/off variants
+- malformed feature flag falls back to default with diagnostic
+- `planArgsOnly()` and `planFull()` never throw after provider helper material may have been created
+- `workspaceTrustDiagnostics` is budgeted before assignment to `ProvisioningRun`
+- pure OpenCode runtime-adapter launch does not call the coordinator in v1
+- mixed OpenCode side-lane workspaces are included when the launch still uses the deterministic path
+- feature public facade is the only import used by `TeamProvisioningService`
+- fake `PtyProcessPort` drives strategy tests without loading `node-pty`
+- `NodePtyProcessAdapter` missing module returns skipped diagnostic instead of throwing
+- workspace trust feature does not import renderer terminal services or `BrowserWindow`
+
+Create/launch integration:
+
+- constructor signature stays unchanged
+- fake coordinator receives expected workspaces from `request.cwd` and `member.cwd` in both create and launch
+- fake coordinator receives workspaces from `allEffectiveMemberSpecs`, including side-lane/generated worktrees, before filtering primary-lane `effectiveMemberSpecs`
+- both `_createTeamInner` and `_launchTeamInner` use the same `prepareWorkspaceTrustForDeterministicRun()` helper
+- early settings-only plan runs after `buildProvisioningEnv` and before `materializeEffectiveTeamMemberSpecs`
+- default model resolution receives patched Codex settings through a provider args resolver
+- Codex settings patch is visible to `resolveAndValidateLaunchIdentity`
+- cross-provider Codex settings patch reaches inherited teammate settings
+- `progress.launchDiagnostics` is unchanged by preflight in v1
+- preflight progress uses a cancellable state before awaiting PTY
+- failure artifact flags include bounded `workspaceTrustPreflight`
+- artifact `workspaceTrustPreflight` drops excess workspaces/evidence before manifest write
+- artifact flags include no raw env values or unredacted PTY output
+- blocked preflight assigns diagnostics before `cleanupRun()` writes artifacts
+- cancellation before PTY execute skips preflight and launch cleanly
+- cancellation during PTY execute does not continue into `clearPersistedLaunchState`
+- `stopTeam()` during PTY execute does not restore config twice or write a second artifact
+- blocked launch preflight calls restore-prelaunch-config path
+- blocked create preflight does not call restore-prelaunch-config
+- blocked create preflight before meta writes does not delete unrelated existing team data
+- blocked preflight removes run tracking and retains failed progress
+- blocked preflight artifact includes preflight diagnostics
+- soft-failed preflight continues to bootstrap writes
+- preflight execute does not run with `CLAUDE_ENABLE_DETERMINISTIC_TEAM_BOOTSTRAP`
+- preflight execute does not run with app-managed Anthropic helper env
+- preflight execute preserves custom `CLAUDE_CONFIG_DIR` but does not force default `~/.claude`
+- preflight PTY sessions are not added to `transientProbeProcesses`
+- preflight blocked before `clearPersistedLaunchState` preserves previous launch snapshot
+- preflight blocked before `clearPersistedLaunchState` does not call `persistLaunchStateSnapshot`
+- cleanupRun finalizes failed launch snapshots only when `launchStateClearedForRun` is true
+- direct teammate restart paths do not call workspace trust preflight in v1
+
+### Focused Existing Tests
+
+Keep existing focused tests:
+
+```bash
+pnpm vitest run test/main/services/team/TeamLaunchFailureArtifactPack.test.ts
+pnpm vitest run test/main/services/team/TeamProvisioningService.test.ts -t "workspace trust"
+```
+
+Add new focused tests:
+
+```bash
+pnpm vitest run test/main/services/team/workspaceTrust
+pnpm vitest run test/main/services/team/TeamProvisioningService.test.ts -t "trust preflight"
+```
+
+### Manual Compatibility Scripts
+
+Keep scripts under temp-only probes or document commands. Do not commit user profile mutations.
+
+Manual scenarios:
+
+- Claude fresh temp workspace with real profile
+- Claude isolated seeded config
+- Claude isolated empty config
+- Claude real profile with runtime launch using `--dangerously-skip-permissions` after preflight
+- Claude with dummy `ANTHROPIC_API_KEY` in shell env
+- two teams launching the same fresh workspace concurrently
+- Codex TUI real profile new workspace
+- Codex TUI per-launch trusted override
+- Codex isolated `CODEX_HOME` auth picker
+- Codex path escaping with spaces and quotes
+
+## Risk Register
+
+| Risk | Severity | Mitigation |
+| --- | --- | --- |
+| Pressing Enter on unknown prompt | High | allowlisted rules only, no generic Enter |
+| Stale terminal snapshots cause wrong phase | High | phase state machine, replayable snapshots, action settle delay |
+| Trust accepted for wrong path | High | exact cwd + realpath only, no parent auto-trust |
+| Provider auth picker misclassified | Medium | explicit auth rules return `provider_auth_required` with no action |
+| Empty Claude onboarding blocks preflight | Medium | setup-required diagnostic, no auto-theme selection in v1 |
+| PTY process leak | High | strict timeout and process tree cleanup |
+| Windows path serialization bug | Medium | config override builder tests for drive, backslash, quotes |
+| OneDrive/EPERM write issue | Medium | do not direct-write trust files in v1; rely on provider write path |
+| Multiple launches race trust state | Medium | per provider + realpath lock |
+| Retry loops | Medium | one retry only |
+| Hidden regression in launch args | High | settings patch tests and narrow integration point |
+| Constructor/test churn | Medium | keep constructor unchanged; inject coordinator through setter |
+| Progress diagnostic schema drift | Medium | keep trust details out of `TeamLaunchDiagnosticItem` in v1 |
+| Runtime security bypass | High | never disable `isPathTrusted`; use provider-owned trust persistence |
+| Pre-spawn failure leak | High | centralized `failDeterministicRunBeforeSpawn(...)` helper |
+| Preflight runs with runtime env | High | build stripped `trustPreflightEnv` and test removed env keys |
+| Same-workspace concurrent launches | Medium | lock by provider + normalized realpath, re-probe after wait |
+| Post-trust project side effects | Medium | kill PTY immediately after trust state persists |
+| Feature flag drift | Medium | central parser and artifact effective-flags record |
+| Raw PTY output leaks account info | Medium | structured diagnostics by default, raw tail only debug + failure + redaction |
+| Normal Claude startup executes project MCP after trust | High | protected interactive command with strict empty MCP, user settings only, hooks disabled, tools disabled |
+| User cancel unavailable during preflight | Medium | set cancellable progress state before awaiting PTY |
+| Stop/shutdown cleans run while preflight awaits | High | cancellation checks and stale-run guard before continuing launch |
+| Codex native override persists trust unexpectedly | Medium | mutation sentinel, feature flag rollback, no app-owned restore |
+| Create path missed | High | shared deterministic helper and create-specific tests |
+| Default model probe uses unpatched Codex settings | High | early settings-only plan and provider args resolver in materialization |
+| Cross-provider env helper mixes trust policy | Medium | patch returned args after `buildCrossProviderMemberArgs`, keep env helper unchanged |
+| PTY lifecycle treated like `ChildProcess` | High | coordinator-owned `IPty` registry, no `transientProbeProcesses` use |
+| Workspace trust flags grow unbounded | Medium | diagnostics budget before assigning to `run.workspaceTrustDiagnostics` |
+| Default `CLAUDE_CONFIG_DIR` breaks Keychain auth | High | preserve custom config dir only, never force auto-detected default |
+| Pre-run helper material lifecycle widened | Medium | no trust temp files before `ProvisioningRun`; separate helper cleanup hardening if needed |
+| Git-root trust key missed | High | probe exact cwd, realpath, git root, and parents with runtime key normalization |
+| Home directory trust not persisted | Medium | block home/root broad trust with explicit diagnostic |
+| Competitor blind dismiss copied accidentally | High | allowlisted phase engine only; blind dismiss remains out of v1 |
+| Preflight-blocked launch overwrites previous snapshot | High | `launchStateClearedForRun` guard before failed launch snapshot finalization |
+| Artifact written before trust diagnostics assigned | Medium | assign budgeted diagnostics before failed progress and `cleanupRun()` |
+| Direct restart scope creep | Medium | exclude direct restart from v1, keep coordinator reusable through a port |
+| Post-trust screens automated accidentally | High | kill PTY after trust persistence; do not automate MCP/include/Grove setup |
+| Pending-key no-run phase gets side effects | Medium | `planArgsOnly()` remains pure, no PTY/temp files/sentinels before run exists |
+| Codex `-c` passed to Claude CLI | High | typed patch dialect and target surface checks; no Codex native flags on Claude argv |
+| Duplicate trust args on fallback/retry | Medium | app-owned `dedupeKey` and exact-sequence dedupe in pure patch applier |
+| Deep feature imports from team service | Medium | public facade only, import-boundary test or dependency lint |
+| Reusing terminal UI PTY service | Medium | dedicated `NodePtyProcessAdapter`; no `BrowserWindow` or terminal session registry imports |
+| Arg dialect drift in sibling runtime | High | characterization tests for `buildInheritedCliFlags` and `buildProviderCliCommandArgs` before default-on |
+| Redaction policy drift | Medium | reuse/extract launch artifact redactor and budget before manifest assignment |
+
+## Highest-Risk Areas After Code Review
+
+| Area | Why risky | Mitigation | Score |
+| --- | --- | --- | --- |
+| Cross-provider Codex settings | Codex teammate under Anthropic lead inherits inline settings, not primary provider args | pure patch helper plus sibling inherited-settings tests | 🎯 9 / 🛡️ 9 / 🧠 6 |
+| Create vs launch drift | first team creation and later relaunch have similar but separate code paths | shared helper plus typed cleanup policy tests | 🎯 9 / 🛡️ 9 / 🧠 7 |
+| Default model probe order | `materializeEffectiveTeamMemberSpecs()` can call provider commands before final plan exists | `planArgsOnly()` provider args resolver | 🎯 8 / 🛡️ 9 / 🧠 6 |
+| Launch identity validation order | `readRuntimeProviderLaunchFacts()` runs before final launch args are built | run `planFull()` and apply provider settings patches before validation | 🎯 8 / 🛡️ 9 / 🧠 6 |
+| Preflight before `ProvisioningRun` exists | failures before run creation lose progress/artifact context | make planning non-throwing; run PTY only after run creation | 🎯 9 / 🛡️ 9 / 🧠 5 |
+| Relaunch retry | failure handler already owns cleanup and retained state | keep retry out of v1 default; implement later via normal launch entrypoint | 🎯 7 / 🛡️ 7 / 🧠 8 |
+| Claude empty onboarding | auto-selecting theme is not workspace trust and can surprise users | return `provider_setup_required`; do not press keys | 🎯 8 / 🛡️ 9 / 🧠 4 |
+| Optional `node-pty` | native dependency may fail to load on some systems | adapter import guard and skipped diagnostic | 🎯 9 / 🛡️ 8 / 🧠 4 |
+| Direct config writes | private state formats can change and writes can race provider process | no direct `.claude.json` or Codex config writes in v1 | 🎯 8 / 🛡️ 9 / 🧠 3 |
+| Progress schema | fixed diagnostic code union can break TS/UI if extended casually | artifact `flags` first, UI rows later | 🎯 9 / 🛡️ 10 / 🧠 3 |
+| Service constructor | many tests use positional constructor args | setter/lazy default instead of constructor param | 🎯 9 / 🛡️ 9 / 🧠 3 |
+| Pre-spawn cleanup | new failure point after run creation can leak config/run state | `failDeterministicRunBeforeSpawn(...)` plus tests | 🎯 8 / 🛡️ 9 / 🧠 6 |
+| Runtime env contamination | PTY preflight could see team runtime helper env | stripped preflight env | 🎯 8 / 🛡️ 9 / 🧠 5 |
+| Same-workspace lock | team lock is per team, not per cwd | workspace trust lock registry | 🎯 9 / 🛡️ 9 / 🧠 5 |
+| Post-trust exit | provider may continue into project setup after trust | success probe then immediate kill | 🎯 8 / 🛡️ 9 / 🧠 5 |
+| Project MCP execution after trust | normal Claude startup can load project MCP/settings after trust | protected command: `--bare`, strict empty MCP, user settings only, hooks off, tools off | 🎯 8 / 🛡️ 9 / 🧠 6 |
+| Cancel blocked during preflight | run starts in `validating`, which current cancel API rejects | switch to cancellable `spawning` before PTY await | 🎯 9 / 🛡️ 9 / 🧠 4 |
+| Stop/shutdown race after cleanup | `stopTeam()` can clean up the run while create/launch awaits preflight | stale-run guard after execute and before clearing launch state | 🎯 8 / 🛡️ 9 / 🧠 5 |
+| Codex native override mutates config | recent Codex issue reports non-ephemeral project trust override behavior | mutation sentinel, feature flag, no app direct write | 🎯 7 / 🛡️ 8 / 🧠 5 |
+| PTY ownership | `node-pty` returns `IPty`, not a normal spawned child | adapter-owned lifecycle and coordinator session registry | 🎯 9 / 🛡️ 9 / 🧠 5 |
+| Artifact flag size | flexible manifest flags can accidentally carry too much data | pre-budget diagnostics before artifact write | 🎯 9 / 🛡️ 9 / 🧠 3 |
+| Claude config env boundary | forcing default `CLAUDE_CONFIG_DIR` can break OAuth Keychain lookup | preserve custom only, test preflight env builder | 🎯 8 / 🛡️ 9 / 🧠 4 |
+| Runtime key mismatch | Claude may persist trust at git root while selected cwd is a child | mirror runtime key normalization and parent walk | 🎯 8 / 🛡️ 9 / 🧠 6 |
+| Home/root workspace | provider may not persist home trust and broad trust would be unsafe | explicit non-persistable diagnostic, no parent auto-trust | 🎯 9 / 🛡️ 10 / 🧠 3 |
+| Dialog vs ready precedence | competitor tests show trust text can coexist with ready-looking text | dialog-first phase machine and snapshot replay tests | 🎯 9 / 🛡️ 9 / 🧠 4 |
+| Codex contract dialect | Codex `-c` is valid only at the Codex binary boundary, while deterministic launch uses Claude settings | typed patch dialect plus sibling settings contract tests | 🎯 9 / 🛡️ 9 / 🧠 6 |
+| Patch idempotency | early plan, full plan, and future retry can append the same override more than once | owner/dedupeKey and exact-sequence dedupe | 🎯 9 / 🛡️ 9 / 🧠 4 |
+| Feature boundary | easy to deep-import adapters from a large service during implementation pressure | public facade and import-boundary test | 🎯 9 / 🛡️ 9 / 🧠 4 |
+| PTY adapter boundary | existing terminal service looks reusable but owns renderer session semantics | dedicated adapter and fake port tests | 🎯 9 / 🛡️ 9 / 🧠 5 |
+| Pure OpenCode adapter boundary | adding desktop preflight there can mix runtime-adapter ownership with legacy deterministic launch | v1 coordinator only in deterministic create/launch, adapter test asserts not called | 🎯 9 / 🛡️ 9 / 🧠 5 |
+| Side-lane workspace omission | filtering to `effectiveMemberSpecs` can drop mixed OpenCode generated worktrees | collect from `allEffectiveMemberSpecs` before lane filtering | 🎯 9 / 🛡️ 9 / 🧠 5 |
+| Codex projects table clobber | one `projects={...}` override can replace unrelated project config at the override layer | repeatable dotted overrides only | 🎯 9 / 🛡️ 9 / 🧠 4 |
+| Launch snapshot guard | current `cleanupRun()` can persist a failed launch snapshot for a preflight-only failure | add and test `launchStateClearedForRun` | 🎯 9 / 🛡️ 10 / 🧠 3 |
+| Direct restart exclusion | process/tmux restart paths are separate from create/launch but can look similar | document as v2 and assert v1 coordinator not called there | 🎯 9 / 🛡️ 9 / 🧠 3 |
+
+## Risk Burn-Down Plan
+
+This is the concrete plan for reducing the remaining risk before default-on. Do not treat the feature as done just because unit tests pass. Each gate below must pass in order.
+
+### Gate 0 - Freeze Current Failure Diagnostics
+
+Goal: preserve today's behavior when auto-preflight is disabled.
+
+Actions:
+
+- keep `workspace_trust_required` classification first in artifact pack classification.
+- keep deterministic bootstrap title `Workspace trust required`.
+- keep exact runtime error in `progress.error`.
+- add no renderer schema changes in this gate.
+
+Required tests:
+
+- issue #100 text classifies as `workspace_trust_required`.
+- issue #104 text still classifies as `workspace_trust_required`.
+- disabling all workspace-trust flags produces the same launch args and progress shape as before.
+
+Stop rule:
+
+- if disabled flags change launch behavior, stop and fix before adding PTY or settings contract.
+
+Rating: 🎯 10 🛡️ 10 🧠 2, ~20-60 LOC.
+
+### Gate 1 - Pure Core Only
+
+Goal: prove path, settings, diagnostics, and patch logic before any process is spawned.
+
+Actions:
+
+- implement path canonicalization and trust workspace collection as pure functions.
+- implement `CodexConfigOverrideBuilder` as pure value builder, not CLI argv builder.
+- implement `WorkspaceTrustLaunchArgPatch` with owner, dialect, target surface, and dedupe key.
+- implement settings patch applier as a pure function.
+- implement diagnostics budget as a pure function.
+
+Required tests:
+
+- path with spaces, quotes, brackets, backslashes, Windows drive, UNC, symlink, git child directory.
+- duplicate cwd/realpath/git-root candidates dedupe predictably.
+- app-owned settings deep-merge with existing Codex forced-login settings.
+- exact same patch applied twice produces one settings payload.
+- Anthropic-only provider matrix produces no Codex settings.
+- malformed patch surface returns diagnostic, not throw.
+
+Stop rule:
+
+- if a pure helper needs `TeamProvisioningService`, filesystem side effects, or provider processes, move it behind a port before continuing.
+
+Rating: 🎯 9 🛡️ 10 🧠 4, ~180-320 LOC plus tests.
+
+### Gate 2 - Sibling Codex Settings Contract
+
+Goal: make the Codex path safe before desktop uses it.
+
+Actions:
+
+- add sibling runtime reader for `codex.agent_teams_workspace_trust.config_overrides`.
+- validate only bounded strings matching `projects."<workspace-path>".trust_level="trusted"`.
+- append validated values to Codex native `configOverrides` inside `turnExecutor`.
+- never pass direct Codex `-c` through Claude/orchestrator argv.
+- keep malformed settings as skipped diagnostics.
+
+Required tests in sibling runtime:
+
+- valid settings become `configOverrides`.
+- malformed values are ignored: newline, NUL, unrelated key, non-string, over-limit.
+- existing MCP config overrides remain.
+- existing forced-login settings remain.
+- `buildInheritedCliFlags()` preserves inline settings from Anthropic lead to Codex teammate.
+- Claude argv snapshot contains no Codex native `-c`.
+
+Stop rule:
+
+- if settings do not survive Anthropic lead -> Codex teammate inheritance, keep `AGENT_TEAMS_WORKSPACE_TRUST_CODEX_SETTINGS=0`.
+
+Rating: 🎯 9 🛡️ 9 🧠 6, ~50-120 sibling LOC plus tests.
+
+### Gate 3 - Claude PTY Strategy In Isolation
+
+Goal: prove prompt automation without touching team launch.
+
+Actions:
+
+- implement `PtyDialogEngine` with fake snapshots.
+- implement `NodePtyProcessAdapter` behind `PtyProcessPort`.
+- implement protected command builder:
+ `claude --bare --strict-mcp-config --mcp-config <temp-empty-mcp-config-path> --setting-sources user --settings '{"disableAllHooks":true}' --tools ""`
+- implement state probe that mirrors sibling runtime parent walk.
+- kill PTY immediately after trust persists.
+
+Required tests:
+
+- unknown screen sends no keys.
+- trust screen sends one Enter only.
+- stale update/trust output cannot repeat actions forever.
+- auth picker and onboarding return setup/auth required with no key action.
+- `node-pty` missing returns skipped diagnostic.
+- cancellation kills PTY and returns cancelled.
+- temp MCP config is removed on success, failure, timeout, and cancellation.
+- raw tail is absent unless debug + failed/blocked + redacted.
+
+Manual smoke:
+
+- fresh temp git repo with real Claude profile accepts trust and writes `hasTrustDialogAccepted`.
+- repeated run is no-op.
+- selected git subdirectory is recognized through git-root key.
+- home directory returns non-persistable diagnostic.
+
+Stop rule:
+
+- if any unknown prompt receives Enter, disable Claude PTY and fix the rule set before launch integration.
+
+Rating: 🎯 8 🛡️ 9 🧠 6, ~250-420 LOC plus tests.
+
+### Gate 4 - Create/Launch Integration Behind Flags
+
+Goal: wire into `TeamProvisioningService` without changing normal lifecycle semantics.
+
+Actions:
+
+- inject coordinator through setter/lazy getter, not constructor.
+- keep `planArgsOnly()` side-effect free before run creation.
+- run PTY execute only after `ProvisioningRun` exists.
+- set progress to cancellable `spawning` before awaiting PTY.
+- execute before `clearPersistedLaunchState`.
+- set `launchStateClearedForRun = true` immediately after clear succeeds.
+- assign budgeted diagnostics before failed progress and before `cleanupRun()`.
+- use typed cleanup policy for create vs launch.
+- keep pure OpenCode adapter and direct restart paths out of v1.
+
+Required tests:
+
+- create path calls shared helper once.
+- launch path calls shared helper once.
+- pure OpenCode adapter path does not call coordinator.
+- direct tmux/process restart paths do not call coordinator.
+- blocked create before meta writes does not remove unrelated team/task dirs.
+- blocked launch restores prelaunch config.
+- blocked launch before clear does not persist failed launch snapshot.
+- cancellation during PTY does not continue into clear/spawn.
+- stop/shutdown during PTY does not restore twice or write two artifacts.
+- artifact includes `workspaceTrustPreflight` when blocked.
+
+Stop rule:
+
+- if any blocked preflight can erase previous launch state, do not ship.
+
+Rating: 🎯 8 🛡️ 9 🧠 7, ~220-380 LOC plus tests.
+
+### Gate 5 - Real Smoke Matrix
+
+Goal: verify real provider behavior that unit tests cannot prove.
+
+Run with feature flags on and retry off:
+
+```text
+AGENT_TEAMS_WORKSPACE_TRUST_PREFLIGHT=1
+AGENT_TEAMS_WORKSPACE_TRUST_CLAUDE_PTY=1
+AGENT_TEAMS_WORKSPACE_TRUST_CODEX_SETTINGS=1
+AGENT_TEAMS_WORKSPACE_TRUST_RETRY=0
+```
+
+Smoke cases:
+
+- Claude fresh temp git repo, Anthropic-only team.
+- Claude fresh temp git repo, Codex-only team, because orchestrator trust still gates headless teammates.
+- Anthropic lead with Codex teammate.
+- Codex lead with Anthropic teammate.
+- selected subdirectory inside git repo.
+- symlinked workspace.
+- two concurrent launches for same fresh workspace.
+- cancel during preflight.
+- stop team during preflight.
+- missing optional `node-pty` simulation.
+- isolated `CODEX_HOME` with auth picker.
+- Codex config mutation sentinel before/after hash.
+
+Pass criteria:
+
+- no issue #100 trust error in fresh-workspace cases where the Claude profile is otherwise set up.
+- no direct Codex `-c` in Claude argv snapshots.
+- no unknown prompts receive keys.
+- no previous launch snapshot is overwritten by blocked preflight.
+- no unredacted env/config/raw account data appears in artifact.
+
+Stop rule:
+
+- if smoke fails only in Codex settings contract, ship Claude PTY with `AGENT_TEAMS_WORKSPACE_TRUST_CODEX_SETTINGS=0`.
+- if smoke fails in Claude PTY prompt automation, ship diagnostics-only with PTY disabled.
+
+Rating: 🎯 8 🛡️ 10 🧠 5, mostly test/smoke time.
+
+### Gate 6 - Default-On Criteria
+
+Do not enable by default until all are true:
+
+- Gates 0-5 pass on macOS.
+- Windows path unit tests pass even if live Windows smoke is not available.
+- sibling runtime contract tests pass.
+- artifact copy has enough data to diagnose field failures without raw secrets.
+- feature flags can disable Claude PTY and Codex settings independently.
+- existing focused team provisioning tests pass.
+
+Minimum verification before default-on:
+
+```bash
+pnpm vitest run test/main/services/runtime/cliSettingsArgs.test.ts
+pnpm vitest run test/main/services/team/TeamLaunchFailureArtifactPack.test.ts
+pnpm vitest run test/main/services/team/TeamProvisioningService.test.ts -t "workspace trust"
+pnpm vitest run test/main/services/team/workspaceTrust
+```
+
+Sibling runtime minimum:
+
+```bash
+bun test src/utils/swarm/spawnUtils.test.ts
+bun test src/services/codexNative/mcpConfigBridge.test.ts
+bun test src/services/codexNative/execRunner.test.ts
+```
+
+Stop rule:
+
+- no default-on without a documented smoke artifact from at least one fresh workspace run.
+
+Rating: 🎯 9 🛡️ 10 🧠 4, mostly verification.
+
+## Residual Risk Playbook
+
+Use this table when something fails during implementation or field testing.
+
+| Symptom | Likely cause | Immediate action | Long-term fix |
+| --- | --- | --- | --- |
+| Claude trust prompt still appears in runtime | PTY did not persist trust or path key mismatch | keep runtime failure classification, inspect `workspaceTrustPreflight` | add path candidate or state probe test |
+| Claude opens onboarding/theme screen | profile setup missing, not workspace trust | return `provider_setup_required`, no key action | optional future UI setup guidance |
+| Unknown PTY screen | provider version changed | no action, timeout/soft fail | add snapshot test only after human review |
+| Previous launch state disappears after blocked preflight | lifecycle guard missed | block release | fix `launchStateClearedForRun` and cleanup tests |
+| Create deletes existing team files | create cleanup policy too broad | block release | track create-owned directories only |
+| Codex teammate still prompts trust | settings did not reach sibling native exec | disable Codex settings flag | fix inherited settings contract and sibling validator |
+| Claude argv contains Codex `-c` | wrong dialect applied | block release | fix patch target surface and add snapshot test |
+| Duplicate settings payload | early/full patch dedupe failed | keep feature off | fix `dedupeKey` applier |
+| Raw account/config data in artifact | redaction path drifted | block release | reuse/extract launch artifact redactor |
+| PTY process remains alive | adapter cleanup bug | disable Claude PTY | add finally kill/process-tree tests |
+| User cannot cancel preflight | progress state not cancellable | block release | set `spawning` before await and test IPC cancel |
+
+## Final Confidence Gates
+
+Top 3 hard gates before implementation can be considered low-risk:
+
+1. Lifecycle gate: blocked preflight cannot clear or overwrite previous launch state - 🎯 9 🛡️ 10 🧠 6, ~40-90 LOC plus tests.
+2. Dialect gate: Codex native `-c` appears only inside sibling Codex native exec - 🎯 9 🛡️ 10 🧠 5, ~60-120 LOC plus tests.
+3. Prompt gate: unknown PTY screens never receive key actions - 🎯 8 🛡️ 10 🧠 6, ~120-220 LOC plus snapshot tests.
+
+If any of these three gates fails, keep the feature behind flags and ship only improved diagnostics.
+
+## Implementation Slicing And Freeze Rules
+
+This feature should not be implemented as one large diff. Split the work so every slice can be reviewed and rolled back independently.
+
+### Slice A - Diagnostics Baseline
+
+Scope:
+
+- keep existing `workspace_trust_required` classification stable.
+- keep deterministic bootstrap title stable.
+- add no new launch behavior.
+
+Allowed files:
+
+- launch failure artifact pack
+- deterministic bootstrap event handling tests
+- docs/runbook if needed
+
+Forbidden:
+
+- no `node-pty`
+- no launch arg/settings mutation
+- no `TeamProvisioningService` lifecycle edits except tests for current behavior
+
+Exit criteria:
+
+- focused tests pass.
+- disabled-feature behavior is identical to current behavior.
+
+Rating: 🎯 10 🛡️ 10 🧠 2, ~20-60 LOC.
+
+### Slice B - Sibling Codex Contract
+
+Scope:
+
+- add sibling runtime settings reader.
+- validate `codex.agent_teams_workspace_trust.config_overrides`.
+- append validated values to Codex native `configOverrides`.
+- prove `buildInheritedCliFlags()` preserves inline settings.
+
+Allowed files:
+
+- sibling runtime Codex native settings/turn executor utilities
+- sibling runtime tests
+
+Forbidden:
+
+- no desktop launch integration yet
+- no direct Codex global config writes
+- no Claude CLI argv `-c`
+
+Exit criteria:
+
+- sibling tests pass.
+- malformed settings skip safely.
+- valid settings reach Codex native `execRunner`.
+
+Rating: 🎯 9 🛡️ 9 🧠 6, ~50-120 sibling LOC.
+
+### Slice C - Desktop Pure Workspace-Trust Core
+
+Scope:
+
+- feature shell and contracts.
+- pure path/canonicalization helpers.
+- pure Codex settings patch builder.
+- diagnostics budget/redaction adapter.
+- fake-only coordinator planning tests.
+
+Allowed files:
+
+- `src/features/workspace-trust/**`
+- focused unit tests
+- no integration into launch service except type-only public facade exports
+
+Forbidden:
+
+- no `node-pty`
+- no `TeamProvisioningService` behavior change
+- no real filesystem writes except temp test fixtures
+
+Exit criteria:
+
+- all pure tests pass.
+- no import from feature internals by team service.
+- path edge cases pass.
+
+Rating: 🎯 9 🛡️ 10 🧠 5, ~250-450 LOC.
+
+### Slice D - Claude PTY Strategy Isolated
+
+Scope:
+
+- `PtyDialogEngine`
+- `NodePtyProcessAdapter`
+- protected Claude command builder
+- temp empty MCP config store
+- Claude state probe
+- fake PTY tests and optional manual smoke
+
+Allowed files:
+
+- workspace-trust feature adapters
+- isolated workspace-trust tests
+
+Forbidden:
+
+- no create/launch integration yet
+- no blind prompt dismissal
+- no direct provider state file writes
+
+Exit criteria:
+
+- unknown screens never receive key actions.
+- temp files are removed in all paths.
+- manual smoke can accept trust for a fresh temp workspace.
+
+Rating: 🎯 8 🛡️ 9 🧠 6, ~300-520 LOC.
+
+### Slice E - TeamProvisioningService Integration Behind Flags
+
+Scope:
+
+- setter/lazy coordinator.
+- early settings-only plan.
+- full plan.
+- execute preflight after `ProvisioningRun`, before clear.
+- `launchStateClearedForRun`.
+- typed create/launch failure policy.
+- artifact flags.
+
+Allowed files:
+
+- `TeamProvisioningService`
+- focused team provisioning tests
+- feature facade composition
+
+Forbidden:
+
+- no automatic retry.
+- no UI schema changes.
+- no direct restart integration.
+- no pure OpenCode runtime adapter integration.
+
+Exit criteria:
+
+- all Gate 4 tests pass.
+- feature disabled restores current behavior.
+- blocked preflight cannot erase previous launch state.
+
+Rating: 🎯 8 🛡️ 9 🧠 8, ~250-450 LOC.
+
+### Slice F - Smoke, Rollout, And Default-On
+
+Scope:
+
+- real smoke matrix.
+- artifact sample review.
+- feature flag defaults.
+- docs/runbook update.
+
+Forbidden:
+
+- no retry default-on.
+- no fallback direct config write.
+- no default-on until fresh workspace smoke is documented.
+
+Exit criteria:
+
+- Gate 5 and Gate 6 pass.
+- rollback flags verified.
+
+Rating: 🎯 8 🛡️ 10 🧠 4, mostly verification.
+
+### Merge Freeze Rules
+
+Do not merge a slice if any of these are true:
+
+- it changes launch behavior when all workspace trust flags are disabled.
+- it adds Codex native `-c` to Claude argv.
+- it makes `TeamProvisioningService` import feature internals instead of public facade.
+- it presses a key on an unknown PTY screen.
+- it writes `.claude.json` or `~/.codex/config.toml` directly.
+- it expands `TeamLaunchDiagnosticItem.code` without renderer tests.
+- it changes pure OpenCode adapter launch behavior.
+- it makes direct teammate restart call the coordinator in v1.
+- it leaves a PTY process running after cancellation/timeout tests.
+- it can clear persisted launch state before preflight success.
+
+### Review Order
+
+Review in this order to catch expensive mistakes early:
+
+1. sibling Codex contract and no-Claude-`-c` proof.
+2. pure workspace/path/settings tests.
+3. PTY prompt state machine snapshots.
+4. `TeamProvisioningService` lifecycle placement.
+5. artifact redaction and diagnostics.
+6. real smoke artifacts.
+
+Top 3 implementation strategies:
+
+1. Strict slices A-F with freeze rules - 🎯 9 🛡️ 10 🧠 7, ~1000-1600 desktop LOC plus sibling LOC. Chosen because every risky behavior has a gate.
+2. One feature branch with all code behind flags - 🎯 7 🛡️ 7 🧠 6, same LOC. Faster locally, but review risk is much higher.
+3. Diagnostics-only plus manual user instruction - 🎯 6 🛡️ 9 🧠 2, ~40-80 LOC. Very safe but does not solve the product goal.
+
+## Feature Flags And Rollback
+
+Use independent switches so a bad provider path can be disabled without reverting the whole feature.
+
+```text
+AGENT_TEAMS_WORKSPACE_TRUST_PREFLIGHT=0
+AGENT_TEAMS_WORKSPACE_TRUST_CODEX_SETTINGS=0
+AGENT_TEAMS_WORKSPACE_TRUST_CLAUDE_PTY=0
+AGENT_TEAMS_WORKSPACE_TRUST_ALLOW_UNPROTECTED_CLAUDE=0
+AGENT_TEAMS_WORKSPACE_TRUST_RETRY=0
+AGENT_TEAMS_WORKSPACE_TRUST_FILE_LOCK=0
+AGENT_TEAMS_WORKSPACE_TRUST_DEBUG=0
+```
+
+Default rollout:
+
+- internal smoke: preflight on, retry off
+- first release: Claude PTY on, Codex settings contract on after sibling tests, retry off
+- later release: retry on only if artifact data shows remaining trust failures are recoverable
+
+Rollback behavior:
+
+- disabling Claude PTY keeps current launch path and diagnostics
+- disabling Codex settings removes only app-owned Codex workspace-trust settings and sibling native config overrides
+- disabling unprotected Claude fallback is the default; keep it disabled unless debugging a specific old Claude build
+- disabling retry keeps first-launch preflight and existing failure classification
+- disabling file lock keeps in-process lock behavior
+- disabling debug suppresses raw PTY tails even on failure
+
+## Recommended V1 Defaults
+
+1. Always run Claude workspace trust preflight for team launch workspaces.
+2. Use the protected Claude interactive command, not plain `claude`, for PTY preflight.
+3. Add Codex workspace-trust settings when Codex provider is present, and let sibling runtime convert validated values to Codex native `configOverrides`.
+4. Keep Codex PTY fallback implemented in rules/tests but not necessarily used in the main path unless direct Codex TUI launch needs it.
+5. Do not direct-write `.claude.json` or `~/.codex/config.toml`.
+6. Keep automatic retry off in v1 default; add it later only for `workspace_trust_required`.
+7. Keep current user-visible diagnostics if all preflight paths fail.
+8. Use workspace trust locks by provider + realpath.
+9. Kill Claude PTY as soon as trust state persists.
+10. Treat home/root workspaces as non-persistable for v1, not as broad trust targets.
+11. Apply Codex trust settings only through typed dialect-aware patches.
+12. Keep `node-pty` behind a dedicated adapter, not behind terminal UI services.
+
+## V1 Cut Line
+
+Ship in first implementation PR:
+
+- `src/features/workspace-trust` feature shell
+- domain types
+- path canonicalization, git-root candidate detection, and parent trust matching
+- workspace trust lock registry
+- feature flag parser
+- Codex config override builder
+- Codex config mutation sentinel
+- pure provider settings patch helpers
+- typed `WorkspaceTrustLaunchArgPatch` with owner, dialect, target surface, and dedupe key
+- settings dialect/surface resolver for Codex-consuming invocation surfaces
+- early settings-only provider args resolver for default model probes
+- full plan patching for provider facts, primary settings, and cross-provider settings
+- PTY dialog engine with fake terminal tests
+- dedicated `NodePtyProcessAdapter` behind `PtyProcessPort`
+- Claude state probe
+- Claude protected command builder
+- temp empty MCP config writer and cleanup
+- Claude PTY strategy behind feature flag
+- `TeamProvisioningService` create/launch plan/execute integration without retry
+- `prepareWorkspaceTrustForDeterministicRun(...)` shared helper
+- `failDeterministicRunBeforeSpawn(...)` cleanup helper
+- typed create/launch cleanup policy
+- stale-run guard after preflight execute
+- `launchStateClearedForRun` cleanup guard
+- stripped `trustPreflightEnv`
+- artifact `flags.workspaceTrustPreflight`
+- workspace trust diagnostics budget before assigning artifact flags
+- non-persistable home/root workspace diagnostic
+- raw PTY tail disabled by default
+- shared or extracted launch artifact redaction helper for workspace trust diagnostics
+- public feature facade import boundary
+- sibling runtime settings reader that validates Codex workspace-trust config override values
+- sibling runtime tests for inherited settings and native `configOverrides`
+- focused tests for primary Codex and cross-provider Codex settings
+- focused tests for Codex settings dialect, duplicate patch dedupe, and Anthropic-only no-op behavior
+- focused tests for `node-pty` unavailable behavior
+- focused tests for pure OpenCode adapter boundary and mixed side-lane workspace inclusion
+
+Do not ship in first implementation PR:
+
+- automatic relaunch retry
+- direct `.claude.json` writes
+- direct `~/.codex/config.toml` writes
+- UI diagnostic row code changes
+- OpenCode runtime adapter changes
+- direct teammate restart preflight
+- tmux dependency
+- blind dialog dismiss on healthy launches
+
+Definition of done:
+
+- issue #100 text no longer appears for a fresh workspace when Claude profile is otherwise set up, for both first create and relaunch
+- issue #104-like failures still classify as `workspace_trust_required` if preflight cannot clear trust
+- Codex teammates under Anthropic lead receive scoped app-owned Codex workspace-trust settings
+- sibling Codex native executor receives validated `projects."<workspace-path>".trust_level="trusted"` values through `configOverrides`
+- Codex teammates without explicit model receive patched settings during default model resolution
+- pure Anthropic provider args never receive Codex native `-c` overrides
+- repeated early/full/future retry patch application does not duplicate app-owned Codex trust settings
+- disabling all workspace trust feature flags returns to current launch behavior
+- no new prompt receives keys unless its rule is allowlisted and tested
+- two concurrent launches for the same fresh workspace result in one PTY accept and one `already_trusted_after_wait`
+- Claude preflight command is protected and does not use `-p`, `doctor`, runtime bootstrap args, project/local settings, or project MCP
+- stopping launch during preflight does not clear previous launch state or continue into spawn
+- blocked launch preflight does not persist a new failed launch snapshot when `clearPersistedLaunchState()` was not reached
+- blocked create preflight before meta writes leaves no partial team metadata/tasks directories
+- selected git subdirectory works when Claude persists trust at the git-root key
+- selected home directory never causes the app to trust home/root automatically
+- missing optional `node-pty` produces a diagnostic and keeps existing runtime fallback behavior
+
+## Open Questions
+
+1. Should empty Claude onboarding be auto-accepted later?
+ - Recommendation: no for v1. It is provider setup, not workspace trust.
+
+2. Should we add direct `.claude.json` fallback?
+ - Recommendation: no for v1. Keep it as a documented emergency fallback behind a future feature flag only.
+
+3. Should Codex PTY fallback run automatically?
+ - Recommendation: not for the current team launch path if settings -> sibling native config override contract is available. Keep the engine capable of it for future direct Codex TUI flows.
+
+4. Should we trust all member worktrees?
+ - Recommendation: yes, but only exact selected/generated worktree paths used by the launch.
+
+5. Should plain `claude` fallback be allowed when protected flags are missing?
+ - Recommendation: no by default. Add an explicit experimental flag because normal startup can load project behavior after trust.
+
+## Final Decision
+
+Implement **Host-preflight + runtime contract**.
+
+The desktop app prepares exact deterministic launch workspaces. The orchestrator remains the runtime executor and keeps its trust gate. Claude workspace trust is prepared through a bounded protected `node-pty` warmup using a dedicated adapter, `--bare`, strict empty MCP config, user settings only, hooks disabled, and tools disabled, then exits as soon as trust persists. Codex receives scoped app-owned workspace-trust settings through typed dialect-aware patches with dedupe; the sibling runtime validates those settings and converts them to Codex native `configOverrides` only at the Codex binary boundary. Pure OpenCode runtime-adapter launch and direct teammate restart stay out of v1. Launch retries stay off in v1 and remain narrowly limited to trust failures later. `launchStateClearedForRun` prevents a preflight-only failure from overwriting the previous launch snapshot.
diff --git a/landing/product-docs/.vitepress/config.ts b/landing/product-docs/.vitepress/config.ts
index 610afc50..c24f2841 100644
--- a/landing/product-docs/.vitepress/config.ts
+++ b/landing/product-docs/.vitepress/config.ts
@@ -39,8 +39,7 @@ const rootGuide: DefaultTheme.SidebarItem[] = [
text: "Start",
items: [
{ text: "Quickstart", link: "/guide/quickstart" },
- { text: "Installation", link: "/guide/installation" },
- { text: "Create a team", link: "/guide/create-team" }
+ { text: "Installation", link: "/guide/installation" }
]
},
{
@@ -48,15 +47,30 @@ const rootGuide: DefaultTheme.SidebarItem[] = [
items: [
{ text: "Runtime setup", link: "/guide/runtime-setup" },
{ text: "Agent workflow", link: "/guide/agent-workflow" },
- { text: "Code review", link: "/guide/code-review" },
+ { text: "MCP integration", link: "/guide/mcp-integration" },
+ { text: "Code review", link: "/guide/code-review" }
+ ]
+ },
+ {
+ text: "Team Management",
+ items: [
+ { text: "Create a team", link: "/guide/create-team" },
+ { text: "Team brief examples", link: "/guide/team-brief-examples" },
+ { text: "Git and worktree strategy", link: "/guide/git-worktree-strategy" },
{ text: "Troubleshooting", link: "/guide/troubleshooting" }
]
},
+ {
+ text: "Developers",
+ items: [{ text: "Developer hub", link: "/developers/" }]
+ },
{
text: "Reference",
items: [
{ text: "Concepts", link: "/reference/concepts" },
{ text: "Providers and runtimes", link: "/reference/providers-runtimes" },
+ { text: "Contributor architecture", link: "/reference/contributor-architecture" },
+ { text: "Release notes", link: "/reference/release-notes" },
{ text: "Privacy and local data", link: "/reference/privacy-local-data" },
{ text: "FAQ", link: "/reference/faq" }
]
@@ -68,8 +82,7 @@ const ruGuide: DefaultTheme.SidebarItem[] = [
text: "Старт",
items: [
{ text: "Быстрый старт", link: "/ru/guide/quickstart" },
- { text: "Установка", link: "/ru/guide/installation" },
- { text: "Создание команды", link: "/ru/guide/create-team" }
+ { text: "Установка", link: "/ru/guide/installation" }
]
},
{
@@ -77,15 +90,30 @@ const ruGuide: DefaultTheme.SidebarItem[] = [
items: [
{ text: "Настройка рантайма", link: "/ru/guide/runtime-setup" },
{ text: "Работа агентов", link: "/ru/guide/agent-workflow" },
- { text: "Код-ревью", link: "/ru/guide/code-review" },
+ { text: "MCP integration", link: "/ru/guide/mcp-integration" },
+ { text: "Код-ревью", link: "/ru/guide/code-review" }
+ ]
+ },
+ {
+ text: "Управление командами",
+ items: [
+ { text: "Создание команды", link: "/ru/guide/create-team" },
+ { text: "Team brief examples", link: "/ru/guide/team-brief-examples" },
+ { text: "Git and worktree strategy", link: "/ru/guide/git-worktree-strategy" },
{ text: "Диагностика", link: "/ru/guide/troubleshooting" }
]
},
+ {
+ text: "Разработчикам",
+ items: [{ text: "Хаб разработчика", link: "/ru/developers/" }]
+ },
{
text: "Справочник",
items: [
{ text: "Концепции", link: "/ru/reference/concepts" },
{ text: "Провайдеры и рантаймы", link: "/ru/reference/providers-runtimes" },
+ { text: "Архитектура для контрибьюторов", link: "/ru/reference/contributor-architecture" },
+ { text: "Релизы", link: "/ru/reference/release-notes" },
{ text: "Приватность и локальные данные", link: "/ru/reference/privacy-local-data" },
{ text: "FAQ", link: "/ru/reference/faq" }
]
@@ -94,6 +122,7 @@ const ruGuide: DefaultTheme.SidebarItem[] = [
const rootNav: DefaultTheme.NavItem[] = [
{ text: "Guide", link: "/guide/quickstart", activeMatch: "^/guide/(?!troubleshooting(?:/|$))" },
+ { text: "Developers", link: "/developers/", activeMatch: "^/developers/" },
{ text: "Reference", link: "/reference/concepts", activeMatch: "^/reference/" },
{
text: "Troubleshooting",
@@ -109,6 +138,7 @@ const ruNav: DefaultTheme.NavItem[] = [
link: "/ru/guide/quickstart",
activeMatch: "^/ru/guide/(?!troubleshooting(?:/|$))"
},
+ { text: "Разработчикам", link: "/ru/developers/", activeMatch: "^/ru/developers/" },
{ text: "Справочник", link: "/ru/reference/concepts", activeMatch: "^/ru/reference/" },
{
text: "Диагностика",
diff --git a/landing/product-docs/.vitepress/theme/DocsCardGrid.vue b/landing/product-docs/.vitepress/theme/DocsCardGrid.vue
index ca997fb4..85035089 100644
--- a/landing/product-docs/.vitepress/theme/DocsCardGrid.vue
+++ b/landing/product-docs/.vitepress/theme/DocsCardGrid.vue
@@ -15,6 +15,7 @@ const cards = computed(() => {
? [
{ icon: "◈", title: "Концепции", desc: "Команды, задачи, роли и уровни автономности.", link: "/ru/reference/concepts" },
{ icon: "⌁", title: "Рантаймы", desc: "Claude, Codex, OpenCode и multimodel-режим.", link: "/ru/reference/providers-runtimes" },
+ { icon: "▦", title: "Архитектура", desc: "Feature layout, guardrails и границы runtime/provider.", link: "/ru/reference/contributor-architecture" },
{ icon: "⌘", title: "Локальные данные", desc: "Что хранится на машине и что уходит провайдерам.", link: "/ru/reference/privacy-local-data" },
{ icon: "?", title: "FAQ", desc: "Короткие ответы на частые вопросы.", link: "/ru/reference/faq" }
]
@@ -30,6 +31,7 @@ const cards = computed(() => {
? [
{ icon: "◈", title: "Concepts", desc: "Teams, tasks, roles, and autonomy levels.", link: "/reference/concepts" },
{ icon: "⌁", title: "Runtimes", desc: "Claude, Codex, OpenCode, and multimodel mode.", link: "/reference/providers-runtimes" },
+ { icon: "▦", title: "Architecture", desc: "Feature layout, guardrails, and runtime/provider boundaries.", link: "/reference/contributor-architecture" },
{ icon: "⌘", title: "Local data", desc: "What stays on disk and what providers receive.", link: "/reference/privacy-local-data" },
{ icon: "?", title: "FAQ", desc: "Short answers to common questions.", link: "/reference/faq" }
]
diff --git a/landing/product-docs/developers/index.md b/landing/product-docs/developers/index.md
new file mode 100644
index 00000000..77c8a46d
--- /dev/null
+++ b/landing/product-docs/developers/index.md
@@ -0,0 +1,67 @@
+---
+title: Developers – Agent Teams Docs
+description: Contributor and developer entry point for Agent Teams architecture, guardrails, debugging, and MCP extension paths.
+---
+
+# Developers
+
+Use this page when you want to change Agent Teams itself, debug a team launch, or extend a runtime with MCP tools. The links below point to the canonical repo documents so implementation rules stay in one place.
+
+## Start here
+
+| Need | Go to |
+| --- | --- |
+| Repo overview, scripts, and source setup | [README.md](https://github.com/777genius/agent-teams-ai/blob/main/README.md) |
+| Working conventions for agents and contributors | [CLAUDE.md](https://github.com/777genius/agent-teams-ai/blob/main/CLAUDE.md) |
+| Hard implementation guardrails | [AGENT_CRITICAL_GUARDRAILS.md](https://github.com/777genius/agent-teams-ai/blob/main/AGENT_CRITICAL_GUARDRAILS.md) |
+| Medium and large feature structure | [Feature architecture standard](https://github.com/777genius/agent-teams-ai/blob/main/docs/FEATURE_ARCHITECTURE_STANDARD.md) |
+| Launch, bootstrap, and teammate messaging debugging | [Agent team debugging runbook](https://github.com/777genius/agent-teams-ai/blob/main/docs/team-management/debugging-agent-teams.md) |
+| Contribution process | [Contributing guide](https://github.com/777genius/agent-teams-ai/blob/main/.github/CONTRIBUTING.md) |
+| Release notes / Changelog | [RELEASE.md](https://github.com/777genius/agent-teams-ai/blob/main/docs/RELEASE.md) — [CHANGELOG.md](https://github.com/777genius/agent-teams-ai/blob/main/docs/CHANGELOG.md) |
+
+## Local development path
+
+Run the desktop Electron app for normal development:
+
+```bash
+pnpm install
+pnpm dev
+```
+
+The browser/web path is not a replacement for the desktop runtime. Desktop mode is the supported local path because it includes IPC, terminals, provider auth, team lifecycle handling, launch diagnostics, and the runtime bridges used by real teams.
+
+## Architecture checkpoints
+
+Before changing a feature, identify its boundary:
+
+| Area | Expected home |
+| --- | --- |
+| Medium or large product feature | `src/features/<feature-name>/` |
+| Electron main process orchestration | `src/main/` |
+| Preload-safe API surface | `src/preload/` |
+| Renderer UI and app state | `src/renderer/` |
+| Shared types and pure helpers | `src/shared/` |
+| Agent Teams board MCP server | `mcp-server/` |
+| Board data controller | `agent-teams-controller/` |
+
+Use `src/features/recent-projects` as the reference slice for feature organization. Keep cross-process contracts explicit, and avoid deep imports across feature boundaries.
+
+## Debugging path
+
+For launch hangs, OpenCode `registered` / bootstrap-unconfirmed states, missing teammate replies, or suspicious task logs:
+
+1. Start with the [debugging runbook](https://github.com/777genius/agent-teams-ai/blob/main/docs/team-management/debugging-agent-teams.md).
+2. Inspect the newest artifact pack under `~/.claude/teams/<team-name>/launch-failure-artifacts/latest.json`.
+3. Open the artifact `manifest.json` and check `classification`, bootstrap breadcrumbs, launch diagnostics, member spawn statuses, and redacted log tails.
+4. Clean up only the team, run, pane, or process you can identify as owned by the smoke test or failed launch.
+
+## MCP development path
+
+Agent Teams uses a built-in MCP server named `agent-teams` for board operations. User and project MCP servers can add external capabilities for runtimes. See [MCP integration](/guide/mcp-integration) for setup examples, `.mcp.json` structure, and tool registration guidance.
+
+## Related docs
+
+- [Contributor architecture](/reference/contributor-architecture)
+- [Runtime setup](/guide/runtime-setup)
+- [MCP integration](/guide/mcp-integration)
+- [Troubleshooting](/guide/troubleshooting)
diff --git a/landing/product-docs/guide/agent-workflow.md b/landing/product-docs/guide/agent-workflow.md
index b98f7b0a..ae91505a 100644
--- a/landing/product-docs/guide/agent-workflow.md
+++ b/landing/product-docs/guide/agent-workflow.md
@@ -18,23 +18,41 @@ Both modes share the same kanban, task logs, and code review surfaces.
## Task lifecycle
+Agent Teams tracks each task along two independent dimensions: work status and review state.
+
+| Dimension | States | Description |
+| --- | --- | --- |
+| Work status | `pending`, `in_progress`, `completed` | Tracks whether the task is waiting, actively being worked on, or finished by the owner |
+| Review state | `none`, `review`, `needsFix`, `approved` | Tracks where the task is in the post-completion review flow |
+
+The kanban board shows the combined view, but the two dimensions move independently.
+
+### Work status flow
+
| Stage | What happens | Owner |
| --- | --- | --- |
-| Provisioning | The app starts the runtime, confirms the process is alive, and waits for bootstrap confirmation | App |
-| Planning | The lead creates tasks, optionally assigns teammates, and sets dependencies | Lead or user |
-| In progress | Agents work in parallel and update task state via board MCP tools | Teammates |
-| Review | Changes are reviewed by agents or by you before final acceptance | Team lead or user |
-| Done | Accepted work stays linked to its task history and can still be inspected later | User |
+| Pending | Task is created and ready but no one has started work yet | Lead or user |
+| In progress | Agents work and update task state via board MCP tools | Teammates |
+| Completed | The owner posts a result comment and marks the task done | Teammate |
+
+### Review state flow
+
+| Stage | What happens | Owner |
+| --- | --- | --- |
+| None | Task is not yet in review (may be pending, in progress, or newly completed) | — |
+| Review | Review has been requested; a reviewer inspects the diff and result | Reviewer |
+| Needs fix | Changes were requested during review; the owner must update | Teammate (owner) |
+| Approved | Review passed; the task is finalized | Reviewer |
### Planning → In progress
-When a teammate starts a task, the board status becomes `in_progress`. The agent creates a task comment with its plan and continues working. All native tool actions (read, bash, edit, write) are streamed into a task log.
+When a teammate starts a task, the work status becomes `in_progress`. The agent creates a task comment with its plan and continues working. All native tool actions (read, bash, edit, write) are streamed into a task log.
-### In progress → Review
+### Completed → Review
-When the teammate finishes work, it posts a result comment and marks the task `completed`. The lead can then decide whether to accept it immediately or move it into review.
+When the teammate finishes work, it posts a result comment and marks the work status `completed`. The lead or reviewer can then request a review to start the review flow.
-### Review → Done
+### Review → Approved
If the review surface shows acceptable changes, approve the review. The task is finalized and linked to its diff.
diff --git a/landing/product-docs/guide/code-review.md b/landing/product-docs/guide/code-review.md
index 9bb15f03..05b50617 100644
--- a/landing/product-docs/guide/code-review.md
+++ b/landing/product-docs/guide/code-review.md
@@ -54,7 +54,7 @@ A healthy review loop looks like this:
Example request-changes comment:
```text
-Please keep the copy improvements, but revert the unrelated runtime wording in the provider table. Add a docs build result before resubmitting.
+Please keep the copy improvements, but revert the unrelated runtime wording in the provider table. Add the `pnpm --dir landing docs:build` result before resubmitting.
```
## Review states
@@ -72,6 +72,15 @@ Teams can review each other's work before you make the final call. This catches
Agent review is most useful when the reviewer has a clear rubric. For example, tell a reviewer to check only docs clarity, only IPC safety, or only test coverage. Broad "review everything" requests tend to produce weaker feedback.
+### MCP-driven review state
+
+Review state changes (request review, request changes, approve) are tool-driven. Leaving a "request changes" comment on a task does **not** move the kanban column to `needsFix` — a lead or agent must call the appropriate MCP tool:
+
+- `review_request_changes` — moves the task to `needsFix` and notifies the owner
+- `review_approve` — moves the task to `approved` and finalizes the review
+
+Comments alone are insufficient for state transitions. For the full list of review MCP tools and their parameters, see [MCP Integration](/guide/mcp-integration).
+
## Review participants
The team lead is the default reviewer. You can configure additional reviewers in the Kanban settings if you want peers to review each other's work.
@@ -82,10 +91,12 @@ Prioritize these areas when reviewing:
- **Provider auth and runtime detection** — did the agent change runtime setup in a way that would break other paths?
- **IPC, preload, and filesystem boundaries** — keep Electron responsibilities separated
-- **Git and worktree behavior** — verify branch naming, commits, and pushes
+- **Git and worktree behavior** — verify branch naming, commits, and pushes; see [Git and worktree strategy](/guide/git-worktree-strategy) for isolation patterns.
- **Parsing and task lifecycle logic** — changes to task references, chunking, or filtering can break message delivery
- **Persistence and code review flows** — changes to task storage or review state must stay consistent across IPC layers
+For the canonical feature layout and hard guardrail links, use [Contributor Architecture](/reference/contributor-architecture).
+
## Verification
Prefer focused verification commands. Broad formatting or lint-fix commands should not be used unless the task explicitly intends broad formatting churn.
diff --git a/landing/product-docs/guide/create-team.md b/landing/product-docs/guide/create-team.md
index df2b8dd5..abe573b4 100644
--- a/landing/product-docs/guide/create-team.md
+++ b/landing/product-docs/guide/create-team.md
@@ -30,7 +30,7 @@ Each team member runs on a provider backend. In the team editor, pick a provider
Mixing providers in one team is supported — for example, a Claude lead with OpenCode builders.
::: info
-Gemini support is in development and will appear in the provider list when available.
+Gemini is available as a supported provider path. See [Providers and runtimes](/reference/providers-runtimes) for auth options and current provider status.
:::
## Write a good team brief
diff --git a/landing/product-docs/guide/git-worktree-strategy.md b/landing/product-docs/guide/git-worktree-strategy.md
new file mode 100644
index 00000000..345dcef3
--- /dev/null
+++ b/landing/product-docs/guide/git-worktree-strategy.md
@@ -0,0 +1,101 @@
+---
+title: Git and Worktree Strategy – Agent Teams Docs
+description: Decide when to use the main worktree, feature branches, or OpenCode worktree isolation for parallel agent work.
+---
+
+# Git and Worktree Strategy
+
+Git gives Agent Teams the strongest review path: narrow diffs, branch visibility, task-scoped changes, and safer parallel work.
+
+## Choose a strategy
+
+| Strategy | Use when | Tradeoff |
+| --- | --- | --- |
+| Main worktree | Solo work, docs-only edits, or one teammate at a time | Simple, but parallel edits can collide |
+| Feature branch | One team is working on one coherent change | Clean review target, but teammates still share files |
+| Worktree isolation | Multiple OpenCode teammates may edit the same repo in parallel | Better isolation, but merge/review needs more discipline |
+
+Start simple. Add worktree isolation when parallel edits are likely, not because every task needs a separate checkout.
+
+## When to enable worktree isolation
+
+Enable it for OpenCode teammates when:
+
+- two or more teammates may edit the same repository at once
+- a task may run formatters, code generators, or broad tests
+- you want each teammate's branch and diff to stay separate
+- the lead workspace is dirty and should not receive direct edits
+
+Keep it off when:
+
+- the task is read-only
+- one teammate owns all edits
+- the repo is not Git-tracked
+- you need a runtime path that does not support this isolation mode
+
+::: warning
+Worktree isolation currently applies to OpenCode members and requires a Git-tracked project.
+:::
+
+## Branch hygiene
+
+Before starting parallel work:
+
+```bash
+git status --short
+git branch --show-current
+```
+
+Use a clean branch when possible. If the main worktree already has user changes, tell agents not to revert unrelated files and keep task scope narrow.
+
+Recommended branch style:
+
+```text
+agent/<area>/<short-task-name>
+```
+
+Examples:
+
+```text
+agent/docs/mcp-guide
+agent/review/task-log-filtering
+agent/ui/code-review-polish
+```
+
+## Review flow
+
+For isolated worktrees, review the teammate's diff before merging or applying changes back to the main workspace.
+
+1. Confirm the task result comment names changed scope and verification.
+2. Inspect the task diff in the review UI.
+3. Ask for changes on the task if the diff touches unrelated files.
+4. Approve only after tests or manual checks match the task risk.
+5. Merge or apply changes deliberately.
+
+Do not auto-merge worktree output just because the task is complete. Completion means the agent believes the work is ready for review.
+
+## Conflict policy
+
+Use this policy for parallel teams:
+
+| Situation | Action |
+| --- | --- |
+| Two teammates edit the same file | Pause one task or make one owner responsible for integration |
+| Generated files changed broadly | Require a comment explaining the generator and command |
+| Main worktree has unrelated changes | Preserve them and review only task-owned changes |
+| Worktree branch diverges | Rebase or merge manually after review, not inside a vague agent task |
+
+## Task prompt example
+
+```text
+Implement the settings validation fix in your assigned worktree. Keep edits inside src/features/settings and focused tests. Do not touch provider auth or task storage. Post the test command and result before completing the task.
+```
+
+This prompt works because it names the allowed area, sensitive boundaries, and completion evidence.
+
+## Related guides
+
+- [Create a team](/guide/create-team)
+- [Code review](/guide/code-review)
+- [Team brief examples](/guide/team-brief-examples)
+- [Runtime setup](/guide/runtime-setup)
diff --git a/landing/product-docs/guide/installation.md b/landing/product-docs/guide/installation.md
index 720bb1a9..d1656d98 100644
--- a/landing/product-docs/guide/installation.md
+++ b/landing/product-docs/guide/installation.md
@@ -9,7 +9,7 @@ Agent Teams is distributed as a desktop app for macOS, Windows, and Linux.
## Download builds
-Use the download page or the latest [GitHub release](https://github.com/777genius/agent-teams-ai/releases) when you want the packaged app:
+Use the download page or the latest [GitHub release](https://github.com/777genius/agent-teams-ai/releases) when you want the packaged app:
- macOS Apple Silicon: `.dmg`
- macOS Intel: `.dmg`
@@ -30,11 +30,11 @@ To use agent runtimes, you need access to at least one provider:
| ------------------ | ------------------------------------------------- |
| Claude (Anthropic) | Claude Code CLI login or API key |
| Codex (OpenAI) | Codex CLI login or API key |
-| Gemini (Google) | _In development_ |
+| Gemini (Google) | Google ADC, Gemini CLI, or API key |
| OpenCode | API key for a supported backend (e.g. OpenRouter) |
::: info
-Gemini provider support is in development. You can prepare access now, but it will not appear in the team editor until it is ready.
+Gemini is available as a supported provider path. See [Providers and runtimes](/reference/providers-runtimes) for auth options and current status across all providers.
:::
For source development, you also need:
diff --git a/landing/product-docs/guide/mcp-integration.md b/landing/product-docs/guide/mcp-integration.md
new file mode 100644
index 00000000..2c775b9b
--- /dev/null
+++ b/landing/product-docs/guide/mcp-integration.md
@@ -0,0 +1,224 @@
+---
+title: MCP Integration – Agent Teams Docs
+description: Configure MCP in Agent Teams for board operations, teammate coordination, external tool servers, and custom tool development.
+---
+
+# MCP Integration
+
+Agent Teams uses MCP in two practical layers:
+
+| Layer | What it does | Who uses it |
+| --- | --- | --- |
+| Built-in board server | Exposes Agent Teams task, message, review, process, runtime, and cross-team tools | Leads and teammates launched by the app |
+| External MCP servers | Add optional tools such as browser automation, design context, docs search, or company systems | Users and configured runtimes |
+
+Keep those layers separate. The built-in `agent-teams` MCP server is how agents coordinate inside Agent Teams. External MCP servers are optional runtime tools.
+
+## How Agent Teams injects MCP
+
+When the desktop app launches Claude-based team members, it writes a temporary `--mcp-config` JSON file containing the built-in `agent-teams` server:
+
+```json
+{
+ "mcpServers": {
+ "agent-teams": {
+ "command": "node",
+ "args": ["/path/to/agent-teams-mcp/index.js"],
+ "env": {
+ "AGENT_TEAMS_MCP_CLAUDE_DIR": "/Users/you/.claude"
+ }
+ }
+ }
+}
+```
+
+In development, the command may point at `mcp-server/src/index.ts` through `tsx`. In packaged builds, the app copies the bundled MCP server to a stable app-data path and runs it with Node. The generated file is app-owned and cleaned up best effort.
+
+User and project MCP servers remain separate. The app reads installed servers from:
+
+| Scope | Location |
+| --- | --- |
+| User | `~/.claude.json` under `mcpServers` |
+| Local project entry in Claude config | `~/.claude.json` under `projects[projectPath].mcpServers` |
+| Project | `<project-root>/.mcp.json` under `mcpServers` |
+
+Prefer project scope for tools that belong to one repository. Prefer user scope for tools you reuse across unrelated projects.
+
+## Project `.mcp.json` example
+
+Place this file at the project root when a team should see the same project-scoped server:
+
+```json
+{
+ "mcpServers": {
+ "docs-search": {
+ "command": "npx",
+ "args": ["-y", "@acme/docs-search-mcp"],
+ "env": {
+ "DOCS_INDEX_PATH": "./docs-index"
+ }
+ },
+ "local-browser": {
+ "command": "node",
+ "args": ["./tools/mcp/browser-server.js"]
+ }
+ }
+}
+```
+
+Keep secrets out of committed `.mcp.json` files. Put credentials in your shell, a user-scoped config, or the app's custom MCP install flow if the value must stay local.
+
+## Board MCP workflow
+
+Agents should use board MCP tools when the work belongs to a task:
+
+1. Read the latest task context.
+2. Start the task only when actually beginning work.
+3. Add task comments for blockers, plans, and final results.
+4. Mark the task complete after the result comment is posted.
+5. Send a short message when a lead or teammate needs to know the result.
+
+Example agent flow:
+
+```text
+task_get -> task_start -> edit/test -> task_add_comment -> task_complete -> message_send
+```
+
+Use a direct message for coordination. Use a task comment for durable task history.
+
+::: tip
+If the note affects review, verification, changed scope, or a blocker, put it on the task.
+:::
+
+## Built-in Agent Teams tools
+
+The MCP server registers tools from `agent-teams-controller/src/mcpToolCatalog.js`. The registration loop lives in `mcp-server/src/tools/index.ts`, and each group has its own file under `mcp-server/src/tools/`.
+
+Common operational tools:
+
+| Tool | Use |
+| --- | --- |
+| `task_get` | Read the latest task context, comments, attachments, status, and relations |
+| `task_start` | Mark a task in progress when work actually begins |
+| `task_add_comment` | Add blocker notes, verification notes, plans, and final result summaries |
+| `task_complete` | Complete a task after the final result comment is posted |
+| `message_send` | Send a visible inbox message to a lead, teammate, or user |
+| `review_request`, `review_start`, `review_approve`, `review_request_changes` | Move task-scoped review workflows |
+| `process_register`, `process_list`, `process_stop`, `process_unregister` | Track teammate-owned dev servers, watchers, and other background services |
+
+Tool names may appear to runtimes with MCP namespace prefixes, for example `mcp__agent-teams__task_get`. The canonical tool name inside the MCP server remains `task_get`.
+
+## Register a new built-in tool
+
+For Agent Teams repository work, add built-in board tools through the existing FastMCP structure:
+
+1. Add the tool implementation to the matching file in `mcp-server/src/tools/`, or create a new group file if the domain is genuinely new.
+2. Add the tool name to the appropriate group in `agent-teams-controller/src/mcpToolCatalog.js`.
+3. Wire a new group through `mcp-server/src/tools/index.ts` only when a new domain group is needed.
+4. Validate input with `zod` and call the controller API instead of reading board files directly.
+5. Add focused tests in `mcp-server/test/tools.test.ts` or an e2e case when the transport matters.
+
+Minimal shape:
+
+```ts
+server.addTool({
+ name: 'task_example',
+ description: 'Explain what this tool does for agents.',
+ parameters: z.object({
+ teamName: z.string().min(1),
+ claudeDir: z.string().min(1).optional(),
+ taskId: z.string().min(1)
+ }),
+ execute: async ({ teamName, claudeDir, taskId }) => {
+ assertConfiguredTeam(teamName, claudeDir);
+ const controller = getController(teamName, claudeDir);
+ return jsonTextContent(controller.tasks.getTask(taskId));
+ }
+});
+```
+
+Do not create a tool that bypasses controller validation, mutates unrelated team files, or exposes broad filesystem/process access without a narrow task need.
+
+## External MCP servers
+
+Use external MCP servers when a teammate needs a durable tool connection, not just one prompt with pasted context.
+
+Good fits:
+
+- browser or website testing tools
+- design or product data tools
+- internal docs and search systems
+- issue tracker or support systems
+- database inspection tools with read-only credentials
+
+Poor fits:
+
+- secrets pasted into prompts
+- one-off files that can be attached directly
+- tools that mutate production systems without review
+- broad local filesystem access when a narrower project scope is enough
+
+## Scopes
+
+Agent Teams recognizes shared and project-oriented MCP scopes.
+
+| Scope | Use when |
+| --- | --- |
+| User or Global | The same server should be available across projects |
+| Project or Local | The server belongs to one repository, workspace, or team context |
+
+Prefer the narrowest scope that still makes the workflow usable. Project-scoped servers are easier to reason about during review because the tool belongs to the project being changed.
+
+## Setup checklist
+
+Before assigning a task that depends on an MCP server:
+
+1. Install or configure the server.
+2. Confirm it appears in the app's installed MCP list for the intended scope.
+3. Run diagnostics from the MCP registry or extensions UI when available.
+4. Start with a low-risk read-only task.
+5. Mention the expected MCP tool use in the task description or team brief.
+
+If a server fails diagnostics, fix that first. A better task prompt will not repair a missing command, wrong config path, or rejected credentials.
+
+## Install a custom server from the app
+
+The desktop app exposes MCP registry APIs through Electron IPC for search, browse, install, custom install, uninstall, installed-state reading, and diagnostics. Custom installs validate the server name, scope, project path, env var names, and HTTP headers before calling the runtime install path.
+
+Use custom install when you have an MCP package that is not in the registry yet:
+
+| Field | Example |
+| --- | --- |
+| Server name | `docs-search` |
+| Scope | `project` for this repository, `user` for all projects |
+| Type | `stdio` for local commands, `http` or `sse` for remote servers |
+| Package | `@acme/docs-search-mcp` |
+| Env | `DOCS_INDEX_PATH=./docs-index` |
+
+After install, run diagnostics and create a small read-only task to prove the tool surface before assigning larger work.
+
+## Task example
+
+```text
+Audit the docs home page with the browser MCP. Check desktop and mobile widths, capture any layout issue as a task comment, and only edit landing/product-docs files. Run `pnpm --dir landing docs:build` before completion.
+```
+
+This works because it names the tool, the surface, the write boundary, and the verification step.
+
+## Safety rules
+
+- Do not give every teammate every MCP server by default.
+- Keep write-capable tools out of broad teams unless review requires them.
+- Prefer read-only credentials for inspection tasks.
+- Put production-impacting tool use behind explicit task comments and review.
+- Treat MCP diagnostic failures as setup failures, not agent failures.
+- Avoid committing secrets in `.mcp.json` or prompts.
+- Use absolute `projectPath` values when installing project-scoped servers through the app.
+- Do not edit the app-generated `agent-teams-mcp-*.json` files; they are temporary launch artifacts.
+
+## Related guides
+
+- [Runtime setup](/guide/runtime-setup)
+- [Team brief examples](/guide/team-brief-examples)
+- [Agent workflow](/guide/agent-workflow)
+- [Developers](/developers/)
diff --git a/landing/product-docs/guide/quickstart.md b/landing/product-docs/guide/quickstart.md
index ac4c1216..a0ebc05b 100644
--- a/landing/product-docs/guide/quickstart.md
+++ b/landing/product-docs/guide/quickstart.md
@@ -9,12 +9,16 @@ This guide gets you from a fresh install to a running team in a few minutes.
## 1. Install Agent Teams
-Download the latest release for your platform from the download page or [GitHub releases](https://github.com/777genius/agent-teams-ai/releases).
+Download the latest release for your platform from the download page or [GitHub releases](https://github.com/777genius/agent-teams-ai/releases).
::: tip
The app is free and open source. The agent runtime you choose may still require provider access — see [Installation](/guide/installation) for details.
:::
+::: info
+The desktop app is the primary product. Agent Teams also runs in a browser for development, but the browser path lacks the full desktop IPC, terminal, provider auth, and team lifecycle behavior. Use `pnpm dev` (Electron) for normal development, not the browser/web dev mode.
+:::
+
## 2. Open or create a project
Launch the app and select the project directory you want agents to work in. Agent Teams reads local project files and runtime/session state so the UI can show tasks, logs, diffs, and teammate activity.
@@ -42,7 +46,7 @@ The setup flow auto-detects installed runtimes on your machine. A common first s
| OpenCode | Multi-model teams and many provider backends |
::: info
-Gemini support is in development and will appear in the runtime list when available.
+Gemini is available as a supported provider path. See [Providers and runtimes](/reference/providers-runtimes) for auth options and current provider status.
:::
See [Runtime setup](/guide/runtime-setup) for detailed configuration per provider.
@@ -84,7 +88,7 @@ Improve the onboarding flow. Split the work into tasks, keep changes small, and
Good first prompts include concrete scope, safety boundaries, and verification:
```text
-Improve the docs quickstart. Keep edits inside landing/product-docs. Add practical examples, preserve existing VitePress syntax, and run the docs build before marking tasks done.
+Improve the docs quickstart. Keep edits inside landing/product-docs. Add practical examples, preserve existing VitePress syntax, and run `pnpm --dir landing docs:build` before marking tasks done.
```
Avoid vague prompts such as "make the app better" for the first run. The lead can break down large goals, but better input produces smaller tasks and cleaner review.
diff --git a/landing/product-docs/guide/runtime-setup.md b/landing/product-docs/guide/runtime-setup.md
index 0bf4aeb5..29b3e658 100644
--- a/landing/product-docs/guide/runtime-setup.md
+++ b/landing/product-docs/guide/runtime-setup.md
@@ -7,6 +7,17 @@ description: Configure Claude Code, Codex, or OpenCode runtimes. Covers auth, pr
Agent Teams is a coordination layer. The actual model work runs through supported local runtimes and providers.
+::: tip Quick start - choosing your first runtime
+| If you ... | Start with |
+| --- | --- |
+| Already use Claude Code or have Anthropic access | **Claude** - familiar auth, minimal setup |
+| Use Codex or OpenAI-based workflows | **Codex** - native integration |
+| Want multi-model routing or broad provider coverage | **OpenCode** - most flexible, one config for many backends |
+| Are not sure which runtime fits | **OpenCode** - covers the most provider options and lets you switch later |
+
+Start with one runtime and one teammate. Confirm one launch works before expanding to multimodel.
+:::
+
## Prerequisites
Before launching a team, make sure:
@@ -40,7 +51,7 @@ Run the command for the runtime you plan to use. If it prints nothing, install t
The app detects supported runtimes and guides setup from the UI when possible.
-Gemini appears in some internal provider lists but is currently hidden from the main team creation UI while its launch experience is still marked in development.
+Gemini is available as a supported provider path with Google ADC (`gcloud auth`), Gemini CLI OAuth, and API key authentication. Configure it from the runtime setup UI when the Gemini backend is detected.
## Provider access
@@ -109,6 +120,16 @@ Example model strings:
If OpenCode launches but a teammate never becomes deliverable, inspect lane evidence before assuming the model ignored the prompt. See [Troubleshooting](/guide/troubleshooting#opencode-registered-but-bootstrap-unconfirmed).
+### Gemini
+
+Gemini supports three authentication methods:
+
+- **Google ADC** — run `gcloud auth application-default login` to authenticate via Google Application Default Credentials.
+- **Gemini CLI** — run `gemini login` if the Gemini CLI is installed.
+- **API key** — set `GEMINI_API_KEY` in your environment or configure it through the app's Manage Providers UI.
+
+The app auto-detects which auth method is available and shows the Gemini provider in the runtime setup and team creation UI when the backend is reachable.
+
## Multimodel mode
Multimodel mode can route work through many provider backends via OpenCode-compatible configuration. Use it when you need provider flexibility or want teammates to use different model lanes.
diff --git a/landing/product-docs/guide/team-brief-examples.md b/landing/product-docs/guide/team-brief-examples.md
new file mode 100644
index 00000000..8dc82cb2
--- /dev/null
+++ b/landing/product-docs/guide/team-brief-examples.md
@@ -0,0 +1,130 @@
+---
+title: Team Brief Examples – Agent Teams Docs
+description: Practical team brief templates for small fixes, docs work, implementation tasks, reviews, and high-risk areas.
+---
+
+# Team Brief Examples
+
+A good team brief gives the lead enough structure to create small tasks without forcing every implementation detail upfront.
+
+Use this shape:
+
+```text
+Outcome:
+Scope:
+Boundaries:
+Coordination:
+Verification:
+Review:
+```
+
+## Minimal brief
+
+Use for small, low-risk work.
+
+```text
+Outcome: Improve the quickstart so a new user can launch one team successfully.
+Scope: Keep edits inside landing/product-docs.
+Boundaries: Do not rewrite the whole docs structure.
+Coordination: Create one or two tasks, keep comments on the task.
+Verification: Run `pnpm --dir landing docs:build`.
+Review: Summarize changed pages and any remaining gaps.
+```
+
+## Implementation brief
+
+Use when code changes touch one feature area.
+
+```text
+Outcome: Add a focused improvement to task comment filtering.
+Scope: Work inside the task/comment feature files unless a shared helper is clearly needed.
+Boundaries: Do not change task storage format or review state semantics.
+Coordination: Split parser, UI, and tests into separate tasks if they can be reviewed independently.
+Verification: Run the focused unit tests first, then the feature typecheck if touched.
+Review: Call out parsing edge cases and any behavior that affects existing task comments.
+```
+
+## Docs brief
+
+Use for documentation and guide work.
+
+```text
+Outcome: Draft practical workflow guides from the docs audit.
+Scope: Add concise VitePress pages under landing/product-docs/guide.
+Boundaries: Avoid moving existing navigation hubs owned by other tasks.
+Coordination: Check related docs tasks before editing nav.
+Verification: Run `pnpm --dir landing docs:build`.
+Review: Include links added to sidebar and any pages intentionally left as drafts.
+```
+
+## Review-heavy brief
+
+Use for risky areas such as IPC, provider auth, persistence, Git, or task lifecycle logic.
+
+```text
+Outcome: Fix the launch failure without changing successful launch behavior.
+Scope: Start from the newest launch-failure artifact and the affected runtime adapter.
+Boundaries: Do not change provider prompts until setup and runtime evidence are inspected.
+Coordination: Make one diagnostic task and one fix task if the cause is confirmed.
+Verification: Run focused tests and one desktop smoke check when practical.
+Review: Lead must inspect the diff before approval.
+```
+
+## Mixed provider brief
+
+Use when teammates run different provider/model lanes.
+
+```text
+Outcome: Implement and review a small feature using separate builder and reviewer lanes.
+Scope: Builder edits the feature. Reviewer inspects only the task diff and tests.
+Boundaries: Do not switch model ids mid-task unless launch fails before work begins.
+Coordination: Builder posts result comment first. Reviewer posts findings as task comments.
+Verification: Builder runs focused tests. Reviewer checks failure output and changed scope.
+Review: Lead approves only after reviewer comments are resolved.
+```
+
+## Agent blocks in briefs
+
+Agent blocks are hidden agent-only text wrapped in markers such as `<info_for_agent>...</info_for_agent>`. The app strips them from normal display but keeps them available for agent coordination. Use them when the brief needs to say something to agents that would be noise for a human reader.
+
+Example - a brief that tells the lead how to split work without exposing coordination instructions to the user:
+
+```text
+Outcome: Add a dark mode toggle to the application settings.
+Scope: Settings UI, theme context, and CSS variables.
+Boundaries: Do not change existing light theme values or provider auth screens.
+
+<info_for_agent>
+Split this into three tasks: (1) theme context and CSS vars, (2) toggle component and settings wiring, (3) dark mode preview in existing docs screenshots if practical.
+</info_for_agent>
+```
+
+The block keeps the human-facing brief clean while giving the lead structured task-splitting guidance.
+
+## What to avoid
+
+| Weak brief | Better replacement |
+| --- | --- |
+| "Improve the app" | Name the workflow, files, and success check |
+| "Fix all docs" | Pick one guide group and one build command |
+| "Use the best model" | Name provider/model choices or let the app defaults stand |
+| "Refactor as needed" | State which modules are allowed to change |
+| "Make it production ready" | Define review, tests, and rollout checks |
+
+## Before launch
+
+Check these points before starting the team:
+
+1. The brief names a concrete outcome.
+2. Risk boundaries are explicit.
+3. The lead can split the work into reviewable tasks.
+4. Verification commands are included when known.
+5. Sensitive areas require review before approval.
+
+If the brief is still broad, launch a solo or small team first and ask it to produce a task plan rather than implementation.
+
+## Related guides
+
+- [Create a team](/guide/create-team)
+- [MCP integration](/guide/mcp-integration)
+- [Git and worktree strategy](/guide/git-worktree-strategy)
diff --git a/landing/product-docs/guide/troubleshooting.md b/landing/product-docs/guide/troubleshooting.md
index d5910fbb..25e9b2c4 100644
--- a/landing/product-docs/guide/troubleshooting.md
+++ b/landing/product-docs/guide/troubleshooting.md
@@ -25,6 +25,8 @@ Run the runtime binary in a terminal to verify `PATH` and auth. Example: `claude
If OpenCode shows `registered` but bootstrap is unconfirmed, inspect artifacts first before changing team prompts.
+Contributor/debugging details live in [Contributor Architecture](/reference/contributor-architecture), which links to the canonical agent team debugging runbook.
+
Look at the newest launch failure artifact:
```bash
@@ -49,6 +51,57 @@ jq '.activeRunId, .entries' ~/.claude/teams//.opencode-runtime/lanes//
+```
+
+Key files and what they tell you:
+
+- `launch-state.json` — member launch/liveness state (`.teamLaunchState`, `.summary`, `.members`)
+- `bootstrap-journal.jsonl` — ordered bootstrap events from CLI/runtime (`tail -80`)
+- `bootstrap-state.json` — bootstrap phase summary
+- `config.json` — provider, model, and project configuration
+- `inboxes/*.json` and `sentMessages.json` — message delivery state
+
+```bash
+jq '.teamLaunchState, .summary, .members' ~/.claude/teams/<team>/launch-state.json
+tail -80 ~/.claude/teams/<team>/bootstrap-journal.jsonl 2>/dev/null
+```
+
+### OpenCode runtime evidence
+
+For OpenCode teammates, session proof is in the lane runtime store:
+
+- `.opencode-runtime/lanes.json` — lane index with state
+- `.opencode-runtime/lanes/<lane>/manifest.json` — `activeRunId` and evidence entries
+- `.opencode-runtime/lanes/<lane>/opencode-sessions.json` — committed session records
+
+Expected healthy state: lane state `active`, manifest has `activeRunId` with at least one evidence entry, member has `bootstrapConfirmed: true`.
+
+```bash
+jq '.lanes' ~/.claude/teams/<team>/.opencode-runtime/lanes.json 2>/dev/null
+find ~/.claude/teams/<team>/.opencode-runtime -maxdepth 3 -type f | sort
+```
+
+### Launch failure artifacts
+
+When a launch is marked as a failure, inspect `latest.json`:
+
+```bash
+jq . ~/.claude/teams/<team>/launch-failure-artifacts/latest.json
+```
+
+The manifest includes:
+- `classification` — why the launch was considered a failure
+- `bootstrapTransportBreadcrumb` — delivery path used
+- Member spawn statuses and redacted logs/traces
+
## Agent replies are missing
Open task logs and teammate messages. Missing replies often come from:
diff --git a/landing/product-docs/index.md b/landing/product-docs/index.md
index ef1de8c3..625c77c5 100644
--- a/landing/product-docs/index.md
+++ b/landing/product-docs/index.md
@@ -57,13 +57,24 @@ Agent Teams is a free desktop app for orchestrating AI agent teams. You are not
+## Next steps after launch
+
+After creating your first team, explore these guides to go further:
+
+- **Runtime setup** - configure Claude, Codex, OpenCode, or multimodel providers: [Configure runtimes](/guide/runtime-setup)
+- **Agent workflow** - understand how agents coordinate through the task board: [Understand workflow](/guide/agent-workflow)
+- **Team brief examples** - learn prompt patterns from real-world briefs: [See examples](/guide/team-brief-examples)
+- **Code review** - inspect diffs, accept or reject changes: [Review changes](/guide/code-review)
+- **Troubleshooting** - diagnose stuck launches, missing teammates, and task failures: [Troubleshoot](/guide/troubleshooting)
+- **Git worktree strategy** - use worktree isolation when multiple teammates edit the same repo in parallel: [Learn about worktrees](/guide/git-worktree-strategy)
+- **Release notes** - see what's new in each version: [View releases](/reference/release-notes)
+
## Reference
-Use the reference pages when you need exact terminology, provider behavior, or privacy boundaries.
+Use the reference pages when you need exact terminology, provider behavior, contributor architecture, or privacy boundaries.
## Product preview
-
diff --git a/landing/product-docs/reference/concepts.md b/landing/product-docs/reference/concepts.md
index 4277d17d..ec27f5a2 100644
--- a/landing/product-docs/reference/concepts.md
+++ b/landing/product-docs/reference/concepts.md
@@ -1,5 +1,5 @@
---
-title: Concepts
+title: Concepts – Agent Teams Docs
description: Core vocabulary for Agent Teams — teams, leads, teammates, tasks, kanban, inboxes, runtimes, and review.
---
@@ -45,7 +45,7 @@ Messages are durable local records. Delivery still depends on the selected runti
An agent block is hidden, agent-only instruction text wrapped with `...`. The UI strips these blocks from normal human-facing display, but agents and runtime delivery can use them for coordination details.
-The current canonical marker is `info_for_agent`; older documents may still contain legacy agent block formats.
+The current canonical marker is `info_for_agent`. Older documents may use fenced code blocks with an `info_for_agent` marker, or XML-style `<agent_block>` tags — these are legacy patterns and should be migrated to `info_for_agent` when encountered. (The original tag name was `agent-block`; the underscore form `<agent_block>` is used in VitePress source to avoid HTML parsing.)
## Context Phase
diff --git a/landing/product-docs/reference/contributor-architecture.md b/landing/product-docs/reference/contributor-architecture.md
new file mode 100644
index 00000000..48357d78
--- /dev/null
+++ b/landing/product-docs/reference/contributor-architecture.md
@@ -0,0 +1,54 @@
+---
+title: Contributor Architecture – Agent Teams Docs
+description: Contributor guide to feature layout, runtime/provider boundaries, hard guardrails, and canonical architecture documents.
+---
+
+# Contributor Architecture
+
+This page is a map for contributors. It points to the canonical repo guidance instead of restating every implementation rule.
+
+## Canonical sources
+
+Use these files as the source of truth when changing the app:
+
+| Need | Canonical source |
+| --- | --- |
+| Repo overview and commands | [README.md](https://github.com/777genius/agent-teams-ai/blob/main/README.md) |
+| Local working conventions | [CLAUDE.md](https://github.com/777genius/agent-teams-ai/blob/main/CLAUDE.md) |
+| Hard guardrails | [AGENT_CRITICAL_GUARDRAILS.md](https://github.com/777genius/agent-teams-ai/blob/main/AGENT_CRITICAL_GUARDRAILS.md) |
+| Medium and large feature layout | [docs/FEATURE_ARCHITECTURE_STANDARD.md](https://github.com/777genius/agent-teams-ai/blob/main/docs/FEATURE_ARCHITECTURE_STANDARD.md) |
+| Agent team launch debugging | [docs/team-management/debugging-agent-teams.md](https://github.com/777genius/agent-teams-ai/blob/main/docs/team-management/debugging-agent-teams.md) |
+
+## Feature layout
+
+Medium and large features should live under `src/features/<feature-name>/` and follow the feature architecture standard. Keep feature internals behind public entrypoints, and avoid deep imports across feature boundaries.
+
+For new work, start with the existing `src/features/recent-projects` slice as the local reference implementation. Small fixes can stay close to the existing code path when creating a feature slice would add more structure than value.
+
+## Runtime and provider boundaries
+
+Agent Teams owns orchestration: teams, tasks, messages, launch state, review UI, diagnostics, and local persistence.
+
+The selected runtime/provider path owns model execution, auth, model availability, rate limits, tool semantics, and runtime-specific transcript evidence. Do not make prompts or UI state compensate for missing auth, missing binaries, rejected model ids, or provider outages. For user-facing setup details, see [Providers and Runtimes](/reference/providers-runtimes).
+
+## Agent team debugging
+
+For launch hangs, OpenCode `registered` / bootstrap-unconfirmed states, missing teammate replies, or suspicious task logs, start from the dedicated debugging runbook. Inspect the newest launch failure artifact under `~/.claude/teams/<team>/launch-failure-artifacts/latest.json`, then correlate UI state with persisted files and runtime-specific evidence.
+
+Avoid broad cleanup while debugging. Stop only the process, lane, team, or smoke run you can identify as belonging to the issue.
+
+## Contributor conventions
+
+- Use `pnpm dev` for the desktop Electron app during normal development.
+- Do not use browser dev mode as a substitute for desktop runtime, IPC, terminal, provider auth, or team lifecycle behavior.
+- Keep Electron main, preload, renderer, shared, and feature responsibilities separate.
+- Use `wrapAgentBlock(text)` for agent-only blocks instead of manually concatenating markers.
+- Prefer focused verification. Avoid broad `lint:fix` or formatting churn unless the task is explicitly about formatting.
+- Treat parsing, task lifecycle, provider/runtime detection, persistence, IPC, Git, and review flows as high-risk areas that need targeted tests or a clear verification path.
+
+## Related pages
+
+- [Runtime setup](/guide/runtime-setup)
+- [Troubleshooting](/guide/troubleshooting)
+- [Code review](/guide/code-review)
+- [Privacy and local data](/reference/privacy-local-data)
diff --git a/landing/product-docs/reference/faq.md b/landing/product-docs/reference/faq.md
index 8950695e..ac83e493 100644
--- a/landing/product-docs/reference/faq.md
+++ b/landing/product-docs/reference/faq.md
@@ -41,7 +41,7 @@ No. Agent Teams is not a cloud code-sync service. Provider-backed model calls ma
## Where are team files stored?
-Team coordination data is stored locally under `~/.claude/teams//`, task files under `~/.claude/tasks//`, and project session data under `~/.claude/projects//` when available.
+Team coordination data is stored locally under `~/.claude/teams/<team>/` (macOS/Linux) or `%APPDATA%\Claude\teams\<team>\` (Windows), task files under `~/.claude/tasks/<team>/` or `%APPDATA%\Claude\tasks\<team>\`, and project session data under `~/.claude/projects/<project>/` when available.
## What can leave my machine?
@@ -56,7 +56,7 @@ Yes. Agents can message teammates, comment on tasks, coordinate across teams, an
Give the lead a concrete outcome, file or feature boundaries, risk limits, and verification expectations. For example:
```text
-Improve the docs quickstart. Keep edits inside landing/product-docs, add practical examples, and run the docs build before marking work done.
+Improve the docs quickstart. Keep edits inside landing/product-docs, add practical examples, and run `pnpm --dir landing docs:build` before marking work done.
```
## Can I review code before accepting it?
diff --git a/landing/product-docs/reference/privacy-local-data.md b/landing/product-docs/reference/privacy-local-data.md
index ce968712..f76643a1 100644
--- a/landing/product-docs/reference/privacy-local-data.md
+++ b/landing/product-docs/reference/privacy-local-data.md
@@ -22,13 +22,16 @@ The desktop app runs on your machine and reads local project/runtime data to pow
Important local locations include:
-| Location | Purpose |
-| --- | --- |
-| `~/.claude/teams//` | Team config, member metadata, inboxes, launch state, bootstrap evidence, runtime diagnostics, sent-message records, kanban state, and review-related team files. |
-| `~/.claude/tasks//` | Durable task JSON files for the team board. |
-| `~/.claude/projects//` | Claude/Codex-style project session files used for session history, context analysis, and transcript-backed UI. |
+| Platform | Location | Purpose |
+| --- | --- | --- |
+| macOS/Linux | `~/.claude/teams/<team>/` | Team config, member metadata, inboxes, launch state, bootstrap evidence, runtime diagnostics, sent-message records, kanban state, and review-related team files. |
+| Windows | `%APPDATA%\Claude\teams\<team>\` | Same — team config, member metadata, inboxes, launch state, and diagnostics. |
+| macOS/Linux | `~/.claude/tasks/<team>/` | Durable task JSON files for the team board. |
+| Windows | `%APPDATA%\Claude\tasks\<team>\` | Same — durable task JSON files. |
+| macOS/Linux | `~/.claude/projects/<project>/` | Claude/Codex-style project session files used for session history, context analysis, and transcript-backed UI. |
+| Windows | `%APPDATA%\Claude\projects\<project>\` | Same — project session files. |
-Exact files can vary by runtime and app version. For launch debugging, the newest evidence is usually under the relevant `~/.claude/teams//` folder.
+Exact files can vary by runtime and app version. For launch debugging, the newest evidence is usually under the relevant `~/.claude/teams/<team>/` (or `%APPDATA%\Claude\teams\<team>\`) folder.
## What can leave your machine
diff --git a/landing/product-docs/reference/providers-runtimes.md b/landing/product-docs/reference/providers-runtimes.md
index d112e7f5..b0d26ece 100644
--- a/landing/product-docs/reference/providers-runtimes.md
+++ b/landing/product-docs/reference/providers-runtimes.md
@@ -38,7 +38,7 @@ The runtime provides:
| Codex | Codex / OpenAI-backed models | Codex-native workflows | Uses Codex runtime integration and Codex auth/account state where available. Some diagnostics are different from Claude transcripts. |
| OpenCode | OpenCode-managed model routing | Multi-provider teams and broad model coverage | OpenCode can route through many model providers. Agent Teams treats OpenCode lanes as runtime-specific evidence and avoids guessing when lane identity is ambiguous. |
-Gemini provider ids exist in internal configuration paths, but Gemini is currently hidden from the main team creation UI while the launch flow remains in development.
+Gemini is available as a supported provider path with Google ADC (gcloud auth), Gemini CLI OAuth, and API key authentication. It appears alongside other providers in the team creation and runtime setup UI when the runtime reports it as available.
## Provider ids
@@ -48,7 +48,7 @@ The app currently recognizes these provider ids in team/runtime configuration:
| --- | --- |
| `anthropic` | Anthropic / Claude Code path |
| `codex` | Codex path |
-| `gemini` | Gemini provider path when exposed by the runtime |
+| `gemini` | Gemini provider path (Google ADC, Gemini CLI, or API key) |
| `opencode` | OpenCode path, including OpenCode-managed provider routing |
Do not read this table as a guarantee that every provider is authenticated, installed, or available for every model on every machine. The runtime status and capability checks are the source of truth for a given launch.
@@ -76,6 +76,8 @@ Agent Teams keeps orchestration provider-aware but not provider-owned:
- model availability, auth, rate limits, and tool behavior remain runtime/provider responsibilities
- OpenCode is the broadest routing path when you want one team to use multiple provider/model lanes
+For contributor-facing boundaries and canonical implementation guidance, see [Contributor Architecture](/reference/contributor-architecture).
+
Recommended patterns:
| Pattern | When it helps | Risk |
diff --git a/landing/product-docs/reference/release-notes.md b/landing/product-docs/reference/release-notes.md
new file mode 100644
index 00000000..b8a10dd4
--- /dev/null
+++ b/landing/product-docs/reference/release-notes.md
@@ -0,0 +1,41 @@
+---
+title: Release Notes – Agent Teams Docs
+description: Release notes and changelog for Agent Teams. Links to the canonical RELEASE.md and CHANGELOG.md for full details.
+---
+
+# Release Notes
+
+Current release: **v1.2.0** (2026-03-31). Active development continues on the `main` branch with unreleased changes for member work-sync, OpenCode delivery hardening, and CI stabilization.
+
+## How releases work
+
+Agent Teams follows [Semantic Versioning](https://semver.org/). Tags pushed to the repository trigger an automated [release workflow](https://github.com/777genius/agent-teams-ai/blob/main/docs/RELEASE.md) that builds signed packages for macOS, Windows, and Linux, then publishes them to GitHub Releases.
+
+## Recent releases
+
+### v1.2.0 — Agent Graph, per-team tool approval, interactive AskUserQuestion
+
+Agent Graph with force-directed visualization and kanban task layout, per-team tool approval controls with readable permission prompts, task comment notifications, and interactive AskUserQuestion buttons. Permission system overhaul with Write/Edit/NotebookEdit seeding and MCP tool catalog integration. See [full changelog](https://github.com/777genius/agent-teams-ai/blob/main/docs/CHANGELOG.md#120---2026-03-31).
+
+### v1.1.0 — React 19 + Electron 40, user-initiated task starts
+
+React 19 + Electron 40 migration, user-initiated task starts from the kanban board, auth troubleshooting guide, syntax highlighting for R/Ruby/PHP/SQL, 3x faster transcript search, WSL/Windows path fixes, and XSS vulnerability fix. See [full changelog](https://github.com/777genius/agent-teams-ai/blob/main/docs/CHANGELOG.md#110---2026-03-25).
+
+### v1.0.0 — Initial public release
+
+First stable build: CLI/auth reliability in packaged apps, IPC hardening, cross-platform packaging with signed macOS builds, open-source governance docs (LICENSE, CONTRIBUTING, CODE_OF_CONDUCT, SECURITY). See [full changelog](https://github.com/777genius/agent-teams-ai/blob/main/docs/CHANGELOG.md#100---2026-03-23).
+
+## Canonical sources
+
+| Document | Description |
+| --- | --- |
+| [RELEASE.md](https://github.com/777genius/agent-teams-ai/blob/main/docs/RELEASE.md) | Release process, versioning guide, artifact naming, auto-update setup, and release notes template. |
+| [CHANGELOG.md](https://github.com/777genius/agent-teams-ai/blob/main/docs/CHANGELOG.md) | Full changelog with all versions, features, improvements, and bug fixes from the user perspective. |
+| [GitHub Releases](https://github.com/777genius/agent-teams-ai/releases) | Downloadable installers for all platforms. |
+
+## Related pages
+
+- [Installation](/guide/installation)
+- [Quickstart](/guide/quickstart)
+- [Contributor architecture](/reference/contributor-architecture)
+- [Developers](/developers/)
diff --git a/landing/product-docs/ru/developers/index.md b/landing/product-docs/ru/developers/index.md
new file mode 100644
index 00000000..6bd8480e
--- /dev/null
+++ b/landing/product-docs/ru/developers/index.md
@@ -0,0 +1,68 @@
+---
+title: Разработчикам – Agent Teams Docs
+description: Входная страница для contributor docs, архитектуры, guardrails, debugging и MCP extension paths в Agent Teams.
+---
+
+# Разработчикам
+
+Эта страница нужна, когда вы меняете Agent Teams, разбираете зависший запуск команды или расширяете runtime через MCP tools. Ссылки ведут в canonical repo docs, чтобы правила реализации не расходились между страницами.
+
+## С чего начать
+
+| Нужно | Открыть |
+| --- | --- |
+| Обзор репозитория, scripts и setup из исходников | [README.md](https://github.com/777genius/agent-teams-ai/blob/main/README.md) |
+| Рабочие правила для агентов и contributors | [CLAUDE.md](https://github.com/777genius/agent-teams-ai/blob/main/CLAUDE.md) |
+| Жёсткие implementation guardrails | [AGENT_CRITICAL_GUARDRAILS.md](https://github.com/777genius/agent-teams-ai/blob/main/AGENT_CRITICAL_GUARDRAILS.md) |
+| Структура medium и large features | [Feature architecture standard](https://github.com/777genius/agent-teams-ai/blob/main/docs/FEATURE_ARCHITECTURE_STANDARD.md) |
+| Debugging launch, bootstrap и teammate messaging | [Agent team debugging runbook](https://github.com/777genius/agent-teams-ai/blob/main/docs/team-management/debugging-agent-teams.md) |
+| Contribution process | [Contributing guide](https://github.com/777genius/agent-teams-ai/blob/main/.github/CONTRIBUTING.md) |
+| Приватность и модель данных | [Privacy and local data](/ru/reference/privacy-local-data) |
+| Релизы / Changelog | [RELEASE.md](https://github.com/777genius/agent-teams-ai/blob/main/docs/RELEASE.md) — [CHANGELOG.md](https://github.com/777genius/agent-teams-ai/blob/main/docs/CHANGELOG.md) |
+
+## Локальный development path
+
+Для обычной разработки запускайте desktop Electron app:
+
+```bash
+pnpm install
+pnpm dev
+```
+
+Browser/web path не заменяет desktop runtime. Desktop mode - поддерживаемый локальный путь, потому что в нём есть IPC, terminals, provider auth, team lifecycle handling, launch diagnostics и runtime bridges, которые используют реальные команды.
+
+## Architecture checkpoints
+
+Перед изменением feature определите её границу:
+
+| Область | Ожидаемое место |
+| --- | --- |
+| Medium или large product feature | `src/features/<feature-name>/` |
+| Electron main process orchestration | `src/main/` |
+| Preload-safe API surface | `src/preload/` |
+| Renderer UI и app state | `src/renderer/` |
+| Shared types и pure helpers | `src/shared/` |
+| Agent Teams board MCP server | `mcp-server/` |
+| Board data controller | `agent-teams-controller/` |
+
+Используйте `src/features/recent-projects` как reference slice для feature organization. Держите cross-process contracts явными и не делайте deep imports через feature boundaries.
+
+## Debugging path
+
+Для launch hangs, OpenCode `registered` / bootstrap-unconfirmed states, missing teammate replies или suspicious task logs:
+
+1. Начните с [debugging runbook](https://github.com/777genius/agent-teams-ai/blob/main/docs/team-management/debugging-agent-teams.md).
+2. Проверьте самый новый artifact pack в `~/.claude/teams/<team>/launch-failure-artifacts/latest.json`.
+3. Откройте `manifest.json` и посмотрите `classification`, bootstrap breadcrumbs, launch diagnostics, member spawn statuses и redacted log tails.
+4. Очищайте только team, run, pane или process, который точно принадлежит smoke test или failed launch.
+
+## MCP development path
+
+Agent Teams использует встроенный MCP server `agent-teams` для board operations. User и project MCP servers добавляют внешние capabilities для runtimes. См. [MCP integration](/ru/guide/mcp-integration) для setup examples, структуры `.mcp.json` и tool registration guidance.
+
+## Related docs
+
+- [Архитектура для контрибьюторов](/ru/reference/contributor-architecture)
+- [Настройка рантайма](/ru/guide/runtime-setup)
+- [MCP интеграция](/ru/guide/mcp-integration)
+- [Диагностика](/ru/guide/troubleshooting)
diff --git a/landing/product-docs/ru/guide/agent-workflow.md b/landing/product-docs/ru/guide/agent-workflow.md
index ab5087b5..69fb7ffe 100644
--- a/landing/product-docs/ru/guide/agent-workflow.md
+++ b/landing/product-docs/ru/guide/agent-workflow.md
@@ -19,25 +19,43 @@ Agent Teams делает работу агентов видимой через t
## Жизненный цикл задачи
+Agent Teams отслеживает каждую задачу в двух независимых измерениях: work status и review state.
+
+| Измерение | Состояния | Описание |
+|-----------|-----------|----------|
+| Work status | `pending`, `in_progress`, `completed` | Отслеживает, ожидает ли задача, активно выполняется или завершена исполнителем |
+| Review state | `none`, `review`, `needsFix`, `approved` | Отслеживает положение задачи в процессе ревью после завершения |
+
+На канбан-доске отображается комбинированное представление, но два измерения движутся независимо.
+
+### Рабочий статус
+
| Этап | Что происходит | Ответственный |
|------|---------------|---------------|
-| Provisioning | Приложение запускает runtime, проверяет, что процесс жив, и ждёт подтверждения bootstrap | Приложение |
-| Planning | Lead создаёт задачи, назначает teammates и задаёт зависимости | Lead или пользователь |
-| In progress | Агенты работают параллельно и обновляют статус задач через board MCP tools | Teammates |
-| Review | Изменения проверяют агенты или вы перед финальным принятием | Team lead или пользователь |
-| Done | Принятая работа остаётся связанной с историей задачи и доступна для инспекции | Пользователь |
+| Pending | Задача создана, но никто ещё не начал работу | Lead или пользователь |
+| In progress | Агенты работают и обновляют статус через board MCP tools | Teammates |
+| Completed | Исполнитель публикует result comment и помечает задачу завершённой | Teammate |
+
+### Статус ревью
+
+| Этап | Что происходит | Ответственный |
+|------|---------------|---------------|
+| None | Задача ещё не на ревью (может быть pending, in progress или только что completed) | — |
+| Review | Запрошено ревью; reviewer проверяет diff и результат | Reviewer |
+| Needs fix | В ходе ревью запрошены правки; исполнитель должен обновить задачу | Teammate (owner) |
+| Approved | Ревью пройдено; задача финализирована | Reviewer |
### Planning → In progress
-Когда teammate берёт задачу, статус на доске меняется на `in_progress`. Агент создаёт task comment с планом работы и продолжает. Все нативные инструменты (read, bash, edit, write) попадают в task log.
+Когда teammate берёт задачу, work status меняется на `in_progress`. Агент создаёт task comment с планом работы и продолжает. Все нативные инструменты (read, bash, edit, write) попадают в task log.
-### In progress → Review
+### Completed → Review
-Когда teammate завершает работу, он публикует result comment и помечает задачу `completed`. Lead затем решает — принять сразу или отправить на ревью.
+Когда teammate завершает работу, он публикует result comment и помечает work status как `completed`. Lead или reviewer могут запросить ревью для начала проверки.
-### Review → Done
+### Review → Approved
-Если изменения в review surface выглядят приемлемо, approve the review. Задача финализируется и связывается со своим diff.
+Если изменения в review surface выглядят приемлемо, утвердите ревью. Задача финализируется и связывается со своим diff.
::: warning Ревью с правками
Если teammate попросили внести правки во время ревью, он должен добавить follow-up comment с исправлениями, после чего lead может approve.
diff --git a/landing/product-docs/ru/guide/code-review.md b/landing/product-docs/ru/guide/code-review.md
index 7c91c8fb..8e3dae1c 100644
--- a/landing/product-docs/ru/guide/code-review.md
+++ b/landing/product-docs/ru/guide/code-review.md
@@ -55,7 +55,7 @@ lang: ru-RU
Пример request-changes comment:
```text
-Please keep the copy improvements, but revert the unrelated runtime wording in the provider table. Add a docs build result before resubmitting.
+Please keep the copy improvements, but revert the unrelated runtime wording in the provider table. Add the `pnpm --dir landing docs:build` result before resubmitting.
```
## Состояния ревью
@@ -87,6 +87,8 @@ Team lead — ревьюер по умолчанию. Вы можете наст
- **Parsing и task lifecycle logic** — изменения в task references, chunking или filtering могут сломать доставку сообщений
- **Persistence и code review flows** — изменения в хранении задач или review state должны оставаться консистентными через IPC layers
+Canonical feature layout и hard guardrail links смотрите в [Архитектуре для контрибьюторов](/ru/reference/contributor-architecture).
+
## Верификация
Лучше запускать focused verification commands. Broad formatting или lint-fix команды не стоит использовать, если задача явно не про форматирование.
diff --git a/landing/product-docs/ru/guide/create-team.md b/landing/product-docs/ru/guide/create-team.md
index 771af2bc..658edbd6 100644
--- a/landing/product-docs/ru/guide/create-team.md
+++ b/landing/product-docs/ru/guide/create-team.md
@@ -31,7 +31,7 @@ lang: ru-RU
Микс провайдеров в одной команде поддерживается — например, Claude lead с OpenCode builder-ами.
::: info
-Поддержка Gemini в разработке и появится в списке провайдеров, когда будет готова.
+Gemini — поддерживаемый провайдер. Варианты auth смотрите в разделе [Провайдеры и рантаймы](/ru/reference/providers-runtimes).
:::
## Хороший team brief
diff --git a/landing/product-docs/ru/guide/git-worktree-strategy.md b/landing/product-docs/ru/guide/git-worktree-strategy.md
new file mode 100644
index 00000000..0128cbc4
--- /dev/null
+++ b/landing/product-docs/ru/guide/git-worktree-strategy.md
@@ -0,0 +1,99 @@
+---
+title: Git и стратегия worktree – Документация Agent Teams
+description: Как выбирать main worktree, feature branches или OpenCode worktree isolation для parallel agent work.
+lang: ru-RU
+---
+
+# Git и стратегия worktree
+
+Git даёт Agent Teams самый сильный review path: narrow diffs, branch visibility, task-scoped changes и более безопасную parallel work.
+
+## Выбор стратегии
+
+| Strategy | Когда использовать | Tradeoff |
+| --- | --- | --- |
+| Main worktree | Solo work, docs-only edits или один teammate за раз | Просто, но parallel edits могут конфликтовать |
+| Feature branch | Одна team работает над одним coherent change | Чистый review target, но teammates всё ещё делят files |
+| Worktree isolation | Несколько OpenCode teammates могут параллельно менять один repo | Лучше isolation, но merge/review требует дисциплины |
+
+Начинайте просто. Включайте worktree isolation, когда parallel edits вероятны, а не потому что каждому task нужен отдельный checkout.
+
+## Когда включать изоляцию worktree
+
+Включайте для OpenCode teammates, когда:
+
+- два или больше teammates могут менять один repository одновременно
+- task может запускать formatters, code generators или broad tests
+- нужно держать branch и diff каждого teammate отдельно
+- lead workspace dirty и не должен получать прямые edits
+
+Оставляйте выключенным, когда:
+
+- task read-only
+- один teammate владеет всеми edits
+- repo не Git-tracked
+- нужен runtime path, который не поддерживает этот isolation mode
+
+::: warning
+Worktree isolation сейчас применяется к OpenCode members и требует Git-tracked project.
+:::
+
+## Гигиена веток
+
+Перед parallel work:
+
+```bash
+git status --short
+git branch --show-current
+```
+
+По возможности используйте clean branch. Если main worktree уже содержит user changes, скажите agents не revert unrelated files и держать task scope узким.
+
+Рекомендуемый branch style:
+
+```text
+agent/<area>/<task-name>
+```
+
+Примеры:
+
+```text
+agent/docs/mcp-guide
+agent/review/task-log-filtering
+agent/ui/code-review-polish
+```
+
+## Процесс ревью
+
+Для isolated worktrees проверяйте diff teammate до merge или apply в main workspace.
+
+1. Убедитесь, что task result comment называет changed scope и verification.
+2. Проверьте task diff в review UI.
+3. Запросите changes в task, если diff трогает unrelated files.
+4. Approve только когда tests или manual checks соответствуют risk.
+5. Merge или apply changes осознанно.
+
+Не auto-merge worktree output только потому, что task complete. Completion значит, что agent считает работу ready for review.
+
+## Политика разрешения конфликтов
+
+| Situation | Action |
+| --- | --- |
+| Два teammates меняют один file | Pause one task или назначьте одного owner для integration |
+| Generated files changed broadly | Требуйте comment с generator и command |
+| Main worktree имеет unrelated changes | Preserve them и review только task-owned changes |
+| Worktree branch diverges | Rebase или merge manually после review, не внутри vague agent task |
+
+## Пример промпта для задачи
+
+```text
+Implement the settings validation fix in your assigned worktree. Keep edits inside src/features/settings and focused tests. Do not touch provider auth or task storage. Post the test command and result before completing the task.
+```
+
+Этот prompt работает, потому что называет allowed area, sensitive boundaries и completion evidence.
+
+## Связанные руководства
+
+- [Создание команды](/ru/guide/create-team)
+- [Код-ревью](/ru/guide/code-review)
+- [Примеры team brief](/ru/guide/team-brief-examples)
diff --git a/landing/product-docs/ru/guide/installation.md b/landing/product-docs/ru/guide/installation.md
index 80001327..0dd4899d 100644
--- a/landing/product-docs/ru/guide/installation.md
+++ b/landing/product-docs/ru/guide/installation.md
@@ -31,11 +31,11 @@ Agent Teams распространяется как desktop-приложение
| ------------------ | ---------------------------------------------------------- |
| Claude (Anthropic) | Claude Code CLI login или API key |
| Codex (OpenAI) | Codex CLI login или API key |
-| Gemini (Google) | _В разработке_ |
+| Gemini (Google) | Google ADC, Gemini CLI или API key |
| OpenCode | API key для поддерживаемого бэкенда (например, OpenRouter) |
::: info
-Поддержка провайдера Gemini в разработке. Вы можете подготовить доступ сейчас, но он не появится в редакторе команды, пока не будет готов.
+Gemini — поддерживаемый провайдер. Варианты auth смотрите в разделе [Провайдеры и рантаймы](/ru/reference/providers-runtimes).
:::
Для запуска из исходников также нужны:
diff --git a/landing/product-docs/ru/guide/mcp-integration.md b/landing/product-docs/ru/guide/mcp-integration.md
new file mode 100644
index 00000000..80958cca
--- /dev/null
+++ b/landing/product-docs/ru/guide/mcp-integration.md
@@ -0,0 +1,102 @@
+---
+title: MCP интеграция – Документация Agent Teams
+description: Как использовать MCP в Agent Teams для board operations, координации teammates и внешних tool servers.
+lang: ru-RU
+---
+
+# MCP интеграция
+
+Agent Teams использует MCP двумя практическими способами:
+
+| Слой | Что делает | Кто использует |
+| --- | --- | --- |
+| Board MCP tools | Создают, стартуют, комментируют, завершают и читают tasks | Agents и leads |
+| External MCP servers | Добавляют инструменты вроде browser, design, docs или company systems | Users и настроенные runtimes |
+
+Держите эти слои отдельно. Board MCP нужен для координации внутри Agent Teams. External MCP servers - это дополнительные инструменты для runtimes.
+
+## Board MCP workflow
+
+Agents должны использовать board MCP tools, когда работа относится к task:
+
+1. Прочитать свежий task context.
+2. Стартовать task только когда реально начинают работу.
+3. Добавлять task comments для blockers, plan и final results.
+4. Завершать task после result comment.
+5. Отправлять короткое сообщение, если lead или teammate должен увидеть результат.
+
+Пример flow:
+
+```text
+task_get -> task_start -> edit/test -> task_add_comment -> task_complete -> message_send
+```
+
+Direct message подходит для координации. Task comment подходит для durable task history.
+
+::: tip
+Если заметка влияет на review, verification, changed scope или blocker, пишите её в task.
+:::
+
+## External MCP servers
+
+Используйте external MCP servers, когда teammate нужен устойчивый tool connection, а не один prompt с pasted context.
+
+Хорошие случаи:
+
+- browser или website testing tools
+- design или product data tools
+- internal docs и search systems
+- issue tracker или support systems
+- database inspection tools с read-only credentials
+
+Плохие случаи:
+
+- secrets, вставленные в prompts
+- one-off files, которые проще attached напрямую
+- tools, которые меняют production systems без review
+- широкий local filesystem access, когда достаточно project scope
+
+## Scopes
+
+Agent Teams распознаёт shared и project-oriented MCP scopes.
+
+| Scope | Когда использовать |
+| --- | --- |
+| User или Global | Один server нужен в разных projects |
+| Project или Local | Server относится к одному repository, workspace или team context |
+
+Выбирайте самый узкий scope, который всё ещё удобен. Project-scoped servers легче проверять на review, потому что tool привязан к изменяемому project.
+
+## Setup checklist
+
+Перед task, который зависит от MCP server:
+
+1. Установите или настройте server.
+2. Проверьте, что он виден в installed MCP list.
+3. Запустите diagnostics, если app их предлагает.
+4. Начните с low-risk read-only task.
+5. Укажите ожидаемый MCP tool use в task description или team brief.
+
+Если diagnostics падают, сначала чините setup. Лучший prompt не исправит missing command, неправильный config path или rejected credentials.
+
+## Task example
+
+```text
+Audit the docs home page with the browser MCP. Check desktop and mobile widths, capture any layout issue as a task comment, and only edit landing/product-docs files. Run `pnpm --dir landing docs:build` before completion.
+```
+
+Такой task работает, потому что называет tool, surface, write boundary и verification step.
+
+## Safety rules
+
+- Не выдавайте каждому teammate все MCP servers по умолчанию.
+- Не добавляйте write-capable tools в broad teams без review.
+- Для inspection tasks предпочитайте read-only credentials.
+- Production-impacting tool use фиксируйте через explicit task comments и review.
+- MCP diagnostic failures считайте setup failures, а не agent failures.
+
+## Related guides
+
+- [Настройка рантайма](/ru/guide/runtime-setup)
+- [Примеры team brief](/ru/guide/team-brief-examples)
+- [Работа агентов](/ru/guide/agent-workflow)
diff --git a/landing/product-docs/ru/guide/quickstart.md b/landing/product-docs/ru/guide/quickstart.md
index c79dbb74..79dc5e48 100644
--- a/landing/product-docs/ru/guide/quickstart.md
+++ b/landing/product-docs/ru/guide/quickstart.md
@@ -16,6 +16,10 @@ lang: ru-RU
Приложение бесплатное и с открытым кодом. Выбранный runtime может требовать доступ к провайдеру — подробности в разделе [Установка](/ru/guide/installation).
:::
+::: info
+Desktop-приложение — основной продукт. Agent Teams также работает в браузере для разработки, но браузерный режим не имеет полного desktop IPC, терминала, provider auth и lifecycle. Для обычной разработки используйте `pnpm dev` (Electron), а не браузерный режим.
+:::
+
## 2. Откройте проект
Запустите приложение и выберите директорию проекта, где агенты будут работать. Agent Teams читает локальные файлы проекта и runtime/session state, чтобы показывать задачи, логи, diffs и активность команды.
@@ -43,7 +47,7 @@ git status --short
| OpenCode | Для multi-model команд и большого числа provider backends |
::: info
-Поддержка Gemini в разработке и появится в списке рантаймов, когда будет готова.
+Gemini — поддерживаемый провайдер. Варианты auth смотрите в разделе [Провайдеры и рантаймы](/ru/reference/providers-runtimes).
:::
Подробная настройка каждого провайдера — в разделе [Настройка рантайма](/ru/guide/runtime-setup).
@@ -85,7 +89,7 @@ opencode --version
Хороший первый prompt содержит scope, safety boundaries и verification:
```text
-Improve the docs quickstart. Keep edits inside landing/product-docs. Add practical examples, preserve existing VitePress syntax, and run the docs build before marking tasks done.
+Improve the docs quickstart. Keep edits inside landing/product-docs. Add practical examples, preserve existing VitePress syntax, and run `pnpm --dir landing docs:build` before marking tasks done.
```
Избегайте размытых prompts вроде "make the app better" для первого запуска. Lead может дробить большие цели, но хороший input даёт более маленькие tasks и чище review.
diff --git a/landing/product-docs/ru/guide/runtime-setup.md b/landing/product-docs/ru/guide/runtime-setup.md
index 0e33927e..9e9a2491 100644
--- a/landing/product-docs/ru/guide/runtime-setup.md
+++ b/landing/product-docs/ru/guide/runtime-setup.md
@@ -1,8 +1,3 @@
----
-title: Настройка рантайма
-description: Настройте Claude Code, Codex или OpenCode рантаймы и аутентификацию провайдеров для команд агентов.
----
-
---
title: Настройка рантайма – Документация Agent Teams
description: Конфигурация Claude Code, Codex или OpenCode. Авторизация, провайдеры, multimodel mode и предзапусковые проверки.
@@ -11,7 +6,7 @@ lang: ru-RU
# Настройка рантайма
-Agent Teams — coordination layer. Model work выполняется через локальные runtimes и providers.
+Agent Teams - координационный слой. Работа моделей выполняется через локальные рантаймы и провайдеры.
## Предварительные требования
@@ -46,7 +41,7 @@ command -v opencode
Приложение по возможности определяет доступные runtimes и ведёт настройку через UI.
-Gemini встречается во внутренних provider lists, но сейчас скрыт из основного team creation UI, пока launch experience отмечен как in development.
+Gemini — поддерживаемый провайдер с аутентификацией через Google ADC (`gcloud auth application-default login`), Gemini CLI OAuth или API key. Настройка доступна через UI управления провайдерами, когда Gemini backend обнаружен.
## Доступ к провайдеру
@@ -115,6 +110,16 @@ Codex-native launches используют Codex account state и model catalog
Если OpenCode запускается, но teammate не становится deliverable, сначала смотрите lane evidence, а не предполагаете, что model проигнорировала prompt. См. [Диагностика](/ru/guide/troubleshooting#opencode-registered-но-bootstrap-не-подтверждён).
+### Gemini
+
+Gemini поддерживает три метода аутентификации:
+
+- **Google ADC** — запустите `gcloud auth application-default login` для авторизации через Google Application Default Credentials.
+- **Gemini CLI** — запустите `gemini login`, если Gemini CLI установлен.
+- **API key** — установите `GEMINI_API_KEY` в переменные окружения или настройте через UI управления провайдерами.
+
+Приложение автоматически определяет доступный метод auth и показывает провайдера Gemini в UI настройки рантайма и создания команд, когда backend доступен.
+
## Multimodel-режим
Multimodel-режим может направлять работу через разные provider backends в OpenCode-совместимой конфигурации. Используйте его, когда нужна гибкость провайдеров или разные model lanes для teammates.
diff --git a/landing/product-docs/ru/guide/team-brief-examples.md b/landing/product-docs/ru/guide/team-brief-examples.md
new file mode 100644
index 00000000..d1a79896
--- /dev/null
+++ b/landing/product-docs/ru/guide/team-brief-examples.md
@@ -0,0 +1,131 @@
+---
+title: Примеры team brief – Документация Agent Teams
+description: Практические шаблоны team brief для small fixes, docs work, implementation tasks, review и risky areas.
+lang: ru-RU
+---
+
+# Примеры team brief
+
+Хороший team brief даёт lead достаточно структуры, чтобы создать small tasks, но не требует заранее расписать каждую деталь реализации.
+
+Используйте форму:
+
+```text
+Outcome:
+Scope:
+Boundaries:
+Coordination:
+Verification:
+Review:
+```
+
+## Minimal brief
+
+Для маленькой low-risk работы.
+
+```text
+Outcome: Improve the quickstart so a new user can launch one team successfully.
+Scope: Keep edits inside landing/product-docs.
+Boundaries: Do not rewrite the whole docs structure.
+Coordination: Create one or two tasks, keep comments on the task.
+Verification: Run `pnpm --dir landing docs:build`.
+Review: Summarize changed pages and any remaining gaps.
+```
+
+## Implementation brief
+
+Для code changes внутри одной feature area.
+
+```text
+Outcome: Add a focused improvement to task comment filtering.
+Scope: Work inside the task/comment feature files unless a shared helper is clearly needed.
+Boundaries: Do not change task storage format or review state semantics.
+Coordination: Split parser, UI, and tests into separate tasks if they can be reviewed independently.
+Verification: Run the focused unit tests first, then the feature typecheck if touched.
+Review: Call out parsing edge cases and any behavior that affects existing task comments.
+```
+
+## Docs brief
+
+Для documentation и guide work.
+
+```text
+Outcome: Draft practical workflow guides from the docs audit.
+Scope: Add concise VitePress pages under landing/product-docs/guide.
+Boundaries: Avoid moving existing navigation hubs owned by other tasks.
+Coordination: Check related docs tasks before editing nav.
+Verification: Run `pnpm --dir landing docs:build`.
+Review: Include links added to sidebar and any pages intentionally left as drafts.
+```
+
+## Review-heavy brief
+
+Для risky areas: IPC, provider auth, persistence, Git или task lifecycle logic.
+
+```text
+Outcome: Fix the launch failure without changing successful launch behavior.
+Scope: Start from the newest launch-failure artifact and the affected runtime adapter.
+Boundaries: Do not change provider prompts until setup and runtime evidence are inspected.
+Coordination: Make one diagnostic task and one fix task if the cause is confirmed.
+Verification: Run focused tests and one desktop smoke check when practical.
+Review: Lead must inspect the diff before approval.
+```
+
+## Mixed provider brief
+
+Когда teammates работают на разных provider/model lanes.
+
+```text
+Outcome: Implement and review a small feature using separate builder and reviewer lanes.
+Scope: Builder edits the feature. Reviewer inspects only the task diff and tests.
+Boundaries: Do not switch model ids mid-task unless launch fails before work begins.
+Coordination: Builder posts result comment first. Reviewer posts findings as task comments.
+Verification: Builder runs focused tests. Reviewer checks failure output and changed scope.
+Review: Lead approves only after reviewer comments are resolved.
+```
+
+## Agent blocks в briefs
+
+Agent blocks - это скрытый текст для агентов, обёрнутый в маркеры `<info_for_agent>...</info_for_agent>`. Приложение убирает их из обычного отображения, но оставляет для координации агентов. Используйте их, когда brief должен сказать агентам то, что будет шумом для человека.
+
+Пример - brief, который указывает lead, как разделить работу, не показывая инструкции пользователю:
+
+```text
+Outcome: Add a dark mode toggle to the application settings.
+Scope: Settings UI, theme context, and CSS variables.
+Boundaries: Do not change existing light theme values or provider auth screens.
+
+<info_for_agent>
+Split this into three tasks: (1) theme context and CSS vars, (2) toggle component and settings wiring, (3) dark mode preview in existing docs screenshots if practical.
+</info_for_agent>
+```
+
+Блок оставляет human-facing brief чистым, а lead получает структурированные указания по разделению задач.
+
+## What to avoid
+
+| Weak brief | Better replacement |
+| --- | --- |
+| "Improve the app" | Назовите workflow, files и success check |
+| "Fix all docs" | Выберите одну guide group и build command |
+| "Use the best model" | Назовите provider/model choices или оставьте app defaults |
+| "Refactor as needed" | Укажите modules, которые можно менять |
+| "Make it production ready" | Определите review, tests и rollout checks |
+
+## Before launch
+
+Проверьте перед стартом:
+
+1. Brief называет concrete outcome.
+2. Risk boundaries explicit.
+3. Lead может разделить работу на reviewable tasks.
+4. Verification commands указаны, если известны.
+5. Sensitive areas требуют review before approval.
+
+Если brief всё ещё широкий, запустите solo или small team и попросите сначала task plan, а не implementation.
+
+## Related guides
+
+- [Создание команды](/ru/guide/create-team)
+- [MCP интеграция](/ru/guide/mcp-integration)
+- [Git и стратегия worktree](/ru/guide/git-worktree-strategy)
diff --git a/landing/product-docs/ru/guide/troubleshooting.md b/landing/product-docs/ru/guide/troubleshooting.md
index bb5f435a..64778cb1 100644
--- a/landing/product-docs/ru/guide/troubleshooting.md
+++ b/landing/product-docs/ru/guide/troubleshooting.md
@@ -1,8 +1,3 @@
----
-title: Диагностика
-description: Исправление ошибок запуска, пропавших ответов агентов, rate limits, проблем auth и lane bootstrap в Agent Teams.
----
-
---
title: Диагностика – Документация Agent Teams
description: Решение проблем с запуском команд, отсутствующими ответами агентов, rate limits, CLI auth и lane bootstrap stalls через локальные диагностики.
@@ -31,6 +26,8 @@ lang: ru-RU
Если OpenCode показывает `registered`, но bootstrap не подтверждён, сначала inspect artifacts, прежде чем менять team prompts.
+Contributor/debugging details находятся в [Архитектуре для контрибьюторов](/ru/reference/contributor-architecture), где есть ссылка на canonical debugging runbook для agent teams.
+
Посмотрите на последний artifact неудачного запуска:
```bash
@@ -55,6 +52,57 @@ jq '.activeRunId, .entries' ~/.claude/teams//.opencode-runtime/lanes//
+```
+
+Ключевые файлы и что они показывают:
+
+- `launch-state.json` — состояние запуска/активности участников (`.teamLaunchState`, `.summary`, `.members`)
+- `bootstrap-journal.jsonl` — упорядоченные события bootstrap от CLI/runtime (`tail -80`)
+- `bootstrap-state.json` — сводка фазы bootstrap
+- `config.json` — конфигурация провайдера, модели и проекта
+- `inboxes/*.json` и `sentMessages.json` — состояние доставки сообщений
+
+```bash
+jq '.teamLaunchState, .summary, .members' ~/.claude/teams/<team-name>/launch-state.json
+tail -80 ~/.claude/teams/<team-name>/bootstrap-journal.jsonl 2>/dev/null
+```
+
+### OpenCode runtime evidence
+
+Для OpenCode участников доказательство сессии находится в lane runtime store:
+
+- `.opencode-runtime/lanes.json` — индекс lane с состоянием
+- `.opencode-runtime/lanes/<lane-id>/manifest.json` — `activeRunId` и записи evidence
+- `.opencode-runtime/lanes/<lane-id>/opencode-sessions.json` — зафиксированные записи сессий
+
+Ожидаемое здоровое состояние: состояние lane `active`, manifest содержит `activeRunId` хотя бы с одной записью evidence, участник имеет `bootstrapConfirmed: true`.
+
+```bash
+jq '.lanes' ~/.claude/teams/<team-name>/.opencode-runtime/lanes.json 2>/dev/null
+find ~/.claude/teams/<team-name>/.opencode-runtime -maxdepth 3 -type f | sort
+```
+
+### Артефакты неудачного запуска
+
+Когда запуск помечен как неудачный, проверьте `latest.json`:
+
+```bash
+jq '.' ~/.claude/teams/<team-name>/launch-failure-artifacts/latest.json
+```
+
+Манифест включает:
+- `classification` — почему запуск считался неудачным
+- `bootstrapTransportBreadcrumb` — использованный путь доставки
+- Статусы старта участников и логи/трейсы со скрытыми чувствительными данными (redacted)
+
## Не видны ответы агента
Откройте task logs и teammate messages. Пропавшие replies часто связаны с:
diff --git a/landing/product-docs/ru/index.md b/landing/product-docs/ru/index.md
index 88b84ee8..f74cde89 100644
--- a/landing/product-docs/ru/index.md
+++ b/landing/product-docs/ru/index.md
@@ -1,6 +1,7 @@
---
title: Документация Agent Teams – Запускайте команды AI-агентов из локального desktop-приложения
description: Документация Agent Teams, бесплатного desktop-приложения для оркестрации AI-агентов. Создавайте команды, наблюдайте за канбан-доской, ревьюйте изменения и координируйте Claude, Codex, OpenCode и multimodel workflows.
+lang: ru-RU
layout: home
hero:
name: Документация Agent Teams
@@ -57,13 +58,24 @@ Agent Teams - бесплатное desktop-приложение для орке
+## Что дальше после запуска
+
+После создания первой команды изучите эти руководства:
+
+- **Настройка рантайма** - настройте Claude, Codex, OpenCode или multimodel-провайдеров: [Настроить рантаймы](/ru/guide/runtime-setup)
+- **Workflow агентов** - как агенты координируются через task board: [Разобрать workflow](/ru/guide/agent-workflow)
+- **Примеры team briefs** - паттерны промптов из реальных примеров: [Примеры](/ru/guide/team-brief-examples)
+- **Код-ревью** - проверяйте diff, принимайте или отклоняйте изменения: [Ревью изменений](/ru/guide/code-review)
+- **Диагностика** - исправляйте проблемы запуска и missing teammates: [Диагностика](/ru/guide/troubleshooting)
+- **Стратегия git worktree** - используйте изоляцию worktree, когда несколько участников редактируют один репозиторий параллельно: [О работе с worktree](/ru/guide/git-worktree-strategy)
+- **Релизы** - что нового в каждой версии: [Релизы](/ru/reference/release-notes)
+
## Справочник
-Используйте справочник, когда нужны точные термины, поведение провайдеров или границы приватности.
+Используйте справочник, когда нужны точные термины, поведение провайдеров, contributor architecture или границы приватности.
## Превью продукта
-
diff --git a/landing/product-docs/ru/reference/concepts.md b/landing/product-docs/ru/reference/concepts.md
index 559a8db6..9dd40211 100644
--- a/landing/product-docs/ru/reference/concepts.md
+++ b/landing/product-docs/ru/reference/concepts.md
@@ -1,11 +1,6 @@
----
-title: Концепции
-description: Основной словарь Agent Teams — команды, lead-агенты, teammates, задачи, канбан, inboxes, рантаймы и review.
----
-
---
title: Концепции – Документация Agent Teams
-description: Основные термины Agent Teams: teams, leads, teammates, tasks, kanban, inboxes, agent blocks, context phases, runtimes, providers.
+description: "Основные термины Agent Teams: teams, leads, teammates, tasks, kanban, inboxes, agent blocks, context phases, runtimes, providers."
lang: ru-RU
---
@@ -51,7 +46,7 @@ Messages - долговечные локальные записи. Но дост
Agent Block - скрытый agent-only instruction text, обёрнутый в `<info_for_agent>...</info_for_agent>`. UI убирает такие блоки из обычного human-facing display, но agents и runtime delivery могут использовать их для coordination details.
-Текущий canonical marker - `info_for_agent`; в старых документах могут встречаться legacy agent block formats.
+Текущий canonical marker — `info_for_agent`. В старых документах могут встречаться fenced code blocks с маркером ````info_for_agent```` или XML-подобные теги `` — это устаревшие паттерны, которые стоит заменить на `info_for_agent` при встрече.
## Context Phase
diff --git a/landing/product-docs/ru/reference/contributor-architecture.md b/landing/product-docs/ru/reference/contributor-architecture.md
new file mode 100644
index 00000000..ec95528a
--- /dev/null
+++ b/landing/product-docs/ru/reference/contributor-architecture.md
@@ -0,0 +1,55 @@
+---
+title: Архитектура для контрибьюторов – Документация Agent Teams
+description: Карта для контрибьюторов по feature layout, runtime/provider boundaries, hard guardrails и canonical architecture docs.
+lang: ru-RU
+---
+
+# Архитектура для контрибьюторов
+
+Эта страница - карта для контрибьюторов. Она ведёт к canonical repo guidance и не дублирует все implementation rules.
+
+## Канонические источники
+
+Используйте эти файлы как source of truth при изменениях в приложении:
+
+| Нужно | Канонический источник |
+| --- | --- |
+| Обзор репозитория и команды | [README.md](https://github.com/777genius/agent-teams-ai/blob/main/README.md) |
+| Локальные рабочие conventions | [CLAUDE.md](https://github.com/777genius/agent-teams-ai/blob/main/CLAUDE.md) |
+| Жёсткие guardrails | [AGENT_CRITICAL_GUARDRAILS.md](https://github.com/777genius/agent-teams-ai/blob/main/AGENT_CRITICAL_GUARDRAILS.md) |
+| Layout средних и больших features | [docs/FEATURE_ARCHITECTURE_STANDARD.md](https://github.com/777genius/agent-teams-ai/blob/main/docs/FEATURE_ARCHITECTURE_STANDARD.md) |
+| Диагностика запуска agent teams | [docs/team-management/debugging-agent-teams.md](https://github.com/777genius/agent-teams-ai/blob/main/docs/team-management/debugging-agent-teams.md) |
+
+## Feature layout
+
+Средние и большие features должны жить в `src/features/<feature-name>/` и следовать feature architecture standard. Держите internals за public entrypoints и не делайте deep imports через границы feature.
+
+Для новой работы ориентируйтесь на существующий slice `src/features/recent-projects`. Маленькие fixes можно оставлять рядом с текущим code path, если новый feature slice добавит больше структуры, чем пользы.
+
+## Runtime и provider boundaries
+
+Agent Teams отвечает за orchestration: teams, tasks, messages, launch state, review UI, diagnostics и local persistence.
+
+Выбранный runtime/provider path отвечает за model execution, auth, model availability, rate limits, tool semantics и runtime-specific transcript evidence. Не пытайтесь чинить prompts или UI state вместо missing auth, missing binaries, rejected model ids или provider outages. User-facing детали настройки смотрите в [Провайдерах и рантаймах](/ru/reference/providers-runtimes).
+
+## Диагностика agent teams
+
+При launch hangs, OpenCode `registered` / bootstrap-unconfirmed states, missing teammate replies или подозрительных task logs начинайте с dedicated debugging runbook. Сначала смотрите newest launch failure artifact в `~/.claude/teams/<team-name>/launch-failure-artifacts/latest.json`, затем сопоставляйте UI state с persisted files и runtime-specific evidence.
+
+Не делайте broad cleanup во время диагностики. Останавливайте только process, lane, team или smoke run, который точно относится к проблеме.
+
+## Contributor conventions
+
+- Используйте `pnpm dev` для desktop Electron app при обычной разработке.
+- Не используйте browser dev mode как замену desktop runtime, IPC, terminal, provider auth или team lifecycle behavior.
+- Разделяйте ответственности Electron main, preload, renderer, shared и features.
+- Используйте `wrapAgentBlock(text)` для agent-only blocks вместо ручной склейки markers.
+- Предпочитайте focused verification. Избегайте broad `lint:fix` или formatting churn, если задача не про formatting.
+- Parsing, task lifecycle, provider/runtime detection, persistence, IPC, Git и review flows считайте high-risk зонами, где нужны targeted tests или clear verification path.
+
+## Связанные страницы
+
+- [Настройка рантайма](/ru/guide/runtime-setup)
+- [Диагностика](/ru/guide/troubleshooting)
+- [Код-ревью](/ru/guide/code-review)
+- [Приватность и локальные данные](/ru/reference/privacy-local-data)
diff --git a/landing/product-docs/ru/reference/faq.md b/landing/product-docs/ru/reference/faq.md
index 7d79fbed..dfa488c0 100644
--- a/landing/product-docs/ru/reference/faq.md
+++ b/landing/product-docs/ru/reference/faq.md
@@ -1,8 +1,3 @@
----
-title: FAQ
-description: Часто задаваемые вопросы об Agent Teams — цена, доступ к моделям, рантаймы, приватность, ревью и диагностика.
----
-
---
title: FAQ – Документация Agent Teams
description: Часто задаваемые вопросы о цене, доступе к моделям, настройке рантаймов, приватности данных, worktree isolation и код-ревью.
@@ -47,7 +42,7 @@ opencode --version
## Где хранятся team files?
-Team coordination data хранится локально в `~/.claude/teams//`, task files - в `~/.claude/tasks//`, а project session data - в `~/.claude/projects//`, когда она доступна.
+Team coordination data хранится локально в `~/.claude/teams/<team-name>/` (macOS/Linux) или `%APPDATA%\Claude\teams\<team-name>\` (Windows), task files - в `~/.claude/tasks/<team-name>/` или `%APPDATA%\Claude\tasks\<team-name>\`, а project session data - в `~/.claude/projects/<project>/` или `%APPDATA%\Claude\projects\<project>\`, когда она доступна.
## Что может выйти с моей машины?
@@ -62,7 +57,7 @@ Prompt context, selected file contents, tool results, command output, task text,
Дайте lead конкретный outcome, file или feature boundaries, risk limits и verification expectations. Например:
```text
-Improve the docs quickstart. Keep edits inside landing/product-docs, add practical examples, and run the docs build before marking work done.
+Improve the docs quickstart. Keep edits inside landing/product-docs, add practical examples, and run `pnpm --dir landing docs:build` before marking work done.
```
## Можно ревьюить код перед принятием?
diff --git a/landing/product-docs/ru/reference/privacy-local-data.md b/landing/product-docs/ru/reference/privacy-local-data.md
index a62dbd98..b2a06b3b 100644
--- a/landing/product-docs/ru/reference/privacy-local-data.md
+++ b/landing/product-docs/ru/reference/privacy-local-data.md
@@ -1,8 +1,3 @@
----
-title: Приватность и локальные данные
-description: Что desktop-приложение Agent Teams хранит локально и какие данные могут покинуть машину через provider-backed models.
----
-
---
title: Приватность и локальные данные – Документация Agent Teams
description: Что Agent Teams хранит локально, что может покинуть машину через provider-backed model calls, и практические рекомендации по приватности.
@@ -28,13 +23,16 @@ Desktop app работает на вашей машине и читает local
Важные local locations:
-| Location | Purpose |
-| --- | --- |
-| `~/.claude/teams//` | Team config, member metadata, inboxes, launch state, bootstrap evidence, runtime diagnostics, sent-message records, kanban state и review-related team files. |
-| `~/.claude/tasks//` | Durable task JSON files для team board. |
-| `~/.claude/projects//` | Claude/Codex-style project session files для session history, context analysis и transcript-backed UI. |
+| Платформа | Location | Purpose |
+| --- | --- | --- |
+| macOS/Linux | `~/.claude/teams/<team-name>/` | Team config, member metadata, inboxes, launch state, bootstrap evidence, runtime diagnostics, sent-message records, kanban state и review-related team files. |
+| Windows | `%APPDATA%\Claude\teams\<team-name>\` | То же — team config, member metadata, inboxes, launch state и diagnostics. |
+| macOS/Linux | `~/.claude/tasks/<team-name>/` | Durable task JSON files для team board. |
+| Windows | `%APPDATA%\Claude\tasks\<team-name>\` | То же — durable task JSON files. |
+| macOS/Linux | `~/.claude/projects/<project>/` | Claude/Codex-style project session files для session history, context analysis и transcript-backed UI. |
+| Windows | `%APPDATA%\Claude\projects\<project>\` | То же — project session files. |
-Точные файлы зависят от runtime и версии app. Для launch debugging самые свежие evidence обычно лежат в соответствующей папке `~/.claude/teams//`.
+Точные файлы зависят от runtime и версии app. Для launch debugging самые свежие evidence обычно лежат в соответствующей папке `~/.claude/teams/<team-name>/` (или `%APPDATA%\Claude\teams\<team-name>\`).
## Что может выйти с машины
diff --git a/landing/product-docs/ru/reference/providers-runtimes.md b/landing/product-docs/ru/reference/providers-runtimes.md
index f633569c..14e13f06 100644
--- a/landing/product-docs/ru/reference/providers-runtimes.md
+++ b/landing/product-docs/ru/reference/providers-runtimes.md
@@ -1,8 +1,3 @@
----
-title: Провайдеры и рантаймы
-description: Поддерживаемые runtime paths, provider ids, model ids, multi-provider стратегия и capability checks в Agent Teams.
----
-
---
title: Провайдеры и рантаймы – Документация Agent Teams
description: Поддерживаемые runtime paths (Claude Code, Codex, OpenCode), provider IDs, модели, multi-provider стратегии и capability checks.
@@ -44,7 +39,7 @@ Runtime отвечает за:
| Codex | Codex / OpenAI-backed models | Для Codex-native workflows | Использует Codex runtime integration и Codex auth/account state, когда они доступны. Часть diagnostics отличается от Claude transcripts. |
| OpenCode | OpenCode-managed model routing | Для multi-provider teams и широкой model coverage | OpenCode может маршрутизировать через множество model providers. Agent Teams считает OpenCode lanes runtime-specific evidence и не угадывает attribution при ambiguous lane identity. |
-Gemini provider ids существуют во внутренних configuration paths, но Gemini сейчас скрыт из основного team creation UI, пока launch flow остаётся in development.
+Gemini — поддерживаемый провайдер с Google ADC, Gemini CLI и API key аутентификацией. Он доступен в UI создания команд и настройки рантайма, когда runtime сообщает о его доступности.
## Provider ids
@@ -54,7 +49,7 @@ Gemini provider ids существуют во внутренних configuration
| --- | --- |
| `anthropic` | Anthropic / Claude Code path |
| `codex` | Codex path |
-| `gemini` | Gemini provider path, когда его отдаёт runtime |
+| `gemini` | Gemini provider path (Google ADC, Gemini CLI или API key) |
| `opencode` | OpenCode path, включая OpenCode-managed provider routing |
Эта таблица не гарантирует, что каждый provider authenticated, installed или доступен для каждой модели на каждой машине. Runtime status и capability checks - source of truth для конкретного launch.
@@ -82,6 +77,8 @@ Agent Teams остаётся provider-aware, но не provider-owned:
- model availability, auth, rate limits и tool behavior остаются ответственностью runtime/provider
- OpenCode - основной путь, когда одной team нужны разные provider/model lanes
+Contributor-facing границы и canonical implementation guidance смотрите в [Архитектуре для контрибьюторов](/ru/reference/contributor-architecture).
+
Рекомендуемые patterns:
| Pattern | When it helps | Risk |
diff --git a/landing/product-docs/ru/reference/release-notes.md b/landing/product-docs/ru/reference/release-notes.md
new file mode 100644
index 00000000..75ed7fdb
--- /dev/null
+++ b/landing/product-docs/ru/reference/release-notes.md
@@ -0,0 +1,42 @@
+---
+title: Релизы – Документация Agent Teams
+description: Release notes и changelog для Agent Teams. Ссылки на канонические RELEASE.md и CHANGELOG.md.
+lang: ru-RU
+---
+
+# Релизы
+
+Текущий релиз: **v1.2.0** (2026-03-31). Активная разработка продолжается в ветке `main`, где уже есть ещё не выпущенные изменения для member work-sync, OpenCode delivery hardening и CI stabilization.
+
+## Как публикуются релизы
+
+Agent Teams следует [Semantic Versioning](https://semver.org/). Пуш тега в репозиторий запускает автоматический [release workflow](https://github.com/777genius/agent-teams-ai/blob/main/docs/RELEASE.md), который собирает подписанные пакеты для macOS, Windows и Linux и публикует их в GitHub Releases.
+
+## Последние релизы
+
+### v1.2.0 — Agent Graph, per-team tool approval, interactive AskUserQuestion
+
+Agent Graph с force-directed визуализацией и kanban layout, per-team tool approval controls с понятными permission prompts, уведомления о комментариях к задачам и интерактивные AskUserQuestion кнопки. Permission system overhaul с Write/Edit/NotebookEdit seeding и MCP tool catalog. Полный [changelog](https://github.com/777genius/agent-teams-ai/blob/main/docs/CHANGELOG.md#120---2026-03-31).
+
+### v1.1.0 — React 19 + Electron 40, user-initiated task starts
+
+React 19 + Electron 40 migration, запуск задач пользователем с kanban board, auth troubleshooting guide, подсветка синтаксиса для R/Ruby/PHP/SQL, ускорение поиска транскриптов в 3 раза, исправления WSL/Windows paths и XSS vulnerability. Полный [changelog](https://github.com/777genius/agent-teams-ai/blob/main/docs/CHANGELOG.md#110---2026-03-25).
+
+### v1.0.0 — Первый публичный релиз
+
+Первый стабильный билд: надёжность CLI/auth в packaged apps, IPC hardening, cross-platform packaging с подписанными macOS сборками, open-source governance docs (LICENSE, CONTRIBUTING, CODE_OF_CONDUCT, SECURITY). Полный [changelog](https://github.com/777genius/agent-teams-ai/blob/main/docs/CHANGELOG.md#100---2026-03-23).
+
+## Канонические источники
+
+| Документ | Описание |
+| --- | --- |
+| [RELEASE.md](https://github.com/777genius/agent-teams-ai/blob/main/docs/RELEASE.md) | Процесс релиза, версионирование, имена артефактов, auto-update setup и шаблон release notes. |
+| [CHANGELOG.md](https://github.com/777genius/agent-teams-ai/blob/main/docs/CHANGELOG.md) | Полный changelog со всеми версиями, фичами, улучшениями и исправлениями. |
+| [GitHub Releases](https://github.com/777genius/agent-teams-ai/releases) | Установочные файлы для всех платформ. |
+
+## Связанные страницы
+
+- [Установка](/ru/guide/installation)
+- [Быстрый старт](/ru/guide/quickstart)
+- [Архитектура для контрибьюторов](/ru/reference/contributor-architecture)
+- [Разработчикам](/ru/developers/)
diff --git a/package.json b/package.json
index b61c4d8d..8777c1bc 100644
--- a/package.json
+++ b/package.json
@@ -19,7 +19,6 @@
"main": "dist-electron/main/index.cjs",
"scripts": {
"dev": "node ./scripts/dev-with-runtime.mjs",
- "dev:web": "node ./scripts/dev-web.mjs",
"dev:kill": "node bin/kill-dev.js",
"opencode:prove-mixed-recovery": "node ./scripts/prove-opencode-mixed-recovery.mjs",
"opencode:prove-semantic-gauntlet": "node ./scripts/prove-opencode-semantic-gauntlet.mjs",
@@ -30,6 +29,7 @@
"team:prove-provider-launch-stress": "node ./scripts/prove-provider-launch-stress.mjs",
"team:prove-launch-matrix": "pnpm exec vitest run --maxWorkers 1 --minWorkers 1 test/main/services/team/TeamAgentLaunchMatrix.safe-e2e.test.ts",
"team:smoke-changes-real-data": "tsx scripts/team-changes-real-data-smoke.ts",
+ "smoke:codex-runtime-install": "tsx scripts/smoke/codex-runtime-install.ts",
"prebuild": "tsx scripts/fetch-pricing-data.ts && pnpm --filter agent-teams-controller build && pnpm --filter agent-teams-mcp build",
"build": "node --max-old-space-size=8192 ./node_modules/electron-vite/bin/electron-vite.js build",
"dist": "node ./scripts/electron-builder/dist.mjs --mac --win --linux",
@@ -38,6 +38,7 @@
"dist:mac:x64": "node ./scripts/electron-builder/dist.mjs --mac --x64",
"dist:win": "node ./scripts/electron-builder/dist.mjs --win",
"dist:linux": "node ./scripts/electron-builder/dist.mjs --linux",
+ "smoke:packaged": "node ./scripts/electron-builder/smokePackagedApp.cjs",
"preview": "electron-vite preview",
"typecheck": "tsc --noEmit",
"typecheck:workspace": "pnpm typecheck && pnpm --filter agent-teams-mcp typecheck && pnpm --filter agent-teams-mcp typecheck:test",
@@ -55,6 +56,7 @@
"check:ci": "pnpm check:workspace:ci && pnpm lint && pnpm lint:mcp",
"fix": "pnpm lint:fix && pnpm format",
"quality": "pnpm check && pnpm format:check && npx knip",
+ "guard:runtime-artifacts": "node ./scripts/ci/forbid-runtime-artifacts.cjs",
"test:chunks": "tsx test/test-chunk-building.ts",
"test:semantic": "tsx test/test-semantic-steps.ts",
"test:noise": "tsx test/test-noise-filtering.ts",
@@ -147,10 +149,12 @@
"diff": "^8.0.3",
"dompurify": "^3.4.2",
"electron-updater": "^6.7.3",
+ "fast-json-stringify": "^6.4.0",
"fastify": "^5.8.5",
"highlight.js": "^11.11.1",
"idb-keyval": "^6.2.2",
"isbinaryfile": "^6.0.0",
+ "json-schema-ref-resolver": "^3.0.0",
"lucide-react": "^0.577.0",
"mdast-util-to-hast": "^13.2.1",
"mermaid": "^11.15.0",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 6b7e307e..4bb07233 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -213,6 +213,9 @@ importers:
electron-updater:
specifier: ^6.7.3
version: 6.7.3
+ fast-json-stringify:
+ specifier: ^6.4.0
+ version: 6.4.0
fastify:
specifier: ^5.8.5
version: 5.8.5
@@ -225,6 +228,9 @@ importers:
isbinaryfile:
specifier: ^6.0.0
version: 6.0.0
+ json-schema-ref-resolver:
+ specifier: ^3.0.0
+ version: 3.0.0
lucide-react:
specifier: ^0.577.0
version: 0.577.0(react@19.2.4)
@@ -6792,8 +6798,8 @@ packages:
fast-json-stable-stringify@2.1.0:
resolution: {integrity: sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==}
- fast-json-stringify@6.3.0:
- resolution: {integrity: sha512-oRCntNDY/329HJPlmdNLIdogNtt6Vyjb1WuT01Soss3slIdyUp8kAcDU3saQTOquEK8KFVfwIIF7FebxUAu+yA==}
+ fast-json-stringify@6.4.0:
+ resolution: {integrity: sha512-ibRCQ0GZKJIQ+P3Et1h0LhPgp3PMTYk0MH8O+kW3lNYsvmaQww5Nn3f1jf73Q0jR1Yz3a1CDP4/NZD3vOajWJQ==}
fast-levenshtein@2.0.6:
resolution: {integrity: sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==}
@@ -12337,7 +12343,7 @@ snapshots:
'@fastify/fast-json-stringify-compiler@5.0.3':
dependencies:
- fast-json-stringify: 6.3.0
+ fast-json-stringify: 6.4.0
'@fastify/forwarded@3.0.1': {}
@@ -18240,7 +18246,7 @@ snapshots:
fast-json-stable-stringify@2.1.0: {}
- fast-json-stringify@6.3.0:
+ fast-json-stringify@6.4.0:
dependencies:
'@fastify/merge-json-schemas': 0.2.1
ajv: 8.18.0
@@ -18269,7 +18275,7 @@ snapshots:
'@fastify/proxy-addr': 5.1.0
abstract-logging: 2.0.1
avvio: 9.1.0
- fast-json-stringify: 6.3.0
+ fast-json-stringify: 6.4.0
find-my-way: 9.4.0
light-my-request: 6.6.0
pino: 10.3.1
diff --git a/runtime.lock.json b/runtime.lock.json
index 5f2ca466..853de4b5 100644
--- a/runtime.lock.json
+++ b/runtime.lock.json
@@ -1,27 +1,27 @@
{
- "version": "0.0.31",
- "sourceRef": "v0.0.31",
+ "version": "0.0.33",
+ "sourceRef": "v0.0.33",
"sourceRepository": "777genius/agent_teams_orchestrator",
"releaseRepository": "777genius/agent-teams-ai",
"releaseTag": "v1.2.0",
"assets": {
"darwin-arm64": {
- "file": "agent-teams-runtime-darwin-arm64-v0.0.31.tar.gz",
+ "file": "agent-teams-runtime-darwin-arm64-v0.0.33.tar.gz",
"archiveKind": "tar.gz",
"binaryName": "claude-multimodel"
},
"darwin-x64": {
- "file": "agent-teams-runtime-darwin-x64-v0.0.31.tar.gz",
+ "file": "agent-teams-runtime-darwin-x64-v0.0.33.tar.gz",
"archiveKind": "tar.gz",
"binaryName": "claude-multimodel"
},
"linux-x64": {
- "file": "agent-teams-runtime-linux-x64-v0.0.31.tar.gz",
+ "file": "agent-teams-runtime-linux-x64-v0.0.33.tar.gz",
"archiveKind": "tar.gz",
"binaryName": "claude-multimodel"
},
"win32-x64": {
- "file": "agent-teams-runtime-win32-x64-v0.0.31.zip",
+ "file": "agent-teams-runtime-win32-x64-v0.0.33.zip",
"archiveKind": "zip",
"binaryName": "claude-multimodel.exe"
}
diff --git a/scripts/ci/forbid-runtime-artifacts.cjs b/scripts/ci/forbid-runtime-artifacts.cjs
new file mode 100644
index 00000000..d61bc467
--- /dev/null
+++ b/scripts/ci/forbid-runtime-artifacts.cjs
@@ -0,0 +1,26 @@
+// CI guard: exits with status 1 when runtime release artifacts are tracked by git.
+// Assumes it is run from the repository root, because the prefix checks below compare
+// against root-relative paths.
+const { execFileSync } = require('node:child_process');
+
+// The only file that may be tracked under resources/runtime/.
+const allowedRuntimeFiles = new Set(['resources/runtime/.gitkeep']);
+
+// execFileSync defaults to a 1 MiB maxBuffer; a large repository's file listing can exceed
+// that and abort the guard with ENOBUFS instead of producing a verdict.
+const GIT_LS_FILES_MAX_BUFFER = 64 * 1024 * 1024;
+
+/**
+ * Lists every path tracked by git.
+ *
+ * @returns {string[]} Tracked paths as printed by `git ls-files -z`.
+ */
+function trackedFiles() {
+  // -z prints NUL-terminated paths; the split leaves a trailing empty entry, dropped by filter.
+  return execFileSync('git', ['ls-files', '-z'], {
+    encoding: 'utf8',
+    maxBuffer: GIT_LS_FILES_MAX_BUFFER,
+  })
+    .split('\0')
+    .filter(Boolean);
+}
+
+// A tracked file is forbidden when it lives under .runtime-download/, or under
+// resources/runtime/ without being explicitly allowed.
+const forbidden = trackedFiles().filter((file) => {
+  if (file.startsWith('.runtime-download/')) return true;
+  if (file.startsWith('resources/runtime/') && !allowedRuntimeFiles.has(file)) return true;
+  return false;
+});
+
+if (forbidden.length > 0) {
+  console.error('Runtime release artifacts must not be committed.');
+  console.error('These files are downloaded from GitHub Releases during dev/release builds:');
+  for (const file of forbidden) {
+    console.error(`- ${file}`);
+  }
+  process.exit(1);
+}
+
+console.log('Runtime artifact guard passed.');
diff --git a/scripts/electron-builder/smokePackagedApp.cjs b/scripts/electron-builder/smokePackagedApp.cjs
new file mode 100644
index 00000000..68594830
--- /dev/null
+++ b/scripts/electron-builder/smokePackagedApp.cjs
@@ -0,0 +1,130 @@
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const { spawn } = require('node:child_process');
+
+// Overall deadline for the smoke run, in milliseconds; override with PACKAGED_SMOKE_TIMEOUT_MS.
+const STARTUP_TIMEOUT_MS = Number(process.env.PACKAGED_SMOKE_TIMEOUT_MS ?? 30_000);
+// How long the app must keep running after the required markers appear, in milliseconds;
+// override with PACKAGED_SMOKE_STABLE_MS.
+const POST_STARTUP_STABLE_MS = Number(process.env.PACKAGED_SMOKE_STABLE_MS ?? 8_000);
+// Substrings that must all appear in the captured output before startup counts as complete.
+const REQUIRED_LOG_MARKERS = ['renderer did-finish-load'];
+// Any match in the captured output fails the smoke run immediately.
+const FAILURE_PATTERNS = [
+ /Cannot find module/i,
+ /MODULE_NOT_FOUND/i,
+ /Failed to start HTTP server/i,
+ /Unable to set login item/i,
+ /\[DEP0180\]/i,
+ /DeprecationWarning: fs\.Stats constructor is deprecated/i,
+];
+
+/**
+ * Reports a smoke-test failure and terminates the process with exit status 1.
+ *
+ * @param {string} message - Failure description, printed with the script prefix.
+ * @param {string} [log] - Captured app output; printed only when it is not blank.
+ */
+function fail(message, log = '') {
+  console.error(`[smokePackagedApp] ${message}`);
+  const capturedOutput = log.trim();
+  if (capturedOutput) {
+    console.error('--- packaged app log ---');
+    console.error(capturedOutput);
+  }
+  process.exit(1);
+}
+
+/**
+ * Locates the launchable binary inside an unpacked application bundle.
+ *
+ * - darwin: first executable file in `<bundle>/Contents/MacOS`.
+ * - win32: first `.exe` in the bundle root whose name does not contain "uninstall".
+ * - linux: a well-known name in the bundle root (the unpacked package.json name first),
+ *   otherwise the first executable file in the bundle root.
+ *
+ * Calls fail() when nothing matches or the platform is not one of the three above.
+ *
+ * @param {string} bundlePath - Absolute path of the unpacked bundle.
+ * @param {string} platform - 'darwin', 'win32', or 'linux'.
+ * @returns {string} Path of the executable to spawn.
+ */
+function findExecutable(bundlePath, platform) {
+  // Regular file with at least one execute permission bit set.
+  const isExecutableFile = (fullPath) => {
+    const stats = fs.statSync(fullPath);
+    return stats.isFile() && (stats.mode & 0o111) !== 0;
+  };
+  const firstExecutableIn = (dir) =>
+    fs.readdirSync(dir).find((entry) => isExecutableFile(path.join(dir, entry)));
+
+  switch (platform) {
+    case 'darwin': {
+      const macOsDir = path.join(bundlePath, 'Contents', 'MacOS');
+      const binaryName = firstExecutableIn(macOsDir);
+      if (!binaryName) fail(`No executable found in ${macOsDir}`);
+      return path.join(macOsDir, binaryName);
+    }
+
+    case 'win32': {
+      const exeName = fs.readdirSync(bundlePath).find((entry) => {
+        const lowered = entry.toLowerCase();
+        return lowered.endsWith('.exe') && !lowered.includes('uninstall');
+      });
+      if (!exeName) fail(`No .exe found in ${bundlePath}`);
+      return path.join(bundlePath, exeName);
+    }
+
+    case 'linux': {
+      const packageJsonPath = path.join(bundlePath, 'resources', 'app.asar.unpacked', 'package.json');
+      let packageName;
+      if (fs.existsSync(packageJsonPath)) {
+        packageName = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8')).name;
+      }
+
+      // Try the known binary names before falling back to a directory scan.
+      const knownNames = [packageName, 'agent-teams-ai', 'Agent Teams UI'].filter(Boolean);
+      for (const knownName of knownNames) {
+        const candidate = path.join(bundlePath, knownName);
+        if (fs.existsSync(candidate)) return candidate;
+      }
+
+      const binaryName = firstExecutableIn(bundlePath);
+      if (!binaryName) fail(`No executable found in ${bundlePath}`);
+      return path.join(bundlePath, binaryName);
+    }
+
+    default:
+      fail(`Unsupported platform: ${platform}`);
+  }
+}
+
+/**
+ * Launches the packaged app with a throwaway user-data directory and watches its output.
+ *
+ * Succeeds once every REQUIRED_LOG_MARKERS entry has appeared and the app has then kept
+ * running for POST_STARTUP_STABLE_MS, all before STARTUP_TIMEOUT_MS elapses. Fails through
+ * fail() (exit status 1) when a FAILURE_PATTERNS entry matches the output, the app cannot
+ * be launched, the app exits on its own, or the deadline passes.
+ *
+ * CLI arguments: <bundlePath> <platform>, where platform is darwin, win32, or linux.
+ */
+async function main() {
+  const [bundlePathArg, platform] = process.argv.slice(2);
+  if (!bundlePathArg || !platform) {
+    fail('Usage: node ./scripts/electron-builder/smokePackagedApp.cjs <bundlePath> <platform>');
+  }
+
+  const bundlePath = path.resolve(bundlePathArg);
+  const executable = findExecutable(bundlePath, platform);
+  // Fresh profile directory so the run does not read or modify a real user profile.
+  const userDataDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-teams-smoke-'));
+  const args = [`--user-data-dir=${userDataDir}`];
+  const child = spawn(executable, args, {
+    env: {
+      ...process.env,
+      AGENT_TEAMS_PACKAGED_SMOKE: '1',
+    },
+    stdio: ['ignore', 'pipe', 'pipe'],
+  });
+
+  // stdout and stderr are merged into one buffer; markers and failure patterns are
+  // matched against the combined text.
+  let log = '';
+  child.stdout.on('data', (chunk) => {
+    log += chunk.toString();
+  });
+  child.stderr.on('data', (chunk) => {
+    log += chunk.toString();
+  });
+
+  // Without a listener, a launch failure (for example ENOENT or EACCES) is raised as an
+  // unhandled 'error' event and 'exit' may never fire; report it through fail() instead.
+  child.on('error', (error) => {
+    fail(`Failed to launch ${executable}: ${error.message}`, log);
+  });
+
+  const exitPromise = new Promise((resolve) => {
+    child.on('exit', (code, signal) => resolve({ code, signal }));
+  });
+
+  const deadline = Date.now() + STARTUP_TIMEOUT_MS;
+  let startupSeenAt = null;
+  while (Date.now() < deadline) {
+    if (FAILURE_PATTERNS.some((pattern) => pattern.test(log))) {
+      child.kill();
+      fail('Detected startup failure pattern', log);
+    }
+
+    if (startupSeenAt === null && REQUIRED_LOG_MARKERS.every((marker) => log.includes(marker))) {
+      startupSeenAt = Date.now();
+    }
+
+    if (startupSeenAt !== null && Date.now() - startupSeenAt >= POST_STARTUP_STABLE_MS) {
+      child.kill();
+      console.log(`[smokePackagedApp] OK ${platform}: ${bundlePath}`);
+      return;
+    }
+
+    // Poll every 250 ms, waking early if the app exits; any exit here is premature
+    // because the stability window has not completed yet.
+    const exit = await Promise.race([
+      exitPromise,
+      new Promise((resolve) => setTimeout(() => resolve(null), 250)),
+    ]);
+    if (exit) {
+      fail(`Packaged app exited before startup completed: code=${exit.code} signal=${exit.signal}`, log);
+    }
+  }
+
+  child.kill();
+  fail(`Timed out after ${STARTUP_TIMEOUT_MS}ms waiting for packaged startup`, log);
+}
+
+main().catch((error) => fail(error?.stack || String(error)));
diff --git a/scripts/smoke/codex-runtime-install.ts b/scripts/smoke/codex-runtime-install.ts
new file mode 100644
index 00000000..24a636ff
--- /dev/null
+++ b/scripts/smoke/codex-runtime-install.ts
@@ -0,0 +1,170 @@
+#!/usr/bin/env tsx
+
+import { execFile } from 'node:child_process';
+import { existsSync } from 'node:fs';
+import { mkdtemp, readFile, rm, stat } from 'node:fs/promises';
+import os from 'node:os';
+import path from 'node:path';
+import { promisify } from 'node:util';
+
+import {
+ CodexRuntimeInstallerService,
+ resolveAppManagedCodexRuntimeBinaryPath,
+ resolveVerifiedAppManagedCodexRuntimeBinaryPath,
+} from '@features/codex-runtime-installer/main/infrastructure/CodexRuntimeInstallerService';
+import { CodexBinaryResolver } from '@main/services/infrastructure/codexAppServer/CodexBinaryResolver';
+import { getAppDataPath, setAppDataBasePath } from '@main/utils/pathDecoder';
+
+// Promise-returning wrapper around child_process.execFile.
+const execFileAsync = promisify(execFile);
+// Timeout, in milliseconds, passed to the `--version` probe of the installed binary.
+const VERSION_TIMEOUT_MS = 15_000;
+
+/**
+ * Fields this smoke test reads from the `runtimes/codex/current.json` manifest under the
+ * app data path. All are optional here; the smoke run asserts the ones it requires.
+ */
+interface CodexRuntimeSmokeManifest {
+ rootVersion?: string;
+ platformVersion?: string;
+ platformTarget?: string;
+ binaryPath?: string;
+ integrity?: string;
+}
+
+/** Summary of a successful smoke run; printed as JSON by the script entry point. */
+interface CodexRuntimeSmokeReport {
+ platform: NodeJS.Platform;
+ arch: string;
+ appDataPath: string;
+ binaryPath: string;
+ statusVersion: string | null;
+ versionStdout: string;
+ resolverVersion: string | null;
+ rootVersion: string | null;
+ platformVersion: string | null;
+ platformTarget: string | null;
+}
+
+/**
+ * Throws an Error carrying `message` unless `condition` is truthy.
+ * The `asserts` return type lets the type checker narrow `condition` afterwards.
+ */
+function assertCondition(condition: unknown, message: string): asserts condition {
+  if (condition) return;
+  throw new Error(message);
+}
+
+/**
+ * Reports whether `childPath` lies strictly inside `parentPath`.
+ *
+ * @param parentPath - Directory that should contain the child.
+ * @param childPath - Path to test.
+ * @returns `true` for a strict descendant; `false` for the same path, a path that climbs
+ *   out via `..`, or a path with no relative form (for example another Windows drive).
+ */
+function isInsidePath(parentPath: string, childPath: string): boolean {
+  const relativePath = path.relative(parentPath, childPath);
+  // Empty means both paths are identical; absolute means no relative route exists.
+  if (!relativePath || path.isAbsolute(relativePath)) return false;
+  // Only a leading `..` segment escapes the parent. A plain prefix test would also reject
+  // legitimate children whose first segment merely begins with two dots, such as `..cache`.
+  return relativePath !== '..' && !relativePath.startsWith(`..${path.sep}`);
+}
+
+/**
+ * Reads and parses the Codex runtime manifest at `<appDataPath>/runtimes/codex/current.json`.
+ *
+ * @param appDataPath - App data root that holds the `runtimes` directory.
+ * @returns The parsed manifest; its shape is cast, not validated.
+ * @throws If the file cannot be read or does not contain valid JSON.
+ */
+async function readManifest(appDataPath: string): Promise<CodexRuntimeSmokeManifest> {
+  const manifestPath = path.join(appDataPath, 'runtimes', 'codex', 'current.json');
+  const raw = await readFile(manifestPath, 'utf8');
+  return JSON.parse(raw) as CodexRuntimeSmokeManifest;
+}
+
+/**
+ * Runs `<binaryPath> --version` and checks that the output names a codex-cli x.y.z version.
+ *
+ * @param binaryPath - Path of the binary to execute.
+ * @returns stdout and stderr joined by a newline, then trimmed.
+ * @throws If the process fails or exceeds VERSION_TIMEOUT_MS, or the output does not match.
+ */
+async function assertExecutableVersion(binaryPath: string): Promise<string> {
+  const { stdout, stderr } = await execFileAsync(binaryPath, ['--version'], {
+    timeout: VERSION_TIMEOUT_MS,
+    windowsHide: true,
+  });
+  // Both streams are inspected, so the version is found on whichever one it is printed to.
+  const output = `${stdout ?? ''}\n${stderr ?? ''}`.trim();
+  assertCondition(
+    /\bcodex-cli\s+\d+\.\d+\.\d+\b/i.test(output),
+    `Unexpected version output: ${output}`
+  );
+  return output;
+}
+
+/**
+ * Installs the Codex runtime into a temporary app-data root and verifies the result.
+ *
+ * Checks, in order: the install status and binary path, that the binary is a file inside
+ * `<appData>/runtimes/codex`, the manifest fields, that all three path resolvers agree with
+ * the install status, and that the binary and the resolver both report a version.
+ *
+ * The app-data override and resolver cache are always reset afterwards. The temporary root
+ * is deleted unless CODEX_RUNTIME_SMOKE_KEEP_TEMP=1.
+ *
+ * @returns A report describing the installed runtime.
+ * @throws If any step or assertion fails.
+ */
+async function runSmoke(): Promise<CodexRuntimeSmokeReport> {
+  const tempRoot = await mkdtemp(path.join(os.tmpdir(), 'codex-runtime-smoke-'));
+  const keepTemp = process.env.CODEX_RUNTIME_SMOKE_KEEP_TEMP === '1';
+  // Point app data at the temp root and drop cached resolutions before installing.
+  setAppDataBasePath(tempRoot);
+  CodexBinaryResolver.clearCache();
+
+  try {
+    const service = new CodexRuntimeInstallerService();
+    const status = await service.install();
+    assertCondition(status.installed, `Codex runtime install failed: ${JSON.stringify(status)}`);
+    assertCondition(status.binaryPath, 'Codex runtime install did not return a binary path');
+    assertCondition(
+      path.isAbsolute(status.binaryPath),
+      `Binary path is not absolute: ${status.binaryPath}`
+    );
+    assertCondition(existsSync(status.binaryPath), `Binary does not exist: ${status.binaryPath}`);
+
+    const binaryStat = await stat(status.binaryPath);
+    assertCondition(binaryStat.isFile(), `Binary path is not a file: ${status.binaryPath}`);
+
+    const appDataPath = getAppDataPath();
+    assertCondition(
+      isInsidePath(path.join(appDataPath, 'runtimes', 'codex'), status.binaryPath),
+      `Binary path is outside the app-managed Codex runtime root: ${status.binaryPath}`
+    );
+
+    const manifest = await readManifest(appDataPath);
+    assertCondition(
+      manifest.binaryPath === status.binaryPath,
+      'Manifest binary path does not match install status'
+    );
+    assertCondition(
+      typeof manifest.integrity === 'string' && manifest.integrity.startsWith('sha512-'),
+      'Manifest integrity is missing sha512 metadata'
+    );
+    assertCondition(typeof manifest.rootVersion === 'string', 'Manifest rootVersion is missing');
+    assertCondition(
+      typeof manifest.platformVersion === 'string',
+      'Manifest platformVersion is missing'
+    );
+    assertCondition(
+      typeof manifest.platformTarget === 'string',
+      'Manifest platformTarget is missing'
+    );
+
+    // Every resolver must return the binary that was just installed.
+    const appManagedPath = resolveAppManagedCodexRuntimeBinaryPath();
+    const verifiedPath = await resolveVerifiedAppManagedCodexRuntimeBinaryPath();
+    const resolvedPath = await CodexBinaryResolver.resolve();
+    assertCondition(
+      appManagedPath === status.binaryPath,
+      'resolveAppManagedCodexRuntimeBinaryPath mismatch'
+    );
+    assertCondition(
+      verifiedPath === status.binaryPath,
+      'resolveVerifiedAppManagedCodexRuntimeBinaryPath mismatch'
+    );
+    assertCondition(
+      resolvedPath === status.binaryPath,
+      'CodexBinaryResolver did not prefer the app-managed binary'
+    );
+
+    const versionStdout = await assertExecutableVersion(status.binaryPath);
+    const resolverVersion = await CodexBinaryResolver.resolveVersion(resolvedPath);
+    assertCondition(
+      typeof resolverVersion === 'string' && /^\d+\.\d+\.\d+/.test(resolverVersion),
+      `CodexBinaryResolver returned an invalid version: ${resolverVersion}`
+    );
+
+    return {
+      platform: process.platform,
+      arch: process.arch,
+      appDataPath,
+      binaryPath: status.binaryPath,
+      statusVersion: status.version ?? null,
+      versionStdout,
+      resolverVersion,
+      rootVersion: manifest.rootVersion,
+      platformVersion: manifest.platformVersion,
+      platformTarget: manifest.platformTarget,
+    };
+  } finally {
+    // Undo the global overrides whether or not the run succeeded.
+    CodexBinaryResolver.clearCache();
+    setAppDataBasePath(null);
+    if (keepTemp) {
+      console.log(`CODEX_RUNTIME_SMOKE_KEEP_TEMP=1, keeping temp root: ${tempRoot}`);
+    } else {
+      await rm(tempRoot, { recursive: true, force: true });
+    }
+  }
+}
+
+// Script entry point: print the report as pretty-printed JSON on success.
+runSmoke()
+ .then((report) => {
+ console.log(JSON.stringify(report, null, 2));
+ })
+ .catch((error) => {
+ console.error(error);
+ // Sets the exit code without calling process.exit().
+ process.exitCode = 1;
+ });
diff --git a/src/features/CLAUDE.md b/src/features/CLAUDE.md
index ec1c1b6e..32bbc565 100644
--- a/src/features/CLAUDE.md
+++ b/src/features/CLAUDE.md
@@ -19,13 +19,60 @@ Default location for new feature work:
- `src/features//`
-Before adding or moving code:
+Before adding a medium or large feature:
- decide whether the feature is full, thin, or process-limited
-- add only the layers the feature actually owns
-- expose production callers through public entrypoints only
-- keep tests close to the layer they verify under `test/features//` or
- feature-local `__tests__` when that is the established local pattern
+- start with the layer set the feature actually owns; do not add placeholder
+ folders just to match the full template
+- create explicit public entrypoints for every layer production callers need
+- put shared DTOs, channel names, and API fragments in `contracts/`
+- keep business policy in `core/domain` and use-case orchestration in
+ `core/application`
+- keep Electron, IPC, HTTP, file system, process, and provider specifics outside
+ `core/`
+- wire runtime dependencies from `main/composition/` when the feature owns main
+ process behavior
+- expose preload bridges through `preload/index.ts` and renderer surfaces
+ through `renderer/index.ts`
+- add focused tests for the layers that carry behavior
+
+When modifying an existing feature:
+
+- preserve the feature's current shape unless the change introduces a real new
+ boundary
+- route app shell and cross-feature imports through public entrypoints
+- move duplicated rules toward `core/domain` before adding another adapter copy
+- keep transport validation and normalization close to the boundary that receives
+ the data
+- update the feature README or local notes when the public surface or intended
+ shape changes
+- keep local README examples concrete and file-based; link back to the standard
+ for architecture rules instead of restating them
+
+Public entrypoint expectations:
+
+- `contracts/index.ts` exports only browser-safe contracts, constants, and
+ normalizers intended for cross-process use
+- `main/index.ts` exports composition and registration surfaces for main-process
+ callers
+- `preload/index.ts` exports bridge creation only
+- `renderer/index.ts` exports reusable renderer components, hooks, or utilities
+ that are intentionally consumed outside the feature
+- root `index.ts` is optional; use it only when the feature deliberately owns a
+ stable public barrel
+
+Testing expectations:
+
+- test pure domain rules directly and keep those tests independent of runtime
+ services
+- test application use cases with ports or fakes, not Electron or real provider
+ processes
+- test adapter mapping, boundary normalization, and renderer utilities where they
+ can regress user-visible behavior
+- prefer `test/features/<feature-name>/...` for cross-layer coverage; feature-local
+ `__tests__` are fine when the surrounding feature already uses that pattern
+- for docs-only changes, verify links and examples instead of running broad test
+ suites
Do not duplicate architecture rules here. Keep architecture rules centralized in
[../../docs/FEATURE_ARCHITECTURE_STANDARD.md](../../docs/FEATURE_ARCHITECTURE_STANDARD.md).
diff --git a/src/features/README.md b/src/features/README.md
index 4e9851ff..8901e31d 100644
--- a/src/features/README.md
+++ b/src/features/README.md
@@ -3,22 +3,38 @@
This directory contains the canonical home for medium and large feature slices.
Before creating or refactoring a feature, read:
+
- [Feature Architecture Standard](../../docs/FEATURE_ARCHITECTURE_STANDARD.md)
- [Feature-local agent guidance](./CLAUDE.md)
-Reference implementation:
-- `src/features/recent-projects`
-- `src/features/agent-graph`
+Reference examples:
+
+- [`recent-projects`](./recent-projects/README.md) - full cross-process feature
+ with contracts, core, main, preload, renderer, and focused tests
+- [`agent-graph`](./agent-graph/README.md) - thin feature with `core/domain` and
+ renderer integration only
+- `codex-model-catalog` and `team-runtime-lanes` - process-limited features
+ that omit renderer or preload layers when they do not own those boundaries
Use `src/features//` by default when the work introduces:
+
- a new use case or business policy
- transport wiring
- more than one process boundary
- more than one adapter or provider
+Feature-local docs should answer navigation questions:
+
+- which shape the feature uses
+- which entrypoints are public
+- where new adapters, rules, bridges, or renderer surfaces belong
+- what tests protect the behavior
+- which local files are the best examples for future changes
+
Do not duplicate architecture rules in feature folders.
Keep the standard centralized in [../../docs/FEATURE_ARCHITECTURE_STANDARD.md](../../docs/FEATURE_ARCHITECTURE_STANDARD.md).
Rule of thumb:
+
- `recent-projects` is the full slice example with process-aware outer layers
- `agent-graph` is the thin slice example built around `core/` plus `renderer/`
diff --git a/src/features/agent-graph/renderer/ui/GraphMemberLogPreviewHud.tsx b/src/features/agent-graph/renderer/ui/GraphMemberLogPreviewHud.tsx
index ef2d209d..f4fbc58b 100644
--- a/src/features/agent-graph/renderer/ui/GraphMemberLogPreviewHud.tsx
+++ b/src/features/agent-graph/renderer/ui/GraphMemberLogPreviewHud.tsx
@@ -108,14 +108,46 @@ function itemIcon(item: MemberLogPreviewItem): React.JSX.Element {
return ;
}
+/**
+ * True when the preview carries an OpenCode runtime warning: a runtime timeout, an
+ * unavailable runtime, or an ambiguous lane. False when there is no preview.
+ */
+function hasOpenCodeRuntimeWarning(preview: MemberLogPreviewMember | undefined): boolean {
+  if (!preview) return false;
+  return preview.warnings.some(({ code }) => {
+    switch (code) {
+      case 'opencode_runtime_timeout':
+      case 'opencode_runtime_unavailable':
+      case 'opencode_ambiguous_lane':
+        return true;
+      default:
+        return false;
+    }
+  });
+}
+
+/** True when the preview carries the `opencode_delivery_delayed` warning; false without a preview. */
+function hasOpenCodeDeliveryDelayedWarning(preview: MemberLogPreviewMember | undefined): boolean {
+  const delayed = preview?.warnings.some(({ code }) => code === 'opencode_delivery_delayed');
+  return delayed === true;
+}
+
+/** True when the preview carries either the delivery-delayed warning or a runtime warning. */
+function hasOpenCodeEmptyStateWarning(preview: MemberLogPreviewMember | undefined): boolean {
+  if (hasOpenCodeDeliveryDelayedWarning(preview)) return true;
+  return hasOpenCodeRuntimeWarning(preview);
+}
+
function resolveEmptyText(
preview: MemberLogPreviewMember | undefined,
loading: boolean,
error: string | null
): string {
- if (preview?.warnings.some((warning) => warning.code === 'codex_member_wide_not_supported')) {
+ const hasCodexUnsupportedWarning = preview?.warnings.some(
+ (warning) => warning.code === 'codex_member_wide_not_supported'
+ );
+ const hasOnlyCodexUnsupportedCoverage =
+ hasCodexUnsupportedWarning === true &&
+ (preview?.coverage.length ?? 0) > 0 &&
+ preview?.coverage.every((coverage) => coverage.provider === 'codex_native_trace');
+ if (hasOnlyCodexUnsupportedCoverage) {
return 'Unsupported provider';
}
+ if ((preview?.items.length ?? 0) === 0 && hasOpenCodeDeliveryDelayedWarning(preview)) {
+ return 'OpenCode logs delayed';
+ }
+ if ((preview?.items.length ?? 0) === 0 && hasOpenCodeRuntimeWarning(preview)) {
+ return 'Logs unavailable';
+ }
if (loading && !preview) return 'Loading logs';
if (error && !preview) return 'Logs unavailable';
return 'No recent logs';
@@ -552,7 +584,8 @@ export const GraphMemberLogPreviewHud = ({
: node.label;
const preview = previewsByMember.get(normalizeMemberName(memberName));
const items = preview?.items ?? [];
- const isInitialLoading = loading && !preview;
+ const isEmptyLoading =
+ loading && (!preview || (items.length === 0 && hasOpenCodeEmptyStateWarning(preview)));
return (