diff --git a/docs/research/codex-app-server-account-feature-plan.md b/docs/research/codex-app-server-account-feature-plan.md new file mode 100644 index 00000000..880c72bc --- /dev/null +++ b/docs/research/codex-app-server-account-feature-plan.md @@ -0,0 +1,5195 @@ +# Codex App-Server Account Feature - Detailed Implementation Plan + +**Date**: 2026-04-20 +**Status**: Reference-quality implementation plan +**Primary repo**: `claude_team` +**Secondary repo**: `agent_teams_orchestrator` only for later parity, not on the first critical path +**Canonical architecture reference**: [FEATURE_ARCHITECTURE_STANDARD.md](../FEATURE_ARCHITECTURE_STANDARD.md) + +## Executive Summary + +We should restore the old strong Codex subscription UX, but we should **not** bring back the old legacy Codex transport or legacy OAuth semantics. + +The correct design is: + +- keep **execution** on the current `codex-native` runtime path +- introduce a new dedicated feature slice: `src/features/codex-account` +- use **official `codex app-server` account APIs** as the managed-account control plane +- keep **app-owned API key storage** in the app +- explicitly merge the three different truths: + - managed ChatGPT account truth from `codex app-server` + - API key availability truth from app secure storage and ambient env detection + - real execution truth from `codex exec` + +This feature should become the source of truth for: + +- autodetect of an already logged-in Codex / ChatGPT account +- login / cancel / logout UI flow +- plan type display +- rate limit display +- subscription-first connection copy +- launch-readiness policy for ChatGPT-backed Codex +- deterministic per-launch auth-mode forcing + +Core rule: + +- `codex exec` remains the execution seam +- `codex app-server` becomes the account control-plane seam +- legacy Codex transport stays deleted +- legacy Codex OAuth stays deleted +- direct `auth.json` parsing stays forbidden +- `chatgptAuthTokens` host-managed mode stays out of scope + 
+## Goals And Non-Goals + +### Goals + +- restore strong Codex subscription UX on top of the native runtime +- make managed ChatGPT account truth first-class again in the app +- keep API key support without letting it hijack subscription semantics +- keep runtime execution on `codex exec` +- keep ownership boundaries aligned with `FEATURE_ARCHITECTURE_STANDARD.md` +- make rollout safe through additive, testable composition + +### Non-goals + +This feature is **not** trying to: + +- revive legacy Codex transport +- revive legacy Codex OAuth implementation details +- parse `~/.codex/auth.json` +- add browser-mode local app-server support in the first wave +- add app-server-managed API key login in the first wave +- add per-member auth preferences or per-member Codex backend preferences +- redesign the whole CLI shell UX for other providers +- bundle plugin/app-server enrichment beyond account control-plane needs +- solve orchestrator parity in the same first implementation + +## Glossary + +These terms are used repeatedly in the plan and must stay consistent. + +### Preferred auth mode + +Persisted user intent. + +Allowed values: + +- `auto` +- `chatgpt` +- `api_key` + +### Effective auth mode + +The auth mode the next launch will actually use after runtime evaluation. + +Allowed values: + +- `chatgpt` +- `api_key` +- `null` + +### Snapshot state + +High-level UI/account state describing what the app currently believes about Codex account +availability. + +Examples: + +- `managed_account_connected` +- `both_available` +- `degraded` + +### Launch readiness + +Execution-oriented state used to decide whether the app should launch Codex and under which auth +mode. + +Examples: + +- `ready_chatgpt` +- `ready_api_key` +- `missing_auth` + +### Managed account + +A ChatGPT-authenticated Codex account owned and persisted by Codex itself. + +### API key availability + +Whether the app can supply an OpenAI API key from its own secure storage or ambient env detection. 
+ +### Degraded + +A state where the control plane cannot fully verify Codex account truth right now, but the app may +still have partial or fresh-enough knowledge to present a careful status and in some cases still +launch. + +### Last-known-good snapshot + +The most recent successful account snapshot that came from a real app-server read and passed normal +merge and validation logic. + +### Freshness window + +A bounded period during which the feature may temporarily reuse last-known-good managed-account +truth while the app-server is degraded. + +### Control plane + +The account-management seam implemented via `codex app-server`. + +### Execution plane + +The actual task-running seam implemented via `codex exec`. + +## Chosen Plan Assessment + +Chosen plan: + +- full Codex app-server account seam as a dedicated feature slice, while keeping `codex-native` execution + +Assessment: + +- `🎯 9 🛡️ 9 🧠 7` +- estimated implementation size: `1800-3200` lines in `claude_team`, plus tests and docs + +## Top 3 Viable Shapes + +### 1. Full app-server account feature slice - chosen + +`🎯 9 🛡️ 9 🧠 7` +Estimated size: `1800-3200` lines + +Idea: + +- build a dedicated `codex-account` feature +- use `codex app-server` for account state, login lifecycle, and rate limits +- keep API keys app-owned +- keep execution on `codex exec` + +Why this wins: + +- best long-term architecture +- truthful subscription UX +- avoids legacy transport return +- creates a clean seam between account control plane and execution plane +- aligns with the repo's feature architecture standard + +Main cost: + +- more moving parts than simple CLI probing +- requires careful runtime-policy integration + +### 2. 
Hybrid read-via-app-server and login-via-cli + +`🎯 8 🛡️ 8 🧠 6` +Estimated size: `1200-2200` lines + +Idea: + +- `account/read` and rate limits via app-server +- login/logout still via `codex login` / `codex logout` + +Pros: + +- simpler login integration +- still gets rich autodetect and plan truth + +Cons: + +- split control plane +- less internally coherent +- more transitional than final + +### 3. CLI-only managed-account seam + +`🎯 8 🛡️ 8 🧠 4` +Estimated size: `700-1200` lines + +Idea: + +- use `codex login status`, `codex login`, `codex logout` +- build UI around plain CLI probing + +Pros: + +- simpler +- safer if we were optimizing only for speed + +Cons: + +- poorer structured account metadata +- weak rate-limit surface +- less extensible +- does not justify a full feature slice as well + +## Final Decision + +We are taking option 1. + +Reason: + +- the user requirement is not just "support subscription somehow" +- the requirement is "bring back the strong legacy-quality Codex subscription UX, but on the native runtime" +- for that requirement, the app-server account seam is the cleanest and most future-proof architecture + +## Resolved Decisions Register + +This section keeps the most important architectural choices explicit, so they do not get +re-litigated ad hoc during implementation. 
+ +### Resolved - execution seam + +Decision: + +- keep execution on raw `codex exec` + +Why: + +- it matches the current cutover direction +- it avoids reopening the legacy transport question inside this feature + +### Resolved - control-plane seam + +Decision: + +- use `codex app-server` for account lifecycle and rate-limit truth + +Why: + +- it is the official structured surface +- it supports managed account autodetect and login lifecycle without file parsing + +### Resolved - secret ownership + +Decision: + +- ChatGPT managed auth belongs to Codex +- API key ownership remains app-owned + +Why: + +- avoids dual key stores +- keeps responsibility boundaries clear + +### Resolved - Codex connection naming + +Decision: + +- persisted preference uses `preferredAuthMode: "auto" | "chatgpt" | "api_key"` + +Why: + +- `oauth` is legacy wording and no longer the right semantic label for Codex + +### Resolved - browser mode + +Decision: + +- browser-mode app-server support is deferred + +Why: + +- desktop Electron path is the real target for the first implementation +- we should not hide platform limitations behind fake parity + +### Resolved - device code + +Decision: + +- ChatGPT browser flow is first-class +- `chatgptDeviceCode` is deferred unless required by a real blocker + +Why: + +- browser flow better matches the intended legacy-quality desktop UX + +### Resolved - degraded launch policy + +Decision: + +- degraded control-plane state may still be launchable only with positive current or sufficiently + fresh prior managed-account evidence + +Why: + +- prevents false hard blocks +- also prevents indefinite stale-account lies + +## Problem Statement + +### What is wrong today + +The current codebase has fully cut over to `codex-native` execution, but the account and UX layer was flattened too far. 
+ +Current product truth: + +- Codex runtime lane is native-only +- Codex UI is effectively API-key-only +- Codex managed ChatGPT subscription autodetect is gone from app UX +- Codex login/logout from app UI was removed +- current launch-readiness policy still assumes API key credentials are required + +This creates a product and architecture mismatch: + +- the real Codex native runtime supports ChatGPT account auth +- but the app currently acts as if only API keys exist + +### Why this is dangerous + +If we only restore cosmetic UI copy and do not change runtime policy, we will create a worse failure mode: + +- UI says subscription is available +- but launch still fails because the app hard-gates on `OPENAI_API_KEY` or `CODEX_API_KEY` + +That would be a deceptive product state. + +So the plan must fix: + +1. managed account detection +2. managed login flow +3. runtime launch policy +4. UI presentation + +all together + +## Confirmed Facts And Constraints + +This section lists facts the plan relies on. 
+ +## Official Codex facts + +Based on the current public OpenAI Codex docs, plus the protocol schemas generated from the +installed Codex binary on this machine: + +- Codex supports both ChatGPT account auth and API key auth +- `codex app-server` supports: + - `account/read` + - `account/login/start` + - `account/login/cancel` + - `account/logout` + - `account/rateLimits/read` + - `account/updated` + - `account/login/completed` + - `account/rateLimits/updated` +- `account/read` returns: + - `account | null` + - `requiresOpenaiAuth` +- when `account.type === "chatgpt"`, the current schema requires: + - `email` + - `planType` +- when `account.type === "apiKey"`, the current schema exposes only: + - `type` +- current generated `account/updated` schema includes nullable: + - `authMode` + - `planType` +- current documented `authMode` values include: + - `apikey` + - `chatgpt` + - `chatgptAuthTokens` + - `null` +- current generated `account/rateLimits/read` schema exposes: + - `rateLimits` + - `rateLimitsByLimitId` + - per-snapshot `planType | null` +- Codex config supports: + - `forced_login_method = "chatgpt"` + - `forced_login_method = "api"` +- Codex tools accept config overrides via top-level `-c key=value` +- current app-server docs explicitly show ChatGPT browser flow via: + - `type: "chatgpt"` +- current app-server docs explicitly show externally managed token mode via: + - `type: "chatgptAuthTokens"` +- the generated protocol types explicitly mark `chatgptAuthTokens` as unstable / internal-only, so + the first implementation must not depend on it + +Interpretation rule: + +- steady-state managed-account identity must still come from successful `account/read` +- notification fields are freshness accelerators, not a durable replacement read model + +Sources: + +- [Authentication - Codex](https://developers.openai.com/codex/auth) +- [App Server - Codex](https://developers.openai.com/codex/app-server) + +## Locally verified facts + +Verified on this machine on 
2026-04-20: + +- `codex login status` returns `Logged in using ChatGPT` +- `codex app-server` starts locally +- `account/read` returned: + - `account.type = "chatgpt"` + - `email = "quantjumppro@gmail.com"` + - `planType = "pro"` + - `requiresOpenaiAuth = true` + +Practical implication: + +- managed-account autodetect is real +- it does not require reverse-engineering auth storage + +## Current repo facts + +Current relevant code facts: + +- `recent-projects` already uses `codex app-server` over short-lived JSON-RPC stdio sessions +- generic Codex runtime status currently sits in: + - `src/main/services/runtime/ClaudeMultimodelBridgeService.ts` +- generic provider connection logic currently sits in: + - `src/main/services/runtime/ProviderConnectionService.ts` +- launch env assembly currently sits in: + - `src/main/services/runtime/providerAwareCliEnv.ts` +- Codex UI currently flattened to API-key-only sits in: + - `src/renderer/components/runtime/ProviderRuntimeSettingsDialog.tsx` + - `src/renderer/components/runtime/providerConnectionUi.ts` + - `src/renderer/components/dashboard/CliStatusBanner.tsx` + - `src/renderer/components/settings/sections/CliStatusSection.tsx` + +## Current schema facts + +Current config and shared-type state: + +- `providerConnections.codex` is currently `Record<string, never>` +- `configValidation` currently rejects real Codex connection fields except for ignored stale legacy fields +- shared `AppConfig` mirrors still declare Codex provider connections as empty + +Practical implication: + +- this feature requires deliberate config schema expansion and migration logic + +## App-Server Compatibility Facts + +Current local code and generated protocol schemas show: + +- `recent-projects` already initializes app-server with: + - `experimentalApi: false` + - `optOutNotificationMethods` +- generated `initialize` response includes: + - `codexHome` + - `platformFamily` + - `platformOs` +- generated login response for `type: "chatgpt"` includes: + - `loginId` + - 
`authUrl` + +Practical implication: + +- the feature can and should stay on the stable app-server surface +- compatibility should be decided by an initialize-plus-required-method handshake, not by semver + parsing alone +- auth-root diagnostics can use `initialize.codexHome` as a first-class observed fact instead of + only inferring from environment variables + +## Honest Confidence Hotspots + +These are the places where confidence is lower than the rest of the plan and where we should be +deliberately conservative. + +### Hotspot 1 - undocumented or lightly documented auth variants + +Assessment: + +- `🎯 6 🛡️ 9 🧠 3` + +What we know: + +- official app-server docs clearly show: + - `chatgpt` + - `apiKey` + - `chatgptAuthTokens` +- local schema / CLI evidence suggests more variants may exist + +Plan decision: + +- first implementation depends only on the clearly documented browser-flow `chatgpt` path +- do not build phase 1 around additional auth variants + +Why this is the safe choice: + +- avoids coupling to protocol surfaces that may be less stable or less publicly specified + +### Hotspot 2 - exact ordering of app-server notifications + +Assessment: + +- `🎯 7 🛡️ 9 🧠 4` + +What we know: + +- docs and local evidence show `account/login/completed` and `account/updated` +- but we should not assume stronger guarantees than we have to + +Plan decision: + +- notifications accelerate freshness +- explicit snapshot refresh remains the recovery source of truth + +Why this is the safe choice: + +- even if event order changes slightly, the feature still converges to the correct steady state + +### Hotspot 3 - rate-limits payload usefulness for the first UI + +Assessment: + +- `🎯 7 🛡️ 8 🧠 4` + +What we know: + +- app-server exposes ChatGPT rate-limit reads and updates +- exact display value of every field may not be necessary for phase 1 UX + +Plan decision: + +- keep rate-limits lazy +- present the minimal truthful subset first +- avoid making base account UX depend on 
rate-limit richness + +Why this is the safe choice: + +- avoids blocking the critical auth/launch story on secondary UI detail + +### Hotspot 4 - `forced_login_method` long-term contract stability + +Assessment: + +- `🎯 7 🛡️ 8 🧠 3` + +What we know: + +- the auth docs and local behavior support the approach +- the CLI reference page is not the strongest canonical place for this exact contract + +Plan decision: + +- isolate this override behind the coordinator/env builder seam +- do not scatter it through renderer or generic shell logic + +Why this is the safe choice: + +- if the exact override mechanism ever changes, the blast radius stays small + +### Hotspot 5 - precedence of account fields across read, notification, and rate-limit surfaces + +Assessment: + +- `🎯 7 🛡️ 9 🧠 4` + +What we know: + +- the generated protocol schema shows `account/updated` carries nullable `authMode` and `planType` +- `account/rateLimits/read` also carries `planType` +- notifications are best-effort and should not be treated as a durable replay log + +Plan decision: + +- the latest successful `account/read` owns steady-state account identity +- `account/updated` and rate-limit snapshots may refresh hints and trigger coalesced rereads +- they must never synthesize account presence on their own + +Why this is the safe choice: + +- avoids phantom login or phantom subscription UI caused by late, partial, or duplicated + notifications + +### Hotspot 6 - binary compatibility and stable-vs-experimental app-server surface + +Assessment: + +- `🎯 8 🛡️ 9 🧠 4` + +What we know: + +- official app-server docs describe `initialize.params.capabilities.experimentalApi` +- current local `recent-projects` integration already uses `experimentalApi: false` +- generated `initialize` response exposes `codexHome`, `platformFamily`, and `platformOs` + +Plan decision: + +- first-wave `codex-account` must depend only on stable account APIs +- feature readiness must be gated by successful initialize plus required 
stable method support +- do not make product behavior depend on parsing CLI semver strings alone + +Why this is the safe choice: + +- reduces risk from partial protocol drift across installed Codex binaries +- keeps compatibility logic tied to the actual negotiated surface + +### Hotspot 7 - managed workspace restriction behavior in admin-controlled installs + +Assessment: + +- `🎯 6 🛡️ 8 🧠 4` + +What we know: + +- official auth docs support `forced_chatgpt_workspace_id` +- official docs say mismatched credentials cause Codex to log the user out and exit +- the app does not currently own workspace selection or workspace switching for Codex + +Plan decision: + +- first wave treats workspace restriction as admin policy truth, not as generic missing-auth +- the UI must not invent a workspace picker or a fake remediation flow the app does not own + +Why this is the safe choice: + +- preserves truthful UX in managed environments without expanding scope into unsupported account + management + +### Hotspot 8 - trust boundary around `authUrl` and sensitive login metadata + +Assessment: + +- `🎯 7 🛡️ 9 🧠 3` + +What we know: + +- `account/login/start { type: "chatgpt" }` returns `authUrl` and `loginId` +- existing generic `shell:openExternal` allows `http`, `https`, and `mailto` +- the feature only needs browser auth URLs, not general-purpose URL opening + +Plan decision: + +- keep raw `authUrl` handling in main only +- require a stricter feature-specific validation policy before opening: + - scheme must be `https` + - no renderer round-trip for the raw URL + - no raw URL logging +- treat `loginId` as process-lifecycle metadata, not as user-facing state + +Why this is the safe choice: + +- reduces accidental leakage of login URLs or correlation ids across IPC, logs, and renderer state + +### Hotspot 9 - mutation races and app-server session topology + +Assessment: + +- `🎯 7 🛡️ 9 🧠 5` + +What we know: + +- `account/login/completed`, `account/login/cancel`, `account/logout`, and 
forced snapshot refreshes + can overlap in time +- app-server notifications are connection-scoped +- `recent-projects` already uses separate short-lived stdio app-server sessions + +Plan decision: + +- serialize mutating account operations in the main-process feature +- keep the login session on its own dedicated app-server connection +- keep passive reads on separate short-lived sessions +- let fresh steady-state snapshot truth settle races instead of trusting action intent alone + +Why this is the safe choice: + +- avoids cross-session notification bleed, duplicate side effects, and stale mutations reanimating + old UI state + +## Pre-Implementation Confidence Burn-Down Checklist + +These checks are the shortest path to reducing the remaining honest uncertainty before or during the +first implementation steps. + +### Burn-down check 1 - capture real browser-login notification sequence + +Goal: + +- observe the real order of: + - `account/login/completed` + - `account/updated` + - any adjacent auth-related notifications + +Why: + +- confirms our event-ordering assumptions are conservative enough + +Expected outcome: + +- even if order differs slightly from expectation, the explicit refresh model remains valid + +### Burn-down check 2 - capture one real `account/rateLimits/read` sample + +Goal: + +- verify the actual payload shape we want to expose in the first UI + +Why: + +- reduces uncertainty around which fields are worth surfacing in phase F + +Expected outcome: + +- confirm minimal truthful fields for initial UI +- defer decorative/secondary fields safely + +### Burn-down check 3 - re-verify ChatGPT launch path with ambient API keys present + +Goal: + +- prove that the chosen env sanitization plus auth override actually forces the intended ChatGPT path + +Why: + +- this is the highest-value correctness risk in the whole feature + +Expected outcome: + +- capture one signoff artifact that launch uses ChatGPT semantics even when API-key env vars are + present in 
the parent shell + +### Burn-down check 4 - verify logout semantics against stale cached state + +Goal: + +- prove that explicit logout wins over last-known-good snapshot reuse + +Why: + +- prevents the most embarrassing stale-account resurrection bug + +Expected outcome: + +- logout clears managed-account truth immediately +- degraded follow-up reads cannot resurrect it + +### Burn-down check 5 - confirm app-server and exec share identical auth-store roots + +Goal: + +- compare resolved `HOME`, `USERPROFILE`, and `CODEX_HOME` across both paths + +Why: + +- split auth store is a high-severity, low-visibility failure mode + +Expected outcome: + +- one centralized env normalization path is sufficient for both control plane and execution plane + +### Burn-down check 6 - capture one real `account/updated` payload sequence + +Goal: + +- observe whether `authMode` and `planType` arrive as expected across login, logout, and steady + state refreshes + +Why: + +- confirms the precedence rules stay conservative against nullable or partial notification payloads + +Expected outcome: + +- keep `account/read` as the steady-state owner +- use notifications only as accelerators and invalidation hints + +### Burn-down check 7 - verify initialize handshake on the stable surface + +Goal: + +- confirm the feature can derive a compatibility verdict from stable initialize plus required method + support + +Why: + +- prevents false-ready UI on installs where `codex app-server` exists but the required account + surface is too old or otherwise incompatible + +Expected outcome: + +- initialize succeeds with `experimentalApi: false` +- the feature records `codexHome`, `platformFamily`, and `platformOs` for diagnostics +- unsupported/mismatched installs become `app-server-incompatible`, not generic auth failure + +### Burn-down check 8 - capture one real ChatGPT login URL handling sample + +Goal: + +- validate the actual `authUrl` scheme/shape and prove the browser-open path can stay main-only 
+ +Why: + +- closes the last trust-boundary uncertainty around login URL handling without expanding scope into + brittle host-specific assumptions + +Expected outcome: + +- the real login URL is `https` +- raw `authUrl` never needs to cross IPC +- logs can record only redacted/derived diagnostics such as scheme and hostname when necessary + +### Burn-down check 9 - exercise cancel/completion/logout race resolution + +Goal: + +- prove that overlapping account mutations still converge to one truthful steady state + +Why: + +- this is one of the easiest places to create zombie pending state or accidental false logout + +Expected outcome: + +- cancel followed by late login completion does not force a false disconnect +- logout during pending login still ends in logged-out truth +- stale pre-mutation reads cannot overwrite post-mutation state + +## Recommended Placement Of Burn-Down Checks In The Phase Plan + +To keep momentum, we should not treat all uncertainty as a separate research project. + +### Before or during Phase B + +- Burn-down check 1 +- Burn-down check 2 +- Burn-down check 6 +- Burn-down check 7 +- Burn-down check 8 + +Reason: + +- these directly shape contracts and event handling + +### Before or during Phase D + +- Burn-down check 3 +- Burn-down check 5 + +Reason: + +- these directly shape launch correctness and env routing + +### Before or during Phase E + +- Burn-down check 4 +- Burn-down check 9 + +Reason: + +- these directly shape logout semantics, mutation ordering, and stale-state invalidation + +## Uncertainty Triage - What Must Be Settled Before Broad Coding + +Not every unknown deserves to block the feature. The plan should distinguish hard preconditions from +safe rollout follow-ups. 
+ +### Must be explicit before the feature spreads across shell and renderer + +- account field precedence between `account/read`, `account/updated`, and `account/rateLimits/read` +- stable-surface compatibility gate for installed `codex app-server` +- containment rules for `authUrl`, `loginId`, and account email across main, IPC, and logs +- mutation serialization and race-settlement policy for login/cancel/logout +- shared auth-store root resolution for app-server and `codex exec` +- launch-time env sanitization when ambient API keys exist +- logout invalidation semantics and last-known-good clearing rules + +Why: + +- each of these can create silent false truth in UI or billing/auth mismatches at runtime + +### Can be refined during rollout without invalidating the architecture + +- how rich the first rate-limit UI should be +- whether degraded state needs an extra badge in addition to text +- whether we surface plan-type changes instantly from notifications or only after follow-up reads +- how aggressively background refresh should coalesce under bursty notification traffic +- whether managed-workspace restriction gets a dedicated visual badge or text-only treatment + +Why: + +- these change UX sharpness, not the core correctness contract + +## Known Current Mismatches The Plan Must Explicitly Eliminate + +These are not abstract concerns. They already exist in the current code and must be treated as +first-class implementation targets. 
+ +### Mismatch 1 - Codex connection truth is still API-key-first + +Current reality: + +- `ProviderConnectionService` still treats Codex readiness as "API key exists" +- `getConfiguredConnectionIssue()` still says Codex native requires `OPENAI_API_KEY` or + `CODEX_API_KEY` + +Why this is dangerous: + +- once ChatGPT account UX is restored, launch policy can still lie and hard-fail incorrectly + +### Mismatch 2 - shell UI copy still flattens Codex to API key management + +Current reality: + +- `providerConnectionUi.ts` still uses: + - `Configure API key` + - `Saved API key available in Manage` + - `Codex native ready` +- there is no first-class Codex managed account summary + +Why this is dangerous: + +- the renderer will keep presenting the wrong mental model even if the runtime becomes correct + +### Mismatch 3 - shell login/logout flow is terminal-modal based + +Current reality: + +- `CliStatusBanner.tsx` +- `CliStatusSection.tsx` + +still drive provider login/logout through terminal modals and shell commands. 
+ +Why this is dangerous: + +- even if app-server account logic exists, the visible UX would still route through the wrong seam + +### Mismatch 4 - config schema has no real Codex connection preference + +Current reality: + +- `providerConnections.codex` is still effectively empty +- validation only tolerates stale legacy keys instead of modeling the current desired state + +Why this is dangerous: + +- renderer state, persistence, and launch policy can drift because there is no canonical stored + preference + +### Mismatch 5 - app-server infrastructure is feature-owned by `recent-projects` + +Current reality: + +- generic JSON-RPC stdio transport is currently nested under `recent-projects` + +Why this is dangerous: + +- a second feature would either duplicate the transport or deep-import another feature's internals + +### Mismatch 6 - current multimodel shell status cannot be the Codex account source of truth + +Current reality: + +- `CliProviderStatus` is useful for binary/backend/model truth +- it is not sufficient for login lifecycle, plan display, or dual-surface auth truth + +Why this is dangerous: + +- forcing all Codex account truth into `CliProviderStatus` would create a leaky, provider-specific + blob in generic shell status contracts + +## Hard constraints + +The implementation must respect all of the following: + +1. Execution must stay on `codex-native` / `codex exec`. +2. The feature must not recreate legacy Codex transport. +3. The feature must not parse `~/.codex/auth.json`. +4. The feature must not duplicate API key storage responsibility into Codex-managed storage. +5. The feature must not hard-block launch only because app-server is transiently degraded. +6. The feature must keep auth truth and execution truth separate. +7. The feature must keep app-server and `codex exec` running against the same auth storage context. + +## Why This Must Be A Feature Slice + +This is not just another Codex-specific if-statement. 
+ +This work: + +- spans `main -> preload -> renderer` +- owns transport wiring +- owns its own use cases +- owns provider-specific business policy +- is expected to grow + +That matches the feature standard's criteria for a full slice. + +So the implementation should **not** be buried into: + +- `ProviderConnectionService` +- `ProviderRuntimeSettingsDialog` +- `CliStatusBanner` +- `CliStatusSection` + +Those shell modules should consume this feature, not own it. + +## Feature Topology + +```text +src/features/codex-account/ + contracts/ + api.ts + channels.ts + dto.ts + events.ts + index.ts + core/ + domain/ + CodexConnectionPreference.ts + CodexManagedAccount.ts + CodexLaunchReadiness.ts + CodexConnectionSnapshot.ts + CodexLoginState.ts + application/ + ports/ + CodexManagedAccountSourcePort.ts + CodexManagedLoginPort.ts + CodexRateLimitSourcePort.ts + CodexApiKeyAvailabilityPort.ts + CodexBinaryResolverPort.ts + CodexShellEnvPort.ts + BrowserLauncherPort.ts + ClockPort.ts + LoggerPort.ts + use-cases/ + GetCodexConnectionSnapshotUseCase.ts + RefreshCodexConnectionSnapshotUseCase.ts + StartCodexChatgptLoginUseCase.ts + CancelCodexLoginUseCase.ts + LogoutCodexManagedAccountUseCase.ts + ReadCodexRateLimitsUseCase.ts + EvaluateCodexLaunchReadinessUseCase.ts + main/ + composition/ + createCodexAccountFeature.ts + adapters/ + input/ + ipc/registerCodexAccountIpc.ts + output/ + presenters/ + CodexConnectionSnapshotPresenter.ts + CodexRateLimitsPresenter.ts + CodexAccountEventPresenter.ts + runtime/ + ProviderConnectionApiKeySourceAdapter.ts + CodexLaunchReadinessRuntimeAdapter.ts + shell/ + ElectronBrowserLauncherAdapter.ts + infrastructure/ + cache/ + InMemoryCodexAccountCache.ts + codex/ + CodexAccountAppServerClient.ts + CodexLoginSessionManager.ts + CodexAccountEnvBuilder.ts + preload/ + createCodexAccountBridge.ts + index.ts + renderer/ + index.ts + adapters/ + codexAccountViewModel.ts + codexProviderShellAdapter.ts + hooks/ + useCodexAccount.ts + 
useCodexLoginFlow.ts + useCodexRateLimits.ts + ui/ + CodexAccountConnectionPanel.tsx + CodexLoginPendingPanel.tsx + CodexRateLimitsPanel.tsx + CodexConnectionSummaryBadge.tsx +``` + +## Responsibility Split By Layer + +### `contracts/` + +Contains: + +- DTOs +- event payloads +- IPC channel names +- preload API contract + +Must not contain: + +- Electron APIs +- runtime policy +- child-process details + +### `core/domain/` + +Contains: + +- preference model +- managed-account model +- launch-readiness model +- invariants for combining account and API key truth + +Must not contain: + +- `ipcRenderer` +- `electron` +- shell env access +- JSON-RPC transport + +### `core/application/` + +Contains: + +- use cases +- ports +- merge rules +- state transition rules + +Must not contain: + +- actual app-server spawn logic +- actual browser open logic +- app config singleton access + +### `main/composition/` + +Contains: + +- wiring of ports to infrastructure +- export of a small feature facade + +### `main/adapters/input/` + +Contains: + +- IPC registration only + +### `main/adapters/output/` + +Contains: + +- translation from existing shell services into feature ports +- presenters for IPC-safe DTOs + +### `main/infrastructure/` + +Contains: + +- app-server stdio JSON-RPC details +- login session lifecycle management +- env sanitization and assembly +- cache implementation + +### `preload/` + +Contains: + +- bridge methods and event subscriptions + +### `renderer/` + +Contains: + +- hooks +- view-model mapping +- Codex-specific UI pieces + +## Feature Facade And Public Contract Shape + +To keep SRP and interface segregation intact, the feature should expose a small facade rather than +letting shell code reach into individual infrastructure pieces. 
+ +Recommended main-process facade shape: + +```ts +interface CodexAccountFeatureFacade { + getSnapshot(options?: { forceFresh?: boolean }): Promise<CodexConnectionSnapshotDto>; + refreshSnapshot(): Promise<CodexConnectionSnapshotDto>; + startChatgptLogin(): Promise<CodexLoginStateDto>; + cancelLogin(): Promise<CodexLoginStateDto>; + logout(): Promise<CodexConnectionSnapshotDto>; + getRateLimits(options?: { forceFresh?: boolean }): Promise<CodexRateLimitsDto>; + evaluateLaunchReadiness(options: { + binaryPath?: string | null; + preferredAuthMode?: 'auto' | 'chatgpt' | 'api_key' | null; + }): Promise<CodexLaunchReadinessDto>; + subscribe(listener: (event: CodexAccountEventDto) => void): () => void; +} +``` + +Design rule: + +- the shell should depend on this facade or its IPC equivalent +- the shell should not call: + - `CodexAccountAppServerClient` + - `CodexLoginSessionManager` + - cache objects + - low-level env builders + +This keeps the implementation open for extension without forcing broad shell rewrites later. + +## Shared Infrastructure Extraction Plan + +We already have generic app-server transport primitives hiding under `recent-projects`. + +That code should be extracted before `codex-account` is built, otherwise `recent-projects` becomes the owner of unrelated infrastructure. 
+ +Recommended extraction target: + +```text +src/main/services/infrastructure/codex-app-server/ + JsonRpcStdioClient.ts + CodexAppServerSessionFactory.ts + codexAppServerDefaults.ts +``` + +What gets extracted: + +- generic stdio JSON-RPC client +- generic initialize/initialized session bootstrap +- default request timeout values +- default suppressed notification configuration + +What stays inside `recent-projects`: + +- thread-list request logic +- recent-projects-specific source adapter + +What stays inside `codex-account`: + +- account request logic +- login lifecycle logic +- rate-limits logic + +Important DRY rule: + +- extract transport primitives +- do **not** create one giant "CodexService" that mixes unrelated product features + +## Architecture Decision On Sources Of Truth + +This feature only works if source-of-truth boundaries are explicit. + +## Truth 1 - managed account truth + +Source: + +- `codex app-server account/read` + +Used for: + +- whether a managed ChatGPT account exists +- account email +- plan type +- account auth mode + +## Truth 1a - field precedence inside managed account truth + +The feature must not treat every app-server field as equally authoritative. + +Precedence rules: + +1. `account/read` owns steady-state identity: + - whether an account exists + - `account.type` + - `email` + - baseline `planType` + - `requiresOpenaiAuth` +2. `account/updated.authMode` may update an observed auth hint immediately, but must not by itself + create or delete a managed account. +3. `account/updated.planType` may refresh displayed subscription metadata when present, but if it + arrives as `null` or arrives without a known account, the feature should trigger a coalesced + follow-up `account/read` before changing steady-state account identity. +4. `account/rateLimits/read.planType` may corroborate subscription UI, but must not create account + presence or override a newer successful `account/read`. +5. 
explicit logout clears managed-account truth synchronously before any background refresh result + is accepted. +6. degraded reads may reuse last-known-good account truth only within the freshness window and only + if no explicit logout happened after that snapshot. + +Implementation consequence: + +- `planType` is a field with precedence and fallback rules +- it is not a standalone durable source of truth from notifications alone + +## Truth 2 - API key truth + +Source: + +- existing app API key storage plus ambient env detection + +Used for: + +- whether the app can launch Codex using API key mode +- whether an app-managed OpenAI key is stored +- where the key comes from + +## Truth 3 - execution truth + +Source: + +- `codex exec` + +Used for: + +- whether a real Codex run starts +- real failure or success at execution time + +## Truth 4 - renderer shell status truth + +Source: + +- composed presentation model from: + - generic runtime provider status + - Codex account feature snapshot + +Important design choice: + +- we should **not** force all Codex account fields into `CliProviderStatus` +- `CliProviderStatus` remains the source of truth for: + - binary/runtime/backend/model status +- the feature snapshot remains the source of truth for: + - managed account + - preferred and effective auth mode + - launch readiness + - login lifecycle + - rate limits + +The shell must compose these two bounded contexts at presentation time. + +This is not "two conflicting sources of truth". + +It is: + +- one runtime status context +- one account status context + +## Ownership Matrix + +This table is the practical anti-bug contract for the feature. + +| Concern | Canonical source | Owning layer | Persisted? 
| Cache TTL | Used by | Must not be inferred from | +| --- | --- | --- | --- | --- | --- | --- | +| Codex binary path / binary installed | existing multimodel runtime status | shell runtime services | no | existing shell TTL | shell cards, launch gating | `codex-account` snapshot | +| Codex backend lane | existing runtime config `runtime.providerBackends.codex` | shell runtime services | yes | n/a | launch, status, provisioning | account auth state | +| Codex preferred auth mode | `providerConnections.codex.preferredAuthMode` | `codex-account` feature | yes | n/a | account panel, launch policy | `CliProviderStatus.authMethod` | +| Managed account presence | `account/read` | `codex-account` feature | no | 3-10s | account panel, readiness | local file parsing | +| Managed account email | `account/read` | `codex-account` feature | no | 3-10s | account panel | renderer local state | +| Managed account plan type | latest successful `account/read`; nullable refresh hints from `account/updated` and `account/rateLimits/read` | `codex-account` feature | no | 3-10s | subscription UI, rate limits | static plan assumptions or notification-only state | +| Requires OpenAI auth | `account/read.requiresOpenaiAuth` | `codex-account` feature | no | 3-10s | readiness messaging | provider id alone | +| API key availability | app secure storage plus ambient env detection | provider connection adapter | no | 0-3s | readiness, secondary badges | app-server account state | +| Effective auth mode for next launch | `EvaluateCodexLaunchReadinessUseCase` | `codex-account` feature | no | request-scoped | launch env builder | raw config alone | +| Login pending state | `CodexLoginSessionManager` | `codex-account` feature | no | live | UI pending panels | cached snapshot | +| Rate limits | `account/rateLimits/read` | `codex-account` feature | no | 30-60s | account detail panel | plan type alone | +| Final execution success / failure | `codex exec` | runtime execution lane | no | live | launch 
result UX | account snapshot | + +Implementation rule: + +- if a field is not canonical in this table, the layer may display it but must not own it + +## Dependency Direction And Anti-Corruption Rules + +To stay aligned with `FEATURE_ARCHITECTURE_STANDARD.md`, the new feature must preserve these +directions: + +1. `renderer` depends on: + - `@features/codex-account/renderer` + - `@features/codex-account/contracts` +2. `preload` depends on: + - `@features/codex-account/contracts` +3. `main shell` depends on: + - `@features/codex-account/main` +4. `codex-account/core/*` depends only on: + - feature-local ports and domain models +5. `codex-account/main/infrastructure/*` may depend on: + - Electron + - child process / stdio + - shared shell env helpers +6. `recent-projects` must not become a transitive dependency of `codex-account` + +Anti-corruption rule: + +- the feature may adapt values out of `CliProviderStatus` +- it must not reshape its domain around `CliProviderStatus` + +This is important because `CliProviderStatus` is a generic shell contract, not the Codex account +domain model. + +## Dependency Enforcement Recommendations + +The architecture standard already gives the broad rules. For this feature, we should make the most +important ones operational. 
+ +### Recommended import discipline + +- shell code imports only: + - `@features/codex-account/main` + - `@features/codex-account/contracts` + - `@features/codex-account/renderer` + - `@features/codex-account/preload` +- tests may deep-import internals when needed +- production code outside the feature should not deep-import internal adapters, ports, or + infrastructure + +### Recommended lint / review guardrails + +Watch specifically for: + +- `src/renderer/components/*` importing feature `main/*` +- feature `core/*` importing `electron`, child-process modules, or shell services +- unrelated features importing `codex-account/main/infrastructure/*` +- `recent-projects` becoming a transport owner again through back references + +### Practical enforcement rule + +If a shell file needs a new Codex-specific detail and that detail is not in the feature facade or +contracts yet: + +- extend the facade or contracts +- do not bypass the boundary with a deep import + +## Explicitly forbidden truth sources + +- `~/.codex/auth.json` +- legacy OAuth state +- terminal output parsing as the steady-state account model +- `chatgptAuthTokens` +- stale renderer-only cached assumptions + +## Domain Model + +## Connection preference + +New persisted preference: + +- `auto` +- `chatgpt` +- `api_key` + +Recommended config key: + +- `providerConnections.codex.preferredAuthMode` + +Important decision: + +- do **not** reuse legacy `oauth` naming for Codex + +Reason: + +- `oauth` is legacy wording from the old implementation +- `chatgpt` maps much more clearly to the new managed-account seam + +## Effective auth mode + +Resolved per snapshot: + +- `chatgpt` +- `api_key` +- `null` + +This can differ from user preference when: + +- preferred mode is `auto` +- one surface is unavailable + +## Snapshot state + +Recommended states: + +- `runtime_missing` +- `checking` +- `not_connected` +- `managed_account_connected` +- `api_key_available` +- `both_available` +- `login_in_progress` +- 
`logout_in_progress` +- `degraded` + +## Launch readiness + +Recommended states: + +- `ready_chatgpt` +- `ready_api_key` +- `ready_both` +- `warning_degraded_but_launchable` +- `missing_auth` +- `runtime_missing` + +## Domain invariants + +1. A managed ChatGPT account and an API key can both exist simultaneously. +2. API key presence must not erase managed account metadata. +3. Managed account presence must not erase API key availability. +4. Launch policy must resolve one effective auth mode per run. +5. Account metadata is display state, not secret state. +6. Login state is transient and process-owned. +7. A degraded app-server read must not automatically mean "logged out". + +## State Resolution Matrix + +This matrix is the fastest way to keep renderer, use cases, and runtime policy aligned. + +| Binary available | Managed account | API key available | App-server health | Preferred auth | Snapshot state | Launch readiness | Effective auth mode | UI headline | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | +| no | any | any | any | any | `runtime_missing` | `runtime_missing` | `null` | Codex runtime missing | +| yes | no | no | healthy | auto | `not_connected` | `missing_auth` | `null` | Connect ChatGPT account or add API key | +| yes | yes | no | healthy | auto | `managed_account_connected` | `ready_chatgpt` | `chatgpt` | ChatGPT account connected | +| yes | no | yes | healthy | auto | `api_key_available` | `ready_api_key` | `api_key` | API key available | +| yes | yes | yes | healthy | auto | `both_available` | `ready_both` | `chatgpt` | ChatGPT account connected - API key also available | +| yes | yes | yes | healthy | chatgpt | `both_available` | `ready_chatgpt` | `chatgpt` | ChatGPT account preferred | +| yes | yes | yes | healthy | api_key | `both_available` | `ready_api_key` | `api_key` | API key preferred - ChatGPT account also connected | +| yes | yes | no | degraded | auto or chatgpt | `degraded` | `warning_degraded_but_launchable` | `chatgpt` 
| ChatGPT account detected - unable to fully verify right now | +| yes | no | yes | degraded | auto or api_key | `degraded` | `ready_api_key` | `api_key` | API key available - account status degraded | +| yes | no | no | degraded | auto | `degraded` | `missing_auth` or warning only if last good account still fresh | `null` or last-good-derived | Unable to verify Codex account state | + +Interpretation rule: + +- `snapshot state` is broader UX truth +- `launch readiness` is stricter execution truth +- they are related but must not be collapsed into one boolean + +## Managed Workspace Restriction Policy + +Official Codex auth/config docs allow administrators to set: + +- `forced_login_method` +- `forced_chatgpt_workspace_id` + +Docs also state that if active credentials do not match the configured restriction, Codex logs the +user out and exits. + +### First-wave product policy + +- do not add a workspace picker or workspace-switch UI in this feature +- do not pretend the app can resolve admin-managed workspace policy on the user's behalf +- do surface workspace restriction as a distinct normalized policy state when detected + +### Expected UX treatment + +If the installed Codex runtime is restricted to another ChatGPT workspace: + +- do not show generic `Connect ChatGPT account` copy as the only explanation +- do not claim the subscription is simply missing +- show an admin-policy-oriented message such as: + - `This Codex installation is restricted to a different ChatGPT workspace` + +### Architecture implication + +Workspace restriction should be modeled as: + +- a normalized error/policy category +- potentially `not_connected` at the snapshot-state layer and `missing_auth` at the raw launch-readiness layer +- but with policy-specific renderer messaging + +This avoids exploding the core state machine while still keeping the UX honest. 
+ +## Config And Migration Plan + +## New config shape + +Update: + +- `ConfigManager.ProviderConnectionsConfig` +- shared `AppConfig` +- config validation +- settings reset defaults + +Recommended new shape: + +```ts +providerConnections: { + anthropic: { + authMode: 'auto' | 'oauth' | 'api_key' + }, + codex: { + preferredAuthMode: 'auto' | 'chatgpt' | 'api_key' + } +} +``` + +## Migration rules + +On config read: + +1. If `providerConnections.codex.preferredAuthMode` is missing and no stale legacy `authMode` key is present: + - default to `auto` +2. If stale legacy `providerConnections.codex.authMode === 'oauth'`: + - migrate to `preferredAuthMode = 'chatgpt'` +3. If stale legacy `providerConnections.codex.authMode === 'api_key'`: + - migrate to `preferredAuthMode = 'api_key'` +4. If stale legacy `providerConnections.codex.authMode === 'auto'`: + - migrate to `preferredAuthMode = 'auto'` +5. Ignore `apiKeyBetaEnabled` completely after migration +6. Write back only the new shape + +## Migration algorithm - exact behavior + +This part needs to be exact because configuration drift is one of the easiest ways to create +hard-to-debug launch mismatches. + +### Read-time normalization order + +When config is loaded: + +1. load raw JSON from disk +2. apply generic config defaults (these must not pre-fill `providerConnections.codex.preferredAuthMode`, otherwise step 4 would never see a legacy-only config and the `authMode` migration would be skipped) +3. normalize `runtime.providerBackends.codex` +4. normalize `providerConnections.codex` +5. validate the normalized shape +6. 
persist normalized config only if it changed materially + +### Exact Codex connection normalization rules + +Given `raw.providerConnections.codex`: + +- if it is missing or not an object: + - replace with `{ preferredAuthMode: "auto" }` +- if `preferredAuthMode` exists and is one of: + - `auto` + - `chatgpt` + - `api_key` + - keep it +- otherwise, inspect stale keys in this order: + - `authMode === "oauth"` -> `preferredAuthMode = "chatgpt"` + - `authMode === "api_key"` -> `preferredAuthMode = "api_key"` + - `authMode === "auto"` -> `preferredAuthMode = "auto"` + - everything else -> `preferredAuthMode = "auto"` + +Then: + +- drop `authMode` +- drop `apiKeyBetaEnabled` +- drop any unknown Codex connection keys + +### Backward-compatibility rule + +Old configs must be: + +- readable +- normalizable +- writable into the new shape + +but they must not keep legacy Codex connection fields alive after one clean save cycle. + +### Migration idempotency and corrupt-input hardening rules + +Normalization must be safe under repeated reads and partially corrupted user config. + +Rules: + +1. repeated load -> normalize -> save cycles must converge to the same Codex connection subtree +2. non-object `providerConnections.codex` values must normalize to: + - `{ preferredAuthMode: "auto" }` +3. malformed or unknown Codex connection keys must not block app startup +4. already-normalized config should not be rewritten just because the normalizer ran again +5. backup/restore and import paths must reuse the same Codex normalizer instead of re-implementing + migration logic separately + +Practical consequence: + +- migration is a deterministic cleanup step, not an ongoing source of config churn + +### Migration safety rule + +Config migration must never infer: + +- "user prefers API key" merely because an API key exists +- "user prefers ChatGPT" merely because a managed account exists + +Preference is persisted user intent. Availability is runtime-observed fact. 
They must remain +distinct. + +## Validation rules + +`configValidation` must: + +- accept only `preferredAuthMode` under `providerConnections.codex` +- accept values: + - `auto` + - `chatgpt` + - `api_key` +- tolerate stale legacy keys during migration path only if they are normalized before persistence + +## Critical Runtime Policy + +This is the highest-risk part of the feature. + +### Why current policy is wrong + +Today Codex launch policy effectively means: + +- if no `OPENAI_API_KEY` or `CODEX_API_KEY`, Codex launch is treated as not ready + +That is incompatible with the desired product once ChatGPT-managed auth comes back. + +### New launch policy + +The feature must own launch-readiness evaluation. + +Inputs: + +- Codex binary availability +- managed account presence +- API key availability +- preferred auth mode +- app-server health + +Outputs: + +- launch readiness state +- effective auth mode +- env mutation instructions +- user-facing advisory message + +### Policy rules + +1. If preferred mode is `chatgpt` and managed account exists: + - launch is ready + - effective auth mode is `chatgpt` +2. If preferred mode is `api_key` and API key exists: + - launch is ready + - effective auth mode is `api_key` +3. If preferred mode is `auto`: + - prefer ChatGPT when managed account exists + - otherwise use API key when available + - otherwise missing-auth +4. If app-server is degraded: + - do not hard-fail launch automatically unless there is explicit evidence auth is absent + - return warning-level degraded readiness where appropriate + +### Why app-server degradation must not hard-block launch + +`codex app-server` is an account control-plane seam. + +It is **not** the execution seam. + +A transient app-server issue must not cause the app to say: + +- "Codex cannot launch" + +when `codex exec` itself could still work. + +That would create a false negative and a serious product bug. 
+ +## Launch Policy Decision Table + +This table should directly drive the implementation of +`EvaluateCodexLaunchReadinessUseCase`. + +| Preferred auth | Managed account detected | API key available | App-server degraded | Resulting readiness | Effective auth | Required exec env policy | User-facing message | +| --- | --- | --- | --- | --- | --- | --- | --- | +| `chatgpt` | yes | any | no | `ready_chatgpt` | `chatgpt` | strip API keys, `forced_login_method="chatgpt"` | Launch using ChatGPT account | +| `chatgpt` | yes | any | yes | `warning_degraded_but_launchable` | `chatgpt` | strip API keys, `forced_login_method="chatgpt"` | ChatGPT account detected - verification degraded | +| `chatgpt` | no | yes | no | `missing_auth` | `null` | no launch | Preferred ChatGPT account is not connected | +| `chatgpt` | no | no | no | `missing_auth` | `null` | no launch | Connect a ChatGPT account to use the selected auth mode | +| `api_key` | any | yes | any | `ready_api_key` | `api_key` | inject key, `forced_login_method="api"` | Launch using API key | +| `api_key` | any | no | any | `missing_auth` | `null` | no launch | Add an API key to use the selected auth mode | +| `auto` | yes | yes | no | `ready_both` | `chatgpt` | strip API keys, `forced_login_method="chatgpt"` | Auto selected ChatGPT account | +| `auto` | yes | no | no | `ready_chatgpt` | `chatgpt` | strip API keys, `forced_login_method="chatgpt"` | Auto selected ChatGPT account | +| `auto` | no | yes | no | `ready_api_key` | `api_key` | inject key, `forced_login_method="api"` | Auto selected API key | +| `auto` | yes | yes | yes | `warning_degraded_but_launchable` | `chatgpt` | strip API keys, `forced_login_method="chatgpt"` | Auto selected ChatGPT account - account verification degraded | +| `auto` | no | yes | yes | `ready_api_key` | `api_key` | inject key, `forced_login_method="api"` | Auto selected API key - account verification degraded | +| `auto` | no | no | yes | `missing_auth` unless last-good account 
freshness rule applies | `null` | no launch | Unable to verify Codex authentication | + +Important interpretation: + +- degraded app-server state does not grant permission to guess a missing managed account forever +- the only acceptable degraded-launch case is when there is positive current or sufficiently fresh + prior evidence that the managed account exists + +## Core Algorithm Sketches + +These sketches are intentionally close to implementation logic so that multiple contributors do not +invent subtly different policies. + +### Snapshot merge algorithm - pseudocode + +```ts +function buildCodexSnapshot(input: { + binaryAvailable: boolean; + preferredAuthMode: 'auto' | 'chatgpt' | 'api_key'; + managedAccountResult: + | { kind: 'success'; account: ManagedAccount | null; requiresOpenaiAuth: boolean } + | { kind: 'degraded'; reason: string }; + apiKeyAvailability: ApiKeyAvailability; + loginState: LoginState; + lastKnownGoodManagedAccount: ManagedAccount | null; + lastKnownGoodObservedAt: number | null; + now: number; + freshnessWindowMs: number; +}): CodexConnectionSnapshot { + if (!input.binaryAvailable) { + return runtimeMissingSnapshot(input.preferredAuthMode, input.loginState, input.now); + } + + const managedContext = resolveManagedAccountContext({ + managedAccountResult: input.managedAccountResult, + lastKnownGoodManagedAccount: input.lastKnownGoodManagedAccount, + lastKnownGoodObservedAt: input.lastKnownGoodObservedAt, + now: input.now, + freshnessWindowMs: input.freshnessWindowMs, + }); + + return mergeManagedAndApiKeyTruth({ + preferredAuthMode: input.preferredAuthMode, + managedContext, + apiKeyAvailability: input.apiKeyAvailability, + loginState: input.loginState, + now: input.now, + }); +} +``` + +### Launch readiness algorithm - pseudocode + +```ts +function evaluateLaunchReadiness(snapshot: CodexConnectionSnapshot): CodexLaunchReadiness { + if (!snapshot.binaryAvailable) { + return { state: 'runtime_missing', effectiveAuthMode: null, message: 'Codex 
runtime missing' }; + } + + switch (snapshot.preferredAuthMode) { + case 'chatgpt': + if (snapshot.managedAccount?.type === 'chatgpt') { + return snapshot.state === 'degraded' + ? degradedChatgptReady() + : chatgptReady(); + } + return missingChatgptAuth(); + + case 'api_key': + return snapshot.apiKey.available ? apiKeyReady() : missingApiKeyAuth(); + + case 'auto': + if (snapshot.managedAccount?.type === 'chatgpt') { + return snapshot.state === 'degraded' + ? degradedChatgptReady() + : (snapshot.apiKey.available ? readyBoth() : chatgptReady()); + } + if (snapshot.apiKey.available) { + return apiKeyReady(); + } + return missingAnyAuth(); + } +} +``` + +### Account event reconciliation algorithm - pseudocode + +```ts +function onAccountUpdated(event: { authMode: AuthMode | null; planType: PlanType | null }) { + cache.lastObservedAuthMode = event.authMode ?? cache.lastObservedAuthMode ?? null; + + if (event.planType !== null) { + cache.lastObservedPlanTypeHint = event.planType; + } + + scheduleCoalescedSnapshotRefresh('account-updated'); +} +``` + +Critical rule: + +- notification handlers must not clear `managedAccount` only because `authMode` or `planType` + arrives as `null` +- destructive transitions belong to explicit logout handling and successful follow-up `account/read` + results + +### Exec env mutation algorithm - pseudocode + +```ts +function buildExecEnv(baseEnv: Env, readiness: CodexLaunchReadiness, apiKey: string | null): Env { + const env = { ...baseEnv }; + + if (readiness.effectiveAuthMode === 'chatgpt') { + delete env.OPENAI_API_KEY; + delete env.CODEX_API_KEY; + env.CODEX_FORCED_LOGIN_METHOD = 'chatgpt'; + return env; + } + + if (readiness.effectiveAuthMode === 'api_key') { + if (apiKey) { + env.OPENAI_API_KEY = apiKey; + env.CODEX_API_KEY = apiKey; + } + env.CODEX_FORCED_LOGIN_METHOD = 'api'; + return env; + } + + return env; +} +``` + +Implementation note: + +- the real implementation should pass `forced_login_method` via Codex config override 
arguments, not + invent a new environment variable contract +- the pseudocode uses a symbolic field only to make the decision process readable + +## Extremely important env semantics + +This section is subtle and non-optional. + +### Problem 1 - ambient API keys can poison managed-account autodetect + +If an app-server child process inherits: + +- `OPENAI_API_KEY` +- `CODEX_API_KEY` + +then account reads may reflect API-key auth instead of the managed ChatGPT account the user expects to see. + +### Problem 2 - execution can silently use the wrong auth surface + +If ChatGPT mode is selected but execution still inherits: + +- `OPENAI_API_KEY` +- `CODEX_API_KEY` + +then the run may silently go through API-key auth. + +That creates the worst possible bug: + +- UI says subscription is active +- but runtime actually bills via API key + +### Required env policy + +For **managed-account control-plane sessions**: + +- sanitize `OPENAI_API_KEY` +- sanitize `CODEX_API_KEY` +- do not set `forced_login_method` +- preserve the same resolved auth storage context as execution + +Reason: + +- control-plane reads should discover unbiased managed-account truth + +For **execution sessions in `chatgpt` mode**: + +- sanitize `OPENAI_API_KEY` +- sanitize `CODEX_API_KEY` +- pass `-c forced_login_method="chatgpt"` + +For **execution sessions in `api_key` mode**: + +- inject the resolved API key env +- pass `-c forced_login_method="api"` + +For **execution sessions in `auto -> chatgpt` resolution**: + +- same as `chatgpt` + +For **execution sessions in `auto -> api_key` resolution**: + +- same as `api_key` + +### Shared auth storage context rule + +`account/read`, login, logout, and `codex exec` must use the same resolved: + +- `HOME` +- `USERPROFILE` +- `CODEX_HOME` + +or the UI can observe one auth store while execution uses another. 
+ +That would create a second class of severe bug: + +- UI sees a logged-in account +- but `codex exec` runs against a different auth store and fails + +So the feature must centralize Codex env resolution. + +## Platform-Specific Runtime Notes + +This feature is cross-process and cross-platform enough that platform rules should be explicit. + +### Auth-store env precedence + +Recommended precedence for auth-store-related env resolution: + +1. explicit per-call overrides provided by the app +2. resolved interactive shell env +3. process env fallback + +### `HOME`, `USERPROFILE`, `CODEX_HOME` + +Rules: + +- always resolve a single canonical auth root +- materialize both `HOME` and `USERPROFILE` consistently to avoid child-process drift +- preserve an explicit `CODEX_HOME` if one exists +- do not silently invent different auth roots for app-server vs exec + +Diagnostics rule: + +- when available, compare the app's resolved auth root with `initialize.codexHome` +- treat disagreement as a first-class diagnostic signal, not as a silent implementation detail + +### Windows nuance + +On Windows, child processes may consult `USERPROFILE` even when `HOME` is also set. + +Implementation implication: + +- the env builder should not treat `HOME` alone as sufficient on Windows-capable flows + +### macOS and Linux nuance + +On Unix-like systems, `HOME` is usually the primary root. + +Implementation implication: + +- if `USERPROFILE` is absent, we should still fill it consistently once we have a canonical root so + both app-server and exec see the same store contract + +### Browser-launch nuance + +Desktop login should use the Electron/browser-launch adapter, not shell command guessing. 
+ +Implementation implication: + +- browser opening belongs to the feature infrastructure adapter +- it should not be scattered across banner/section/dialog components + +### PATH and shell env nuance + +The feature should inherit enough environment to find the Codex binary and preserve normal shell +behavior, but auth-critical env values must still be rewritten deterministically. + +Implementation implication: + +- preserve normal shell-derived env where safe +- sanitize only the auth-critical variables the policy explicitly owns + +## Concurrency, Ordering, And Race Policy + +This section is necessary because the feature will combine: + +- cached reads +- explicit refresh +- login notifications +- logout actions +- shell status refreshes + +Without a strict ordering contract, the UI can easily regress into stale or contradictory state. + +### Snapshot sequencing rule + +Every refresh path should carry: + +- `requestId` +- `startedAt` +- `observedAt` + +The feature should only publish a snapshot if it is newer than the last settled snapshot for the +same source epoch. + +Practical rule: + +- a slow degraded read must not overwrite a newer successful read +- a pre-login cached snapshot must not overwrite a post-login snapshot +- a pre-logout cached snapshot must not overwrite a post-logout snapshot + +### Single-flight rule + +Passive snapshot refreshes from: + +- dashboard banner +- settings dialog +- provider card refresh + +must collapse into one in-flight promise per main-process feature instance. + +### Login lifecycle exclusivity rule + +At most one login session may be live at a time. + +If the user clicks login repeatedly: + +- return the current pending login state +- do not start parallel app-server login sessions + +### Renderer subscription rule + +Renderer hooks must treat `snapshot-updated` and `login-state-changed` as additive feature events, +not as implicit replacement for shell runtime status. 
+ +This avoids one subtle class of bug: + +- account snapshot event arrives +- shell status is still loading +- UI accidentally treats feature snapshot as full provider status + +### Cache invalidation rule + +Invalidate snapshot cache immediately on: + +- login start +- login completion +- logout success +- preferred auth mode change +- explicit manual refresh + +Do not wait for TTL expiry after user-initiated auth transitions. + +## Freshness Window And Last-Known-Good Policy + +This is intentionally separated from generic cache policy because it affects safety semantics, not +just performance. + +### Recommended default + +- keep a `lastKnownGoodManagedAccountSnapshot` +- keep a `lastKnownGoodObservedAt` +- use a default freshness window around `60 seconds` + +Why not longer by default: + +- too long and the app starts lying after real logout or auth expiry + +Why not shorter by default: + +- too short and transient app-server failures collapse the UX into false logout too easily + +### What may be carried forward + +During a degraded control-plane read, the feature may carry forward only: + +- managed account presence +- managed account email +- managed account plan type +- effective auth mode only if it was derived from the managed account path + +### What must be recomputed fresh + +The feature must recompute or re-read fresh: + +- API key availability +- binary/runtime availability +- current preferred auth mode from config +- login pending state +- logout in progress state + +### What must never be carried forward + +Do not carry forward: + +- pending login state +- failed login state +- logout in progress +- rate limit snapshots beyond their own TTL +- a degraded state as if it were a successful state + +### Freshness expiration rule + +Once the freshness window expires: + +- the feature may still show `degraded` +- but it must stop treating stale managed-account evidence as sufficient for launchability + +### Post-logout rule + +After explicit logout 
success: + +- clear the last-known-good managed-account snapshot immediately +- do not allow degraded reads to resurrect the old account state + +## Feature Data Contracts + +## DTOs + +### `CodexConnectionSnapshotDto` + +Fields: + +- `state` +- `preferredAuthMode` +- `effectiveAuthMode` +- `binaryAvailable` +- `requiresOpenaiAuth` +- `managedAccount` +- `apiKey` +- `launchReadiness` +- `degradedReason` +- `login` +- `observedAt` + +### `CodexManagedAccountDto` + +Fields: + +- `type: "chatgpt" | "apiKey" | null` +- `email?: string | null` +- `planType?: "free" | "go" | "plus" | "pro" | "team" | "business" | "enterprise" | "edu" | "unknown" | null` + +Important note: + +- we are not planning to use app-server `apiKey` login mode, but the DTO should still model it defensively because the protocol supports it + +### `CodexApiKeyAvailabilityDto` + +Fields: + +- `available: boolean` +- `source: "stored" | "environment" | null` +- `label?: string | null` + +### `CodexLaunchReadinessDto` + +Fields: + +- `state` +- `message` +- `effectiveAuthMode` + +### `CodexLoginStateDto` + +Fields: + +- `state: "idle" | "starting" | "awaiting_browser" | "pending" | "completed" | "cancelled" | "failed"` +- `loginId?: string | null` +- `message?: string | null` + +Sensitive-field rule: + +- `loginId` is process-lifecycle metadata and should not be rendered to the user +- renderer-facing contracts may omit `loginId` entirely if cancellation and status updates can be + driven by main-owned session state +- raw `authUrl` must never appear in renderer-facing DTOs + +### `CodexRateLimitsDto` + +Fields: + +- `rateLimits` +- `rateLimitsByLimitId?` +- `planType?` +- `observedAt` + +## Event contract + +Recommended event union: + +- `snapshot-updated` +- `login-state-changed` +- `rate-limits-updated` +- `degraded` + +These should be emitted over one feature event channel and consumed by renderer hooks. 
+ +## DTO And Event Shape Examples + +The plan should include concrete examples so that main, preload, and renderer do not each invent +their own interpretation. + +### Example `CodexConnectionSnapshotDto` + +```json +{ + "state": "both_available", + "preferredAuthMode": "auto", + "effectiveAuthMode": "chatgpt", + "binaryAvailable": true, + "requiresOpenaiAuth": true, + "managedAccount": { + "type": "chatgpt", + "email": "user@example.com", + "planType": "pro" + }, + "apiKey": { + "available": true, + "source": "stored", + "label": "Stored in app" + }, + "launchReadiness": { + "state": "ready_both", + "message": "ChatGPT account connected - API key also available", + "effectiveAuthMode": "chatgpt" + }, + "degradedReason": null, + "login": { + "state": "idle", + "loginId": null, + "message": null + }, + "observedAt": 1776640000000 +} +``` + +### Example degraded snapshot + +```json +{ + "state": "degraded", + "preferredAuthMode": "auto", + "effectiveAuthMode": "chatgpt", + "binaryAvailable": true, + "requiresOpenaiAuth": true, + "managedAccount": { + "type": "chatgpt", + "email": "user@example.com", + "planType": "pro" + }, + "apiKey": { + "available": false, + "source": null, + "label": null + }, + "launchReadiness": { + "state": "warning_degraded_but_launchable", + "message": "ChatGPT account detected - verification degraded", + "effectiveAuthMode": "chatgpt" + }, + "degradedReason": "app-server-timeout", + "login": { + "state": "idle", + "loginId": null, + "message": null + }, + "observedAt": 1776640005000 +} +``` + +### Example `login-state-changed` event + +```json +{ + "type": "login-state-changed", + "payload": { + "state": "pending", + "message": "Waiting for ChatGPT browser login to complete" + }, + "observedAt": 1776640002000 +} +``` + +### Example `snapshot-updated` event + +```json +{ + "type": "snapshot-updated", + "payload": { + "state": "managed_account_connected", + "preferredAuthMode": "chatgpt", + "effectiveAuthMode": "chatgpt" + }, + "observedAt": 
1776640008000 +} +``` + +Contract rule: + +- event payloads may be smaller than full DTOs +- snapshot read methods must still return the full DTO +- renderer must not assume an event payload is a complete replacement snapshot unless the contract + explicitly says so + +Sensitive-field containment rule: + +- `authUrl` must never be emitted over the feature event channel +- incremental events should not include full account email unless a full snapshot read is actually + required for UI rendering +- `loginId` should stay main-owned unless there is a concrete renderer need that cannot be solved by + process-owned cancel/status APIs + +## Error Normalization Matrix + +The feature should normalize raw transport/process failures into stable categories so renderer copy +and metrics stay coherent. + +| Raw failure family | Normalized category | Typical feature impact | UI treatment | +| --- | --- | --- | --- | +| app-server initialize timeout | `app-server-timeout` | degraded snapshot or failed login start | degraded / retryable message | +| app-server process spawn failure | `app-server-unavailable` | degraded snapshot or hard login failure | binary/runtime dependent messaging | +| app-server initialize succeeds but required stable account surface is unavailable | `app-server-incompatible` | feature hidden/locked or hard login failure | update-runtime / incompatible-runtime messaging | +| login returned unsafe or unsupported browser URL | `unsafe-auth-url` | login fails before browser open | explicit security-oriented error, no open attempt | +| browser open failure | `browser-open-failed` | login failed | explicit action error | +| login cancelled by user | `login-cancelled` | login state settles to cancelled | non-destructive informational state | +| login completed with error | `login-failed` | login failed, snapshot refresh follows | explicit error | +| logout RPC failure | `logout-failed` | logout stays unresolved, snapshot preserved | explicit error | +| 
admin-managed workspace or login policy rejects current account | `workspace-restricted` | login blocked or account cleared by Codex policy | policy-specific guidance, not generic auth-missing | +| app restarted or shut down while login was pending | `login-session-lost` | pending login abandoned, fresh snapshot required on next startup | informational recovery message, settle to idle | +| rate-limits read failure | `rate-limits-unavailable` | rate-limit panel degraded only | non-blocking warning | +| stale result received after newer state | `stale-result-ignored` | no user-visible state change | debug-level logging only | + +Normalization rule: + +- renderer copy should key off normalized categories +- raw stderr / transport text may be attached for diagnostics, but should not drive UX wording + +## Event Ordering And Delivery Rules + +These rules matter because most user-visible bugs in this feature will come from correct data +arriving in the wrong order. + +### Preferred event ordering + +For login success: + +1. `login-state-changed: starting` +2. `login-state-changed: awaiting_browser` +3. `login-state-changed: pending` +4. app-server `account/login/completed success=true` +5. `login-state-changed: completed` +6. forced snapshot refresh +7. `snapshot-updated` +8. optional `rate-limits-updated` after explicit or lazy read +9. `login-state-changed: idle` once settled + +For logout success: + +1. local logout action enters pending state +2. `account/logout` +3. clear last-known-good managed-account snapshot +4. forced snapshot refresh +5. `snapshot-updated` +6. 
local logout pending state clears + +### Delivery rule + +Feature event delivery should be best-effort and additive: + +- missing one event must not make the renderer permanently stale +- the renderer must always be able to recover by reading a fresh snapshot + +### Idempotency rule + +Renderer and main-side subscribers should tolerate: + +- duplicate `login-state-changed` +- duplicate `snapshot-updated` +- late degraded events that are older than the currently rendered snapshot + +### Staleness rejection rule + +If an incoming event or refresh result is older than the settled snapshot already in memory: + +- ignore it +- log a low-level diagnostic if useful + +Do not let older results reanimate older UI state. + +## Cross-Window And Subscriber Coherence Rules + +The main process must remain the single owner of mutable Codex account state. + +### Main-process ownership rule + +The following state must live in one main-process feature instance, not in renderer-local stores: + +- latest settled snapshot +- last-known-good managed account snapshot +- active login session state +- rate-limit cache +- compatibility verdict for the installed app-server seam + +### Renderer subscription rule + +Any renderer may subscribe late, unsubscribe early, or restart independently. 
+ +Therefore: + +- late subscribers must bootstrap from `getSnapshot()` and not rely on having seen past events +- feature events are accelerators, not the only source of truth +- one renderer closing must not cancel a login session started by another renderer + +### Broadcast rule + +If multiple renderer surfaces are open at once: + +- all should receive the same normalized feature events from the same main-process owner +- no renderer should start a second login flow just because it missed an earlier local UI state + +### Shutdown rule + +If the last renderer unsubscribes while a login is pending: + +- the main-process feature may keep the login session alive until: + - completion + - explicit cancel + - timeout + - app shutdown + +This avoids coupling correctness to whichever window happened to open the flow. + +## App Restart, Crash, And Pending Login Recovery Policy + +Login session state is explicitly process-owned, not durable product state. + +### Startup recovery rule + +On app startup: + +- do not restore a previously pending login from persisted config or renderer state +- initialize login state as `idle` +- perform a fresh snapshot read to determine the actual steady state + +### Shutdown rule + +On app shutdown while login is pending: + +- do not block app quit waiting for remote login completion +- a best-effort `account/login/cancel` is optional, but correctness must not depend on it +- local session state should settle as lost/abandoned for diagnostics only + +### Crash/reload rule + +If the renderer reloads or the app restarts during login: + +- the next session should not show a phantom permanently pending state +- the feature should converge via fresh `account/read`, not by trying to resume an old `loginId` + +### Persistence rule + +Do not persist: + +- pending login state +- `loginId` +- `authUrl` + +Persist only durable user intent: + +- preferred auth mode + +## Operation Serialization And Race Resolution Policy + +The feature must treat auth 
mutations as serialized control-plane operations, not as unrelated UI +button handlers. + +### Serialization rule + +Serialize these operations through one main-process account-operation gate: + +- start login +- cancel login +- logout +- explicit auth-recovery refresh that escalates to `refreshToken = true` + +Passive reads may coalesce, but mutating operations must not run in parallel. + +### Race resolution rule + +If mutation intent and observed remote completion disagree, final truth comes from the freshest +successful post-mutation snapshot, not from the earlier button click alone. + +### Required race outcomes + +1. cancel requested, then late `account/login/completed success=true` arrives: + - do a forced snapshot refresh + - if the snapshot shows a connected managed account, show connected state + - do not force logout just to honor the earlier cancel intent +2. logout requested during pending login: + - best-effort cancel login first if needed + - then run logout + - final logged-out truth must win over any stale pre-logout account evidence +3. a read that started before logout returns its result only after logout has settled: + - ignore the stale read result + - never resurrect the old managed account from that stale result + +### Preference-change rule + +Changing preferred auth mode during a pending login affects future launch choice only. + +It must not: + +- silently cancel an in-flight login unless product explicitly chooses that UX later +- rewrite the meaning of a login session that already started under the prior preference + +## Protocol Assumptions We Intentionally Avoid Relying On + +The feature should stay robust even if some secondary protocol details vary. 
+ +Do not rely on: + +- `account/updated` always arriving before or after `account/login/completed` +- rate-limit notifications being delivered during every app lifecycle +- undocumented auth variants being available in every Codex build +- raw CLI reference pages being the only authoritative source for auth override details +- exact raw transport error text remaining stable enough for renderer copy + +Instead, rely on: + +- explicit snapshot refresh for steady-state truth +- normalized error categories +- narrow, documented auth variants for first-wave behavior + +## IPC channels + +Recommended channels: + +- `CODEX_ACCOUNT_GET_SNAPSHOT` +- `CODEX_ACCOUNT_REFRESH_SNAPSHOT` +- `CODEX_ACCOUNT_START_CHATGPT_LOGIN` +- `CODEX_ACCOUNT_CANCEL_LOGIN` +- `CODEX_ACCOUNT_LOGOUT` +- `CODEX_ACCOUNT_GET_RATE_LIMITS` +- `CODEX_ACCOUNT_EVENT` + +## IPC Request / Response Matrix + +This matrix keeps the cross-process contract explicit and prevents renderer/main drift. + +| Channel | Request shape | Response shape | Side effects | Cache interaction | Failure shape | +| --- | --- | --- | --- | --- | --- | +| `CODEX_ACCOUNT_GET_SNAPSHOT` | `{ forceFresh?: boolean }` or no payload | `CodexConnectionSnapshotDto` | none | may serve cached snapshot unless `forceFresh` | returns rejected IPC promise with normalized error message | +| `CODEX_ACCOUNT_REFRESH_SNAPSHOT` | no payload | `CodexConnectionSnapshotDto` | forces control-plane refresh | bypasses normal snapshot TTL | returns rejected IPC promise with normalized error message | +| `CODEX_ACCOUNT_START_CHATGPT_LOGIN` | no payload | `CodexLoginStateDto` | starts login session, may open browser | invalidates snapshot cache on state transitions | returns failed login state or rejected IPC promise for hard failures | +| `CODEX_ACCOUNT_CANCEL_LOGIN` | no payload | `CodexLoginStateDto` | cancels active login if present | no steady-state cache effect except follow-up refresh | safe no-op if no active login | +| `CODEX_ACCOUNT_LOGOUT` | no 
payload | `CodexConnectionSnapshotDto` | logs out managed account and refreshes snapshot | clears last-known-good managed-account snapshot | rejected IPC promise or error-bearing snapshot depending on final implementation choice | +| `CODEX_ACCOUNT_GET_RATE_LIMITS` | `{ forceFresh?: boolean }` or no payload | `CodexRateLimitsDto \| null` | none | may serve rate-limit cache unless `forceFresh` | returns `null` or rejected IPC promise depending on error-handling contract | +| `CODEX_ACCOUNT_EVENT` | subscription only | `CodexAccountEventDto` | none | n/a | best-effort delivery only | + +Contract rule: + +- all IPC errors should be normalized into stable, user-safe messages +- renderer code must not depend on raw transport/process error text + +## Electron API integration + +Extend: + +- `src/shared/types/api.ts` +- `src/preload/index.ts` + +Pattern should match existing feature slices like `recent-projects`. + +## Preload And Renderer API Shape + +The renderer-facing API should be explicit so we do not leak main-process internals into UI code. + +Recommended preload-facing contract: + +```ts +export interface CodexAccountElectronApi { + getSnapshot: (options?: { forceFresh?: boolean }) => Promise<CodexConnectionSnapshotDto>; + refreshSnapshot: () => Promise<CodexConnectionSnapshotDto>; + startChatgptLogin: () => Promise<CodexLoginStateDto>; + cancelLogin: () => Promise<CodexLoginStateDto>; + logout: () => Promise<CodexConnectionSnapshotDto>; + getRateLimits: (options?: { forceFresh?: boolean }) => Promise<CodexRateLimitsDto | null>; + onEvent: (callback: (event: CodexAccountEventDto) => void) => () => void; +} +``` + +Integration rule: + +- this contract belongs under `src/features/codex-account/contracts` +- `src/preload/index.ts` should only bridge it +- renderer hooks should consume this contract through the app API abstraction, not directly through + ad hoc `window.electronAPI` calls spread across components + +## Account Control-Plane Flows + +## App-Server Method Matrix + +This matrix documents how the feature is expected to use the official app-server surface. 
+ +| Method / notification | Used in phase | Typical caller | Input | Expected output | Notes | +| --- | --- | --- | --- | --- | --- | +| `account/read` | B+ | snapshot use cases | `{ refreshToken?: boolean }` | current account plus `requiresOpenaiAuth` | passive reads should default `refreshToken` to `false` | +| `account/login/start` with `type: "chatgpt"` | E+ | login use case | no extra payload | `loginId` plus `authUrl` | browser flow is first-class | +| `account/login/cancel` | E+ | cancel use case / session manager | active `loginId` | success or no-op | safe to call only when login is active | +| `account/logout` | E+ | logout use case | none | logout acknowledgement | should be followed by forced snapshot refresh | +| `account/rateLimits/read` | F+ | rate-limit use case | none or method-specific default params | plan/rate-limit snapshot | should stay lazy by default | +| `account/updated` | E+ | login session manager / event bridge | notification only | auth mode plus plan changes | may arrive outside explicit reads | +| `account/login/completed` | E+ | login session manager | notification only | success or error for a specific `loginId` | must drive pending-state settlement | +| `account/rateLimits/updated` | F+ | optional rate-limit subscription handling | notification only | updated rate-limit view | should not be required for base snapshot correctness | + +Usage rule: + +- `account/read` is the canonical steady-state read path +- notifications are accelerators for freshness, not replacements for a recoverable read model + +## `refreshToken` Usage Policy + +Official app-server docs and local schema expose `account/read { refreshToken?: boolean }`. + +This flag is powerful enough that the plan should constrain it explicitly. 
+ +### Default rule + +- passive background reads use `refreshToken = false` +- normal explicit refresh also starts with `refreshToken = false` + +### Escalation rule + +Allow a one-time `refreshToken = true` read only when there is a concrete auth-staleness reason, +for example: + +- a just-completed login has not converged after the first normal snapshot read +- explicit user recovery action after an auth-related degraded state + +### Forbidden rule + +Do not: + +- set `refreshToken = true` on every poll +- loop repeated token-refresh reads in the background +- use token refresh as a substitute for the normal snapshot model + +### Operational reason + +- overusing token refresh increases latency and creates another path to false logout or confusing + transient state + +## App-Server Compatibility Gate + +The feature must not equate "binary exists" with "account seam is supported". + +### Stable-surface rule + +First-wave `codex-account` should initialize app-server with: + +- `experimentalApi: false` +- only the notification subscriptions it actually needs + +Do not opt into experimental API just to make the first wave easier. + +### Required handshake contract + +Before the feature reports the managed-account seam as supported, it must prove: + +1. `codex app-server` starts +2. `initialize` succeeds on the stable surface +3. the initialize response yields diagnostics we can record: + - `codexHome` + - `platformFamily` + - `platformOs` +4. 
required stable methods behave as expected: + - `account/read` + - `account/login/start` + - `account/login/cancel` + - `account/logout` + - `account/rateLimits/read` + +### Compatibility verdict rule + +If initialize works but one of the required stable account methods is absent, rejected as +experimental-only, or otherwise incompatible with the expected shape: + +- classify the feature state as `app-server-incompatible` +- do not classify it as generic missing auth +- do not offer misleading login or subscription controls + +### Versioning rule + +Prefer capability/protocol evidence over string-parsed `codex --version`. + +Semver may still be logged for diagnostics, but: + +- semver alone must not unlock the feature +- semver alone must not disable the feature when the stable handshake succeeds + +## Flow 1 - autodetect existing account + +```mermaid +sequenceDiagram + participant UI as Renderer Hook + participant IPC as Feature IPC + participant UC as Get Snapshot Use Case + participant APP as App Server Client + participant KEY as API Key Adapter + + UI->>IPC: getCodexConnectionSnapshot() + IPC->>UC: execute() + UC->>APP: account/read + UC->>KEY: getApiKeyAvailability() + APP-->>UC: managed account state + KEY-->>UC: api key availability + UC-->>IPC: merged snapshot + IPC-->>UI: snapshot dto +``` + +## Flow 2 - start ChatGPT login + +```mermaid +sequenceDiagram + participant UI as Renderer + participant IPC as Feature IPC + participant UC as Start Login Use Case + participant LOGIN as Login Session Manager + participant APP as App Server + participant BROWSER as Browser Launcher + + UI->>IPC: startCodexChatgptLogin() + IPC->>UC: execute() + UC->>LOGIN: acquire or create login session + LOGIN->>APP: account/login/start(type=chatgpt) + APP-->>LOGIN: authUrl + loginId + LOGIN->>BROWSER: openExternal(authUrl) + LOGIN-->>IPC: login pending + IPC-->>UI: login state changed + APP-->>LOGIN: account/login/completed + LOGIN-->>IPC: snapshot refresh + login completed 
event + IPC-->>UI: snapshot updated +``` + +## Flow 3 - launch Codex + +```mermaid +sequenceDiagram + participant SHELL as Runtime Shell + participant FACADE as Codex Account Feature + participant READY as Launch Readiness UC + participant ENV as Provider Env Builder + participant EXEC as codex exec + + SHELL->>FACADE: evaluateLaunchReadiness() + FACADE->>READY: execute() + READY-->>FACADE: readiness + effectiveAuthMode + env policy + FACADE-->>SHELL: launch policy + SHELL->>ENV: build env using policy + ENV-->>SHELL: sanitized/injected env + SHELL->>EXEC: launch codex exec +``` + +## Login State Machine + +```mermaid +stateDiagram-v2 + [*] --> idle + idle --> starting: start login + starting --> awaiting_browser: authUrl received + awaiting_browser --> pending: browser opened + pending --> completed: account/login/completed success=true + pending --> failed: account/login/completed success=false + pending --> cancelled: account/login/cancel + awaiting_browser --> failed: browser open failed + starting --> failed: start request failed + completed --> idle: refresh settled + cancelled --> idle: refresh settled + failed --> idle: reset +``` + +## Subscription Lifecycle Semantics + +This section describes the intended steady-state lifecycle of a managed ChatGPT-backed Codex +subscription as the app should understand it. + +### Lifecycle phases + +1. no managed account detected +2. login initiated +3. browser auth pending +4. managed account connected +5. temporarily degraded verification +6. 
explicit logout + +### Important semantic rules + +- `managed account connected` means the control plane has positive evidence of a ChatGPT-backed + account +- `degraded` does not mean disconnected +- explicit logout is stronger than cached prior evidence and must clear it immediately +- API key availability may coexist with any lifecycle phase except `runtime_missing` + +### User-visible implication + +The UI should present the managed account as: + +- connected +- pending +- degraded +- disconnected + +and should not collapse these into one binary `authenticated` flag for Codex. + +## Main-Side Use Cases + +### `GetCodexConnectionSnapshotUseCase` + +Responsibilities: + +- get a cached or fresh merged snapshot +- merge managed account and API key availability +- derive effective auth mode and launch readiness + +Must not: + +- open browser +- mutate config + +### `RefreshCodexConnectionSnapshotUseCase` + +Responsibilities: + +- force a fresh read from app-server +- optionally escalate to a one-time `refreshToken = true` read only when there is a concrete auth-staleness reason + +Important nuance: + +- do **not** set `refreshToken = true` on every passive read +- a normal explicit manual refresh still starts with `refreshToken = false` +- reserve `refreshToken = true` for explicit user recovery after an auth-related degraded state or post-login reconciliation + +### `StartCodexChatgptLoginUseCase` + +Responsibilities: + +- ensure a single in-flight login +- start login via app-server +- open browser +- update login state + +Must handle: + +- duplicate click while login already pending +- browser open failure +- login timeout + +First implementation decision: + +- browser flow via `type: "chatgpt"` is in scope +- device code flow via `type: "chatgptDeviceCode"` is explicitly deferred unless we discover a real + Electron/browser-launch blocker + +Reason: + +- browser flow matches the prior UX expectation more closely +- device-code support is valuable, but it is not required to restore the intended desktop UX + +### `CancelCodexLoginUseCase` + +Responsibilities: + +- cancel active login if any +- cleanly tear down pending session + 
+### `LogoutCodexManagedAccountUseCase` + +Responsibilities: + +- perform app-server `account/logout` +- refresh snapshot + +### `ReadCodexRateLimitsUseCase` + +Responsibilities: + +- load rate limits lazily +- avoid blocking basic connection UI + +### `EvaluateCodexLaunchReadinessUseCase` + +Responsibilities: + +- compute launch policy from feature truth +- return: + - readiness state + - effective auth mode + - env mutation instructions + - user-facing advisory + +This use case must become the shell's single source of truth for Codex launch auth policy. + +## Main Infrastructure Design + +## `CodexAccountEnvBuilder` + +Purpose: + +- build consistent env for app-server account sessions +- build deterministic env mutation instructions for execution + +Inputs: + +- resolved shell env +- binary path +- selected auth mode +- API key value or absence + +Outputs: + +- account-session env +- exec-session env policy + +This module exists because generic `buildProviderAwareCliEnv()` currently knows only the old Codex API-key-only world. + +## `CodexAccountAppServerClient` + +Purpose: + +- short-lived request client for: + - `account/read` + - `account/logout` + - `account/rateLimits/read` + +Behavior: + +- request-scoped sessions only +- initialize and dispose per request + +## `CodexLoginSessionManager` + +Purpose: + +- own one long-lived login session while login is pending + +Responsibilities: + +- start login +- observe notifications +- cancel login +- timeout pending login +- emit feature events + +Important rule: + +- there can be at most one active login session at a time + +## Module Responsibility Matrix + +This table is the SRP-oriented version of the implementation design. 
+ +| Module | Owns | Must not own | Typical collaborators | +| --- | --- | --- | --- | +| `CodexAccountAppServerClient` | request-scoped app-server RPC for account/read, logout, rate limits | login session lifecycle, renderer events, config | session factory, logger | +| `CodexLoginSessionManager` | long-lived login session, login notifications, timeout, cancel | generic account/read cache, API key storage, shell copy | app-server transport, browser launcher, logger | +| `CodexAccountEnvBuilder` | auth-store env normalization, auth-sensitive env mutation policy | launch decision semantics, config migration | shell env port, binary resolver port | +| `GetCodexConnectionSnapshotUseCase` | merge snapshot truth and caching policy | browser opening, low-level process logic | managed account source, api key source, cache, clock | +| `EvaluateCodexLaunchReadinessUseCase` | effective auth mode selection and readiness semantics | actual child-process spawning | snapshot/domain models | +| `CodexConnectionCoordinator` | shell-facing launch integration and env assembly handoff | account lifecycle, renderer subscriptions | feature facade, provider connection service | +| presenter adapters | stable DTO/event shaping | domain policy changes | use cases, contracts | +| renderer hooks | subscription orchestration and action wiring | business truth invention | preload API, view-model adapters | +| feature UI components | rendering | transport, config mutation, process logic | hooks, view models | + +Review rule: + +- if a new change makes one row start owning another row's responsibilities, stop and split the + concern before continuing + +## Session And Timeout Policy + +### Short-lived read sessions + +Use for: + +- account/read +- rateLimits/read +- logout + +Recommended timeouts: + +- initialize timeout: aligned with existing app-server defaults +- request timeout: short and bounded + +### Long-lived login session + +Use for: + +- start login +- wait for 
`account/login/completed` + +Recommended timeout: + +- hard max pending duration around `10 minutes` + +Reason: + +- long enough for browser auth +- short enough to avoid zombie sessions + +## App-Server Session Topology Rule + +The feature should make session ownership explicit so connection-scoped notifications do not leak +between concerns. + +### Topology + +- passive reads: + - short-lived dedicated app-server sessions +- rate-limits reads: + - short-lived dedicated app-server sessions or reused read-session helper, but not the live login + session +- login flow: + - one dedicated long-lived session that owns: + - `account/login/start` + - login notifications + - optional `account/login/cancel` + +### No-pooling rule + +Do not multiplex in the first wave: + +- `recent-projects` and `codex-account` over one shared live app-server child +- login notifications and passive reads over one generic pooled session + +### Cleanup rule + +When a feature-owned app-server session is disposed: + +- kill the child deterministically +- reject outstanding requests +- ignore any late results or notifications from that disposed session generation + +This keeps process cleanup and notification ownership unambiguous. + +### Notification suppression policy + +For read sessions: + +- suppress noisy thread notifications + +For login session: + +- do **not** suppress: + - `account/login/completed` + - `account/updated` + +## Timeout Defaults Table + +These defaults should remain centralized so different callers do not invent incompatible timing. 
+ +| Interaction | Recommended default | Why | +| --- | --- | --- | +| app-server initialize for read sessions | align with existing shared app-server defaults | keeps transport consistent with `recent-projects` | +| `account/read` request timeout | short and bounded | passive reads should fail fast into degraded state | +| `account/logout` request timeout | short and bounded | logout should resolve or fail clearly | +| `account/rateLimits/read` timeout | short-medium | secondary UI should not hang the page | +| login pending max duration | around `10 minutes` | enough for browser auth, short enough to avoid zombie state | +| snapshot cache TTL | around `3-10 seconds` | enough dedupe without stale-feeling UI | +| rate-limits cache TTL | around `30-60 seconds` | secondary UI can be less fresh | +| freshness window for last-known-good managed account | around `60 seconds` | balances resilience and honesty | + +Consistency rule: + +- do not hardcode these values independently in multiple modules +- centralize them in feature-local configuration/constants or shared transport defaults where + appropriate + +## Retry And Backoff Policy + +Retries are one of the easiest ways to accidentally hide truth or create duplicate state +transitions. This feature should be conservative. 
+ +### What may retry automatically + +- passive `account/read` refresh after a transient initialization or timeout failure + +Recommended default: + +- at most one immediate retry for passive reads +- only when the failure is clearly transport-level + +### What should not auto-retry + +- login start +- logout +- cancel login +- manual refresh button actions +- any request that already has a user-visible action outcome + +Why: + +- auto-retrying user actions can create duplicate browser flows, duplicate state transitions, or + surprising side effects + +### Backoff rule + +If passive background refresh keeps failing: + +- do not spin +- let the feature surface `degraded` +- wait for next normal refresh trigger or explicit user action + +### Timeout handling rule + +Timeout must be surfaced as a first-class degraded reason category, not collapsed into generic +"not connected". + +## Caching And Refresh Policy + +### Snapshot cache + +Recommended: + +- in-memory cache in main feature +- small TTL around `3-10 seconds` +- single-flight refresh collapse + +Reason: + +- dashboard banner and settings dialog can ask for the same snapshot nearly simultaneously + +### Rate-limits cache + +Recommended: + +- separate cache +- longer TTL around `30-60 seconds` + +Reason: + +- rate limits are secondary UI, not critical hot-path state + +### Post-action invalidation + +After: + +- login success +- logout success +- explicit refresh + +invalidate the snapshot cache immediately. + +## Shell Integration Plan + +This section makes the integration concrete. 
+ +## Main process composition + +Add feature creation in: + +- `src/main/index.ts` + +Pattern: + +- create the feature alongside `recent-projects` +- register its IPC handlers after feature construction + +Later, if browser-mode support is desired: + +- wire the feature into `src/main/standalone.ts` +- add HTTP adapter endpoints + +## Preload integration + +Add to: + +- `src/preload/index.ts` + +Pattern: + +- same as `recent-projects` +- spread `createCodexAccountBridge()` into `window.electronAPI` + +## Shared API type integration + +Extend: + +- `src/shared/types/api.ts` + +with a new feature API contract interface. + +## Renderer integration + +Shell components that must stop owning Codex business logic: + +- `ProviderRuntimeSettingsDialog` +- `CliStatusBanner` +- `CliStatusSection` +- `providerConnectionUi.ts` + +How they should change: + +1. `ProviderRuntimeSettingsDialog` + - for provider `codex`, render `CodexAccountConnectionPanel` + - stop hardcoding Codex as API-key-only +2. `CliStatusBanner` + - stop using terminal modal login/logout for Codex + - use feature hook actions +3. `CliStatusSection` + - same as banner +4. `providerConnectionUi.ts` + - stop flattening Codex auth summary + - use feature adapter output for Codex-specific text + +## Legacy UX Parity Policy + +The user requirement here is not just "make auth work". It is also "restore the good legacy Codex +subscription UX while keeping the new native runtime". + +That means we should reuse the current shell surfaces and preserve their visual grammar, instead of +inventing a new Codex settings screen. 
+ +### Required UX shape + +The feature should plug into the existing surfaces: + +- provider manage dialog +- dashboard CLI banner +- settings CLI section + +It should not introduce: + +- a separate standalone Codex settings page +- a second disconnected login modal system +- a renderer-only fake status card + +### Required visible information + +For Codex, the composed UI should be able to show: + +- current preferred auth mode +- managed account connected or not +- account email when available +- plan type when available +- API key also available or not +- effective launch mode in auto +- pending login / cancelling / logout states +- degraded-but-still-launchable states + +### Required action set + +For Codex, the composed UI should expose: + +- connect ChatGPT account +- cancel login while pending +- disconnect managed account +- choose preferred auth mode +- manage API key without implying it is the only path +- refresh account state + +### Copy policy + +When Codex is managed-account-backed: + +- do not flatten everything to "Connected via API key" +- do not label the primary action as `Configure API key` +- do not use Anthropic-specific `OAuth` wording + +Preferred wording family: + +- `ChatGPT account` +- `Codex subscription` +- `Plan` +- `API key also available` +- `Auto - prefer ChatGPT account` + +### Visual ownership rule + +The feature owns Codex-specific content blocks. + +The shell owns: + +- container cards +- section framing +- generic button spacing/layout primitives +- provider ordering + +This preserves legacy familiarity without duplicating layout systems. + +## Surface-By-Surface UI Contract + +Each visible shell surface should have a clear responsibility so the same state is not explained in +three conflicting ways. 
+ +### `ProviderRuntimeSettingsDialog` + +Purpose: + +- authoritative management surface for Codex auth preference and account state + +Must show: + +- preferred auth mode selector +- managed account summary +- API key secondary availability +- login / cancel / logout actions +- degraded state explanation +- rate-limit section when requested or expanded + +Must not: + +- pretend to be a generic provider card when provider is `codex` + +### `CliStatusBanner` + +Purpose: + +- concise dashboard summary and quick action entry point + +Must show: + +- one-line Codex status summary derived from the feature +- manage/open action into the richer settings surface +- degraded warning when appropriate + +Must not: + +- become the full account management UI + +### `CliStatusSection` + +Purpose: + +- settings-level operational summary for the installed runtime + +Must show: + +- consistent Codex account summary +- launch-relevant status +- path into full manage dialog + +Must not: + +- use different wording than the manage dialog for the same auth truth + +### Shared UI consistency rule + +For the same underlying snapshot: + +- headline wording +- effective auth mode wording +- degraded wording +- connect/disconnect affordances + +must all stay semantically consistent across surfaces, even if the amount of detail differs. + +## What remains shell-owned + +- generic provider card structure +- generic runtime/backend status +- generic model availability presentation + +## What becomes feature-owned + +- Codex account summary copy +- Codex connect/disconnect actions +- Codex login pending UI +- Codex rate-limit presentation +- Codex auth-mode selection UI + +## Renderer Hook Composition Contract + +To keep renderer code aligned with the feature standard, hooks and adapters should have explicit +roles. 
+ +### `useCodexAccount` + +Responsibilities: + +- fetch and subscribe to the current snapshot +- expose refresh action +- expose derived loading/error/degraded flags for feature UI + +Must not: + +- open browser directly +- compute shell layout copy inline + +### `useCodexLoginFlow` + +Responsibilities: + +- expose login, cancel, and logout actions +- surface pending action state and latest action error + +Must not: + +- own snapshot caching +- own rate-limit fetching + +### `useCodexRateLimits` + +Responsibilities: + +- fetch rate limits lazily +- respect dedicated rate-limit TTL and pending state + +Must not: + +- block base snapshot rendering + +### `codexAccountViewModel` / `codexProviderShellAdapter` + +Responsibilities: + +- merge feature DTOs with generic shell provider status into stable view models +- keep wording and badge semantics consistent across surfaces + +Must not: + +- call transport directly +- mutate app config + +### UI component rule + +Feature UI components should be as close to pure renderers as possible: + +- inputs in +- callbacks out + +This keeps renderer complexity low and makes snapshot-state regression tests much easier. + +## Browser-mode policy + +Initial implementation recommendation: + +- Electron / preload path is the first-class path +- browser-mode support is explicitly deferred unless product requires it immediately + +If browser mode is visible: + +- Codex account feature should degrade honestly as unsupported or unavailable +- do not silently attempt local app-server control through browser mode without explicit HTTP support + +This keeps the architecture clean and avoids half-working local machine assumptions in browser sessions. + +## Browser Auth URL Handling Policy + +The feature should treat login URLs as sensitive, short-lived control-plane data. 
+ +### Open rule + +For Codex ChatGPT login: + +- open the URL from main process only +- validate the URL before opening +- require `https:` scheme +- reject `http:`, `mailto:`, custom schemes, or malformed URLs for this feature-specific path + +### Trust rule + +Do not hardcode a hostname allowlist in the first wave unless official docs start guaranteeing a +stable host set. + +Instead: + +- trust app-server as the source of the URL +- enforce `https` scheme +- avoid logging the full URL +- record only derived diagnostics when needed, such as scheme/hostname + +### IPC rule + +- renderer asks to start login +- main starts login and opens the validated URL +- renderer never receives the raw `authUrl` + +### Failure rule + +If URL validation fails: + +- classify as `unsafe-auth-url` +- fail login cleanly +- do not attempt browser open + +## Runtime Integration Plan + +This is where the feature touches existing runtime services. + +## Existing problem + +`providerAwareCliEnv.ts` and `ProviderConnectionService.ts` currently encode the rule: + +- Codex readiness requires API key presence + +That must change. 
+ +## Recommended integration pattern + +Introduce a small runtime-side coordinator: + +- `src/main/services/runtime/CodexConnectionCoordinator.ts` + +Responsibilities: + +- ask the feature for launch readiness +- ask `ProviderConnectionService` for API key value resolution when needed +- apply auth-mode-specific env mutation policy + +Why a coordinator is better than stuffing more into `ProviderConnectionService`: + +- keeps provider-generic code smaller +- avoids turning `ProviderConnectionService` into a God object +- keeps Codex feature policy close to the feature seam + +## Responsibilities after the coordinator exists + +### `ProviderConnectionService` keeps responsibility for: + +- app-owned API key discovery +- app-owned API key injection primitives +- Anthropic-specific connection mode handling + +### `CodexConnectionCoordinator` owns: + +- choosing ChatGPT vs API key for Codex launch +- deciding which env vars must be sanitized +- deciding which `forced_login_method` override to pass + +### `providerAwareCliEnv.ts` becomes: + +- provider-generic env assembly plus delegation to Codex coordinator when provider is `codex` + +### `ClaudeMultimodelBridgeService.ts` becomes: + +- generic runtime status reader +- plus additive merge of Codex account snapshot for Codex-specific presentation + +## Detailed Touch Points + +Likely first-wave touch points: + +- `src/main/index.ts` +- `src/preload/index.ts` +- `src/shared/types/api.ts` +- `src/main/services/infrastructure/ConfigManager.ts` +- `src/main/ipc/configValidation.ts` +- `src/shared/types/notifications.ts` +- `src/main/services/runtime/providerAwareCliEnv.ts` +- `src/main/services/runtime/ProviderConnectionService.ts` +- `src/main/services/runtime/ClaudeMultimodelBridgeService.ts` +- `src/renderer/components/runtime/ProviderRuntimeSettingsDialog.tsx` +- `src/renderer/components/runtime/providerConnectionUi.ts` +- `src/renderer/components/dashboard/CliStatusBanner.tsx` +- 
`src/renderer/components/settings/sections/CliStatusSection.tsx` + +Important design rule: + +- shell files should mostly lose Codex-specific conditional logic, not gain more of it + +## File-Level Implementation Map + +This section translates the architecture into concrete repo edits so the work stays transparent. + +### New feature files + +Expected first-wave additions: + +- `src/features/codex-account/contracts/api.ts` +- `src/features/codex-account/contracts/channels.ts` +- `src/features/codex-account/contracts/dto.ts` +- `src/features/codex-account/contracts/events.ts` +- `src/features/codex-account/contracts/index.ts` +- `src/features/codex-account/core/domain/*` +- `src/features/codex-account/core/application/ports/*` +- `src/features/codex-account/core/application/use-cases/*` +- `src/features/codex-account/main/composition/createCodexAccountFeature.ts` +- `src/features/codex-account/main/adapters/input/ipc/registerCodexAccountIpc.ts` +- `src/features/codex-account/main/adapters/output/presenters/*` +- `src/features/codex-account/main/adapters/output/runtime/*` +- `src/features/codex-account/main/infrastructure/cache/*` +- `src/features/codex-account/main/infrastructure/codex/*` +- `src/features/codex-account/preload/createCodexAccountBridge.ts` +- `src/features/codex-account/preload/index.ts` +- `src/features/codex-account/renderer/index.ts` +- `src/features/codex-account/renderer/adapters/*` +- `src/features/codex-account/renderer/hooks/*` +- `src/features/codex-account/renderer/ui/*` + +### Existing files that should mostly gain composition hooks, not deep new business logic + +#### `src/main/index.ts` + +Should: + +- instantiate the feature +- register IPC +- expose a small facade to shell services + +Should not: + +- implement account/read logic inline +- manage login state inline + +#### `src/preload/index.ts` + +Should: + +- merge the feature bridge into `window.electronAPI` + +Should not: + +- contain auth logic +- transform account domain state 
into shell copy + +#### `src/main/services/infrastructure/ConfigManager.ts` + +Should: + +- add persisted `providerConnections.codex.preferredAuthMode` +- normalize stale legacy Codex connection values + +Should not: + +- resolve runtime effective auth mode + +#### `src/main/ipc/configValidation.ts` + +Should: + +- accept only the new normalized Codex connection field +- reject new unknown fields after migration normalization + +Should not: + +- infer defaults that belong to `ConfigManager` + +#### `src/main/services/runtime/ProviderConnectionService.ts` + +Should: + +- remain owner of app API key storage lookup and injection primitives + +Should not: + +- remain final authority for Codex launch readiness +- own ChatGPT managed-account policy + +#### `src/main/services/runtime/providerAwareCliEnv.ts` + +Should: + +- delegate Codex-specific env policy to the coordinator / feature seam + +Should not: + +- continue hardcoding API-key-only Codex logic + +#### `src/main/services/runtime/ClaudeMultimodelBridgeService.ts` + +Should: + +- keep generic provider/runtime status probing +- optionally compose Codex account snapshot into Codex-facing presentation + +Should not: + +- become the owner of Codex account lifecycle + +#### `src/renderer/components/runtime/ProviderRuntimeSettingsDialog.tsx` + +Should: + +- host the feature-owned Codex panel + +Should not: + +- directly own Codex login flow logic +- directly derive Codex copy from generic provider flags alone + +#### `src/renderer/components/runtime/providerConnectionUi.ts` + +Should: + +- become thinner for Codex +- consume feature adapters for Codex-specific labels + +Should not: + +- remain the place where Codex account semantics are invented + +#### `src/renderer/components/dashboard/CliStatusBanner.tsx` +#### `src/renderer/components/settings/sections/CliStatusSection.tsx` + +Should: + +- call feature actions / hooks +- render feature-composed Codex status segments + +Should not: + +- keep normal Codex login/logout 
on terminal modal commands once the feature is complete + +## Failure Modes And Safety Policy + +### Failure mode: no Codex binary + +Behavior: + +- state becomes `runtime_missing` +- login actions disabled +- connection panel explains binary missing + +### Failure mode: app-server initialize failure + +Behavior: + +- managed-account state becomes degraded +- API key availability remains visible +- do not auto-mark user as logged out +- do not hard-stop launch if execution could still work + +Additional rule: + +- if the last successful snapshot is still within the acceptable freshness window, prefer showing + `degraded` over collapsing to `not_connected` + +### Failure mode: account read timeout + +Behavior: + +- same handling as app-server initialize failure: managed-account state becomes degraded +- last good snapshot may be reused briefly while it is still within the last-known-good freshness window + +### Failure mode: login start failure + +Behavior: + +- `login.state = failed` +- snapshot remains refreshable + +### Failure mode: unsafe auth URL + +Behavior: + +- `login.state = failed` +- do not attempt browser open +- surface a security-oriented error category rather than a generic browser failure + +### Failure mode: browser open failure + +Behavior: + +- `login.state = failed` +- no implicit retry loop +- preserve returned login metadata in memory long enough to support explicit retry or diagnostics + +### Failure mode: login completed false + +Behavior: + +- keep explicit error message +- invalidate login session +- refresh snapshot once + +### Failure mode: logout failure + +Behavior: + +- keep current snapshot +- surface error + +### Failure mode: app restart or shutdown during pending login + +Behavior: + +- next app session starts from `idle` +- no persisted phantom pending state +- fresh snapshot determines whether login actually completed elsewhere or must be retried + +### Failure mode: rate-limits failure + +Behavior: + +- degrade only the rate-limit panel +- do not mark account disconnected + +## Diagnostics And Logging Policy + +We want enough 
observability to debug auth issues, but not enough to leak secrets or create false +confidence from logs. + +### Recommended structured events + +Add additive logs around: + +- account snapshot refresh started +- account snapshot refresh settled +- login started +- login browser open attempted +- login completed +- login cancelled +- logout started +- logout settled +- launch readiness resolved +- execution auth mode resolved +- degraded account read + +### Safe fields to log + +- provider id +- backend id +- preferred auth mode +- effective auth mode +- snapshot state +- readiness state +- requiresOpenaiAuth +- binary available +- degraded reason category + +### Fields to avoid logging verbatim + +- `authUrl` +- API keys +- refresh tokens +- full account email if logging policy treats it as sensitive +- raw `loginId` unless support/debug mode explicitly needs it + +Recommended compromise for email: + +- either do not log it +- or log only a redacted form for support diagnostics + +Recommended compromise for `loginId`: + +- either do not log it +- or log only a short fingerprint / suffix that cannot be used as a live control token + +Recommended compromise for `authUrl`: + +- log at most: + - URL scheme + - hostname + - whether validation passed +- never log query parameters or full path verbatim + +### Important anti-lie rule + +Logs must distinguish: + +- `app-server degraded` +- `managed account absent` +- `execution failed` + +These are different operational truths and must not be collapsed into one generic auth error. + +## Telemetry And Operational Metrics + +We do not need heavy telemetry to build the feature, but we should design enough observability to +see whether rollout is healthy. 
+ +### Recommended counters + +- `codex_account_snapshot_refresh_total` +- `codex_account_snapshot_refresh_degraded_total` +- `codex_account_login_start_total` +- `codex_account_login_success_total` +- `codex_account_login_failure_total` +- `codex_account_login_cancel_total` +- `codex_account_logout_total` +- `codex_account_launch_ready_chatgpt_total` +- `codex_account_launch_ready_api_key_total` +- `codex_account_launch_missing_auth_total` + +### Recommended timers / histograms + +- snapshot refresh latency +- app-server initialize latency +- `account/read` latency +- login time to completion +- logout latency + +### Recommended rollout health signals + +During rollout, we should watch for: + +- degraded refresh rate +- login failure rate +- mismatch rate between expected and effective auth mode +- launch failures after a `ready_chatgpt` decision + +### Anti-goal + +Do not let telemetry become a hidden source of truth for auth semantics. + +It is for diagnosis only, not for deciding whether the user is connected. + +## Troubleshooting Playbook + +When this feature misbehaves, these are the most likely symptom clusters and where to look first. 
+ +### Symptom - UI says ChatGPT account connected, but launch behaves like API key + +Check: + +- ChatGPT launch env sanitization +- `forced_login_method="chatgpt"` override actually being passed +- whether ambient `OPENAI_API_KEY` or `CODEX_API_KEY` survived into exec env + +Likely owner: + +- `CodexConnectionCoordinator` +- `CodexAccountEnvBuilder` + +### Symptom - UI loses account state after a transient refresh failure + +Check: + +- degraded-path handling +- freshness window logic +- last-known-good snapshot clearing behavior + +Likely owner: + +- snapshot merge use case +- cache / freshness policy + +### Symptom - login opens browser twice or gets stuck pending + +Check: + +- login session exclusivity guard +- duplicate-click handling +- notification delivery and timeout cleanup + +Likely owner: + +- `CodexLoginSessionManager` + +### Symptom - login never resumes after app restart + +Check: + +- whether pending login was incorrectly persisted +- whether startup incorrectly restored stale renderer-local login state +- whether the first fresh snapshot after restart was skipped + +Likely owner: + +- startup recovery policy +- main-process feature initialization + +### Symptom - cancel appears to work, but account still becomes connected + +Check: + +- whether cancel raced with a login that had already completed upstream +- whether the final post-cancel snapshot was used as the source of truth +- whether UI incorrectly treated cancel intent as stronger than fresh steady-state truth + +Likely owner: + +- operation serialization policy +- login session manager +- post-mutation snapshot settlement + +### Symptom - renderer surfaces disagree about current state + +Check: + +- feature view-model adapters +- whether one surface is reading raw provider status instead of the feature snapshot +- whether stale event ordering is overriding newer state + +Likely owner: + +- renderer adapters / hooks +- shell composition boundary + +### Symptom - app-server sees account, but 
exec does not + +Check: + +- resolved `HOME` +- resolved `USERPROFILE` +- resolved `CODEX_HOME` +- whether app-server and exec are built from the same auth-root normalization path + +Likely owner: + +- env builder +- shell env source adapter + +## Security And Privacy Rules + +These are non-negotiable. + +1. Do not parse `auth.json`. +2. Do not copy Codex-managed tokens into app storage. +3. Do not log auth URLs verbatim if they may include sensitive values. +4. Do not persist login session ids beyond process lifetime. +5. Keep account metadata in memory by default. +6. Keep API keys in the app's existing secure storage only. +7. Do not use `account/login/start { type: "apiKey" }` in the first implementation. +8. Do not send raw `authUrl` over IPC to renderer. +9. Do not persist `loginId` or pending login state across restarts. +10. Validate login URLs with a feature-specific `https`-only policy before browser open. + +### Why we should not use app-server API-key login mode + +Because it would create two overlapping secret owners: + +- app secure storage +- Codex internal storage + +That violates: + +- DRY +- single responsibility +- clear ownership + +So the rule is: + +- managed ChatGPT account is owned by Codex +- API key is owned by the app + +## Security Review Checklist + +Before rollout, the implementation should be reviewed against this checklist. 
+ +### Secret handling + +- no API key written into feature logs +- no auth URL logged verbatim +- no ChatGPT managed token copied into app storage +- no legacy auth files parsed directly + +### Process/env handling + +- ChatGPT launches strip `OPENAI_API_KEY` +- ChatGPT launches strip `CODEX_API_KEY` +- API-key launches inject only the intended key +- app-server and exec use the same auth-store env roots + +### Persistence handling + +- only `preferredAuthMode` is persisted for Codex connection preference +- login ids are not persisted across process lifetime +- last-known-good managed account cache is memory-only + +### UI honesty + +- degraded control-plane state is not shown as confirmed logout +- API-key availability is not shown as managed-account connection +- managed-account connection is not shown as API-key billing + +### Rollout safety + +- no hidden automatic fallback from ChatGPT launch failure to API-key launch +- no normal UI path silently invokes legacy Codex transport + +## Testing Strategy + +## Unit tests - feature core + +Add tests for: + +- managed account + API key merge rules +- effective auth mode resolution +- launch readiness resolution +- degraded-state behavior +- migration defaults + +## Unit tests - main infrastructure + +Add tests for: + +- `CodexAccountEnvBuilder` +- `CodexAccountAppServerClient` +- `CodexLoginSessionManager` +- cache TTL and single-flight behavior +- app-server timeout handling +- env sanitization rules + +Critical cases: + +1. managed account exists and ambient API key is present +2. ChatGPT mode must strip API keys +3. API-key mode must inject API key +4. account read must not inherit ambient API keys +5. 
same HOME / USERPROFILE / CODEX_HOME resolution used for read and exec + +## Unit tests - renderer + +Add tests for: + +- ChatGPT connected state +- API-key only state +- both-available state +- degraded state +- runtime missing state +- login pending state +- plan type display +- rate-limit panel visibility + +## Integration tests - shell + +Update existing shell tests: + +- `ProviderRuntimeSettingsDialog.test.ts` +- `CliStatusVisibility.test.ts` +- `providerAwareCliEnv.test.ts` +- `ProviderConnectionService.test.ts` +- `ClaudeMultimodelBridgeService.test.ts` + +## Test Matrix - must-cover scenarios + +This matrix should be used to ensure we are not only testing the happy path. + +| Scenario | Snapshot expectation | Launch expectation | UI expectation | Test level | +| --- | --- | --- | --- | --- | +| Binary missing | `runtime_missing` | blocked | login disabled, missing runtime copy | unit + renderer | +| Managed account only | `managed_account_connected` | ChatGPT launch | plan/email visible | unit + integration | +| API key only | `api_key_available` | API-key launch | API key available copy | unit + integration | +| Both available with `auto` | `both_available` | ChatGPT launch | Auto prefers ChatGPT | unit + integration | +| Both available with `api_key` | `both_available` | API-key launch | API key preferred copy | unit + integration | +| Managed account detected but app-server degraded | `degraded` | warning launchable | degraded banner, not false logout | unit + integration | +| No auth and app-server degraded | `degraded` or `not_connected` depending freshness | blocked unless freshness rule applies | unable to verify copy | unit | +| App-server stable handshake incompatible | feature locked or degraded with incompatibility verdict | no misleading login affordance | update-runtime / incompatible-runtime copy | unit + integration | +| Login pending | `login_in_progress` or pending login state | no duplicate login start | cancel action visible | unit + 
renderer | +| Two renderer subscribers during one login flow | shared pending state in both surfaces | one login only | consistent pending/cancel UI in both places | unit + renderer integration | +| Pending login lost on app restart | fresh snapshot wins, no phantom pending state | login can be retried cleanly | informational recovery or silent idle reset | unit + integration | +| Unsafe login URL returned | login fails before open | no browser open side effect | explicit safe error state | unit | +| Cancel races with late login completion | freshest post-race snapshot wins | no forced false logout | pending clears into truthful connected or idle state | unit + integration | +| Logout races with stale pre-logout read | post-logout truth wins | no resurrection of old account | disconnected state remains stable | unit + integration | +| Corrupted legacy Codex config subtree | normalizes to safe default | no launch-preference crash | settings load with sane default copy | unit | +| Browser open failure | failed login state | no launch behavior change | explicit error surfaced | unit + renderer | +| Logout success | no managed account | auto may fall back to API key | UI clears managed account | integration | +| Managed workspace restriction | no phantom managed account | launch blocked with policy truth | workspace-restricted copy, no fake workspace switcher | unit + renderer | +| Stale slow read after fast successful read | latest successful snapshot preserved | none | no regression flicker | unit | +| Ambient API key present during ChatGPT launch | managed account still primary | keys stripped | no API-key-primary wording | unit + integration | + +## Test doubles and harness requirements + +To keep the implementation testable, we should plan the following fakes: + +- fake app-server client for deterministic `account/read`, `account/logout`, and rate-limit reads +- fake app-server initialize handshake that can: + - succeed with stable account support + - succeed but 
reject required methods as incompatible + - return diagnostic `codexHome` / platform metadata +- fake login session transport that can emit: + - success + - failure + - timeout + - duplicate notification + - late success after cancel intent +- fake browser launcher that can: + - succeed + - fail +- fake login URL validator that can: + - accept valid `https` URLs + - reject unsafe schemes or malformed values +- fake clock for TTL and freshness testing +- fake API key source adapter +- fake shell env source for `HOME` / `USERPROFILE` / `CODEX_HOME` determinism +- fake config input fixtures covering: + - missing Codex subtree + - stale legacy keys + - malformed non-object Codex subtree + +Important rule: + +- do not make most tests spawn the real `codex app-server` +- reserve real app-server checks for live signoff and a very small number of high-value integration tests + +## Live signoff + +Required live or semi-live signoff: + +1. already logged-in ChatGPT account autodetects without relogin +2. launch succeeds with ChatGPT auth and no API key +3. launch succeeds with API key mode +4. both-available state prefers ChatGPT in auto mode +5. chatgpt mode strips API keys from the exec env +6. login opens browser and completes +7. logout clears managed account state +8. initialize diagnostics capture the expected `codexHome` and compatibility verdict +9. if a managed-policy environment is available, workspace restriction surfaces policy-specific copy +10. restarting during a pending login does not leave the next app session stuck pending +11. app-server degradation does not falsely report logged out +12. app-server degradation does not falsely hard-block `codex exec` + +## Manual QA And Failure Injection Checklist + +In addition to automated tests, the following manual checks are high value because they exercise +real process boundaries and browser behavior. 
+ +### Happy-path manual checks + +- open the app with an already logged-in ChatGPT-backed Codex account +- verify autodetect without relogin +- verify `auto` chooses ChatGPT +- verify explicit `api_key` preference still works when an API key exists + +### Failure-injection manual checks + +- break or suspend app-server startup and verify the UI shows `degraded`, not false logout +- remove the stored API key and verify `api_key` preference becomes `missing_auth` +- keep an ambient API key in the shell env and verify explicit ChatGPT launch still strips it +- trigger login and close/cancel before completion +- trigger login and force browser-open failure if possible via a fake or test harness +- logout and verify stale degraded reads do not resurrect the old account state + +### Visual parity checks + +- compare dashboard banner, settings section, and manage dialog for the same state +- verify wording consistency for: + - connected ChatGPT account + - API-key-only availability + - both available + - degraded + - runtime missing + +## Signoff Artifacts And Evidence Discipline + +To keep rollout transparent, each significant phase should leave behind explicit evidence. 
+ +### Recommended evidence file + +Create and maintain: + +- `docs/research/codex-app-server-account-signoff.md` + +### What evidence should include + +For each completed phase: + +- date +- branch / commit SHA +- what scenarios were exercised +- what test suites were run +- what live manual checks were run +- whether any known gaps remain + +### Minimum evidence snippets + +Capture: + +- one managed-account autodetect result +- one API-key-only result +- one both-available result +- one degraded-path result +- one ChatGPT launch proof with API keys absent from effective exec env + +### Anti-handwave rule + +Do not mark a phase "done" based only on: + +- unit tests +- visual inspection +- a single happy-path local login + +We need evidence that the failure and degraded paths behave truthfully too. + +## Recommended Signoff Commands + +These commands should be treated as the default signoff baseline for this repo unless implementation +details require a small adjustment. + +### Targeted runtime and UI suite + +```bash +pnpm test -- \ + test/main/services/runtime/providerAwareCliEnv.test.ts \ + test/main/services/runtime/ProviderConnectionService.test.ts \ + test/main/services/runtime/ClaudeMultimodelBridgeService.test.ts \ + test/main/ipc/configValidation.test.ts \ + test/renderer/components/runtime/ProviderRuntimeSettingsDialog.test.ts \ + test/renderer/components/runtime/providerConnectionUi.test.ts \ + test/renderer/components/cli/CliStatusVisibility.test.ts +``` + +### Targeted `recent-projects` safety suite + +```bash +pnpm test -- \ + test/features/recent-projects/main/infrastructure/CodexAppServerClient.test.ts \ + test/features/recent-projects/main/adapters/output/CodexRecentProjectsSourceAdapter.test.ts \ + test/features/recent-projects/core/application/ListDashboardRecentProjectsUseCase.test.ts \ + test/features/recent-projects/contracts/normalizeDashboardRecentProjectsPayload.test.ts \ + 
test/features/recent-projects/renderer/adapters/RecentProjectsSectionAdapter.test.ts +``` + +### Typecheck baseline + +```bash +pnpm typecheck +``` + +### Lint baseline + +```bash +pnpm lint +``` + +### Full repo safety pass before wider rollout + +```bash +pnpm test +pnpm typecheck +pnpm lint +``` + +### Live manual signoff + +There is no single universal script for this yet, so the plan should expect a small manual desktop +signoff pass covering: + +- autodetect existing ChatGPT account +- ChatGPT-backed launch without API key +- API-key-backed launch +- degraded-path truthfulness +- logout and stale-state non-resurrection + +## Release Readiness Checklist + +Before wider rollout or merge into the main delivery branch, the implementation should satisfy this +checklist end-to-end. + +### Contract readiness + +- feature contracts compile cleanly +- preload bridge matches contracts +- renderer hooks consume only the supported API surface + +### Runtime readiness + +- ChatGPT and API-key launches both work +- exec env sanitization is verified +- app-server degradation is truthful and non-destructive + +### UI readiness + +- dashboard, settings, and manage dialog agree on the same state +- Codex wording no longer flattens everything to API key +- terminal-modal Codex login path is removed or intentionally retained only per rollout policy + +### Safety readiness + +- security checklist is green +- troubleshooting playbook scenarios have at least been spot-checked +- signoff evidence file has current results + +### Regression readiness + +- targeted runtime/UI suites pass +- targeted `recent-projects` safety suite passes +- typecheck and lint pass + +## Post-Release Triage Checklist + +If issues appear after internal rollout, use this triage order before making design changes. + +### 1. Determine the symptom class + +- wrong UI state +- wrong launch auth mode +- login lifecycle bug +- degraded-state regression +- rate-limit-only issue + +### 2. 
Confirm whether steady-state snapshot is correct + +Ask: + +- does a forced fresh snapshot show the right account truth? + +If yes: + +- the bug is more likely in event ordering, caching, or renderer composition + +If no: + +- the bug is more likely in control-plane reads, env routing, or migration/config + +### 3. Confirm whether execution env matches launch decision + +Ask: + +- did the launch decision say `chatgpt` or `api_key`? +- did the actual exec env honor that? + +### 4. Confirm whether auth-store roots match + +Ask: + +- did app-server and exec use the same `HOME` / `USERPROFILE` / `CODEX_HOME`? + +### 5. Only then consider rollback or feature hiding + +The first response should usually be: + +- diagnose +- patch the narrow seam + +not: + +- broaden fallback +- revive legacy transport + +## Rollout Plan + +## Recommended rollout shape + +1. land feature behind internal enablement +2. wire read-only autodetect first +3. wire launch policy next +4. wire login/logout UI after launch policy is correct +5. remove Codex terminal modal login path only after feature login is green + +## Why this order matters + +The most dangerous intermediate state is: + +- UI claims subscription support +- runtime still hard-requires API key + +So launch policy must land before or together with user-facing subscription affordances. + +## Safe Disable And Rollback Policy + +This feature should be additive enough that we can disable exposure without corrupting runtime +truth. 
+ +### Safe disable rule + +If the feature must be temporarily hidden during rollout: + +- hide Codex managed-account UI affordances +- keep persisted `preferredAuthMode` readable +- do not auto-rewrite user preference +- do not silently remap ChatGPT preference to API-key preference + +### What rollback must not do + +Do not respond to rollout stress by: + +- reintroducing legacy Codex transport +- auto-falling back from failed ChatGPT launch to API key without user intent +- erasing Codex account metadata from config or UI state + +### Acceptable temporary rollback shape + +If we need a temporary rollback during early rollout: + +- feature UI can be hidden or marked internal +- launch policy can remain additive and guarded +- the old terminal-modal Codex login path may remain only until the feature is fully proven + +This preserves correctness without lying about runtime semantics. + +## Remaining Open Questions And Recommended Defaults + +The plan is intentionally decisive, but a few implementation choices can still remain configurable. +These should not block the first pass because we already have recommended defaults. 
+ +### Open question 1 - exact freshness window duration + +Recommended default: + +- `60 seconds` + +Rationale: + +- long enough to survive brief app-server instability +- short enough to avoid long-lived stale-account lies + +### Open question 2 - whether rate limits should auto-load or stay lazy + +Recommended default: + +- lazy-load rate limits + +Rationale: + +- account summary and launch readiness are higher priority +- rate limits should not slow the base snapshot path + +### Open question 3 - whether device-code login should land in phase 1 + +Recommended default: + +- no, keep it deferred + +Rationale: + +- browser flow is more aligned with the intended UX +- device code adds scope without unblocking the primary desktop path + +### Open question 4 - whether to expose degraded state as a separate badge vs text-only warning + +Recommended default: + +- keep degraded as explicit text/state first, add badge only if it materially improves clarity + +Rationale: + +- textual honesty is more important than visual flourish + +### Open question 5 - whether to hard-pin a minimum Codex binary version + +Recommended default: + +- no hard semver pin for the first wave beyond diagnostics and troubleshooting + +Rationale: + +- protocol/capability handshake is a safer unlock mechanism than guessing from version strings +- hard pins are still possible later if rollout evidence shows a real compatibility floor + +### Open question 6 - whether Codex account snapshot should be folded more deeply into generic runtime status + +Recommended default: + +- no, keep it as a separate feature snapshot and compose at the shell boundary + +Rationale: + +- preserves clean bounded contexts +- avoids poisoning generic provider contracts with Codex-specific semantics + +### Open question 7 - whether to hard-allowlist ChatGPT login hostnames + +Recommended default: + +- no hard hostname allowlist in the first wave, only strict `https` validation plus redacted + diagnostics + +Rationale: + +- 
current docs and schema establish the existence of `authUrl`, but not a future-proof hostname + contract +- strict scheme validation gives strong safety without coupling the feature to undocumented host + choices + +### Open question 8 - whether explicit user refresh should immediately escalate to `refreshToken = true` + +Recommended default: + +- no, start with a normal read and escalate only on concrete auth-staleness evidence + +Rationale: + +- keeps manual refresh predictable without overusing token refresh as a blunt instrument +- preserves a clear distinction between ordinary state refresh and auth recovery + +## Risk Register + +This section lists the most important remaining implementation risks and how the plan contains them. + +### Risk 1 - false subscription billing semantics + +Failure shape: + +- UI says ChatGPT account is active +- launch silently uses API key + +Mitigation: + +- ChatGPT execution must strip `OPENAI_API_KEY` and `CODEX_API_KEY` +- force `forced_login_method="chatgpt"` +- add dedicated tests for this exact case + +### Risk 2 - split auth store between control plane and execution + +Failure shape: + +- app-server sees logged-in account +- `codex exec` uses different `HOME` / `CODEX_HOME` + +Mitigation: + +- centralize env resolution +- test exact parity of resolved auth store env for read/login/logout/exec + +### Risk 3 - false logout on transient app-server failure + +Failure shape: + +- timeout or initialize failure +- UI collapses to `not connected` + +Mitigation: + +- degraded state is first-class +- freshness window can preserve last-known-good account truth temporarily + +### Risk 4 - shell regains ownership and recreates coupling + +Failure shape: + +- banner/dialog/section each re-implement Codex logic differently + +Mitigation: + +- feature owns Codex semantics +- shell only hosts composition and generic layout + +### Risk 5 - config drift creates silent preference flips + +Failure shape: + +- presence of API key or account 
implicitly changes user preference + +Mitigation: + +- explicit separation of persisted preference and observed availability +- one-way normalization with no inferred preference writes + +### Risk 6 - rollout exposes login before launch policy is correct + +Failure shape: + +- user can log into ChatGPT in UI +- runtime still blocks without API key + +Mitigation: + +- rollout gate order enforces launch policy before or alongside subscription UX + +### Risk 7 - over-expansion of scope into browser mode or apiKey app-server login + +Failure shape: + +- feature accumulates multiple control planes in first wave + +Mitigation: + +- browser mode explicitly deferred +- app-server `apiKey` login explicitly out of first implementation + +## Follow-On Plan For `agent_teams_orchestrator` + +This feature is intentionally scoped to `claude_team` first, but we should document the expected +later parity path now so the first implementation does not paint us into a corner. + +### What should carry over later + +- normalized Codex auth vocabulary + - `preferredAuthMode` + - `effectiveAuthMode` + - `degraded` + - managed-account vs API-key truth +- same launch-readiness semantics +- same exec env sanitization rules +- same no-silent-fallback rule + +### What should not be copied blindly + +- Electron-specific browser launch adapters +- preload contracts +- renderer hook design + +### Suggested parity order + +1. share only pure policy and data-shape concepts first +2. extract any provider-agnostic launch-readiness helpers only if duplication is real +3. keep transport/UI/process integration repo-specific + +### Important guardrail + +Do not prematurely contort the `claude_team` implementation around orchestrator parity if it makes +the desktop feature worse or harder to reason about. + +## Phase Gates And Go / No-Go Rules + +This section is the release-quality version of the rollout. 
+ +### Gate 1 - read-only truth is trustworthy + +Must be true before exposing any new login affordance: + +- autodetect works for already logged-in ChatGPT users +- API key availability still shows correctly +- degraded account reads do not erase last-known-good truth immediately +- no current non-Codex provider behavior regresses + +### Gate 2 - launch policy is trustworthy + +Must be true before defaulting UI toward subscription messaging: + +- ChatGPT-backed launch works with no API key present +- ChatGPT-backed launch strips API keys from env +- API-key launch still works +- auto mode resolves deterministically and observably + +### Gate 3 - interactive login is trustworthy + +Must be true before removing terminal-modal login for Codex: + +- browser login starts reliably +- duplicate clicks do not create duplicate sessions +- cancel works +- logout works +- stale cached snapshots do not reappear after login/logout transitions + +### Gate 4 - UI parity is trustworthy + +Must be true before calling the feature complete: + +- no normal Codex UI path still says API key is the only connection mechanism +- dashboard and settings surfaces agree on account truth +- the rendered Codex panel uses the same account truth as launch policy + +### Hard no-go conditions + +Do not ship the feature if any of these remain true: + +- ChatGPT mode can still execute with inherited API keys +- app-server and exec use different auth storage roots +- degraded app-server reads collapse to false logout +- `ProviderConnectionService` remains the final readiness authority for Codex +- Codex login in normal UI still routes through terminal modal commands + +## Implementation Phases And Commit Boundaries + +### Phase A - shared transport extraction + +Goal: + +- extract reusable app-server transport primitives from `recent-projects` + +Estimated size: + +- `250-450` lines + +Suggested commit: + +- `refactor(codex-account): extract shared app-server transport` + +Primary deliverables: + +- 
extracted generic JSON-RPC stdio primitives +- no deep import from `recent-projects` into the new feature +- `recent-projects` still green on the extracted transport + +Acceptance criteria: + +- no behavior change for `recent-projects` +- extracted transport owns initialize/initialized handshake +- transport defaults are centralized in one place + +### Phase B - feature skeleton and read-only snapshot + +Goal: + +- create the feature slice +- implement `account/read` +- implement snapshot DTO and IPC bridge + +Estimated size: + +- `450-750` lines + +Suggested commit: + +- `feat(codex-account): add managed account snapshot feature` + +Primary deliverables: + +- feature slice exists with contracts, main, preload, renderer entrypoints +- `account/read` wired through feature IPC +- API key availability merged into snapshot +- cached snapshot and degraded state logic working + +Acceptance criteria: + +- already logged-in ChatGPT user is detected +- API-key-only user is detected +- degraded app-server read does not present false logout + +### Phase C - config, migration, and renderer composition + +Goal: + +- add `preferredAuthMode` +- integrate feature panel into runtime settings and provider cards + +Estimated size: + +- `300-550` lines + +Suggested commit: + +- `feat(codex-account): add codex auth preference and shell composition` + +Primary deliverables: + +- persisted `preferredAuthMode` +- config migration on load and save +- shell surfaces render the feature-owned Codex panel + +Acceptance criteria: + +- stale legacy Codex config normalizes forward +- renderer can display auto/chatgpt/api_key preference correctly +- generic shell layout remains unchanged for non-Codex providers + +### Phase D - runtime launch policy integration + +Goal: + +- remove API-key-only hard gate +- add env sanitization and `forced_login_method` policy + +Estimated size: + +- `300-600` lines + +Suggested commit: + +- `feat(codex-account): wire codex launch readiness policy` + +Primary 
deliverables: + +- Codex launch readiness no longer owned by API-key-only logic +- ChatGPT env sanitization wired +- `forced_login_method` wired per effective auth mode + +Acceptance criteria: + +- ChatGPT launch works without API key +- API-key launch still works +- ambient API keys cannot hijack ChatGPT launch mode + +### Phase E - login, cancel, logout + +Goal: + +- full managed login lifecycle via app-server + +Estimated size: + +- `350-650` lines + +Suggested commit: + +- `feat(codex-account): add codex app-server login lifecycle` + +Primary deliverables: + +- browser login flow +- cancel flow +- logout flow +- live login session manager with timeout and duplicate-click safety + +Acceptance criteria: + +- one login session at a time +- login success refreshes snapshot +- logout clears managed account state in UI + +### Phase F - rate limits and final shell cleanup + +Goal: + +- add rate limits +- remove Codex terminal modal auth path + +Estimated size: + +- `150-300` lines + +Suggested commit: + +- `refactor(codex-account): finalize native codex account ui` + +Primary deliverables: + +- rate-limit panel +- final Codex copy cleanup +- terminal-modal Codex login/logout removed from normal UI paths + +Acceptance criteria: + +- dashboard and settings show consistent Codex account story +- plan type and rate limits are visible when available +- no normal Codex UI path still presents API key as the only connection method + +## Phase-By-Phase Task Checklist + +This section gives the recommended execution order inside each phase so implementation can proceed +with fewer ambiguous jumps. + +### Phase A checklist + +1. extract generic JSON-RPC stdio transport primitives out of `recent-projects` +2. move initialize/initialized handshake helpers into shared Codex app-server infrastructure +3. repoint `recent-projects` to the extracted transport +4. verify no behavior change in `recent-projects` + +### Phase B checklist + +1. 
scaffold `src/features/codex-account` public entrypoints +2. define DTOs, channels, and event contracts +3. implement `account/read` client +4. implement managed-account plus API-key merge logic +5. add cache and single-flight snapshot reads +6. register IPC and preload bridge +7. verify read-only snapshot in renderer/devtools path + +### Phase C checklist + +1. add `providerConnections.codex.preferredAuthMode` to config types +2. implement read-time and write-time normalization +3. update validation to the new shape +4. create feature-owned Codex panel and adapters +5. integrate panel into runtime settings/manage surfaces +6. remove API-key-only wording from Codex-specific renderer paths + +### Phase D checklist + +1. implement launch-readiness use case +2. implement auth-specific exec env policy +3. introduce runtime coordinator for Codex launch decisions +4. delegate Codex env assembly away from API-key-only logic +5. verify ChatGPT launch, API-key launch, and degraded-path behavior + +### Phase E checklist + +1. implement login session manager +2. implement browser login start flow +3. implement cancel flow +4. implement logout flow +5. wire login events into snapshot refresh and renderer subscriptions +6. verify duplicate-click safety and timeout behavior + +### Phase F checklist + +1. implement lazy rate-limit reads and cache +2. surface plan/rate-limit info in feature UI +3. remove normal Codex terminal-modal login/logout path +4. harmonize dashboard/settings/manage wording +5. capture final signoff evidence and residual known gaps + +## PR Slicing And Review Discipline + +Even with a strong plan, this feature can create bugs if we ship overly broad mixed PRs. + +### Recommended slicing rule + +Prefer one phase per PR when possible. + +If a phase becomes too wide, split it by seam, not by random files. 
+ +Good split examples: + +- transport extraction +- read-only snapshot and IPC +- config migration and shell composition +- runtime launch policy +- login lifecycle +- rate limits and cleanup + +Bad split examples: + +- "main changes" vs "renderer changes" when both are needed to make one behavior coherent +- mixing migration, login, and runtime policy into one giant PR + +### Review checklist for each PR + +Every PR should answer: + +1. what is the new source of truth introduced or changed +2. what existing source of truth stops owning that behavior +3. what failure mode is newly covered +4. what migration risk exists +5. what tests prove the behavior + +### Green-state rule + +Each PR should leave the app in a coherent state: + +- no UI promise without runtime support +- no runtime support hidden behind stale UI wording +- no half-migrated config shape exposed to renderer code + +## Implementation Anti-Patterns To Avoid + +These are the failure modes most likely to reintroduce the problems this plan is trying to solve. 
+ +### Anti-pattern 1 - generic shell service quietly regains Codex policy + +Bad shape: + +- `ProviderConnectionService` or `ClaudeMultimodelBridgeService` starts accumulating special-case + Codex account logic again + +Why it is bad: + +- recreates the coupling we are explicitly trying to remove + +### Anti-pattern 2 - renderer computes business truth from badges and booleans + +Bad shape: + +- UI derives Codex auth semantics from generic `authenticated`, `authMethod`, or `statusMessage` + flags alone + +Why it is bad: + +- produces inconsistent copy and launch assumptions across surfaces + +### Anti-pattern 3 - migration infers preference from availability + +Bad shape: + +- existing API key or managed account silently rewrites `preferredAuthMode` + +Why it is bad: + +- mutates user intent based on incidental environment state + +### Anti-pattern 4 - degraded becomes a synonym for connected + +Bad shape: + +- stale evidence is treated as permanently sufficient proof of current auth state + +Why it is bad: + +- creates false readiness and false billing semantics + +### Anti-pattern 5 - launch fallback becomes silent + +Bad shape: + +- failed ChatGPT launch silently retries with API key + +Why it is bad: + +- destroys trust in the runtime story and billing expectations + +## Definition Of Done + +This feature is done only when all of the following are true: + +1. A previously logged-in ChatGPT Codex account autodetects automatically. +2. The UI clearly distinguishes managed account and API key availability. +3. `auto` mode works and prefers ChatGPT when available. +4. Launch policy no longer falsely requires API key when managed account exists. +5. ChatGPT-mode execution sanitizes API-key env vars. +6. API-key mode still works. +7. Login, cancel, and logout work from the real UI. +8. Codex terminal-login path is no longer used in normal UI flows. +9. `recent-projects` remains green. +10. Existing non-Codex provider UX remains unchanged. 
+ +## Explicit Rejections + +The following approaches are intentionally rejected. + +### Rejected - read `~/.codex/auth.json` + +Reason: + +- brittle +- storage backend may vary +- security-sensitive + +### Rejected - revive legacy Codex OAuth transport + +Reason: + +- wrong architecture +- incompatible with native-only cutover intent + +### Rejected - put all Codex account logic into `ProviderConnectionService` + +Reason: + +- violates SRP +- creates provider-specific policy blob +- fights the feature architecture standard + +### Rejected - hard-block launch whenever app-server is degraded + +Reason: + +- false negative risk +- execution seam and account control plane are different + +### Rejected - use app-server API-key login mode in the first wave + +Reason: + +- creates dual key stores +- unclear ownership + +## Final Recommendation + +Implement this as a full feature slice with: + +- extracted shared app-server transport primitives +- app-server-managed account truth +- app-owned API key truth +- feature-owned launch-readiness policy +- shell integration through composition, not more Codex branches in shell components + +The critical correctness points are: + +- unify auth storage context across app-server and exec +- sanitize API-key env vars in ChatGPT mode +- do not let app-server degradation become a false execution blocker + +If those three rules are followed, this plan should fit the orchestrator/UI architecture cleanly and restore the right Codex UX without reintroducing the old legacy path. 
diff --git a/docs/research/codex-app-server-account-signoff.md b/docs/research/codex-app-server-account-signoff.md new file mode 100644 index 00000000..5b5eeed7 --- /dev/null +++ b/docs/research/codex-app-server-account-signoff.md @@ -0,0 +1,303 @@ +# Codex App-Server Account Feature - Signoff Evidence + +Date: 2026-04-20 + +Worktree: +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan` + +Branch: +- `spike/codex-native-runtime-plan` + +Related plan: +- [codex-app-server-account-feature-plan.md](./codex-app-server-account-feature-plan.md) + +## Scope + +This signoff covers the app-server-backed Codex account feature work implemented in this repo: + +- shared Codex app-server transport extraction +- `codex-account` feature slice +- Codex `preferredAuthMode` config migration and validation +- renderer/runtime integration for managed ChatGPT account plus API key truth +- per-launch `forced_login_method` overrides for native Codex execution +- lazy rate-limits support +- login lifecycle wiring in the real UI path + +## Automated Verification + +### Targeted tests + +Command: + +```bash +pnpm vitest run \ + test/features/codex-account/core/evaluateCodexLaunchReadiness.test.ts \ + test/features/codex-account/main/CodexAccountEnvBuilder.test.ts \ + test/features/codex-account/main/createCodexAccountFeature.test.ts \ + test/features/codex-account/main/CodexLoginSessionManager.test.ts \ + test/features/codex-account/preload/createCodexAccountBridge.test.ts \ + test/features/codex-account/renderer/useCodexAccountSnapshot.test.ts \ + test/main/services/runtime/providerAwareCliEnv.test.ts \ + test/main/services/runtime/ProviderConnectionService.test.ts \ + test/main/services/runtime/ClaudeMultimodelBridgeService.test.ts \ + test/main/services/schedule/ScheduledTaskExecutor.test.ts \ + test/main/services/team/TeamProvisioningServicePrepare.test.ts \ + test/main/services/team/TeamProvisioningServicePrompts.test.ts \ + 
test/main/services/infrastructure/ConfigManager.codexMigration.test.ts \ + test/renderer/api/httpClient.codexAccount.test.ts \ + test/renderer/api/httpClient.exactTaskLogs.test.ts \ + test/renderer/components/runtime/ProviderRuntimeSettingsDialog.test.ts \ + test/renderer/components/runtime/providerConnectionUi.test.ts \ + test/renderer/components/cli/CliStatusVisibility.test.ts \ + test/renderer/components/team/dialogs/ProvisioningProviderStatusList.test.ts \ + test/main/ipc/configValidation.test.ts \ + test/features/recent-projects/main/infrastructure/CodexAppServerClient.test.ts \ + test/features/recent-projects/main/adapters/output/CodexRecentProjectsSourceAdapter.test.ts \ + test/features/recent-projects/core/application/ListDashboardRecentProjectsUseCase.test.ts \ + test/features/recent-projects/contracts/normalizeDashboardRecentProjectsPayload.test.ts \ + test/features/recent-projects/renderer/adapters/RecentProjectsSectionAdapter.test.ts +``` + +Result: + +- `25` test files passed +- `204` tests passed + +### Typecheck + +Command: + +```bash +pnpm exec tsc -p tsconfig.json --noEmit +``` + +Result: + +- passed + +### Targeted lint + +Command: + +```bash +pnpm exec eslint \ + src/main/services/infrastructure/ConfigManager.ts \ + src/main/services/runtime/ProviderConnectionService.ts \ + src/main/services/runtime/providerAwareCliEnv.ts \ + src/main/services/schedule/ScheduledTaskExecutor.ts \ + src/features/codex-account/preload/createCodexAccountBridge.ts \ + src/features/codex-account/renderer/hooks/useCodexAccountSnapshot.ts \ + src/renderer/api/httpClient.ts \ + src/renderer/components/runtime/ProviderRuntimeSettingsDialog.tsx \ + test/main/services/infrastructure/ConfigManager.codexMigration.test.ts \ + test/features/codex-account/preload/createCodexAccountBridge.test.ts \ + test/features/codex-account/renderer/useCodexAccountSnapshot.test.ts \ + test/main/services/runtime/ProviderConnectionService.test.ts \ + 
test/main/services/runtime/providerAwareCliEnv.test.ts \ + test/renderer/api/httpClient.codexAccount.test.ts \ + test/renderer/components/runtime/ProviderRuntimeSettingsDialog.test.ts +``` + +Result: + +- passed + +## Live Read-Only Signoff + +### 1. Real `codex app-server account/read` + +Probe result: + +```json +{ + "account": { + "type": "chatgpt", + "email": "quantjumppro@gmail.com", + "planType": "pro" + }, + "requiresOpenaiAuth": true +} +``` + +What this proves: + +- installed Codex binary supports the stable app-server initialize flow used by the extracted transport +- ChatGPT account autodetect works on the real machine +- managed account truth is available without touching legacy transport + +### 2. Real `codex app-server account/rateLimits/read` + +Probe result summary: + +```json +{ + "rateLimits": { + "limitId": "codex", + "primary": { + "usedPercent": 77, + "windowDurationMins": 300 + }, + "secondary": { + "usedPercent": 45, + "windowDurationMins": 10080 + }, + "credits": { + "hasCredits": false, + "unlimited": false, + "balance": "0" + }, + "planType": "pro" + } +} +``` + +What this proves: + +- live rate-limit payload shape matches the feature assumptions +- plan/rate-limit surface can be driven from the real app-server contract + +### 3. 
Real feature-facade snapshot + +Command path: + +- `createCodexAccountFeature(...).refreshSnapshot({ includeRateLimits: true })` + +Observed summary: + +```json +{ + "preferredAuthMode": "chatgpt", + "effectiveAuthMode": "chatgpt", + "appServerState": "healthy", + "managedAccount": { + "type": "chatgpt", + "email": "quantjumppro@gmail.com", + "planType": "pro" + }, + "apiKey": { + "available": true, + "source": "environment", + "sourceLabel": "Detected from OPENAI_API_KEY" + }, + "launchAllowed": true, + "launchReadinessState": "ready_chatgpt", + "planType": "pro", + "rateLimitPrimaryUsedPercent": 77 +} +``` + +What this proves: + +- the real feature composition builds the expected snapshot +- app-server truth, API-key availability merge, readiness evaluation, and rate-limit shaping all work together + +### 4. Live preference-resolution checks through the feature facade + +Observed summary: + +```json +{ + "preferredAuthMode": "auto", + "effectiveAuthMode": "chatgpt", + "launchAllowed": true, + "launchReadinessState": "ready_both", + "managedAccountType": "chatgpt", + "apiKeyAvailable": true +} +``` + +```json +{ + "preferredAuthMode": "api_key", + "effectiveAuthMode": "api_key", + "launchAllowed": true, + "launchReadinessState": "ready_api_key", + "managedAccountType": "chatgpt", + "apiKeyAvailable": true +} +``` + +What this proves: + +- `auto` mode prefers ChatGPT when both auth sources exist +- `api_key` preference still resolves correctly even when a managed account is also present + +### 5. Live execution env sanitization check through `ProviderConnectionService` + +With a connected managed-account snapshot and `preferredAuthMode = "chatgpt"`, observed result: + +```json +{ + "OPENAI_API_KEY": null, + "CODEX_API_KEY": null +} +``` + +What this proves: + +- ChatGPT-mode execution sanitizes ambient API-key env vars when managed-account launch is selected + +### 6. 
Live provider-aware launch override check + +Command path: + +- `providerConnectionService.setCodexAccountFeature(createCodexAccountFeature(...))` +- `buildProviderAwareCliEnv({ binaryPath: "codex", providerId: "codex" })` + +Observed summary: + +```json +{ + "providerArgs": [ + "-c", + "forced_login_method=\"chatgpt\"" + ], + "connectionIssues": {} +} +``` + +What this proves: + +- the native Codex launch policy now emits a deterministic `forced_login_method` override +- the override is available through the shared provider-aware execution seam used by runtime launch paths + +## Definition Of Done Cross-Check + +1. Previously logged-in ChatGPT Codex account autodetects automatically. + Status: yes - proven by live `account/read`. + +2. UI clearly distinguishes managed account and API key availability. + Status: yes - implemented in the Codex panel and covered by renderer tests. + +3. `auto` mode works and prefers ChatGPT when available. + Status: yes - proven by live feature-facade check. + +4. Launch policy no longer falsely requires API key when managed account exists. + Status: yes - feature readiness plus live snapshot show `launchAllowed = true` in ChatGPT mode. + +5. ChatGPT-mode execution sanitizes API-key env vars. + Status: yes - covered by tests and live `ProviderConnectionService` probe. + +6. API-key mode still works. + Status: yes - covered by tests and live preference-resolution probe. + +7. Login, cancel, and logout work from the real UI. + Status: code path and tests are implemented; live destructive signoff was intentionally not executed in this document to avoid mutating the active local Codex account session. + +8. Codex terminal-login path is no longer used in normal UI flows. + Status: yes - normal Codex settings flow uses feature IPC actions, not terminal modal auth. + +9. `recent-projects` remains green. + Status: yes - targeted recent-projects safety suites passed. + +10. Existing non-Codex provider UX remains unchanged. 
+ Status: yes - targeted Anthropic/Gemini runtime and renderer tests passed. + +## Conclusion + +For this repo, the planned app-server account feature work is in signoffable shape: + +- architecture is aligned with the feature-slice plan +- renderer/runtime behavior is covered by targeted automated tests +- live app-server read and rate-limit contracts were verified on the installed Codex binary +- provider-aware native launch paths now receive deterministic Codex auth-mode overrides diff --git a/docs/research/codex-native-runtime-integration-decision.md b/docs/research/codex-native-runtime-integration-decision.md new file mode 100644 index 00000000..b90fe412 --- /dev/null +++ b/docs/research/codex-native-runtime-integration-decision.md @@ -0,0 +1,5179 @@ +# Codex Native Runtime Integration Decision + +**Status**: Decision +**Date**: 2026-04-19 +**Owner repos**: + +- `claude_team` +- `agent_teams_orchestrator` +- `plugin-kit-ai` + +## Purpose + +Record the chosen direction for improving Codex integration in the multimodel runtime without losing native Codex capabilities such as plugins, skills, and MCP. + +## Chosen Plan Assessment + +- Chosen plan: normalized internal event/log layer plus staged `Codex-native` backend lane +- Assessment: `🎯 9 🛡️ 9 🧠 7` +- Estimated first serious wave: `2200-4500` lines across `agent_teams_orchestrator`, `claude_team`, and `plugin-kit-ai` + +## Current Status As Of 2026-04-19 + +The staged cutover is now complete through Phase 4. 
+ +- Phase 0 - implementation-complete and evidence-backed +- Phase 1 - rollout-state preparation complete +- Phase 2 - limited internal unlock completed +- Phase 3 - native-first default switch completed +- Phase 4 - legacy Codex lane removal completed + +Current product truth: + +- Codex now runs only through the `codex-native` lane in normal product flows +- legacy `adapter` and `api` Codex runtime lanes have been removed from active runtime selection and launch paths +- runtime status now exposes a single native Codex backend option +- stored legacy Codex backend values normalize forward to `codex-native` +- the remaining supported credential surface for native Codex is: + - `CODEX_API_KEY` + - `OPENAI_API_KEY` + +Repo-visible evidence: + +- [codex-native-runtime-phase-0-signoff-evidence.md](./codex-native-runtime-phase-0-signoff-evidence.md) +- [codex-native-runtime-phase-1-signoff-evidence.md](./codex-native-runtime-phase-1-signoff-evidence.md) +- [codex-native-runtime-phase-4-signoff-evidence.md](./codex-native-runtime-phase-4-signoff-evidence.md) + +## One-Page Summary + +We are **not** doing a one-shot swap from the current Codex backend to `@openai/codex-sdk / codex exec`. 
+ +We are doing this instead: + +- keep the current Codex adapter/API path as the fallback lane initially +- add a new provider-neutral normalized event/log layer inside `agent_teams_orchestrator` +- add a separate `Codex-native` lane that uses the real Codex runtime through `@openai/codex-sdk / codex exec` +- keep unified logs, transcript projection, and UI-facing activity summaries on top of the normalized layer +- use `plugin-kit-ai` for plugin catalog/discover/install/update/remove/repair and native Codex plugin placement +- keep `codex app-server` out of the first critical path, except maybe later as selective control-plane enrichment +- keep native capability truth keyed to the actual runtime identity, not just to one coarse backend id + +Core rule: + +- if we need unified logs, we normalize events +- if we need native Codex capabilities, we do not fake Codex into Anthropic runtime semantics +- if we claim native capability parity, we key that claim to the real native runtime identity, not only to `codex-native` + +## Current Reality + +Note: this section and the code seams below describe the pre-cutover baseline that motivated this decision; the post-cutover state is summarized under "Current Status" above. + +Today, `Codex` inside our multimodel runtime is **not** executed through the real Codex runtime. + +Instead, the current path is: + +- `claude_team` +- `agent_teams_orchestrator` +- internal Codex backend +- OpenAI Responses API + +In practice this means: + +- the orchestrator keeps Anthropic-style streaming semantics +- `Codex` is treated as a model backend, not as a native runtime +- native Codex plugins are not honestly end-to-end supported +- current `Codex` capability support is limited by our adapter, not by the real Codex runtime + +## Current-Code Seams That Matter + +These are the important code facts that shape the decision. + +### 1. 
Current Codex backend selection is adapter/API only + +Today the runtime only resolves: + +- `adapter` +- `api` + +That lives in: + +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/services/runtimeBackends/codexBackendResolver.ts` + +Important consequence: + +- current Codex runtime selection does **not** have a real `codex-cli` or `codex-sdk` lane yet + +### 2. Current Codex path translates into Anthropic-style semantics + +The current Codex fetch adapter explicitly translates between: + +- Anthropic Messages API shape +- OpenAI Responses API shape + +That lives in: + +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/services/api/codex-fetch-adapter.ts` + +Important consequence: + +- current Codex support is not just “another provider” +- it is intentionally shaped to preserve Anthropic-style turn/tool semantics + +### 3. The main query loop is deeply coupled to Anthropic-style tool flow + +The current query loop and tool pipeline are built around: + +- `tool_use` +- `tool_result` +- `content_block_start` +- `input_json_delta` +- `message_delta` + +That coupling is visible in: + +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/query.ts` +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/services/tools/toolOrchestration.ts` + +Important consequence: + +- a full swap to `codex exec` is **not** a transport-only replacement +- it changes the execution model and the tool ownership model + +### 4. Current runtime capability reporting is already backend-aware + +The runtime backend registry already distinguishes provider/backend status and currently marks Codex plugins as unsupported for the current lanes. + +That lives in: + +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/services/runtimeBackends/registry.ts` + +Important consequence: + +- we already have a good seam for capability-gated rollout +- Codex plugin support can stay honest and lane-dependent + +### 5. 
The repo already has an adapter pattern for message projection + +`sdkMessageAdapter` already converts one SDK-ish message model into REPL-facing messages and stream events. + +That lives in: + +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/remote/sdkMessageAdapter.ts` + +Important consequence: + +- adding a normalized layer is aligned with the current direction of the codebase +- this is an extension of an existing pattern, not a foreign architecture + +### 6. `claude_team` UI is protected by transcript/read-model layers, not raw runtime streams + +`claude_team` primarily reads runtime history through: + +- `ParsedMessage` +- `parseJsonlLine(...)` +- strict exact-log transcript parsing +- explicit task-log read models + +Important files: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/types/jsonl.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/types/messages.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/utils/jsonl.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/parsing/SessionParser.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/taskLogs/exact/BoardTaskExactLogStrictParser.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/taskLogs/discovery/TeamTranscriptSourceLocator.ts` + +Important consequence: + +- `claude_team` does **not** want raw Codex-native events directly as the first migration step +- the safest plan is to keep the current transcript/read-model contract stable and additive + +### 7. Existing task-log metadata already uses additive transcript fields successfully + +The current system already adds task-log metadata to transcript messages without changing the base message parser contract. 
+ +Important files: + +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/services/boardTaskActivity/BoardTaskTranscriptProjector.ts` +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/services/boardTaskActivity/contract.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/taskLogs/contract/BoardTaskTranscriptContract.ts` + +Important consequence: + +- we already have a proven pattern for additive transcript enrichment +- normalized Codex-native projection should follow the same discipline instead of replacing the transcript contract wholesale + +### 8. Backend ids already cross the orchestrator/main/preload/renderer boundary + +Current backend identity is already shared through: + +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/services/runtimeBackends/types.ts` +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/utils/config.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/shared/types/cliInstaller.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/ClaudeMultimodelBridgeService.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/runtime/ProviderRuntimeBackendSelector.tsx` + +Important consequence: + +- `codex-native` is not just a new orchestrator enum value +- it must be introduced additively across config, runtime status payloads, main/preload bridges, renderer selectors, and tests +- we must not overload `api` or `adapter` with new semantics just to avoid touching those seams + +### 9. 
Transcript invariants are narrower and more coupled than they first look + +Current `claude_team` transcript consumers rely not only on entry types, but also on exact enriched fields such as: + +- `requestId` +- `sourceToolUseID` +- `sourceToolAssistantUUID` +- `toolUseResult` +- `boardTaskLinks` +- `boardTaskToolActions` + +Important files: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/utils/jsonl.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/types/jsonl.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/types/messages.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/analysis/ToolExecutionBuilder.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/analysis/ToolResultExtractor.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/taskLogs/exact/BoardTaskExactLogStrictParser.ts` + +Important consequence: + +- transcript compatibility in phase 1 is not satisfied by preserving only `user` / `assistant` / `system` +- the projector must preserve the linking and dedupe semantics those fields carry +- exact-log selectors already deduplicate assistant streaming rows with `requestId` plus anchor evidence, so vague “close enough” projection is not safe +- if a Codex-native event cannot be projected without violating these invariants, it should stay in the normalized layer first + +### 10. 
`codex-sdk` thread persistence and raw `codex exec` persistence control are not equivalent yet + +Current upstream reality: + +- `@openai/codex-sdk` persists threads in `~/.codex/sessions` +- `resumeThread()` exists +- `ThreadOptions` expose `workingDirectory`, `sandboxMode`, `approvalPolicy`, and `additionalDirectories` +- raw `codex exec` supports `--ephemeral` +- current TypeScript SDK does **not** expose `ephemeral` in `ThreadOptions` + +Important sources: + +- `/tmp/openai-codex/sdk/typescript/README.md` +- `/tmp/openai-codex/sdk/typescript/src/threadOptions.ts` +- `/tmp/openai-codex/sdk/typescript/src/thread.ts` +- `/tmp/openai-codex/sdk/typescript/src/exec.ts` +- `/tmp/openai-codex/codex-rs/exec/src/cli.rs` +- `/tmp/openai-codex/codex-rs/README.md` + +Important consequence: + +- we cannot assume `@openai/codex-sdk` and raw `codex exec` are interchangeable for session ownership +- phase 0 must explicitly decide whether the first `Codex-native` spike is SDK-first, raw-exec-first, or dual-path +- otherwise we risk baking unwanted durable Codex session persistence into the rollout before we have UI/session ownership clarity + +### 11. 
Approval UX and live runtime state already depend on request-correlation semantics + +Current `claude_team` runtime UX tracks live approval state through: + +- `pendingApprovals` +- `resolvedApprovals` +- `requestId` +- permission request payloads + +Important files: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/shared/types/team.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/store/index.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/ToolApprovalSheet.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/activity/ActivityItem.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamProvisioningService.ts` + +Important consequence: + +- phase 1 must preserve a stable request-correlation contract for live activity, not just for persisted transcript parsing +- approval request state, approval result icons, and some streaming dedupe logic already assume `requestId` is stable and meaningful +- the normalized layer needs a first-class request-correlation story, not an implicit one + +### 12. 
Transcript chain and sidechain semantics are already part of the contract + +Current transcript/runtime plumbing already treats these fields as meaningful behavior, not decorative metadata: + +- `parentUuid` +- `logicalParentUuid` +- `isSidechain` +- `isMeta` +- `sessionId` +- `agentId` +- `agentName` + +Important files: + +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/types/logs.ts` +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/utils/sessionStorage.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/parsing/SessionParser.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/analysis/ConversationGroupBuilder.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TaskBoundaryParser.ts` + +Important consequence: + +- phase 1 must preserve parent/chain semantics for persisted transcript rows +- sidechain versus main-thread identity must remain truthful +- internal-user/tool-result rows must not drift in `isMeta` semantics +- if Codex-native projection cannot preserve those semantics truthfully, it should stay normalized-only first instead of emitting misleading transcript rows + +### 13. 
Runtime status/settings already assume specific Codex backend semantics + +Current runtime settings and status surfaces already depend on concrete Codex backend assumptions through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/runtime/providerConnectionUi.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/runtime/ProviderRuntimeSettingsDialog.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/runtime/ProviderRuntimeBackendSelector.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/dialogs/ProvisioningProviderStatusList.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/ClaudeMultimodelBridgeService.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/CliProviderModelAvailabilityService.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/providerModelProbe.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/infrastructure/CliInstallerService.ts` + +Important current-code facts: + +- `isConnectionManagedRuntimeProvider(...)` currently special-cases `codex`, so UI assumes Codex runtime follows the selected connection mode instead of an independent backend selector +- runtime settings, provisioning checks, and installer snapshots already carry `selectedBackendId`, `resolvedBackendId`, `availableBackends`, and `externalRuntimeDiagnostics` +- model verification cache signatures already depend on `selectedBackendId`, `resolvedBackendId`, and `backend.endpointLabel` +- current Codex model probe arguments are still generic Claude-CLI provider probes, not a separate Codex-native probing contract + +Important consequence: + +- `codex-native` cannot be 
introduced as an orchestrator-only backend enum +- phase 0 must explicitly decide whether Codex remains connection-managed in UI or gains an independently selectable runtime lane +- phase 1 must give `codex-native` an explicit runtime status/settings contract and explicit model-probe policy +- otherwise runtime summary UI, provisioning checks, installer snapshots, and model verification can quietly drift out of sync + +### 14. Approval UX depends on a concrete control/permission protocol, not a generic concept + +Current approval behavior already depends on specific protocol shapes through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamProvisioningService.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/shared/types/team.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/store/slices/teamSlice.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/ToolApprovalSheet.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/activity/ActivityItem.tsx` + +Important current-code facts: + +- the lead-runtime path emits manual approvals from CLI `control_request` messages and only `subtype=can_use_tool` becomes a `ToolApprovalRequest` +- non-`can_use_tool` control requests are auto-allowed explicitly to avoid deadlock +- teammate approval fallback already exists as a separate `permission_request` inbox/message path +- renderer approval icons and pending states inspect `structured.type === 'permission_request'` and correlate them through `request_id` into `pendingApprovals` and `resolvedApprovals` + +Important consequence: + +- phase 1 cannot claim Codex-native approval parity unless there is a truthful adaptation path into the current `ToolApprovalRequest` + `requestId` contract +- if Codex-native cannot yet provide a safe allow/deny 
response loop, the lane must stay limited instead of pretending approval UX still works +- approval/control adaptation must be treated as its own contract layer, not as a vague future cleanup + +### 15. Connection auth mode and Codex runtime backend are currently coupled in env construction + +Current Codex connection and runtime routing already mutate the execution env through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/ProviderConnectionService.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/providerAwareCliEnv.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/providerRuntimeEnv.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/runtime/providerConnectionUi.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/test/renderer/components/runtime/providerConnectionUi.test.ts` + +Important current-code facts: + +- current Codex API-key mode explicitly writes `CLAUDE_CODE_CODEX_BACKEND=api` +- current Codex OAuth mode explicitly writes `CLAUDE_CODE_CODEX_BACKEND=adapter` +- current UI copy and tests already assume `Codex API key` means the public Responses API path and `Codex subscription` means the built-in adapter path +- runtime backend selection env and provider-connection env are both applied during CLI env construction, so stale coupling here can silently override a new lane + +Important consequence: + +- `codex-native` cannot be added safely without explicitly decoupling “how Codex authenticates” from “which Codex execution lane runs” +- phase 0 must define whether API-key mode for Codex-native still uses the real Codex runtime or only the old Responses API lane +- runtime env construction must stop assuming that Codex auth mode alone determines the backend lane + +### 16. 
App config validation and launch granularity currently lag behind backend-lane truth + +Current app config and launch surfaces already constrain how backend truth can evolve through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/ipc/configValidation.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/infrastructure/ConfigManager.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/shared/types/team.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamProvisioningService.ts` +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/services/runtimeBackends/codexBackendResolver.ts` + +Important current-code facts: + +- app-side `RuntimeConfig.providerBackends.codex` currently only allows `auto | adapter` +- app IPC validation for `runtime.providerBackends.codex` also only allows `auto | adapter` +- orchestrator-side Codex backend resolution already knows `auto | adapter | api` +- `TeamLaunchRequest` carries `providerId`, `model`, and `effort`, but no per-launch backend id +- provisioning summaries and probe cache keys currently reason about provider-level launch truth, not launch-specific backend overrides + +Important consequence: + +- `codex-native` is not just a new orchestrator backend enum - it is also a config-schema and launch-contract change +- phase 0 must explicitly decide whether the first rollout keeps backend selection global per provider or introduces per-launch backend override +- if the rollout keeps global provider backend selection, the plan must say that clearly and keep team launch/provisioning UX honest about that limitation + +### 17. 
Codex backend routing currently behaves like process-level state, not member-level launch state + +Current team launch and teammate spawn plumbing already suggests backend routing is process-scoped through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamProvisioningService.ts` +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/tools/shared/spawnMultiAgent.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/shared/types/team.ts` + +Important current-code facts: + +- `buildProvisioningEnv(providerId)` resolves env per provider, not per requested backend lane +- `TeamLaunchRequest` and member provider overrides carry `providerId`, but not backend id +- teammate spawn diagnostics log `process.env.CLAUDE_CODE_CODEX_BACKEND`, which indicates current Codex backend selection is inherited from process env at spawn time +- current team launch/provisioning summaries can show provider-level runtime/backend info, but they do not expose member-level Codex backend selection + +Important consequence: + +- phase 1 must not imply that different Codex teammates inside one orchestrator process can independently choose different Codex backend lanes unless the launch contract is explicitly expanded +- the safest first rollout assumption is that Codex backend selection remains process-wide or at most provider-global for the launched runtime +- provisioning, launch UI, and team-member overrides must stay honest about that limitation + +### 18. 
Provisioning probe cache is still provider-scoped and can outlive backend/auth changes + +Current provisioning-readiness and warm-up cache behavior is defined through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamProvisioningService.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/infrastructure/ConfigManager.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/ipc/config.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/http/config.ts` + +Important current-code facts: + +- `createProbeCacheKey(cwd, providerId)` currently keys probe results only by absolute `cwd`, `getClaudeBasePath()`, and resolved `providerId` +- `getCachedOrProbeResult(...)` checks that cache **before** rebuilding provider env, so a cached hit bypasses newer backend/auth env resolution +- `buildProvisioningEnv(providerId)` already derives backend-sensitive env through provider connection settings and runtime backend settings, but that identity is not part of the probe cache key +- `clearProbeCache(...)` is currently only used by explicit `forceFresh` paths, while normal config updates through `ConfigManager.updateConfig(...)` do not invalidate affected probe entries +- probe cache TTL is currently `36h` +- model verification already uses backend-aware signatures, so provisioning readiness can disagree with model verification after a backend/auth switch + +Important consequence: + +- switching Codex auth mode, runtime backend selection, or probe policy can leave stale provider-level readiness truth alive for up to the cache TTL +- `codex-native` rollout needs an explicit backend-aware probe-cache identity or explicit invalidation contract +- provisioning banners, readiness checks, and backend-aware model verification must not be allowed to drift into split-brain truth + +### 19. 
External runtime diagnostics already surface Codex CLI presence, but that is not lane readiness + +Current runtime-status and installer snapshot plumbing already carries external runtime diagnostics through: + +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/services/runtimeBackends/registry.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/ClaudeMultimodelBridgeService.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/infrastructure/CliInstallerService.ts` + +Important current-code facts: + +- current Codex runtime status always includes `externalRuntimeDiagnostics: [detectExternalBinary('codex', 'Codex CLI')]` +- that diagnostic is published even while current selected/resolved backend truth is still only `adapter/api` +- current Codex capability truth still marks plugins as `unsupported` despite surfacing Codex CLI detection +- installer snapshots and bridged provider status already persist/copy these diagnostics forward + +Important consequence: + +- finding a local `codex` binary must not be treated as proof that `codex-native` is selectable, ready, authenticated, or safe to advertise +- phase 1 needs an explicit rule for how external binary detection relates to backend availability and lane readiness +- runtime status and installer/provisioning UI must not collapse “CLI detected” into “Codex-native ready” + +### 20. 
Backend option status already distinguishes `selectable` from `available`, but UI mostly behaves as if only `available` matters + +Current backend-option status and runtime selector plumbing already exposes: + +- `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/services/runtimeBackends/types.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/runtime/ProviderRuntimeBackendSelector.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/ClaudeMultimodelBridgeService.ts` + +Important current-code facts: + +- `RuntimeBackendOptionStatus` already has both `selectable` and `available` +- runtime bridge preserves `selectable` into `CliProviderStatus.availableBackends` +- current renderer selector effectively disables options based on `!option.available`, not on `option.selectable` +- current Codex statuses for `adapter/api` mostly collapse these concepts anyway, so the mismatch has not hurt much yet + +Important consequence: + +- `codex-native` can create a new state we do not model well today: backend option is visible and intentionally selectable, but not yet authenticated/verified +- phase 1 needs an explicit semantics split between: + - backend can be selected + - backend is currently available + - backend is currently resolved + - backend is currently verified for execution +- otherwise UI can either hide the lane until too late or misrepresent it as fully ready when it is only selectable + +### 21. 
Unified runtime-status fallback currently drops backend-rich truth + +Current main-process runtime-status bridging still has a legacy fallback path through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/ClaudeMultimodelBridgeService.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/runtime/providerConnectionUi.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/runtime/ProviderRuntimeSettingsDialog.tsx` + +Important current-code facts: + +- when `runtime status --json` fails or is unsupported, `ClaudeMultimodelBridgeService` falls back to legacy `auth status` and `model list` probes +- that legacy path rebuilds provider status from `createDefaultProviderStatus(...)`, which starts with: + - `selectedBackendId: null` + - `resolvedBackendId: null` + - `availableBackends: []` + - `externalRuntimeDiagnostics: []` +- the fallback path partially restores generic provider auth/model truth, but it does not restore backend-option truth for Codex +- current renderer still special-cases Codex as connection-managed, so losing backend-rich status can silently reinforce old Codex semantics during transient failures + +Important consequence: + +- `codex-native` rollout needs an explicit rule for degraded status transport +- transient runtime-status failures must not erase backend-lane truth so completely that the lane disappears or reverts to old connection-managed-only semantics in UI +- if backend-rich truth is unavailable, the degraded state must be explicit, not silently collapsed into legacy provider-only status + +### 22. 
Current Codex status copy still derives “runtime” mostly from auth mode, not from backend lane + +Current renderer/runtime copy for Codex still flows through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/runtime/providerConnectionUi.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/runtime/ProviderRuntimeSettingsDialog.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/dashboard/CliStatusBanner.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/settings/sections/CliStatusSection.tsx` + +Important current-code facts: + +- `isConnectionManagedRuntimeProvider(provider)` still returns `provider.providerId === 'codex'` +- `getProviderCurrentRuntimeSummary(provider)` for Codex currently derives “Current runtime” from `authMethod` or `configuredAuthMode`, not from `selectedBackendId` / `resolvedBackendId` +- current Codex connection copy still revolves around: + - `Codex subscription` + - `OpenAI API key` +- settings/dashboard sections choose between “managed runtime summary” and backend summary using that Codex-specific connection-managed branch + +Important consequence: + +- `codex-native` can be selected correctly in backend truth while UI copy still describes only old auth-world semantics +- phase 1 needs an explicit rule for when Codex copy is allowed to talk about connection method versus execution lane +- otherwise status banners, settings summaries, and empty/error states can quietly misdescribe the active lane even when backend plumbing is correct + +### 23. 
Runtime status currently has two renderer write paths, and the progressive snapshot path bypasses epoch/loading reconciliation + +Current status transport and renderer-store plumbing already flows through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/infrastructure/CliInstallerService.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/ipc/cliInstaller.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/store/index.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/store/slices/cliInstallerSlice.ts` + +Important current-code facts: + +- `CliInstallerService.getStatus()` seeds `latestStatusSnapshot` immediately, then progressively publishes status snapshots from: + - `gatherStatus(...)` + - the multimodel provider callback inside `checkAuthStatus(...)` + - later model-availability updates through `handleProviderModelAvailabilityUpdate(...)` +- IPC `cliInstaller:getStatus` also has a cached/final response path, while `cliInstaller:getProviderStatus` separately patches cached provider truth through `patchCachedProviderStatus(...)` +- renderer progress handling currently does `useStore.setState({ cliStatus: progress.status })` for `progress.type === 'status'` +- that progress-driven write path bypasses: + - `cliStatusEpoch` + - `cliProviderStatusSeq` + - `cliStatusLoading` + - `cliProviderStatusLoading` + - `cliStatusError` +- slice-driven `fetchCliStatus()` and `fetchCliProviderStatus()` still do their own request sequencing and loading-state management, so the store already has two independent status-write paths + +Important consequence: + +- `codex-native` rollout can otherwise race between: + - request/response status fetches + - background progressive status snapshots + - provider-specific refreshes + - late model-verification updates +- phase 1 needs an explicit in-flight snapshot contract 
so partial or older status pushes cannot silently overwrite fresher backend-lane truth +- renderer/store must be able to distinguish: + - in-flight partial snapshot + - settled status truth + - degraded transport truth + +### 24. Extension preflight and action gating still rely on coarse runtime truth, not backend-lane truth + +Current extension store and action-gating logic already flows through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/shared/utils/extensionNormalizers.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/store/slices/extensionsSlice.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/extensions/ExtensionStoreView.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/extensions/common/InstallButton.tsx` + +Important current-code facts: + +- extension mutations currently preflight only against coarse store state like: + - `cliStatus === null` + - `cliStatusLoading` + - runtime installed/startable truth + - provider-level authenticated/mutable capability truth +- `getExtensionActionDisableReason(...)` does not currently express backend-lane-specific states like: + - selected lane exists but is not yet verified + - runtime status is degraded but last known lane truth still exists + - provider supports plugins only on one backend lane, not on another +- extension store copy already says support can differ by section and provider, but mutation gating is still mostly global-runtime and provider-capability driven +- this is acceptable today only because current Codex plugin truth is still effectively one-dimensional: unsupported on the old lane + +Important consequence: + +- once `codex-native` exists, plugin management can otherwise become enabled or disabled based on provider-wide truth that is too coarse for backend-lane reality +- phase 1 needs backend-aware extension 
preflight semantics, not just provider-wide auth/capability semantics +- install/uninstall buttons, extension banners, and mutation preflight must stay honest when the selected lane is: + - supported but not verified + - degraded + - still on the old Codex backend + +### 25. Team model selectors and provisioning diagnostics still see a provider-wide runtime shape, not full backend-lane identity + +Current team model/runtime plumbing already flows through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/utils/teamModelCatalog.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/utils/teamModelAvailability.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/dialogs/TeamModelSelector.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/dialogs/CreateTeamDialog.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/dialogs/LaunchTeamDialog.tsx` + +Important current-code facts: + +- `RuntimeAwareProviderStatus` in `teamModelCatalog.ts` is currently only: + - `providerId` + - `authMethod` + - `backend` +- `TeamModelRuntimeProviderStatus` in `teamModelAvailability.ts` still omits: + - `selectedBackendId` + - `resolvedBackendId` + - `availableBackends` + - `externalRuntimeDiagnostics` +- launch/create dialogs build `runtimeProviderStatusById` from full provider status, but team-model helpers immediately narrow that truth to the smaller provider-wide shape above +- current runtime-aware model disabling for Codex therefore still reasons mostly from auth/backend summary heuristics, not from explicit backend-lane identity + +Important consequence: + +- `codex-native` can otherwise have different model-visibility or model-selection truth than old Codex while team selectors still reason as if Codex were one provider-wide runtime +- 
phase 1 needs an explicit lane-aware runtime shape for team model selectors and provisioning diagnostics +- otherwise create/launch dialogs can quietly validate, hide, or explain models using stale old-Codex assumptions + +### 26. Provisioning prepare-cache identity currently depends on backend summary display text, not canonical backend identity + +Current provisioning warmup/model cache plumbing already flows through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/dialogs/providerPrepareCacheKey.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/dialogs/ProvisioningProviderStatusList.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/dialogs/CreateTeamDialog.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/dialogs/LaunchTeamDialog.tsx` + +Important current-code facts: + +- `buildProviderPrepareModelCacheKey(...)` currently keys warmup/model-cache reuse by: + - `cwd` + - `providerId` + - `backendSummary` + - `limitContext` +- `backendSummary` is derived from `getProvisioningProviderBackendSummary(...)` +- that summary is a display-oriented string derived from: + - selected/resolved backend ids when labels exist + - backend labels + - fallback labels/copy +- both launch and create dialogs reuse that display-derived summary as cache identity for provider prepare diagnostics + +Important consequence: + +- `codex-native` rollout can otherwise tie cache correctness to UI wording rather than canonical backend identity +- copy changes, label collisions, or fallback-summary drift can produce false cache hits or misses across Codex lanes +- phase 1 needs canonical provisioning cache identity based on backend/auth/probe truth, not backend summary text + +### 27. 
Persisted team identity, replay flows, runtime snapshots, and resume guards are still lane-agnostic + +Current team persistence and replay plumbing already flows through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/shared/types/team.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/ipc/teams.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamMetaStore.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamMembersMetaStore.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamMemberResolver.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamBackupService.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/store/slices/teamSlice.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/dialogs/launchDialogPrefill.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamProvisioningService.ts` + +Important current-code facts: + +- `TeamLaunchRequest` and `TeamCreateRequest` currently carry: + - `providerId` + - `model` + - `effort` + - `limitContext` + but no backend lane id or canonical runtime-lane identity +- shared `TeamConfig` and `TeamMember` persistence also carry only: + - `providerId` + - `model` + - `effort` + with no backend lane field in config-level or member-level identity +- `team.meta.json` (`TeamMetaFile`) persists: + - `providerId` + - `model` + - `effort` + - `skipPermissions` + - `worktree` + - `extraCliArgs` + - `limitContext` + but no canonical backend lane identity +- `members.meta.json` persists per-member: + - `providerId` + - `model` + - `effort` + but no backend lane identity +- renderer-side `TeamLaunchParams` 
persisted in local storage also only stores: + - `providerId` + - `model` + - `effort` + - `limitContext` +- `resolveLaunchDialogPrefill(...)` reuses `savedRequest` and `previousLaunchParams`, but neither source can preserve selected/resolved backend lane truth +- `teams:getDraftLaunchPayload` reconstructs draft launch truth from `team.meta.json` and `members.meta.json`, but that payload also only contains provider/model/effort-level identity +- draft-team replay path reconstructs `TeamCreateRequest` from `team.meta.json` plus `members.meta.json`, so retry-after-failure also replays only provider/model/effort truth +- `TeamMemberResolver` merges `config.json` and `members.meta.json` member identity only through `providerId/model/effort`, so downstream team/runtime views cannot recover lane truth later +- `TeamAgentRuntimeEntry` / `TeamAgentRuntimeSnapshot` expose backend process shape (`lead`, `tmux`, `in-process`, etc.), but not provider backend lane identity +- `handleLaunchTeam(...)` and draft-launch-to-create flow validate/request only provider/model/effort fields, so launch IPC cannot explicitly carry `codex-native` lane identity yet +- `TeamProvisioningService.shouldSkipResumeForProviderRuntimeChange(...)` currently compares only: + - provider id + - model + and does not compare backend-lane identity +- `TeamProvisioningService.getConfiguredRuntimeBackend(providerId)` resolves launch-time backend from current global runtime config, so relaunch after a settings change can silently use a different Codex lane than the original launch assumed +- `TeamBackupService` durable restore path is centered on `config.json` plus `members.meta.json` and does not restore backend-lane-aware identity today, so launched-team restore also replays lane-agnostic identity unless those files gain canonical backend identity +- `TeamBackupService` root file set does not currently include `team.meta.json`, so draft-team retry truth and launched-team restore truth already come from 
different persistence surfaces, and neither one stores canonical backend lane identity + +Important consequence: + +- a saved or replayed team launch can silently drift onto a different Codex lane after global runtime settings change +- a failed draft create that is later retried can also silently shift lanes because `team.meta.json` / `members.meta.json` never persisted lane identity +- a restored team can also come back without backend-lane truth because backup/restore currently preserves only lane-agnostic files +- resume guards can falsely treat old and new launches as the “same runtime” because they only compare provider/model, not backend lane +- runtime snapshots, resolved member views, and relaunch UI cannot honestly answer whether a team is: + - pinned to a lane + - inheriting the current global lane + - or drifting because launch persistence never stored the lane in the first place +- phase 1 needs an explicit persisted-team-identity and relaunch-identity contract before `codex-native` can be considered safe for team flows + +### 28. 
Team summaries, list surfaces, and synthetic provisioning cards are still lane-blind + +Current team-summary and list-surface plumbing already flows through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/shared/types/team.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamConfigReader.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamDataService.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/store/slices/teamSlice.ts` + +Important current-code facts: + +- `TeamSummary` currently exposes: + - display/name + - project/session history + - launch-state counters + - pending-create / partial-failure state + but no: + - `providerId` + - `selectedBackendId` + - `resolvedBackendId` + - canonical backend-lane identity +- `TeamConfigReader.readTeamSummary(...)` and `readDraftTeamSummary(...)` build team list cards from: + - `config.json` + - `team.meta.json` + - `members.meta.json` + - launch-state files + but never project backend-lane truth into the resulting summary +- renderer team list state uses `TeamSummary` as the canonical list/card surface through: + - `teams` + - `teamByName` + - `teamBySessionId` +- synthetic `provisioningSnapshotByTeam` cards created during team creation also omit provider/backend lane truth and only show generic display/member/project data +- current summary equality/store reconciliation already keys heavily off `TeamSummary` fields, so list/card updates cannot become lane-aware unless the shared summary contract changes first + +Important consequence: + +- even if persisted team identity becomes backend-aware later, current team list/cards/tabs still cannot show whether a team is: + - on old Codex + - on `codex-native` + - inheriting the current global lane + - or pinned to a stored lane +- draft cards and live team list cards can present the same team 
as if they were equivalent while one path is inherited-global and another is lane-pinned +- phase 1 needs an explicit team-summary/list-surface contract instead of assuming lane truth can stay hidden below detail views + +### 29. Member runtime summaries, bootstrap copy, and composer capability suggestions are still provider-wide, not lane-aware + +Current member/detail/composer display plumbing already flows through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/utils/memberRuntimeSummary.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/members/MemberList.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/members/MemberDetailDialog.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/members/MemberDetailHeader.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/utils/bootstrapPromptSanitizer.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/messages/MessageComposer.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/utils/providerSlashCommands.ts` + +Important current-code facts: + +- `resolveMemberRuntimeSummary(...)` currently builds member runtime copy only from: + - configured `providerId` + - configured/inferred `model` + - configured `effort` + - runtime model inference + - RSS memory suffix + and does not carry: + - `selectedBackendId` + - `resolvedBackendId` + - canonical backend-lane identity +- `MemberCard` and `MemberDetailHeader` receive only a final `runtimeSummary: string`, so renderer detail surfaces cannot distinguish old Codex from `codex-native` unless that string becomes lane-aware first +- bootstrap/system-copy sanitization also builds runtime summary only from `providerId/model/effort`, 
not from backend lane truth +- `MessageComposer` derives `leadProviderId` only from: + - `lead.providerId` + - or `inferTeamProviderIdFromModel(lead.model)` +- slash command suggestions then branch only on `providerId === 'codex'` through `getSuggestedSlashCommandsForProvider(...)`, so capability hints remain provider-wide rather than lane-aware + +Important consequence: + +- even if top-level runtime status becomes lane-aware, member cards, member detail, bootstrap copy, and composer suggestions can still collapse old Codex and `codex-native` into the same visible runtime story +- lane-specific capability affordances like Codex slash commands, plugin/app wording, or runtime summary copy can appear purely because the provider is `codex`, even when the selected lane is still old Codex or degraded +- phase 1 needs an explicit member/composer surface contract instead of assuming provider-level Codex identity is good enough once backend-lane truth matters + +### 30. Plugin install success, activation in a new thread, restart semantics, and app-auth completion are still conflated into one coarse “installed” state + +Current extension/plugin activation plumbing already flows through: + +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/extensions/ExtensionStoreView.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/extensions/plugins/PluginsPanel.tsx` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/store/slices/extensionsSlice.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/shared/types/extensions/plugin.ts` +- `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/shared/utils/extensionNormalizers.ts` +- `https://developers.openai.com/codex/plugins/build` +- `https://developers.openai.com/codex/app-server` +- `/tmp/openai-codex/codex-rs/app-server/README.md` +- 
`/tmp/openai-codex/codex-rs/tui/src/chatwidget/plugins.rs` +- `/tmp/openai-codex/codex-rs/cli/src/main.rs` +- `/tmp/openai-codex/codex-rs/features/src/lib.rs` + +Important current-code and current-doc facts: + +- current extension UI only has a coarse warning: `Running sessions won't pick up extension changes until restarted.` +- `PluginsPanel` still describes multimodel plugin support in provider-wide terms and does not express lane-specific activation semantics +- shared plugin types currently stop at: + - installed scopes + - version + - install path + and carry no explicit activation/session-visibility fields like: + - active in current session + - active only in new thread + - requires restart + - requires app auth/setup completion +- extension action gating currently only answers “can install/uninstall now?”, not “when does this become usable in the selected lane?” +- official Codex app-server/plugin docs still mark `plugin/list`, `plugin/read`, `plugin/install`, and `plugin/uninstall` as under development for production clients +- official Codex plugin invocation docs already assume plugin usage happens through an explicit new turn/thread flow rather than retroactively mutating an already-running turn +- upstream Codex feature and CLI copy already use: + - `start a new chat or restart Codex to use it` + - `Please restart Codex` +- Codex TUI plugin install/auth flow explicitly distinguishes: + - plugin installed + - remaining app setup/auth still needed + - plugin may not be usable until required apps are installed + +Important consequence: + +- phase 1 cannot treat `install succeeded` as equivalent to: + - plugin active in current session + - plugin active in current thread + - plugin usable without restart/new-thread boundary + - plugin fully usable without extra app/MCP auth setup +- `codex-native` rollout needs an explicit plugin-activation/session-visibility contract that separates: + - native placement success + - lane supports plugin execution + - 
plugin usable in next thread only + - plugin requires full runtime restart + - plugin still blocked on app/auth setup +- without that contract, extension UI can easily overclaim “installed and ready” when the real truth is only “installed and available after new thread/restart” + +### 31. Structured mention targeting is richer in Codex app-server than in the current SDK/exec embedding seam + +Current Codex invocation-shape differences already flow through: + +- `https://developers.openai.com/codex/app-server` +- `/tmp/openai-codex/sdk/typescript/src/thread.ts` +- `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/v2/UserInput.ts` +- `/tmp/openai-codex/codex-rs/core/src/session/turn.rs` +- `/tmp/openai-codex/codex-rs/core/src/plugins/mentions.rs` +- `/tmp/openai-codex/codex-rs/core/src/plugins/mentions_tests.rs` + +Important current-code and current-doc facts: + +- Codex app-server already supports structured user-input items like: + - `text` + - `image` + - `localImage` + - `skill` + - `mention` +- official app-server examples show deterministic plugin/app invocation through: + - `mention` items with `plugin://...` + - `mention` items with `app://...` +- current TypeScript SDK input surface is still only: + - `text` + - `local_image` +- real Codex core can still resolve explicit plugin/app mentions from linked text like: + - `[@sample](plugin://sample@test)` + - `[$calendar](app://calendar)` +- core tests prove structured mentions and linked-text mentions dedupe and resolve correctly, but that is still a lower-level runtime behavior, not the same thing as an explicit first-class SDK input contract + +Important consequence: + +- phase 1 cannot assume that the chosen execution seam already gives us a first-class, deterministic plugin/app/skill invocation API in Node/Electron +- if we start with raw `codex exec` or current `@openai/codex-sdk`, exact plugin/app targeting may depend on: + - linked text mentions + - prompt shaping + - runtime-side parsing 
behavior + rather than on a structured invocation item we directly control +- `codex-native` rollout therefore needs an explicit mention-targeting contract that says whether phase 1 supports: + - explicit deterministic plugin/app targeting + - linked-text mention targeting only + - or no lane-specific invocation affordance yet +- without that contract, UI/composer surfaces can overclaim exact plugin/app invocation support just because installation and runtime execution exist + +### 32. Live turn notifications, sparse turn/thread payloads, and hydrated thread history are not the same truth source + +Current Codex thread/history plumbing already differs sharply between: + +- active-turn notifications from: + - `https://developers.openai.com/codex/app-server` + - `/tmp/openai-codex/codex-rs/app-server/README.md` +- sparse `Turn` / `Thread` payloads from: + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/v2/Turn.ts` + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/v2/Thread.ts` +- our persisted/hydrated transcript readers in: + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/utils/jsonl.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/types/messages.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/taskLogs/stream/BoardTaskLogStreamService.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/taskLogs/exact/BoardTaskExactLogDetailSelector.ts` + +Important current-code and current-doc facts: + +- official app-server docs explicitly separate: + - `thread/read` + - `thread/turns/list` + - `thread/resume` + - `thread/fork` + from live `turn/*` and `item/*` notifications +- official `Turn` schema says `turn.items` is: + - only populated on `thread/resume` or `thread/fork` response + - empty on other responses and notifications +- official `Thread` schema 
says `thread.turns` is: + - only populated on `thread/resume`, `thread/rollback`, `thread/fork`, and `thread/read` with `includeTurns` + - empty on other responses and notifications +- official app-server docs also note that `turn/started` and `turn/completed` currently carry empty `items` arrays even when item events streamed, and UIs should rely on `item/*` for active-turn item streaming instead +- app-server notifications are also explicitly subscription/connection-shaped: + - `thread/start` and `thread/fork` auto-subscribe the current connection to turn/item notifications + - `thread/unsubscribe` removes that connection from the thread event stream + - per-connection notification opt-out already exists through `optOutNotificationMethods` + - some streamed notifications are explicitly documented as connection-scoped +- this means active notifications are the right truth for: + - in-flight activity + - incremental rendering + - approval/runtime progress + but they are still not the same thing as: + - hydrated thread history + - replayable/persisted transcript truth + - explicit read/resume/fork history snapshots +- our current `claude_team` exact-log and task-log paths are already grounded in hydrated/persisted `ParsedMessage[]` loaded from JSONL streams, not in some generic in-memory live event cache +- `ParsedMessage`-based downstream consumers already expect stable persisted fields like: + - `uuid` + - `parentUuid` + - `requestId` + - `sourceToolUseID` + - `toolUseResult` + - chain/sidechain metadata + and those expectations cannot safely be replaced by raw partial live-notification state in phase 1 + +Important consequence: + +- phase 1 cannot treat live Codex notifications as if they were already a canonical thread-history source +- active turn streaming and history hydration must stay separate contracts +- `codex-native` rollout needs an explicit rule for which source is authoritative for: + - live activity + - replay/resume + - exact log + - task log 
detail + - post-hoc transcript reads +- without that rule, it is easy to build a nice live spike and still break exact-log/task-log/replay flows because sparse or partial live turn state gets mistaken for persisted history truth + +### 33. Approval requests can resolve by lifecycle cleanup, not only by explicit user decision + +Current approval lifecycle semantics already differ between official Codex app-server and our current CLI-oriented approval flow: + +- official Codex docs in: + - `https://developers.openai.com/codex/app-server` + - `/tmp/openai-codex/codex-rs/app-server/README.md` +- current approval store/runtime plumbing in: + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/store/slices/teamSlice.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/store/index.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/shared/types/team.ts` + +Important current-code and current-doc facts: + +- official app-server approval flow explicitly emits `serverRequest/resolved { threadId, requestId }` not only after a client decision, but also when the pending request is cleared by: + - turn start + - turn completion + - turn interruption +- the same cleanup rule applies to: + - approval requests + - `requestUserInput` + - other server-initiated request lifecycles tied to turn state +- our current renderer/store flow is stricter and more CLI-specific: + - `respondToToolApproval(...)` removes a pending approval only after successful IPC response + - current store also knows about explicit `autoResolved` and `dismissed` events from our existing main-process protocol + - pending/resolved UI state is keyed by `runId + requestId` + - activity rows and approval icons already depend on that cleanup being truthful +- this means a Codex-native lane cannot stop at “we can show an approval request and send allow/deny” +- it also needs a truthful cleanup contract for: 
+ - lifecycle-cleared pending requests + - interrupted turns + - replaced turns + - requests that never receive an explicit user response + +Important consequence: + +- phase 1 cannot treat “approval response path exists” as enough for approval UX parity +- `codex-native` rollout needs an explicit authoritative rule for when a pending approval becomes: + - answered by the user + - auto-resolved + - lifecycle-cleared + - dismissed because the run/turn is no longer active +- without that rule, UI can easily get: + - stuck pending approvals + - wrong resolved icons + - stale request rows after turn interruption/restart + - mismatched approval state between live activity and transcript/detail views + +### 34. Generic interactive prompts and MCP elicitations currently have no honest UI path in our app + +Current interactive-request support already differs sharply between official Codex runtime capabilities and our current app surfaces: + +- official Codex docs in: + - `https://developers.openai.com/codex/app-server` + - `/tmp/openai-codex/codex-rs/app-server/README.md` +- current local UI/runtime surfaces in: + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/ToolApprovalSheet.tsx` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/activity/PendingRepliesBlock.tsx` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/store/slices/teamSlice.ts` + +Important current-code and current-doc facts: + +- official Codex app-server supports: + - `tool/requestUserInput` for 1-3 short user questions + - `mcpServer/elicitation/request` for structured MCP-server input +- those request types have their own lifecycle and can also resolve/clear through `serverRequest/resolved` +- in our current repo code, there is no local support path for: + - `requestUserInput` + - `mcpServer/elicitation` + - generic structured runtime prompts outside the 
existing tool-approval flow +- current renderer/runtime interaction is heavily centered on: + - `ToolApprovalRequest` + - approval sheet + - pending approval rows + rather than on a generalized runtime prompt/response surface +- this means a Codex-native lane cannot honestly assume that all provider-native interactive requests can already be surfaced just because approval UX exists + +Important consequence: + +- phase 1 cannot claim full Codex-native interactive parity if the chosen seam can emit `requestUserInput` or MCP elicitation but the app only understands tool approvals +- `codex-native` rollout needs an explicit contract for whether phase 1: + - supports these prompts end-to-end + - blocks them with a clear limitation + - or keeps the lane limited until a truthful UI path exists +- without that rule, turns can stall or degrade silently when runtime asks for structured input the app cannot surface + +### 35. `codex exec` and the current TypeScript SDK are headless seams with explicit interactive capability limits + +Current execution-seam capability differs sharply between official Codex app-server and the current `codex exec` / TypeScript SDK seam: + +- official docs and sources in: + - `https://developers.openai.com/codex/sdk` + - `https://developers.openai.com/codex/noninteractive` + - `/tmp/openai-codex/sdk/typescript/src/thread.ts` + - `/tmp/openai-codex/sdk/typescript/src/events.ts` + - `/tmp/openai-codex/sdk/typescript/src/exec.ts` + - `/tmp/openai-codex/codex-rs/exec/src/lib.rs` +- richer app-server control-plane docs in: + - `https://developers.openai.com/codex/app-server` + - `/tmp/openai-codex/codex-rs/app-server/README.md` + +Important current-code and current-doc facts: + +- official docs position the TypeScript SDK as the application embedding seam, but the current SDK still wraps local `codex exec` +- the current TypeScript SDK input surface is narrow: + - `text` + - `local_image` +- the current TypeScript SDK streamed event surface is also 
narrow: + - `thread.started` + - `turn.started` + - `turn.completed` + - `turn.failed` + - `item.started` + - `item.updated` + - `item.completed` + - `error` +- raw `codex exec` source explicitly rejects several server-request flows in exec mode rather than surfacing them for the host app to resolve: + - command execution approval + - file change approval + - `request_user_input` + - dynamic tool calls + - `apply_patch` approval + - exec command approval + - permissions approval + - ChatGPT auth-token refresh +- this means the current exec/SDK seam is not simply “the same as app-server, but easier” +- it is a more headless seam with an explicitly smaller interactive/control surface + +Important consequence: + +- phase 1 cannot honestly treat raw `codex exec` or the current TypeScript SDK as approval-parity or full interactive-parity seams +- if phase 1 uses raw exec or the current SDK, the lane needs an explicit capability contract for what is: + - supported end-to-end + - automatically rejected by the runtime seam + - unsupported in the app because the seam never exposes it +- without that rule, the rollout can quietly overclaim: + - manual approvals + - generic runtime prompts + - MCP elicitation + - dynamic tool behavior + even though the actual execution seam is headless-limited + +### 36. 
`--ephemeral` avoids durable session ownership but also disables exec's final turn-item backfill + +Current session-ownership safety and transcript-completeness tradeoffs differ between raw `codex exec` modes: + +- raw `codex exec` sources in: + - `/tmp/openai-codex/codex-rs/exec/src/lib.rs` +- official app-server and non-interactive docs in: + - `https://developers.openai.com/codex/app-server` + - `https://developers.openai.com/codex/noninteractive` + +Important current-code and current-doc facts: + +- raw `codex exec` can run with `--ephemeral`, which avoids durable Codex-owned session storage +- the current TypeScript SDK does not expose the same `ephemeral` control directly +- app-server docs and schemas already note that `turn/completed` can arrive with empty `turn.items` +- raw exec compensates for that in non-ephemeral mode by doing one last `thread/read` and backfilling completed-turn items before shutdown +- raw exec explicitly skips that backfill path when the thread is ephemeral +- this means `--ephemeral` is not a free safety win: + - it reduces durable session ownership + - but it also removes one built-in completed-turn recovery path + +Important consequence: + +- phase 0 cannot choose `--ephemeral` only because it feels safer around session ownership +- it also has to decide how completed-turn item completeness will be recovered for: + - transcript projection + - final assistant message capture + - post-turn exact-log/task-log reads + - replay/history hydration +- without that rule, the rollout can easily become “session-safer but history-weaker” in a way that only shows up after live demos succeed + +### 37. 
Current Codex API-key routing in our app does not match the native exec/SDK auth surface automatically + +Current Codex credential-routing semantics already differ between our old app/backend path and the real Codex exec/SDK seam: + +- current app/runtime code in: + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/ProviderConnectionService.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/providerAwareCliEnv.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/providerRuntimeEnv.ts` + - `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/services/runtimeBackends/codexBackendResolver.ts` +- official Codex docs and SDK sources in: + - `https://developers.openai.com/codex/noninteractive` + - `/tmp/openai-codex/sdk/typescript/src/exec.ts` + - `/tmp/openai-codex/sdk/typescript/README.md` + +Important current-code and current-doc facts: + +- our current app-side Codex API-key mode is built around: + - `OPENAI_API_KEY` + - `CLAUDE_CODE_CODEX_BACKEND=api` + - existing old-lane `api` / `adapter` backend routing +- current connection-info, issue detection, and source labeling for Codex API keys also inspect `OPENAI_API_KEY`, not `CODEX_API_KEY` +- official non-interactive Codex docs say `CODEX_API_KEY` is supported in `codex exec` +- the current TypeScript SDK explicitly injects `CODEX_API_KEY` when the `apiKey` option is provided +- this means the real Codex exec/SDK seam does not automatically share the same credential surface as our old Responses-API-backed Codex lane +- a `codex-native` rollout therefore needs more than “backend id decoupling” +- it also needs an explicit credential-routing contract for how: + - stored keys + - env vars + - connection-issue messages + - readiness checks + - runtime status copy + map onto the selected lane + +Important consequence: + +- phase 1 cannot assume that old 
`OPENAI_API_KEY`-based Codex API-key truth automatically authenticates the native exec/SDK lane +- if the chosen lane is raw exec or the current SDK, the rollout needs an explicit rule for whether the host: + - passes `CODEX_API_KEY` + - calls the SDK with `apiKey` + - or uses some later app-server login surface +- without that rule, UI/status can say “Codex API key ready” while the actual selected lane still starts with the wrong credential shape + +### 38. Current Codex model inventory, disabled-model heuristics, and probe flow are still largely static/provider-wide + +Current model-selection and model-verification truth already differs between our app and the richer native Codex model surface: + +- current app/runtime code in: + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/CliProviderModelAvailabilityService.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/providerModelProbe.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/utils/teamModelCatalog.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/shared/utils/providerModelVisibility.ts` + - `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/utils/model/codex.ts` +- richer native Codex model surface in: + - `/tmp/openai-codex/codex-rs/app-server/README.md` + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/v2/Model.ts` + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/v2/ModelListParams.ts` + +Important current-code and current-doc facts: + +- current Codex model inventory is mostly static: + - hardcoded model ids in orchestrator/runtime helpers + - hardcoded team model catalog options + - hardcoded UI-disabled Codex models and reasons +- current provider model verification is also CLI-shaped and provider-wide: + - probe prompt is fixed + - probe args are generic + - 
preflight default for Codex is hardcoded to `gpt-5.4-mini` +- official native Codex model surface is richer and more dynamic: + - `model/list` + - `includeHidden` + - `supportedReasoningEfforts` + - `defaultReasoningEffort` + - `inputModalities` + - `additionalSpeedTiers` + - `availabilityNux` + - optional upgrade metadata +- this means `codex-native` cannot safely inherit the old assumption that “Codex models are just this fixed provider-wide list plus a few static UI-disabled rules” + +Important consequence: + +- phase 1 cannot assume that old Codex model inventory, disabled-model reasons, and probe defaults still describe the native lane honestly +- if `codex-native` is added without a lane-aware model contract, we can get: + - wrong available model lists + - wrong disabled badges/reasons + - wrong reasoning-effort choices + - wrong default/preflight model assumptions + - stale provider-wide heuristics standing in for native-lane truth +- without that rule, create/launch dialogs, runtime settings, provisioning hints, and model verification can all stay internally consistent while still being wrong about what the native lane really supports + +### 39. 
Native Codex thread start/resume has trust semantics that do not match our current host-owned workspace-trust boundary automatically + +Current workspace-trust ownership in our orchestrator/app is explicit and host-controlled: + +- current host trust boundary code in: + - `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/utils/config.ts` + - `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/interactiveHelpers.tsx` + - `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/main.tsx` +- current native Codex start flow/docs in: + - `/tmp/openai-codex/codex-rs/exec/src/lib.rs` + - `/tmp/openai-codex/codex-rs/app-server/README.md` + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/v2/ThreadStartParams.ts` + +Important current-code and current-doc facts: + +- our current orchestrator trust model is explicit: + - `checkHasTrustDialogAccepted()` gates trust + - interactive sessions show a trust dialog + - hooks, LSP, MCP-prefetch, and full env application are deferred until trust is accepted +- current raw `codex exec` uses its own gate: + - it exits when not inside a trusted directory unless `--skip-git-repo-check` or bypass mode is used + - that is not the same contract as our persisted host trust-dialog acceptance +- current Codex app-server docs explicitly say: + - `thread/start` with `cwd` and resolved sandbox `workspace-write` or full access also marks that project as trusted in user `config.toml` +- this means native Codex start/resume can carry trust side effects or trust assumptions that do not line up with our existing host-owned trust boundary by default + +Important consequence: + +- phase 1 cannot assume native Codex trust semantics are equivalent to our host trust dialog +- if `codex-native` launches a thread in a writable/full-access mode, we must explicitly decide: + - whether host trust remains the only authority + - whether native trust writes are allowed at all + - whether native trust writes are 
allowed only after host trust is already accepted +- without that rule, the rollout can silently: + - mutate persistent trust state behind the host's back + - bypass trust-gated env/hook/LSP behavior + - or conflate Codex repo-check semantics with our actual workspace-trust semantics + +### 40. Codex collaboration-mode and instruction channels can override or duplicate our current system/bootstrap instruction ownership + +Current instruction ownership in our codebase is already layered and load-bearing: + +- current host/system prompt assembly in: + - `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/utils/systemPrompt.ts` + - `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/screens/REPL.tsx` +- current team-bootstrap/runtime copy expectations in: + - `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/services/teamBootstrap/teamBootstrapMemberBriefingGuard.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/utils/bootstrapPromptSanitizer.ts` +- native Codex instruction surfaces in: + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/v2/TurnStartParams.ts` + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/v2/ThreadStartParams.ts` + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/v2/ThreadResumeParams.ts` + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/CollaborationMode.ts` + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/Settings.ts` + - `/tmp/openai-codex/codex-rs/app-server/README.md` + +Important current-code and current-doc facts: + +- our orchestrator already has a strict system-prompt layering model: + - override prompt + - coordinator/agent prompt + - custom/default prompt + - append prompt +- team bootstrap and UI sanitization rely on specific instruction text staying present and not being silently replaced +- native Codex exposes multiple instruction channels: + - 
`baseInstructions` + - `developerInstructions` + - `collaborationMode` +- native Codex docs/schema explicitly state: + - `collaborationMode` takes precedence over model, reasoning effort, and developer instructions + - `collaborationMode.settings.developer_instructions: null` means “use built-in instructions for the selected mode” + - `collaborationMode/list` omits built-in developer instructions from the response + +Important consequence: + +- phase 1 cannot treat collaboration mode as an innocuous cosmetic preset +- we must explicitly decide who owns instruction truth for `codex-native`: + - host system/bootstrap prompt assembly + - native `baseInstructions` / `developerInstructions` + - collaboration-mode built-ins +- without that rule, the rollout can silently: + - duplicate instructions + - lose bootstrap-critical guidance + - override host-selected model/effort/instruction semantics + - or make UI/runtime behavior drift because built-in Codex instructions are active even though app surfaces cannot inspect them directly + +### 41. 
Rich replayable native-thread history depends on an explicit `persistExtendedHistory` policy and that choice is not retroactive + +Current replay/exact-log correctness in our app already depends on persisted and hydrated history, not just live turn streams: + +- current replay/exact-log consumers in: + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/parsing/SessionParser.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/taskLogs/exact/BoardTaskExactLogStrictParser.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/taskLogs/discovery/TeamTranscriptSourceLocator.ts` +- native Codex history controls in: + - `/tmp/openai-codex/codex-rs/app-server/README.md` + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/v2/ThreadStartParams.ts` + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/v2/ThreadResumeParams.ts` + +Important current-doc facts: + +- native Codex `thread/start`, `thread/resume`, and `thread/fork` accept `persistExtendedHistory: true` +- Codex docs describe this as the way to persist a richer subset of history needed for less-lossy later `thread/read`, `thread/resume`, and `thread/fork` +- Codex docs also explicitly say this does not backfill events that were not persisted previously +- that means history completeness is partly decided when the thread is created/resumed/forked, not only later when UI asks to hydrate it + +Important consequence: + +- phase 1 cannot treat persisted-history richness as a later optimization toggle +- we must explicitly decide: + - whether native threads start with `persistExtendedHistory: true` + - whether some lanes/operations stay lossy by design + - how replay/exact-log/UI truth marks threads whose history can never be fully hydrated later +- without that rule, the rollout can silently create mixed native-thread populations where: + - some 
threads hydrate richly + - some threads stay permanently lossy + - and replay/exact-log code cannot tell the difference honestly + +### 42. Native Codex app-server exposes process-wide config, feature, and marketplace mutation surfaces that do not match our current host-owned settings model automatically + +Current app-side runtime/config ownership is host-managed: + +- current host-owned app config in: + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/infrastructure/ConfigManager.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/runtime/ProviderRuntimeSettingsDialog.tsx` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/ipc/configValidation.ts` +- native Codex app-server config/state mutation surfaces in: + - `/tmp/openai-codex/codex-rs/app-server/README.md` + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/v2/Config.ts` + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/v2/ProfileV2.ts` + +Important current-doc facts: + +- `experimentalFeature/enablement/set` patches in-memory process-wide feature enablement +- `marketplace/add` persists remote marketplace config into user marketplace state +- `config/value/write` and `config/batchWrite` write to user `config.toml` +- `config/mcpServer/reload` can hot-reload loaded threads after disk config edits +- native config surface also includes: + - `profile` + - `profiles` + - `developer_instructions` + - `approvals_reviewer` + - other user-config-layer fields + +Important consequence: + +- phase 1 cannot treat native config/feature/marketplace mutation as harmless helper APIs +- if later selective app-server enrichment is used, we must explicitly decide whether these surfaces are: + - forbidden in phase 1 + - mirrored into host-owned config/state + - or allowed only through one explicit host-controlled bridge +- without that rule, the rollout can 
silently: + - mutate user/global native config outside app settings + - enable plugins/apps process-wide for unrelated threads + - persist marketplaces or feature flags the host never represented + - or split truth between host-managed config and native process-wide config + +### 43. Detached native review threads create secondary thread identities that do not map automatically onto our current launch/chain/review surfaces + +Current app and transcript surfaces already carry their own session/thread identity expectations: + +- current team/runtime identity surfaces in: + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/utils/providerSlashCommands.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/taskLogs/exact/BoardTaskExactLogStrictParser.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/taskLogs/discovery/TeamTranscriptSourceLocator.ts` +- native Codex detached review flow in: + - `/tmp/openai-codex/codex-rs/app-server/README.md` + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/v2/ReviewStartParams.ts` + - `/tmp/openai-codex/codex-rs/app-server-protocol/schema/typescript/v2/ReviewStartResponse.ts` + +Important current-code and current-doc facts: + +- our UI already suggests a Codex `/review` affordance in `providerSlashCommands.ts` +- native `review/start` can run: + - `inline` on the current thread + - or `detached` on a new review thread +- for detached review: + - `reviewThreadId` differs from the original `threadId` + - the server emits a new `thread/started` notification for the review thread + - review-mode items stream on that new thread identity + +Important consequence: + +- phase 1 cannot treat native review as “just another turn on the same conversation” unless we explicitly force inline-only behavior +- we must explicitly decide whether phase 1: + - disables native review affordances + - 
supports inline review only + - or supports detached review with explicit child-thread/sidechain mapping +- without that rule, the rollout can silently: + - create secondary native threads the app never modeled + - lose review-thread identity in replay/logs + - or make `/review` appear supported while detached review semantics are still unmapped + +### 44. `codex-native` backend identity alone is not enough to represent native binary-version, protocol-surface, or experimental-surface truth + +Current app-side runtime/backend truth is still mostly keyed on backend ids and coarse diagnostics: + +- shared runtime/backend status shapes in: + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/shared/types/cliInstaller.ts` + - `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/services/runtimeBackends/types.ts` +- current backend selector/runtime summary surfaces in: + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/runtime/ProviderRuntimeBackendSelector.tsx` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/runtime/ProviderRuntimeSettingsDialog.tsx` +- current model-verification cache signature in: + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/CliProviderModelAvailabilityService.ts` +- native Codex binary/protocol reality in: + - `/tmp/openai-codex/sdk/typescript/src/exec.ts` + - `/tmp/openai-codex/codex-rs/app-server/README.md` + +Important current-code and current-doc facts: + +- current shared provider/runtime status does **not** carry: + - native executable source + - native Codex binary version + - native protocol/capability revision + - stable-vs-experimental protocol surface truth +- current SDK exec path can resolve Codex from: + - platform-specific bundled npm packages + - an explicit executable path + - and neither of those is necessarily the user's detected external `codex` binary +- 
app-server schema generation is explicitly version-specific +- app-server stable and experimental schemas differ, and experimental surface requires explicit opt-in +- current UI selectors/settings mostly treat `selectedBackendId` / `resolvedBackendId` as enough backend identity for user-facing truth +- current model-verification signature is backend-aware, but it is not native-binary-version-aware or native-protocol-surface-aware + +Important consequence: + +- phase 1 cannot treat `codex-native` backend id alone as the full source of capability truth +- we must explicitly decide whether native lane status/probes/cache identity surface: + - executable source + - native binary version + - protocol/capability revision + - stable-vs-experimental surface truth where relevant +- without that rule, the rollout can silently: + - claim one universal `codex-native` capability story across different machines + - reuse stale readiness/model/probe truth across version-skewed native binaries + - or let packaged dependency upgrades change native capabilities without the app noticing + +### 45. 
App-server capability surface and live notification truth are negotiated per connection, not globally + +Current app-server protocol behavior is explicitly connection-scoped: + +- upstream protocol/connection docs in: + - `https://developers.openai.com/codex/app-server` + - `/tmp/openai-codex/codex-rs/app-server/README.md` +- current host app already has multiple truth-ingestion paths in: + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/store/index.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/ClaudeMultimodelBridgeService.ts` + +Important current-doc facts: + +- `experimentalApi` is negotiated once during `initialize` and fixed for that connection lifetime +- `optOutNotificationMethods` is also per connection and exact-match only +- `thread/unsubscribe` is connection-scoped +- event subscriptions and live notifications are therefore connection-scoped, not global process truth +- some typed notifications and fields can be absent purely because that connection did not opt in or opted out, not because the runtime feature itself is absent + +Important consequence: + +- if later selective app-server enrichment uses more than one connection profile, phase 1 cannot assume they all see the same capability surface or the same live event stream +- we must explicitly decide whether any future app-server use has: + - one canonical connection profile + - one canonical `experimentalApi` policy + - one canonical notification-subscription policy +- without that rule, the rollout can silently: + - see different fields/methods on different connections + - lose live notifications on one path while another still thinks the lane is healthy + - or misdiagnose missing notifications as runtime failure instead of connection-policy drift + +### 46. 
Native Codex history mutation semantics do not match our mostly append-only transcript and log-processing assumptions automatically + +Current host transcript/log plumbing already leans on append-only and compaction-boundary semantics: + +- append-only and compaction-aware transcript/log plumbing in: + - `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/hooks/useLogMessages.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/infrastructure/FileWatcher.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/taskLogs/discovery/TeamTranscriptSourceLocator.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/taskLogs/exact/BoardTaskExactLogStrictParser.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/parsing/SessionParser.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamProvisioningService.ts` +- current orchestrator compaction semantics in: + - `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/remote/sdkMessageAdapter.ts` + - `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/utils/sessionStorage.ts` +- native Codex mutation surfaces in: + - `/tmp/openai-codex/codex-rs/app-server/README.md` + +Important current-code and current-doc facts: + +- our current watcher/parser stack has explicit append-only optimizations: + - last processed line counts + - last processed file size + - incremental tail parsing +- our current orchestrator already models compaction through explicit `compact_boundary` semantics instead of pretending the full file is immutable context forever +- native Codex app-server exposes history mutation operations that are stronger than “append more events”: + - `thread/compact/start` + - `thread/rollback` +- `thread/rollback` explicitly prunes the last turns from 
future resumes and persists a rollback marker +- `thread/compact/start` changes model-visible history and streams progress while the canonical stored thread can later differ from what a pure append-only local event cache assumed + +Important consequence: + +- phase 1 cannot assume native canonical history is merely append-only-plus-hydration +- we must explicitly decide whether replay/exact-log/task-log truth is sourced from: + - append-only projected transcript + - canonical native thread history after rollback/compaction + - or one reconciliation rule between the two +- without that rule, the rollout can silently: + - keep stale pre-rollback activity visible in append-only local logs + - read cached append-only tails as if they still matched canonical native history + - or let compaction/rollback mutate replay truth without exact-log/task-log knowing which source is authoritative + +### 47. Native turn metadata truth for usage, model, reasoning effort, reroute, and plan does not map cleanly to our current assistant-message-centric assumptions + +Current host context/status/transcript plumbing still leans heavily on assistant-message-local usage/model truth: + +- current host usage/model/context surfaces in: + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/TeamDetailView.tsx` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamProvisioningService.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/utils/jsonl.ts` + - `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/utils/analyzeContext.ts` + - `/Users/belief/dev/projects/claude/agent_teams_orchestrator/src/tasks/LocalAgentTask/LocalAgentTask.tsx` +- native Codex notification and metadata surfaces in: + - `https://developers.openai.com/codex/app-server` + - `/tmp/openai-codex/codex-rs/app-server/README.md` + - 
`/tmp/openai-codex/codex-rs/app-server-protocol/src/protocol/v2.rs` + - `/tmp/openai-codex/codex-rs/app-server/src/codex_message_processor/token_usage_replay.rs` + - `/tmp/openai-codex/sdk/typescript/src/events.ts` + - `/tmp/openai-codex/sdk/typescript/src/thread.ts` + +Important current-code and current-doc facts: + +- `TeamDetailView` currently derives context metrics from: + - `lastAssistantUsage` + - `lastAssistantModelName` +- `TeamProvisioningService` currently updates lead context usage from: + - `messageObj.usage` + - `messageObj.model` + - and a narrow fallback through `result.modelUsage.contextWindow` +- `jsonl.ts` currently persists assistant `usage` and `model` on transcript rows and deduplicates streaming rows by `requestId` +- `analyzeContext.ts` explicitly uses current message-level API usage as the same source of truth as the status line +- app-server docs explicitly say token usage streams separately via `thread/tokenUsage/updated` +- app-server docs explicitly say `thread/resume` and `thread/fork` emit restored token usage immediately after the response so clients can render usage before the next turn starts +- app-server docs explicitly say resume uses persisted `model` and `reasoningEffort` unless explicit overrides disable that fallback +- app-server docs explicitly expose turn-level metadata outside assistant transcript rows: + - `turn/plan/updated` + - `turn/diff/updated` + - `model/rerouted` +- app-server docs explicitly say current `turn/*` notifications still carry empty `items` arrays and clients should rely on `item/*` for canonical item lists +- current TypeScript SDK/raw-exec seam is narrower: + - `turn.completed` exposes usage + - completed `agent_message` items expose final response text + - but there is no app-server-grade typed surface for `thread/tokenUsage/updated`, `turn/plan/updated`, or `model/rerouted` + +Important consequence: + +- phase 1 cannot assume native turn truth lives on the last assistant transcript row the way 
current Anthropic-shaped flows often do +- we must explicitly decide the authoritative source for: + - live token usage + - restored token usage after resume/fork/reload + - context-window truth + - final model and reasoning-effort truth after reroute or persisted-resume fallback + - plan/diff/reroute metadata +- without that rule, the rollout can silently: + - under-report or lose native usage after resume/fork/reload + - compute context-window warnings from stale or guessed assistant-row usage + - keep showing the configured model when the native lane rerouted or resumed with persisted model/effort truth + - lose turn-plan/diff/reroute truth while transcript and status surfaces still look “complete” + +### 48. Native thread-local defaults can drift from host launch intent, while our team/runtime surfaces still mostly assume provider/model/effort are launch-owned and stable + +Current host launch, persistence, and runtime-summary surfaces still mostly treat provider/model/effort as launch-owned runtime identity: + +- current host launch/persistence/runtime surfaces in: + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/shared/types/team.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/store/slices/teamSlice.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamProvisioningService.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/utils/memberRuntimeSummary.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/utils/bootstrapPromptSanitizer.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamBackupService.ts` +- native Codex thread-default and persisted-runtime surfaces in: + - `https://developers.openai.com/codex/app-server` + - `/tmp/openai-codex/codex-rs/app-server/README.md` + - 
`/tmp/openai-codex/codex-rs/app-server-protocol/src/protocol/v2.rs` + - `/tmp/openai-codex/codex-rs/state/src/extract.rs` + - `/tmp/openai-codex/sdk/typescript/src/thread.ts` + +Important current-code and current-doc facts: + +- `TeamLaunchRequest`, `TeamCreateRequest`, and renderer-side `TeamLaunchParams` currently persist: + - `providerId` + - `model` + - `effort` + but no richer native thread-default identity +- `TeamProvisioningService.shouldSkipResumeForProviderRuntimeChange(...)` currently compares provider and model, but not effort or richer native thread-default drift +- `TeamProvisioningService.applyEffectiveLaunchStateToConfig(...)` writes effective lead/member provider, model, and effort back into config-owned truth +- `memberRuntimeSummary.ts` and `bootstrapPromptSanitizer.ts` still derive most runtime copy from configured provider/model/effort plus best-effort runtime-model hints, not from native thread-default authority +- `TeamBackupService`, `members.meta.json`, relaunch prefill, and draft replay paths still preserve provider/model/effort intent, not the richer native thread-default state a resumed thread may actually inherit +- official app-server docs explicitly say config overrides on `turn/start` become the default for subsequent turns on the same thread +- official app-server docs explicitly say `thread/resume` uses the latest persisted `model` and `reasoningEffort` by default unless explicit overrides disable that fallback +- official app-server docs explicitly say resuming with a different model emits a warning and applies a one-time model-switch instruction on the next turn +- official app-server docs explicitly say `dynamicTools` persisted on `thread/start` are restored on `thread/resume` when you do not provide new dynamic tools +- upstream state extraction tests explicitly show: + - `TurnContext` sets persisted `model` and `reasoning_effort` + - `SessionMeta` does not + +Important consequence: + +- phase 1 cannot treat host launch params, 
`team.meta.json`, local-storage launch params, or config-owned provider/model/effort as automatically equal to the live native thread-defaults after resumed or overridden native turns +- we must explicitly decide the authoritative source for: + - launch intent + - current native thread-defaults + - resume behavior when launch intent and persisted native defaults diverge + - warning/copy truth when resume preserves old defaults or applies a one-time model switch +- without that rule, the rollout can silently: + - resume a native thread on persisted model/effort while UI still shows the newer launch intent as if it were live runtime truth + - overwrite config/meta/summary truth with launch-owned values that never matched the resumed native thread defaults + - skip or allow resume based on provider/model only while effort or other thread-default drift still changes behavior materially + - make relaunch/retry/restore look like “the same team runtime” even though native thread-local defaults have already diverged from saved host intent + +### 49. 
Native thread-status and warning truth does not map cleanly to our current process and provisioning status assumptions + +Current host runtime and team-status surfaces still mostly describe liveness and readiness through process, provisioning, and probe truth: + +- current host status and warning surfaces in: + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/team/TeamProvisioningService.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/store/slices/teamSlice.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/team/TeamDetailView.tsx` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/main/services/runtime/ClaudeMultimodelBridgeService.ts` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/dashboard/CliStatusBanner.tsx` + - `/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/src/renderer/components/settings/CliStatusSection.tsx` +- native Codex thread-status and warning surfaces in: + - `https://developers.openai.com/codex/app-server` + - `/tmp/openai-codex/codex-rs/app-server/README.md` + - `/tmp/openai-codex/codex-rs/app-server-protocol/src/protocol/v2.rs` + - `/tmp/openai-codex/codex-rs/app-server/src/thread_status.rs` + - `/tmp/openai-codex/codex-rs/app-server/src/codex_message_processor.rs` + - `/tmp/openai-codex/codex-rs/exec/src/event_processor_with_jsonl_output.rs` + +Important current-code and current-doc facts: + +- `TeamProvisioningService` and `teamSlice` currently center around: + - provisioning run state + - `runtimeAlive` + - lead activity + - probe warnings + - runtime snapshot presence + more than native thread lifecycle truth +- current dashboard/settings runtime status surfaces are mostly provider-global, while native Codex `thread.status` is thread-scoped +- official app-server docs explicitly say: + - 
`thread/started` already carries the current `thread.status` + - `thread/status/changed` is emitted whenever a loaded thread's status changes + - status can be `notLoaded`, `idle`, `systemError`, or `active` with `activeFlags` + - `thread/unsubscribe` can later emit `thread/closed` and a `thread/status/changed` transition back to `notLoaded` + - generic runtime warnings use `warning { threadId?, message }` + - startup/config diagnostics use `configWarning { summary, details?, path?, range? }` +- upstream app-server code has dedicated thread-status resolution and watch machinery instead of deriving thread truth only from process liveness +- raw exec also has warning events, but they are not equivalent to app-server's typed `thread.status` lifecycle + +Important consequence: + +- phase 1 cannot treat host process liveness, provisioning progress, runtime snapshot presence, or probe warnings as automatically equivalent to native thread health or loaded-state truth +- phase 1 also cannot let provider-global Codex status banners stand in for thread-specific health truth once multiple native threads can be loaded, resumed, degraded, or closed independently +- we must explicitly decide the authoritative source for: + - thread loaded/notLoaded truth + - active/idle/systemError truth + - thread-scoped runtime warnings + - config/startup warnings that are not tied to one active turn +- without that rule, the rollout can silently: + - show a team or runtime as healthy because the process is alive while the native thread is already in `systemError` + - keep showing a thread as active/available after it has become `notLoaded` due to unsubscribe or inactivity + - drop thread-scoped warnings because they are not attached to assistant transcript rows or provisioning probes + - conflate config warnings, runtime warnings, and process warnings into one coarse status banner that cannot explain what is actually wrong + +## What We Learned + +After deep code and docs analysis, the most 
important conclusions are: + +1. `@openai/codex-sdk` and `codex exec --json` are the real official execution seam for embedded Codex runtime usage. +2. `codex exec` supports API-key mode, so API-key mode itself is not the blocker. +3. `Codex` native plugins, apps, skills, and MCP are part of the real Codex runtime flow. +4. Our current `agent_teams_orchestrator` query loop is deeply coupled to Anthropic-style events and tool semantics. +5. A full drop-in swap from the current Codex adapter to `@openai/codex-sdk / codex exec` would not be a safe transport-only change. It would change runtime semantics. +6. `plugin-kit-ai` is a good fit for plugin management and native plugin placement. +7. `codex app-server` is promising for richer control-plane features, but should not be the foundation of the first production rollout for plugin management. +8. Backend ids already cross repo boundaries, so `codex-native` must be introduced as an additive shared contract, not a hidden orchestrator-only detail. +9. Transcript compatibility depends on enriched linkage fields like `requestId`, `sourceToolUseID`, and `toolUseResult`, not just on entry labels. +10. `@openai/codex-sdk` currently does not expose the same persistence control as raw `codex exec --ephemeral`, so the SDK-vs-CLI seam is a real phase-0 decision, not an implementation footnote. +11. Live approval and activity UX already depends on stable request-correlation semantics, so request identity cannot be treated as incidental metadata. +12. Transcript chain and sidechain identity are already load-bearing semantics for team logs, grouping, and subagent linking, so phase 1 cannot treat them as optional metadata. +13. Codex runtime settings, provisioning summaries, installer status, and model verification already depend on backend-specific runtime status fields, so `codex-native` needs an explicit settings/probe contract from day one. +14. 
Approval UX is currently grounded in specific `control_request` / `permission_request` semantics, so Codex-native must either adapt truthfully into that contract or stay limited in phase 1. +15. Codex auth-mode configuration currently rewrites backend env directly, so `codex-native` needs an explicit rule for decoupling authentication choice from execution-lane choice. +16. App config validation and team launch contracts currently lag behind backend-lane truth, so `codex-native` needs an explicit config-schema and launch-granularity decision instead of being smuggled in as a hidden runtime-only option. +17. Current team launch plumbing suggests Codex backend routing is process-scoped rather than member-scoped, so phase 1 must not imply mixed Codex backend lanes inside one launched runtime unless launch contracts are explicitly expanded. +18. Provisioning probe caching is currently provider-scoped and long-lived, so backend/auth changes can leave stale readiness truth unless cache identity and invalidation become backend-aware. +19. External Codex CLI detection is already surfaced through runtime status and installer snapshots, and an SDK-based lane may resolve its binary from bundled `@openai/codex` packages instead of the user's external CLI, so the rollout must keep “binary detected” separate from “Codex-native lane ready”. +20. Runtime backend status already distinguishes `selectable` from `available`, but current UI mostly treats backend options as one-dimensional availability, so `codex-native` needs explicit option-state semantics. +21. Main-process status bridging still has a legacy fallback that drops backend-rich truth, so `codex-native` needs an explicit degraded-status contract instead of silently collapsing to provider-only status on transient runtime-status failures. +22. 
Current Codex UI summary/copy still derives “runtime” mostly from auth method and connection mode, so `codex-native` needs explicit lane-aware wording instead of inheriting the old subscription/API-key phrasing. +23. Runtime status already has two renderer write paths, and the progressive snapshot path bypasses request epoch/loading reconciliation, so `codex-native` needs an explicit in-flight/degraded snapshot contract instead of trusting last-writer-wins store mutation. +24. Extension preflight and action gating still depend on coarse runtime/provider truth, so `codex-native` needs backend-lane-aware mutation gating instead of inheriting today's one-dimensional plugin-support checks. +25. Team model selectors and provisioning diagnostics still narrow runtime truth down to a provider-wide shape, so `codex-native` needs an explicit lane-aware team-model contract instead of relying on old Codex heuristics. +26. Provisioning prepare-cache reuse still keys off backend summary display text, so `codex-native` needs canonical backend-aware cache identity instead of copy-coupled cache semantics. +27. Persisted team identity, relaunch prefill, draft replay, backup/restore, runtime snapshots, and resume guards are still lane-agnostic, so `codex-native` needs an explicit persisted-vs-inherited backend identity contract instead of silently following whatever global Codex backend is current at replay time. +28. Team summaries, list surfaces, and synthetic provisioning cards are still lane-blind, so `codex-native` needs an explicit summary-surface contract instead of assuming lane truth can stay hidden below detail views. +29. Member runtime summaries, bootstrap copy, and composer capability suggestions are still provider-wide, so `codex-native` needs an explicit member/composer contract instead of assuming lane-sensitive copy or slash-command affordances can safely keep keying off `providerId === 'codex'`. +30. 
Plugin install success, current-session activation, new-thread visibility, restart requirements, and app-auth completion are still too conflated in current extension UX, so `codex-native` needs an explicit installed-vs-active-vs-usable contract before plugin support can be advertised safely. +31. Structured plugin/app targeting is richer in Codex app-server than in the current SDK/exec embedding seam, so `codex-native` needs an explicit phase-1 mention-targeting contract instead of silently relying on linked-text mention heuristics and then overclaiming deterministic invocation support. +32. Codex live notifications are good active-turn truth but not the same thing as hydrated thread history, and our current exact-log/task-log consumers already depend on persisted/hydrated `ParsedMessage[]`, so phase 1 needs an explicit live-stream-vs-history-hydration contract instead of treating one source as both. +33. Codex approval requests can be cleared by lifecycle events, not just by user response, so `codex-native` needs an explicit approval-resolution and cleanup contract instead of assuming our current CLI-style allow/deny flow already covers pending-state truth. +34. Codex can also request generic user input and MCP elicitation, while our current app only has a truthful path for tool approvals, so `codex-native` needs an explicit interactive-request support contract instead of quietly assuming approval UX covers all provider-native prompts. +35. Raw `codex exec` and the current TypeScript SDK are headless seams with explicit interactive capability limits, so phase 1 cannot quietly market them as approval-parity or app-server-parity execution paths. +36. `--ephemeral` reduces durable Codex session ownership, but it also disables exec's final completed-turn `thread/read` backfill, so session-safety and history-completeness must be chosen together rather than optimized independently. +37. 
Current app-side Codex API-key routing is still built around `OPENAI_API_KEY` and old backend env semantics, while the real exec/SDK seam uses `CODEX_API_KEY`, so `codex-native` needs an explicit credential-routing contract instead of reusing old Codex API-key assumptions. +38. Current Codex model inventory, UI-disabled model heuristics, reasoning-effort assumptions, and probe defaults are still largely static/provider-wide, while native Codex exposes a richer model surface, so `codex-native` needs an explicit lane-aware model contract instead of inheriting old Codex model truth. +39. Native Codex start/resume has its own trust semantics, and app-server can persist project trust on thread start, so phase 1 must keep host workspace-trust ownership explicit instead of assuming native trust behavior matches our current trust dialog. +40. Codex collaboration mode and developer-instruction channels can take precedence over model/effort/instructions, so phase 1 needs one explicit instruction owner instead of letting built-in Codex instructions and our system/bootstrap prompt layers stack or race implicitly. +41. Rich replayable native-thread history depends on opting into `persistExtendedHistory` at thread birth/resume/fork and that choice is not retroactive, so phase 1 needs an explicit persisted-history policy instead of treating history completeness as a later tune-up. +42. Native app-server config, feature, and marketplace mutation surfaces are process-wide or persistent by default, so selective app-server enrichment needs an explicit host-owned config bridge instead of letting native state mutate behind app settings. +43. Native detached review can create a second thread id and emit its own `thread/started`, so phase 1 needs an explicit review-thread identity policy instead of assuming `/review` always stays on the current conversation. +44. 
`codex-native` backend id alone is not enough to represent native binary-version, protocol-surface, or experimental-surface truth, so phase 1 needs an explicit native runtime identity contract instead of assuming one lane id means one stable capability set everywhere. +45. App-server capability surface and live notification truth are negotiated per connection, not globally, so later selective app-server enrichment needs one canonical connection policy instead of assuming every connection sees the same fields, methods, and live events. +46. Native Codex history mutation semantics include rollback and compaction flows that do not match our mostly append-only transcript/log assumptions automatically, so phase 1 needs an explicit canonical-history-versus-projected-transcript contract instead of assuming append-only local logs always stay truthful. +47. Native Codex usage, model, reasoning-effort, reroute, and plan truth are not guaranteed to live on assistant transcript rows, so phase 1 needs an explicit turn-metadata authority contract instead of guessing from last-assistant usage/model and provider-wide config. +48. Native Codex thread-defaults are mutable per turn and `thread/resume` prefers persisted defaults, so host launch `provider/model/effort` is only launch intent unless the rollout explicitly forces fresh threads or explicit override semantics. +49. Native Codex thread lifecycle and warning surfaces have their own thread-scoped loaded, active, idle, system-error, and warning truth, so phase 1 needs an explicit thread-status and warning-authority contract instead of treating provider-global status, process liveness, provisioning, and probe warnings as the same thing. + +## Chosen Direction + +We will **not** force Codex into the current Anthropic-shaped runtime contract. 
+ +We will instead: + +- add a new **internal normalized event/log layer** +- keep execution semantics provider-native where needed +- add a separate **Codex-native runtime lane** +- use `plugin-kit-ai` for plugin management and native plugin placement + +In practical terms: + +- current Codex path stays available as the fallback/default path at first +- real Codex runtime execution becomes a separate lane instead of a drop-in replacement +- unified logs come from normalization, not from pretending every provider has Anthropic-native runtime semantics + +## Decision Summary + +### We are doing this + +- keep the current Codex adapter path as the fallback/default path initially +- introduce a new `Codex-native` backend lane using `@openai/codex-sdk / codex exec` +- treat the first `Codex-native` lane as capability-scoped by the chosen seam rather than assuming app-server-grade interactivity +- keep auth/model truth for the first `Codex-native` lane scoped by that same seam instead of inheriting old Codex API-key or static-model assumptions +- keep host workspace-trust ownership explicit instead of letting native thread start mutate or imply trust implicitly +- freeze one instruction owner for phase 1 instead of mixing collaboration-mode built-ins with our host system/bootstrap prompt layers +- freeze persisted-history policy at thread birth/resume so replay, exact-log, and hydrate-after-reload truth stay explicit +- introduce a normalized internal event/log format for all providers +- map Anthropic, Gemini, and future Codex-native events into that normalized format +- keep unified logging, transcript projection, analytics, and UI-facing event handling on top of the normalized layer +- use `plugin-kit-ai` for: + - install + - update + - remove + - repair + - discover + - catalog + - native Codex plugin placement through native marketplace/filesystem layout + +### We are not doing this + +- not replacing the whole multimodel runtime in one shot +- not forcing real 
Codex runtime execution into fake Anthropic transport semantics +- not pretending a full `@openai/codex-sdk / codex exec` swap is a drop-in backend replacement +- not making `app-server plugin/*` the first production seam + +## Phase-0 Decision Checkpoints + +These must be answered explicitly before implementation starts spreading across repos. + +### 1. Backend identity checkpoint + +Current runtime backend ids for Codex are only: + +- `auto` +- `adapter` +- `api` + +That means the plan must introduce a new explicit backend lane rather than overloading existing ids. + +Default: + +- add a distinct `codex-native` backend id +- do not hide it behind `api` or `adapter` + +### 2. Transcript ownership checkpoint + +We must decide what remains the UI source of truth during migration. + +Default: + +- `claude_team` transcript/read-model path remains the UI source of truth +- Codex thread id is stored as provider-native continuation metadata + +### 3. Capability truth checkpoint + +We must decide how plugin support is reported during migration. + +Default: + +- support is backend-lane-specific +- old Codex path may stay `plugins: unsupported` +- `codex-native` may become `plugins: supported` only after proven real-session execution + +### 4. UI migration checkpoint + +We must decide whether `claude_team` consumes raw normalized events in phase 1. + +Default: + +- no +- phase 1 keeps current transcript/read-model UI path stable + +### 5. Session resume checkpoint + +We must decide whether Codex-native resume is enabled in the first rollout. + +Default: + +- treat resume as feature-flagged until transcript/session ownership is proven safe + +### 6. Request-correlation checkpoint + +We must decide what request identity guarantees the normalized layer and transcript projector must preserve. 
+ +Default: + +- keep `requestId` as a first-class cross-layer correlation key for streamed assistant dedupe and approval UX +- preserve tool-linking identifiers where there is a truthful originating action +- do not downgrade these fields to best-effort metadata in phase 1 + +### 7. Backend-id compatibility checkpoint + +We must decide how `codex-native` is introduced across shared config and UI contracts. + +Default: + +- add `codex-native` as a new explicit backend id in orchestrator config/runtime types +- propagate it additively through main/preload/renderer payloads +- keep existing `auto`, `adapter`, and `api` meanings stable +- do not silently repurpose `api` to mean the new `codex-native` lane + +### 8. SDK-vs-raw-exec checkpoint + +We must decide whether the first `Codex-native` lane is built on top of `@openai/codex-sdk`, raw `codex exec`, or a narrow wrapper that can choose between them. + +Default: + +- do not commit to SDK-only before phase 0 explicitly evaluates the `ephemeral` gap and session ownership impact +- prefer whichever seam lets us make session persistence behavior explicit instead of accidental + +### 9. Runtime settings and connection-management checkpoint + +We must decide whether `codex-native` remains hidden behind Codex connection mode or becomes a first-class runtime lane in settings/status/provisioning. + +Default: + +- do not keep the current implicit rule that all Codex runtime choice is connection-managed +- add `codex-native` as an explicit backend/status lane if it exists +- update runtime settings UI, provisioning summaries, installer snapshots, and runtime status payloads together +- do not let model verification silently reuse the old Codex probe assumptions without an explicit `codex-native` probe policy + +### 10. Approval/control adaptation checkpoint + +We must decide how provider-native approval/control events become current approval UX truth.
+ +Default: + +- manual approval parity is not assumed automatically for `codex-native` +- phase 0 must prove whether Codex-native can emit a truthful `ToolApprovalRequest`-compatible contract with stable `requestId` +- if that is not yet true, phase 1 keeps the lane limited instead of shipping fake approval support + +### 11. Model verification checkpoint + +We must decide how Codex-native participates in model verification and provisioning readiness checks. + +Default: + +- `codex-native` gets an explicit backend-aware probe policy and signature +- do not reuse cached availability from old Codex backend ids across the new lane +- do not treat current generic Codex provider probes as automatically valid for the new execution seam + +### 12. Connection-vs-runtime env checkpoint + +We must decide how Codex authentication mode and Codex execution lane interact in env construction. + +Default: + +- stop assuming that Codex API-key mode automatically means `CLAUDE_CODE_CODEX_BACKEND=api` +- define auth mode and runtime backend as separate inputs with an explicit resolution rule +- make `codex-native` capable of using API-key auth without being silently forced back onto the old Responses API lane + +### 13. Config-schema and launch-granularity checkpoint + +We must decide whether `codex-native` is selected globally per provider, per launch, or both. + +Default: + +- do not smuggle `codex-native` in through runtime env alone +- update app-side runtime config validation and shared runtime config types before the lane is exposed +- keep the first rollout global-per-provider unless there is a deliberate per-launch backend contract expansion +- if per-launch backend override does not exist yet, provisioning and launch UI must stay honest that backend choice is provider-global, not task-specific + +### 14. Process-scope routing checkpoint + +We must decide whether one launched orchestrator runtime can host more than one Codex backend lane at the same time. 
+ +Default: + +- assume no mixed Codex backend lanes within one launched orchestrator process in phase 1 +- treat Codex backend routing as process-scoped or runtime-global until spawn and launch contracts prove otherwise +- do not imply teammate-level or member-level Codex backend choice until launch payloads and spawn plumbing explicitly carry it + +### 15. Probe-cache and preflight-truth checkpoint + +We must decide how provisioning-readiness cache identity and invalidation behave when Codex backend, auth mode, or probe policy changes. + +Default: + +- do not keep readiness cache keyed only by `cwd + provider` +- include backend-sensitive identity or deterministically invalidate affected entries when Codex auth mode, runtime backend, Claude base path, or probe policy changes +- do not allow provider-level cached readiness to outlive a backend/auth switch while model verification already sees a new lane +- if the contract is not ready yet, bypass cached provisioning readiness for `codex-native`-related checks instead of pretending the old cache is safe + +### 16. External-runtime-diagnostic checkpoint + +We must decide what it means when Codex CLI is merely detected on disk versus when the `codex-native` lane is actually available and verified. + +Default: + +- keep external binary detection separate from backend availability and from plugin-support truth +- do not mark `codex-native` selectable or ready just because `detectExternalBinary('codex')` succeeds +- require runtime status, installer snapshots, and provisioning UI to distinguish: + - CLI detected + - lane selectable + - lane resolved + - lane authenticated + - lane verified for execution + +### 17. Backend-option-state checkpoint + +We must decide how `selectable`, `available`, `resolved`, and `verified` differ for `codex-native`, and how the renderer should behave in each state. 
+ +Default: + +- do not treat backend options as one boolean +- keep `selectable` and `available` as separate semantics +- allow the plan to express “user may choose this lane” separately from “this lane is authenticated and ready right now” +- update the renderer/backend-selector contract so `codex-native` does not depend on old `available === selectable` assumptions + +### 18. Runtime-status fallback checkpoint + +We must decide what UI/main truth should look like when backend-rich runtime status is temporarily unavailable. + +Default: + +- do not silently fall back from backend-rich Codex status to provider-only status without marking degradation +- preserve the last known backend-rich truth or surface an explicit degraded state instead of erasing backend ids/options entirely +- do not let transient status transport failures force Codex back into the old connection-managed-only UX model + +### 19. Runtime-copy and summary checkpoint + +We must decide how Codex status copy, banners, and settings summaries talk about auth choice versus execution lane once `codex-native` exists. + +Default: + +- do not let `Current runtime` for Codex be derived only from `authMethod` / `configuredAuthMode` +- use lane-aware summary rules whenever backend ids are available +- reserve auth-mode wording for connection method, not for execution-lane truth +- update dashboard/settings summary helpers together with backend-lane rollout + +### 20. Progressive-status and snapshot-reconciliation checkpoint + +We must decide how progressive status snapshots, cached `getStatus()` responses, and provider-specific refreshes reconcile in renderer/store once backend-rich Codex truth matters. 
+ +Default: + +- do not keep a silent last-writer-wins contract for `cliStatus` +- define explicit semantics for: + - in-flight partial snapshot + - settled status truth + - degraded transport truth +- require progressive status pushes to preserve enough sequencing/settledness information that older partial snapshots cannot silently overwrite fresher provider/backend truth +- keep renderer loading/error/request-sequencing state aligned with whichever status transport path is allowed to mutate `cliStatus` + +### 21. Extension-preflight and action-gating checkpoint + +We must decide how backend-lane truth becomes extension-action truth once Codex plugin support depends on `codex-native`, not on provider id alone. + +Default: + +- do not gate plugin management only on coarse `cliStatusLoading`, provider auth, or provider-wide mutable capability truth +- define backend-aware preflight semantics for: + - old Codex lane + - `codex-native` selectable-but-unverified + - degraded runtime-status truth + - backend-specific plugin capability support +- require extension store banners, install buttons, and mutation preflight to use the same lane-aware truth model + +### 22. Team-model and provisioning-runtime checkpoint + +We must decide what runtime shape team model selectors and provisioning diagnostics are allowed to rely on once Codex has more than one meaningful backend lane. + +Default: + +- do not keep team model/runtime helpers narrowed to provider-wide auth/backend summary truth +- extend the shared runtime shape used by team model selectors so lane-specific model visibility, selection errors, and provisioning notes can depend on canonical backend identity +- require create/launch dialogs, team model selectors, and provisioning diagnostics to speak the same lane-aware runtime vocabulary + +### 23. Provisioning-prepare cache-identity checkpoint + +We must decide what canonical identity keys reusable provider prepare/model results once backend-lane truth matters. 
+ +Default: + +- do not key provisioning prepare/model cache by backend summary display text +- key it by canonical backend/auth/probe identity instead +- keep cache correctness independent from UI copy and summary-label changes + +### 24. Persisted-team-identity and replay-identity checkpoint + +We must decide whether team launch/relaunch/resume, draft-team persistence, and backup/restore persist Codex backend lane identity or explicitly inherit the current global Codex backend at replay time. + +Default: + +- do not keep launch persistence provider/model-only when backend lane materially changes runtime semantics +- do not keep `team.meta.json`, `members.meta.json`, or shared team runtime snapshots provider/model-only when backend lane materially changes runtime semantics +- do not let backup/restore silently re-materialize a team without backend-lane truth if the restored runtime semantics would differ by lane +- if phase 1 keeps backend choice global-per-provider, store and UI must say launches inherit the current global backend instead of pretending lane persistence exists +- if phase 1 needs stable relaunch identity, persist canonical backend identity alongside saved launch params and runtime snapshots +- make resume guards compare canonical backend identity, not just provider/model + +### 25. Team-summary and list-surface checkpoint + +We must decide what backend-lane truth, if any, team cards, draft cards, and team-list summaries are allowed to expose once Codex lanes diverge materially. + +Default: + +- do not keep `TeamSummary` permanently lane-blind if team lifecycle semantics can differ by lane +- either enrich team summaries with canonical lane identity or explicitly keep list surfaces lane-agnostic and avoid lane-sensitive copy/actions there +- keep synthetic provisioning snapshots and persisted team summaries on the same lane-vocabulary contract so cards do not disagree about the same team + +### 26. 
Member-runtime-summary and composer-capability checkpoint + +We must decide what backend-lane truth member cards, member detail, bootstrap copy, and composer capability suggestions are allowed to expose once old Codex and `codex-native` diverge materially. + +Default: + +- do not keep member runtime summaries permanently provider-wide if backend lane materially changes runtime semantics or capability affordances +- either enrich member/composer surfaces with canonical backend-lane truth or explicitly keep them lane-agnostic and avoid lane-sensitive copy/actions there +- keep member runtime copy, bootstrap/system summary copy, and composer slash-command/plugin affordances on the same backend-vocabulary contract so detail and composer surfaces do not tell a different Codex story than runtime status/settings + +### 27. Plugin-activation and session-visibility checkpoint + +We must decide what “installed”, “active”, “usable”, “requires restart/new thread”, and “requires app auth/setup” mean for each runtime lane once Codex plugin support depends on `codex-native`. + +Default: + +- do not treat install/uninstall success as immediate activation truth +- keep native placement truth separate from current-session execution truth +- require an explicit lane-aware contract for at least: + - installed in filesystem/marketplace + - executable on the selected lane + - usable only in a new thread or restarted session + - still blocked on app/auth setup +- if exact current-session activation cannot be proven safely, UI must stay conservative and say new-thread/restart required instead of implying “ready now” + +### 28. Mention-targeting and invocation-shape checkpoint + +We must decide what kind of explicit plugin/app/skill targeting phase 1 can honestly support on the chosen Codex execution seam. 
+ +Default: + +- do not assume SDK/exec gives us the same structured invocation surface as app-server +- make phase 1 explicit about whether it supports: + - deterministic structured mention targeting + - linked-text mention targeting only + - or no explicit plugin/app targeting affordance yet +- if the chosen seam still depends on linked text mentions, UI/composer surfaces must stay conservative and avoid claiming first-class deterministic invocation semantics +- keep mention-targeting truth separate from install/catalog truth so “plugin installed” does not silently become “app can invoke it exactly” + +### 29. Live-stream versus history-hydration checkpoint + +We must decide what source is authoritative for active-turn rendering versus replayable thread history. + +Default: + +- keep live `turn/*` and `item/*` notifications as active activity truth, not as automatic persisted-history truth +- keep explicit hydration sources separate, such as: + - `thread/read` + - `thread/turns/list` + - `thread/resume` + - `thread/fork` + - projected persisted transcript reads +- do not let sparse `Turn` / `Thread` payloads or partial live item caches stand in for exact-log, replay, or post-hoc transcript history +- if phase 1 cannot prove a safe direct history-hydration contract from the chosen Codex seam, keep exact-log/task-log/replay surfaces grounded in the persisted transcript projector instead of improvising from live event cache + +### 30. Approval-resolution and lifecycle-cleanup checkpoint + +We must decide what event is authoritative for clearing pending approval or request-user-input state when the user did not explicitly answer. 
+ +Default: + +- do not assume pending approval state ends only through successful allow/deny IPC +- treat lifecycle cleanup as first-class truth when the runtime says the request is no longer pending +- require an explicit mapping for at least: + - user answered + - auto-resolved + - lifecycle-cleared on turn start/complete/interrupt + - run/turn dismissed or no longer active +- if the chosen Codex seam cannot yet express truthful cleanup semantics, phase 1 must keep approval UX limited instead of leaving stale pending state in renderer/store + +### 31. Interactive-request and elicitation checkpoint + +We must decide what phase 1 does when Codex-native asks for generic user input or MCP-server elicitation rather than a plain approval. + +Default: + +- do not assume tool approval UI can stand in for generic interactive prompts +- explicitly decide whether phase 1: + - supports `requestUserInput` + - supports MCP elicitation + - blocks them with a clear limitation + - or keeps the lane limited until a truthful response UI exists +- if these request types are unsupported in phase 1, the lane must not overclaim parity for flows that depend on them + +### 32. Headless-exec capability-boundary checkpoint + +We must decide whether the first Codex-native execution seam is explicitly headless-limited and, if so, what phase 1 is allowed to claim about approvals and other interactive/runtime-control features. 
+ +Default: + +- do not assume raw `codex exec` or the current TypeScript SDK inherits app-server interactive semantics +- if phase 1 uses raw exec or the current SDK, explicitly document which of these are: + - supported + - rejected by the runtime seam itself + - unsupported because the seam never surfaces them to the app +- keep lane capability truth conservative for at least: + - manual approvals + - generic `requestUserInput` + - MCP elicitation + - dynamic tool behavior + - other server-request-style controls +- if richer interaction is required later, add it as a separate seam decision instead of quietly expanding the headless lane by implication + +### 33. Ephemeral-versus-backfill checkpoint + +We must decide whether phase 1 optimizes first for minimal durable Codex session ownership, for stronger completed-turn item completeness, or for an explicit replacement hydration strategy. + +Default: + +- do not treat `--ephemeral` as a free safety win +- make the tradeoff explicit between: + - ephemeral/no durable Codex-owned session persistence + - non-ephemeral exec with final `thread/read` completed-turn item backfill + - explicit post-turn hydration/projector recovery if ephemeral remains preferred +- if phase 1 chooses `--ephemeral`, transcript and history completeness must be recovered through an explicit tested path before exact-log/task-log/replay claims are considered safe +- if phase 1 chooses non-ephemeral execution, durable session ownership and resume semantics must stay explicit in UI/runtime truth instead of being treated like an invisible implementation detail + +### 34. Codex credential-routing and API-key surface checkpoint + +We must decide how the first `codex-native` lane receives credentials and how that truth is reflected in status, issues, and UI copy. 
+ +Default: + +- do not assume the old Codex `OPENAI_API_KEY` path automatically authenticates the native exec/SDK lane +- if phase 1 uses raw exec or the current SDK, explicitly decide whether the host: + - passes `CODEX_API_KEY` + - passes SDK `apiKey` + - or uses another explicit auth surface +- keep connection-issue detection, readiness checks, and status copy lane-aware so old Codex API-key readiness and native exec/SDK readiness cannot drift apart +- do not let provider-level “Codex API key configured” truth stand in for native-lane authentication truth unless the credential-routing contract explicitly proves they are the same path + +### 35. Native-lane model inventory and reasoning-effort checkpoint + +We must decide what source is authoritative for `codex-native` model lists, disabled states, reasoning-effort options, and default/preflight model choices. + +Default: + +- do not assume the old static Codex model catalog remains truthful for the native lane +- explicitly decide whether phase 1 model truth comes from: + - a native model-list surface + - a curated lane-aware allowlist + - or a temporary conservative subset with explicit limitations +- keep these at minimum lane-aware: + - visible model ids + - disabled-model reasons + - default/preflight model + - supported reasoning-effort choices + - any upgrade/availability guidance shown in UI +- do not let provider-wide old-Codex heuristics stand in for native-lane model truth once backend lane materially changes model behavior + +### 36. Workspace-trust and native-thread-start checkpoint + +We must decide who owns workspace-trust truth when a native Codex lane starts or resumes threads with writable/full-access semantics. 
+ +Default: + +- do not assume native Codex trust behavior is equivalent to our host trust dialog +- keep host trust as the authoritative phase-1 boundary for: + - full env application + - hooks/LSP/MCP startup + - any UI that says the workspace is trusted +- if the chosen native seam can mark projects trusted in Codex config/state, explicitly decide whether that is: + - forbidden in phase 1 + - allowed only after host trust is already accepted + - or surfaced as a second explicit trust authority +- do not equate raw exec repo-check semantics with our persisted trust-dialog semantics + +### 37. Instruction-ownership and collaboration-mode checkpoint + +We must decide which instruction channel owns phase-1 `codex-native` behavior and which native Codex instruction surfaces are intentionally out of scope. + +Default: + +- do not let collaboration-mode built-ins, native `baseInstructions`, native `developerInstructions`, and host system/bootstrap prompts all stack by accident +- explicitly decide whether phase 1 uses: + - host-owned system/bootstrap prompts only + - native instruction channels only + - or one carefully-defined hybrid +- if `collaborationMode` is not intentionally adopted in phase 1, keep it disabled instead of leaving it as an implicit future default +- if any native instruction channel is used, define how it interacts with: + - host model/effort selection + - bootstrap-critical guidance + - CLAUDE.md/rules/host prompt ownership + +### 38. Persisted-history policy checkpoint + +We must decide what persisted-history richness phase 1 guarantees for native threads and when that choice is made. 
+ +Default: + +- do not treat `persistExtendedHistory` as an invisible implementation toggle +- explicitly decide whether native `thread/start` / `thread/resume` / `thread/fork` use: + - richer persisted history by default + - a conservative lossy default + - or an explicit lane-specific/history-specific rule +- keep replay/exact-log/reload truth aware of whether a thread was born with rich or lossy persisted history +- do not assume later enabling richer persistence retroactively repairs older native threads + +### 39. Native-config, feature-state, and marketplace-ownership checkpoint + +We must decide whether any native app-server config/feature/marketplace mutation surface is allowed to write process-wide or persistent state during phase 1. + +Default: + +- do not let native `config/*`, `experimentalFeature/enablement/set`, or `marketplace/add` become a second hidden settings authority +- if selective app-server enrichment is used later, explicitly decide whether those mutations are: + - blocked in phase 1 + - mirrored through host-owned config/services + - or surfaced as explicit global native-state operations with matching UI truth +- keep host-owned settings/config as the default authority for runtime, connection, and marketplace truth unless one bridge is explicitly frozen + +### 40. Native-review thread-identity checkpoint + +We must decide what phase 1 does with native review flows that can fork detached review threads with their own thread id and lifecycle. + +Default: + +- do not assume native review always stays on the current thread +- explicitly decide whether phase 1: + - disables native review affordances + - supports inline review only + - or supports detached review with explicit child-thread/sidechain mapping +- if detached review is unsupported, UI/composer affordances must not imply otherwise + +### 41. 
Native binary-version and protocol-surface checkpoint + +We must decide what native runtime identity fields phase 1 treats as capability-defining for `codex-native`. + +Default: + +- do not treat backend id alone as enough native runtime identity +- explicitly decide whether phase 1 status/probe/capability truth carries: + - native executable source + - native binary version + - protocol/capability revision + - stable-vs-experimental surface truth where app-server enrichment is involved +- if bundled SDK binary and external CLI can both satisfy the lane, keep their capability truth separate unless proven equivalent +- do not let packaged dependency bumps or user-installed Codex version skew silently change what `codex-native` means without status, cache, and UI noticing + +### 42. App-server connection-policy checkpoint + +We must decide what one canonical connection policy means if selective app-server enrichment is added later. + +Default: + +- do not assume app-server capability surface is process-global +- explicitly decide whether future app-server usage has: + - one canonical `experimentalApi` policy + - one canonical `optOutNotificationMethods` policy + - one canonical live-subscription policy +- if different connection profiles are allowed later, their differing surface and notification truth must be explicit in capability and debugging signals +- do not diagnose missing fields or notifications as runtime breakage before ruling out connection-policy drift + +### 43. Canonical-history versus append-only-projection checkpoint + +We must decide which source is authoritative when native Codex history is logically mutated by rollback or compaction while our local transcript/log stack still prefers append-only processing. 
+ +Default: + +- do not assume append-only projected transcript remains canonical after native rollback or compaction +- explicitly decide whether phase 1 replay/exact-log/task-log truth is sourced from: + - canonical native thread history + - append-only projected transcript + - or one explicit reconciliation strategy +- if append-only local transcript remains part of phase 1, define how stale pre-rollback or pre-compaction activity is: + - hidden + - marked superseded + - or reconciled on reload/hydration +- do not let incremental watchers and append-only cache assumptions masquerade as canonical history after native history mutation + +### 44. Turn-metadata and usage-authority checkpoint + +We must decide which native source is authoritative for usage, model, reasoning-effort, reroute, and plan truth, and which of those truths phase 1 is allowed to surface at all on the chosen seam. + +Default: + +- do not infer native-lane token usage, context-window truth, or final model truth only from assistant transcript rows +- treat authoritative sources as seam-scoped: + - raw exec / current SDK: + - `turn.completed` usage is authoritative for completed-turn usage truth available on that seam + - app-server, if added later: + - `thread/tokenUsage/updated` is authoritative for replayed and restored usage truth + - persisted thread metadata plus explicit reroute notifications govern final model/reasoning-effort truth +- if the chosen seam does not expose truthful plan/diff/reroute metadata, keep those fields normalized-only or explicitly unavailable in phase 1 instead of guessing +- do not let context panels, provisioning usage, token warnings, or status copy imply richer native usage/model truth than the chosen seam can actually prove + +### 45. 
Native thread-defaults and launch-intent checkpoint + +We must decide how host launch intent, persisted native thread-defaults, and resume or fresh-thread policy interact once native turns can mutate default runtime behavior on the thread itself. + +Default: + +- do not assume `TeamLaunchRequest`, `TeamCreateRequest`, `TeamLaunchParams`, `team.meta.json`, or config-owned provider/model/effort remain canonical runtime truth after a resumed native thread restores persisted defaults +- explicitly decide whether phase 1 resume behavior: + - inherits persisted native thread-defaults + - always overrides them with host launch intent + - or blocks/skips resume when they differ +- compare at least provider, model, and effort when deciding whether a resumed native thread still matches host launch intent +- if the host cannot model some native thread-default truth honestly, keep that surface explicit as inherited or unknown rather than silently rewriting it into launch-owned config or summary copy + +### 46. Native thread-status and warning-authority checkpoint + +We must decide how native thread lifecycle and warning truth interact with host process, provisioning, and probe status surfaces. 
+ +Default: + +- do not assume process alive, provisioning active, or runtime snapshot present means the native thread is healthy or loaded +- explicitly decide whether phase 1 thread-health truth is sourced from: + - native thread status notifications and reads when available + - host process/provisioning status only + - or one explicit reconciliation strategy between them +- keep `thread.status` states like `notLoaded`, `idle`, `active`, and `systemError` distinct from generic host process liveness +- keep thread-scoped runtime warnings and config warnings distinct from provisioning probe warnings or transcript-attached warnings + +## Lowest-Confidence Execution Seam Options + +This is the one place where the plan should stay explicit about alternatives instead of pretending there is no tradeoff. + +### Option 1 - SDK-first phase-0 spike + +Use `@openai/codex-sdk` first and accept its current thread/session semantics for the spike. + +- Assessment: `🎯 7 🛡️ 7 🧠 5` +- Rough spike surface: `300-900` lines + +Pros: + +- matches the official Node/Electron embedding seam +- gives a higher-level thread API quickly +- likely minimizes phase-0 implementation code + +Cons: + +- hides some raw CLI behavior behind the SDK wrapper +- does not currently expose `ephemeral` +- still inherits the current exec seam's headless interactive limits +- can accidentally normalize around durable Codex-owned thread persistence before we intend to + +### Option 2 - raw `codex exec` wrapper first + +Use a narrow local wrapper around `codex exec --json` for the first spike, then decide later whether the production lane stays raw or moves up to the SDK. 
+ +- Assessment: `🎯 8 🛡️ 8 🧠 6` +- Rough spike surface: `400-1100` lines + +Pros: + +- keeps runtime flags and persistence behavior fully explicit +- lets us test `--ephemeral` directly +- exposes headless interactive limits early instead of hiding them behind the SDK wrapper +- makes normalized-event mapping closer to the actual process boundary we must understand anyway + +Cons: + +- slightly more glue code in phase 0 +- less ergonomic than the SDK for long-lived thread objects +- easier to accidentally overfit phase 0 to headless exec semantics if we forget this is evidence-gathering, not the final product seam +- may need an extra abstraction layer later if we switch upward to the SDK + +### Option 3 - dual wrapper from day one + +Build a small local abstraction that can drive either `@openai/codex-sdk` or raw `codex exec`, and start phase 0 by comparing both. + +- Assessment: `🎯 6 🛡️ 8 🧠 8` +- Rough spike surface: `700-1500` lines + +Pros: + +- maximizes optionality +- makes the seam explicit early +- can keep the production decision open a bit longer + +Cons: + +- higher upfront complexity +- bigger chance of overengineering phase 0 +- easy to spend too much time abstracting before we even know the correct session ownership model + +### Recommended default for phase 0 + +Start with **Option 2 - raw `codex exec` wrapper first**. 
+ +Reason: + +- it gives the cleanest evidence for the two scariest unknowns: + - event-shape truth + - session persistence truth +- it also exposes the real headless capability boundary before UI/runtime copy starts assuming richer interaction support +- it keeps `ephemeral` visible instead of hidden +- if phase 0 later proves that durable SDK-owned threads are acceptable, we can still move upward to `@openai/codex-sdk` with much better confidence + +## Why We Chose This + +### Main benefit + +This path gives us both: + +- unified internal logs/events +- a real path to native Codex runtime capabilities + +without requiring a full rewrite of the current multimodel runtime. + +### Main reason against a direct full swap + +The current orchestrator is deeply coupled to Anthropic-shaped runtime behavior: + +- `tool_use` +- `tool_result` +- `content_block_start` +- `input_json_delta` +- `message_delta` +- current permission and sandbox flow +- current synthetic tool/result handling +- current transcript persistence and resume logic + +`codex exec` emits a different event model: + +- `thread.started` +- `turn.started` +- `turn.completed` +- `turn.failed` +- `item.started` +- `item.updated` +- `item.completed` + +and item types such as: + +- `agent_message` +- `reasoning` +- `command_execution` +- `file_change` +- `mcp_tool_call` + +That is not just a different wire format. It is a different runtime shape. + +## Architecture Layers + +### Execution plane + +This is the runtime that actually talks to the provider or executes the provider-native agent runtime. 
+ +Planned state: + +- `Anthropic` - current path +- `Gemini` - current path +- `Codex fallback` - current adapter/API path +- `Codex-native` - real Codex runtime through `@openai/codex-sdk` / `codex exec`, with phase-1 capability truth scoped by the chosen seam rather than assumed to equal app-server + +### Normalized event/log plane + +This is the new provider-neutral projection layer we want inside `agent_teams_orchestrator`. + +It is the source of truth for: + +- logs +- transcript projection +- activity timeline rendering +- analytics-friendly event summaries +- desktop-facing runtime activity DTOs + +It is **not** required to be a lossless mirror of any one provider wire format. + +### Transcript compatibility plane + +This is separate from normalized runtime events. + +Its job is: + +- persist runtime history in a shape that current `claude_team` transcript readers can still consume +- preserve current read-model stability for: + - `ParsedMessage` + - exact-log parsing + - task activity + - grouped tool/result rendering + +This means: + +- normalized events are **not** written directly to disk for UI consumption in phase 1 +- they must first pass through a transcript compatibility projector + +### Chain and sidechain identity plane + +This sits underneath transcript compatibility. + +Its job is: + +- preserve a truthful parent/child transcript chain for persisted rows +- preserve truthful main-thread versus sidechain identity +- preserve enough session/member identity for team-log readers and subagent linking + +Phase-1 rule: + +- projected transcript rows must not invent or flatten chain/sidechain identity just to fit a convenient shape +- progress-like or transport-only events that are not real transcript messages must not become new chain participants by accident + +### Request-correlation plane + +This is separate from both normalized events and persisted transcript shape.
+ +Its job is: + +- preserve stable request identity for streamed assistant dedupe +- preserve approval request identity for live approval UX +- preserve truthful tool-action correlation where current UI and analysis code already rely on it + +Phase-1 rule: + +- request-correlation semantics must stay explicit across runtime events, normalized events, and projected transcript rows +- if a Codex-native event cannot be assigned a truthful request correlation, it should not be forced into a shape that pretends it has one + +### Approval/control adaptation plane + +This sits on top of request-correlation and underneath the current approval UX. + +Its job is: + +- translate provider-native approval/control events into the existing `ToolApprovalRequest` contract when that translation is truthful +- preserve stable request identity for pending/resolved approval state +- preserve a clear allow/deny response path back to the runtime + +Phase-1 rule: + +- `codex-native` must not claim approval parity unless this plane is explicitly specified and tested +- if provider-native events cannot truthfully map into the current approval contract, the lane must stay limited instead of fabricating fake `permission_request` rows + +### Approval-resolution and lifecycle-cleanup plane + +This sits between provider-native request cleanup semantics and the renderer's pending/resolved approval state. 
+ +Its job is: + +- separate explicit user decisions from lifecycle-driven request cleanup +- keep pending approval state, resolved icons, and stale-request dismissal truthful when a turn is interrupted, replaced, or completed before the user answers +- preserve a stable authority order between: + - explicit user response + - runtime auto-resolution + - runtime lifecycle cleanup + - run-level dismissal + +Phase-1 rule: + +- do not let `codex-native` approval UX depend only on successful allow/deny IPC +- if runtime cleanup semantics exist, they must map into an explicit renderer/store event instead of being inferred indirectly +- pending approval state must clear truthfully even when no explicit user decision happened +- if phase 1 cannot prove truthful cleanup semantics, keep the lane limited instead of leaving approval state half-mapped + +### Interactive-request and elicitation plane + +This sits between provider-native structured prompts and any UI surface that can collect user input back into the runtime. + +Its job is: + +- separate tool approvals from generic user-input prompts and MCP elicitation requests +- keep runtime turns from silently stalling when the provider expects structured user input rather than a simple allow/deny +- make unsupported interactive request types explicit instead of letting them fail as invisible no-op state + +Phase-1 rule: + +- do not let `codex-native` imply full interactive parity if only approval prompts are supported +- if `requestUserInput` or MCP elicitation are unsupported in phase 1, surface that as a deliberate lane limitation +- if supported, they need their own authoritative request lifecycle and response contract rather than being squeezed into the tool-approval model + +### Headless-exec capability-boundary plane + +This sits between the chosen Codex execution seam and all app/runtime claims about interactivity or runtime-side control. 
+ +Its job is: + +- keep headless exec/SDK capability truth separate from richer app-server capability truth +- prevent phase 1 from overclaiming support for server-request-style interactions the seam explicitly rejects +- force the rollout to say which interactive/runtime-control features are truly available on the chosen lane + +Phase-1 rule: + +- if the chosen seam is raw `codex exec` or the current TypeScript SDK, treat it as a headless-limited lane unless proven otherwise +- do not let UI, settings, or capability payloads imply support for: + - manual approval loops + - `requestUserInput` + - MCP elicitation + - dynamic tool calls + - other server-request-style controls + unless the chosen seam actually exposes and supports them end-to-end + +### Ephemeral-session and completion-backfill plane + +This sits between session-ownership safety decisions and transcript/history completeness decisions. + +Its job is: + +- separate “avoid durable Codex-owned session persistence” from “preserve final completed-turn item completeness” +- keep the `--ephemeral` tradeoff explicit instead of hiding it behind a vague safety preference +- force phase 1 to name its authoritative recovery path for final-turn items and post-turn history truth + +Phase-1 rule: + +- if the chosen seam uses non-ephemeral exec, treat final `thread/read` backfill as an explicit part of the lane contract and test it +- if the chosen seam uses `--ephemeral`, do not assume completed-turn item completeness still holds unless an explicit replacement hydration/projector strategy is specified and tested +- do not let transcript, exact-log, replay, or post-turn detail UX depend on implicit backfill behavior that the chosen seam no longer provides + +### Session ownership plane + +This is where we must stay conservative. 
+ +Current reality: + +- `@openai/codex-sdk` threads are persisted in `~/.codex/sessions` +- `claude_team` and current orchestrator flows already have their own transcript/session assumptions + +Phase-1 rule: + +- our transcript remains the UI/read-model source of truth +- the Codex thread id should be treated as a provider-native continuation token, not as the only session history source for UI + +### Runtime status/settings plane + +This sits alongside session ownership and management. + +Its job is: + +- keep `selectedBackendId`, `resolvedBackendId`, `availableBackends`, and backend summaries truthful +- keep provisioning readiness and installer/runtime diagnostics aligned with the real lane contract +- keep model verification signatures and probe policy aligned with the active execution seam + +Phase-1 rule: + +- `codex-native` must not piggyback on the old “Codex runtime follows connection method” assumption unless that rule is consciously preserved and tested +- if the lane is first-class in orchestrator, it must be first-class in settings/status/provisioning truth too + +### Connection/auth-routing plane + +This sits between provider connection settings and the execution plane.
+ +Its job is: + +- apply authentication credentials without silently rewriting execution-lane truth +- keep provider connection mode, backend selection env, and runtime status consistent +- make it explicit when API-key auth is compatible with more than one backend lane +- keep old-lane credential surfaces and native exec/SDK credential surfaces from masquerading as one shared “Codex API key ready” state + +Phase-1 rule: + +- `codex-native` must not inherit the old rule “Codex API key mode means Responses API lane” unless that mapping is intentionally preserved and documented +- env construction must resolve auth choice and runtime backend choice separately, then combine them explicitly +- if the chosen seam is raw exec or the current SDK, credential routing must explicitly bridge host-stored key truth into the seam's real auth surface instead of assuming old `OPENAI_API_KEY` routing is already native-lane-compatible + +### Config and launch-granularity plane + +This sits between saved app settings, provisioning, and execution selection. + +Its job is: + +- keep shared config schema, config validation, and runtime backend vocabulary aligned +- define whether backend choice is global-per-provider or launch-specific +- keep provisioning warnings, launch summaries, and runtime validation truthful about that granularity + +Phase-1 rule: + +- if backend choice is still global-per-provider, phase 1 must say so explicitly in both config semantics and provisioning UX +- do not imply task-specific or team-specific `codex-native` selection until `TeamLaunchRequest` and related launch contracts actually support it + +### Model-inventory and reasoning-effort plane + +This sits between backend/lane truth and model selectors, verification probes, and provisioning hints. 
+ +Its job is: + +- keep native-lane model inventory distinct from old provider-wide static catalogs when they diverge +- keep disabled-model heuristics, reasoning-effort choices, and default/preflight model choices aligned with the selected lane +- prevent static Codex model assumptions from silently standing in for richer native model truth + +Phase-1 rule: + +- do not let `codex-native` inherit the old static Codex model catalog unless that subset is intentionally frozen and documented +- if phase 1 uses a curated subset instead of native dynamic model listing, that subset and its disabled reasons must still be lane-aware and explicit +- model verification, create/launch selectors, and runtime settings must not disagree about what models or reasoning-effort options the selected lane actually supports + +### Workspace-trust and native-thread-start plane + +This sits between host trust ownership and native Codex thread lifecycle. + +Its job is: + +- keep host workspace-trust truth separate from native Codex trust side effects +- prevent native thread start/resume from silently mutating project trust behind the host's back +- keep trust-gated env/hook/LSP/MCP behavior aligned with one explicit authority + +Phase-1 rule: + +- do not let `codex-native` mark a project trusted or behave as if it already is trusted before host trust is satisfied +- if native trust writes are allowed at all, they must be explicitly sequenced after host trust and surfaced truthfully instead of being treated as an invisible side effect +- do not let raw exec repo-check semantics stand in for our persisted trust-dialog semantics + +### Instruction-ownership and collaboration-mode plane + +This sits between native Codex instruction channels and our current host-owned system/bootstrap prompt assembly. 
+ +Its job is: + +- keep one explicit owner for phase-1 instruction truth +- prevent collaboration-mode built-ins from silently overriding host-selected model/effort/instruction semantics +- prevent bootstrap-critical instructions from being duplicated, replaced, or hidden by a second instruction layer the app cannot inspect well + +Phase-1 rule: + +- do not mix host system/bootstrap prompts with native collaboration-mode built-ins unless one explicit precedence contract is frozen and tested +- if phase 1 does not intentionally adopt `collaborationMode`, keep that channel off instead of leaving it as latent magic +- if native `baseInstructions` or `developerInstructions` are used, they must have an explicit relationship to host prompt assembly rather than being appended opportunistically + +### Process-scope backend-routing plane + +This sits between launch/provisioning and actual teammate spawn behavior. + +Its job is: + +- keep backend-routing truth aligned with the actual lifetime and scope of env/application +- prevent UI and provisioning copy from implying member-level backend choice when backend routing is still inherited from process state +- make mixed-lane support an explicit future capability instead of an accidental assumption + +Phase-1 rule: + +- do not claim that one launched orchestrator runtime can run both old Codex and `codex-native` lanes side by side unless spawn plumbing explicitly supports that +- if Codex backend selection is still process-scoped, team launch UX must describe it as such + +### Probe-cache and preflight-truth plane + +This sits between runtime settings/provisioning and actual readiness truth. 
+ +Its job is: + +- keep provisioning-readiness cache identity aligned with backend/auth/probe-policy truth +- prevent long-lived provider-only cache entries from masking a real backend or auth switch +- keep provisioning readiness and backend-aware model verification from diverging into split-brain status + +Phase-1 rule: + +- a Codex backend/auth change that alters execution-lane truth must either invalidate affected probe cache entries immediately or bypass them deterministically +- do not reuse provider-only cached readiness for `codex-native` if the active model-verification signature or backend summary says the lane changed + +### External-runtime-diagnostic plane + +This sits between external binary discovery and user-facing backend status. + +Its job is: + +- keep local binary detection separate from execution-lane readiness +- prevent UI, installer snapshots, or provisioning summaries from treating “CLI exists” as “lane is ready” +- make the relationship between detected binary, selectable backend option, and verified runtime truth explicit +- keep external CLI discovery separate from bundled SDK-binary readiness if the chosen seam resolves Codex from packaged npm dependencies rather than the user's PATH + +Phase-1 rule: + +- `externalRuntimeDiagnostics` may support explanations and install hints, but they must not silently upgrade capability or readiness truth for `codex-native` +- if the lane is not yet selectable or authenticated, CLI detection alone must not make it appear ready +- if the chosen seam uses a bundled SDK binary, external CLI detection must stay advisory instead of implying that the exact binary this lane will execute is already available + +### Backend-option-state plane + +This sits between runtime status payloads and renderer backend-selection UX. 
+ +Its job is: + +- keep option-state semantics explicit across `selectable`, `available`, `resolved`, and `verified` +- prevent renderer/backend-selector behavior from collapsing those states into one boolean +- allow `codex-native` to be introduced as a visible lane without forcing fake readiness or fake unselectability + +Phase-1 rule: + +- the renderer must not treat `available` as the only state that matters once `codex-native` exists +- runtime status and renderer logic must agree on whether an unavailable-but-selectable lane is still user-choosable for configuration or migration purposes + +### Runtime-status fallback plane + +This sits between orchestrator status transport and UI/backend-selection state. + +Its job is: + +- define what happens when backend-rich status payloads are unavailable transiently +- keep degraded transport separate from true provider/backend capability loss +- prevent legacy provider-only fallback from erasing meaningful backend-lane truth + +Phase-1 rule: + +- if unified runtime status is unavailable, UI must still distinguish: + - last known backend truth + - current degraded transport state + - actual backend unavailability +- a transport fallback must not silently remap `codex-native` into old provider-only Codex semantics + +### Runtime-copy and summary plane + +This sits between backend-rich status truth and user-facing labels/banners. 
+ +Its job is: + +- keep connection-method wording separate from execution-lane wording +- prevent auth-mode labels from masquerading as backend-lane truth +- keep settings, dashboard, and detail summaries aligned on what “current runtime” actually refers to + +Phase-1 rule: + +- once `codex-native` exists, Codex runtime summary helpers must become lane-aware +- UI may still show `Codex subscription` or `OpenAI API key` as connection method, but not as a substitute for `selectedBackendId` / `resolvedBackendId` + +### Progressive-status and snapshot-reconciliation plane + +This sits between main-process status publishing and renderer/store state. + +Its job is: + +- reconcile progressive status snapshots, cached IPC status responses, and provider-specific refresh results +- preserve whether a snapshot is partial, settled, or degraded +- prevent stale or partial snapshot pushes from silently clobbering newer backend-lane truth + +Phase-1 rule: + +- renderer/store must not treat every incoming `cliStatus` snapshot as equally authoritative +- if progressive snapshots are kept, they must carry enough sequencing or settledness semantics to coexist safely with request/response refresh paths + +### Extension-preflight and action-gating plane + +This sits between runtime/backend truth and extension-management UX. 
+ +Its job is: + +- project backend-lane truth into plugin/MCP/skill action availability honestly +- keep coarse runtime-install status separate from backend-lane execution readiness +- prevent provider-wide capability truth from overstating what the selected lane can actually manage + +Phase-1 rule: + +- plugin actions for Codex must not become enabled just because Codex as a provider is authenticated or mutable on some other lane +- extension banners, install buttons, and mutation preflight must share the same backend-aware readiness model + +### Team-model and provisioning-runtime plane + +This sits between runtime/backend truth and create/launch dialog model selection. + +Its job is: + +- project lane-aware runtime truth into team model visibility, model validation, and provisioning notes +- prevent provider-wide Codex heuristics from standing in for backend-lane identity +- keep create/launch dialogs aligned with the same lane vocabulary used by runtime settings and provisioning status + +Phase-1 rule: + +- team model selectors and provisioning diagnostics must not rely only on provider id plus auth/backend summary once `codex-native` exists +- lane-specific model truth must be explainable in create/launch UI without falling back to old Codex-wide assumptions + +### Provisioning-prepare cache-identity plane + +This sits between provisioning warmup/model diagnostics and cached reuse. 
+ +Its job is: + +- keep prepare/model cache identity canonical and backend-aware +- decouple cache validity from backend summary wording +- prevent false cache reuse across different Codex lanes or auth/probe combinations + +Phase-1 rule: + +- prepare/model cache identity must not be derived from display summary text +- provisioning cache reuse must stay stable under copy changes and must split cleanly across old Codex and `codex-native` + +### Persisted-team-identity and replay-identity plane + +This sits between saved launch requests, draft team metadata, member metadata, backup/restore artifacts, relaunch defaults, runtime snapshots, and resume decisions. + +Its job is: + +- keep persisted team launch identity honest about whether backend lane is pinned or inherited from current global runtime config +- keep team draft metadata and member metadata honest about whether they carry lane identity or only provider/model defaults +- keep backup/restore semantics honest about whether restored teams preserve lane identity or merely restore provider/model defaults +- prevent relaunch/restart/resume flows from silently changing Codex lane after settings drift +- keep runtime snapshots and relaunch UI clear about which backend identity the team actually expects + +Phase-1 rule: + +- do not persist or replay Codex team launches using only provider/model if backend lane materially changes runtime semantics +- do not let `team.meta.json`, `members.meta.json`, `TeamConfig`, or runtime snapshots imply stable lane identity if they only persist provider/model/effort +- if launch identity remains global-per-provider, expose that as an explicit inherited-global rule instead of pretending lane persistence exists +- resume guards and runtime snapshots must compare or expose canonical backend identity whenever lane drift would change runtime behavior + +### Team-summary and list-surface plane + +This sits between persisted team/runtime truth and renderer-facing team cards, tabs, and 
list summaries. + +Its job is: + +- decide whether team summary surfaces are lane-aware or intentionally lane-agnostic +- prevent list cards, draft cards, and runtime detail cards from implying different Codex lane truths for the same team +- keep summary-level UI honest about pinned-vs-inherited backend identity without forcing every detail-only field into the list surface + +Phase-1 rule: + +- do not let `TeamSummary` remain accidentally lane-blind if users can make backend-lane decisions from team cards, create/launch summaries, or restore/retry flows +- if summary surfaces stay lane-agnostic in phase 1, explicitly keep lane-sensitive actions and wording out of them instead of implying hidden certainty +- synthetic provisioning snapshots and persisted team summaries must not disagree about whether lane identity is known, inherited, or intentionally omitted + +### Member-runtime-summary and composer-capability plane + +This sits between backend-rich runtime truth and member-level/detail/composer-facing copy or capability affordances. + +Its job is: + +- keep member runtime summary strings honest about whether lane truth is known or intentionally omitted +- keep bootstrap/system summary copy from collapsing old Codex and `codex-native` into the same visible runtime story +- keep composer slash-command/plugin/app affordances aligned with the actual selected/resolved lane instead of provider-only Codex identity + +Phase-1 rule: + +- do not let member/detail/composer surfaces imply lane-specific truth they do not actually carry +- lane-sensitive command or plugin affordances must not key only off `providerId === 'codex'` once backend lane matters +- if phase 1 keeps these surfaces lane-agnostic, explicitly keep lane-sensitive copy/actions out of them instead of quietly inheriting provider-wide Codex assumptions + +### Plugin-activation and session-visibility plane + +This sits between plugin-management success and user-facing “you can use this now” truth. 
+ +Its job is: + +- separate native placement success from actual execution readiness on the selected lane +- keep current-session visibility, new-thread visibility, restart-required truth, and app-auth/setup completion as separate concepts +- prevent extension cards/buttons/banners from overstating activation state once `codex-native` exists + +Phase-1 rule: + +- do not let `isInstalled` imply “active in the current session” +- `codex-native` plugin UX must at least distinguish: + - installed but old lane selected + - installed on `codex-native` but usable only in a new thread or after restart + - installed but still blocked on required app/auth setup +- if exact activation state inside an already-running session cannot be proven safely, UI must stay conservative and describe next-thread/restart semantics explicitly + +### Mention-targeting and invocation-shape plane + +This sits between “plugin/app exists and is installed” truth and “runtime can explicitly invoke this target the way UI suggests” truth. 
+ +Its job is: + +- separate catalog/install truth from invocation-shape truth +- keep structured mention targeting, linked-text mention targeting, and implicit runtime discovery as separate concepts +- prevent composer or extension UI from overstating exact plugin/app invocation support on the chosen Codex execution seam + +Phase-1 rule: + +- do not let plugin/app install support imply first-class deterministic invocation support +- if the chosen seam is raw `codex exec` or current `@openai/codex-sdk`, phase 1 must explicitly say whether plugin/app invocation is: + - structured and exact + - linked-text mention based + - or not yet surfaced as an explicit UI affordance +- if invocation still depends on linked-text mentions, keep that behavior behind conservative copy and tests instead of presenting it like an app-server-grade structured contract + +### Live-stream and history-hydration plane + +This sits between active runtime notifications and replayable/history-bearing transcript truth. 
+ +Its job is: + +- separate active-turn streaming truth from replayable thread-history truth +- keep sparse `Turn` / `Thread` response payloads from being mistaken for fully hydrated history +- keep exact-log/task-log/reload consumers grounded in explicit hydration or persisted transcript projection instead of optimistic live caches + +Phase-1 rule: + +- do not let `turn/started`, `turn/completed`, or sparse thread payloads become the canonical history source for UI/transcript consumers +- if live `item/*` events are used for in-flight activity, that must stay a separate path from exact-log/replay/post-hoc reads +- any history used for resume, exact log, task log detail, or persisted transcript views must come from an explicit hydration/projector contract, not from whatever live notifications happened to be seen on one connection + +### Persisted-history policy plane + +This sits between native thread creation/resume/fork policy and later replay/exact-log/history hydration guarantees. + +Its job is: + +- keep richer persisted-history choice explicit at thread birth/resume/fork +- prevent mixed populations of native threads from looking equally replayable when some were created with lossy history policy +- keep replay/exact-log/reload truth honest about whether richer historical items can ever be hydrated later + +Phase-1 rule: + +- do not let persisted-history richness be an implicit side effect of whichever seam happens to start the thread +- if `persistExtendedHistory` is enabled, that choice must be explicit and stable enough for replay/exact-log guarantees +- if it is not enabled, UI/transcript/replay flows must not quietly assume richer historical completeness will appear later + +### Native-config, feature-state, and marketplace-ownership plane + +This sits between selective app-server enrichment and our current host-owned config/settings model. 
+ +Its job is: + +- keep process-wide native feature/config mutations from becoming hidden second authorities +- keep marketplace persistence and feature toggles aligned with what the host app can actually display and own +- prevent one thread or one helper API call from mutating global native state for unrelated sessions without explicit UI truth + +Phase-1 rule: + +- do not let `experimentalFeature/enablement/set`, `marketplace/add`, `config/value/write`, or `config/batchWrite` become implicit side effects of normal lane operation +- if any native config/feature mutation is allowed, it must go through one explicit host-owned bridge or be presented as an explicit global operation +- do not split truth between host config and native process-wide config without a reconciliation contract + +### Native-review thread-identity plane + +This sits between native review flows and our existing launch/chain/replay/task-log identity surfaces. + +Its job is: + +- keep inline review and detached review as separate identity behaviors +- prevent detached native review threads from being mistaken for activity on the original thread +- keep `/review` affordances honest about whether review stays inline or can spawn a secondary thread + +Phase-1 rule: + +- do not let native detached review create hidden second threads the app cannot model or replay honestly +- if detached review is unsupported, keep native review inline-only or keep `/review` affordances conservative +- if detached review is supported later, it must map explicitly into child-thread/sidechain truth rather than being inferred post hoc + +### Native binary-version and protocol-surface plane + +This sits between backend-lane identity and all capability/model/review/config claims that depend on the actual native runtime being executed. 
+ +Its job is: + +- distinguish backend lane id from the actual native executable and protocol surface in use +- keep capability, model, review, and interactive claims tied to the real native runtime identity rather than to one coarse lane label +- prevent cache/status/UI truth from assuming one universal `codex-native` behavior across bundled binaries, external CLIs, or different protocol surfaces + +Phase-1 rule: + +- do not let `selectedBackendId === 'codex-native'` stand in for the full native capability contract +- if the chosen seam can resolve either bundled SDK binary or external CLI, status and probe identity must keep that distinction explicit or stay conservative +- if any app-server enrichment later depends on experimental API opt-in, that experimental surface must be explicit in capability truth instead of being ambient or version-assumed + +### App-server connection-policy plane + +This sits between later selective app-server enrichment and the app's assumptions about capability visibility, notification truth, and debugging signals. + +Its job is: + +- keep connection-scoped protocol negotiation from masquerading as global runtime truth +- keep missing fields, methods, or notifications attributable to connection policy instead of to phantom runtime drift +- prevent multiple app-server connection profiles from quietly producing different capability or live-event views of the same native lane + +Phase-1 rule: + +- do not let future app-server use mix connection policies invisibly +- if app-server is added later, keep one canonical connection profile by default +- if multiple connection profiles exist later, capability and notification differences must be explicit in logs/status/debugging truth + +### Canonical-history and append-only-projection plane + +This sits between native thread history authority and our current append-only transcript/log readers. 
+ +Its job is: + +- keep canonical native history and append-only local projection from silently diverging after rollback or compaction +- prevent exact-log, replay, and task-log readers from trusting stale append-only tails after native history mutation +- force one explicit rule for how superseded history is represented after native rollback or native compaction changes replay truth + +Phase-1 rule: + +- do not let append-only local transcript automatically masquerade as canonical native history after rollback or compaction +- if append-only projection remains in phase 1, define how stale history is reconciled or marked superseded +- if canonical native history becomes authoritative, reload/exact-log/task-log must use that authority explicitly instead of relying on incremental append-only caches + +### Turn-metadata, usage, and reroute-authority plane + +This sits between native turn/session metadata and the app's current tendency to read usage/model truth from assistant transcript rows. 
+ +Its job is: + +- keep seam-specific usage truth from being guessed from transcript rows that only happen to exist on current lanes +- keep restored token usage after resume/fork/reload attributable to the native source that actually owns it +- keep final model/reasoning-effort truth honest when persisted-resume fallback or model reroute changes what actually ran +- keep turn-plan/diff/reroute metadata available in the normalized layer without forcing fake transcript fields when the chosen seam cannot project them truthfully + +Phase-1 rule: + +- do not assume the last assistant transcript row owns native usage/model truth +- if native usage/model/reroute/plan truth arrives outside transcript rows, keep that authority explicit +- if the chosen seam does not expose a truthful field, surface `unavailable` or normalized-only truth instead of silently backfilling from configured model or stale assistant-row metadata + +### Native thread-defaults and launch-intent plane + +This sits between host launch persistence and native thread-local runtime defaults that can be restored or mutated independently of the original launch request. 
+ +Its job is: + +- keep host launch intent separate from persisted native thread-defaults that may be restored on resume +- prevent relaunch, retry, restore, and runtime-summary surfaces from silently presenting launch-owned provider/model/effort as if they were still the live native thread defaults +- keep resume warnings and one-time model-switch semantics explicit when resumed native threads inherit or switch away from current launch intent + +Phase-1 rule: + +- do not assume saved launch params or config-owned provider/model/effort equal live native thread-defaults once a native thread has been resumed or had turn-level overrides applied +- if phase 1 allows resume, either persist enough native thread-default identity to explain the inherited runtime truth or force an explicit override or fresh-thread policy +- do not let resume guards compare only provider/model if effort or other native thread-default drift can still change real runtime behavior + +### Native thread-status and warning-authority plane + +This sits between native thread lifecycle truth and the host's current process, provisioning, and banner-style status surfaces. 
+ +Its job is: + +- keep native `thread.status` truth from being flattened into generic process liveness or provisioning progress +- keep thread-scoped warnings and config/startup warnings attributable to the surface that actually owns them +- prevent runtime cards, banners, and team status from silently treating `runtimeAlive` or process existence as equivalent to native thread health + +Phase-1 rule: + +- do not assume host process liveness equals native thread `active` or `idle` truth +- if phase 1 cannot consume native `thread.status` directly on the chosen seam, keep UI/status copy explicit about the limitation instead of silently inventing equivalent states +- do not collapse config warnings, thread-scoped runtime warnings, and process or provisioning warnings into one undifferentiated warning channel + +### Management plane + +This is where plugin lifecycle and provider-specific environment management live. + +For Codex plugins we want: + +- `plugin-kit-ai` as the management engine +- real Codex runtime as the execution engine + +That split must stay explicit. + +## Proposed Normalized Event Model + +The normalized layer should stay concept-level, not provider-wire-level. + +Recommended first event families: + +- `turn_started` +- `assistant_text` +- `reasoning` +- `usage_updated` +- `turn_plan_updated` +- `turn_diff_updated` +- `model_rerouted` +- `thread_defaults_restored` +- `tool_intent` +- `tool_progress` +- `tool_result` +- `mcp_call` +- `command_execution` +- `file_change` +- `approval_requested` +- `approval_resolved` +- `turn_completed` +- `turn_failed` +- `system_notice` + +### Mapping rule + +We should map provider-native activity into the **closest truthful normalized event**, not the closest Anthropic wire primitive. 
+ +Examples: + +- Anthropic `tool_use` -> `tool_intent` +- Anthropic `tool_result` -> `tool_result` +- Codex `mcp_tool_call` -> `mcp_call` +- Codex `command_execution` -> `command_execution` +- Codex text output item -> `assistant_text` +- Codex reasoning item -> `reasoning` +- Codex resume restoring persisted thread-local model/effort/defaults -> `thread_defaults_restored` + +### Non-goal + +The normalized layer should **not** try to preserve full provider-native reconstruction ability in phase 1. + +It should be optimized for: + +- correctness +- UI usefulness +- analytics usefulness +- transcript projection + +not for exact reverse-compilation back into provider-native streams. + +## Transcript Compatibility Strategy + +This is the most important addition to make the plan actually safe for `claude_team`. + +### Rule + +We should separate: + +- runtime execution contract +- normalized event contract +- persisted transcript contract + +Those are three different layers. + +### Phase-1 persisted transcript rule + +The first Codex-native rollout should keep a transcript shape that remains compatible with current `claude_team` parsers. + +That means: + +- no breaking replacement of current JSONL entry types +- no breaking replacement of current content block types +- no requirement that `claude_team` learn raw Codex item/event shapes first + +### What must remain safe initially + +The current parser contract recognizes entry types such as: + +- `user` +- `assistant` +- `system` +- `summary` +- `file-history-snapshot` +- `queue-operation` + +and content block types such as: + +- `text` +- `thinking` +- `tool_use` +- `tool_result` +- `image` + +So phase 1 should assume: + +- the persisted transcript contract remains backward-compatible with those expectations +- any new metadata is additive + +### Phase-1 transcript invariants + +Backward-compatible entry labels are necessary, but not sufficient. 
+ +The first Codex-native rollout should preserve these invariants: + +- streamed assistant transcript rows still carry stable `requestId` semantics for dedupe and approval correlation +- projected tool-result-like rows preserve `sourceToolUseID` and `sourceToolAssistantUUID` whenever there is a truthful originating action +- enriched `toolUseResult` remains available when current UI/read-model logic expects structured result data +- additive task metadata fields such as `boardTaskLinks` and `boardTaskToolActions` keep their existing contract shape +- rows that cannot truthfully satisfy those invariants must stay normalized-only instead of being forced into misleading transcript messages + +This is the minimum bar for claiming that phase 1 is transcript-compatible. + +### Phase-1 chain and sidechain invariants + +The first Codex-native rollout should also preserve these structural invariants: + +- persisted transcript rows still form a coherent `parentUuid` chain where current readers expect one +- rows that are not true transcript messages do not become accidental chain participants +- `isSidechain` remains truthful for member/subagent logs versus lead/main-thread logs +- `sessionId`, `agentId`, and `agentName` remain truthful enough for current team-log discovery and grouping logic +- projected internal-user/tool-result rows preserve current `isMeta` semantics where UI and analysis code already rely on that distinction + +This is the minimum bar for claiming that phase 1 is safe for team-log and subagent-related UI, not just generic transcript parsing. 
+ +### Phase-1 live request-correlation invariants + +The first Codex-native rollout should also preserve these live-state invariants: + +- approval-request-like events still expose a stable request identifier usable by `pendingApprovals` and `resolvedApprovals` +- streamed assistant activity still supports request-scoped dedupe where current UI/read-model code already depends on `requestId` +- projected tool activity does not invent tool-link ids when no truthful originating action exists +- activity rows and exact-log selectors do not silently merge unrelated actions just because they are temporally close +- exact-log detail selection still has enough request/tool anchor evidence to keep the right assistant row when multiple streamed rows share one request lifecycle + +This is the minimum bar for claiming that phase 1 is safe for live activity UX, not just persisted history UX. + +### Recommended transcript projector behavior + +The Codex-native lane should project normalized activity into the existing transcript family conservatively: + +- assistant/user/system rows remain parseable by existing JSONL parser +- additive metadata may be added the same way task-log metadata is added today +- provider-native thread identity may be stored additively +- provider-native event richness that does not fit current transcript rows can remain in the normalized layer instead of forcing new raw transcript entry kinds immediately + +### Why this matters + +Without this rule, the migration quietly becomes a `claude_team` transcript format rewrite. + +That is exactly the kind of hidden blast radius we want to avoid. + +## UI Integration Rule + +`claude_team` should not consume raw normalized runtime events directly as the first migration step. + +The safer sequence is: + +1. runtime backends emit provider-native events +2. orchestrator maps them to normalized events +3. orchestrator projects transcript-compatible persisted history +4. 
`claude_team` continues using existing transcript/read-model services +5. later, if useful, `claude_team` can adopt normalized DTOs more directly + +This reduces UI regression risk significantly. + +It also means: + +- approval UI, activity rows, and runtime noise handling continue to depend on stable request-correlation semantics during the first rollout +- a transcript-compatible projector alone is not enough if live request identity becomes ambiguous + +## Backend ID Compatibility Rule + +`codex-native` must be introduced as an additive shared backend identity, not as an implicit reinterpretation of an existing id. + +That means: + +- orchestrator runtime types must add `codex-native` explicitly +- persisted runtime preference config must add `codex-native` explicitly +- main-process runtime status mapping must carry `codex-native` through `selectedBackendId` and `resolvedBackendId` +- renderer selectors and settings UI must render the new id without breaking existing `api` and `adapter` flows +- tests that assert current backend option lists or current labels must be updated consciously, not by accident + +Practical rule: + +- if the new lane exists, the user should be able to see and reason about it as a distinct backend lane +- if the user still selected `api`, we must not silently run `codex-native` + +## What Changes Per Repo + +### `agent_teams_orchestrator` + +This repo takes the biggest change. 
+ +We want to: + +- introduce a provider-neutral normalized event/log model +- add adapter mappers from current Anthropic/Gemini style streams into that model +- add a separate `Codex-native` backend lane through `@openai/codex-sdk / codex exec` +- keep the current Codex adapter path alive as fallback during migration +- avoid forcing `codex exec` events into fake `tool_use/tool_result` transport semantics +- preserve explicit request-correlation semantics through normalized events and transcript projection +- preserve truthful chain and sidechain identity through transcript projection +- add an explicit runtime status/settings contract for `codex-native`, including backend option truth and model-probe policy +- add an explicit approval/control adaptation contract instead of assuming current `control_request` semantics automatically carry over +- decouple Codex auth-mode env construction from Codex backend-lane selection so API-key auth can coexist with a real Codex-native lane +- align app config schema, IPC validation, and launch granularity with the new backend vocabulary instead of leaving `codex-native` as a runtime-only hidden state +- keep phase-1 Codex backend routing honest about its real scope, which likely remains process-wide rather than teammate-specific +- make provisioning-readiness probe cache backend-aware or explicitly invalidated so backend/auth switches cannot leave stale lane truth in UI +- keep external Codex CLI detection separate from actual `codex-native` lane readiness in runtime status and installer/provisioning summaries +- define explicit option-state semantics so backend selectors and provisioning summaries do not collapse `selectable`, `available`, and `verified` into one ambiguous readiness label +- define degraded-status behavior so transient runtime-status failures cannot silently erase backend-lane truth + +We do **not** want to: + +- replace the current Codex backend in one shot +- rewrite all providers around Codex-native 
semantics +- make transcript/log normalization depend on Anthropic wire events +- hide a new `codex-native` lane behind the old `api` backend identity + +### `claude_team` + +This repo should stay relatively stable compared with the orchestrator. + +We want to: + +- keep one multimodel runtime concept +- stay capability-aware per provider/backend lane +- consume normalized runtime/log DTOs where helpful, but keep transcript/read-model compatibility stable during the first rollout +- integrate plugin management through `plugin-kit-ai` +- keep Codex plugin support gated behind the real Codex-native lane +- keep approval UX and request-correlated activity rendering stable +- keep sidechain/main-thread log discovery and grouping stable +- evolve runtime settings/provisioning UI so `codex-native` does not conflict with the current “Codex runtime follows connection mode” assumption +- keep model verification, provisioning readiness, and installer/runtime summaries truthful per backend lane +- stop UI copy and env plumbing from implying that `Codex API key` always means the old Responses API execution lane +- keep launch/provisioning UX honest about whether backend choice is provider-global or launch-specific +- avoid implying member-level mixed Codex backend lanes until launch/spawn plumbing can actually support them +- stop provisioning-readiness UI from reusing stale provider-scoped probe results after a backend/auth switch +- stop runtime settings or installer/provisioning UI from implying that a detected Codex CLI means the `codex-native` lane is already usable +- stop runtime selector UX from hiding or overstating `codex-native` because it still assumes backend options are governed only by `available` +- stop status-transport fallback from silently collapsing `codex-native` back into provider-only Codex truth +- separate connection-method copy from runtime-lane copy so banners and settings cannot describe the wrong lane with the right credentials + +We do **not** want to: + 
+- invent a fake Codex plugin support state while execution still goes through the old adapter lane +- force UI logic to infer runtime truth from provider labels alone +- accept a migration that breaks `selectedBackendId` / `resolvedBackendId` UI semantics or transcript invariants +- accept a migration that makes approval or request-correlation semantics ambiguous + +### `plugin-kit-ai` + +This repo remains the management engine, not the execution engine. + +We want to: + +- use it for catalog +- use it for discover +- use it for install/update/remove/repair +- use it for native Codex plugin placement through native marketplace/filesystem layout + +We do **not** want to: + +- make it responsible for running Codex plugins inside sessions +- blur installation and execution into one concern + +## Codex-Native Lane Contract + +The `Codex-native` lane should be treated as a distinct backend lane with its own capability truth. + +### Phase-1 lane guarantees + +Before we claim the lane is usable, it must prove: + +- API-key mode works +- working directory is respected +- streaming events can be consumed and normalized +- thread/session resume behavior is understood +- the chosen seam's headless-vs-interactive capability boundary is explicit and truthful +- basic approval/sandbox behavior is understood without overclaiming unsupported server-request-style interactivity +- completed-turn history/transcript completeness is understood under the chosen `ephemeral` or non-ephemeral seam policy +- transcript compatibility projection does not break current `claude_team` parsers/read models + +### Capability rule + +Codex plugin support must be gated by the lane, not just by the provider. 
+ +That means: + +- current adapter/API lane can keep `plugins: unsupported` +- `Codex-native` can become `plugins: supported` only after native plugin execution is actually proven in real sessions +- `Codex-native` must not implicitly become `manual approvals: supported` or `interactive prompts: supported` just because it is the native lane + +## Codex Plugins Strategy + +For Codex plugins we want: + +- native Codex runtime execution +- native Codex marketplace/filesystem placement +- provider-aware plugin management in `claude_team` + +Therefore: + +- `plugin-kit-ai` is the management engine +- real Codex runtime is the execution engine + +This is important because plugin installation and plugin execution are different concerns. + +Installing a native Codex plugin is not enough by itself if the session still runs through our current Responses API adapter path. + +## App Server Position + +`codex app-server` remains relevant, but not as the first critical path for this migration. + +It is better positioned as a later control-plane enhancement for things like: + +- auth state +- MCP status and OAuth flows +- skills/config inspection +- external config import + +For the first production rollout, it should not be the hard dependency for plugin lifecycle management. + +## Updated Post-Phase-0 Recommendation + +Phase 0 is now implementation-complete and evidence-backed. + +That changes the recommended next steps. + +We do **not** need Phase 1 to "fix" the native lane. 
+ +We need Phase 1 to: + +- make rollout truth safer +- unlock the lane deliberately instead of implicitly +- expand the lane from a locked experimental path into an internal-usable path without regressing the old Codex fallback + +Recommended sequence from here: + +### Phase 0.5 - minimal smoke E2E + +Assessment: + +- `🎯 10 🛡️ 9 🧠 4` +- Rough surface: `250-700` lines + +Goal: + +- add a tiny end-to-end smoke/regression layer on top of the Phase 0 sign-off proof + +Work: + +- orchestrator smoke proof for: + - raw native exec sign-off harness + - projected warning/thread-status/execution-summary truth + - `ephemeral` versus `persistent` history truth +- `claude_team` smoke test for: + - unified runtime-status -> provider status -> renderer summary truth + - transcript parser + exact-log parser over projected native rows +- keep these tests narrow and deterministic + +Exit gate: + +- one orchestrator native smoke command/evidence path is green +- one `claude_team` runtime-status smoke path is green +- one `claude_team` transcript/exact-log smoke path is green + +### Phase 1 - internal unlock preparation + +Assessment: + +- `🎯 9 🛡️ 9 🧠 5` +- Rough surface: `900-1800` lines + +Status as of 2026-04-19: + +- implementation-complete +- sign-off evidence captured in [codex-native-runtime-phase-1-signoff-evidence.md](/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/docs/research/codex-native-runtime-phase-1-signoff-evidence.md) + +Goal: + +- prepare `codex-native` for safe internal unlock without changing default provider behavior + +Work: + +- define exact internal unlock policy: + - who can enable the lane + - where the feature flag lives + - what "selectable but degraded" means +- keep capability truth conservative: + - plugins unsupported + - approvals unsupported + - generic interactive prompts unsupported + - no false MCP/app-server-grade claims +- make locked/degraded/ready native states explicit across: + - runtime status + - settings + - 
dashboard/runtime copy + - provisioning summaries +- keep old Codex lane the safe default and fallback +- add internal rollout evidence for: + - missing native credentials + - missing binary + - degraded native status + - fallback to old lane + +Exit gate: + +- `codex-native` can be enabled intentionally by internal users +- old Codex lane still remains default and healthy +- lane-specific degraded states are visible and honest + +### Phase 2 - limited internal unlock + +Assessment: + +- `🎯 8 🛡️ 8 🧠 6` +- Rough surface: `700-1500` lines + +Goal: + +- allow controlled real usage of the native lane while keeping rollout blast radius small + +Work: + +- make the lane selectable under explicit internal policy +- keep `auto` away from `codex-native` +- collect real-world evidence on: + - history completeness + - warning attribution + - thread-status truth + - launch/replay truth +- only after that revisit broader capability expansion + +Exit gate: + +- internal users can use the lane intentionally +- no major regressions in old Codex lane +- no false capability claims in UI/status/provisioning surfaces + +## Implementation Phases + +### Phase 0 - proof spike + +Goal: + +- reduce the biggest architectural unknowns before broader implementation + +Companion spec: + +- [codex-native-runtime-phase-0-implementation-spec.md](/Users/belief/dev/projects/claude/claude_team_codex_native_runtime_plan/docs/research/codex-native-runtime-phase-0-implementation-spec.md) + +Spike checks: + +- run a minimal `Codex-native` session through the chosen phase-0 execution seam +- capture streamed runtime events +- map them into a draft normalized event stream +- project a minimal transcript-compatible history sample +- verify `cwd`, API-key auth, and session completion behavior +- document where current permission/sandbox semantics match or diverge +- document how Codex thread id is stored without making it the sole UI history source +- explicitly compare SDK thread persistence behavior 
against raw `codex exec --ephemeral` +- explicitly identify whether the first lane can use SDK safely or needs a thinner raw CLI wrapper +- explicitly identify which provider-native interactive/control requests the chosen seam can surface at all versus which ones it rejects in headless mode +- explicitly lock whether phase 1 is a headless-limited lane on the chosen seam instead of implying app-server-grade interactivity +- explicitly compare non-ephemeral completed-turn backfill against `--ephemeral` runs so transcript completeness tradeoffs are visible, not assumed +- explicitly identify whether the chosen seam executes: + - a bundled SDK-resolved Codex binary + - an external user-installed Codex binary + - or both under different conditions +- explicitly document how request identity is obtained from the Codex-native lane and how it maps into approval/live-activity UX +- explicitly switch backend/auth inputs during the spike and verify whether provisioning-readiness cache invalidates or returns stale truth +- explicitly compare provisioning-readiness truth against backend-aware model verification after a lane switch +- explicitly compare “Codex CLI detected” truth against actual lane availability so status/install UI cannot overclaim readiness +- explicitly test whether backend selector UX can represent a lane that is selectable but not yet authenticated/verified +- explicitly break unified runtime-status transport during the spike and verify that UI sees degraded transport, not silent loss of backend-lane truth +- explicitly verify that settings/dashboard summaries still describe the chosen lane correctly when auth mode and backend lane no longer map 1:1 +- explicitly switch global Codex backend after saving launch params, draft team metadata, or restoring a backed-up team and verify replay/resume/runtime snapshots do not silently drift lanes without an explicit inherited-global contract +- explicitly compare team list summaries, draft cards, and synthetic 
provisioning cards against detailed runtime truth so summary surfaces do not imply lane certainty they do not actually carry + +Exit gate: + +- we understand whether the lane is good enough for a feature-flagged rollout +- we understand whether the chosen seam is headless-limited and what transcript/history recovery path phase 1 will depend on + +### Phase 1 - normalized layer first + +Goal: + +- introduce the normalized internal event/log layer without changing provider execution paths yet + +Work: + +- define the normalized event schema +- add projection from current Anthropic/Gemini/current Codex streams +- add transcript compatibility projection rules +- keep current `claude_team` transcript/read-model consumers working unchanged + +Exit gate: + +- current providers still work +- logs/transcript projection can run from normalized events +- current `ParsedMessage`/exact-log/task-log paths remain compatible + +### Phase 2 - feature-flagged Codex-native lane + +Goal: + +- add a real Codex runtime lane without making it the default immediately + +Work: + +- add `codex-native` backend lane +- keep current Codex adapter path as fallback +- gate the lane behind an explicit feature flag/runtime preference +- wire capability reporting per lane +- keep headless-seam limits explicit if phase 1 uses raw exec or the current SDK +- keep the chosen `ephemeral` or non-ephemeral backfill policy explicit in transcript/history handling + +Exit gate: + +- current Codex path still works +- Codex-native lane works in controlled tests +- headless or richer interaction limits are described truthfully for the chosen seam +- no false plugin-support claim yet unless actually proven + +### Phase 3 - plugin management integration + +Goal: + +- integrate `plugin-kit-ai` as the plugin management engine + +Work: + +- catalog +- discover +- install/update/remove/repair +- native Codex plugin placement + +Exit gate: + +- management truth is provider-aware +- native plugin placement works +- 
Codex plugin support in UI remains honest and lane-aware + +### Phase 4 - optional app-server enrichment + +Goal: + +- add selective control-plane value where it clearly reduces complexity + +Possible areas: + +- auth state +- MCP OAuth flows +- skills/config inspection +- external config import + +This phase is optional for the first production rollout. + +## Recommended First PR Sequence + +This is the safest order to avoid hidden blast radius. + +### PR 0 - decision freeze and backend lane naming + +Repos: + +- `claude_team` +- `agent_teams_orchestrator` + +Goal: + +- freeze the backend-lane vocabulary and rollout rules in code comments/docs/tests before runtime changes spread + +Must lock: + +- new Codex backend id naming +- capability gating rule +- transcript ownership rule +- transcript invariants that phase 1 is not allowed to break +- chain/sidechain invariants that phase 1 is not allowed to break +- whether the phase-0 spike is SDK-first, raw-exec-first, or undecided pending evidence +- whether `codex-native` is connection-managed or independently selectable in runtime settings/provisioning truth +- what the minimum truthful approval/control contract is for claiming manual approval support +- how Codex API-key auth interacts with backend selection env without silently forcing the old `api` lane +- whether the first rollout keeps backend choice global-per-provider or expands launch contracts to support per-launch lane selection +- whether backend routing remains process-scoped for phase 1 and how that limitation is reflected in team launch/provisioning UX +- what identities belong in the provisioning probe-cache key and which config/backend/auth changes must invalidate cached readiness immediately +- what exact contract separates external Codex CLI detection from `codex-native` lane selection, authentication, and verified readiness +- what exact semantics belong to `selectable`, `available`, `resolved`, and `verified` for backend options once 
`codex-native` is introduced +- what degraded-status contract preserves backend-lane truth when unified runtime-status transport fails transiently +- what wording contract separates Codex connection method labels from Codex runtime-lane labels across settings, dashboard, and provider detail views +- what sequencing/settledness contract governs progressive `cliStatus` snapshots versus explicit status/provider refresh requests +- what backend-aware truth model controls extension mutation preflight once Codex plugin support becomes lane-specific +- what runtime shape team model selectors and provisioning diagnostics are allowed to depend on once backend-lane truth matters +- what canonical backend/auth/probe identity keys reusable provider prepare/model results +- what launch params, `team.meta.json`, `members.meta.json`, backup artifacts, relaunch defaults, runtime snapshots, and resume guards persist about backend lane versus inheriting current global backend truth +- what backend-lane truth team summaries, draft cards, and provisioning snapshot cards expose versus intentionally omit +- what backend-lane truth member runtime summaries, bootstrap/system copy, and composer slash-command/plugin affordances expose versus intentionally omit +- what plugin-management result fields and UI states distinguish installed, active, usable in next thread, requires restart, and requires app/auth setup completion +- what invocation-shape truth phase 1 exposes for plugins/apps/skills on the chosen Codex seam: structured mention targeting, linked-text mention targeting, or no explicit targeting affordance yet +- what source is authoritative for active-turn streaming versus replayable/hydrated thread history, and how that rule protects exact-log/task-log/replay consumers from sparse live Codex payloads +- what event or contract clears pending approval/request-user-input state when the turn lifecycle resolves a request before the user answers +- what phase 1 does with provider-native 
`requestUserInput` and MCP elicitation requests that do not fit the current tool-approval UI +- whether the chosen phase-1 execution seam is explicitly headless-limited and which interactive/control features are therefore out of scope by seam, not just by UI +- whether phase 1 chooses `--ephemeral`, non-ephemeral exec with final backfill, or an explicit replacement hydration path for completed-turn item completeness +- what credential-routing contract authenticates raw exec or the current SDK for `codex-native`, and how that differs from the old `OPENAI_API_KEY` Codex lane +- what source is authoritative for `codex-native` model inventory, reasoning-effort options, disabled-model reasons, and preflight/default model choice +- what trust authority owns phase-1 `codex-native` launches, and whether native thread start is allowed to persist project trust at all +- what instruction channel owns phase-1 `codex-native` behavior among host system/bootstrap prompts, native base/developer instructions, and collaboration-mode built-ins +- what persisted-history policy phase 1 freezes for native thread start/resume/fork, and how lossy-vs-rich history truth is surfaced later +- whether any native app-server config/feature/marketplace mutation surface is allowed in phase 1 and, if so, through what host-owned bridge +- whether native review stays inline-only in phase 1 or whether detached review gets an explicit child-thread identity contract +- what native runtime identity fields are authoritative for capability truth beyond backend id: executable source, native binary version, protocol/capability revision, and stable-vs-experimental surface truth where relevant +- what one canonical app-server connection policy means later for `experimentalApi`, notification opt-out, and live subscription truth if selective app-server enrichment is introduced +- what source remains authoritative after native rollback or compaction mutates thread history: canonical native thread history, 
append-only local transcript, or one explicit reconciliation rule +- what source remains authoritative for native token usage, context-window truth, final model/reasoning-effort truth, and turn plan/diff/reroute metadata when those truths arrive outside assistant transcript rows +- what source remains authoritative when host launch intent differs from persisted native thread-defaults after resume or prior turn overrides, and how that drift is surfaced in config, summaries, resume guards, and relaunch truth +- what source remains authoritative for native thread loaded, active, idle, and system-error truth, and how that truth reconciles with host process liveness, provisioning state, and coarse runtime banners +- what warning channels remain distinct between native thread warnings, startup/config warnings, and process or provisioning warnings so the app never needs to guess which surface is actually unhealthy + +### PR 1 - normalized event schema only + +Repo: + +- `agent_teams_orchestrator` + +Goal: + +- add normalized event types and mappers for current lanes only + +Must not do: + +- no Codex-native execution yet +- no transcript contract change yet + +### PR 2 - transcript compatibility projector rules + +Repo: + +- `agent_teams_orchestrator` + +Goal: + +- define how normalized events project into persisted transcript-compatible history + +Must prove: + +- current `claude_team` parsers still work +- additive metadata pattern still holds +- `requestId`, tool-linking, and task-log enrichment invariants still hold +- approval/live request-correlation invariants still hold +- chain/sidechain identity invariants still hold +- runtime status/settings projection still stays truthful for backend summaries and provisioning status +- active live-stream events and replayable history remain separate enough that transcript readers, exact-log readers, and post-hoc task-log readers never depend on sparse live Codex payloads as canonical history +- pending approval/request state 
clears truthfully on explicit response, auto-resolution, interruption, or lifecycle cleanup without leaving stale renderer/store state +- unsupported interactive request types are either blocked explicitly or handled through a truthful UI path instead of silently stalling turns +- transcript/history completeness remains truthful under the chosen non-ephemeral-backfill or explicit-hydration strategy instead of depending on an implicit exec behavior that phase 1 has not frozen +- native-lane API-key readiness must come from the chosen exec/SDK credential-routing contract instead of inheriting old `OPENAI_API_KEY` readiness heuristics by accident +- lane-aware model inventory, disabled-model reasons, and reasoning-effort truth must stay aligned across verification probes, create/launch selectors, and runtime settings +- host trust boundary and native thread-start behavior must not drift into two different project-trust stories +- chosen instruction-owner policy must keep system/bootstrap behavior stable instead of duplicating or replacing it accidentally +- replay/exact-log/history projection must stay truthful under the chosen `persistExtendedHistory` policy instead of assuming retroactive history repair +- native config/feature/marketplace state must not mutate behind host-owned settings without one explicit source of truth +- native review affordances must not imply detached review support unless second-thread identity is modeled explicitly +- native status/probe/cache truth must not collapse bundled SDK binary, external CLI, and protocol-surface differences into one fake universal `codex-native` identity +- any later app-server enrichment must not let connection-policy drift masquerade as runtime capability or live-event drift +- canonical replay/history truth must not drift from append-only projected transcript after native rollback or compaction mutates thread-visible history +- native usage/model/reasoning-effort truth must not be inferred only from assistant 
transcript rows when the chosen seam exposes separate authoritative notifications or persisted thread metadata +- projected transcript, status, and warning surfaces must not collapse host launch intent and restored native thread-defaults into one fake runtime identity when those truths diverge +- projected transcript, status, and warning surfaces must not collapse native thread loaded or system-error truth into generic process-alive or provisioning-active status +- native thread warnings and startup/config warnings must stay attributable to distinct channels instead of degrading into one coarse “Codex warning” bucket + +### PR 3 - Codex-native spike lane under feature flag + +Repo: + +- `agent_teams_orchestrator` + +Goal: + +- add the real Codex-native runtime lane without making it default + +Must prove: + +- API-key path +- cwd behavior +- stream normalization +- safe failure behavior +- chosen SDK/raw-exec seam does not create unexplained session persistence drift +- request identity is stable enough for approval UX and streamed dedupe +- exact-log anchor selection still has enough evidence after projection to avoid wrong assistant-row retention +- sidechain/main-thread identity and transcript parent-chain behavior remain explainable after projection +- runtime settings/provisioning/model verification surfaces can represent the lane honestly +- approval/control events either adapt truthfully into current approval UX or stay explicitly unsupported/limited +- API-key auth can target the intended Codex lane without stale env coupling forcing `adapter` or `api` unexpectedly +- config validation, saved settings, and launch/provisioning summaries all describe the same backend vocabulary and the same selection granularity +- team launch and teammate spawn behavior do not imply mixed Codex backend lanes that the current process/env model cannot actually deliver +- provisioning-readiness and model verification stay aligned after backend/auth switches instead of splitting 
on stale cached probe truth +- runtime status and installer/provisioning summaries do not treat detected Codex CLI presence as equivalent to verified `codex-native` availability +- backend selector and runtime summaries can represent `codex-native` as selectable-but-not-ready without hiding it or falsely advertising it as ready +- settings/dashboard/provider summaries do not describe `codex-native` using old auth-only labels once backend-rich truth exists +- transient runtime-status fallback cannot erase `codex-native` backend identity, option-state semantics, or lane-specific status copy without marking degradation explicitly +- progressive status transport and explicit provider refresh must not race into mixed or downgraded backend-lane truth in store/UI +- extension preflight and install buttons must not enable Codex plugin management from provider-wide truth when the selected lane is still old Codex, degraded, or unverified +- create/launch dialogs must not validate or explain Codex model choice using provider-wide truth that hides the selected lane +- provisioning warmup/model cache must not reuse results across lanes based only on backend summary display text +- saved launch params, draft team metadata, backup/restore artifacts, relaunch prefill, runtime snapshots, and resume guards must not silently drift teams onto a different Codex lane after global backend settings change +- team summaries, draft cards, and provisioning snapshot cards must not imply backend-lane truth they cannot actually represent +- member runtime summaries, bootstrap/system copy, and composer slash-command/plugin suggestions must not imply backend-lane truth they cannot actually represent +- plugin install/update results must not overclaim “ready now” when Codex-native truth is only “installed, use in a new thread/restarted session” or “install finished but app/auth setup still incomplete” +- chosen Codex execution seam must not overclaim deterministic plugin/app invocation support if 
the real phase-1 truth is only linked-text mention parsing or implicit runtime discovery +- active turn notifications, sparse `Turn` / `Thread` payloads, and replayable history hydration must not be conflated into one cache or one truth path +- pending approvals and request-user-input state must not outlive the active turn/run because lifecycle cleanup was never mapped back into renderer/store truth +- generic user-input or MCP elicitation requests must not silently dead-end because the app only knows approval sheets +- chosen raw-exec or SDK seam must not overclaim manual approval, generic interactive prompt, dynamic-tool, or other server-request parity if the actual headless seam rejects those flows +- if `--ephemeral` is chosen, final-turn item completeness must still be recovered through an explicit tested path instead of depending on exec's non-ephemeral backfill behavior +- old Codex API-key readiness and `codex-native` API-key readiness must not drift because UI/runtime still checks only `OPENAI_API_KEY` while the chosen seam expects `CODEX_API_KEY` or explicit SDK `apiKey` +- static provider-wide Codex model catalogs and disabled-model heuristics must not silently stand in for native-lane model truth when the chosen seam exposes different model metadata or effort options +- native thread start/resume must not silently persist project trust or bypass host trust-gated env/hook/LSP behavior +- chosen instruction channel must not silently override or duplicate host system/bootstrap prompts through collaboration-mode or native developer-instruction precedence +- native thread replay/history behavior must not quietly mix lossy and rich persisted-history policies without explicit thread-level truth +- native config/feature/marketplace helpers must not mutate process-wide or persistent native state outside host-owned settings truth +- native review flows must not silently spawn detached review threads the app cannot model, reload, or explain +- native binary 
source/version/protocol surface must not silently change lane capability truth while status, probes, and UI still treat `codex-native` as one universal runtime +- any later app-server enrichment must not silently mix connection-scoped stable/experimental surface or notification-subscription policies while UI/debugging still expects one global truth +- rollback or compaction must not silently leave append-only local transcript, exact-log, and replay readers on stale pre-mutation history +- native usage replay on resume/fork/reload must not depend on assistant transcript rows that never carried the authoritative usage payload in the first place +- model reroute or persisted-resume model/reasoning-effort fallback must not leave status, provisioning, or transcript projection claiming the stale configured model +- resumed native threads must not silently inherit persisted model, effort, or other thread-defaults while launch config, summaries, and resume guards still claim host launch intent is the live runtime identity +- host process liveness, provisioning activity, or runtime snapshot presence must not masquerade as native thread active or healthy truth when the native thread is `notLoaded` or `systemError` +- status and warning projection must keep native thread warnings, config warnings, and provisioning or process warnings distinguishable enough that later UI or debugging can explain what actually failed + +### PR 4 - `claude_team` capability/UI adaptation + +Repo: + +- `claude_team` + +Goal: + +- make UI lane-aware without requiring a transcript format rewrite + +Must prove: + +- old Codex lane still renders honestly +- Codex-native lane does not overclaim plugin support +- dashboard/settings/status panels stay coherent while progressive status snapshots, provider refreshes, and model verification updates interleave +- dashboard, settings, provisioning, and team status surfaces distinguish host process or provisioning truth from native thread loaded, active, idle, 
and system-error truth instead of flattening them into one generic “runtime healthy” story +- banners, detail views, and runtime cards distinguish native thread warnings, config/startup warnings, and process/provisioning warnings instead of collapsing them into one coarse warning channel +- extension store mutation gating is backend-lane-aware for Codex instead of relying on provider-wide auth/capability shortcuts +- team model selectors and provisioning diagnostics are lane-aware enough to distinguish old Codex from `codex-native` +- provider prepare/model cache keys use canonical backend identity rather than UI summary text +- create/launch dialogs, draft-team retry flows, restore flows, and runtime details must say whether a team is pinned to a Codex lane or inheriting the current global lane instead of hiding that distinction +- team list/cards and provisioning snapshot cards either expose lane truth consistently or stay intentionally lane-agnostic without leaking lane-sensitive copy/actions +- member detail/cards, bootstrap/system summaries, and composer slash-command/plugin affordances either expose lane truth consistently or stay intentionally lane-agnostic without leaking lane-sensitive Codex capability hints +- launch dialogs, team/member runtime summaries, bootstrap/system copy, relaunch defaults, and restore flows must not present saved launch provider/model/effort as live runtime truth after a resumed native thread restored different defaults +- extension/plugin surfaces distinguish installed, usable in next thread, restart-required, and auth/setup-incomplete states instead of collapsing them into one generic “installed” story +- composer and extension/detail surfaces distinguish exact structured invocation support from linked-text or implicit invocation support instead of collapsing them into one generic “works with plugins/apps” story +- exact-log, task-log, replay, and reload flows stay grounded in explicit hydration or persisted transcript truth 
instead of opportunistically reusing partial live Codex event caches +- approval sheets, pending-approval blocks, and resolved approval icons reconcile explicit response and lifecycle cleanup truth without leaving stale pending rows +- any generic interactive prompt surfaced by Codex-native either has a truthful UI flow or an explicit unsupported-state treatment +- lane copy and capability UI do not imply app-server-grade interaction support when the selected execution seam is intentionally headless-limited +- settings/status/copy do not imply native-lane API-key readiness from old-lane credential checks alone +- settings/selectors/provisioning do not imply old provider-wide Codex model truth for a lane whose model inventory or reasoning-effort options differ +- trust/status/copy do not imply the workspace is trusted just because native Codex can start or because a native thread already exists +- bootstrap/system summaries and member/composer surfaces do not accidentally inherit hidden collaboration-mode built-ins or second instruction owners the UI cannot explain +- replay, reload, and exact-log surfaces can distinguish native threads with richer persisted history from native threads whose historical completeness is intentionally lossy +- runtime/settings/extensions surfaces do not drift from hidden native process-wide feature/config/marketplace state +- composer/runtime affordances do not imply detached `/review` behavior unless the resulting review-thread identity is surfaced honestly +- runtime/settings/provisioning/copy do not imply all `codex-native` lanes are capability-equivalent when executable source/version/protocol surface differs +- later app-server-backed UI/debugging surfaces do not imply every connection sees the same fields, methods, or notifications when connection policy differs +- replay/exact-log/task-log surfaces do not imply append-only local transcript is canonical after native rollback or compaction changed thread history +- context panels, 
token warnings, provisioning usage, and runtime copy do not imply assistant-row usage/model truth when native usage/model/reroute authority actually lives on separate seam-specific notifications or persisted metadata + +### PR 5 - `plugin-kit-ai` management integration + +Repos: + +- `plugin-kit-ai` +- `claude_team` + +Goal: + +- add provider-aware plugin management with truthful Codex-native execution gating + +Must prove: + +- native placement works +- install does not imply runtime execution unless the lane is actually Codex-native +- management responses and UI states distinguish installed, usable after new thread/restart, and still-needs-auth/setup truth instead of collapsing them into one success state +- management/runtime integration does not imply first-class explicit plugin/app targeting unless the chosen Codex seam really exposes that invocation shape +- management/runtime integration does not imply approval or generic interactive parity when the selected Codex-native execution seam is still headless-limited +- management/runtime integration does not imply a plugin is usable in a workspace whose trust boundary or native-thread history policy is still unresolved +- management/runtime integration does not mutate native global config, feature state, or marketplaces behind the host's back +- management/runtime integration does not imply plugin/runtime parity solely from backend id when native binary source or protocol surface differs +- management/runtime integration does not silently depend on a richer app-server connection profile than the rest of the app actually uses +- management/runtime integration does not rely on append-only local transcript truth when native rollback or compaction can supersede that history +- management/runtime integration does not infer native turn usage/model/reroute truth from transcript rows when the chosen execution seam exposes those truths elsewhere or not at all +- management/runtime integration does not treat host process 
liveness or coarse provisioning health as proof that the current native thread is loaded, active, or warning-free + +## Required Fixture Matrix + +Broad enablement should stay blocked until the rollout has explicit fixtures for the highest-risk drift classes. + +### `agent_teams_orchestrator` fixtures + +- `old-codex-selected` + - selected/resolved lane remains old Codex + - plugin capability stays unsupported + - normalized events and transcript projection stay stable +- `codex-native-selectable-not-verified` + - `codex-native` appears in backend options + - option-state truth distinguishes `selectable` from `available` and `verified` + - status payloads do not collapse back into old Codex copy +- `codex-native-degraded-status-fallback` + - transient runtime-status failure preserves last known lane truth or emits explicit degraded truth + - backend ids/options do not disappear silently +- `progressive-status-race` + - interleave progressive status snapshots, explicit refresh, and provider-model verification updates + - fresher backend truth wins deterministically +- `plugin-installed-next-thread-only` + - native placement succeeds + - current-session activation is still false/unknown + - result truth says next-thread or restart required +- `plugin-installed-auth-incomplete` + - install succeeds + - plugin remains blocked on app/auth setup + - result truth stays distinct from generic success +- `linked-mention-only-seam` + - chosen SDK/raw-exec seam can invoke plugin/app only through linked-text mentions + - normalized/runtime truth does not overclaim structured targeting support +- `live-turn-stream-vs-hydrated-history` + - active `item/*` notifications stream normally + - `turn/*` and `thread/*` payloads stay sparse as documented + - reconnect/unsubscribe/reload still requires explicit hydration for canonical history + - explicit hydration or persisted transcript projection remains the canonical replay/history source +- 
`approval-lifecycle-cleanup-without-user-response` + - approval or user-input request becomes non-pending because the turn completed/interrupted/restarted + - renderer/store truth clears pending state without faking a user decision +- `interactive-request-unsupported-or-handled` + - runtime emits `requestUserInput` or MCP elicitation + - phase-1 behavior is explicit: handled truthfully or blocked with a clear limitation +- `exec-headless-rejects-interactive-server-requests` + - chosen raw-exec or SDK seam rejects approval/user-input/dynamic-tool-style server requests exactly as expected + - lane capability truth stays conservative instead of pretending these flows are app-supported +- `bundled-sdk-binary-vs-external-cli-detection` + - chosen seam's real executable source is explicit + - external CLI detection stays advisory when the lane actually runs through a bundled SDK-resolved binary +- `codex-native-api-key-routing` + - old Codex API-key mode and native exec/SDK lane do not silently share one fake readiness source + - chosen seam gets the credential in the shape it actually expects + - status/issues/copy reflect native-lane auth truth rather than provider-wide `OPENAI_API_KEY` truth +- `native-lane-model-inventory` + - chosen lane's model list, disabled-model reasons, and reasoning-effort options do not silently reuse old provider-wide Codex catalog truth + - verification probes and selectors agree on what the lane actually supports +- `resume-persisted-thread-defaults-vs-launch-intent` + - resumed native thread restoring persisted model, effort, or other thread-defaults does not silently masquerade as the current launch intent + - normalized, status, and transcript truth either shows inherited defaults honestly or applies an explicit override or fresh-thread policy +- `resume-model-switch-warning-vs-runtime-copy` + - resuming with a different requested model or default set does not leave runtime copy, provisioning copy, or relaunch truth claiming the switch 
already happened before the next turn proves it +- `thread-system-error-vs-process-alive` + - native thread can enter `systemError` while the host process remains alive + - normalized, status, and warning truth does not report the lane healthy from process liveness alone +- `thread-not-loaded-vs-runtime-still-running` + - unsubscribe, inactivity, or explicit thread close can return native thread truth to `notLoaded` while host runtime/process still exists + - status and projection distinguish loaded-thread truth from generic runtime availability +- `thread-warning-vs-config-warning-truth` + - thread-scoped runtime warnings and startup/config warnings remain attributable to distinct channels + - status, transcript, and later UI projection do not collapse them into one coarse warning state +- `native-trust-does-not-bypass-host-trust-boundary` + - native thread start/resume in writable/full-access mode does not silently mark the workspace trusted before host trust is accepted + - host trust-gated env/hook/LSP behavior remains under one explicit authority +- `collaboration-mode-does-not-double-inject-system-instructions` + - chosen instruction-owner policy prevents hidden collaboration-mode or native developer-instruction layers from duplicating or replacing bootstrap/system prompt truth + - host-selected model/effort/prompt semantics remain stable under the chosen lane +- `persist-extended-history-policy-frozen-at-thread-birth` + - native thread start/resume/fork history richness is explicit + - replay/exact-log truth can distinguish rich persisted history from intentionally lossy history + - later config changes do not pretend to retroactively repair older threads +- `native-config-does-not-bypass-host-settings-ownership` + - native config/feature/marketplace mutation surfaces do not silently create a second settings authority + - any allowed mutation path is explicit and reconciled with host-owned config truth +- `native-review-inline-vs-detached-policy` + - review 
affordances and runtime behavior agree on whether native review is inline-only or can spawn a detached review thread + - detached review does not create hidden second-thread activity +- `native-binary-version-and-protocol-skew` + - bundled SDK binary and external CLI with different versions or protocol surfaces do not collapse into one fake capability/readiness/model truth + - cache/probe identity stays tied to the actual native runtime identity in use +- `app-server-connection-policy-skew` + - future selective app-server enrichment does not get different fields, methods, or live notifications merely because one connection opted into a different policy + - missing notifications stay diagnosable as connection-policy drift rather than phantom runtime breakage +- `native-history-mutation-vs-append-only-projection` + - native rollback or compaction does not leave append-only projected transcript, exact-log, or replay on stale pre-mutation history + - canonical-history reconciliation is explicit and testable +- `native-token-usage-replay-vs-assistant-row` + - native usage after resume/fork/reload comes from the chosen seam's authoritative source + - context-window and usage truth do not depend on assistant transcript rows carrying the same payload shape +- `native-model-reroute-vs-configured-model` + - rerouted or persisted-resume model/reasoning-effort truth does not leave status, provisioning, or transcript projection claiming the stale configured model +- `native-plan-diff-metadata-authority` + - turn plan/diff metadata is either projected truthfully from a supported seam or stays normalized-only / unavailable by explicit contract +- `ephemeral-turn-completed-without-backfill` + - chosen ephemeral seam does not get exec's final non-ephemeral `thread/read` item backfill + - transcript/history projector still produces truthful post-turn history through an explicit tested recovery path +- `non-ephemeral-completed-turn-backfill` + - chosen non-ephemeral exec seam 
recovers completed-turn items through final backfill + - transcript/history projector does not accidentally depend on a behavior that disappears if seam policy changes +- `team-replay-after-global-lane-switch` + - save launch params or draft metadata on one lane + - switch global Codex backend + - replay/relaunch/restore outcome is explicitly inherited-global or explicitly pinned +- `request-chain-invariants` + - projected Codex-native activity preserves: + - `requestId` + - tool-link fields + - `parentUuid` + - `logicalParentUuid` + - `isSidechain` + - `isMeta` + +### `claude_team` fixtures + +- `runtime-selector-visible-but-not-ready` + - backend selector can show `codex-native` without falsely presenting it as ready + - summary/copy remains lane-aware +- `plugin-installed-not-active-ui` + - extension store/detail shows install success without claiming current-session activation + - next-thread/restart guidance is explicit +- `plugin-auth-followup-ui` + - extension surfaces keep “auth/setup still required” separate from “installed and usable” +- `mention-targeting-copy` + - composer/detail UI distinguishes exact structured targeting from linked-text-only targeting +- `exact-log-hydrated-after-live-stream` + - live Codex activity can render progressively + - exact-log/task-log reload still comes from hydrated or persisted transcript truth rather than stale live event cache +- `approval-cleared-on-lifecycle` + - approval sheet and pending-approval UI clear correctly when runtime cleanup happens without explicit allow/deny + - resolved state does not incorrectly imply a user decision +- `generic-runtime-prompt-ui-truth` + - user-input or MCP-elicitation flows do not masquerade as tool approvals + - unsupported flows are visibly blocked instead of silently hanging +- `headless-lane-capability-copy` + - runtime/settings/detail/composer copy does not imply manual approval or generic interactive support on a headless-limited exec seam +- `native-lane-auth-copy` + - 
settings/status/detail copy does not imply `codex-native` API-key readiness from old Responses-API credential checks alone +- `native-lane-model-copy` + - create/launch selectors, runtime settings, and provisioning hints do not imply the old Codex model catalog when the selected lane carries different model or effort truth +- `native-trust-copy` + - status/settings/detail copy does not imply native thread start or writable sandbox means the workspace passed the host trust boundary +- `instruction-owner-copy` + - bootstrap/member/composer/detail surfaces do not leak hidden collaboration-mode or native developer-instruction behavior the UI cannot explain +- `persisted-history-truth-copy` + - replay/reload/exact-log surfaces can tell when native-thread history is rich versus intentionally lossy +- `native-config-ownership-copy` + - runtime/settings/extensions surfaces do not imply host config is authoritative while hidden native process-wide state says otherwise +- `native-review-copy` + - composer/runtime/detail surfaces do not imply detached review support unless review-thread identity is surfaced honestly +- `native-runtime-identity-copy` + - runtime/settings/provisioning copy does not imply all `codex-native` lanes are capability-equivalent when executable source/version/protocol surface differs +- `app-server-connection-policy-copy` + - later app-server-backed debug/status copy does not imply every connection sees the same surface when connection negotiation differs +- `canonical-history-copy` + - replay/exact-log/task-log copy does not imply append-only local transcript remains canonical after native rollback or compaction changes thread history +- `context-panel-native-usage-truth` + - context panel, token usage widgets, and provisioning usage copy do not guess native usage or context-window truth from stale assistant rows + - restored usage or unavailable usage is shown honestly +- `native-reroute-copy` + - runtime/settings/provisioning/detail copy does not 
imply the configured model still ran when native reroute or persisted-resume model/effort truth says otherwise +- `launch-intent-vs-native-defaults-copy` + - launch dialogs, runtime details, and relaunch summaries do not present saved launch provider/model/effort as live runtime truth after a resumed native thread restored different defaults +- `resume-default-drift-warning-copy` + - resumed native thread default drift is either shown honestly or blocked by explicit fresh-thread or override policy instead of being hidden behind unchanged launch badges +- `native-thread-status-vs-process-copy` + - dashboard, settings, provisioning, and team detail copy do not equate process alive or provisioning active with native thread active or healthy truth + - `notLoaded`, `idle`, and `systemError` states remain explainable even when the host runtime still exists +- `warning-channel-copy` + - config warnings, native thread warnings, and process/provisioning warnings stay distinguishable in banners, detail views, and runtime cards +- `team-list-vs-detail-lane-truth` + - team cards, provisioning snapshots, and runtime details do not disagree about pinned-vs-inherited lane identity +- `member-summary-vs-runtime-truth` + - member runtime summary, bootstrap/system copy, and composer affordances do not overstate Codex-native capability or lane truth +- `provisioning-cache-switch` + - switching backend/auth invalidates or bypasses stale prepare/probe truth + - dialogs do not show old-lane readiness after the switch + +### `plugin-kit-ai` fixtures + +- `native-placement-without-runtime-execution` + - placement succeeds on disk + - contract truth does not imply active runtime execution +- `post-install-followup-truth` + - contract can represent: + - usable after new thread/restart + - auth/setup still required + - old lane selected so runtime execution still unsupported + +Practical rule: + +- if a risky seam has no explicit fixture, phase 1 should assume the seam is still unsafe + +## 
Acceptance Gates By Repo + +### `agent_teams_orchestrator` + +The work is not ready if: + +- Codex-native still depends on fake Anthropic tool loop assumptions +- normalized events cannot explain runtime activity needed by transcripts/UI +- transcript compatibility projection is still unspecified +- `codex-native` backend identity is not represented consistently in config/status payloads +- phase-0 spike still leaves SDK-vs-raw-exec persistence behavior ambiguous +- request-correlation semantics are still too vague for approval/live activity consumers +- chain/sidechain projection still leaves `parentUuid`, `isSidechain`, or `isMeta` semantics ambiguous +- runtime status, backend option lists, or model-probe policy still treat `codex-native` as an invisible variant of old Codex +- approval/control adaptation is still vague enough that allow/deny semantics or deadlock behavior are guesswork +- connection-mode env plumbing still silently rewrites Codex backend truth in a way that can bypass the new lane +- app config validation or launch contracts still reject or hide the backend vocabulary needed by the new lane +- launch/provisioning or teammate override UX implies per-member Codex backend choice while backend routing is still process-scoped +- provisioning probe cache still reuses provider-scoped readiness across backend/auth changes or lacks deterministic invalidation rules for the new lane +- runtime status or installer snapshots still let “Codex CLI detected” overrule actual lane availability/authentication truth +- renderer/backend-selector logic still assumes `available` is the only meaningful backend-option state once `codex-native` exists +- runtime-status fallback still collapses backend-rich Codex truth into generic provider-only fallback without an explicit degraded-state contract +- Codex status banners or settings summaries still derive “current runtime” from auth mode instead of backend lane when `codex-native` is available +- progressive status 
snapshots can still overwrite fresher provider/backend truth without explicit sequencing or settledness semantics +- team model/runtime helpers still collapse Codex into provider-wide auth/backend summary truth, making lane-specific model rules impossible to express +- team launch requests, draft metadata, backup artifacts, relaunch defaults, runtime snapshots, or resume guards still hide whether Codex backend lane is persisted or inherited, allowing silent lane drift after global settings changes +- team summaries, draft cards, or provisioning snapshot cards still cannot represent lane truth honestly enough for the UI surfaces that rely on them +- member runtime summaries, bootstrap/system copy, or composer slash-command/plugin affordances still key off provider-wide Codex truth where lane-specific semantics already differ +- plugin install/update results, activation states, or setup/auth follow-up truth still collapse installed, active-now, next-thread-visible, and app-auth-incomplete semantics into one generic success state +- chosen Codex execution seam still blurs structured invocation, linked-text mention invocation, and implicit plugin/app discovery enough that UI cannot describe plugin/app targeting honestly +- active live-stream truth and replayable history truth are still conflated enough that exact-log/replay consumers could read sparse live Codex payloads as canonical history +- approval/request cleanup semantics are still vague enough that interrupted or replaced turns can leave stale pending state +- provider-native generic interactive prompts still have no explicit phase-1 handling rule +- chosen raw-exec or SDK seam still overclaims approval, generic interactive, dynamic-tool, or other server-request parity that the headless seam explicitly does not provide +- chosen `--ephemeral` or non-ephemeral policy still leaves completed-turn item completeness ambiguous enough that transcript/history projection depends on unstated backfill behavior +- 
native-lane auth readiness still reuses old `OPENAI_API_KEY` heuristics even though the chosen exec/SDK seam authenticates differently +- native-lane model availability, disabled-model reasons, or reasoning-effort options still reuse old provider-wide Codex catalog truth +- native thread start/resume can still mutate project trust or bypass host trust-gated env/hook/LSP behavior without one explicit trust owner +- instruction ownership across host system/bootstrap prompts, native base/developer instructions, and collaboration-mode built-ins is still ambiguous enough that runtime behavior can drift silently +- native-thread replay/history truth still depends on implicit `persistExtendedHistory` policy instead of an explicit thread-level contract +- native-lane capability, model, or review truth still depends only on backend id while actual native binary source/version/protocol surface can differ +- later selective app-server enrichment can still vary capability or live notification truth by connection policy without one canonical connection profile +- native rollback or compaction can still mutate canonical history while append-only local transcript/log readers continue serving stale pre-mutation truth +- native usage/context/model/reroute truth can still be lost or guessed because the host only trusts assistant transcript rows while the chosen native seam delivers those truths separately or not at all +- host launch intent and persisted native thread-defaults can still drift silently enough that resume, relaunch, restore, or runtime copy tell a different runtime story than the actual native thread +- native thread status or warning truth can still collapse into process liveness, provisioning progress, or coarse provider banners, leaving `systemError` or `notLoaded` states invisible +- required high-risk fixtures for lane truth, status races, replay identity, plugin activation, invocation shape, history hydration, approval cleanup, and interactive prompts do not 
exist yet + +### `claude_team` + +The work is not ready if: + +- it needs a breaking transcript parser rewrite for the first rollout +- it infers Codex plugin support from provider id instead of backend lane truth +- task-log and exact-log paths regress +- `selectedBackendId` / `resolvedBackendId` UX becomes misleading or ambiguous +- transcript invariants like `requestId` and tool-link fields are lost for projected Codex-native activity +- pending approval UX or request-scoped activity indicators become ambiguous or lossy +- sidechain/main-thread task logs or subagent-linked views regress because projected identity fields drift +- runtime settings still special-case Codex as connection-managed-only when a real `codex-native` lane exists +- provisioning readiness or model verification UI silently reports old Codex backend truth for the new lane +- connection/auth UI copy or saved settings still imply that Codex API-key auth always means the old Responses API backend +- launch/provisioning UX implies per-team or per-task backend control when backend selection is still only global-per-provider +- team spawn/runtime logs can still only inherit one process-level Codex backend while UI suggests mixed member-level lanes +- provisioning-readiness UI can still show stale old-lane readiness after a Codex backend/auth change because probe cache identity or invalidation is too coarse +- runtime settings or installer/provisioning UI still imply `codex-native` readiness from generic Codex CLI detection instead of lane-specific status truth +- runtime/backend selector UX still cannot represent a lane that is intentionally selectable but not yet verified +- transport failures in runtime status can still make `codex-native` disappear or revert to old connection-managed-only semantics in UI +- lane-aware backend truth still gets translated back into old `Codex subscription` / `OpenAI API key` runtime copy in a way that misdescribes the active lane +- extension store banners, 
install buttons, or mutation preflight still rely on coarse provider/runtime truth and can misstate Codex plugin availability for the selected lane +- team create/launch dialogs still use runtime helper types that omit backend-lane identity needed for Codex-native model/provisioning truth +- provider prepare/model cache still keys off backend summary copy instead of canonical backend identity +- saved launch params, draft metadata, restore flows, relaunch prefill, runtime cards, or resume behavior still hide lane identity badly enough that a team can replay on a different Codex backend without the UI noticing +- team list/cards or synthetic provisioning cards still imply lane truth they do not actually carry, or stay so lane-blind that they mislead users about pinned-vs-inherited runtime identity +- member cards/detail, bootstrap/system copy, or composer capability hints still imply old Codex and `codex-native` are equivalent because they only key off `providerId` / `model` +- extension/plugin UX still implies Codex-native install success means immediate current-session activation when the real truth is only next-thread/restart visibility or pending app/auth setup +- composer, slash-command, or extension-detail UX still implies exact plugin/app targeting support when the chosen Codex seam only gives us linked-text mention parsing or implicit runtime behavior +- exact-log/task-log/reload flows can still confuse live Codex event caches with hydrated transcript history +- approval UI can still leave stale pending rows or wrong resolved icons when runtime cleanup happens without explicit allow/deny +- generic runtime prompts or MCP elicitations can still hang because no truthful UI path exists +- runtime/settings/member/composer copy still implies app-server-grade interactivity for a headless-limited exec seam +- runtime/settings/status copy still implies `codex-native` API-key readiness from the old Codex lane's credential surface +- selectors/settings/provisioning 
still imply the old provider-wide Codex model catalog for a native lane with different model metadata or effort options +- trust/status/copy still implies native thread existence or writable sandbox means the workspace passed our host trust boundary +- bootstrap/member/composer surfaces can still be influenced by hidden collaboration-mode or native developer-instruction layers the UI cannot inspect or explain +- replay/exact-log/reload still cannot tell whether a native thread was created with rich or intentionally lossy persisted-history policy +- UI/settings/provisioning still imply one universal `codex-native` capability story even when native executable source/version/protocol surface can differ +- later app-server-backed surfaces still imply one global capability/notification truth even when different connections negotiated different app-server surfaces +- replay/exact-log/task-log can still imply append-only projected transcript is canonical even after native rollback or compaction superseded that history +- context panels, provisioning usage, token warnings, or runtime copy still assume assistant transcript rows own native usage/model truth even when the chosen seam routes those truths separately +- launch dialogs, runtime details, relaunch defaults, or bootstrap and member summaries still present saved launch provider/model/effort as live runtime truth after a resumed native thread restored different defaults +- status banners, runtime cards, provisioning summaries, or team detail views still equate host process/provisioning truth with native thread loaded or healthy truth +- warning copy still collapses native thread warnings, config warnings, and provisioning/process warnings into one undifferentiated status message +- required high-risk fixtures for selector truth, extension activation truth, mention-targeting copy, replay/provisioning drift, history hydration, approval cleanup, and interactive prompts do not exist yet + +### `plugin-kit-ai` + +The work 
is not ready if: + +- install/update/remove/discover truth is not machine-readable enough for app use +- native placement success is confused with runtime execution success +- management integration still cannot surface follow-up truth like “use in a new thread/restarted session” or “app/auth setup still required” when Codex-native plugin placement succeeds +- required management fixtures for placement-without-execution and post-install follow-up truth do not exist yet + +## No-Go Conditions + +We should not enable `codex-native` broadly if any of these are still true: + +- normalized projection still drops critical runtime activity needed by UI or transcripts +- lane-level capability reporting cannot distinguish the old Codex path from the real Codex-native path +- session resume semantics are still unclear enough to risk dual-persistence bugs +- plugin support would still be advertised while execution remains on the old adapter lane +- the new lane forces Anthropic/Gemini behavior regressions just to keep one fake protocol +- the first rollout requires `claude_team` to adopt a breaking new transcript format +- backend selection settings or UI still cannot represent `codex-native` honestly +- the chosen SDK/CLI seam still makes session persistence behavior implicit instead of explicit +- live approval or request-correlation behavior is still under-specified enough to risk wrong approvals or wrong dedupe +- chain/sidechain identity is still under-specified enough to risk broken task-log grouping or subagent linkage +- runtime status/provisioning/model verification surfaces still cannot represent `codex-native` truthfully +- approval/control adaptation still cannot describe a safe allow/deny loop without hand-waving +- auth-mode env routing still forces the old Codex backend semantics even when the selected runtime lane is `codex-native` +- config schema and launch granularity are still inconsistent enough that the user can select a lane the app cannot actually persist or 
launch honestly +- process-scoped backend routing is still hidden enough that the user can configure mixed Codex lanes the runtime cannot actually realize +- provisioning probe cache can still mask backend/auth changes long enough to leave readiness truth out of sync with model verification or backend selection UI +- external Codex CLI detection is still being interpreted as lane readiness or plugin support truth for `codex-native` +- backend option-state semantics are still loose enough that `codex-native` cannot be shown honestly before it is fully ready +- backend-rich Codex truth is still too easy to lose during transient status transport failure, making UI behavior nondeterministic +- runtime summary wording is still too tied to auth mode to safely explain `codex-native` in dashboard/settings/provisioning UX +- progressive `cliStatus` updates can still race explicit status/provider refresh paths and silently downgrade backend-lane truth +- extension action gating still uses provider-wide truth where `codex-native` needs backend-lane-specific readiness +- create/launch model selection and provisioning still collapse Codex into provider-wide truth, making lane-specific model handling too ambiguous to ship safely +- provisioning prepare/model cache still depends on summary-copy identity rather than canonical backend identity +- persisted team identity, replay, or resume still cannot distinguish intentional global-backend inheritance from accidental Codex lane drift +- team summaries and list surfaces still cannot express lane truth or intentional lane-agnosticism clearly enough to avoid misleading team-level UI +- member runtime summaries, bootstrap/system copy, or composer capability hints still cannot express lane truth or intentional lane-agnosticism clearly enough to avoid misleading member-level UI +- extension/plugin UX still cannot express installed-vs-active-vs-usable truth clearly enough to avoid overstating Codex-native plugin readiness +- plugin/app 
invocation affordances still cannot express structured-vs-linked-text targeting truth clearly enough to avoid overstating Codex-native integration maturity +- active live notifications can still masquerade as canonical history for replay/exact-log/task-log consumers +- approval lifecycle cleanup can still masquerade as user resolution or fail to clear pending state +- generic provider-native interactive prompts can still be unsupported in practice while the lane appears otherwise feature-complete +- the chosen exec/SDK seam still looks interactive-capable in UI or status copy even though the seam itself is headless-limited +- the chosen `--ephemeral` / non-ephemeral seam policy still leaves final-turn transcript completeness dependent on implicit exec backfill behavior +- the chosen `codex-native` auth path still looks ready in UI while credential-routing remains wired only for the old Codex lane +- the chosen `codex-native` lane still looks model-compatible in UI while selectors/probes use only old provider-wide Codex catalog truth +- native Codex start/resume can still create or imply project trust outside the host trust contract +- collaboration-mode or native developer-instruction precedence can still change runtime behavior without one explicit instruction owner +- native-thread history completeness can still depend on implicit `persistExtendedHistory` behavior that replay/exact-log/UI never surface +- backend id can still masquerade as full native capability truth even when bundled SDK binary, external CLI, or protocol surface differ +- later app-server enrichment can still masquerade as globally consistent even when connection-scoped negotiation changes which methods, fields, or notifications are visible +- native history mutation can still leave append-only local transcript, incremental file watchers, and replay readers out of sync on what the conversation canonically contains +- native token usage, context-window truth, or final model/reroute truth can 
still be guessed from assistant transcript rows even though the chosen seam exposes those truths separately or not at all +- host launch intent and persisted native thread-defaults can still drift without one explicit authority or visible warning, leaving resume, relaunch, restore, or runtime-summary truth inconsistent with the actual native thread +- native thread loaded, active, idle, or system-error truth can still collapse into host process or provisioning truth, making thread health invisible or misleading +- config warnings, native thread warnings, and provisioning/process warnings can still collapse into one coarse status story +- the required high-risk fixture matrix still does not exist, leaving the riskiest Codex-native seams unpinned against regression + +## Main Risks And Guardrails + +### Risk 1 - treating `codex-sdk/exec` as a transport-only swap + +This is the most dangerous mistake. + +Guardrail: + +- treat `Codex-native` as a separate runtime lane +- normalize logs/events above it +- do not assume the current Anthropic-shaped tool loop can be preserved unchanged + +### Risk 2 - claiming Codex plugin support too early + +Installing native Codex plugins is not enough if execution still runs through the current adapter path. + +Guardrail: + +- only advertise Codex plugin support when the session actually runs through the Codex-native lane + +### Risk 3 - overcommitting to `app-server` too early + +`codex app-server` is useful, but it should not become a hard dependency for the first production plugin rollout. + +Guardrail: + +- use it later for selective control-plane features +- do not block the first migration on `app-server plugin/*` + +### Risk 4 - designing the normalized layer as an Anthropic alias + +If the normalized layer is secretly just Anthropic wire semantics with renamed fields, it will create false constraints and future bugs. 
+ +Guardrail: + +- normalize to concepts +- not to one provider's transport + +### Risk 5 - dual session truth + +The current orchestrator already has session/transcript logic, while real Codex runtime also has its own session model. + +Guardrail: + +- keep `Codex-native` feature-flagged until resume and transcript ownership are understood well enough + +### Risk 6 - hidden transcript-format rewrite + +This is the biggest UI risk. + +Guardrail: + +- keep transcript compatibility as a first-class phase-0/phase-1 constraint +- treat additive transcript enrichment as the default pattern +- do not require `claude_team` exact-log or task-log services to learn raw Codex-native item shapes in the first rollout + +### Risk 7 - backend-id drift between orchestrator and UI + +`codex-native` looks small as a concept, but backend ids are already part of shared config and UI payloads. + +Guardrail: + +- treat backend-id expansion as a first-class contract change +- update orchestrator config types, runtime status payloads, main mapping, renderer selectors, and tests together +- do not ship a lane whose identity only exists in one repo + +### Risk 8 - accidental durable Codex session ownership + +If we go SDK-first without addressing the current `ephemeral` gap, we may accidentally make durable Codex session storage part of the rollout semantics before we intend to. + +Guardrail: + +- make SDK-vs-raw-exec an explicit phase-0 checkpoint +- require the spike to document persistence behavior, resume behavior, and whether the lane can run without durable Codex-owned sessions +- do not hand-wave this away as an implementation detail + +### Risk 9 - request-correlation drift between runtime, normalized events, and UI + +If request identity stops meaning the same thing across layers, approval UX, exact-log selectors, and streamed dedupe will regress in subtle ways. 
+ +Guardrail: + +- treat request-correlation as its own phase-0/phase-1 contract +- require the normalized layer to document how request identity is sourced and preserved +- require projector tests that cover approval-like events, request-scoped dedupe, and tool-link correlation + +### Risk 10 - chain and sidechain identity drift + +If projected Codex-native rows stop preserving truthful `parentUuid`, `isSidechain`, `isMeta`, `sessionId`, or `agentId` semantics, team-log discovery and exact-log views can regress even while basic JSONL parsing still “works”. + +Guardrail: + +- treat chain and sidechain semantics as first-class projector constraints +- require projector tests that cover main-thread rows, sidechain rows, and internal-user/tool-result rows +- do not allow convenience projection rules that flatten sidechain identity or create fake parent-chain participation + +### Risk 11 - runtime status/settings and probe drift + +If `codex-native` exists in execution but not in runtime settings, provisioning summaries, installer snapshots, or model verification policy, the UI will display stale or contradictory truth. + +Guardrail: + +- treat runtime status/settings as a first-class contract layer +- update backend selector truth, provisioning summaries, installer snapshots, and backend-aware model probe signatures together +- require tests that cover `selectedBackendId`, `resolvedBackendId`, backend summary rendering, and probe-signature invalidation for the new lane + +### Risk 12 - approval/control adaptation drift + +If Codex-native approval/control events do not map truthfully into the current `ToolApprovalRequest` and `requestId` contract, pending approvals, approval icons, and allow/deny responses will regress in subtle ways. 
+ +Guardrail: + +- treat approval/control adaptation as its own contract layer +- require tests that cover emitted approval requests, resolved approval state, timeout behavior, and unsupported-control fallback +- if the mapping is not truthful yet, keep manual approval support explicitly limited for the lane + +### Risk 13 - auth-routing and backend-routing drift + +If Codex auth mode continues to rewrite `CLAUDE_CODE_CODEX_BACKEND` implicitly, the new lane can be selected in UI but never actually reached at runtime. + +Guardrail: + +- treat connection/auth env routing as its own contract layer +- require tests that cover Codex OAuth, Codex API-key mode, and backend selection independently +- require UI copy and saved settings to stop equating “OpenAI API key” with “old Responses API lane” once `codex-native` exists + +### Risk 14 - config-schema and launch-granularity drift + +If the orchestrator gains `codex-native` but app config validation and launch contracts still only understand the old Codex backend world, users can see or save a lane choice that provisioning cannot actually launch truthfully. + +Guardrail: + +- treat config schema and launch granularity as first-class rollout constraints +- update runtime config types, IPC validation, saved defaults, and provisioning summaries together +- require tests that prove the same backend vocabulary is accepted by config, surfaced in UI, and represented honestly during launch/provisioning + +### Risk 15 - process-scope backend-routing drift + +If Codex backend routing is still inherited from process env while UI or team launch copy implies member-level backend choice, one launched runtime can silently run a different backend mix than the user thinks. 
+ +Guardrail: + +- treat backend-routing scope as a first-class rollout constraint +- require tests and logs that prove what scope backend selection actually has during team launch and teammate spawn +- keep phase-1 UX explicit that mixed Codex lanes inside one launched runtime are unsupported until spawn contracts say otherwise + +### Risk 16 - provisioning probe-cache and invalidation drift + +If provisioning-readiness cache stays keyed only by provider-level identity, a backend/auth switch can leave stale old-lane readiness visible while model verification and runtime settings already describe the new lane. + +Guardrail: + +- treat probe-cache identity and invalidation as a first-class rollout contract +- require tests that switch Codex backend/auth inputs and prove readiness cache invalidates or bypasses stale entries deterministically +- do not allow provider-only cached readiness to survive lane changes silently for `codex-native` + +### Risk 17 - external-runtime diagnostic drift + +If runtime status keeps surfacing “Codex CLI detected” without a stricter contract, UI and installer/provisioning summaries can overstate `codex-native` readiness even when the lane is still unavailable, unauthenticated, or unsupported. 
+ +Guardrail: + +- treat external-runtime diagnostics as advisory, not as execution truth +- require tests that distinguish binary detection from backend selection, backend resolution, and authenticated readiness +- require tests that distinguish external user-installed CLI detection from bundled SDK-binary availability when the chosen seam may not use the user's PATH binary at all +- do not let Codex CLI detection upgrade plugin support or lane availability by implication + +### Risk 18 - backend-option state drift + +If runtime status keeps emitting `selectable` and `available` but renderer/backend-selection UX only understands one readiness boolean, `codex-native` can be hidden when it should be configurable or shown as ready when it is only selectable. + +Guardrail: + +- treat backend-option state semantics as a first-class shared contract +- require tests that cover selectable-but-unavailable, resolved-but-degraded, and verified-ready states +- do not let renderer/backend selector infer state transitions from `available` alone + +### Risk 19 - runtime-status fallback drift + +If backend-rich runtime status can still collapse into legacy provider-only fallback during transient failures, `codex-native` can disappear from UI or revert to old Codex semantics without any real backend change. + +Guardrail: + +- treat degraded status transport as its own first-class state +- require tests that simulate `runtime status --json` failure and verify backend-lane truth is preserved or explicitly marked degraded +- do not let fallback to `auth status` / `model list` silently erase backend ids, option-state semantics, or lane-specific copy + +### Risk 20 - runtime-copy and summary drift + +If Codex UI copy continues to derive “Current runtime” from auth mode while backend truth becomes lane-aware, dashboard/settings/provisioning summaries can confidently say the wrong thing even when the backend itself is correct. 
+ +Guardrail: + +- treat runtime-summary wording as a shared contract, not as decorative UI copy +- require tests that cover mismatched auth-mode and backend-lane combinations +- do not let `Codex subscription` / `OpenAI API key` stand in for actual runtime-lane labels once `codex-native` exists + +### Risk 21 - progressive status-snapshot drift + +If progressive `cliStatus` snapshots, cached status responses, and provider-specific refreshes keep mutating store truth without a shared sequencing/settledness contract, `codex-native` can appear, disappear, or regress nondeterministically in UI. + +Guardrail: + +- treat progressive status transport as its own contract layer +- require tests that cover interleaving: + - `fetchCliStatus()` + - `fetchCliProviderStatus()` + - late model-verification updates + - transient degraded status pushes +- do not let the `cliInstaller:progress` status path bypass freshness/authority rules silently + +### Risk 22 - extension preflight truth drift + +If extension action gating keeps relying on coarse provider/runtime truth, Codex plugin management can be enabled on the wrong lane or disabled after the right lane is already selected. + +Guardrail: + +- treat extension preflight as a backend-aware contract, not just as generic runtime readiness +- require tests that cover old Codex lane, `codex-native` selectable-but-unverified, degraded status, and authenticated-ready lane states +- do not let provider-wide plugin capability or auth status stand in for backend-lane execution truth + +### Risk 23 - team-model runtime truth drift + +If team model selectors and provisioning diagnostics keep consuming only provider-wide Codex truth, `codex-native` can have different model semantics while create/launch UI still validates and explains models as if Codex were one runtime. 
+ +Guardrail: + +- treat team-model runtime shape as a shared contract, not as an incidental UI helper type +- require tests that cover old Codex versus `codex-native` model visibility, selection errors, and provisioning notes +- do not let provider-wide auth/backend summary heuristics stand in for canonical backend-lane identity + +### Risk 24 - provisioning prepare-cache identity drift + +If reusable provider prepare/model results keep keying off backend summary text, copy changes or label collisions can silently merge or split cache entries across different Codex lanes. + +Guardrail: + +- treat provisioning cache identity as canonical backend/auth/probe identity +- require tests that switch lanes, auth modes, and summary wording without causing false cache hits or misses +- do not let display summary strings participate in cache identity once `codex-native` exists + +### Risk 25 - launch persistence and resume identity drift + +If saved launch params, draft team metadata, member metadata, backup artifacts, runtime snapshots, and resume guards stay provider/model-only, teams can silently move onto a different Codex lane after a global backend change while UI still implies continuity. 
+ +Guardrail: + +- treat team launch identity as a first-class contract whenever backend lane changes runtime semantics +- require tests that: + - save launch params on one lane + - persist draft team metadata on one lane + - restore a backed-up team created on one lane + - switch global Codex backend + - relaunch or resume +- verify whether the result is explicitly inherited-global or explicitly pinned +- do not let resume guards compare only provider/model once Codex lane changes can alter runtime behavior + +### Risk 26 - team-summary and list-surface truth drift + +If team summaries, draft cards, and synthetic provisioning cards stay lane-blind while detailed runtime truth becomes lane-aware, users can see one Codex story in cards/lists and a different one in launch/runtime detail views. + +Guardrail: + +- treat team-summary surfaces as an explicit shared contract, not as incidental UI decoration +- require tests that compare: + - draft card truth + - persisted team summary truth + - provisioning snapshot truth + - detailed runtime truth + across old Codex, `codex-native`, and inherited-global scenarios +- do not let team cards imply pinned/runtime-specific lane truth unless the shared `TeamSummary` contract actually carries it + +### Risk 27 - member-runtime summary and composer-capability truth drift + +If member cards/detail, bootstrap/system summaries, and composer capability hints stay provider-wide while backend truth becomes lane-aware, users can see one Codex story in runtime/settings surfaces and another in member/composer surfaces. 
+ +Guardrail: + +- treat member-runtime/composer surfaces as an explicit shared contract, not as cosmetic helper copy +- require tests that compare: + - runtime status truth + - member runtime summary truth + - bootstrap/system runtime summary truth + - composer slash-command/plugin affordance truth + across old Codex, `codex-native`, degraded, and inherited-global scenarios +- do not let `providerId === 'codex'` alone unlock lane-sensitive copy or Codex capability hints once backend lane semantics differ + +### Risk 28 - plugin activation and session-visibility truth drift + +If extension/plugin UX keeps collapsing “installed”, “active now”, “usable after new thread/restart”, and “still needs app/auth setup” into one generic success state, Codex-native plugin support will be overstated even when runtime execution is otherwise correct. + +Guardrail: + +- treat plugin activation/session visibility as a first-class shared contract, not as incidental success copy +- require tests that compare: + - native placement success + - selected backend lane truth + - current-session visibility truth + - next-thread/restart-required truth + - app/auth-setup-complete truth + across old Codex, `codex-native`, degraded, and ongoing-session scenarios +- do not let generic install/uninstall success banners stand in for actual execution readiness + +### Risk 29 - mention-targeting and invocation-shape truth drift + +If phase 1 blurs structured mention targeting, linked-text mention targeting, and implicit runtime plugin discovery into one generic “plugin/app invocation works” story, Codex-native integration can overpromise deterministic behavior the chosen seam does not actually guarantee. 
+ +Guardrail: + +- treat invocation shape as a first-class contract, not as a side effect of install success +- require tests that compare: + - app-server-style structured mention truth + - chosen SDK/raw-exec invocation truth + - linked-text mention behavior + - no-explicit-targeting fallback behavior + across plugins, apps, and skills where relevant +- do not let composer/extension UX imply exact targeting semantics that are not backed by the chosen execution seam + +### Risk 30 - live-stream and history-hydration truth drift + +If phase 1 blurs active turn notifications, sparse turn/thread payloads, and replayable thread history into one generic “conversation state” cache, Codex-native integration can look correct while exact-log, task-log, replay, or resume flows quietly consume incomplete history truth. + +Guardrail: + +- treat live activity and replayable history as separate first-class contracts +- require tests that compare: + - live `item/*` stream truth + - sparse `turn/*` and `thread/*` payload truth + - explicit `thread/read` / `thread/turns/list` / `thread/resume` hydration truth + - persisted transcript projector truth + across active turns, reconnect/reload, interrupted turns, and post-hoc exact-log reads +- do not let any one in-memory event cache become the implicit source of truth for replay/exact-log/task-log unless it can prove the same completeness guarantees as the explicit hydration path + +### Risk 31 - approval lifecycle cleanup truth drift + +If phase 1 blurs explicit user approval, runtime auto-resolution, lifecycle cleanup, and run dismissal into one generic “request resolved” story, Codex-native integration can leave stale pending approvals in UI or mark requests resolved as if the user explicitly answered when they did not. 
+ +Guardrail: + +- treat approval cleanup semantics as a first-class contract, not as a side effect of request correlation +- require tests that compare: + - explicit allow/deny response + - runtime auto-resolution + - lifecycle cleanup on turn start/complete/interrupt + - run-level dismissal + across pending approval sheets, resolved approval icons, and activity rows +- do not let renderer/store assume that successful user-response IPC is the only valid path that clears pending approval state + +### Risk 32 - generic interactive-request truth drift + +If phase 1 quietly assumes that tool-approval UI covers `requestUserInput` or MCP elicitation, Codex-native turns can stall or degrade in ways the app cannot explain, while the lane still appears broadly functional. + +Guardrail: + +- treat generic interactive prompts as a first-class contract, not as a subtype of approvals +- require tests that compare: + - approval-only flows + - generic user-input prompts + - MCP elicitation requests + - unsupported-path behavior + across active turns and blocked/setup-heavy workflows +- do not let the lane claim interactive parity unless the app can truthfully surface and resolve the provider-native prompt types it may emit + +### Risk 33 - headless exec capability truth drift + +If phase 1 blurs headless exec/SDK behavior with richer app-server behavior, Codex-native can look like a generally interactive runtime even though the actual execution seam rejects whole classes of server-request-style interactions. 
+ +Guardrail: + +- treat headless exec capability limits as a first-class lane contract, not as an implementation footnote +- require tests that compare: + - chosen raw-exec or SDK seam behavior + - approval-like flows + - generic `requestUserInput` + - MCP elicitation + - dynamic-tool or server-request-style controls + against the capabilities the UI/status payloads claim +- do not let the lane advertise approval or interactive parity that belongs only to richer seams the rollout is not actually using + +### Risk 34 - ephemeral completion-backfill truth drift + +If phase 1 chooses `--ephemeral` for session-safety reasons without replacing non-ephemeral exec's final completed-turn backfill, Codex-native can look correct in live demos while post-turn history, transcript projection, or exact-log completeness quietly degrades. + +Guardrail: + +- treat `--ephemeral` versus non-ephemeral backfill as a first-class rollout choice, not as a low-level runtime flag +- require tests that compare: + - non-ephemeral exec with final `thread/read` backfill + - ephemeral exec without that backfill + - explicit projector/hydration recovery behavior + across final assistant message capture, completed-turn items, exact-log, and replay reads +- do not let transcript/history UX depend on implicit exec recovery behavior that disappears when seam policy changes + +### Risk 35 - native-lane credential-routing truth drift + +If phase 1 keeps reusing old Codex API-key routing assumptions while the chosen native seam actually authenticates through a different credential surface, `codex-native` can look ready in settings/status while the runtime still starts with the wrong auth shape. 
+ +Guardrail: + +- treat native-lane credential routing as a first-class contract, not as a side effect of old Codex API-key support +- require tests that compare: + - old Codex lane API-key readiness + - native exec/SDK lane API-key readiness + - stored-key routing + - env-var routing + - status/issue/copy truth + under the same user-facing “Codex API key configured” conditions +- do not let provider-wide `OPENAI_API_KEY` truth stand in for native-lane auth truth unless the chosen seam explicitly uses and proves that same path + +### Risk 36 - native-lane model inventory truth drift + +If phase 1 keeps reusing old provider-wide Codex model catalogs, disabled-model heuristics, and probe defaults while the selected native lane exposes a different model surface, UI and provisioning can look internally consistent while still lying about what the lane really supports. + +Guardrail: + +- treat native-lane model inventory and reasoning-effort truth as a first-class contract, not as a cosmetic catalog problem +- require tests that compare: + - old Codex catalog truth + - native-lane visible models + - disabled-model reasons + - default/preflight model choice + - supported reasoning-effort options + across create/launch selectors, runtime settings, provisioning hints, and verification probes +- do not let static provider-wide Codex heuristics stand in for native-lane model truth once the selected lane materially changes available model metadata + +### Risk 37 - workspace-trust ownership drift + +If native Codex thread start/resume is allowed to imply or persist project trust independently from the host trust dialog, the rollout can silently mutate trust state or unlock trust-gated behavior without the app's existing security story staying true. 
+ +Guardrail: + +- treat host trust ownership as a first-class contract, not as an implementation detail +- require tests that compare: + - host trust not yet accepted + - native lane selected + - writable/full-access thread start + - trust-gated env/hook/LSP behavior + - any Codex-side trust persistence effect +- do not let repo-check success, native thread existence, or writable sandbox state masquerade as host trust acceptance + +### Risk 38 - instruction-owner truth drift + +If phase 1 leaves host system/bootstrap prompts, native base/developer instructions, and collaboration-mode built-ins without one explicit owner, runtime behavior can change from hidden instruction precedence instead of visible config or code changes. + +Guardrail: + +- treat instruction ownership as a first-class contract, not as a prompt-construction detail +- require tests that compare: + - host system/bootstrap prompt only + - native base/developer instructions + - collaboration-mode on/off + - model/effort selection + - bootstrap-critical guidance visibility +- do not let hidden collaboration-mode built-ins or second instruction channels silently override host prompt truth + +### Risk 39 - persisted-history policy drift + +If phase 1 leaves `persistExtendedHistory` implicit, native threads can end up with mixed replay/hydration fidelity while exact-log, reload, and resume flows still speak as if all native history is equally complete. 
+ +Guardrail: + +- treat persisted-history richness as a first-class thread policy, not as a background storage optimization +- require tests that compare: + - rich persisted-history thread birth/resume/fork + - intentionally lossy thread birth/resume/fork + - replay/exact-log/reload truth + - later config changes that should not retroactively repair older threads +- do not let UI/transcript/replay surfaces imply one uniform native-history completeness story unless thread policy actually guarantees it + +### Risk 40 - native config and feature-state ownership drift + +If selective app-server enrichment allows process-wide feature toggles, marketplace persistence, or `config.toml` writes without one host-owned authority, the rollout can split truth between app settings and native runtime state while still looking locally consistent. + +Guardrail: + +- treat native config/feature/marketplace mutation as a first-class ownership contract, not as a convenience API +- require tests that compare: + - host settings truth + - native process-wide feature state + - native marketplace persistence + - loaded-thread reload behavior after config mutation +- do not let normal lane operation quietly write native global state unless the host explicitly owns and surfaces that operation + +### Risk 41 - detached review-thread identity drift + +If native review affordances remain available while detached review is still unmapped, the app can create second native threads whose identity never lands in launch/replay/chain/task-log truth even though the review itself appears to work. 
+ +Guardrail: + +- treat native review delivery mode as a first-class contract, not as a slash-command detail +- require tests that compare: + - inline review + - detached review + - `reviewThreadId` + - emitted `thread/started` + - replay/log/task surfaces +- do not let `/review` imply detached support unless review-thread identity is modeled explicitly end-to-end + +### Risk 42 - native binary-version and protocol-surface truth drift + +If phase 1 treats `codex-native` backend id as the whole capability contract while actual execution can come from different binaries, versions, or protocol surfaces, the app can look internally consistent while still lying about what that lane really supports on a given machine. + +Guardrail: + +- treat native runtime identity as a first-class contract, not as a hidden implementation detail +- require tests that compare: + - bundled SDK-resolved binary + - external CLI-resolved binary + - different native binary versions + - stable-only versus experimental protocol surface where relevant + - status/probe/cache/UI truth +- do not let backend id alone stand in for capability parity unless the rollout explicitly proves those native runtime identities are equivalent enough + +### Risk 43 - app-server connection-policy truth drift + +If later selective app-server enrichment allows different connections to negotiate different experimental surfaces or notification-subscription policies, the native runtime can look flaky while the real problem is that not every connection sees the same methods, fields, or live events.
+ +Guardrail: + +- treat app-server connection policy as a first-class contract, not as a transport detail +- require tests that compare: + - stable-only connection profile + - experimental connection profile + - different `optOutNotificationMethods` + - live notification presence/absence + - status/debugging truth +- do not let missing app-server fields or notifications be diagnosed as runtime failure before ruling out connection-policy skew + +### Risk 44 - canonical-history versus append-only-projection truth drift + +If native rollback or compaction mutates canonical thread history while local transcript/log readers still trust append-only projected history, the app can look coherent in live use while replay, exact-log, and task-log silently tell the wrong story about what the conversation now canonically contains. + +Guardrail: + +- treat canonical-history authority as a first-class contract, not as a parser implementation detail +- require tests that compare: + - pre-mutation append-only transcript truth + - native rollback result truth + - native compaction result truth + - replay/exact-log/task-log truth after reload + - incremental watcher/cache behavior +- do not let append-only local transcript remain implicitly canonical after native history mutation unless the rollout explicitly proves equivalence or performs reconciliation + +### Risk 45 - turn-metadata and usage-authority truth drift + +If native usage, context-window truth, final model/reasoning-effort truth, or turn plan/diff/reroute metadata are inferred from assistant transcript rows instead of from the seam that actually owns them, the rollout can look healthy while context panels, provisioning usage, token warnings, and runtime copy quietly tell the wrong story. 
+ +Guardrail: + +- treat turn-metadata authority as a first-class contract, not as a rendering detail +- require tests that compare: + - live completed-turn usage on the chosen seam + - restored usage after resume/fork/reload + - assistant transcript rows with partial or no native usage payload + - configured model versus rerouted or persisted-resume model truth + - turn plan/diff metadata presence versus explicit unavailability +- do not let assistant transcript rows automatically masquerade as the canonical native source for usage, model, or reroute truth unless the rollout explicitly proves that equivalence for the chosen seam + +### Risk 46 - native thread-default and launch-intent truth drift + +If phase 1 treats saved launch `provider/model/effort` as canonical even after native turns or `thread/resume` restore different persisted defaults, the rollout can look healthy while relaunch, restore, runtime summaries, and resume guards quietly describe a different runtime than the one the native thread is actually using. 
+ +Guardrail: + +- treat host launch intent versus native thread-defaults as a first-class contract, not as a UI-summary detail +- require tests that compare: + - fresh thread using current launch intent + - resumed thread inheriting persisted model and reasoning-effort + - explicit override or fresh-thread policy when host launch intent differs + - config, relaunch, restore, and runtime-summary copy under that drift +- do not let saved launch params, config-owned provider/model/effort, or bootstrap summaries automatically masquerade as live native thread-default truth unless the rollout explicitly proves they stay aligned + +### Risk 47 - native thread-status and warning-authority truth drift + +If host process liveness, provisioning progress, runtime snapshots, or coarse provider-global banners stand in for native thread lifecycle truth, the rollout can look healthy while the actual native thread is already `notLoaded`, `idle`, or `systemError`, and warning copy can quietly point users at the wrong failing surface. + +Guardrail: + +- treat native thread-status and warning authority as a first-class contract, not as a UI wording detail +- require tests that compare: + - process alive versus native thread `systemError` + - runtime still present versus native thread `notLoaded` + - native thread warnings versus config/startup warnings versus provisioning/process warnings + - status, banner, and team-detail copy under those divergences +- do not let host process or provisioning truth automatically masquerade as native thread health unless the rollout explicitly proves those states are equivalent on the chosen seam + +## Lowest-Confidence Seams + +These are the areas where we should stay conservative: + +1. `🎯 6 🛡️ 7 🧠 7` - session resume and transcript ownership + Rough implementation surface: `250-700` lines + Biggest risk: dual persistence and confusing resume semantics. + +2. 
`🎯 7 🛡️ 9 🧠 6` - transcript compatibility projection + Rough implementation surface: `350-900` lines + Biggest risk: accidentally turning the migration into a `claude_team` transcript-format rewrite. + +3. `🎯 7 🛡️ 8 🧠 6` - permission/sandbox parity for the Codex-native lane + Rough implementation surface: `300-800` lines + Biggest risk: approval UX mismatch against current orchestrator expectations. + +4. `🎯 8 🛡️ 9 🧠 5` - normalized event schema design + Rough implementation surface: `400-900` lines + Biggest risk: either too Anthropic-shaped or too vague for UI/transcripts. + +5. `🎯 7 🛡️ 8 🧠 5` - backend-id compatibility across orchestrator/UI + Rough implementation surface: `150-450` lines + Biggest risk: lane truth drifts because config, runtime status, and renderer option lists do not evolve together. + +6. `🎯 6 🛡️ 7 🧠 6` - SDK-vs-raw-exec session ownership seam + Rough implementation surface: `200-600` lines + Biggest risk: unintentionally locking the rollout to durable Codex-owned sessions before we have decided that behavior is acceptable. + +7. `🎯 7 🛡️ 8 🧠 6` - request-correlation semantics across live activity and transcript projection + Rough implementation surface: `250-700` lines + Biggest risk: approval UX, exact-log selectors, or streamed dedupe silently regress because `requestId` and tool-link identities stop being stable across layers. + +8. `🎯 7 🛡️ 8 🧠 6` - chain and sidechain identity projection + Rough implementation surface: `250-700` lines + Biggest risk: team-log grouping, exact-log views, or subagent linking silently regress because `parentUuid`, `isSidechain`, `isMeta`, `sessionId`, or `agentId` stop meaning the same thing across layers. + +9. `🎯 7 🛡️ 8 🧠 6` - runtime status/settings and backend-probe policy + Rough implementation surface: `220-650` lines + Biggest risk: `codex-native` exists in execution but settings, provisioning, installer snapshots, or model verification still describe the old Codex backend truth. + +10. 
`🎯 6 🛡️ 7 🧠 7` - approval/control adaptation into current approval UX + Rough implementation surface: `250-750` lines + Biggest risk: pending approvals, allow/deny responses, or timeout/deadlock handling silently drift because provider-native control events are only partially adapted. + +11. `🎯 6 🛡️ 8 🧠 6` - auth-routing versus backend-routing decoupling + Rough implementation surface: `180-550` lines + Biggest risk: `codex-native` looks selectable in UI, but env construction still forces `CLAUDE_CODE_CODEX_BACKEND=api` or `adapter`, so runtime truth never matches UI truth. + +12. `🎯 6 🛡️ 8 🧠 6` - config-schema and launch-granularity alignment + Rough implementation surface: `180-520` lines + Biggest risk: orchestrator, config validation, and provisioning all talk about different backend vocabularies or different selection granularity, so the lane can be saved or shown without being launchable honestly. + +13. `🎯 6 🛡️ 8 🧠 6` - process-scope backend-routing versus member-level UX expectations + Rough implementation surface: `180-520` lines + Biggest risk: the lane looks selectable per team member or per launch, but teammate spawn still inherits one process-level Codex backend, so real runtime behavior diverges from UI promises. + +14. `🎯 6 🛡️ 9 🧠 5` - provisioning probe-cache identity and invalidation + Rough implementation surface: `120-380` lines + Biggest risk: readiness/provisioning UI keeps showing stale old-lane truth after a Codex backend or auth switch because cache keys and invalidation stay provider-scoped instead of backend-aware. + +15. `🎯 7 🛡️ 8 🧠 4` - external-runtime diagnostics versus actual lane readiness + Rough implementation surface: `100-260` lines + Biggest risk: UI, installer snapshots, or provisioning summaries start treating detected `codex` binary presence as proof that `codex-native` is selectable, authenticated, or plugin-ready when it is not. + +16. 
`🎯 6 🛡️ 8 🧠 5` - backend-option state semantics in runtime status and selector UX + Rough implementation surface: `120-320` lines + Biggest risk: `codex-native` cannot be represented honestly because UI still collapses `selectable`, `available`, and `verified` into one pseudo-readiness state. + +17. `🎯 6 🛡️ 8 🧠 5` - runtime-status fallback preserving backend-lane truth + Rough implementation surface: `140-360` lines + Biggest risk: transient failure of unified runtime status makes `codex-native` vanish or revert to old provider-only Codex semantics because legacy fallback drops backend-rich truth. + +18. `🎯 7 🛡️ 8 🧠 4` - runtime summary/copy semantics for auth mode vs backend lane + Rough implementation surface: `100-240` lines + Biggest risk: UI keeps saying the wrong “Current runtime” for Codex because it still equates connection method labels with execution-lane truth. + +19. `🎯 6 🛡️ 8 🧠 5` - progressive status snapshot reconciliation across main/store/UI + Rough implementation surface: `140-420` lines + Biggest risk: partial or stale `cliStatus` pushes silently overwrite fresher backend-lane truth because progress events, cached responses, and provider refreshes do not share one freshness contract. + +20. `🎯 6 🛡️ 8 🧠 5` - backend-aware extension preflight for Codex plugin management + Rough implementation surface: `140-360` lines + Biggest risk: plugin install/uninstall UI becomes enabled from provider-wide truth even while the selected Codex lane is still old, degraded, or unverified. + +21. `🎯 6 🛡️ 8 🧠 5` - team model/runtime shape for create-launch dialogs + Rough implementation surface: `140-360` lines + Biggest risk: team model selectors and provisioning notes keep using provider-wide Codex truth, so lane-specific model behavior cannot be represented honestly. + +22. 
`🎯 7 🛡️ 8 🧠 4` - canonical provisioning prepare-cache identity + Rough implementation surface: `100-240` lines + Biggest risk: cache reuse drifts with backend summary wording and silently mixes old Codex and `codex-native` warmup/model results. + +23. `🎯 6 🛡️ 8 🧠 5` - persisted team identity and replay identity across backend-lane changes + Rough implementation surface: `140-420` lines + Biggest risk: saved team launches, draft team metadata, backup/restore artifacts, and resume logic keep only provider/model truth, so a later global Codex backend switch silently changes execution lane without explicit UI or snapshot truth. + +24. `🎯 7 🛡️ 8 🧠 4` - team-summary and list-surface contract for lane truth + Rough implementation surface: `100-280` lines + Biggest risk: team cards, draft cards, and synthetic provisioning snapshots tell a different Codex story than runtime/detail surfaces because shared summary DTOs cannot represent backend-lane identity honestly. + +25. `🎯 7 🛡️ 8 🧠 4` - member-runtime summary and composer-capability contract for lane truth + Rough implementation surface: `120-320` lines + Biggest risk: member cards/detail, bootstrap/system summaries, and composer slash-command/plugin hints tell a different Codex story than runtime/settings surfaces because they still collapse everything to provider-wide Codex identity. + +26. `🎯 7 🛡️ 8 🧠 5` - plugin activation and session-visibility contract + Rough implementation surface: `140-360` lines + Biggest risk: extension/plugin UI treats Codex-native install success as immediate readiness even when the real truth is only “usable in a new thread/restarted session” or “still blocked on app/auth setup”. + +27. `🎯 6 🛡️ 8 🧠 6` - mention-targeting and invocation-shape contract + Rough implementation surface: `180-420` lines + Biggest risk: UI/composer claims deterministic plugin/app targeting even though the chosen Codex seam only gives us linked-text mention parsing or implicit runtime discovery. + +28. 
`🎯 7 🛡️ 8 🧠 6` - live-stream versus history-hydration contract + Rough implementation surface: `180-480` lines + Biggest risk: exact-log, task-log, replay, or resume quietly consume sparse live Codex turn state as if it were fully hydrated history. + +29. `🎯 7 🛡️ 8 🧠 5` - approval-resolution and lifecycle-cleanup contract + Rough implementation surface: `160-420` lines + Biggest risk: stale pending approvals or misleading resolved icons because lifecycle-cleared requests get mistaken for explicit user decisions or never clear at all. + +30. `🎯 6 🛡️ 8 🧠 5` - generic interactive-request and MCP-elicitation contract + Rough implementation surface: `160-420` lines + Biggest risk: Codex-native turns hang or silently degrade because the app only supports approval prompts while the runtime asks for structured user input. + +31. `🎯 6 🛡️ 8 🧠 5` - headless exec / TypeScript SDK capability-boundary contract + Rough implementation surface: `160-420` lines + Biggest risk: the rollout quietly markets a headless exec seam as approval-capable or app-server-like even though the runtime seam itself rejects those interactions. + +32. `🎯 6 🛡️ 8 🧠 5` - ephemeral-versus-completion-backfill tradeoff + Rough implementation surface: `160-420` lines + Biggest risk: choosing `--ephemeral` for session-safety reasons weakens final-turn history completeness in ways that only appear in transcript/exact-log/replay paths. + +33. `🎯 7 🛡️ 8 🧠 4` - native-lane credential-routing and API-key surface contract + Rough implementation surface: `120-320` lines + Biggest risk: UI/status says `codex-native` is API-key ready while auth is still wired only for the old `OPENAI_API_KEY` Responses-API lane. + +34. `🎯 7 🛡️ 8 🧠 4` - native-lane model inventory and reasoning-effort contract + Rough implementation surface: `140-360` lines + Biggest risk: selectors/probes/settings keep using old provider-wide Codex model truth while the selected native lane exposes a different model surface. + +35. 
`🎯 6 🛡️ 9 🧠 5` - workspace-trust and native-thread-start contract + Rough implementation surface: `120-320` lines + Biggest risk: native thread start silently mutates trust state or bypasses host trust-gated env/hook/LSP behavior while UI still tells the old trust story. + +36. `🎯 6 🛡️ 8 🧠 6` - instruction-ownership and collaboration-mode contract + Rough implementation surface: `180-420` lines + Biggest risk: hidden collaboration-mode or native developer-instruction precedence duplicates or overrides host system/bootstrap prompts, causing behavioral drift that UI cannot explain. + +37. `🎯 7 🛡️ 8 🧠 5` - persisted-history policy and non-retroactive hydration contract + Rough implementation surface: `140-360` lines + Biggest risk: native threads are born with mixed history fidelity, but replay/exact-log/reload surfaces still act as if later config changes can make all of them equally complete. + +38. `🎯 6 🛡️ 8 🧠 6` - native config/feature/marketplace ownership contract + Rough implementation surface: `180-420` lines + Biggest risk: selective native control-plane calls create a second hidden settings authority, so app settings and native runtime state drift apart. + +39. `🎯 6 🛡️ 8 🧠 5` - detached review-thread identity contract + Rough implementation surface: `140-340` lines + Biggest risk: `/review` looks supported, but detached review spawns a second native thread that our launch/replay/task-log surfaces never model honestly. + +40. `🎯 6 🛡️ 8 🧠 5` - native binary-version and protocol-surface identity contract + Rough implementation surface: `160-380` lines + Biggest risk: backend id looks stable, but bundled SDK binary, external CLI, or protocol-surface skew quietly changes what `codex-native` actually supports. + +41. 
`🎯 6 🛡️ 8 🧠 5` - app-server connection-policy contract + Rough implementation surface: `120-300` lines + Biggest risk: later app-server enrichment looks flaky because different connections negotiated different experimental surface or notification visibility, while status/UI still assume one global truth. + +42. `🎯 6 🛡️ 8 🧠 6` - canonical-history versus append-only-projection contract + Rough implementation surface: `180-420` lines + Biggest risk: native rollback or compaction changes canonical history, but append-only local transcript, exact-log, and replay keep serving stale pre-mutation truth. + +43. `🎯 6 🛡️ 8 🧠 5` - turn-metadata and usage-authority contract + Rough implementation surface: `180-420` lines + Biggest risk: native usage, context-window, model/reroute, or plan truth lives outside assistant transcript rows, but context panels, provisioning usage, token warnings, and runtime copy keep guessing from stale transcript-local metadata. + +44. `🎯 6 🛡️ 8 🧠 6` - native thread-defaults versus launch-intent contract + Rough implementation surface: `180-460` lines + Biggest risk: resumed native threads inherit persisted model, effort, or other thread-defaults while saved launch params, config/meta, and team/member runtime summaries still present launch intent as if it were the live runtime truth. + +45. `🎯 6 🛡️ 8 🧠 5` - native thread-status and warning-authority contract + Rough implementation surface: `160-420` lines + Biggest risk: dashboard, settings, provisioning, and team-detail surfaces keep equating process alive or provisioning active with native thread health, while warning copy collapses config warnings, native thread warnings, and process warnings into one misleading status story. + +## Practical Rule + +If we need **unified logs**, we normalize events. + +If we need **native Codex capabilities**, we do not fake Codex into Anthropic runtime semantics. + +That is the core architectural rule for this migration. 
diff --git a/docs/research/codex-native-runtime-phase-0-implementation-spec.md b/docs/research/codex-native-runtime-phase-0-implementation-spec.md new file mode 100644 index 00000000..b15d9fa3 --- /dev/null +++ b/docs/research/codex-native-runtime-phase-0-implementation-spec.md @@ -0,0 +1,1216 @@ +# Codex Native Runtime - Phase 0 Implementation Spec + +Status: + +- working spec, implementation-backed +- intended companion to [codex-native-runtime-integration-decision.md](codex-native-runtime-integration-decision.md) +- scope: minimal safe spike, not broad rollout +- audited against current code and a live local `codex exec` run on 2026-04-19 +- safe to continue coding against +- not ready to unlock `codex-native` for normal runtime selection yet + +## Purpose + +This document turns the Codex-native decision doc into an execution spec for Phase 0. + +Phase 0 is not the full migration. + +Its only job is to prove that we can add a feature-flagged `codex-native` lane without: + +- breaking current transcript consumers +- lying about status/capabilities in UI +- silently changing launch, replay, or approval semantics + +If Phase 0 succeeds, we should know whether the first implementation wave can proceed as a minimal safe swap. + +## Current Readiness Verdict + +The spec itself is now ready to drive implementation. + +Phase 0 implementation is now wired and evidence-backed. 
+ +Current state: + +- ✅ ready and already implemented: + - `codex-native` backend vocabulary in `agent_teams_orchestrator` + - `codex-native` backend vocabulary in `claude_team` config and validation + - backend-aware Codex connection-routing in `claude_team` + - lane-aware Codex status/copy in `claude_team` + - raw `codex exec` arg builder + - raw JSONL-to-normalized-event mapper + - real process-owned `codex exec` runner + - transcript-compatible projector + - persisted history wiring through the native lane + - native executable identity, credential source, and completion metadata capture + - parser coverage for native projected assistant rows + - parser coverage for modern system warning rows + - conservative selector lock policy + - targeted tests for the above slices +- ⚠️ partially implemented: + - `codex-native` runtime status can now represent the lane honestly, and the execution lane is real, but the lane remains intentionally locked and non-selectable + - native lane credentials are routed honestly end-to-end, but the lane still exposes only a conservative headless-limited capability profile + - the lane remains intentionally conservative in UI exposure and unlock policy even though transcript authority is now stronger +- ✅ sign-off evidence package is now captured in + [codex-native-runtime-phase-0-signoff-evidence.md](./codex-native-runtime-phase-0-signoff-evidence.md) + +Practical meaning: + +- the Phase 0 contract is now strong enough to keep implementing against +- the product is still protected from false rollout because `codex-native` remains a locked experimental lane + +## Spec Maintenance Rule + +This document is allowed to evolve only in two ways: + +1. to reflect implementation-backed reality more accurately +2. to tighten gates when a new risk is discovered + +It must not drift into a second speculative architecture document. 
+ +Required maintenance behavior: + +- if a Phase 0 PR changes authority order, capability truth, lock policy, or exit criteria, this spec must be updated in the same PR +- if a Phase 0 PR only adds implementation under an already-frozen contract, this spec should update only its status/checklist sections +- if current code and this spec disagree, either the code is wrong, or the spec is stale - do not leave the disagreement implicit +- if the implementation-status snapshot changes materially, update the `Implementation Status As Of ...` date in the same PR + +## Phase 0 Source Of Truth Rule + +For Phase 0 implementation work: + +- this document is the execution contract +- [codex-native-runtime-integration-decision.md](codex-native-runtime-integration-decision.md) remains the broader strategy and risk document + +If the two documents appear to disagree on a Phase 0 implementation detail: + +- this spec wins until both documents are reconciled + +Reason: + +- the decision doc is intentionally broader +- this spec is intentionally narrower and implementation-facing + +## Implementation Status As Of 2026-04-19 + +### Foundation already landed + +- `agent_teams_orchestrator` now knows `codex-native` as a first-class backend id +- `agent_teams_orchestrator` status and registry surfaces can describe the lane without auto-resolving into it +- `claude_team` config vocabulary, validation, connection routing, and runtime UI copy are lane-aware +- old Codex auth mode no longer silently chooses the runtime lane +- raw exec Phase 0 modules already exist for: + - arg building + - JSONL mapping + - normalized event shape +- the live orchestrator execution path now has: + - a real `codex exec` runner + - transcript-compatible projection + - persisted history writes + - executable identity and completion metadata capture +- native projected transcript rows now carry: + - thread-status authority + - warning-source attribution + - execution-summary and history-completeness metadata 
+- targeted tests now exist for resolver, registry, config validation, connection routing, lane-aware UI, exec arg building, JSONL mapping, transcript projection, thread-status authority, turn execution, JSONL parsing, exact-log parsing, and session parsing + +### Foundation intentionally still locked + +- `codex-native` is not selectable for normal users +- `auto` never resolves to `codex-native` +- targeted client guard still rejects live interactive execution on the lane +- renderer/status surfaces may show the lane diagnostically, but not as a fully usable runtime + +### Remaining Phase 0 blockers + +- no code blockers remain inside Phase 0 +- lane unlock remains intentionally blocked by rollout policy + +### Phase 0 readiness verdict + +- ✅ implementation-complete +- ✅ sign-off evidence captured +- ✅ raw-exec execution slice is landed +- ✅ ready to treat the spec as the contract for remaining work +- ✅ ready to declare Phase 0 complete +- ⚠️ still not ready to unlock `codex-native` as a selectable runtime lane + +## Observed Current Codex Exec Facts + +The following are no longer assumptions. 
They were observed locally on 2026-04-19 with: + +- `codex-cli 0.117.0` +- `codex exec --json --ephemeral --skip-git-repo-check -C /tmp 'Reply only with OK'` + +Observed event shape: + +- `thread.started` +- `turn.started` +- `item.completed` +- `turn.completed` + +Observed successful assistant payload: + +- `item.completed.item.type = "agent_message"` +- `item.completed.item.text = "OK"` + +Observed usage payload: + +- `turn.completed.usage.input_tokens` +- `turn.completed.usage.cached_input_tokens` +- `turn.completed.usage.output_tokens` + +Observed seam-critical warning: + +- `thread/read failed while backfilling turn items for turn completion` +- `ephemeral threads do not support includeTurns` +- non-JSON warning lines may be interleaved with JSONL and must stay source-attributed + +Observed practical implication: + +- `--ephemeral` gives useful live events +- `--ephemeral` does not give final completion backfill via `thread/read` +- this confirms the Phase 0 rule that live stream and canonical history are different authorities + +## Current Implemented Routing Facts + +These are current implementation-backed truths, not future intentions: + +- `codex-native` is a distinct backend lane, not a rename of old Codex `api` or `adapter` +- `auto` does not resolve to `codex-native` +- `codex-native` requires its own native-lane readiness path +- the native credential surface is `CODEX_API_KEY`, not implicit old-lane readiness +- `claude_team` now keeps auth routing and backend-lane routing separate +- when the selected backend is `codex-native`, app-side credential bridging may populate `CODEX_API_KEY` +- manual early routing into live `codex-native` execution is still protected by a targeted runtime guard +- once a real native runner exists, native-lane truth must also carry executable identity, not only backend id + +Practical rule: + +- if later code or copy contradicts any item above, it should be treated as a regression unless the Phase 0 contract is intentionally 
amended + +## Scope + +In scope: + +- one experimental `codex-native` backend lane +- one chosen execution seam for the spike +- normalized runtime events for the spike lane +- transcript-compatible projection for the spike lane +- explicit authority order for: + - history + - status + - warnings + - launch intent versus native thread defaults + - credential routing +- feature-flagged runtime exposure only +- explicit unsupported-state treatment for headless-limited interactions + +Out of scope: + +- making `codex-native` the default +- broad plugin UX rollout +- detached review parity +- full app-server integration +- changing `claude_team` transcript parser format +- removing the old Codex `adapter/api` lane + +## Phase 0 Deliverable + +Phase 0 is complete only if all of the following are true: + +- `agent_teams_orchestrator` can run one real Codex-native session through a feature-flagged lane +- the spike emits normalized events +- normalized events can be projected into transcript-compatible persisted history +- current `claude_team` transcript readers still parse the output without schema rewrite +- runtime status can represent the lane honestly as selected, resolved, degraded, or unavailable +- UI copy does not overclaim: + - plugin support + - approval support + - interactive prompt support + - current-session plugin activation + - thread health from process health + +## Phase 0 Exit Checklist + +Use this as the stop/go gate before declaring Phase 0 done. 
+ +| Gate | Current state | Requirement to pass | +| --- | --- | --- | +| `codex-native` backend truth exists in both repos | ✅ done | keep green | +| lane remains additive and non-default | ✅ done | keep green | +| lane remains locked until execution is real | ✅ done | keep green | +| old Codex `api/adapter` lane remains behaviorally unchanged | ✅ targeted regression coverage green | required, keep green | +| old Codex lane remains the safe fallback when native lane is absent, locked, or degraded | ✅ targeted regression coverage green | required, keep green | +| real `codex exec` process run is wired into orchestrator | ✅ done | keep green | +| executable identity is captured per run | ✅ done | keep green | +| runner records executable source and completion policy | ✅ done | keep green | +| normalized native events flow from live process output | ✅ done | keep green | +| native lane capability profile remains explicit and conservative | ✅ done | keep green | +| transcript-compatible projection is written to persisted history | ✅ done | keep green | +| current parser and exact-log paths still parse the projection | ✅ parser and exact-log proof green | keep green | +| native thread-status authority exists or degrades honestly | ✅ projected thread-status rows and targeted tests green | keep green | +| warning sources remain separated end-to-end | ✅ warning-source attribution survives projected transcript rows | keep green | +| replay and history fixtures exist for `ephemeral` and non-ephemeral runs | ✅ targeted replay/history fixtures green | keep green | +| UI copy stays lane-aware and capability-honest | ✅ targeted UI/runtime tests green | keep green | + +## Completion Versus Unlock Policy + +Phase 0 completion and lane unlock are related, but they are not the same event. 
+ +Phase 0 completion means: + +- one real `codex-native` execution path works end-to-end +- transcript, status, warning, and history truth stay honest +- internal fixtures prove the chosen seam well enough to proceed + +Phase 0 completion does **not** mean: + +- `codex-native` becomes default +- `auto` may resolve to `codex-native` +- the lane is generally available without a feature flag +- the lane suddenly gains plugin, MCP, approval, or app-server-grade interactive claims + +Default post-Phase-0 policy: + +- keep `codex-native` feature-flagged +- keep capability truth conservative +- unlock only for explicit internal usage first +- treat broader rollout as a later decision after Phase 1 gates, not as an automatic consequence of finishing Phase 0 + +## Old Codex Lane Regression Guardrail + +Phase 0 is not allowed to “succeed” by quietly making the existing Codex lane worse. + +Required rule: + +- all `codex-native` work remains additive until a later explicit migration decision + +That means: + +- old Codex `api/adapter` execution remains routable +- old Codex connection/auth behavior remains valid for the old lane +- `auto` keeps today’s old-lane behavior +- status, settings, and selector surfaces keep showing a truthful fallback path when native lane is absent, locked, or degraded +- a failed or unavailable `codex-native` lane must not make the whole Codex provider story look unavailable if the old lane still works + +Not allowed: + +- reinterpreting old-lane readiness as native-lane readiness +- changing old-lane defaults only because the new lane exists +- breaking old-lane tests while claiming the work is “only for native” + +## Chosen Phase 0 Default + +Phase 0 default: + +- execution seam: raw `codex exec` wrapper first +- lane shape: headless-limited until proven otherwise +- old Codex lane remains intact and is the fallback +- `codex-native` is additive, behind feature flag + +Reason: + +- raw exec exposes session ownership and `--ephemeral` tradeoffs 
more honestly than the current TypeScript SDK wrapper +- it reduces the chance of hiding critical persistence or capability differences under a convenience API too early + +## Execution Seam Freeze Rule + +Phase 0 currently chooses one seam: + +- raw `codex exec` wrapper first + +That choice is now frozen for the remainder of Phase 0 unless explicitly amended. + +Practical rule: + +- do not quietly switch the live implementation to the current TypeScript SDK mid-Phase-0 while keeping the same checklist and evidence package +- if the chosen seam changes, the following must be re-evaluated and updated together: + - capability matrix + - credential-routing contract + - history-completeness contract + - sign-off evidence package + - sign-off command package + +Reason: + +- otherwise Phase 0 can look “complete” while its evidence package still proves a different seam than the one actually being shipped + +## Current Phase 0 Contract State + +This spec now serves two jobs at once: + +1. freeze the minimum safe contract for the remaining Phase 0 work +2. record which pieces of that contract already exist in code + +That distinction matters because Phase 0 is no longer theoretical. + +It already has grounded slices in both repos and is now implementation-complete, but it remains deliberately rollout-limited. 
+ +Rule: + +- if a section below describes authority or capability truth that is not implemented yet, it is still binding for the next code slices +- if current code violates that truth, current code must change before `codex-native` is unlocked + +## Repo Ownership + +### `agent_teams_orchestrator` + +Owns: + +- Codex-native execution seam +- normalized event schema +- raw native event mapping +- transcript-compatible projector +- lane capability truth +- thread-status and warning authority +- credential routing for the chosen seam + +Recommended touched areas: + +- `src/services/runtimeBackends/types.ts` +- `src/services/runtimeBackends/registry.ts` +- `src/services/runtimeBackends/codexBackendResolver.ts` +- `src/services/boardTaskActivity/contract.ts` +- `src/services/boardTaskActivity/BoardTaskTranscriptProjector.ts` +- `src/query.ts` +- `src/utils/config.ts` + +Path note: + +- the paths above are in the `agent_teams_orchestrator` repo, not in `claude_team` + +Recommended new module split for the spike: + +- `src/services/codexNative/execRunner.ts` +- `src/services/codexNative/jsonlMapper.ts` +- `src/services/codexNative/normalizedEvents.ts` +- `src/services/codexNative/capabilities.ts` +- `src/services/codexNative/statusAuthority.ts` +- `src/services/codexNative/transcriptProjector.ts` + +Current implementation status: + +- ✅ created: + - `src/services/codexNative/execRunner.ts` + - `src/services/codexNative/jsonlMapper.ts` + - `src/services/codexNative/normalizedEvents.ts` + - `src/services/codexNative/capabilities.ts` + - `src/services/codexNative/statusAuthority.ts` + - `src/services/codexNative/transcriptProjector.ts` + - `src/services/codexNative/signOffHarness.ts` + +### `claude_team` + +Owns: + +- backend-lane-aware status ingestion +- lane-aware copy +- feature-flag exposure +- preserving current transcript/read-model path + +Recommended touched areas: + +- 
[ClaudeMultimodelBridgeService.ts](../../src/main/services/runtime/ClaudeMultimodelBridgeService.ts) +- [CliStatusBanner.tsx](../../src/renderer/components/dashboard/CliStatusBanner.tsx) +- [CliStatusSection.tsx](../../src/renderer/components/settings/sections/CliStatusSection.tsx) +- [providerConnectionUi.ts](../../src/renderer/components/runtime/providerConnectionUi.ts) +- [ProviderRuntimeSettingsDialog.tsx](../../src/renderer/components/runtime/ProviderRuntimeSettingsDialog.tsx) +- [SessionParser.ts](../../src/main/services/parsing/SessionParser.ts) +- [BoardTaskExactLogStrictParser.ts](../../src/main/services/team/taskLogs/exact/BoardTaskExactLogStrictParser.ts) + +### `plugin-kit-ai` + +Not required for the Phase 0 spike. + +Only Phase-0-adjacent requirement: + +- no UI or status copy may imply plugin execution support for `codex-native` before Phase 3 + +## Recommended Coding Order + +Phase 0 should be cut in this order: + +1. `agent_teams_orchestrator` type freeze +- add `codex-native` backend id to runtime backend types +- keep old Codex lane untouched +- add feature flag gates only, no behavior switch yet +- status: ✅ done + - grounded by: + - backend id additions + - resolver gates + - registry/status exposure + - targeted runtime backend tests + +2. raw exec spike seam +- add a tiny native runner that can start one Codex-native session +- capture raw JSONL +- record executable source, credential path, and `ephemeral` policy +- status: ✅ done + - grounded by: + - arg builder + - real process runner in orchestrator + - live event fixture mapping + - observed local seam validation + - executable-source capture + - executable-version capture + - completion-policy and backfill metadata capture + - explicit client guard that keeps rollout conservative + +3. 
normalized mapper +- map raw events into the Phase-0 normalized schema +- do not wire UI to raw events +- status: ✅ done + - grounded by: + - thread started + - turn started + - assistant text + - usage updated + - turn completed + - stderr warning passthrough + - unsupported raw event preservation + - stable minimal Phase-0 event contract frozen in code + +4. transcript-compatible projector +- project the normalized subset into persisted transcript-compatible history +- verify current parser path still works +- status: ✅ done + - grounded by: + - persisted assistant projection + - projected warning rows with source attribution + - projected thread-status rows + - projected execution-summary rows with history-completeness metadata + - green parser and exact-log fixtures + +5. status and warning authority +- keep lane status, thread status, and warning-source truth separate +- update bridge payloads before touching UI copy +- status: ✅ done + - grounded by: + - backend lane truth in runtime status + - selectable-vs-available distinction + - codex-native remains locked + - targeted UI copy no longer claims auth mode equals runtime lane + - projected thread-status authority in persisted history + - projected warning-source attribution in persisted history + - sign-off evidence for `process` versus `history` warning attribution + +6. `claude_team` feature-flagged exposure +- show lane only when the backend truth can already represent it honestly +- keep unsupported capabilities visibly unsupported +- status: ✅ done + - grounded by: + - lane-aware config vocabulary + - lane-aware connection/runtime copy + - lane-aware selector behavior + - backend env kept independent from auth mode + - locked-lane affordance in runtime settings surfaces + - targeted UI/runtime tests for locked-lane truth + +7. 
fixture and regression pass +- add the mandatory Phase-0 fixtures +- only then allow limited internal usage of the new lane +- status: ✅ done + - grounded by: + - resolver fixtures + - runtime status fixtures + - raw exec arg-builder fixtures + - raw JSONL mapper fixtures + - `claude_team` config/routing/UI fixtures + - transcript/replay/history fixtures + - thread-status authority fixtures + - exact-log compatibility fixtures + - repo-visible sign-off evidence package + +## Authority Order + +This is the most important part of the spec. + +### 1. Execution authority + +For the spike lane: + +1. raw `codex exec` JSONL output +2. normalized-event mapping +3. transcript-compatible projection +4. current `claude_team` transcript/read-model path + +Rule: + +- no UI surface consumes raw native events directly in Phase 0 + +### 2. History authority + +History truth order: + +1. explicit seam-owned completion or hydration source for the chosen lane +2. persisted transcript-compatible projection written by orchestrator +3. live event cache for activity only + +Rule: + +- live stream is never canonical history by itself + +### 3. Status authority + +Status truth must stay split by scope: + +1. native thread status +2. provider-lane status +3. host process/provisioning status + +Rules: + +- thread health is not inferred from process liveness +- provider-global runtime banners are not allowed to masquerade as thread-specific health +- if native thread status is unavailable on the chosen seam, UI must say degraded or unavailable, not synthesize `active` + +### 4. Warning authority + +Warning channels remain separate: + +1. native thread warnings +2. config/startup warnings +3. provisioning/process warnings + +Rules: + +- do not merge these channels into one generic warning field +- if a UI surface can only show one summary line, it must still preserve source attribution in detail text + +### 5. 
Launch-intent authority + +There are two different truths: + +- host launch intent +- live native thread defaults + +Rules: + +- `provider/model/effort` in launch config is launch intent only +- resumed native thread defaults may differ +- if they differ, UI must show either: + - inherited native defaults + - explicit override pending + - or forced fresh-thread policy + +### 6. Credential authority + +Rules: + +- old Codex lane auth truth and `codex-native` auth truth must not share one fake readiness source +- old lane may still use current app-side `OPENAI_API_KEY` flow +- `codex-native` must use only the credential contract actually required by the chosen seam +- UI must not infer native readiness from old-lane auth success + +## Phase 0 Capability Matrix + +Phase 0 should assume the following unless the spike proves otherwise: + +| Capability | Old Codex lane | `codex-native` spike lane | +| --- | --- | --- | +| Team launch | supported | supported behind flag | +| Transcript-compatible history | supported | required | +| Plugins | unsupported | unsupported in Phase 0 | +| MCP | unsupported or existing-lane-specific | unsupported unless explicitly proven on chosen seam | +| Skills | unsupported or existing-lane-specific | unsupported unless explicitly proven on chosen seam | +| Manual approvals | current lane semantics | unsupported or limited unless explicitly proven | +| Generic interactive prompts | n/a | unsupported in Phase 0 | +| Detached review | current lane semantics | unsupported in Phase 0 | +| Lane-aware status | partial | required | + +Practical rule: + +- Phase 0 defaults to conservative capability truth +- nothing upgrades from unsupported to supported by implication +- if the live seam only proves diagnostic readiness, capability must remain diagnostic-only + +## Current Lock Policy + +This is now a required Phase 0 rule, not a suggestion. 
+ +`codex-native` may be: + +- visible in runtime status +- visible in backend options +- resolved diagnostically + +But it must remain: + +- `selectable: false` +- non-default +- non-auto-resolved +- non-routable into live execution without an explicit execution-lane implementation +- protected by a targeted runtime error if manually forced too early + +Reason: + +- Phase 0 now has honest backend truth, real end-to-end native execution, and transcript projection +- the remaining lock is now a rollout-policy choice, not a missing-code problem +- unlocking the lane ahead of that explicit rollout decision would still create worse product truth than the current state + +## Normalized Event Schema + +Phase 0 does not need the full future schema. + +It does need a small, stable subset with explicit source attribution. + +The important distinction is: + +- one minimal schema is already implemented and should now be treated as frozen groundwork +- a richer schema is still allowed later, but only as an additive expansion + +### Current minimal schema already frozen in code + +Current grounded contract in `src/services/codexNative/normalizedEvents.ts`: + +```ts +type CodexNativeNormalizedEvent = + | { + type: 'thread_started' + threadId: string + } + | { + type: 'turn_started' + } + | { + type: 'assistant_text' + itemId: string + text: string + } + | { + type: 'usage_updated' + inputTokens: number + cachedInputTokens: number + outputTokens: number + } + | { + type: 'turn_completed' + } + | { + type: 'warning' + source: 'stderr' + text: string + } + | { + type: 'unsupported_raw_event' + rawType: string + payload: unknown + } +``` + +Rules for this already-landed minimal schema: + +- it is sufficient for the raw-exec spike groundwork +- it is not yet sufficient for final Phase 0 completion +- it must not be broken or renamed casually while the runner and projector are being wired +- any richer shape added next must be additive or accompanied by projector updates in the same slice + +### Target additive 
schema before Phase 0 can be called complete + +This is the richer schema the remaining implementation should converge toward: + +```ts +type NormalizedProviderId = 'anthropic' | 'codex' | 'gemini' +type NormalizedRuntimeLaneId = 'anthropic' | 'gemini-cli-sdk' | 'codex-adapter' | 'codex-api' | 'codex-native' + +type NativeThreadStatus = + | { type: 'not_loaded' } + | { type: 'idle' } + | { type: 'active'; activeFlags?: string[] } + | { type: 'system_error' } + +type NativeWarningSource = 'thread' | 'config' | 'process' | 'provisioning' + +type NormalizedRuntimeEvent = + | { + kind: 'thread_started' + provider: NormalizedProviderId + laneId: NormalizedRuntimeLaneId + threadId: string + requestId?: string + status?: NativeThreadStatus + timestamp: string + } + | { + kind: 'thread_status_changed' + provider: NormalizedProviderId + laneId: NormalizedRuntimeLaneId + threadId: string + status: NativeThreadStatus + timestamp: string + } + | { + kind: 'thread_defaults_restored' + provider: NormalizedProviderId + laneId: NormalizedRuntimeLaneId + threadId: string + model?: string + reasoningEffort?: string + timestamp: string + } + | { + kind: 'turn_started' + provider: NormalizedProviderId + laneId: NormalizedRuntimeLaneId + threadId: string + turnId?: string + requestId?: string + timestamp: string + } + | { + kind: 'assistant_text' + provider: NormalizedProviderId + laneId: NormalizedRuntimeLaneId + threadId: string + requestId?: string + text: string + isDelta: boolean + timestamp: string + } + | { + kind: 'reasoning' + provider: NormalizedProviderId + laneId: NormalizedRuntimeLaneId + threadId: string + requestId?: string + text?: string + timestamp: string + } + | { + kind: 'usage_updated' + provider: NormalizedProviderId + laneId: NormalizedRuntimeLaneId + threadId: string + requestId?: string + inputTokens?: number + outputTokens?: number + contextWindow?: number + timestamp: string + } + | { + kind: 'model_rerouted' + provider: NormalizedProviderId + laneId: 
NormalizedRuntimeLaneId + threadId: string + requestId?: string + configuredModel?: string + effectiveModel?: string + reasoningEffort?: string + timestamp: string + } + | { + kind: 'turn_plan_updated' + provider: NormalizedProviderId + laneId: NormalizedRuntimeLaneId + threadId: string + requestId?: string + summary?: string + timestamp: string + } + | { + kind: 'turn_diff_updated' + provider: NormalizedProviderId + laneId: NormalizedRuntimeLaneId + threadId: string + requestId?: string + summary?: string + timestamp: string + } + | { + kind: 'warning_emitted' + provider: NormalizedProviderId + laneId: NormalizedRuntimeLaneId + source: NativeWarningSource + threadId?: string + requestId?: string + message: string + detail?: string + timestamp: string + } + | { + kind: 'turn_completed' + provider: NormalizedProviderId + laneId: NormalizedRuntimeLaneId + threadId: string + requestId?: string + timestamp: string + } + | { + kind: 'turn_failed' + provider: NormalizedProviderId + laneId: NormalizedRuntimeLaneId + threadId: string + requestId?: string + error: string + timestamp: string + } +``` + +Schema rules: + +- every event carries `provider` and `laneId` +- every event is source-attributed +- thread status and warnings are not hidden inside generic `detailMessage` +- `requestId` is optional on the wire but mandatory once known +- expansion from the current minimal schema must be additive until projector and fixture coverage are in place + +## Transcript Projector Contract + +Phase 0 projector requirements: + +- produce persisted history that current `SessionParser` and exact-log readers can parse +- preserve request-correlation fields where available +- preserve board-task carrier fields +- never require `claude_team` to understand raw Codex item shapes + +Projector rules: + +1. `assistant_text` +- may append or extend assistant transcript content + +2. 
`usage_updated` +- does not need to become a visible assistant row +- may project into additive metadata or side-channel metadata +- must not be silently dropped if it is the only authoritative usage source + +3. `thread_status_changed` +- does not become canonical transcript history by default +- stays in normalized/status layer + +4. `warning_emitted` +- thread and config warnings should be projectable to later UI/debug surfaces +- do not force them into fake assistant rows + +5. `thread_defaults_restored` +- must not rewrite old launch config +- must remain explicit metadata + +6. `model_rerouted` +- must not overwrite configured model copy invisibly +- may project to normalized-only metadata in Phase 0 if transcript row shape has no truthful home + +## Raw Exec Spike Contract + +The spike runner must prove all of the following: + +- start a Codex-native session in a chosen working directory +- pass native credentials in the seam-native way +- capture JSONL events +- map them to normalized events +- persist transcript-compatible projection +- record: + - thread id + - executable identity + - whether run was `ephemeral` + - whether completion backfill existed + - whether final usage/model truth came from live stream or explicit seam-owned completion path + +The spike runner must explicitly capture these facts: + +- executable source: + - bundled + - external CLI +- executable version: + - exact reported version string when available +- runtime identity: + - backend lane id + - executable source + - executable version +- credential source: + - native API-key path + - or explicit unsupported state +- interactive capability: + - unsupported + - limited + - proven +- final history completeness: + - live-only + - backfilled + - explicit hydration required + +Current implementation note: + +- the spec is already grounded by one live local run +- the next required step is to turn that manual seam proof into a reusable runner contract +- until that happens, 
`codex-native` remains a locked diagnostic lane +- current code already enforces this lock from both status/selectability truth and live client guardrails + +## Status Contract + +Phase 0 status payload changes must allow `claude_team` to say all of the following truthfully: + +- lane exists but is not selected +- lane is selected but not verified +- lane is resolved but degraded +- lane is running but the thread is not loaded +- lane process is alive but the thread is in `systemError` + +Minimum required additions for the spike path: + +- keep `selectedBackendId` +- keep `resolvedBackendId` +- keep `availableBackends` +- keep native executable identity in diagnostic or detail truth once the runner exists +- do not let degraded transport erase backend truth +- keep thread health separate from provider-global health + +Current implementation note: + +- backend-level status truth is already in place +- thread-level status truth is not +- therefore current Phase 0 must still describe `codex-native` as execution-locked + +If native thread status is unavailable on the chosen seam: + +- surface `unknown` or `degraded` +- do not synthesize `active` + +## Warning Contract + +Phase 0 UI must be able to distinguish: + +- startup/config warning +- native thread warning +- provisioning/process warning + +Allowed compromise: + +- a single banner may summarize all warning presence + +Not allowed: + +- one combined warning string with no source attribution anywhere + +## Launch Intent vs Native Defaults Contract + +Phase 0 must choose one of these policies and implement it explicitly: + +1. fresh-thread only +2. resume with inherited native defaults +3. 
resume but force explicit override + +Default for the spike: + +- support resume only behind flag +- if resumed defaults differ from launch intent, keep that drift explicit + +Minimum required surfaced truth: + +- requested launch model/effort +- effective native defaults after resume, if known +- warning or degraded state when they differ + +## Credential Routing Contract + +Phase 0 must not reuse old-lane readiness assumptions. + +Rules: + +- `codex-native` readiness is computed only from the chosen seam's credential contract +- old Codex API-key success does not imply native-lane readiness +- missing or wrong native credentials must degrade only the native lane, not the entire provider story + +## Test Matrix + +Minimum must-exist tests for Phase 0: + +### `agent_teams_orchestrator` + +- `codex-native-api-key-routing` +- `native-binary-identity-metadata` +- `exec-headless-rejects-interactive-server-requests` +- `live-turn-stream-vs-hydrated-history` +- `thread-system-error-vs-process-alive` +- `thread-not-loaded-vs-runtime-still-running` +- `thread-warning-vs-config-warning-truth` +- `resume-persisted-thread-defaults-vs-launch-intent` +- `resume-model-switch-warning-vs-runtime-copy` +- `ephemeral-turn-completed-without-backfill` +- `non-ephemeral-completed-turn-backfill` +- `request-chain-invariants` + +### `claude_team` + +- `runtime-selector-visible-but-not-ready` +- `headless-lane-capability-copy` +- `native-lane-auth-copy` +- `exact-log-hydrated-after-live-stream` +- `approval-cleared-on-lifecycle` +- `native-thread-status-vs-process-copy` +- `warning-channel-copy` +- `launch-intent-vs-native-defaults-copy` + +## Required Evidence Package For Phase 0 Sign-off + +Phase 0 should not be declared complete from code inspection alone. + +Minimum sign-off evidence must include all of the following: + +1. one real successful `codex exec`-backed native run through the orchestrator lane +2. persisted transcript-compatible output from that run +3. 
recorded native executable identity for that run: + - source + - exact version string when available +4. parser proof that current `claude_team` transcript readers still parse it +5. exact-log or replay proof for both: + - `--ephemeral` + - non-ephemeral or explicit replacement hydration path +6. one degraded-path proof showing native lane failure does not erase old-lane fallback truth +7. one status proof showing process-alive does not masquerade as native thread healthy +8. one warning proof showing config warnings and native thread warnings remain attributable +9. green targeted test runs for: + - existing old-lane fallback/regression coverage + - new native-lane runner/mapper/projector coverage + +Practical rule: + +- if any one of the nine items above is missing, Phase 0 is still implementation-in-progress, not sign-off ready + +Recommended evidence placement: + +- keep sign-off artifacts close to this doc under `docs/research/` or another explicit repo-visible location +- do not rely only on terminal memory or one-off local runs as the sole proof of completion + +## Minimum Sign-off Command Package + +Phase 0 sign-off should include a reproducible command package, not only prose. 
+ +Minimum command set: + +### In `agent_teams_orchestrator` + +- `bun test src/services/runtimeBackends/codexBackendResolver.test.ts` +- `bun test src/services/runtimeBackends/registry.agentTeams.test.ts` +- `bun test src/services/codexNative/execRunner.test.ts` +- `bun test src/services/codexNative/jsonlMapper.test.ts` +- `bun test src/services/codexNative/transcriptProjector.test.ts` +- `bun test src/services/codexNative/statusAuthority.test.ts` +- `bun test src/services/codexNative/turnExecutor.test.ts` +- `bun test src/services/codexNative/signOffHarness.test.ts` +- `git diff --check` + +### In `claude_team` + +- `pnpm exec vitest run test/main/ipc/configValidation.test.ts` +- `pnpm exec vitest run test/main/services/runtime/ProviderConnectionService.test.ts` +- `pnpm exec vitest run test/main/services/runtime/providerAwareCliEnv.test.ts` +- `pnpm exec vitest run test/main/services/runtime/ClaudeMultimodelBridgeService.test.ts` +- `pnpm exec vitest run test/renderer/components/runtime/providerConnectionUi.test.ts` +- `pnpm exec vitest run test/renderer/components/runtime/ProviderRuntimeSettingsDialog.test.ts` +- `pnpm exec vitest run test/renderer/components/cli/CliStatusVisibility.test.ts` +- `pnpm exec vitest run test/main/utils/jsonl.test.ts` +- `pnpm exec vitest run test/main/services/parsing/SessionParser.test.ts` +- `pnpm exec vitest run test/main/services/team/BoardTaskExactLogStrictParser.test.ts` +- `git diff --check` + +### Manual native-lane proof + +- one real `codex exec --json` run through the chosen orchestrator seam +- `bun run ./scripts/codex-native-phase0-signoff.ts --cwd /tmp --prompt 'Reply only with OK' --ephemeral` +- `bun run ./scripts/codex-native-phase0-signoff.ts --cwd /tmp --prompt 'Reply only with OK' --persistent` +- one recorded native executable identity proof: + - source + - version string when available +- one explicit `--ephemeral` proof +- one non-ephemeral or explicit replacement-hydration proof +- one degraded-lane proof 
that old Codex fallback still stays truthful + +Rule: + +- if the command package is not written down and reproducible, the evidence package is incomplete even if one local run looked good + +## Tests Already In Place + +The following tests already exist and should remain green while Phase 0 continues: + +### `agent_teams_orchestrator` + +- `src/services/runtimeBackends/codexBackendResolver.test.ts` +- `src/services/runtimeBackends/registry.agentTeams.test.ts` +- `src/services/codexNative/execRunner.test.ts` +- `src/services/codexNative/jsonlMapper.test.ts` +- `src/services/codexNative/transcriptProjector.test.ts` +- `src/services/codexNative/statusAuthority.test.ts` +- `src/services/codexNative/turnExecutor.test.ts` +- `src/services/codexNative/signOffHarness.test.ts` + +### `claude_team` + +- `test/main/services/parsing/CodexNativePhase0Smoke.test.ts` +- `test/main/ipc/configValidation.test.ts` +- `test/main/utils/jsonl.test.ts` +- `test/main/services/parsing/SessionParser.test.ts` +- `test/main/services/runtime/ProviderConnectionService.test.ts` +- `test/main/services/runtime/providerAwareCliEnv.test.ts` +- `test/main/services/runtime/ClaudeMultimodelBridgeService.test.ts` +- `test/main/services/team/BoardTaskExactLogStrictParser.test.ts` +- `test/renderer/components/runtime/providerConnectionUi.test.ts` +- `test/renderer/components/runtime/ProviderRuntimeSettingsDialog.test.ts` +- `test/renderer/components/cli/CliStatusVisibility.test.ts` + +## Exact Remaining Work Before Phase 0 Can Be Called Complete + +There is no remaining required Phase 0 code work. + +The remaining steps are rollout-policy decisions: + +1. decide whether to keep the lane locked through early internal rollout +2. 
if unlock is proposed later, make that a separate rollout decision rather than a hidden consequence of Phase 0 completion + +## Remaining Implementation Surface From Today + +The original Phase 0 estimate was: + +- `agent_teams_orchestrator`: `450-1100` lines +- `claude_team`: `180-450` lines +- tests: `250-700` lines + +That estimate still looks directionally correct for total Phase 0 scope. + +But from the current implementation state, the remaining required surface is now: + +- `agent_teams_orchestrator`: `0` lines required for Phase 0 +- `claude_team`: `0` lines required for Phase 0 +- tests and fixtures: `0` lines required for Phase 0 + +Remaining total from today: + +- roughly `0` lines of required Phase 0 code +- rollout decisions remain separate from implementation completion + +Practical reading: + +- the big architecture uncertainty is mostly resolved +- execution wiring, projection, parser truth, and proof fixtures are already landed +- the remaining work is rollout policy only + +## No-Go Rules For Starting Phase 1 Code + +Do not move past Phase 0 if any of these remain ambiguous: + +- whether the chosen seam is headless-limited +- whether final history completeness depends on seam-specific backfill +- whether thread status is authoritative or only guessed from process truth +- whether native thread warnings can be attributed separately from config and provisioning warnings +- whether resumed native defaults can diverge from launch intent without visible warning +- whether native credentials are routed independently from the old Codex lane + +## Estimated Implementation Surface + +For Phase 0 only: + +- `agent_teams_orchestrator`: `450-1100` lines +- `claude_team`: `180-450` lines +- tests: `250-700` lines + +Total Phase 0 expectation: + +- roughly `900-2250` lines + +That is intentionally smaller than the broader first-wave rollout. 
+ +## Practical Rule + +Phase 0 is successful if it proves one thing: + +- we can run a real `codex-native` lane and keep our current transcript/UI world honest without pretending Codex is just another Anthropic-shaped transport. diff --git a/docs/research/codex-native-runtime-phase-0-signoff-evidence.md b/docs/research/codex-native-runtime-phase-0-signoff-evidence.md new file mode 100644 index 00000000..226b0522 --- /dev/null +++ b/docs/research/codex-native-runtime-phase-0-signoff-evidence.md @@ -0,0 +1,226 @@ +# Codex Native Runtime - Phase 0 Sign-off Evidence + +Captured on 2026-04-19. + +This file is the repo-visible evidence package referenced by: + +- [codex-native-runtime-phase-0-implementation-spec.md](./codex-native-runtime-phase-0-implementation-spec.md) + +## Verdict + +Phase 0 sign-off evidence is now captured. + +What this proves: + +- the `codex-native` lane executes through the raw `codex exec --json` seam +- persisted transcript projection remains parseable by current `claude_team` readers +- `ephemeral` and `persistent` runs keep different history-completeness truth +- thread status, warning attribution, executable identity, and usage authority survive end-to-end +- old Codex lane fallback truth remains covered by targeted regression tests + +What this does **not** mean: + +- `codex-native` should be unlocked for general runtime selection +- `auto` should start resolving to `codex-native` +- broader plugin or interactive capability claims are now safe + +## Command Package + +### `agent_teams_orchestrator` + +Executed: + +```bash +bun test src/services/codexNative/signOffHarness.test.ts \ + src/services/codexNative/statusAuthority.test.ts \ + src/services/codexNative/transcriptProjector.test.ts \ + src/services/codexNative/turnExecutor.test.ts \ + src/services/codexNative/execRunner.test.ts \ + src/services/codexNative/jsonlMapper.test.ts \ + src/services/runtimeBackends/codexBackendResolver.test.ts \ + 
src/services/runtimeBackends/registry.agentTeams.test.ts +``` + +Observed result: + +- `27 pass` +- `0 fail` + +### `claude_team` + +Executed: + +```bash +pnpm exec vitest run \ + test/main/utils/jsonl.test.ts \ + test/main/services/parsing/SessionParser.test.ts \ + test/main/services/team/BoardTaskExactLogStrictParser.test.ts \ + test/main/ipc/configValidation.test.ts \ + test/main/services/runtime/ProviderConnectionService.test.ts \ + test/main/services/runtime/providerAwareCliEnv.test.ts \ + test/main/services/runtime/ClaudeMultimodelBridgeService.test.ts \ + test/renderer/components/runtime/providerConnectionUi.test.ts \ + test/renderer/components/runtime/ProviderRuntimeSettingsDialog.test.ts \ + test/renderer/components/cli/CliStatusVisibility.test.ts +``` + +Observed result: + +- `134 pass` +- `0 fail` + +### Diff cleanliness + +Executed: + +```bash +git diff --check +``` + +Observed result: + +- clean in both worktrees + +## Live Native Run Evidence + +### Common live-run facts + +Observed from both runs: + +- native binary path: `/usr/local/bin/codex` +- native binary source: `system-path` +- native binary version: `codex-cli 0.117.0` +- credential input source for the sign-off harness: `OPENAI_API_KEY` +- credential source observed by the runner: `explicit-api-key` +- capability profile: `headless-limited` +- final assistant text: `OK` + +### Ephemeral run + +Executed: + +```bash +bun run ./scripts/codex-native-phase0-signoff.ts \ + --cwd /tmp \ + --prompt 'Reply only with OK' \ + --ephemeral +``` + +Observed result: + +- thread id: `019da680-6f43-7e10-824c-4d985bcdca12` +- completion policy: `ephemeral` +- final history completeness: `live-only` +- final usage authority: `live-turn-completed` +- assistant usage: + - input tokens: `23616` + - cached input tokens: `0` + - output tokens: `42` + +History authority proof: + +- projected warning subtype: `codex_native_warning` +- projected warning source: `history` +- observed warning text contained: + - 
`thread/read failed while backfilling turn items for turn completion` + - `ephemeral threads do not support includeTurns` + +This is the explicit proof that an `ephemeral` live stream does **not** equal canonical hydrated history. + +### Persistent run + +Executed: + +```bash +bun run ./scripts/codex-native-phase0-signoff.ts \ + --cwd /tmp \ + --prompt 'Reply only with OK' \ + --persistent +``` + +Observed result: + +- thread id: `019da680-6f42-77c0-94f1-4e450a69d1f1` +- completion policy: `persistent` +- final history completeness: `explicit-hydration-required` +- final usage authority: `live-turn-completed` +- assistant usage: + - input tokens: `23616` + - cached input tokens: `0` + - output tokens: `33` + +This is the explicit proof that persistent native runs keep a different history-completeness contract from `ephemeral` runs. + +## Warning Attribution Proof + +The live runs produced both: + +- process/runtime warnings +- history-completeness warnings + +Observed process-attributed warnings included: + +- plugin cache / featured plugins unauthorized warnings +- state DB migration mismatch warnings +- shell snapshot timeout warnings +- MCP process-group termination warnings + +Observed history-attributed warning included: + +- `thread/read failed while backfilling turn items for turn completion: ... ephemeral threads do not support includeTurns` + +This proves the lane now keeps `process` and `history` warning truth distinct in projected transcript rows. + +## Thread-status Proof + +Observed projected system rows included: + +- `codex_native_thread_status` + - `running` + - `completed` + +This proves the lane now writes native thread-status authority into persisted transcript-compatible rows instead of forcing UI and replay consumers to infer health from provider-global process truth. 
+ +## Parser And Exact-log Proof + +Covered by green targeted tests: + +- `test/main/utils/jsonl.test.ts` +- `test/main/services/parsing/SessionParser.test.ts` +- `test/main/services/team/BoardTaskExactLogStrictParser.test.ts` + +These tests prove: + +- projected assistant usage remains parseable +- projected warning/source metadata remains parseable +- projected execution-summary/history metadata remains parseable +- exact-log readers do not drop the native authority rows + +## Degraded Old-lane Fallback Proof + +Covered by green targeted tests: + +- `src/services/runtimeBackends/codexBackendResolver.test.ts` +- `src/services/runtimeBackends/registry.agentTeams.test.ts` + +Those tests prove: + +- `auto` still does not silently resolve to `codex-native` +- native lane remains unavailable without: + - feature flag + - binary + - `CODEX_API_KEY` +- old Codex lane remains the truthful fallback when native is absent or degraded + +## Sign-off Conclusion + +✅ The Phase 0 code path is implementation-complete and evidence-backed. + +⚠️ The lane should still remain: + +- feature-flagged +- non-default +- non-auto-resolved +- non-selectable for normal runtime switching + +That remaining lock is now a rollout-policy choice, not a missing-code problem. diff --git a/docs/research/codex-native-runtime-phase-1-signoff-evidence.md b/docs/research/codex-native-runtime-phase-1-signoff-evidence.md new file mode 100644 index 00000000..4aedd4f7 --- /dev/null +++ b/docs/research/codex-native-runtime-phase-1-signoff-evidence.md @@ -0,0 +1,204 @@ +# Codex Native Runtime - Phase 1 Sign-off Evidence + +Captured on 2026-04-19. + +This file records the repo-visible evidence package for the Phase 1 exit gate described in: + +- [codex-native-runtime-integration-decision.md](./codex-native-runtime-integration-decision.md) + +## Verdict + +Phase 1 internal unlock preparation is now complete. 
+ +What this proves: + +- `codex-native` can be enabled intentionally through the internal unlock policy +- old Codex lanes remain the default and `auto` still resolves to the old adapter/API world +- lane-specific rollout states are explicit and honest: + - `locked` + - `ready` + - `authentication-required` + - `runtime-missing` +- those states now survive all the way through: + - orchestrator runtime status + - bridge parsing + - dashboard/runtime copy + - settings/runtime copy + - provisioning summaries + +What this does **not** mean: + +- `codex-native` should become the default Codex lane +- `auto` should start resolving to `codex-native` +- broader approval, plugin, or interactive parity claims are now safe +- limited internal unlock has already started + +That is Phase 2 territory. + +## Command Package + +### `agent_teams_orchestrator` + +Executed: + +```bash +bun test src/services/runtimeBackends/codexBackendResolver.test.ts \ + src/services/runtimeBackends/registry.agentTeams.test.ts \ + src/services/runtimeBackends/registry.codexNativeStates.test.ts +``` + +Observed result: + +- `14 pass` +- `0 fail` + +Executed: + +```bash +bun run signoff:codex-native-phase1 +``` + +Observed result: + +- exit code `0` +- five live CLI rollout scenarios verified: + - `locked` + - `internal-unlock-ready` + - `authentication-required` + - `runtime-missing` + - `auto-fallback-stays-old-lane` + +### `claude_team` + +Executed: + +```bash +pnpm exec vitest run \ + test/main/services/runtime/ClaudeMultimodelBridgeService.test.ts \ + test/main/services/runtime/providerAwareCliEnv.test.ts \ + test/main/services/runtime/ProviderConnectionService.test.ts \ + test/renderer/components/runtime/providerConnectionUi.test.ts \ + test/renderer/components/runtime/ProviderRuntimeBackendSelector.test.ts \ + test/renderer/components/runtime/ProviderRuntimeSettingsDialog.test.ts \ + test/renderer/components/team/dialogs/ProvisioningProviderStatusList.test.ts \ + 
test/renderer/components/cli/CliStatusVisibility.test.ts \ + test/main/services/parsing/CodexNativePhase0Smoke.test.ts +``` + +Observed result: + +- `9` files passed +- `83` tests passed +- `0` failures + +## Live CLI Rollout Evidence + +Runner: + +```bash +runtime status --provider codex --json +``` + +Observed live scenarios: + +### Locked + +- selected backend: `codex-native` +- resolved backend: `codex-native` +- provider status: `Codex native runtime ready` +- native option: + - `selectable=false` + - `available=true` + - `state=locked` + - `audience=internal` + - `statusMessage=Ready but locked` + +### Internal unlock ready + +- selected backend: `codex-native` +- resolved backend: `codex-native` +- provider status: `Codex native runtime ready` +- native option: + - `selectable=true` + - `available=true` + - `state=ready` + - `audience=internal` + - `statusMessage=Ready for internal use` + +### Authentication required + +- selected backend: `codex-native` +- resolved backend: `null` +- provider status: `Codex native runtime not ready` +- native option: + - `selectable=false` + - `available=false` + - `state=authentication-required` + - `audience=internal` + - `statusMessage=Authentication required` + +### Runtime missing + +- selected backend: `codex-native` +- resolved backend: `null` +- provider status: `Codex native runtime not ready` +- native option: + - `selectable=false` + - `available=false` + - `state=runtime-missing` + - `audience=internal` + - `statusMessage=Codex CLI not found` + +### Auto fallback stays on the old lane + +- selected backend: `auto` +- resolved backend: `api` +- provider status: `Resolved to OpenAI API` +- native option remains visible for internal rollout: + - `selectable=true` + - `available=true` + - `state=ready` + - `audience=internal` + - `statusMessage=Ready for internal use` + +This is the explicit proof that internal unlock availability does **not** mutate `auto` resolution. 
+ +## App-facing Truth Proof + +Covered by green targeted tests: + +- `test/main/services/runtime/ClaudeMultimodelBridgeService.test.ts` +- `test/main/services/runtime/providerAwareCliEnv.test.ts` +- `test/main/services/runtime/ProviderConnectionService.test.ts` +- `test/renderer/components/runtime/providerConnectionUi.test.ts` +- `test/renderer/components/runtime/ProviderRuntimeBackendSelector.test.ts` +- `test/renderer/components/runtime/ProviderRuntimeSettingsDialog.test.ts` +- `test/renderer/components/team/dialogs/ProvisioningProviderStatusList.test.ts` +- `test/renderer/components/cli/CliStatusVisibility.test.ts` + +These tests prove: + +- internal unlock state survives bridge parsing +- internal unlock env survives provider-aware child env building +- dashboard and settings do not flatten native rollout states into generic `Connected via API key` +- locked/runtime-missing/auth-required states stay visible in user-facing copy +- provisioning summaries keep native rollout state visible + +## Phase 1 Exit Gate Conclusion + +✅ The Phase 1 exit gate is satisfied. + +The lane can now be enabled intentionally by internal users, while: + +- old Codex lanes remain the safe default +- `auto` still avoids `codex-native` +- degraded or blocked native states remain explicit and honest + +⚠️ The lane should still remain: + +- non-default +- explicitly internal +- rollout-gated +- conservative in capability claims + +The next step is **Phase 2 - limited internal unlock**, not broad rollout. diff --git a/docs/research/codex-native-runtime-phase-4-signoff-evidence.md b/docs/research/codex-native-runtime-phase-4-signoff-evidence.md new file mode 100644 index 00000000..27cffb6a --- /dev/null +++ b/docs/research/codex-native-runtime-phase-4-signoff-evidence.md @@ -0,0 +1,199 @@ +# Codex Native Runtime - Phase 4 Sign-off Evidence + +Captured on 2026-04-19. + +This file records the repo-visible evidence package for the final native-only Codex cutover. 
+ +Related documents: + +- [codex-native-runtime-integration-decision.md](./codex-native-runtime-integration-decision.md) +- [codex-native-runtime-phase-1-signoff-evidence.md](./codex-native-runtime-phase-1-signoff-evidence.md) + +## Verdict + +Phase 4 legacy removal is now complete. + +What this proves: + +- `codex-native` is now the only Codex runtime lane +- old `adapter` and `api` Codex lanes are no longer launchable through active runtime code paths +- Codex runtime status now exposes a single native option instead of a mixed legacy/native selector +- stored legacy backend values normalize forward to `codex-native` +- UI-facing Codex status, model availability, launch identity, replay parsing, and provisioning all remain truthful after legacy removal + +What this does **not** mean: + +- plugin execution parity is now guaranteed for multimodel Codex sessions +- broader app-server or interactive-request parity has been added +- Codex runtime failures silently fall back to another hidden Codex implementation + +## Command Package + +### `agent_teams_orchestrator` + +Executed: + +```bash +bun test src/services/runtimeBackends/codexBackendResolver.test.ts \ + src/services/runtimeBackends/registry.codexNativeStates.test.ts \ + src/services/runtimeBackends/registry.agentTeams.test.ts \ + src/utils/swarm/spawnUtils.test.ts +``` + +Observed result: + +- `23 pass` +- `0 fail` + +Executed: + +```bash +bun run signoff:codex-native-phase4 +``` + +Observed result: + +- exit code `0` +- four live CLI native-only scenarios verified: + - `ready` + - `authentication-required` + - `runtime-missing` + - `openai-api-key-also-works` + +### `claude_team` + +Executed: + +```bash +pnpm exec vitest run \ + test/main/services/runtime/ClaudeMultimodelBridgeService.test.ts \ + test/main/services/runtime/providerAwareCliEnv.test.ts \ + test/main/services/runtime/ProviderConnectionService.test.ts \ + test/main/ipc/configValidation.test.ts \ + 
test/main/services/team/TeamProvisioningService.test.ts \ + test/main/services/parsing/CodexNativePhase0Smoke.test.ts \ + test/main/services/parsing/SessionParser.test.ts \ + test/main/services/team/BoardTaskExactLogStrictParser.test.ts \ + test/renderer/components/runtime/providerConnectionUi.test.ts \ + test/renderer/components/runtime/ProviderRuntimeBackendSelector.test.ts \ + test/renderer/components/runtime/ProviderRuntimeSettingsDialog.test.ts \ + test/renderer/components/cli/CliStatusVisibility.test.ts \ + test/renderer/components/team/dialogs/ProvisioningProviderStatusList.test.ts \ + test/renderer/components/team/dialogs/launchDialogPrefill.test.ts \ + test/renderer/utils/memberRuntimeSummary.test.ts \ + test/renderer/utils/teamModelAvailability.test.ts +``` + +Observed result: + +- `16` files passed +- `180` tests passed +- `0` failures + +## Live Native-only Status Evidence + +Runner: + +```bash +runtime status --provider codex --json +``` + +Observed live scenarios: + +### Ready + +- selected backend: `codex-native` +- resolved backend: `codex-native` +- provider status: `Codex native runtime ready` +- native option: + - `selectable=true` + - `available=true` + - `state=ready` + - `audience=general` + - `statusMessage=Ready` + +### Authentication required + +- selected backend: `codex-native` +- resolved backend: `null` +- provider status: `Codex native runtime unavailable` +- native option: + - `selectable=false` + - `available=false` + - `state=authentication-required` + - `audience=general` + - `statusMessage=Authentication required` + +### Runtime missing + +- selected backend: `codex-native` +- resolved backend: `null` +- provider status: `Codex native runtime unavailable` +- native option: + - `selectable=false` + - `available=false` + - `state=runtime-missing` + - `audience=general` + - `statusMessage=Codex CLI not found` + +### `OPENAI_API_KEY` also works + +- selected backend: `codex-native` +- resolved backend: `codex-native` +- provider 
status: `Codex native runtime ready` +- explicit proof that the native lane still accepts: + - `CODEX_API_KEY` + - or `OPENAI_API_KEY` + +This is the explicit proof that the final cutover no longer depends on a legacy adapter/API runtime seam while still preserving the supported credential surface. + +## App-facing Native-only Truth Proof + +Covered by green targeted tests: + +- `test/main/services/runtime/ClaudeMultimodelBridgeService.test.ts` +- `test/main/services/runtime/providerAwareCliEnv.test.ts` +- `test/main/services/runtime/ProviderConnectionService.test.ts` +- `test/main/ipc/configValidation.test.ts` +- `test/main/services/team/TeamProvisioningService.test.ts` +- `test/main/services/parsing/CodexNativePhase0Smoke.test.ts` +- `test/main/services/parsing/SessionParser.test.ts` +- `test/main/services/team/BoardTaskExactLogStrictParser.test.ts` +- `test/renderer/components/runtime/providerConnectionUi.test.ts` +- `test/renderer/components/runtime/ProviderRuntimeBackendSelector.test.ts` +- `test/renderer/components/runtime/ProviderRuntimeSettingsDialog.test.ts` +- `test/renderer/components/cli/CliStatusVisibility.test.ts` +- `test/renderer/components/team/dialogs/ProvisioningProviderStatusList.test.ts` +- `test/renderer/components/team/dialogs/launchDialogPrefill.test.ts` +- `test/renderer/utils/memberRuntimeSummary.test.ts` +- `test/renderer/utils/teamModelAvailability.test.ts` + +These tests prove: + +- legacy Codex backend values normalize forward to `codex-native` +- settings and dashboard now describe Codex as native-first, not adapter/API-first +- provider backend identity survives team launch, relaunch, and launch-prefill flows +- parser and exact-log readers stay truthful for native transcript authority rows +- provisioning summaries and member runtime summaries no longer flatten native truth into old Codex copy +- team model availability is keyed to the native runtime path instead of old ChatGPT-subscription heuristics + +## Legacy Removal Proof + 
+Covered by green targeted tests and runtime sign-off: + +- orchestrator runtime backend resolver now exposes only `codex-native` +- runtime registry now exposes a single Codex backend option +- no active runtime branch launches Codex through: + - `adapter` + - `api` +- old transport-only smoke/signoff scripts tied to legacy Codex runtime were removed + +This is the explicit proof that Phase 4 is a real cutover, not just a UI relabeling. + +## Sign-off Conclusion + +✅ The Phase 4 exit gate is satisfied. + +Codex inside the multimodel runtime is now native-only. + +There is no longer a product-supported legacy Codex runtime lane to roll back to inside normal UI flows. diff --git a/package.json b/package.json index 9809f68c..f1a8bb6c 100644 --- a/package.json +++ b/package.json @@ -301,7 +301,7 @@ } ] }, - "packageManager": "pnpm@10.25.0+sha512.5e82639027af37cf832061bcc6d639c219634488e0f2baebe785028a793de7b525ffcd3f7ff574f5e9860654e098fe852ba8ac5dd5cefe1767d23a020a92f501", + "packageManager": "pnpm@10.33.0+sha512.10568bb4a6afb58c9eb3630da90cc9516417abebd3fabbe6739f0ae795728da1491e9db5a544c76ad8eb7570f5c4bb3d6c637b2cb41bfdcdb47fa823c8649319", "pnpm": { "onlyBuiltDependencies": [ "electron", diff --git a/packages/agent-graph/src/ports/GraphConfigPort.ts b/packages/agent-graph/src/ports/GraphConfigPort.ts index 6065bfe2..8b5ae57b 100644 --- a/packages/agent-graph/src/ports/GraphConfigPort.ts +++ b/packages/agent-graph/src/ports/GraphConfigPort.ts @@ -32,6 +32,7 @@ export interface GraphConfigPort { }; // ─── Filters (show/hide node kinds) ──────────────────────────────────── + showActivity?: boolean; showTasks?: boolean; showProcesses?: boolean; showCompletedTasks?: boolean; diff --git a/packages/agent-graph/src/ui/GraphControls.tsx b/packages/agent-graph/src/ui/GraphControls.tsx index 801b4a8b..54e632cf 100644 --- a/packages/agent-graph/src/ui/GraphControls.tsx +++ b/packages/agent-graph/src/ui/GraphControls.tsx @@ -6,6 +6,7 @@ import { useCallback, useEffect, useRef, 
useState } from 'react'; import * as Tooltip from '@radix-ui/react-tooltip'; import { + Activity, Columns3, Expand, Settings2, @@ -26,6 +27,7 @@ import { } from 'lucide-react'; export interface GraphFilterState { + showActivity: boolean; showTasks: boolean; showProcesses: boolean; showEdges: boolean; @@ -219,6 +221,13 @@ export function GraphControls({ border: '1px solid rgba(100, 200, 255, 0.12)', }} > + toggle('showActivity')} + icon={} + label="Activity" + block + /> toggle('showTasks')} diff --git a/packages/agent-graph/src/ui/GraphView.tsx b/packages/agent-graph/src/ui/GraphView.tsx index 4f26e365..3e1c75db 100644 --- a/packages/agent-graph/src/ui/GraphView.tsx +++ b/packages/agent-graph/src/ui/GraphView.tsx @@ -70,6 +70,7 @@ export interface GraphViewProps { onSelectNode: (nodeId: string) => void; }) => React.ReactNode; renderHud?: (props: { + filters: GraphFilterState; getLaunchAnchorScreenPlacement: ( leadNodeId: string, ) => { x: number; y: number; scale: number; visible: boolean } | null; @@ -112,6 +113,7 @@ export function GraphView({ const [selectedEdgeId, setSelectedEdgeId] = useState(null); const [interactionLocked, setInteractionLocked] = useState(false); const [filters, setFilters] = useState({ + showActivity: config?.showActivity ?? true, showTasks: config?.showTasks ?? true, showProcesses: config?.showProcesses ?? true, showEdges: true, @@ -1016,6 +1018,7 @@ export function GraphView({ {renderHud ? (
{renderHud({ + filters, getLaunchAnchorScreenPlacement, getActivityWorldRect, getTransientHandoffSnapshot, diff --git a/runtime.lock.json b/runtime.lock.json index 54508861..36853e08 100644 --- a/runtime.lock.json +++ b/runtime.lock.json @@ -1,27 +1,27 @@ { - "version": "0.0.3", - "sourceRef": "v0.0.3", + "version": "0.0.4", + "sourceRef": "v0.0.4", "sourceRepository": "777genius/agent_teams_orchestrator", "releaseRepository": "777genius/claude_agent_teams_ui", "releaseTag": "v1.2.0", "assets": { "darwin-arm64": { - "file": "agent-teams-runtime-darwin-arm64-v0.0.3.tar.gz", + "file": "agent-teams-runtime-darwin-arm64-v0.0.4.tar.gz", "archiveKind": "tar.gz", "binaryName": "claude-multimodel" }, "darwin-x64": { - "file": "agent-teams-runtime-darwin-x64-v0.0.3.tar.gz", + "file": "agent-teams-runtime-darwin-x64-v0.0.4.tar.gz", "archiveKind": "tar.gz", "binaryName": "claude-multimodel" }, "linux-x64": { - "file": "agent-teams-runtime-linux-x64-v0.0.3.tar.gz", + "file": "agent-teams-runtime-linux-x64-v0.0.4.tar.gz", "archiveKind": "tar.gz", "binaryName": "claude-multimodel" }, "win32-x64": { - "file": "agent-teams-runtime-win32-x64-v0.0.3.zip", + "file": "agent-teams-runtime-win32-x64-v0.0.4.zip", "archiveKind": "zip", "binaryName": "claude-multimodel.exe" } diff --git a/src/features/agent-graph/renderer/adapters/TeamGraphAdapter.ts b/src/features/agent-graph/renderer/adapters/TeamGraphAdapter.ts index 69b58351..179d4e1d 100644 --- a/src/features/agent-graph/renderer/adapters/TeamGraphAdapter.ts +++ b/src/features/agent-graph/renderer/adapters/TeamGraphAdapter.ts @@ -11,8 +11,10 @@ import { getUnreadCount } from '@renderer/services/commentReadStorage'; import { agentAvatarUrl, + buildMemberAvatarMap, buildMemberLaunchPresentation, getMemberRuntimeAdvisoryLabel, + resolveMemberAvatarUrl, } from '@renderer/utils/memberHelpers'; import { buildTeamProvisioningPresentation } from '@renderer/utils/teamProvisioningPresentation'; import { formatTeamRuntimeSummary } from 
'@renderer/utils/teamRuntimeSummary'; @@ -143,6 +145,7 @@ export class TeamGraphAdapter { const leadId = `lead:${teamName}`; const leadName = TeamGraphAdapter.#getLeadMemberName(teamData, teamName); const memberNodeIdByAlias = TeamGraphAdapter.#buildMemberNodeIdByAlias(teamData, teamName); + const avatarMap = buildMemberAvatarMap(teamData.members); const provisioningPresentation = buildTeamProvisioningPresentation({ progress: provisioningProgress, members: teamData.members, @@ -158,6 +161,7 @@ export class TeamGraphAdapter { teamData, teamName, leadName, + avatarMap, pendingApprovalAgents, leadActivity, leadContext, @@ -173,6 +177,7 @@ export class TeamGraphAdapter { teamData, teamName, memberNodeIdByAlias, + avatarMap, spawnStatuses, pendingApprovalAgents, activeTools, @@ -369,6 +374,7 @@ export class TeamGraphAdapter { data: TeamGraphData, teamName: string, leadName: string, + avatarMap: ReadonlyMap, pendingApprovalAgents?: Set, leadActivity?: LeadActivityState, leadContext?: LeadContextUsage, @@ -428,7 +434,9 @@ export class TeamGraphAdapter { launchVisualState: leadLaunchPresentation?.launchVisualState ?? undefined, launchStatusLabel: leadLaunchPresentation?.launchStatusLabel ?? undefined, contextUsage: percent != null ? Math.max(0, Math.min(1, percent / 100)) : undefined, - avatarUrl: agentAvatarUrl(leadName, 64), + avatarUrl: leadMember + ? resolveMemberAvatarUrl(leadMember, avatarMap, 64) + : agentAvatarUrl(leadName, 64), pendingApproval, activeTool: activeTool ? { @@ -465,6 +473,7 @@ export class TeamGraphAdapter { data: TeamGraphData, teamName: string, memberNodeIdByAlias: ReadonlyMap, + avatarMap: ReadonlyMap, spawnStatuses?: Record, pendingApprovalAgents?: Set, activeTools?: Record>, @@ -520,7 +529,7 @@ export class TeamGraphAdapter { spawnStatus: spawn?.status, launchVisualState: launchPresentation.launchVisualState ?? undefined, launchStatusLabel: launchPresentation.launchStatusLabel ?? 
undefined, - avatarUrl: agentAvatarUrl(member.name, 64), + avatarUrl: resolveMemberAvatarUrl(member, avatarMap, 64), currentTaskId: member.currentTaskId ?? undefined, currentTaskSubject: member.currentTaskId ? data.tasks.find((t) => t.id === member.currentTaskId)?.subject diff --git a/src/features/agent-graph/renderer/ui/GraphNodePopover.tsx b/src/features/agent-graph/renderer/ui/GraphNodePopover.tsx index a25e3c84..f6794aa1 100644 --- a/src/features/agent-graph/renderer/ui/GraphNodePopover.tsx +++ b/src/features/agent-graph/renderer/ui/GraphNodePopover.tsx @@ -4,9 +4,15 @@ * composes project-specific UI, selectors, and presentation helpers. */ +import { useMemo } from 'react'; + import { Badge } from '@renderer/components/ui/badge'; import { Button } from '@renderer/components/ui/button'; -import { agentAvatarUrl, buildMemberLaunchPresentation } from '@renderer/utils/memberHelpers'; +import { + agentAvatarUrl, + buildMemberAvatarMap, + buildMemberLaunchPresentation, +} from '@renderer/utils/memberHelpers'; import { buildTeamProvisioningPresentation } from '@renderer/utils/teamProvisioningPresentation'; import { ExternalLink, Loader2, MessageSquare, Plus, User } from 'lucide-react'; @@ -291,7 +297,6 @@ const MemberPopoverContent = ({ node.domainRef.kind === 'member' || node.domainRef.kind === 'lead' ? node.domainRef.teamName : ''; - const avatarSrc = node.avatarUrl ?? agentAvatarUrl(memberName, 64); const { teamData, teamMembers, @@ -301,6 +306,8 @@ const MemberPopoverContent = ({ memberSpawnSnapshot, memberSpawnStatuses, } = useGraphMemberPopoverContext(teamName, memberName); + const avatarMap = useMemo(() => buildMemberAvatarMap(teamMembers), [teamMembers]); + const avatarSrc = node.avatarUrl ?? avatarMap.get(memberName) ?? agentAvatarUrl(memberName, 64); const member = teamMembers.find((candidate) => candidate.name === memberName) ?? 
null; const provisioningPresentation = teamData && teamName diff --git a/src/features/agent-graph/renderer/ui/TeamGraphOverlay.tsx b/src/features/agent-graph/renderer/ui/TeamGraphOverlay.tsx index 69e3bc65..ea4163f3 100644 --- a/src/features/agent-graph/renderer/ui/TeamGraphOverlay.tsx +++ b/src/features/agent-graph/renderer/ui/TeamGraphOverlay.tsx @@ -152,7 +152,7 @@ export const TeamGraphOverlay = ({ getNodeWorldPosition?: (nodeId: string) => { x: number; y: number } | null; focusEdgeIds?: ReadonlySet | null; }; - const { getViewportSize, focusNodeIds } = extraHudProps; + const { getViewportSize, focusNodeIds, filters } = extraHudProps; return ( <> @@ -174,6 +174,7 @@ export const TeamGraphOverlay = ({ getNodeWorldPosition={extraHudProps.getNodeWorldPosition} getViewportSize={getViewportSize} focusNodeIds={focusNodeIds} + enabled={filters?.showActivity ?? true} onOpenTaskDetail={onOpenTaskDetail} onOpenMemberProfile={onOpenMemberProfile} /> diff --git a/src/features/agent-graph/renderer/ui/TeamGraphTab.tsx b/src/features/agent-graph/renderer/ui/TeamGraphTab.tsx index f4374d32..b27a84d0 100644 --- a/src/features/agent-graph/renderer/ui/TeamGraphTab.tsx +++ b/src/features/agent-graph/renderer/ui/TeamGraphTab.tsx @@ -176,7 +176,7 @@ export const TeamGraphTab = ({ getNodeWorldPosition?: (nodeId: string) => { x: number; y: number } | null; focusEdgeIds?: ReadonlySet | null; }; - const { getViewportSize, focusNodeIds } = extraHudProps; + const { getViewportSize, focusNodeIds, filters } = extraHudProps; return ( <> @@ -199,7 +199,7 @@ export const TeamGraphTab = ({ getNodeWorldPosition={extraHudProps.getNodeWorldPosition} getViewportSize={getViewportSize} focusNodeIds={focusNodeIds} - enabled={isActive} + enabled={isActive && (filters?.showActivity ?? 
true)} onOpenTaskDetail={dispatchOpenTask} onOpenMemberProfile={dispatchOpenProfile} /> diff --git a/src/features/codex-account/contracts/api.ts b/src/features/codex-account/contracts/api.ts new file mode 100644 index 00000000..36b179c9 --- /dev/null +++ b/src/features/codex-account/contracts/api.ts @@ -0,0 +1,15 @@ +import type { CodexAccountSnapshotDto } from './dto'; + +export interface CodexAccountElectronApi { + getCodexAccountSnapshot: () => Promise<CodexAccountSnapshotDto>; + refreshCodexAccountSnapshot: (options?: { + includeRateLimits?: boolean; + forceRefreshToken?: boolean; + }) => Promise<CodexAccountSnapshotDto>; + startCodexChatgptLogin: () => Promise<CodexAccountSnapshotDto>; + cancelCodexChatgptLogin: () => Promise<CodexAccountSnapshotDto>; + logoutCodexAccount: () => Promise<CodexAccountSnapshotDto>; + onCodexAccountSnapshotChanged: ( + callback: (event: unknown, snapshot: CodexAccountSnapshotDto) => void + ) => () => void; +} diff --git a/src/features/codex-account/contracts/channels.ts b/src/features/codex-account/contracts/channels.ts new file mode 100644 index 00000000..0b896bde --- /dev/null +++ b/src/features/codex-account/contracts/channels.ts @@ -0,0 +1,6 @@ +export const CODEX_ACCOUNT_GET_SNAPSHOT = 'codexAccount:getSnapshot'; +export const CODEX_ACCOUNT_REFRESH_SNAPSHOT = 'codexAccount:refreshSnapshot'; +export const CODEX_ACCOUNT_START_CHATGPT_LOGIN = 'codexAccount:startChatgptLogin'; +export const CODEX_ACCOUNT_CANCEL_CHATGPT_LOGIN = 'codexAccount:cancelChatgptLogin'; +export const CODEX_ACCOUNT_LOGOUT = 'codexAccount:logout'; +export const CODEX_ACCOUNT_SNAPSHOT_CHANGED = 'codexAccount:snapshotChanged'; diff --git a/src/features/codex-account/contracts/dto.ts b/src/features/codex-account/contracts/dto.ts new file mode 100644 index 00000000..27e45e94 --- /dev/null +++ b/src/features/codex-account/contracts/dto.ts @@ -0,0 +1,83 @@ +export type CodexAccountAuthMode = 'auto' | 'chatgpt' | 'api_key'; +export type CodexAccountEffectiveAuthMode = 'chatgpt' | 'api_key' | null; +export type CodexAccountPlanType = + | 'free' + | 'go' + | 'plus' + | 'pro' + | 'team' +
| 'business' + | 'enterprise' + | 'edu' + | 'unknown'; +export type CodexAccountAppServerState = + | 'healthy' + | 'degraded' + | 'runtime-missing' + | 'incompatible'; +export type CodexAccountLoginStatus = 'idle' | 'starting' | 'pending' | 'failed' | 'cancelled'; +export type CodexLaunchReadinessState = + | 'ready_chatgpt' + | 'ready_api_key' + | 'ready_both' + | 'missing_auth' + | 'warning_degraded_but_launchable' + | 'runtime_missing' + | 'incompatible'; + +export interface CodexManagedAccountDto { + type: 'chatgpt' | 'api_key'; + email: string | null; + planType: CodexAccountPlanType | null; +} + +export interface CodexApiKeyAvailabilityDto { + available: boolean; + source: 'stored' | 'environment' | null; + sourceLabel: string | null; +} + +export interface CodexRateLimitWindowDto { + usedPercent: number; + windowDurationMins: number | null; + resetsAt: number | null; +} + +export interface CodexCreditsSnapshotDto { + hasCredits: boolean; + unlimited: boolean; + balance: string | null; +} + +export interface CodexRateLimitSnapshotDto { + limitId: string | null; + limitName: string | null; + primary: CodexRateLimitWindowDto | null; + secondary: CodexRateLimitWindowDto | null; + credits: CodexCreditsSnapshotDto | null; + planType: CodexAccountPlanType | null; +} + +export interface CodexLoginStateDto { + status: CodexAccountLoginStatus; + error: string | null; + startedAt: string | null; +} + +export interface CodexAccountSnapshotDto { + preferredAuthMode: CodexAccountAuthMode; + effectiveAuthMode: CodexAccountEffectiveAuthMode; + launchAllowed: boolean; + launchIssueMessage: string | null; + launchReadinessState: CodexLaunchReadinessState; + appServerState: CodexAccountAppServerState; + appServerStatusMessage: string | null; + managedAccount: CodexManagedAccountDto | null; + apiKey: CodexApiKeyAvailabilityDto; + requiresOpenaiAuth: boolean | null; + localAccountArtifactsPresent?: boolean; + localActiveChatgptAccountPresent?: boolean; + login: 
import type {
  CodexAccountAppServerState,
  CodexAccountAuthMode,
  CodexAccountEffectiveAuthMode,
  CodexApiKeyAvailabilityDto,
  CodexLaunchReadinessState,
  CodexManagedAccountDto,
} from '@features/codex-account/contracts';

/** Outcome of a launch-readiness evaluation: the state, the auth mode to use, and any issue text. */
export interface CodexLaunchReadinessResult {
  state: CodexLaunchReadinessState;
  effectiveAuthMode: CodexAccountEffectiveAuthMode;
  launchAllowed: boolean;
  issueMessage: string | null;
}

const DEGRADED_ACCOUNT_MESSAGE =
  'ChatGPT account detected, but account verification is currently degraded.';
const INCOMPATIBLE_INSTALL_MESSAGE =
  'This Codex installation does not support ChatGPT account management.';

/**
 * Launchable result backed by the managed ChatGPT account.
 * A degraded app-server downgrades the state to a warning and surfaces a message,
 * but the launch stays allowed.
 */
function readyWithChatgpt(
  appServerState: CodexAccountAppServerState,
  appServerStatusMessage: string | null,
  healthyState: CodexLaunchReadinessState
): CodexLaunchReadinessResult {
  const degraded = appServerState === 'degraded';
  return {
    state: degraded ? 'warning_degraded_but_launchable' : healthyState,
    effectiveAuthMode: 'chatgpt',
    launchAllowed: true,
    issueMessage: degraded ? (appServerStatusMessage ?? DEGRADED_ACCOUNT_MESSAGE) : null,
  };
}

/** Launchable result backed by an API key. */
function readyWithApiKey(): CodexLaunchReadinessResult {
  return {
    state: 'ready_api_key',
    effectiveAuthMode: 'api_key',
    launchAllowed: true,
    issueMessage: null,
  };
}

/**
 * Non-launchable result for a missing ChatGPT account.
 * An incompatible app-server takes precedence over the caller-supplied missing-auth message.
 */
function blockedWithoutAuth(
  appServerState: CodexAccountAppServerState,
  appServerStatusMessage: string | null,
  missingAuthMessage: string
): CodexLaunchReadinessResult {
  if (appServerState === 'incompatible') {
    return {
      state: 'incompatible',
      effectiveAuthMode: null,
      launchAllowed: false,
      issueMessage: appServerStatusMessage ?? INCOMPATIBLE_INSTALL_MESSAGE,
    };
  }

  return {
    state: 'missing_auth',
    effectiveAuthMode: null,
    launchAllowed: false,
    issueMessage: missingAuthMessage,
  };
}

/**
 * Decides whether Codex can be launched and which auth mode the launch would use.
 *
 * Evaluation order:
 * 1. `runtime-missing` app-server state always blocks the launch.
 * 2. `chatgpt` preference requires a managed account of type `chatgpt`.
 * 3. `api_key` preference requires `apiKey.available`.
 * 4. Any other preference (i.e. `auto`) prefers the managed ChatGPT account,
 *    then falls back to the API key.
 *
 * @param input Preferred mode plus the currently observed account, API key and app-server facts.
 * @returns The readiness state, effective auth mode, launch permission and optional issue message.
 */
export function evaluateCodexLaunchReadiness(input: {
  preferredAuthMode: CodexAccountAuthMode;
  managedAccount: CodexManagedAccountDto | null;
  apiKey: CodexApiKeyAvailabilityDto;
  appServerState: CodexAccountAppServerState;
  appServerStatusMessage: string | null;
  localActiveChatgptAccountPresent?: boolean;
}): CodexLaunchReadinessResult {
  const { preferredAuthMode, appServerState, appServerStatusMessage } = input;
  const managedAccountAvailable = input.managedAccount?.type === 'chatgpt';
  const apiKeyAvailable = input.apiKey.available;
  const localChatgptSessionPresent = Boolean(input.localActiveChatgptAccountPresent);

  if (appServerState === 'runtime-missing') {
    return {
      state: 'runtime_missing',
      effectiveAuthMode: null,
      launchAllowed: false,
      issueMessage:
        appServerStatusMessage ?? 'Codex CLI is not available, so native Codex cannot start.',
    };
  }

  if (preferredAuthMode === 'chatgpt') {
    if (managedAccountAvailable) {
      return readyWithChatgpt(appServerState, appServerStatusMessage, 'ready_chatgpt');
    }

    return blockedWithoutAuth(
      appServerState,
      appServerStatusMessage,
      localChatgptSessionPresent
        ? 'Reconnect ChatGPT to refresh the current Codex subscription session.'
        : 'Connect a ChatGPT account to use your Codex subscription.'
    );
  }

  if (preferredAuthMode === 'api_key') {
    if (apiKeyAvailable) {
      return readyWithApiKey();
    }

    // API-key mode never reports `incompatible`: the app-server is not needed for it.
    return {
      state: 'missing_auth',
      effectiveAuthMode: null,
      launchAllowed: false,
      issueMessage: 'Add OPENAI_API_KEY or CODEX_API_KEY to use Codex API key mode.',
    };
  }

  // Remaining preference: subscription first, API key second.
  if (managedAccountAvailable) {
    return readyWithChatgpt(
      appServerState,
      appServerStatusMessage,
      apiKeyAvailable ? 'ready_both' : 'ready_chatgpt'
    );
  }

  if (apiKeyAvailable) {
    return readyWithApiKey();
  }

  return blockedWithoutAuth(
    appServerState,
    appServerStatusMessage,
    localChatgptSessionPresent
      ? 'Reconnect ChatGPT to refresh the current Codex subscription session, or add OPENAI_API_KEY / CODEX_API_KEY to use Codex.'
      : 'Connect a ChatGPT account or add OPENAI_API_KEY / CODEX_API_KEY to use Codex.'
  );
}
import {
  CODEX_ACCOUNT_CANCEL_CHATGPT_LOGIN,
  CODEX_ACCOUNT_GET_SNAPSHOT,
  CODEX_ACCOUNT_LOGOUT,
  CODEX_ACCOUNT_REFRESH_SNAPSHOT,
  CODEX_ACCOUNT_START_CHATGPT_LOGIN,
} from '@features/codex-account/contracts';

import type { CodexAccountFeatureFacade } from '../../../composition/createCodexAccountFeature';
import type { IpcMain } from 'electron';

/**
 * Wires the Codex account IPC channels to the feature facade.
 * Each handler returns the facade promise so the renderer receives its resolved value.
 */
export function registerCodexAccountIpc(
  ipcMain: IpcMain,
  feature: CodexAccountFeatureFacade
): void {
  const readSnapshot = () => feature.getSnapshot();
  const refreshSnapshot = (
    _event: unknown,
    options?: { includeRateLimits?: boolean; forceRefreshToken?: boolean }
  ) => feature.refreshSnapshot(options);
  const startLogin = () => feature.startChatgptLogin();
  const cancelLogin = () => feature.cancelLogin();
  const logout = () => feature.logout();

  ipcMain.handle(CODEX_ACCOUNT_GET_SNAPSHOT, readSnapshot);
  ipcMain.handle(CODEX_ACCOUNT_REFRESH_SNAPSHOT, refreshSnapshot);
  ipcMain.handle(CODEX_ACCOUNT_START_CHATGPT_LOGIN, startLogin);
  ipcMain.handle(CODEX_ACCOUNT_CANCEL_CHATGPT_LOGIN, cancelLogin);
  ipcMain.handle(CODEX_ACCOUNT_LOGOUT, logout);
}

/** Removes every handler installed by {@link registerCodexAccountIpc}, in the same order. */
export function removeCodexAccountIpc(ipcMain: IpcMain): void {
  const channels = [
    CODEX_ACCOUNT_GET_SNAPSHOT,
    CODEX_ACCOUNT_REFRESH_SNAPSHOT,
    CODEX_ACCOUNT_START_CHATGPT_LOGIN,
    CODEX_ACCOUNT_CANCEL_CHATGPT_LOGIN,
    CODEX_ACCOUNT_LOGOUT,
  ];

  for (const channel of channels) {
    ipcMain.removeHandler(channel);
  }
}

// --- adapters/output/presenters/CodexAccountSnapshotPresenter.ts ---

import {
  CODEX_ACCOUNT_SNAPSHOT_CHANGED,
  type CodexAccountSnapshotDto,
} from '@features/codex-account/contracts';
import { safeSendToRenderer } from '@main/utils/safeWebContentsSend';

import type { BrowserWindow } from 'electron';

/** Pushes account snapshots to the currently attached main window, if any. */
export class CodexAccountSnapshotPresenter {
  private mainWindow: BrowserWindow | null = null;

  /** Attaches (or, with `null`, detaches) the window that receives snapshot events. */
  setMainWindow(window: BrowserWindow | null): void {
    this.mainWindow = window;
  }

  /** Sends the snapshot on the `CODEX_ACCOUNT_SNAPSHOT_CHANGED` channel. */
  publish(snapshot: CodexAccountSnapshotDto): void {
    const target = this.mainWindow;
    safeSendToRenderer(target, CODEX_ACCOUNT_SNAPSHOT_CHANGED, snapshot);
  }
}
import {
  type CodexAccountAuthMode,
  type CodexAccountSnapshotDto,
  type CodexApiKeyAvailabilityDto,
  type CodexCreditsSnapshotDto,
  type CodexLoginStateDto,
  type CodexManagedAccountDto,
  type CodexRateLimitSnapshotDto,
  type CodexRateLimitWindowDto,
} from '@features/codex-account/contracts';
import {
  type CodexLaunchReadinessResult,
  evaluateCodexLaunchReadiness,
} from '@features/codex-account/core/domain/evaluateCodexLaunchReadiness';
import { ApiKeyService } from '@main/services/extensions';
import {
  type CodexAppServerGetAccountRateLimitsResponse,
  type CodexAppServerGetAccountResponse,
  type CodexAppServerRateLimitSnapshot,
  CodexAppServerSessionFactory,
  CodexBinaryResolver,
  JsonRpcStdioClient,
} from '@main/services/infrastructure/codexAppServer';
import { getCachedShellEnv } from '@main/utils/shellEnv';

import { CodexAccountSnapshotPresenter } from '../adapters/output/presenters/CodexAccountSnapshotPresenter';
import { CodexAccountAppServerClient } from '../infrastructure/CodexAccountAppServerClient';
import { CodexAccountEnvBuilder } from '../infrastructure/CodexAccountEnvBuilder';
import { CodexLoginSessionManager } from '../infrastructure/CodexLoginSessionManager';
import { detectCodexLocalAccountState } from '../infrastructure/detectCodexLocalAccountArtifacts';

import type { Logger } from '@shared/utils/logger';
import type { BrowserWindow } from 'electron';

// NOTE(review): the type arguments of this alias were lost in the source text. `warn` is the
// only member this module calls directly; `info`/`error` are assumed for the collaborators the
// logger is forwarded to — confirm against JsonRpcStdioClient / CodexLoginSessionManager.
type LoggerPort = Pick<Logger, 'info' | 'warn' | 'error'>;

/** Maximum age of the cached snapshot served by `getSnapshot()` without a refresh. */
const SNAPSHOT_CACHE_TTL_MS = 5_000;
/** Maximum age of cached rate limits before the app-server is queried again. */
const RATE_LIMITS_CACHE_TTL_MS = 45_000;
/** Maximum age of the last successfully read account that may stand in for a failed read. */
const LAST_KNOWN_GOOD_MANAGED_ACCOUNT_TTL_MS = 60_000;

/** Last account payload read from the app-server, with its observation time (epoch ms). */
interface CodexLastKnownAccount {
  payload: CodexAppServerGetAccountResponse;
  observedAt: number;
}

/** Last rate-limit payload read from the app-server, with its observation time (epoch ms). */
interface CodexLastKnownRateLimits {
  payload: CodexAppServerGetAccountRateLimitsResponse;
  observedAt: number;
}

/** Refresh options with every flag resolved to a concrete boolean. */
interface CodexSnapshotRefreshOptions {
  includeRateLimits: boolean;
  forceRefreshToken: boolean;
}

/** True when the payload carries an account whose type is `chatgpt`. */
function hasChatgptManagedAccount(
  payload: CodexAppServerGetAccountResponse | null | undefined
): boolean {
  return payload?.account?.type === 'chatgpt';
}

/** Deep-copies a value with `structuredClone` so cached snapshots are never shared by reference. */
function deepClone<T>(value: T): T {
  return structuredClone(value);
}

/**
 * Maps the app-server account shape to the feature DTO.
 * `apiKey` accounts carry no email or plan; every other account is mapped as `chatgpt`.
 */
function asCodexManagedAccount(
  account: CodexAppServerGetAccountResponse['account']
): CodexManagedAccountDto | null {
  if (!account) {
    return null;
  }

  if (account.type === 'apiKey') {
    return {
      type: 'api_key',
      email: null,
      planType: null,
    };
  }

  return {
    type: 'chatgpt',
    email: account.email,
    planType: account.planType,
  };
}

/** Copies one rate-limit window into its DTO, or returns `null` when the window is absent. */
function asRateLimitWindow(
  window: CodexAppServerRateLimitSnapshot['primary']
): CodexRateLimitWindowDto | null {
  if (!window) {
    return null;
  }

  return {
    usedPercent: window.usedPercent,
    windowDurationMins: window.windowDurationMins,
    resetsAt: window.resetsAt,
  };
}

/** Copies the credits section into its DTO, or returns `null` when it is absent. */
function asCreditsSnapshot(
  credits: CodexAppServerRateLimitSnapshot['credits']
): CodexCreditsSnapshotDto | null {
  if (!credits) {
    return null;
  }

  return {
    hasCredits: credits.hasCredits,
    unlimited: credits.unlimited,
    balance: credits.balance,
  };
}

/** Maps a full app-server rate-limit snapshot to the feature DTO (`null` in, `null` out). */
function asRateLimits(
  snapshot: CodexAppServerRateLimitSnapshot | null
): CodexRateLimitSnapshotDto | null {
  if (!snapshot) {
    return null;
  }

  return {
    limitId: snapshot.limitId,
    limitName: snapshot.limitName,
    primary: asRateLimitWindow(snapshot.primary),
    secondary: asRateLimitWindow(snapshot.secondary),
    credits: asCreditsSnapshot(snapshot.credits),
    planType: snapshot.planType,
  };
}

/** Reads the persisted Codex auth preference, defaulting to `auto` when none is stored. */
function getPreferredAuthMode(configManager: {
  getConfig: () => {
    providerConnections: {
      codex: {
        preferredAuthMode?: CodexAccountAuthMode;
      };
    };
  };
}): CodexAccountAuthMode {
  return configManager.getConfig().providerConnections.codex.preferredAuthMode ?? 'auto';
}
/** Lower-cased message fragments that indicate the binary lacks the requested app-server method. */
const UNSUPPORTED_METHOD_MARKERS = [
  'unknown method',
  'method not found',
  'unknown command',
  'no such command',
] as const;

/**
 * Extracts a readable message from an arbitrary thrown value.
 * Plain objects carrying a string `message` are read directly instead of being
 * stringified to `[object Object]`.
 */
function describeFailure(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }

  if (typeof error === 'object' && error !== null && 'message' in error) {
    const candidate = (error as { message: unknown }).message;
    if (typeof candidate === 'string') {
      return candidate;
    }
  }

  return String(error);
}

/**
 * Classifies an app-server failure.
 *
 * @returns `incompatible` with a fixed explanation when the message matches one of
 *   {@link UNSUPPORTED_METHOD_MARKERS}; otherwise `degraded` with the original message.
 */
function classifyAppServerFailure(error: unknown): {
  appServerState: CodexAccountSnapshotDto['appServerState'];
  appServerStatusMessage: string;
} {
  const message = describeFailure(error);
  const lower = message.toLowerCase();

  if (UNSUPPORTED_METHOD_MARKERS.some((marker) => lower.includes(marker))) {
    return {
      appServerState: 'incompatible',
      appServerStatusMessage:
        'The installed Codex binary does not support app-server account management yet.',
    };
  }

  return {
    appServerState: 'degraded',
    appServerStatusMessage: message,
  };
}

/** Resolves optional refresh flags to booleans; only a literal `true` enables a flag. */
function normalizeRefreshOptions(options?: {
  includeRateLimits?: boolean;
  forceRefreshToken?: boolean;
}): CodexSnapshotRefreshOptions {
  return {
    includeRateLimits: options?.includeRateLimits === true,
    forceRefreshToken: options?.forceRefreshToken === true,
  };
}

/** Combines two pending refresh requests; a flag stays set if either request set it. */
function mergeRefreshOptions(
  current: CodexSnapshotRefreshOptions | null,
  next: CodexSnapshotRefreshOptions
): CodexSnapshotRefreshOptions {
  if (!current) {
    return next;
  }

  return {
    includeRateLimits: current.includeRateLimits || next.includeRateLimits,
    forceRefreshToken: current.forceRefreshToken || next.forceRefreshToken,
  };
}

/** Creates a promise together with the function that resolves it. */
function createDeferred(): { promise: Promise<void>; resolve: () => void } {
  // The Promise executor runs synchronously, so `resolve` is assigned before it is read.
  let resolve!: () => void;
  const promise = new Promise<void>((fulfill) => {
    resolve = fulfill;
  });

  return {
    promise,
    resolve,
  };
}
/** Main-process entry points of the Codex account feature. */
export interface CodexAccountFeatureFacade {
  /** Returns the cached snapshot while it is fresh, otherwise triggers a refresh. */
  getSnapshot(): Promise<CodexAccountSnapshotDto>;
  /** Re-reads account state; concurrent calls are merged into the in-flight refresh. */
  refreshSnapshot(options?: {
    includeRateLimits?: boolean;
    forceRefreshToken?: boolean;
  }): Promise<CodexAccountSnapshotDto>;
  startChatgptLogin(): Promise<CodexAccountSnapshotDto>;
  cancelLogin(): Promise<CodexAccountSnapshotDto>;
  logout(): Promise<CodexAccountSnapshotDto>;
  /** Registers a snapshot listener and returns the function that unregisters it. */
  subscribe(listener: (snapshot: CodexAccountSnapshotDto) => void): () => void;
  setMainWindow(window: BrowserWindow | null): void;
  getLaunchReadiness(): Promise<CodexLaunchReadinessResult>;
  dispose(): Promise<void>;
}

class CodexAccountFeatureFacadeImpl implements CodexAccountFeatureFacade {
  private readonly listeners = new Set<(snapshot: CodexAccountSnapshotDto) => void>();
  private readonly presenter = new CodexAccountSnapshotPresenter();
  private readonly envBuilder = new CodexAccountEnvBuilder();
  private readonly appServerClient: CodexAccountAppServerClient;
  private readonly loginSessionManager: CodexLoginSessionManager;

  private snapshotCache: CodexAccountSnapshotDto | null = null;
  private snapshotObservedAt = 0;
  private refreshPromise: Promise<CodexAccountSnapshotDto> | null = null;
  private pendingRefreshOptions: CodexSnapshotRefreshOptions | null = null;
  private lastKnownAccount: CodexLastKnownAccount | null = null;
  private lastKnownRateLimits: CodexLastKnownRateLimits | null = null;
  /** Tail of the mutation chain; each new mutation waits for the previous tail. */
  private mutationQueue: Promise<void> = Promise.resolve();
  private mutationQueueRelease: (() => void) | null = null;
  /** Completion promise of the mutation that is currently executing, if any. */
  private activeMutation: Promise<void> | null = null;
  private activeMutationCount = 0;

  constructor(
    private readonly logger: LoggerPort,
    private readonly configManager: {
      getConfig: () => {
        providerConnections: {
          codex: {
            preferredAuthMode?: CodexAccountAuthMode;
          };
        };
      };
    },
    private readonly apiKeyService = new ApiKeyService()
  ) {
    const sessionFactory = new CodexAppServerSessionFactory(new JsonRpcStdioClient(logger));
    this.appServerClient = new CodexAccountAppServerClient(sessionFactory);
    this.loginSessionManager = new CodexLoginSessionManager(sessionFactory, logger);

    this.loginSessionManager.subscribe(() => {
      this.runInBackground(this.emitCurrentSnapshot(), 'codex account snapshot emit failed');
    });
    this.loginSessionManager.onSettled(() => {
      this.runInBackground(
        this.refreshSnapshot({
          includeRateLimits: true,
          forceRefreshToken: true,
        }),
        'codex account post-login refresh failed'
      );
    });
  }

  async getSnapshot(): Promise<CodexAccountSnapshotDto> {
    if (this.snapshotCache && Date.now() - this.snapshotObservedAt <= SNAPSHOT_CACHE_TTL_MS) {
      return deepClone(this.snapshotCache);
    }

    return this.refreshSnapshot();
  }

  async refreshSnapshot(options?: {
    includeRateLimits?: boolean;
    forceRefreshToken?: boolean;
  }): Promise<CodexAccountSnapshotDto> {
    // Requests arriving while a refresh is in flight are merged and served by the same drain.
    this.pendingRefreshOptions = mergeRefreshOptions(
      this.pendingRefreshOptions,
      normalizeRefreshOptions(options)
    );

    if (!this.refreshPromise) {
      this.refreshPromise = this.drainRefreshQueue().finally(() => {
        this.refreshPromise = null;
      });
    }

    return this.refreshPromise;
  }

  async startChatgptLogin(): Promise<CodexAccountSnapshotDto> {
    let binaryMissing = false;
    await this.runSerializedMutation(async () => {
      const binaryPath = await CodexBinaryResolver.resolve();
      if (!binaryPath) {
        binaryMissing = true;
        return;
      }

      const env = this.envBuilder.buildControlPlaneEnv({ binaryPath });
      await this.loginSessionManager.start({ binaryPath, env });
    });

    if (binaryMissing) {
      // loadSnapshot publishes the runtime-missing snapshot itself.
      return this.loadSnapshot();
    }

    return this.emitCurrentSnapshot();
  }

  async cancelLogin(): Promise<CodexAccountSnapshotDto> {
    await this.runSerializedMutation(async () => {
      await this.loginSessionManager.cancel();
    });

    return this.emitCurrentSnapshot();
  }

  /**
   * Cancels any login in progress, logs out through the app-server, publishes a logged-out
   * snapshot immediately, then returns a freshly read snapshot.
   *
   * @throws Error when the Codex binary cannot be resolved.
   */
  async logout(): Promise<CodexAccountSnapshotDto> {
    await this.runSerializedMutation(async () => {
      // Best effort: a failed cancel must not prevent the logout itself.
      await this.loginSessionManager.cancel().catch(() => undefined);

      const binaryPath = await CodexBinaryResolver.resolve();
      if (!binaryPath) {
        throw new Error('Codex CLI is not available, so logout cannot be completed.');
      }

      const env = this.envBuilder.buildControlPlaneEnv({ binaryPath });
      await this.appServerClient.logout({ binaryPath, env });
      this.lastKnownAccount = null;
      this.lastKnownRateLimits = null;
      await this.publishLoggedOutSnapshot();
    });

    return this.refreshSnapshot({ includeRateLimits: true, forceRefreshToken: true });
  }

  subscribe(listener: (snapshot: CodexAccountSnapshotDto) => void): () => void {
    this.listeners.add(listener);
    return (): void => {
      this.listeners.delete(listener);
    };
  }

  setMainWindow(window: BrowserWindow | null): void {
    this.presenter.setMainWindow(window);
  }

  async getLaunchReadiness(): Promise<CodexLaunchReadinessResult> {
    const snapshot = await this.getSnapshot();
    return evaluateCodexLaunchReadiness({
      preferredAuthMode: snapshot.preferredAuthMode,
      managedAccount: snapshot.managedAccount,
      apiKey: snapshot.apiKey,
      appServerState: snapshot.appServerState,
      appServerStatusMessage: snapshot.appServerStatusMessage,
      localActiveChatgptAccountPresent: snapshot.localActiveChatgptAccountPresent,
    });
  }

  async dispose(): Promise<void> {
    await this.loginSessionManager.dispose();
    this.listeners.clear();
    this.snapshotCache = null;
    this.refreshPromise = null;
    this.pendingRefreshOptions = null;
    this.lastKnownAccount = null;
    this.lastKnownRateLimits = null;
    this.activeMutationCount = 0;
    this.activeMutation = null;
    if (this.mutationQueueRelease) {
      this.mutationQueueRelease();
      this.mutationQueueRelease = null;
    }
    this.mutationQueue = Promise.resolve();
  }

  /** Attaches a rejection handler to a fire-and-forget task so failures are logged, not unhandled. */
  private runInBackground(task: Promise<unknown>, failureLabel: string): void {
    task.catch((error: unknown) => {
      this.logger.warn(failureLabel, {
        error: error instanceof Error ? error.message : String(error),
      });
    });
  }

  /**
   * Runs `loadSnapshot` until no merged refresh request is pending.
   *
   * Only a mutation that is actually executing is waited for. Waiting on the tail of the
   * mutation chain would deadlock: a queued mutation waits for this refresh to finish
   * before it starts, so its completion promise cannot settle while the refresh waits on it.
   */
  private async drainRefreshQueue(): Promise<CodexAccountSnapshotDto> {
    let lastSnapshot: CodexAccountSnapshotDto | null = null;

    while (this.pendingRefreshOptions) {
      const nextOptions = this.pendingRefreshOptions;
      this.pendingRefreshOptions = null;

      while (this.activeMutation) {
        await this.activeMutation;
      }

      lastSnapshot = await this.loadSnapshot(nextOptions);
    }

    if (!lastSnapshot) {
      if (this.snapshotCache) {
        return deepClone(this.snapshotCache);
      }
      return this.loadSnapshot();
    }

    return lastSnapshot;
  }

  /**
   * Reads API key availability, local account artifacts, the managed account and (optionally)
   * rate limits, evaluates launch readiness, then caches and publishes the resulting snapshot.
   */
  private async loadSnapshot(options?: {
    includeRateLimits?: boolean;
    forceRefreshToken?: boolean;
  }): Promise<CodexAccountSnapshotDto> {
    const preferredAuthMode = getPreferredAuthMode(this.configManager);
    const apiKey = await this.loadApiKeyAvailability();
    const localAccountState = await detectCodexLocalAccountState();
    const localAccountArtifactsPresent = localAccountState.hasArtifacts;
    const localActiveChatgptAccountPresent = localAccountState.hasActiveChatgptAccount;
    const binaryPath = await CodexBinaryResolver.resolve();
    const login = this.loginSessionManager.getState();
    const now = Date.now();

    if (!binaryPath) {
      return this.setSnapshot({
        preferredAuthMode,
        effectiveAuthMode: null,
        launchAllowed: false,
        launchIssueMessage: 'Codex CLI not found. Install Codex to use native account management.',
        launchReadinessState: 'runtime_missing',
        appServerState: 'runtime-missing',
        appServerStatusMessage:
          'Codex CLI not found. Install Codex to use native account management.',
        managedAccount: null,
        apiKey,
        requiresOpenaiAuth: null,
        localAccountArtifactsPresent,
        localActiveChatgptAccountPresent,
        login,
        rateLimits: null,
        updatedAt: new Date(now).toISOString(),
      });
    }

    const env = this.envBuilder.buildControlPlaneEnv({ binaryPath });
    let appServerState: CodexAccountSnapshotDto['appServerState'] = 'healthy';
    let appServerStatusMessage: string | null = null;
    let accountPayload = this.lastKnownAccount?.payload ?? null;
    let requiresOpenaiAuth: boolean | null = accountPayload?.requiresOpenaiAuth ?? null;

    const lastKnown = this.lastKnownAccount;
    const lastKnownIsFresh =
      lastKnown !== null && now - lastKnown.observedAt <= LAST_KNOWN_GOOD_MANAGED_ACCOUNT_TTL_MS;

    try {
      const accountResult = await this.appServerClient.readAccount({
        binaryPath,
        env,
        refreshToken: options?.forceRefreshToken ?? false,
      });

      // A non-forced read that reports "no account, auth required" while a local ChatGPT
      // session exists does not overwrite a recent ChatGPT account observation.
      const reusableLastKnown =
        options?.forceRefreshToken !== true &&
        localActiveChatgptAccountPresent &&
        accountResult.account.account == null &&
        accountResult.account.requiresOpenaiAuth === true &&
        lastKnown !== null &&
        lastKnownIsFresh &&
        hasChatgptManagedAccount(lastKnown.payload)
          ? lastKnown
          : null;

      if (reusableLastKnown) {
        accountPayload = reusableLastKnown.payload;
        requiresOpenaiAuth = reusableLastKnown.payload.requiresOpenaiAuth;
      } else {
        accountPayload = accountResult.account;
        requiresOpenaiAuth = accountResult.account.requiresOpenaiAuth;
        this.lastKnownAccount = {
          payload: accountResult.account,
          observedAt: now,
        };
      }
    } catch (error) {
      const failure = classifyAppServerFailure(error);
      appServerState = failure.appServerState;
      appServerStatusMessage = failure.appServerStatusMessage;

      // On failure, fall back to the last observation only while it is still fresh.
      if (lastKnown !== null && lastKnownIsFresh) {
        accountPayload = lastKnown.payload;
        requiresOpenaiAuth = lastKnown.payload.requiresOpenaiAuth;
      } else {
        accountPayload = null;
        requiresOpenaiAuth = null;
      }
    }

    let rateLimits: CodexRateLimitSnapshotDto | null = null;
    const cachedRateLimits = this.lastKnownRateLimits;
    const rateLimitsCacheIsFresh =
      cachedRateLimits !== null && now - cachedRateLimits.observedAt <= RATE_LIMITS_CACHE_TTL_MS;

    if (options?.includeRateLimits === true || rateLimitsCacheIsFresh) {
      try {
        if (cachedRateLimits !== null && rateLimitsCacheIsFresh) {
          rateLimits = asRateLimits(cachedRateLimits.payload.rateLimits);
        } else {
          const rateLimitsPayload = await this.appServerClient.readRateLimits({
            binaryPath,
            env,
          });
          this.lastKnownRateLimits = {
            payload: rateLimitsPayload,
            observedAt: now,
          };
          rateLimits = asRateLimits(rateLimitsPayload.rateLimits);
        }
      } catch (error) {
        this.logger.warn('codex account rate limits refresh failed', {
          error: error instanceof Error ? error.message : String(error),
        });
        // Fall back to whatever was last observed, regardless of its age.
        rateLimits = this.lastKnownRateLimits
          ? asRateLimits(this.lastKnownRateLimits.payload.rateLimits)
          : null;
      }
    }

    const managedAccount = asCodexManagedAccount(accountPayload?.account ?? null);
    const readiness = evaluateCodexLaunchReadiness({
      preferredAuthMode,
      managedAccount,
      apiKey,
      appServerState,
      appServerStatusMessage,
      localActiveChatgptAccountPresent,
    });

    return this.setSnapshot({
      preferredAuthMode,
      effectiveAuthMode: readiness.effectiveAuthMode,
      launchAllowed: readiness.launchAllowed,
      launchIssueMessage: readiness.issueMessage,
      launchReadinessState: readiness.state,
      appServerState,
      appServerStatusMessage,
      managedAccount,
      apiKey,
      requiresOpenaiAuth,
      localAccountArtifactsPresent,
      localActiveChatgptAccountPresent,
      login,
      rateLimits,
      updatedAt: new Date(now).toISOString(),
    });
  }

  /**
   * Caches a private copy of the snapshot, sends it to the renderer and notifies listeners.
   * A listener that throws is logged and skipped so it cannot abort the refresh or prevent
   * later listeners from being notified.
   */
  private setSnapshot(nextSnapshot: CodexAccountSnapshotDto): CodexAccountSnapshotDto {
    this.snapshotCache = deepClone(nextSnapshot);
    this.snapshotObservedAt = Date.now();
    const snapshot = deepClone(nextSnapshot);
    this.presenter.publish(snapshot);
    for (const listener of this.listeners) {
      try {
        listener(snapshot);
      } catch (error) {
        this.logger.warn('codex account snapshot listener failed', {
          error: error instanceof Error ? error.message : String(error),
        });
      }
    }
    return snapshot;
  }

  /** Re-publishes the cached snapshot with the current login state, or refreshes if nothing is cached. */
  private async emitCurrentSnapshot(): Promise<CodexAccountSnapshotDto> {
    if (!this.snapshotCache) {
      return this.refreshSnapshot();
    }

    return this.setSnapshot({
      ...this.snapshotCache,
      login: this.loginSessionManager.getState(),
      updatedAt: new Date().toISOString(),
    });
  }

  /** Publishes a snapshot with no managed account and no rate limits, without querying the app-server. */
  private async publishLoggedOutSnapshot(): Promise<CodexAccountSnapshotDto> {
    const preferredAuthMode = getPreferredAuthMode(this.configManager);
    const apiKey = this.snapshotCache?.apiKey ?? (await this.loadApiKeyAvailability());
    const localAccountState = await detectCodexLocalAccountState();
    const localAccountArtifactsPresent = localAccountState.hasArtifacts;
    const localActiveChatgptAccountPresent = localAccountState.hasActiveChatgptAccount;
    const readiness = evaluateCodexLaunchReadiness({
      preferredAuthMode,
      managedAccount: null,
      apiKey,
      appServerState: 'healthy',
      appServerStatusMessage: null,
      localActiveChatgptAccountPresent,
    });
    const login = this.asIdleLoginState(this.loginSessionManager.getState());

    return this.setSnapshot({
      preferredAuthMode,
      effectiveAuthMode: readiness.effectiveAuthMode,
      launchAllowed: readiness.launchAllowed,
      launchIssueMessage: readiness.issueMessage,
      launchReadinessState: readiness.state,
      appServerState: 'healthy',
      appServerStatusMessage: null,
      managedAccount: null,
      apiKey,
      requiresOpenaiAuth: false,
      localAccountArtifactsPresent,
      localActiveChatgptAccountPresent,
      login,
      rateLimits: null,
      updatedAt: new Date().toISOString(),
    });
  }

  /** Collapses a login state to `idle`, keeping the error only when the login had failed. */
  private asIdleLoginState(loginState: CodexLoginStateDto): CodexLoginStateDto {
    return {
      status: 'idle',
      error: loginState.status === 'failed' ? loginState.error : null,
      startedAt: null,
    };
  }

  /**
   * Runs `operation` after every previously queued mutation and after any in-flight refresh.
   * While it runs, `activeMutation` is set so that refreshes wait for it to finish.
   */
  private async runSerializedMutation<T>(operation: () => Promise<T>): Promise<T> {
    const previousMutation = this.mutationQueue.catch(() => undefined);
    const deferred = createDeferred();
    this.mutationQueue = deferred.promise;
    this.mutationQueueRelease = deferred.resolve;

    await previousMutation;
    // Loop rather than a single await: once the loop exits there is no suspension point
    // before `activeMutation` is set, so a refresh cannot start in between.
    while (this.refreshPromise) {
      await this.refreshPromise.catch(() => undefined);
    }

    this.activeMutation = deferred.promise;
    this.activeMutationCount += 1;
    try {
      return await operation();
    } finally {
      this.activeMutationCount = Math.max(0, this.activeMutationCount - 1);
      if (this.activeMutation === deferred.promise) {
        this.activeMutation = null;
      }
      deferred.resolve();
      if (this.mutationQueueRelease === deferred.resolve) {
        this.mutationQueueRelease = null;
      }
    }
  }

  /**
   * Reports whether an API key can be supplied: first from app storage, then from the
   * cached shell environment, then from `process.env`. Within an environment,
   * `CODEX_API_KEY` is checked before `OPENAI_API_KEY`.
   */
  private async loadApiKeyAvailability(): Promise<CodexApiKeyAvailabilityDto> {
    const storedKey = await this.apiKeyService.lookupPreferred('OPENAI_API_KEY');
    if (storedKey?.value.trim()) {
      return {
        available: true,
        source: 'stored',
        sourceLabel: 'Stored in app',
      };
    }

    const shellEnv = getCachedShellEnv() ?? {};
    const envSources = [shellEnv, process.env];
    for (const envSource of envSources) {
      const codexKey = envSource.CODEX_API_KEY;
      if (typeof codexKey === 'string' && codexKey.trim()) {
        return {
          available: true,
          source: 'environment',
          sourceLabel: 'Detected from CODEX_API_KEY',
        };
      }

      const openAiKey = envSource.OPENAI_API_KEY;
      if (typeof openAiKey === 'string' && openAiKey.trim()) {
        return {
          available: true,
          source: 'environment',
          sourceLabel: 'Detected from OPENAI_API_KEY',
        };
      }
    }

    return {
      available: false,
      source: null,
      sourceLabel: null,
    };
  }
}

/** Builds the Codex account feature facade from its logger and config dependencies. */
export function createCodexAccountFeature(deps: {
  logger: LoggerPort;
  configManager: {
    getConfig: () => {
      providerConnections: {
        codex: {
          preferredAuthMode?: CodexAccountAuthMode;
        };
      };
    };
  };
}): CodexAccountFeatureFacade {
  return new CodexAccountFeatureFacadeImpl(deps.logger, deps.configManager);
}
import {
  type CodexAppServerGetAccountParams,
  type CodexAppServerGetAccountRateLimitsResponse,
  type CodexAppServerGetAccountResponse,
  type CodexAppServerLogoutAccountResponse,
} from '@main/services/infrastructure/codexAppServer';

import type { CodexAppServerSessionFactory } from '@main/services/infrastructure/codexAppServer';

const ACCOUNT_READ_TIMEOUT_MS = 3_500;
const ACCOUNT_RATE_LIMITS_TIMEOUT_MS = 4_500;
const ACCOUNT_LOGOUT_TIMEOUT_MS = 3_500;
const INITIALIZE_TIMEOUT_MS = 6_000;
const TOTAL_TIMEOUT_MS = 9_000;

/** Binary and environment used to spawn one app-server session. */
interface CodexAppServerLaunchTarget {
  binaryPath: string;
  env: NodeJS.ProcessEnv;
}

/** Thin client for the account methods of `codex app-server`; every call uses its own session. */
export class CodexAccountAppServerClient {
  constructor(private readonly sessionFactory: CodexAppServerSessionFactory) {}

  /**
   * Calls `account/read`.
   *
   * @param options.refreshToken Forwarded as the `refreshToken` request parameter (default `false`).
   * @returns The account response plus selected fields of the session's initialize response.
   */
  async readAccount(options: {
    binaryPath: string;
    env: NodeJS.ProcessEnv;
    refreshToken?: boolean;
  }): Promise<{
    account: CodexAppServerGetAccountResponse;
    initialize: { codexHome: string; platformFamily: string; platformOs: string };
  }> {
    return this.sessionFactory.withSession(
      this.sessionOptions(options, ACCOUNT_READ_TIMEOUT_MS, 'codex app-server account/read'),
      async (session) => {
        const account = await session.request<CodexAppServerGetAccountResponse>(
          'account/read',
          {
            refreshToken: options.refreshToken ?? false,
          } satisfies CodexAppServerGetAccountParams,
          ACCOUNT_READ_TIMEOUT_MS
        );

        return {
          account,
          initialize: {
            codexHome: session.initializeResponse.codexHome,
            platformFamily: session.initializeResponse.platformFamily,
            platformOs: session.initializeResponse.platformOs,
          },
        };
      }
    );
  }

  /** Calls `account/rateLimits/read` without parameters. */
  async readRateLimits(options: {
    binaryPath: string;
    env: NodeJS.ProcessEnv;
  }): Promise<CodexAppServerGetAccountRateLimitsResponse> {
    return this.sessionFactory.withSession(
      this.sessionOptions(
        options,
        ACCOUNT_RATE_LIMITS_TIMEOUT_MS,
        'codex app-server account/rateLimits/read'
      ),
      async (session) =>
        session.request<CodexAppServerGetAccountRateLimitsResponse>(
          'account/rateLimits/read',
          undefined,
          ACCOUNT_RATE_LIMITS_TIMEOUT_MS
        )
    );
  }

  /** Calls `account/logout` without parameters. */
  async logout(options: {
    binaryPath: string;
    env: NodeJS.ProcessEnv;
  }): Promise<CodexAppServerLogoutAccountResponse> {
    return this.sessionFactory.withSession(
      this.sessionOptions(options, ACCOUNT_LOGOUT_TIMEOUT_MS, 'codex app-server account/logout'),
      async (session) =>
        session.request<CodexAppServerLogoutAccountResponse>(
          'account/logout',
          undefined,
          ACCOUNT_LOGOUT_TIMEOUT_MS
        )
    );
  }

  /** Builds the session options shared by all calls; only the request timeout and label vary. */
  private sessionOptions(
    target: CodexAppServerLaunchTarget,
    requestTimeoutMs: number,
    label: string
  ) {
    return {
      binaryPath: target.binaryPath,
      env: target.env,
      requestTimeoutMs,
      initializeTimeoutMs: INITIALIZE_TIMEOUT_MS,
      totalTimeoutMs: TOTAL_TIMEOUT_MS,
      label,
    };
  }
}
const CODEX_API_KEY_ENV_VAR = 'CODEX_API_KEY';
const OPENAI_API_KEY_ENV_VAR = 'OPENAI_API_KEY';
// Host/provider routing switches that are removed from the control-plane env.
const PROVIDER_ROUTING_ENV_KEYS = [
  'CLAUDE_CODE_PROVIDER_MANAGED_BY_HOST',
  'CLAUDE_CODE_ENTRY_PROVIDER',
  'CLAUDE_CODE_USE_OPENAI',
  'CLAUDE_CODE_USE_BEDROCK',
  'CLAUDE_CODE_USE_VERTEX',
  'CLAUDE_CODE_USE_FOUNDRY',
  'CLAUDE_CODE_USE_GEMINI',
  'CLAUDE_CODE_GEMINI_BACKEND',
  'CLAUDE_CODE_CODEX_BACKEND',
] as const;

/**
 * Builds process environments for Codex account operations and applies the
 * per-launch auth policy to execution environments.
 */
export class CodexAccountEnvBuilder {
  /**
   * Returns the runtime base env with every provider-routing key and both
   * API-key variables removed.
   *
   * @param options.shellEnv - Shell env override; when nullish the cached
   *   shell env is used, falling back to an empty object.
   */
  buildControlPlaneEnv(options: {
    binaryPath?: string | null;
    shellEnv?: NodeJS.ProcessEnv | null;
    env?: NodeJS.ProcessEnv;
  }): NodeJS.ProcessEnv {
    const base = buildRuntimeBaseEnv({
      binaryPath: options.binaryPath,
      shellEnv: options.shellEnv ?? getCachedShellEnv() ?? {},
      env: options.env,
    });
    const controlPlaneEnv = base.env;

    const strippedKeys = [
      ...PROVIDER_ROUTING_ENV_KEYS,
      OPENAI_API_KEY_ENV_VAR,
      CODEX_API_KEY_ENV_VAR,
    ];
    strippedKeys.forEach((key) => {
      delete controlPlaneEnv[key];
    });

    return controlPlaneEnv;
  }

  /**
   * Mutates `env` in place so that it matches the resolved auth mode and
   * returns the same object.
   *
   * - `chatgpt`: both API-key variables are removed.
   * - `api_key` with a non-blank `apiKeyValue`: both variables are set to the
   *   trimmed key.
   * - anything else: `CODEX_API_KEY` is removed; `OPENAI_API_KEY` is kept only
   *   when it already holds a non-blank string.
   */
  applyExecutionAuthPolicy(
    env: NodeJS.ProcessEnv,
    options: {
      effectiveAuthMode: CodexAccountEffectiveAuthMode;
      apiKeyValue?: string | null;
    }
  ): NodeJS.ProcessEnv {
    const { effectiveAuthMode, apiKeyValue } = options;

    if (effectiveAuthMode === 'chatgpt') {
      delete env[OPENAI_API_KEY_ENV_VAR];
      delete env[CODEX_API_KEY_ENV_VAR];
      return env;
    }

    const explicitKey = apiKeyValue?.trim();
    if (effectiveAuthMode === 'api_key' && explicitKey) {
      env[OPENAI_API_KEY_ENV_VAR] = explicitKey;
      env[CODEX_API_KEY_ENV_VAR] = explicitKey;
      return env;
    }

    delete env[CODEX_API_KEY_ENV_VAR];

    const ambientOpenAiKey = env[OPENAI_API_KEY_ENV_VAR];
    const hasUsableAmbientKey =
      typeof ambientOpenAiKey === 'string' && ambientOpenAiKey.trim() !== '';
    if (!hasUsableAmbientKey) {
      delete env[OPENAI_API_KEY_ENV_VAR];
    }

    return env;
  }
}
const LOGIN_REQUEST_TIMEOUT_MS = 5_000;
const INITIALIZE_TIMEOUT_MS = 6_000;
// How long a login may stay in the `pending` state before it is failed.
const LOGIN_PENDING_TIMEOUT_MS = 10 * 60 * 1_000;

type CodexLoginStateListener = (state: CodexLoginStateDto) => void;
type CodexLoginSettledListener = () => void;
interface CodexLoginLogger {
  warn: (message: string, meta?: Record<string, unknown>) => void;
}

/** Everything that has to be released when an in-flight login ends. */
interface ActiveCodexLoginSession {
  session: CodexAppServerSession;
  loginId: string;
  disposeNotificationListener: () => void;
  timeoutId: ReturnType<typeof setTimeout>;
}

/**
 * Drives a single interactive ChatGPT login against `codex app-server`.
 *
 * At most one login is in flight at a time. State changes are broadcast to
 * `subscribe` listeners; `onSettled` listeners fire when a login is cancelled,
 * completes, or times out.
 */
export class CodexLoginSessionManager {
  private readonly listeners = new Set<CodexLoginStateListener>();
  private readonly settledListeners = new Set<CodexLoginSettledListener>();
  private state: CodexLoginStateDto = {
    status: 'idle',
    error: null,
    startedAt: null,
  };
  // Identifies the `start()` call that is still setting up; cleared by
  // cancel()/dispose() so the in-flight start can detect it was abandoned.
  private pendingStartToken: symbol | null = null;
  private activeSession: ActiveCodexLoginSession | null = null;

  constructor(
    private readonly sessionFactory: CodexAppServerSessionFactory,
    private readonly logger: CodexLoginLogger
  ) {}

  /** Registers a state listener and returns its unsubscribe function. */
  subscribe(listener: CodexLoginStateListener): () => void {
    this.listeners.add(listener);
    return (): void => {
      this.listeners.delete(listener);
    };
  }

  /** Registers a settled listener and returns its unsubscribe function. */
  onSettled(listener: CodexLoginSettledListener): () => void {
    this.settledListeners.add(listener);
    return (): void => {
      this.settledListeners.delete(listener);
    };
  }

  /** Returns a defensive copy of the current login state. */
  getState(): CodexLoginStateDto {
    return structuredClone(this.state);
  }

  /**
   * Starts a ChatGPT login: opens an app-server session, requests
   * `account/login/start`, validates the returned auth URL, and opens it in
   * the external browser. A no-op when a login is already active or starting.
   *
   * @throws Re-throws any setup failure after moving the state to `failed`,
   *   unless the start was abandoned through `cancel()`/`dispose()`.
   */
  async start(options: { binaryPath: string; env: NodeJS.ProcessEnv }): Promise<void> {
    if (this.activeSession || this.pendingStartToken) {
      return;
    }

    const startToken = Symbol('codex-login-start');
    this.pendingStartToken = startToken;
    let session: CodexAppServerSession | null = null;

    this.setState({
      status: 'starting',
      error: null,
      startedAt: new Date().toISOString(),
    });

    try {
      session = await this.sessionFactory.openSession({
        binaryPath: options.binaryPath,
        env: options.env,
        requestTimeoutMs: LOGIN_REQUEST_TIMEOUT_MS,
        initializeTimeoutMs: INITIALIZE_TIMEOUT_MS,
      });

      // cancel()/dispose() ran while the session was opening.
      if (this.pendingStartToken !== startToken) {
        await session.close().catch(() => undefined);
        return;
      }

      const response = await session.request<CodexAppServerLoginAccountResponse>(
        'account/login/start',
        { type: 'chatgpt' },
        LOGIN_REQUEST_TIMEOUT_MS
      );

      if (this.pendingStartToken !== startToken) {
        await session.close().catch(() => undefined);
        return;
      }

      if (response.type !== 'chatgpt') {
        throw new Error('Codex app-server returned an unexpected login response type');
      }

      // Only hand https URLs to the OS browser.
      const authUrl = new URL(response.authUrl);
      if (authUrl.protocol !== 'https:') {
        throw new Error('Codex app-server returned a non-https auth URL');
      }

      const disposeNotificationListener = session.onNotification((method, params) => {
        if (method !== 'account/login/completed') {
          return;
        }

        const notification = params as CodexAppServerAccountLoginCompletedNotification;
        // Ignore completions that explicitly belong to a different login.
        if (notification.loginId && notification.loginId !== response.loginId) {
          return;
        }

        void this.handleCompletion(notification);
      });

      const timeoutId = setTimeout(() => {
        void this.failActiveLogin('Timed out while waiting for ChatGPT account login to finish.');
      }, LOGIN_PENDING_TIMEOUT_MS);

      this.activeSession = {
        session,
        loginId: response.loginId,
        disposeNotificationListener,
        timeoutId,
      };
      this.pendingStartToken = null;

      this.setState({
        status: 'pending',
        error: null,
        startedAt: this.state.startedAt,
      });

      await shell.openExternal(authUrl.toString());
    } catch (error) {
      const wasAbandonedDuringStart =
        this.pendingStartToken !== startToken &&
        !this.activeSession &&
        (this.state.status === 'cancelled' || this.state.status === 'idle');

      if (this.pendingStartToken === startToken) {
        this.pendingStartToken = null;
      }
      // If this attempt already installed itself as the active login (e.g.
      // opening the browser failed), release its timer and notification
      // listener too. Otherwise the stale timeout would later fire and fail
      // whichever login happens to be active at that point.
      if (session && this.activeSession?.session === session) {
        this.detachActiveSession();
      }
      await session?.close().catch(() => undefined);
      if (wasAbandonedDuringStart) {
        return;
      }
      this.setState({
        status: 'failed',
        error: error instanceof Error ? error.message : String(error),
        startedAt: this.state.startedAt,
      });
      throw error;
    }
  }

  /**
   * Cancels the login. An in-progress `start()` is abandoned; an active login
   * is cancelled on the server (best effort) and its session closed. Always
   * ends in the `cancelled` state.
   */
  async cancel(): Promise<void> {
    if (this.pendingStartToken && !this.activeSession) {
      this.pendingStartToken = null;
      this.setState({
        status: 'cancelled',
        error: null,
        startedAt: null,
      });
      this.emitSettled();
      return;
    }

    const activeSession = this.detachActiveSession();
    if (!activeSession) {
      this.setState({
        status: 'cancelled',
        error: null,
        startedAt: null,
      });
      return;
    }

    try {
      await activeSession.session.request<CodexAppServerCancelLoginAccountResponse>(
        'account/login/cancel',
        { loginId: activeSession.loginId },
        LOGIN_REQUEST_TIMEOUT_MS
      );
    } catch (error) {
      // Best effort: the session is closed regardless of the cancel outcome.
      this.logger.warn('codex login cancel failed', {
        error: error instanceof Error ? error.message : String(error),
      });
    } finally {
      await activeSession.session.close().catch(() => undefined);
    }

    this.setState({
      status: 'cancelled',
      error: null,
      startedAt: null,
    });
    this.emitSettled();
  }

  /**
   * Tears down any pending or active login without notifying settled
   * listeners and resets the state to `idle`.
   */
  async dispose(): Promise<void> {
    if (this.pendingStartToken) {
      this.pendingStartToken = null;
    }

    const activeSession = this.detachActiveSession();
    if (activeSession) {
      await activeSession.session.close().catch(() => undefined);
    }

    this.setState({
      status: 'idle',
      error: null,
      startedAt: null,
    });
  }

  /** Handles the `account/login/completed` notification for the active login. */
  private async handleCompletion(
    notification: CodexAppServerAccountLoginCompletedNotification
  ): Promise<void> {
    const activeSession = this.detachActiveSession();
    if (!activeSession) {
      return;
    }

    await activeSession.session.close().catch(() => undefined);

    if (notification.success) {
      this.setState({
        status: 'idle',
        error: null,
        startedAt: null,
      });
    } else {
      this.setState({
        status: 'failed',
        error: notification.error ?? 'ChatGPT login failed.',
        startedAt: this.state.startedAt,
      });
    }

    this.emitSettled();
  }

  /** Fails the active login (if any) with the given message. */
  private async failActiveLogin(errorMessage: string): Promise<void> {
    const activeSession = this.detachActiveSession();
    if (!activeSession) {
      return;
    }

    await activeSession.session.close().catch(() => undefined);
    this.setState({
      status: 'failed',
      error: errorMessage,
      startedAt: this.state.startedAt,
    });
    this.emitSettled();
  }

  /**
   * Clears `activeSession`, cancels its pending-login timer, and removes its
   * notification listener. Returns the detached record (its session is NOT
   * closed here) or `null` when no login was active.
   */
  private detachActiveSession(): ActiveCodexLoginSession | null {
    const activeSession = this.activeSession;
    if (!activeSession) {
      return null;
    }

    this.activeSession = null;
    clearTimeout(activeSession.timeoutId);
    activeSession.disposeNotificationListener();
    return activeSession;
  }

  private emitSettled(): void {
    for (const listener of this.settledListeners) {
      listener();
    }
  }

  /** Stores a copy of `nextState` and hands each listener its own copy. */
  private setState(nextState: CodexLoginStateDto): void {
    this.state = structuredClone(nextState);
    for (const listener of this.listeners) {
      listener(this.getState());
    }
  }
}
const CODEX_ACCOUNTS_DIR = path.join(os.homedir(), '.codex', 'accounts');

// The fields below come straight from JSON.parse and are therefore typed as
// `unknown`; they are validated before use.
interface CodexAccountsRegistry {
  active_account_key?: unknown;
  activeAccountKey?: unknown;
}

interface CodexAuthFile {
  auth_mode?: unknown;
  authMode?: unknown;
}

export interface CodexLocalAccountState {
  hasArtifacts: boolean;
  hasActiveChatgptAccount: boolean;
}

/** Encodes an account key as unpadded base64url, as used in `<key>.auth.json`. */
function encodeAccountKeyForAuthFilename(accountKey: string): string {
  return Buffer.from(accountKey, 'utf8').toString('base64url');
}

/** Reads and parses a JSON file; returns `null` when it is missing or invalid. */
async function readJsonFile<T>(filePath: string): Promise<T | null> {
  try {
    const raw = await fs.readFile(filePath, 'utf8');
    return JSON.parse(raw) as T;
  } catch {
    return null;
  }
}

/** Returns whether `filePath` is accessible. */
async function fileExists(filePath: string): Promise<boolean> {
  try {
    await fs.access(filePath);
    return true;
  } catch {
    return false;
  }
}

/** Returns the trimmed string, or `null` for non-strings and blank strings. */
function readNonBlankString(value: unknown): string | null {
  if (typeof value !== 'string') {
    return null;
  }

  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}

/**
 * Inspects the Codex accounts directory for local account artifacts.
 *
 * `hasArtifacts` is true when the directory contains `registry.json` or any
 * `*.auth.json` file. `hasActiveChatgptAccount` is true only when the registry
 * names an active account key, that account's auth file exists, and its auth
 * mode is `chatgpt`.
 *
 * Malformed registry content (for example a non-string account key) is treated
 * as "no active account" and no longer hides the fact that artifacts exist.
 *
 * @param accountsDir - Directory to inspect; defaults to `~/.codex/accounts`.
 * @returns Never rejects; an unreadable directory yields both flags `false`.
 */
export async function detectCodexLocalAccountState(
  accountsDir = CODEX_ACCOUNTS_DIR
): Promise<CodexLocalAccountState> {
  try {
    const entries = await fs.readdir(accountsDir, { withFileTypes: true });
    const hasArtifacts = entries.some(
      (entry) =>
        entry.isFile() && (entry.name === 'registry.json' || entry.name.endsWith('.auth.json'))
    );

    if (!hasArtifacts) {
      return {
        hasArtifacts: false,
        hasActiveChatgptAccount: false,
      };
    }

    const registry = await readJsonFile<CodexAccountsRegistry>(
      path.join(accountsDir, 'registry.json')
    );
    // Accept both snake_case and camelCase keys, but only real strings.
    const activeAccountKey =
      readNonBlankString(registry?.active_account_key) ??
      readNonBlankString(registry?.activeAccountKey);

    if (!activeAccountKey) {
      return {
        hasArtifacts: true,
        hasActiveChatgptAccount: false,
      };
    }

    const authFilePath = path.join(
      accountsDir,
      `${encodeAccountKeyForAuthFilename(activeAccountKey)}.auth.json`
    );
    if (!(await fileExists(authFilePath))) {
      return {
        hasArtifacts: true,
        hasActiveChatgptAccount: false,
      };
    }

    const authFile = await readJsonFile<CodexAuthFile>(authFilePath);
    const authMode = authFile?.auth_mode ?? authFile?.authMode ?? null;

    return {
      hasArtifacts: true,
      hasActiveChatgptAccount: authMode === 'chatgpt',
    };
  } catch {
    return {
      hasArtifacts: false,
      hasActiveChatgptAccount: false,
    };
  }
}

/** Convenience wrapper returning only the `hasArtifacts` flag. */
export async function detectCodexLocalAccountArtifacts(
  accountsDir = CODEX_ACCOUNTS_DIR
): Promise<boolean> {
  const state = await detectCodexLocalAccountState(accountsDir);
  return state.hasArtifacts;
}
// Polling cadence, chosen by login state and document visibility.
const CODEX_PENDING_LOGIN_REFRESH_MS = 3_000;
const CODEX_VISIBLE_RATE_LIMITS_REFRESH_MS = 10_000;
const CODEX_VISIBLE_STANDARD_REFRESH_MS = 20_000;
const CODEX_HIDDEN_REFRESH_MS = 60_000;

/** Returns `true` when there is no `document` or the document is not hidden. */
function isDocumentVisible(): boolean {
  if (typeof document === 'undefined') {
    return true;
  }

  return document.visibilityState !== 'hidden';
}

/**
 * Picks the polling interval: fastest while a login is starting/pending,
 * slowest while the document is hidden, otherwise based on whether rate
 * limits are being displayed.
 */
function getRefreshIntervalMs(options: {
  loginStatus: CodexAccountSnapshotDto['login']['status'] | undefined;
  includeRateLimits: boolean;
  visible: boolean;
}): number {
  if (options.loginStatus === 'starting' || options.loginStatus === 'pending') {
    return CODEX_PENDING_LOGIN_REFRESH_MS;
  }

  if (!options.visible) {
    return CODEX_HIDDEN_REFRESH_MS;
  }

  return options.includeRateLimits
    ? CODEX_VISIBLE_RATE_LIMITS_REFRESH_MS
    : CODEX_VISIBLE_STANDARD_REFRESH_MS;
}

/**
 * Subscribes a component to the Codex account snapshot.
 *
 * Loads the snapshot once when enabled, keeps it fresh through push updates,
 * interval polling and a refresh when the document becomes visible again, and
 * exposes the login / cancel / logout actions. Outside Electron mode, or when
 * `enabled` is false, nothing is loaded and the actions resolve to `false`.
 *
 * @param options.enabled - Master switch for loading, polling and actions.
 * @param options.includeRateLimits - Whether refreshes should request rate limits.
 * @returns Current snapshot, loading/error flags, a manual `refresh`, and
 *   action functions that resolve to `true` on success.
 */
export function useCodexAccountSnapshot(options: {
  enabled: boolean;
  includeRateLimits?: boolean;
}): {
  snapshot: CodexAccountSnapshotDto | null;
  loading: boolean;
  error: string | null;
  refresh: (options?: {
    includeRateLimits?: boolean;
    forceRefreshToken?: boolean;
    silent?: boolean;
  }) => Promise<void>;
  startChatgptLogin: () => Promise<boolean>;
  cancelChatgptLogin: () => Promise<boolean>;
  logout: () => Promise<boolean>;
} {
  const electronMode = isElectronMode();
  const [snapshot, setSnapshot] = useState<CodexAccountSnapshotDto | null>(null);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [visible, setVisible] = useState(() => isDocumentVisible());
  // Wall-clock time of the last applied snapshot; drives the staleness check
  // performed when the document becomes visible again.
  const lastUpdatedAtRef = useRef<number | null>(null);

  const applySnapshot = useCallback((nextSnapshot: CodexAccountSnapshotDto) => {
    lastUpdatedAtRef.current = Date.now();
    setSnapshot(nextSnapshot);
    setError(null);
  }, []);

  const refresh = useCallback(
    async (refreshOptions?: {
      includeRateLimits?: boolean;
      forceRefreshToken?: boolean;
      silent?: boolean;
    }) => {
      if (!electronMode || !options.enabled) {
        return;
      }

      // Silent refreshes (polling) never touch the loading/error flags.
      const silent = refreshOptions?.silent === true;
      if (!silent) {
        setLoading(true);
        setError(null);
      }
      try {
        const nextSnapshot = await api.refreshCodexAccountSnapshot({
          includeRateLimits: refreshOptions?.includeRateLimits ?? options.includeRateLimits,
          forceRefreshToken: refreshOptions?.forceRefreshToken,
        });
        applySnapshot(nextSnapshot);
      } catch (nextError) {
        if (!silent) {
          setError(
            nextError instanceof Error ? nextError.message : 'Failed to refresh Codex account'
          );
        }
      } finally {
        if (!silent) {
          setLoading(false);
        }
      }
    },
    [applySnapshot, electronMode, options.enabled, options.includeRateLimits]
  );

  // Initial load + push subscription.
  useEffect(() => {
    if (!electronMode || !options.enabled) {
      return;
    }

    // Set by the cleanup below so that a request superseded by a dependency
    // change, a disable, or an unmount cannot apply its result afterwards.
    let cancelled = false;

    setLoading(true);
    setError(null);

    const initialSnapshotRequest = options.includeRateLimits
      ? api.refreshCodexAccountSnapshot({
          includeRateLimits: true,
        })
      : api.getCodexAccountSnapshot();

    void initialSnapshotRequest
      .then((nextSnapshot) => {
        if (!cancelled) {
          applySnapshot(nextSnapshot);
        }
      })
      .catch((nextError: unknown) => {
        if (!cancelled) {
          setError(nextError instanceof Error ? nextError.message : 'Failed to load Codex account');
        }
      })
      .finally(() => {
        // Deliberately unguarded: if the hook was disabled meanwhile, nothing
        // else would reset the loading flag.
        setLoading(false);
      });

    const unsubscribe = api.onCodexAccountSnapshotChanged((_event, nextSnapshot) => {
      applySnapshot(nextSnapshot);
    });

    return () => {
      cancelled = true;
      unsubscribe();
    };
  }, [applySnapshot, electronMode, options.enabled, options.includeRateLimits]);

  // Track visibility and refresh a stale snapshot when becoming visible.
  useEffect(() => {
    if (!electronMode || !options.enabled || typeof document === 'undefined') {
      return;
    }

    const handleVisibilityChange = (): void => {
      const nextVisible = isDocumentVisible();
      setVisible(nextVisible);

      if (!nextVisible) {
        return;
      }

      const staleAfterMs = options.includeRateLimits
        ? CODEX_VISIBLE_RATE_LIMITS_REFRESH_MS
        : CODEX_VISIBLE_STANDARD_REFRESH_MS;

      if (
        lastUpdatedAtRef.current === null ||
        Date.now() - lastUpdatedAtRef.current >= staleAfterMs
      ) {
        void refresh({
          includeRateLimits: options.includeRateLimits,
          silent: true,
        });
      }
    };

    document.addEventListener('visibilitychange', handleVisibilityChange);
    return () => {
      document.removeEventListener('visibilitychange', handleVisibilityChange);
    };
  }, [electronMode, options.enabled, options.includeRateLimits, refresh]);

  // Interval polling; the interval is rebuilt whenever its cadence inputs change.
  useEffect(() => {
    if (!electronMode || !options.enabled) {
      return;
    }

    const refreshIntervalMs = getRefreshIntervalMs({
      loginStatus: snapshot?.login.status,
      includeRateLimits: options.includeRateLimits === true,
      visible,
    });
    const intervalId = window.setInterval(() => {
      void refresh({
        includeRateLimits: options.includeRateLimits,
        silent: true,
      });
    }, refreshIntervalMs);

    return () => {
      window.clearInterval(intervalId);
    };
  }, [
    electronMode,
    options.enabled,
    options.includeRateLimits,
    refresh,
    snapshot?.login.status,
    visible,
  ]);

  // Runs an account action, applies the snapshot it returns, and reports success.
  const runAction = useCallback(
    async (runner: () => Promise<CodexAccountSnapshotDto>): Promise<boolean> => {
      if (!electronMode || !options.enabled) {
        return false;
      }

      setLoading(true);
      setError(null);
      try {
        const nextSnapshot = await runner();
        applySnapshot(nextSnapshot);
        return true;
      } catch (nextError) {
        setError(nextError instanceof Error ? nextError.message : 'Codex account action failed');
        return false;
      } finally {
        setLoading(false);
      }
    },
    [applySnapshot, electronMode, options.enabled]
  );

  return useMemo(
    () => ({
      snapshot,
      loading,
      error,
      refresh,
      startChatgptLogin: () => runAction(() => api.startCodexChatgptLogin()),
      cancelChatgptLogin: () => runAction(() => api.cancelCodexChatgptLogin()),
      logout: () => runAction(() => api.logoutCodexAccount()),
    }),
    [error, loading, refresh, runAction, snapshot]
  );
}
const CODEX_NATIVE_BACKEND_ID = 'codex-native';
const CODEX_NATIVE_LABEL = 'Codex native';
const CODEX_NATIVE_DESCRIPTION = 'Use codex exec JSON mode.';
const DEFAULT_CODEX_AUTH_MODES = ['auto', 'chatgpt', 'api_key'] as const;

type CodexBackendOption = NonNullable<CliProviderStatus['availableBackends']>[number];
type CodexBackendTruth = Pick<
  CodexBackendOption,
  'available' | 'selectable' | 'state' | 'statusMessage' | 'detailMessage'
>;

/**
 * Recognises the placeholder Codex status: unsupported, "Checking..." message,
 * no models and no backend.
 */
function isCodexBootstrapPlaceholder(provider: CliProviderStatus): boolean {
  if (provider.providerId !== 'codex' || provider.supported !== false) {
    return false;
  }
  if (provider.statusMessage !== 'Checking...') {
    return false;
  }
  return provider.models.length === 0 && provider.backend == null;
}

/**
 * Derives availability, selectability, state and messages of the
 * `codex-native` backend option from the snapshot's launch readiness.
 * Unrecognised readiness values are treated like `missing_auth`.
 */
function getCodexNativeBackendTruth(snapshot: CodexAccountSnapshotDto): CodexBackendTruth {
  const detailMessage = snapshot.appServerStatusMessage;
  const issueOrStatusMessage = snapshot.launchIssueMessage ?? snapshot.appServerStatusMessage;
  const readiness = snapshot.launchReadinessState;

  if (
    readiness === 'ready_chatgpt' ||
    readiness === 'ready_api_key' ||
    readiness === 'ready_both'
  ) {
    const degraded = snapshot.appServerState === 'degraded';
    return {
      available: true,
      selectable: true,
      state: degraded ? 'degraded' : 'ready',
      statusMessage: degraded
        ? (issueOrStatusMessage ?? 'Ready with degraded account verification.')
        : 'Ready',
      detailMessage,
    };
  }

  if (readiness === 'warning_degraded_but_launchable') {
    return {
      available: true,
      selectable: true,
      state: 'degraded',
      statusMessage: issueOrStatusMessage ?? 'Ready with degraded account verification.',
      detailMessage,
    };
  }

  if (readiness === 'runtime_missing') {
    return {
      available: false,
      selectable: false,
      state: 'runtime-missing',
      statusMessage: issueOrStatusMessage ?? 'Runtime missing',
      detailMessage,
    };
  }

  if (readiness === 'incompatible') {
    return {
      available: false,
      selectable: false,
      state: 'disabled',
      statusMessage: issueOrStatusMessage ?? 'Runtime incompatible',
      detailMessage,
    };
  }

  // `missing_auth` and anything not handled above.
  return {
    available: false,
    selectable: true,
    state: 'authentication-required',
    statusMessage:
      snapshot.launchIssueMessage ??
      'Connect a ChatGPT account or add OPENAI_API_KEY / CODEX_API_KEY to use Codex.',
    detailMessage,
  };
}

/**
 * Provider-level status line: a "ready" message for the effective auth mode
 * when launch is allowed, otherwise the first available of launch issue,
 * app-server status, the caller's fallback, or `null`.
 */
function getProviderStatusMessage(
  snapshot: CodexAccountSnapshotDto,
  fallback: string | null | undefined
): string | null {
  if (snapshot.launchAllowed) {
    switch (snapshot.effectiveAuthMode) {
      case 'chatgpt':
        if (snapshot.appServerState !== 'degraded') {
          return 'ChatGPT account ready';
        }
        return (
          snapshot.launchIssueMessage ??
          'ChatGPT account detected - account verification is currently degraded.'
        );
      case 'api_key':
        return 'API key ready';
      default:
        break;
    }
  }

  return snapshot.launchIssueMessage ?? snapshot.appServerStatusMessage ?? fallback ?? null;
}

/**
 * Returns the provider's backend options with the `codex-native` entry
 * overlaid by snapshot truth, appending a default entry when none exists.
 * Other options are passed through untouched.
 */
function mergeCodexNativeBackendOption(
  provider: CliProviderStatus,
  snapshot: CodexAccountSnapshotDto
): NonNullable<CliProviderStatus['availableBackends']> {
  const truth = getCodexNativeBackendTruth(snapshot);
  const currentOptions = provider.availableBackends ?? [];
  const alreadyListed = currentOptions.some((option) => option.id === CODEX_NATIVE_BACKEND_ID);

  const seededOptions = alreadyListed
    ? currentOptions
    : [
        ...currentOptions,
        {
          id: CODEX_NATIVE_BACKEND_ID,
          label: CODEX_NATIVE_LABEL,
          description: CODEX_NATIVE_DESCRIPTION,
          selectable: true,
          recommended: true,
          available: true,
          state: 'ready' as const,
          audience: 'general' as const,
          statusMessage: null,
          detailMessage: null,
        },
      ];

  return seededOptions.map((option) =>
    option.id === CODEX_NATIVE_BACKEND_ID
      ? {
          ...option,
          label: option.label || CODEX_NATIVE_LABEL,
          description: option.description || CODEX_NATIVE_DESCRIPTION,
          recommended: option.recommended !== false,
          audience: option.audience ?? 'general',
          ...truth,
        }
      : option
  );
}

/**
 * Overlays the Codex account snapshot onto a provider status.
 *
 * Non-Codex providers and a missing snapshot return `provider` unchanged.
 * Otherwise authentication, verification, status message, backend selection
 * and connection details are taken from the snapshot, and the backend is
 * pinned to `codex-native`.
 */
export function mergeCodexProviderStatusWithSnapshot(
  provider: CliProviderStatus,
  snapshot: CodexAccountSnapshotDto | null
): CliProviderStatus {
  if (provider.providerId !== 'codex' || !snapshot) {
    return provider;
  }

  const { apiKey, effectiveAuthMode, preferredAuthMode } = snapshot;
  const availableBackends = mergeCodexNativeBackendOption(provider, snapshot);

  const baseConnection = provider.connection ?? {
    supportsOAuth: false,
    supportsApiKey: true,
    configurableAuthModes: [...DEFAULT_CODEX_AUTH_MODES],
    configuredAuthMode: preferredAuthMode,
    apiKeyConfigured: apiKey.available,
    apiKeySource: apiKey.source,
    apiKeySourceLabel: apiKey.sourceLabel,
    codex: null,
  };

  const runtimeUnusable =
    snapshot.appServerState === 'runtime-missing' || snapshot.appServerState === 'incompatible';

  return {
    ...provider,
    supported: provider.supported || isCodexBootstrapPlaceholder(provider),
    authenticated: snapshot.launchAllowed,
    authMethod:
      effectiveAuthMode === 'chatgpt' || effectiveAuthMode === 'api_key'
        ? effectiveAuthMode
        : null,
    verificationState: snapshot.launchAllowed ? 'verified' : runtimeUnusable ? 'error' : 'unknown',
    statusMessage: getProviderStatusMessage(snapshot, provider.statusMessage),
    selectedBackendId: CODEX_NATIVE_BACKEND_ID,
    resolvedBackendId: CODEX_NATIVE_BACKEND_ID,
    availableBackends,
    backend: {
      kind: CODEX_NATIVE_BACKEND_ID,
      label: CODEX_NATIVE_LABEL,
      endpointLabel: 'codex exec --json',
      projectId: provider.backend?.projectId ?? null,
      authMethodDetail: effectiveAuthMode ?? null,
    },
    connection: {
      ...baseConnection,
      configuredAuthMode: preferredAuthMode,
      apiKeyConfigured: apiKey.available,
      apiKeySource: apiKey.source,
      apiKeySourceLabel: apiKey.sourceLabel,
      codex: {
        preferredAuthMode,
        effectiveAuthMode,
        launchAllowed: snapshot.launchAllowed,
        launchIssueMessage: snapshot.launchIssueMessage,
        launchReadinessState: snapshot.launchReadinessState,
        appServerState: snapshot.appServerState,
        appServerStatusMessage: snapshot.appServerStatusMessage,
        managedAccount: snapshot.managedAccount,
        requiresOpenaiAuth: snapshot.requiresOpenaiAuth,
        localAccountArtifactsPresent: snapshot.localAccountArtifactsPresent,
        localActiveChatgptAccountPresent: snapshot.localActiveChatgptAccountPresent,
        login: snapshot.login,
        rateLimits: snapshot.rateLimits,
      },
    },
  };
}
const MINUTES_PER_HOUR = 60;
const MINUTES_PER_DAY = 1_440;
const MINUTES_PER_WEEK = 10_080;
// Timestamps below this value are interpreted as seconds rather than milliseconds.
const EPOCH_MILLISECONDS_THRESHOLD = 1_000_000_000_000;

/** Narrows to a finite number greater than zero. */
function isPositiveFiniteNumber(value: number | null | undefined): value is number {
  return typeof value === 'number' && Number.isFinite(value) && value > 0;
}

/**
 * Normalises a reset timestamp to epoch milliseconds.
 * Values below 1e12 are multiplied by 1000; missing, non-finite or
 * non-positive values yield `null`.
 */
export function normalizeCodexResetTimestamp(resetAt: number | null | undefined): number | null {
  if (!isPositiveFiniteNumber(resetAt)) {
    return null;
  }

  if (resetAt >= EPOCH_MILLISECONDS_THRESHOLD) {
    return resetAt;
  }
  return resetAt * 1000;
}

/**
 * Compact window label using the largest unit that divides the duration
 * evenly: `1w`, `2d`, `5h`, otherwise minutes (`45m`). `null` for invalid input.
 */
export function formatCodexWindowDuration(
  windowDurationMins: number | null | undefined
): string | null {
  if (!isPositiveFiniteNumber(windowDurationMins)) {
    return null;
  }

  const units: ReadonlyArray<readonly [number, string]> = [
    [MINUTES_PER_WEEK, 'w'],
    [MINUTES_PER_DAY, 'd'],
    [MINUTES_PER_HOUR, 'h'],
  ];
  for (const [minutesPerUnit, suffix] of units) {
    if (windowDurationMins % minutesPerUnit === 0) {
      return `${windowDurationMins / minutesPerUnit}${suffix}`;
    }
  }

  return `${windowDurationMins}m`;
}

/**
 * Adjective form of the window length (`5-hour`, `2-day`, `3-week`,
 * `45-minute`). Exactly one week is rendered as `7-day`. `null` for invalid input.
 */
export function formatCodexWindowDurationLong(
  windowDurationMins: number | null | undefined
): string | null {
  if (!isPositiveFiniteNumber(windowDurationMins)) {
    return null;
  }

  if (windowDurationMins % MINUTES_PER_WEEK === 0) {
    const weekCount = windowDurationMins / MINUTES_PER_WEEK;
    if (weekCount === 1) {
      return '7-day';
    }
    return `${weekCount}-week`;
  }

  if (windowDurationMins % MINUTES_PER_DAY === 0) {
    return `${windowDurationMins / MINUTES_PER_DAY}-day`;
  }

  if (windowDurationMins % MINUTES_PER_HOUR === 0) {
    return `${windowDurationMins / MINUTES_PER_HOUR}-hour`;
  }

  return `${windowDurationMins}-minute`;
}

/** Appends the compact window duration to a usage title when it is known. */
export function formatCodexUsageWindowLabel(
  title: 'Primary used' | 'Secondary used' | 'Weekly used',
  windowDurationMins: number | null | undefined
): string {
  const compactDuration = formatCodexWindowDuration(windowDurationMins);
  if (!compactDuration) {
    return title;
  }
  return `${title} (${compactDuration})`;
}

/** Appends the compact window duration to a reset title when it is known. */
export function formatCodexResetWindowLabel(
  title: 'Primary reset' | 'Secondary reset' | 'Weekly reset',
  windowDurationMins: number | null | undefined
): string {
  const compactDuration = formatCodexWindowDuration(windowDurationMins);
  if (!compactDuration) {
    return title;
  }
  return `${title} (${compactDuration})`;
}

/** Renders the used percentage, or `Unknown` when it is not a finite number. */
export function formatCodexUsagePercent(usedPercent: number | null | undefined): string {
  if (typeof usedPercent !== 'number' || !Number.isFinite(usedPercent)) {
    return 'Unknown';
  }
  return `${usedPercent}%`;
}

/**
 * Renders the remaining percentage (`100 - used`, clamped to 0..100), or
 * `null` when the used percentage is not a finite number.
 */
export function formatCodexRemainingPercent(usedPercent: number | null | undefined): string | null {
  if (typeof usedPercent !== 'number' || !Number.isFinite(usedPercent)) {
    return null;
  }

  const unclamped = 100 - usedPercent;
  const clamped = Math.max(0, Math.min(100, unclamped));
  return `${clamped}%`;
}

/**
 * One-sentence explanation of a usage figure. Uses the most specific wording
 * the available data allows: used + remaining + window, window only, or a
 * generic note.
 */
export function formatCodexUsageExplanation(
  usedPercent: number | null | undefined,
  windowDurationMins: number | null | undefined
): string {
  const windowLabel = formatCodexWindowDurationLong(windowDurationMins);
  if (!windowLabel) {
    return 'Shows used quota, not remaining quota.';
  }

  const remaining = formatCodexRemainingPercent(usedPercent);
  if (!remaining) {
    return `Shows used quota in the current ${windowLabel} window, not remaining quota.`;
  }

  return `${formatCodexUsagePercent(usedPercent)} used - about ${remaining} left in the current ${windowLabel} window.`;
}

/**
 * Renders the credits summary: `Unknown` without data, `Unlimited`,
 * `Not available` when the account has no credits, otherwise the balance
 * (or `Unknown` when the balance is missing).
 */
export function formatCodexCreditsValue(credits: CodexRateLimitSnapshotDto['credits']): string {
  if (!credits) {
    return 'Unknown';
  }
  if (credits.unlimited) {
    return 'Unlimited';
  }
  return credits.hasCredits ? (credits.balance ?? 'Unknown') : 'Not available';
}
DEFAULT_REQUEST_TIMEOUT_MS = 3_000; const DEFAULT_TOTAL_TIMEOUT_MS = 8_000; diff --git a/src/main/http/teams.ts b/src/main/http/teams.ts index 0166e058..5495aef5 100644 --- a/src/main/http/teams.ts +++ b/src/main/http/teams.ts @@ -1,6 +1,7 @@ import { validateTeamName } from '@main/ipc/guards'; import { getErrorMessage } from '@shared/utils/errorHandling'; import { createLogger } from '@shared/utils/logger'; +import { migrateProviderBackendId } from '@shared/utils/providerBackend'; import { isAbsolute } from 'path'; import type { HttpServices } from './index'; @@ -100,6 +101,13 @@ function parseLaunchRequest(teamName: string, body: unknown): TeamLaunchRequest throw new HttpBadRequestError('providerId must be anthropic, codex, or gemini'); })(); const prompt = assertOptionalString(payload.prompt, 'prompt'); + const rawProviderBackendId = assertOptionalString(payload.providerBackendId, 'providerBackendId'); + const providerBackendId = migrateProviderBackendId(providerId, rawProviderBackendId); + if (rawProviderBackendId && !providerBackendId) { + throw new HttpBadRequestError( + 'providerBackendId must be one of auto, adapter, api, cli-sdk, or codex-native' + ); + } const model = assertOptionalString(payload.model, 'model'); const effort = assertOptionalEffort(payload.effort); const clearContext = assertOptionalBoolean(payload.clearContext, 'clearContext'); @@ -111,6 +119,9 @@ function parseLaunchRequest(teamName: string, body: unknown): TeamLaunchRequest teamName, cwd: assertAbsoluteCwd(payload.cwd), providerId, + ...(providerBackendId && { + providerBackendId, + }), ...(prompt && { prompt, }), diff --git a/src/main/index.ts b/src/main/index.ts index 42b9d15a..a89d959e 100644 --- a/src/main/index.ts +++ b/src/main/index.ts @@ -19,6 +19,12 @@ process.env.UV_THREADPOOL_SIZE ??= '16'; // Sentry must be the first import to capture early errors. 
import './sentry'; +import { + createCodexAccountFeature, + type CodexAccountFeatureFacade, + registerCodexAccountIpc, + removeCodexAccountIpc, +} from '@features/codex-account/main'; import { createRecentProjectsFeature, type RecentProjectsFeatureFacade, @@ -39,6 +45,7 @@ import { TeamConfigReader } from '@main/services/team/TeamConfigReader'; import { TeamInboxWriter } from '@main/services/team/TeamInboxWriter'; import { TeamMcpConfigBuilder } from '@main/services/team/TeamMcpConfigBuilder'; import { resolveInteractiveShellEnv } from '@main/utils/shellEnv'; +import { providerConnectionService } from '@main/services/runtime/ProviderConnectionService'; import { CONTEXT_CHANGED, SCHEDULE_CHANGE, @@ -111,6 +118,7 @@ import { } from './utils/safeWebContentsSend'; import { syncTelemetryFlag } from './sentry'; import { + ActiveTeamRegistry, BoardTaskActivityDetailService, BoardTaskActivityRecordSource, BoardTaskActivityService, @@ -130,6 +138,11 @@ import { TaskBoundaryParser, TeamDataService, TeamLogSourceTracker, + TeamTaskStallJournal, + TeamTaskStallMonitor, + TeamTaskStallNotifier, + TeamTaskStallPolicy, + TeamTaskStallSnapshotSource, TeammateToolTracker, TeamMemberLogsFinder, TeamProvisioningService, @@ -408,6 +421,7 @@ let contextRegistry: ServiceContextRegistry; let notificationManager: NotificationManager; let updaterService: UpdaterService; let sshConnectionManager: SshConnectionManager; +let codexAccountFeature: CodexAccountFeatureFacade | null = null; let recentProjectsFeature: RecentProjectsFeatureFacade; let teamDataService: TeamDataService; let teamProvisioningService: TeamProvisioningService; @@ -415,6 +429,7 @@ let cliInstallerService: CliInstallerService; let ptyTerminalService: PtyTerminalService; let httpServer: HttpServer; let schedulerService: SchedulerService; +let teamTaskStallMonitor: TeamTaskStallMonitor | null = null; let skillsWatcherService: SkillsWatcherService | null = null; let teamBackupService: TeamBackupService | null = null; let 
branchStatusService: BranchStatusService | null = null; @@ -848,6 +863,13 @@ async function initializeServices(): Promise { const taskChangePresenceRepository = new JsonTaskChangePresenceRepository(); const teamLogSourceTracker = new TeamLogSourceTracker(teamMemberLogsFinder); + teamTaskStallMonitor = new TeamTaskStallMonitor( + new ActiveTeamRegistry(teamDataService, teamLogSourceTracker), + new TeamTaskStallSnapshotSource(), + new TeamTaskStallPolicy(), + new TeamTaskStallJournal(), + new TeamTaskStallNotifier(teamDataService) + ); let teammateToolTracker: TeammateToolTracker | null = null; branchStatusService = new BranchStatusService((event) => { safeSendToRenderer(mainWindow, TEAM_PROJECT_BRANCH_CHANGE, event); @@ -930,6 +952,7 @@ async function initializeServices(): Promise { // Allow TeamProvisioningService to trigger team refresh events (e.g. live lead replies). const teamChangeEmitter = (event: TeamChangeEvent): void => { forwardTeamChange(event); + teamTaskStallMonitor?.noteTeamChange(event); if (event.type === 'lead-activity' && event.detail === 'offline') { teammateToolTracker?.handleTeamOffline(event.teamName); } @@ -939,6 +962,7 @@ async function initializeServices(): Promise { teamLogSourceTracker.onLogSourceChange((teamName) => { teammateToolTracker?.handleLogSourceChange(teamName); }); + teamTaskStallMonitor.start(); // Allow SchedulerService to push schedule events to renderer schedulerService.setChangeEmitter((event) => { @@ -959,6 +983,11 @@ async function initializeServices(): Promise { getLocalContext: () => contextRegistry.get('local'), logger: createLogger('Feature:RecentProjects'), }); + codexAccountFeature = createCodexAccountFeature({ + logger: createLogger('Feature:CodexAccount'), + configManager, + }); + providerConnectionService.setCodexAccountFeature(codexAccountFeature); // startProcessHealthPolling() is deferred to after window creation // (did-finish-load handler) to avoid thread pool contention at startup. 
@@ -1013,6 +1042,7 @@ async function initializeServices(): Promise { crossTeamService, teamBackupService ?? undefined ); + registerCodexAccountIpc(ipcMain, codexAccountFeature); registerRecentProjectsIpc(ipcMain, recentProjectsFeature); // Forward SSH state changes to renderer and HTTP SSE clients @@ -1142,6 +1172,10 @@ function shutdownServices(): void { if (teamDataService) { teamDataService.stopProcessHealthPolling(); } + if (teamTaskStallMonitor) { + void teamTaskStallMonitor.stop(); + teamTaskStallMonitor = null; + } branchStatusService?.dispose(); branchStatusService = null; @@ -1151,6 +1185,9 @@ function shutdownServices(): void { } void skillsWatcherService?.stopAll(); + providerConnectionService.setCodexAccountFeature(null); + void codexAccountFeature?.dispose(); + codexAccountFeature = null; // Kill all PTY processes if (ptyTerminalService) { @@ -1159,6 +1196,7 @@ function shutdownServices(): void { // Remove IPC handlers removeIpcHandlers(); + removeCodexAccountIpc(ipcMain); removeRecentProjectsIpc(ipcMain); // Dispose backup service timers @@ -1438,6 +1476,7 @@ function createWindow(): void { if (teamProvisioningService) { teamProvisioningService.setMainWindow(null); } + codexAccountFeature?.setMainWindow(null); setEditorMainWindow(null); setReviewMainWindow(null); cleanupEditorState(); @@ -1472,6 +1511,7 @@ function createWindow(): void { if (teamProvisioningService) { teamProvisioningService.setMainWindow(mainWindow); } + codexAccountFeature?.setMainWindow(mainWindow); setEditorMainWindow(mainWindow); setReviewMainWindow(mainWindow); diff --git a/src/main/ipc/configValidation.ts b/src/main/ipc/configValidation.ts index d52b0dcf..e10b5653 100644 --- a/src/main/ipc/configValidation.ts +++ b/src/main/ipc/configValidation.ts @@ -5,6 +5,8 @@ import * as path from 'path'; +import { migrateProviderBackendId } from '@shared/utils/providerBackend'; + import type { AppConfig, DisplayConfig, @@ -442,13 +444,21 @@ function validateRuntimeSection(data: unknown): 
ValidationSuccess<'runtime'> | V } if (providerId === 'codex') { - if (backendId !== 'auto' && backendId !== 'adapter') { + if ( + backendId !== 'auto' && + backendId !== 'adapter' && + backendId !== 'api' && + backendId !== 'codex-native' + ) { return { valid: false, - error: 'runtime.providerBackends.codex must be one of: auto, adapter', + error: 'runtime.providerBackends.codex must be one of: codex-native', }; } - providerBackends.codex = backendId; + providerBackends.codex = migrateProviderBackendId( + 'codex', + backendId + ) as RuntimeConfig['providerBackends']['codex']; continue; } @@ -515,25 +525,24 @@ function validateProviderConnectionsSection( const codexUpdate: Partial = {}; for (const [connectionKey, connectionValue] of Object.entries(value)) { - if (connectionKey === 'apiKeyBetaEnabled') { - if (typeof connectionValue !== 'boolean') { - return { - valid: false, - error: 'providerConnections.codex.apiKeyBetaEnabled must be a boolean', - }; - } - codexUpdate.apiKeyBetaEnabled = connectionValue; + if (connectionKey === 'apiKeyBetaEnabled' || connectionKey === 'authMode') { continue; } - if (connectionKey === 'authMode') { - if (connectionValue !== 'oauth' && connectionValue !== 'api_key') { + if (connectionKey === 'preferredAuthMode') { + if ( + connectionValue !== 'auto' && + connectionValue !== 'chatgpt' && + connectionValue !== 'api_key' + ) { return { valid: false, - error: 'providerConnections.codex.authMode must be one of: oauth, api_key', + error: + 'providerConnections.codex.preferredAuthMode must be one of: auto, chatgpt, api_key', }; } - codexUpdate.authMode = connectionValue; + + codexUpdate.preferredAuthMode = connectionValue; continue; } diff --git a/src/main/ipc/teams.ts b/src/main/ipc/teams.ts index dcfdc648..a01e1d5c 100644 --- a/src/main/ipc/teams.ts +++ b/src/main/ipc/teams.ts @@ -91,6 +91,7 @@ import { PROTECTED_CLI_FLAGS, } from '@shared/utils/cliArgsParser'; import { createLogger } from '@shared/utils/logger'; +import { 
isTeamProviderBackendId, migrateProviderBackendId } from '@shared/utils/providerBackend'; import { isRateLimitMessage } from '@shared/utils/rateLimitDetector'; import { buildStandaloneSlashCommandMeta, @@ -185,6 +186,8 @@ import type { TeamLaunchResponse, TeamMemberActivityMeta, TeamMessageNotificationData, + TeamProviderBackendId, + TeamProviderId, TeamProvisioningPrepareResult, TeamProvisioningProgress, TeamSummary, @@ -1074,6 +1077,9 @@ async function handleUpdateConfig( } return wrapTeamHandler('updateConfig', async () => { const tn = validated.value!; + const teamDataService = getTeamDataService(); + const previousDisplayName = await teamDataService.getTeamDisplayName(tn).catch(() => tn); + const requestedName = typeof name === 'string' ? name.trim() : ''; const result = await getTeamDataService().updateConfig(tn, { name, description, @@ -1084,10 +1090,10 @@ async function handleUpdateConfig( } // Notify running lead about the rename so it stays aware of current team name - if (typeof name === 'string' && name.trim()) { + if (requestedName && requestedName !== (previousDisplayName?.trim() || tn)) { const provisioning = getTeamProvisioningService(); if (provisioning.isTeamAlive(tn)) { - const msg = `The team has been renamed to "${name.trim()}". Please use this name when referring to the team going forward.`; + const msg = `The team has been renamed to "${requestedName}". 
Please use this name when referring to the team going forward.`; try { await provisioning.sendMessageToTeam(tn, msg); } catch { @@ -1126,6 +1132,38 @@ function parseOptionalMemberProviderId( return { valid: false, error: 'member providerId must be anthropic, codex, or gemini' }; } +function parseOptionalProviderBackendId( + value: unknown, + providerId?: TeamProviderId +): { valid: true; value: TeamProviderBackendId | undefined } | { valid: false; error: string } { + if (value === undefined || value === null || value === '') { + return { valid: true, value: undefined }; + } + if (typeof value !== 'string') { + return { valid: false, error: 'providerBackendId must be a string' }; + } + const trimmed = value.trim(); + if (!trimmed) { + return { valid: true, value: undefined }; + } + if (trimmed.length > 64) { + return { valid: false, error: 'providerBackendId too long (max 64)' }; + } + if (providerId) { + const migratedBackendId = migrateProviderBackendId(providerId, trimmed); + if (migratedBackendId) { + return { valid: true, value: migratedBackendId }; + } + } else if (isTeamProviderBackendId(trimmed)) { + return { valid: true, value: trimmed }; + } + + return { + valid: false, + error: 'providerBackendId must be one of auto, adapter, api, cli-sdk, or codex-native', + }; +} + function parseOptionalMemberEffort( value: unknown ): { valid: true; value: EffortLevel | undefined } | { valid: false; error: string } { @@ -1219,6 +1257,19 @@ async function validateProvisioningRequest( if (payload.prompt !== undefined && typeof payload.prompt !== 'string') { return { valid: false, error: 'prompt must be a string' }; } + const providerId = + payload.providerId === 'codex' + ? 'codex' + : payload.providerId === 'gemini' + ? 
'gemini' + : 'anthropic'; + const providerBackendValidation = parseOptionalProviderBackendId( + payload.providerBackendId, + providerId + ); + if (!providerBackendValidation.valid) { + return { valid: false, error: providerBackendValidation.error }; + } try { await fs.promises.mkdir(cwd, { recursive: true }); @@ -1270,12 +1321,8 @@ async function validateProvisioningRequest( members, cwd, prompt: typeof payload.prompt === 'string' ? payload.prompt.trim() || undefined : undefined, - providerId: - payload.providerId === 'codex' - ? 'codex' - : payload.providerId === 'gemini' - ? 'gemini' - : 'anthropic', + providerId, + providerBackendId: providerBackendValidation.value, model: typeof payload.model === 'string' ? payload.model.trim() || undefined : undefined, effort: isValidEffort(payload.effort) ? payload.effort : undefined, skipPermissions: @@ -1385,6 +1432,19 @@ async function handleLaunchTeam( if (payload.model !== undefined && typeof payload.model !== 'string') { return { success: false, error: 'model must be a string' }; } + const providerId = + payload.providerId === 'codex' + ? 'codex' + : payload.providerId === 'gemini' + ? 'gemini' + : 'anthropic'; + const providerBackendValidation = parseOptionalProviderBackendId( + payload.providerBackendId, + providerId + ); + if (!providerBackendValidation.valid) { + return { success: false, error: providerBackendValidation.error }; + } // Detect draft team: team.meta.json exists but config.json doesn't. // This happens when user created team config without launching (launchTeam=false), @@ -1403,7 +1463,17 @@ async function handleLaunchTeam( if (isDraft) { const meta = await teamMetaStore.getMeta(tn); const membersStore = new TeamMembersMetaStore(); - const members = await membersStore.getMembers(tn); + const membersMeta = await membersStore.getMeta(tn); + const members = membersMeta?.members ?? []; + + const resolvedProviderId = + providerId === 'codex' || providerId === 'gemini' + ? 
providerId + : meta?.providerId === 'codex' + ? 'codex' + : meta?.providerId === 'gemini' + ? 'gemini' + : 'anthropic'; const createRequest: TeamCreateRequest = { teamName: tn, @@ -1412,16 +1482,11 @@ async function handleLaunchTeam( color: meta?.color, cwd, prompt: typeof payload.prompt === 'string' ? payload.prompt.trim() || undefined : undefined, - providerId: - payload.providerId === 'codex' - ? 'codex' - : payload.providerId === 'gemini' - ? 'gemini' - : meta?.providerId === 'codex' - ? 'codex' - : meta?.providerId === 'gemini' - ? 'gemini' - : 'anthropic', + providerId: resolvedProviderId, + providerBackendId: migrateProviderBackendId( + resolvedProviderId, + providerBackendValidation.value ?? meta?.providerBackendId ?? membersMeta?.providerBackendId + ), model: typeof payload.model === 'string' ? payload.model.trim() || undefined : undefined, effort: isValidEffort(payload.effort) ? payload.effort : undefined, limitContext: typeof payload.limitContext === 'boolean' ? payload.limitContext : undefined, @@ -1462,12 +1527,8 @@ async function handleLaunchTeam( teamName: validatedTeamName.value!, cwd, prompt: typeof payload.prompt === 'string' ? payload.prompt.trim() || undefined : undefined, - providerId: - payload.providerId === 'codex' - ? 'codex' - : payload.providerId === 'gemini' - ? 'gemini' - : 'anthropic', + providerId, + providerBackendId: providerBackendValidation.value, model: typeof payload.model === 'string' ? payload.model.trim() || undefined : undefined, effort: isValidEffort(payload.effort) ? payload.effort : undefined, clearContext: payload.clearContext === true ? 
true : undefined, @@ -2552,6 +2613,10 @@ async function handleCreateConfig( return { success: false, error: 'cwd must be an absolute path' }; } } + const providerBackendValidation = parseOptionalProviderBackendId(payload.providerBackendId); + if (!providerBackendValidation.valid) { + return { success: false, error: providerBackendValidation.error }; + } const seenNames = new Set(); const members: TeamCreateConfigRequest['members'] = []; @@ -2609,6 +2674,7 @@ async function handleCreateConfig( color: typeof payload.color === 'string' ? payload.color.trim() || undefined : undefined, members, cwd: typeof payload.cwd === 'string' ? payload.cwd.trim() || undefined : undefined, + providerBackendId: providerBackendValidation.value, }) ); } @@ -3884,7 +3950,10 @@ async function handleGetSavedRequest( } const membersStore = new TeamMembersMetaStore(); - const members = await membersStore.getMembers(tn); + const membersMeta = await membersStore.getMeta(tn); + const members = membersMeta?.members ?? []; + + const resolvedProviderId = meta.providerId ?? 'anthropic'; return { success: true, @@ -3895,7 +3964,11 @@ async function handleGetSavedRequest( color: meta.color, cwd: meta.cwd, prompt: meta.prompt, - providerId: meta.providerId ?? 'anthropic', + providerId: resolvedProviderId, + providerBackendId: migrateProviderBackendId( + resolvedProviderId, + meta.providerBackendId ?? 
membersMeta?.providerBackendId + ), model: meta.model, effort: meta.effort as TeamCreateRequest['effort'], skipPermissions: meta.skipPermissions, diff --git a/src/main/services/discovery/ProjectScanner.ts b/src/main/services/discovery/ProjectScanner.ts index da24adb1..4fe892a1 100644 --- a/src/main/services/discovery/ProjectScanner.ts +++ b/src/main/services/discovery/ProjectScanner.ts @@ -240,7 +240,7 @@ export class ProjectScanner { } const ms = Date.now() - startedAt; - if (ms >= 2000) { + if (ms >= 5000) { logger.warn( `[scan] completed slow ms=${ms} projectDirs=${projectDirs.length} projects=${validProjects.length}` ); diff --git a/src/main/services/infrastructure/CliInstallerService.ts b/src/main/services/infrastructure/CliInstallerService.ts index 9ab0015a..24254cc4 100644 --- a/src/main/services/infrastructure/CliInstallerService.ts +++ b/src/main/services/infrastructure/CliInstallerService.ts @@ -71,6 +71,9 @@ const CLI_INSTALLER_PROGRESS_CHANNEL = 'cliInstaller:progress'; /** Timeout for `claude --version` (ms) */ const VERSION_TIMEOUT_MS = 10_000; +const VERSION_RETRY_ATTEMPTS = 2; +const VERSION_RETRY_DELAY_MS = 350; +const HEALTHY_STATUS_FALLBACK_TTL_MS = 60_000; /** Timeout for `claude install` (ms) — can take a while on slow disks */ const INSTALL_TIMEOUT_MS = 120_000; @@ -134,6 +137,10 @@ function clipTailForDiag(s: string, maxLen: number): string { return stripControlForDiag(s).slice(-maxLen); } +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + const DIAG_PATH_HEAD = 400; const DIAG_HOME_PREVIEW = 120; const DIAG_AUTH_STDOUT_TAIL = 160; @@ -355,8 +362,36 @@ export class CliInstallerService { } ); private latestStatusSnapshot: CliInstallationStatus | null = null; + private lastHealthyStatusSnapshot: CliInstallationStatus | null = null; + private lastHealthyStatusObservedAt = 0; private readonly latestProviderSignatures = new Map(); + private rememberHealthyStatus(status: CliInstallationStatus): 
void { + if (!status.installed || !status.binaryPath || status.launchError) { + return; + } + + this.lastHealthyStatusSnapshot = cloneCliInstallationStatus(status); + this.lastHealthyStatusObservedAt = Date.now(); + } + + private getRecoverableHealthyStatus(binaryPath: string): CliInstallationStatus | null { + if ( + !this.lastHealthyStatusSnapshot || + !this.lastHealthyStatusSnapshot.installed || + !this.lastHealthyStatusSnapshot.binaryPath || + this.lastHealthyStatusSnapshot.binaryPath !== binaryPath + ) { + return null; + } + + if (Date.now() - this.lastHealthyStatusObservedAt > HEALTHY_STATUS_FALLBACK_TTL_MS) { + return null; + } + + return cloneCliInstallationStatus(this.lastHealthyStatusSnapshot); + } + private electronMetaForDiag(): Record { try { // eslint-disable-next-line @typescript-eslint/no-require-imports @@ -764,6 +799,7 @@ export class CliInstallerService { r.installedVersion = versionProbe.version; r.launchError = null; r.authStatusChecking = true; + this.rememberHealthyStatus(r); this.publishStatusSnapshot(r); // Auth and GCS version check are independent — run in parallel. @@ -772,8 +808,21 @@ export class CliInstallerService { this.checkAuthStatus(binaryPath, r, diag), r.supportsSelfUpdate ? 
this.fetchLatestVersion(r) : Promise.resolve(), ]); + this.rememberHealthyStatus(r); this.publishStatusSnapshot(r); } else { + const recoveredHealthyStatus = this.getRecoverableHealthyStatus(binaryPath); + if (recoveredHealthyStatus) { + logger.warn( + `CLI version probe failed for ${binaryPath}, reusing last healthy status snapshot: ${versionProbe.error}` + ); + Object.assign(r, recoveredHealthyStatus, { + launchError: null, + }); + this.publishStatusSnapshot(r); + return; + } + diag.versionError = versionProbe.error; r.installed = false; r.installedVersion = null; @@ -806,37 +855,50 @@ export class CliInstallerService { private async probeCliVersion( binaryPath: string ): Promise<{ ok: true; version: string | null } | { ok: false; error: string }> { - try { - const { stdout } = await execCli(binaryPath, ['--version'], { - timeout: VERSION_TIMEOUT_MS, - env: this.envForCli(binaryPath), - }); - const version = normalizeVersion(stdout); - if (!version) { - return { ok: false, error: 'CLI returned an empty version string.' }; - } + let lastError: string | null = null; - if (isSemverVersion(version)) { - logger.info(`Installed CLI version: "${stdout.trim()}" → normalized: "${version}"`); - return { ok: true, version }; - } + for (let attempt = 1; attempt <= VERSION_RETRY_ATTEMPTS; attempt += 1) { + try { + const { stdout } = await execCli(binaryPath, ['--version'], { + timeout: VERSION_TIMEOUT_MS, + env: this.envForCli(binaryPath), + }); + const version = normalizeVersion(stdout); + if (!version) { + return { ok: false, error: 'CLI returned an empty version string.' 
}; + } - const inferredVersion = await this.inferInstalledCliVersionFromPath(binaryPath); - if (inferredVersion) { - logger.info( - `Installed CLI version was inferred from installer path: "${stdout.trim()}" → "${inferredVersion}"` + if (isSemverVersion(version)) { + logger.info(`Installed CLI version: "${stdout.trim()}" → normalized: "${version}"`); + return { ok: true, version }; + } + + const inferredVersion = await this.inferInstalledCliVersionFromPath(binaryPath); + if (inferredVersion) { + logger.info( + `Installed CLI version was inferred from installer path: "${stdout.trim()}" → "${inferredVersion}"` + ); + return { ok: true, version: inferredVersion }; + } + + logger.warn( + `Installed CLI returned a non-semver version string: "${stdout.trim()}". ` + + 'Treating the binary as healthy, but omitting version details.' ); - return { ok: true, version: inferredVersion }; + return { ok: true, version: null }; + } catch (err) { + lastError = getErrorMessage(err); + if (attempt < VERSION_RETRY_ATTEMPTS) { + logger.warn( + `CLI version probe failed (attempt ${attempt}/${VERSION_RETRY_ATTEMPTS}), retrying after ${VERSION_RETRY_DELAY_MS}ms: ${lastError}` + ); + await sleep(VERSION_RETRY_DELAY_MS); + continue; + } } - - logger.warn( - `Installed CLI returned a non-semver version string: "${stdout.trim()}". ` + - 'Treating the binary as healthy, but omitting version details.' - ); - return { ok: true, version: null }; - } catch (err) { - return { ok: false, error: getErrorMessage(err) }; } + + return { ok: false, error: lastError ?? 'Failed to run runtime version probe.' 
}; } private async inferInstalledCliVersionFromPath(binaryPath: string): Promise { diff --git a/src/main/services/infrastructure/ConfigManager.ts b/src/main/services/infrastructure/ConfigManager.ts index 96cc048d..e4f324fe 100644 --- a/src/main/services/infrastructure/ConfigManager.ts +++ b/src/main/services/infrastructure/ConfigManager.ts @@ -12,12 +12,14 @@ import { getClaudeBasePath, setClaudeBasePathOverride } from '@main/utils/pathDecoder'; import { validateRegexPattern } from '@main/utils/regexValidation'; import { createLogger } from '@shared/utils/logger'; +import { migrateProviderBackendId } from '@shared/utils/providerBackend'; import * as fs from 'fs'; import * as fsp from 'fs/promises'; import * as path from 'path'; import { DEFAULT_TRIGGERS, TriggerManager } from './TriggerManager'; +import type { CodexAccountAuthMode } from '@features/codex-account/contracts'; import type { TriggerColor } from '@shared/constants/triggerColors'; import type { SshConnectionProfile } from '@shared/types/api'; @@ -226,20 +228,18 @@ export interface GeneralConfig { export interface RuntimeConfig { providerBackends: { gemini: 'auto' | 'api' | 'cli-sdk'; - codex: 'auto' | 'adapter'; + codex: 'codex-native'; }; } export type ProviderConnectionAuthMode = 'auto' | 'oauth' | 'api_key'; -export type CodexProviderConnectionAuthMode = Exclude; export interface ProviderConnectionsConfig { anthropic: { authMode: ProviderConnectionAuthMode; }; codex: { - apiKeyBetaEnabled: boolean; - authMode: CodexProviderConnectionAuthMode; + preferredAuthMode: CodexAccountAuthMode; }; } @@ -335,14 +335,13 @@ const DEFAULT_CONFIG: AppConfig = { authMode: 'auto', }, codex: { - apiKeyBetaEnabled: false, - authMode: 'oauth', + preferredAuthMode: 'auto', }, }, runtime: { providerBackends: { gemini: 'auto', - codex: 'auto', + codex: 'codex-native', }, }, display: { @@ -398,6 +397,27 @@ function normalizeConfiguredClaudeRootPath(value: unknown): string | null { return resolved.slice(0, end); } +function 
normalizeCodexPreferredAuthMode( + currentValue: unknown, + legacyValue?: unknown +): CodexAccountAuthMode { + const candidate = currentValue ?? legacyValue; + + if (candidate === 'chatgpt' || candidate === 'api_key' || candidate === 'auto') { + return candidate; + } + + if (candidate === 'oauth') { + return 'chatgpt'; + } + + return DEFAULT_CONFIG.providerConnections.codex.preferredAuthMode; +} + +function shouldPersistNormalizedConfig(loaded: Partial, normalized: AppConfig): boolean { + return JSON.stringify(loaded) !== JSON.stringify(normalized); +} + // =========================================================================== // ConfigManager Class // =========================================================================== @@ -449,9 +469,14 @@ export class ConfigManager { try { const content = fs.readFileSync(this.configPath, 'utf8'); const parsed = JSON.parse(content) as Partial; + const merged = this.mergeWithDefaults(parsed); + + if (shouldPersistNormalizedConfig(parsed, merged)) { + this.persistConfig(merged); + } // Merge with defaults to ensure all fields exist - return this.mergeWithDefaults(parsed); + return merged; } catch (error) { if ((error as NodeJS.ErrnoException).code === 'ENOENT') { logger.info('No config file found, using defaults'); @@ -567,14 +592,20 @@ export class ConfigManager { ...(loaded.providerConnections?.anthropic ?? {}), }, codex: { - ...DEFAULT_CONFIG.providerConnections.codex, - ...(loaded.providerConnections?.codex ?? {}), + preferredAuthMode: normalizeCodexPreferredAuthMode( + loaded.providerConnections?.codex?.preferredAuthMode, + (loaded.providerConnections?.codex as { authMode?: unknown } | undefined)?.authMode + ), }, }, runtime: { providerBackends: { ...DEFAULT_CONFIG.runtime.providerBackends, ...(loaded.runtime?.providerBackends ?? 
{}), + codex: migrateProviderBackendId( + 'codex', + loaded.runtime?.providerBackends?.codex + ) as RuntimeConfig['providerBackends']['codex'], }, }, display: { @@ -660,6 +691,10 @@ export class ConfigManager { providerBackends: { ...this.config.runtime.providerBackends, ...runtimeUpdate.providerBackends, + codex: migrateProviderBackendId( + 'codex', + runtimeUpdate.providerBackends?.codex ?? this.config.runtime.providerBackends.codex + ) as RuntimeConfig['providerBackends']['codex'], }, } as unknown as Partial; } @@ -675,6 +710,10 @@ export class ConfigManager { codex: { ...this.config.providerConnections.codex, ...(connectionUpdate.codex ?? {}), + preferredAuthMode: normalizeCodexPreferredAuthMode( + connectionUpdate.codex?.preferredAuthMode, + (connectionUpdate.codex as { authMode?: unknown } | undefined)?.authMode + ), }, } as unknown as Partial; } diff --git a/src/main/services/infrastructure/codexAppServer/CodexAppServerSessionFactory.ts b/src/main/services/infrastructure/codexAppServer/CodexAppServerSessionFactory.ts new file mode 100644 index 00000000..dee886f8 --- /dev/null +++ b/src/main/services/infrastructure/codexAppServer/CodexAppServerSessionFactory.ts @@ -0,0 +1,135 @@ +import type { JsonRpcSession, JsonRpcStdioClient } from './JsonRpcStdioClient'; +import type { CodexAppServerInitializeResponse } from './protocol'; + +const DEFAULT_INITIALIZE_TIMEOUT_MS = 6_000; +const DEFAULT_REQUEST_TIMEOUT_MS = 3_000; +const DEFAULT_TOTAL_TIMEOUT_MS = 8_000; + +export const DEFAULT_CODEX_APP_SERVER_SUPPRESSED_NOTIFICATION_METHODS = [ + 'thread/started', + 'thread/status/changed', + 'thread/archived', + 'thread/unarchived', + 'thread/closed', + 'thread/name/updated', + 'turn/started', + 'turn/completed', + 'item/agentMessage/delta', + 'item/agentReasoning/delta', + 'item/execCommandOutputDelta', +]; + +export interface CodexAppServerSession extends JsonRpcSession { + readonly initializeResponse: CodexAppServerInitializeResponse; +} + +export class 
CodexAppServerSessionFactory { + constructor(private readonly rpcClient: JsonRpcStdioClient) {} + + async withSession( + options: { + binaryPath: string; + env?: NodeJS.ProcessEnv; + requestTimeoutMs?: number; + initializeTimeoutMs?: number; + totalTimeoutMs?: number; + label: string; + experimentalApi?: boolean; + optOutNotificationMethods?: string[]; + }, + handler: (session: CodexAppServerSession) => Promise + ): Promise { + const requestTimeoutMs = options.requestTimeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS; + const initializeTimeoutMs = Math.max( + options.initializeTimeoutMs ?? DEFAULT_INITIALIZE_TIMEOUT_MS, + requestTimeoutMs + ); + + return this.rpcClient.withSession( + { + binaryPath: options.binaryPath, + args: ['app-server'], + env: options.env, + requestTimeoutMs, + totalTimeoutMs: options.totalTimeoutMs ?? DEFAULT_TOTAL_TIMEOUT_MS, + label: options.label, + }, + async (session) => { + const initializedSession = await this.initializeSession(session, { + initializeTimeoutMs, + experimentalApi: options.experimentalApi ?? false, + optOutNotificationMethods: + options.optOutNotificationMethods ?? + DEFAULT_CODEX_APP_SERVER_SUPPRESSED_NOTIFICATION_METHODS, + }); + return handler(initializedSession); + } + ); + } + + async openSession(options: { + binaryPath: string; + env?: NodeJS.ProcessEnv; + requestTimeoutMs?: number; + initializeTimeoutMs?: number; + experimentalApi?: boolean; + optOutNotificationMethods?: string[]; + }): Promise { + const requestTimeoutMs = options.requestTimeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS; + const initializeTimeoutMs = Math.max( + options.initializeTimeoutMs ?? DEFAULT_INITIALIZE_TIMEOUT_MS, + requestTimeoutMs + ); + const session = await this.rpcClient.openSession({ + binaryPath: options.binaryPath, + args: ['app-server'], + env: options.env, + requestTimeoutMs, + }); + + try { + return await this.initializeSession(session, { + initializeTimeoutMs, + experimentalApi: options.experimentalApi ?? 
false, + optOutNotificationMethods: + options.optOutNotificationMethods ?? + DEFAULT_CODEX_APP_SERVER_SUPPRESSED_NOTIFICATION_METHODS, + }); + } catch (error) { + await session.close().catch(() => undefined); + throw error; + } + } + + private async initializeSession( + session: JsonRpcSession, + options: { + initializeTimeoutMs: number; + experimentalApi: boolean; + optOutNotificationMethods: string[]; + } + ): Promise { + const initializeResponse = await session.request( + 'initialize', + { + clientInfo: { + name: 'claude-agent-teams-ui', + title: 'Agent Teams UI', + version: '0.1.0', + }, + capabilities: { + experimentalApi: options.experimentalApi, + optOutNotificationMethods: options.optOutNotificationMethods, + }, + }, + options.initializeTimeoutMs + ); + + await session.notify('initialized'); + + return { + ...session, + initializeResponse, + }; + } +} diff --git a/src/features/recent-projects/main/infrastructure/codex/CodexBinaryResolver.ts b/src/main/services/infrastructure/codexAppServer/CodexBinaryResolver.ts similarity index 100% rename from src/features/recent-projects/main/infrastructure/codex/CodexBinaryResolver.ts rename to src/main/services/infrastructure/codexAppServer/CodexBinaryResolver.ts diff --git a/src/features/recent-projects/main/infrastructure/codex/JsonRpcStdioClient.ts b/src/main/services/infrastructure/codexAppServer/JsonRpcStdioClient.ts similarity index 61% rename from src/features/recent-projects/main/infrastructure/codex/JsonRpcStdioClient.ts rename to src/main/services/infrastructure/codexAppServer/JsonRpcStdioClient.ts index afd3dc6a..4b2bf21c 100644 --- a/src/features/recent-projects/main/infrastructure/codex/JsonRpcStdioClient.ts +++ b/src/main/services/infrastructure/codexAppServer/JsonRpcStdioClient.ts @@ -3,10 +3,9 @@ import readline from 'node:readline'; import { killProcessTree, spawnCli } from '@main/utils/childProcess'; -import type { LoggerPort } from '../../../core/application/ports/LoggerPort'; - -const 
DEFAULT_REQUEST_TIMEOUT_MS = 3_000; -const DEFAULT_TOTAL_TIMEOUT_MS = 8_000; +interface JsonRpcLogger { + warn: (message: string, meta?: Record) => void; +} interface JsonRpcErrorPayload { code?: number; @@ -19,9 +18,16 @@ interface JsonRpcResponse { error?: JsonRpcErrorPayload; } +interface JsonRpcNotificationMessage { + method?: string; + params?: unknown; +} + export interface JsonRpcSession { request(method: string, params?: unknown, timeoutMs?: number): Promise; notify(method: string, params?: unknown): Promise; + onNotification(listener: (method: string, params: unknown) => void): () => void; + close(): Promise; } function withTimeout(promise: Promise, timeoutMs: number, label: string): Promise { @@ -40,42 +46,48 @@ function withTimeout(promise: Promise, timeoutMs: number, label: string): }) as Promise; } +const DEFAULT_REQUEST_TIMEOUT_MS = 3_000; +const DEFAULT_TOTAL_TIMEOUT_MS = 8_000; + export class JsonRpcStdioClient { - constructor(private readonly logger: LoggerPort) {} + constructor(private readonly logger: JsonRpcLogger) {} async withSession( options: { binaryPath: string; args: string[]; + env?: NodeJS.ProcessEnv; requestTimeoutMs?: number; totalTimeoutMs?: number; label: string; }, handler: (session: JsonRpcSession) => Promise ): Promise { - const requestTimeoutMs = options.requestTimeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS; + const session = await this.openSession(options); const totalTimeoutMs = options.totalTimeoutMs ?? 
DEFAULT_TOTAL_TIMEOUT_MS; - return withTimeout( - this.#runSession(options.binaryPath, options.args, requestTimeoutMs, handler), - totalTimeoutMs, - options.label - ); + try { + return await withTimeout(handler(session), totalTimeoutMs, options.label); + } finally { + await session.close(); + } } - async #runSession( - binaryPath: string, - args: string[], - requestTimeoutMs: number, - handler: (session: JsonRpcSession) => Promise - ): Promise { - const child = spawnCli(binaryPath, args, { + async openSession(options: { + binaryPath: string; + args: string[]; + env?: NodeJS.ProcessEnv; + requestTimeoutMs?: number; + }): Promise { + const requestTimeoutMs = options.requestTimeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS; + const child = spawnCli(options.binaryPath, options.args, { + env: options.env, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true, }); const lineReader = readline.createInterface({ input: child.stdout! }); child.stderr?.on('data', () => { - // Keep stderr drained so process warnings do not block the pipe. + // Keep stderr drained so warnings never block the pipe. }); const pending = new Map< @@ -86,8 +98,10 @@ export class JsonRpcStdioClient { timeoutId: ReturnType; } >(); + const notificationListeners = new Set<(method: string, params: unknown) => void>(); let nextRequestId = 1; + let closed = false; const rejectAll = (error: Error): void => { for (const [id, entry] of pending) { @@ -97,10 +111,27 @@ export class JsonRpcStdioClient { } }; + const handleNotification = (message: JsonRpcNotificationMessage): void => { + if (typeof message.method !== 'string' || message.method.length === 0) { + return; + } + + for (const listener of notificationListeners) { + try { + listener(message.method, message.params); + } catch (error) { + this.logger.warn('json-rpc notification listener failed', { + error: error instanceof Error ? 
error.message : String(error), + method: message.method, + }); + } + } + }; + lineReader.on('line', (line) => { - let message: JsonRpcResponse; + let message: JsonRpcResponse & JsonRpcNotificationMessage; try { - message = JSON.parse(line) as JsonRpcResponse; + message = JSON.parse(line) as JsonRpcResponse & JsonRpcNotificationMessage; } catch (error) { this.logger.warn('json-rpc stdio emitted non-json line', { error: error instanceof Error ? error.message : String(error), @@ -108,24 +139,25 @@ export class JsonRpcStdioClient { return; } - if (typeof message.id !== 'number') { + if (typeof message.id === 'number') { + const entry = pending.get(message.id); + if (!entry) { + return; + } + + clearTimeout(entry.timeoutId); + pending.delete(message.id); + + if (message.error) { + entry.reject(new Error(message.error.message ?? 'Unknown JSON-RPC error')); + return; + } + + entry.resolve(message.result); return; } - const entry = pending.get(message.id); - if (!entry) { - return; - } - - clearTimeout(entry.timeoutId); - pending.delete(message.id); - - if (message.error) { - entry.reject(new Error(message.error.message ?? 
'Unknown JSON-RPC error')); - return; - } - - entry.resolve(message.result); + handleNotification(message); }); child.once('error', (error) => { @@ -144,14 +176,42 @@ export class JsonRpcStdioClient { ); }); - const session: JsonRpcSession = { + const close = async (): Promise => { + if (closed) { + return; + } + closed = true; + + rejectAll(new Error('JSON-RPC session closed')); + notificationListeners.clear(); + lineReader.close(); + + if (child.stdin && !child.stdin.destroyed && !child.stdin.writableEnded) { + await new Promise((resolve) => { + try { + child.stdin!.end(() => resolve()); + } catch { + resolve(); + } + }); + } + + killProcessTree(child); + try { + await once(child, 'close'); + } catch { + this.logger.warn('json-rpc close wait failed'); + } + }; + + return { request: ( method: string, params?: unknown, timeoutMs = requestTimeoutMs ): Promise => new Promise((resolve, reject) => { - if (!child.stdin) { + if (!child.stdin || child.stdin.destroyed || child.stdin.writableEnded) { reject(new Error('JSON-RPC stdin is not available')); return; } @@ -176,7 +236,7 @@ export class JsonRpcStdioClient { }), notify: async (method: string, params?: unknown): Promise => { - if (!child.stdin) { + if (!child.stdin || child.stdin.destroyed || child.stdin.writableEnded) { throw new Error('JSON-RPC stdin is not available'); } @@ -190,22 +250,15 @@ export class JsonRpcStdioClient { }); }); }, - }; - try { - return await handler(session); - } finally { - rejectAll(new Error('JSON-RPC session closed')); - lineReader.close(); - if (child.stdin && !child.stdin.destroyed) { - child.stdin.end(); - } - killProcessTree(child); - try { - await once(child, 'close'); - } catch { - this.logger.warn('json-rpc close wait failed'); - } - } + onNotification: (listener) => { + notificationListeners.add(listener); + return (): void => { + notificationListeners.delete(listener); + }; + }, + + close, + }; } } diff --git a/src/main/services/infrastructure/codexAppServer/index.ts 
b/src/main/services/infrastructure/codexAppServer/index.ts new file mode 100644 index 00000000..63b3b013 --- /dev/null +++ b/src/main/services/infrastructure/codexAppServer/index.ts @@ -0,0 +1,29 @@ +export type { CodexAppServerSession } from './CodexAppServerSessionFactory'; +export { + CodexAppServerSessionFactory, + DEFAULT_CODEX_APP_SERVER_SUPPRESSED_NOTIFICATION_METHODS, +} from './CodexAppServerSessionFactory'; +export { CodexBinaryResolver } from './CodexBinaryResolver'; +export type { JsonRpcSession } from './JsonRpcStdioClient'; +export { JsonRpcStdioClient } from './JsonRpcStdioClient'; +export type { + CodexAppServerAccount, + CodexAppServerAccountLoginCompletedNotification, + CodexAppServerAccountRateLimitsUpdatedNotification, + CodexAppServerAccountUpdatedNotification, + CodexAppServerAuthMode, + CodexAppServerCancelLoginAccountParams, + CodexAppServerCancelLoginAccountResponse, + CodexAppServerCancelLoginAccountStatus, + CodexAppServerCreditsSnapshot, + CodexAppServerGetAccountParams, + CodexAppServerGetAccountRateLimitsResponse, + CodexAppServerGetAccountResponse, + CodexAppServerInitializeResponse, + CodexAppServerLoginAccountParams, + CodexAppServerLoginAccountResponse, + CodexAppServerLogoutAccountResponse, + CodexAppServerPlanType, + CodexAppServerRateLimitSnapshot, + CodexAppServerRateLimitWindow, +} from './protocol'; diff --git a/src/main/services/infrastructure/codexAppServer/protocol.ts b/src/main/services/infrastructure/codexAppServer/protocol.ts new file mode 100644 index 00000000..4fd8fbf1 --- /dev/null +++ b/src/main/services/infrastructure/codexAppServer/protocol.ts @@ -0,0 +1,113 @@ +export type CodexAppServerPlanType = + | 'free' + | 'go' + | 'plus' + | 'pro' + | 'team' + | 'business' + | 'enterprise' + | 'edu' + | 'unknown'; + +export type CodexAppServerAuthMode = 'apikey' | 'chatgpt' | 'chatgptAuthTokens'; + +export interface CodexAppServerInitializeResponse { + userAgent: string; + codexHome: string; + platformFamily: string; + 
platformOs: string; +} + +export type CodexAppServerAccount = + | { type: 'apiKey' } + | { + type: 'chatgpt'; + email: string; + planType: CodexAppServerPlanType; + }; + +export interface CodexAppServerGetAccountResponse { + account: CodexAppServerAccount | null; + requiresOpenaiAuth: boolean; +} + +export interface CodexAppServerGetAccountParams { + refreshToken: boolean; +} + +export type CodexAppServerLoginAccountParams = + | { + type: 'apiKey'; + apiKey: string; + } + | { + type: 'chatgpt'; + } + | { + type: 'chatgptAuthTokens'; + accessToken: string; + chatgptAccountId: string; + chatgptPlanType?: string | null; + }; + +export type CodexAppServerLoginAccountResponse = + | { type: 'apiKey' } + | { + type: 'chatgpt'; + loginId: string; + authUrl: string; + } + | { type: 'chatgptAuthTokens' }; + +export type CodexAppServerLogoutAccountResponse = Record; + +export interface CodexAppServerRateLimitWindow { + usedPercent: number; + windowDurationMins: number | null; + resetsAt: number | null; +} + +export interface CodexAppServerCreditsSnapshot { + hasCredits: boolean; + unlimited: boolean; + balance: string | null; +} + +export interface CodexAppServerRateLimitSnapshot { + limitId: string | null; + limitName: string | null; + primary: CodexAppServerRateLimitWindow | null; + secondary: CodexAppServerRateLimitWindow | null; + credits: CodexAppServerCreditsSnapshot | null; + planType: CodexAppServerPlanType | null; +} + +export interface CodexAppServerGetAccountRateLimitsResponse { + rateLimits: CodexAppServerRateLimitSnapshot; + rateLimitsByLimitId: Record | null; +} + +export interface CodexAppServerAccountLoginCompletedNotification { + loginId: string | null; + success: boolean; + error: string | null; +} + +export interface CodexAppServerAccountUpdatedNotification { + authMode: CodexAppServerAuthMode | null; + planType: CodexAppServerPlanType | null; +} + +export interface CodexAppServerAccountRateLimitsUpdatedNotification { + rateLimits: 
CodexAppServerRateLimitSnapshot; +} + +export interface CodexAppServerCancelLoginAccountParams { + loginId: string; +} + +export type CodexAppServerCancelLoginAccountStatus = 'canceled' | 'notFound'; + +export interface CodexAppServerCancelLoginAccountResponse { + status: CodexAppServerCancelLoginAccountStatus; +} diff --git a/src/main/services/runtime/ClaudeMultimodelBridgeService.ts b/src/main/services/runtime/ClaudeMultimodelBridgeService.ts index 53a6bb17..004869a8 100644 --- a/src/main/services/runtime/ClaudeMultimodelBridgeService.ts +++ b/src/main/services/runtime/ClaudeMultimodelBridgeService.ts @@ -88,6 +88,14 @@ interface UnifiedRuntimeStatusResponse { selectable?: boolean; recommended?: boolean; available?: boolean; + state?: + | 'ready' + | 'locked' + | 'disabled' + | 'authentication-required' + | 'runtime-missing' + | 'degraded'; + audience?: 'general' | 'internal'; statusMessage?: string | null; detailMessage?: string | null; }[]; @@ -270,6 +278,8 @@ export class ClaudeMultimodelBridgeService { selectable: backend.selectable !== false, recommended: backend.recommended === true, available: backend.available === true, + state: backend.state ?? undefined, + audience: backend.audience ?? undefined, statusMessage: backend.statusMessage ?? null, detailMessage: backend.detailMessage ?? null, })) ?? 
[], diff --git a/src/main/services/runtime/ProviderConnectionService.ts b/src/main/services/runtime/ProviderConnectionService.ts index f49e732d..5581bc29 100644 --- a/src/main/services/runtime/ProviderConnectionService.ts +++ b/src/main/services/runtime/ProviderConnectionService.ts @@ -1,8 +1,16 @@ +import path from 'node:path'; + +import { evaluateCodexLaunchReadiness } from '@features/codex-account'; import { getCachedShellEnv } from '@main/utils/shellEnv'; import { ApiKeyService } from '../extensions/apikeys/ApiKeyService'; import { ConfigManager } from '../infrastructure/ConfigManager'; +import type { + CodexAccountAuthMode, + CodexAccountSnapshotDto, +} from '@features/codex-account/contracts'; +import type { CodexAccountFeatureFacade } from '@features/codex-account/main'; import type { CliProviderAuthMode, CliProviderConnectionInfo, @@ -25,9 +33,9 @@ const PROVIDER_CAPABILITIES: Record< configurableAuthModes: ['auto', 'oauth', 'api_key'], }, codex: { - supportsOAuth: true, + supportsOAuth: false, supportsApiKey: true, - configurableAuthModes: [], + configurableAuthModes: ['auto', 'chatgpt', 'api_key'], }, gemini: { supportsOAuth: false, @@ -42,10 +50,33 @@ const PROVIDER_API_KEY_ENV_VARS: Partial> = { gemini: 'GEMINI_API_KEY', }; -const CODEX_API_KEY_BETA_ENV_VAR = 'CLAUDE_CODE_CODEX_API_KEY_BETA'; +const CODEX_NATIVE_API_KEY_ENV_VAR = 'CODEX_API_KEY'; +const CODEX_NATIVE_BACKEND_ID = 'codex-native'; + +function isCodexExecBinary(binaryPath?: string | null): boolean { + const binaryName = path.basename(binaryPath?.trim() ?? 
'').toLowerCase(); + return ( + binaryName === 'codex' || + binaryName === 'codex.exe' || + binaryName === 'codex-cli' || + binaryName === 'codex-cli.exe' + ); +} + +function buildCodexForcedLoginLaunchArgs( + binaryPath: string | null | undefined, + loginMethod: 'chatgpt' | 'api' +): string[] { + if (isCodexExecBinary(binaryPath)) { + return ['-c', `forced_login_method="${loginMethod}"`]; + } + + return ['--settings', JSON.stringify({ codex: { forced_login_method: loginMethod } })]; +} export class ProviderConnectionService { private static instance: ProviderConnectionService | null = null; + private codexAccountFeature: Pick | null = null; constructor( private readonly apiKeyService = new ApiKeyService(), @@ -57,14 +88,17 @@ export class ProviderConnectionService { return ProviderConnectionService.instance; } + setCodexAccountFeature(feature: Pick | null): void { + this.codexAccountFeature = feature; + } + getConfiguredAuthMode(providerId: CliProviderId): CliProviderAuthMode | null { if (providerId === 'anthropic') { return this.configManager.getConfig().providerConnections.anthropic.authMode; } if (providerId === 'codex') { - const codexConnection = this.configManager.getConfig().providerConnections.codex; - return codexConnection.apiKeyBetaEnabled ? 
codexConnection.authMode : null; + return this.configManager.getConfig().providerConnections.codex.preferredAuthMode; } return null; @@ -72,7 +106,8 @@ export class ProviderConnectionService { async applyConfiguredConnectionEnv( env: NodeJS.ProcessEnv, - providerId: CliProviderId + providerId: CliProviderId, + runtimeBackendOverride?: string | null ): Promise { if (providerId === 'anthropic') { const authMode = this.getConfiguredAuthMode(providerId); @@ -106,32 +141,33 @@ export class ProviderConnectionService { return env; } - const codexConnection = this.configManager.getConfig().providerConnections.codex; - if (!codexConnection.apiKeyBetaEnabled) { - delete env[CODEX_API_KEY_BETA_ENV_VAR]; + const snapshot = this.mergeCodexApiKeyAvailability(await this.getCodexAccountSnapshot(), env); + const readiness = evaluateCodexLaunchReadiness({ + preferredAuthMode: snapshot.preferredAuthMode, + managedAccount: snapshot.managedAccount, + apiKey: snapshot.apiKey, + appServerState: snapshot.appServerState, + appServerStatusMessage: snapshot.appServerStatusMessage, + localActiveChatgptAccountPresent: snapshot.localActiveChatgptAccountPresent, + }); + + if (readiness.effectiveAuthMode === 'chatgpt') { delete env.OPENAI_API_KEY; + delete env[CODEX_NATIVE_API_KEY_ENV_VAR]; return env; } - env[CODEX_API_KEY_BETA_ENV_VAR] = '1'; - - if (codexConnection.authMode === 'oauth') { - env.CLAUDE_CODE_CODEX_BACKEND = 'adapter'; - delete env.OPENAI_API_KEY; - return env; - } - - env.CLAUDE_CODE_CODEX_BACKEND = 'api'; - - const storedKey = await this.apiKeyService.lookupPreferred('OPENAI_API_KEY'); - if (storedKey?.value.trim()) { - env.OPENAI_API_KEY = storedKey.value; + const resolvedApiKey = await this.resolveCodexApiKeyValue(env, runtimeBackendOverride); + if (readiness.effectiveAuthMode === 'api_key' && resolvedApiKey) { + env.OPENAI_API_KEY = resolvedApiKey; + env[CODEX_NATIVE_API_KEY_ENV_VAR] = resolvedApiKey; return env; } if (typeof env.OPENAI_API_KEY !== 'string' || 
!env.OPENAI_API_KEY.trim()) { delete env.OPENAI_API_KEY; } + delete env[CODEX_NATIVE_API_KEY_ENV_VAR]; return env; } @@ -146,7 +182,8 @@ export class ProviderConnectionService { async augmentConfiguredConnectionEnv( env: NodeJS.ProcessEnv, - providerId: CliProviderId + providerId: CliProviderId, + runtimeBackendOverride?: string | null ): Promise { if (providerId === 'anthropic') { if (this.getConfiguredAuthMode(providerId) !== 'api_key') { @@ -164,21 +201,26 @@ export class ProviderConnectionService { return env; } - const codexConnection = this.configManager.getConfig().providerConnections.codex; - if (!codexConnection.apiKeyBetaEnabled) { + const snapshot = this.mergeCodexApiKeyAvailability(await this.getCodexAccountSnapshot(), env); + const readiness = evaluateCodexLaunchReadiness({ + preferredAuthMode: snapshot.preferredAuthMode, + managedAccount: snapshot.managedAccount, + apiKey: snapshot.apiKey, + appServerState: snapshot.appServerState, + appServerStatusMessage: snapshot.appServerStatusMessage, + localActiveChatgptAccountPresent: snapshot.localActiveChatgptAccountPresent, + }); + + if (readiness.effectiveAuthMode === 'chatgpt') { + delete env.OPENAI_API_KEY; + delete env[CODEX_NATIVE_API_KEY_ENV_VAR]; return env; } - env[CODEX_API_KEY_BETA_ENV_VAR] = '1'; - env.CLAUDE_CODE_CODEX_BACKEND = codexConnection.authMode === 'oauth' ? 
'adapter' : 'api'; - - if (codexConnection.authMode !== 'api_key') { - return env; - } - - const storedKey = await this.apiKeyService.lookupPreferred('OPENAI_API_KEY'); - if (storedKey?.value.trim()) { - env.OPENAI_API_KEY = storedKey.value; + const resolvedApiKey = await this.resolveCodexApiKeyValue(env, runtimeBackendOverride); + if (readiness.effectiveAuthMode === 'api_key' && resolvedApiKey) { + env.OPENAI_API_KEY = resolvedApiKey; + env[CODEX_NATIVE_API_KEY_ENV_VAR] = resolvedApiKey; } return env; @@ -194,7 +236,8 @@ export class ProviderConnectionService { async getConfiguredConnectionIssue( env: NodeJS.ProcessEnv, - providerId: CliProviderId + providerId: CliProviderId, + _runtimeBackendOverride?: string | null ): Promise { if (providerId === 'anthropic') { if (this.getConfiguredAuthMode(providerId) !== 'api_key') { @@ -215,29 +258,57 @@ export class ProviderConnectionService { return null; } - const codexConnection = this.configManager.getConfig().providerConnections.codex; - if (!codexConnection.apiKeyBetaEnabled || codexConnection.authMode !== 'api_key') { + const snapshot = this.mergeCodexApiKeyAvailability(await this.getCodexAccountSnapshot(), env); + const readiness = evaluateCodexLaunchReadiness({ + preferredAuthMode: snapshot.preferredAuthMode, + managedAccount: snapshot.managedAccount, + apiKey: snapshot.apiKey, + appServerState: snapshot.appServerState, + appServerStatusMessage: snapshot.appServerStatusMessage, + localActiveChatgptAccountPresent: snapshot.localActiveChatgptAccountPresent, + }); + + if (readiness.launchAllowed) { return null; } - if (typeof env.OPENAI_API_KEY === 'string' && env.OPENAI_API_KEY.trim()) { - return null; + if (readiness.state === 'missing_auth') { + if (snapshot.preferredAuthMode === 'chatgpt') { + return snapshot.requiresOpenaiAuth + ? snapshot.localActiveChatgptAccountPresent + ? 'Codex ChatGPT account mode is selected, and Codex has a locally selected ChatGPT account, but the current session needs reconnect. 
Reconnect ChatGPT or switch Codex auth mode to API key.' + : snapshot.localAccountArtifactsPresent + ? 'Codex ChatGPT account mode is selected, but Codex CLI reports no active ChatGPT login. Local Codex account data exists, but no active managed session is selected. Connect ChatGPT again or switch Codex auth mode to API key.' + : 'Codex ChatGPT account mode is selected, but Codex CLI reports no active ChatGPT login. Connect ChatGPT again or switch Codex auth mode to API key.' + : 'Codex ChatGPT account mode is selected, but no managed ChatGPT account is available. Connect ChatGPT again or switch Codex auth mode to API key.'; + } + + if (snapshot.preferredAuthMode === 'api_key') { + return 'Codex API key mode is selected, but no OPENAI_API_KEY or CODEX_API_KEY credential is available. Add one before launching Codex.'; + } + + return 'Codex native requires OPENAI_API_KEY or CODEX_API_KEY, or a connected ChatGPT account. Add one before launching Codex.'; } return ( - 'Codex API key mode is enabled, but no OPENAI_API_KEY is configured. ' + - 'Add a stored/environment API key or switch Codex auth mode back to OAuth.' + readiness.issueMessage ?? + 'Codex native is not ready. Connect a ChatGPT account or add an API key before launching.' 
); } async getConfiguredConnectionIssues( env: NodeJS.ProcessEnv, - providerIds: readonly CliProviderId[] = ['anthropic', 'codex', 'gemini'] + providerIds: readonly CliProviderId[] = ['anthropic', 'codex', 'gemini'], + runtimeBackendOverrides?: Partial> ): Promise>> { const issues: Partial> = {}; for (const providerId of providerIds) { - const issue = await this.getConfiguredConnectionIssue(env, providerId); + const issue = await this.getConfiguredConnectionIssue( + env, + providerId, + runtimeBackendOverrides?.[providerId] + ); if (issue) { issues[providerId] = issue; } @@ -246,6 +317,41 @@ export class ProviderConnectionService { return issues; } + async getConfiguredConnectionLaunchArgs( + env: NodeJS.ProcessEnv, + providerId: CliProviderId, + runtimeBackendOverride?: string | null, + binaryPath?: string | null + ): Promise { + if (providerId !== 'codex') { + return []; + } + + if (this.getConfiguredCodexRuntimeBackend(runtimeBackendOverride) !== CODEX_NATIVE_BACKEND_ID) { + return []; + } + + const snapshot = this.mergeCodexApiKeyAvailability(await this.getCodexAccountSnapshot(), env); + const readiness = evaluateCodexLaunchReadiness({ + preferredAuthMode: snapshot.preferredAuthMode, + managedAccount: snapshot.managedAccount, + apiKey: snapshot.apiKey, + appServerState: snapshot.appServerState, + appServerStatusMessage: snapshot.appServerStatusMessage, + localActiveChatgptAccountPresent: snapshot.localActiveChatgptAccountPresent, + }); + + if (readiness.effectiveAuthMode === 'chatgpt') { + return buildCodexForcedLoginLaunchArgs(binaryPath, 'chatgpt'); + } + + if (readiness.effectiveAuthMode === 'api_key') { + return buildCodexForcedLoginLaunchArgs(binaryPath, 'api'); + } + + return []; + } + async enrichProviderStatus(provider: CliProviderStatus): Promise { return { ...provider, @@ -261,32 +367,56 @@ export class ProviderConnectionService { const capabilities = PROVIDER_CAPABILITIES[providerId]; const storedApiKey = await this.getStoredApiKey(providerId); const 
externalCredential = this.getExternalCredential(providerId); - const codexBetaEnabled = - providerId === 'codex' - ? this.configManager.getConfig().providerConnections.codex.apiKeyBetaEnabled - : undefined; - const configurableAuthModes = - providerId === 'codex' && codexBetaEnabled - ? (['oauth', 'api_key'] as CliProviderAuthMode[]) - : capabilities.configurableAuthModes; + const codexSnapshot = providerId === 'codex' ? await this.getCodexAccountSnapshot() : null; + const configurableAuthModes = capabilities.configurableAuthModes; const configuredAuthMode = - providerId === 'codex' && !codexBetaEnabled ? null : this.getConfiguredAuthMode(providerId); + providerId === 'codex' + ? (codexSnapshot?.preferredAuthMode ?? this.getConfiguredAuthMode(providerId)) + : this.getConfiguredAuthMode(providerId); + const apiKeyConfigured = + providerId === 'codex' + ? (codexSnapshot?.apiKey.available ?? false) + : Boolean(storedApiKey?.value.trim() || externalCredential?.value.trim()); + const apiKeySource = + providerId === 'codex' + ? (codexSnapshot?.apiKey.source ?? null) + : storedApiKey?.value.trim() + ? 'stored' + : externalCredential?.value.trim() + ? 'environment' + : null; + const apiKeySourceLabel = + providerId === 'codex' + ? (codexSnapshot?.apiKey.sourceLabel ?? null) + : storedApiKey?.value.trim() + ? 'Stored in app' + : (externalCredential?.label ?? null); return { ...capabilities, configurableAuthModes, configuredAuthMode, - apiKeyBetaAvailable: providerId === 'codex' ? true : undefined, - apiKeyBetaEnabled: codexBetaEnabled, - apiKeyConfigured: Boolean(storedApiKey?.value.trim() || externalCredential?.value.trim()), - apiKeySource: storedApiKey?.value.trim() - ? 'stored' - : externalCredential?.value.trim() - ? 'environment' + apiKeyConfigured, + apiKeySource, + apiKeySourceLabel, + codex: + providerId === 'codex' && codexSnapshot + ? 
{ + preferredAuthMode: codexSnapshot.preferredAuthMode, + effectiveAuthMode: codexSnapshot.effectiveAuthMode, + appServerState: codexSnapshot.appServerState, + appServerStatusMessage: codexSnapshot.appServerStatusMessage, + managedAccount: codexSnapshot.managedAccount, + requiresOpenaiAuth: codexSnapshot.requiresOpenaiAuth, + localAccountArtifactsPresent: codexSnapshot.localAccountArtifactsPresent, + localActiveChatgptAccountPresent: codexSnapshot.localActiveChatgptAccountPresent, + login: codexSnapshot.login, + rateLimits: codexSnapshot.rateLimits, + launchAllowed: codexSnapshot.launchAllowed, + launchIssueMessage: codexSnapshot.launchIssueMessage, + launchReadinessState: codexSnapshot.launchReadinessState, + } : null, - apiKeySourceLabel: storedApiKey?.value.trim() - ? 'Stored in app' - : (externalCredential?.label ?? null), }; } @@ -301,6 +431,117 @@ export class ProviderConnectionService { return this.apiKeyService.lookupPreferred(envVarName); } + private getConfiguredCodexRuntimeBackend(runtimeBackendOverride?: string | null): 'codex-native' { + if (runtimeBackendOverride === CODEX_NATIVE_BACKEND_ID) { + return runtimeBackendOverride; + } + return CODEX_NATIVE_BACKEND_ID; + } + + private async getCodexAccountSnapshot(): Promise { + if (this.codexAccountFeature) { + return this.codexAccountFeature.getSnapshot(); + } + + const preferredAuthMode = + (this.configManager.getConfig().providerConnections.codex.preferredAuthMode as + | CodexAccountAuthMode + | undefined) ?? 'auto'; + const storedKey = await this.apiKeyService.lookupPreferred('OPENAI_API_KEY'); + const externalCredential = this.getExternalCredential('codex'); + const apiKeyAvailable = Boolean(storedKey?.value.trim() || externalCredential?.value.trim()); + const apiKey = { + available: apiKeyAvailable, + source: storedKey?.value.trim() + ? 'stored' + : externalCredential?.value.trim() + ? 'environment' + : null, + sourceLabel: storedKey?.value.trim() ? 'Stored in app' : (externalCredential?.label ?? 
null), + } satisfies CodexAccountSnapshotDto['apiKey']; + const readiness = evaluateCodexLaunchReadiness({ + preferredAuthMode, + managedAccount: null, + apiKey, + appServerState: 'degraded', + appServerStatusMessage: 'Codex account management has not been initialized yet.', + localActiveChatgptAccountPresent: false, + }); + + return { + preferredAuthMode, + effectiveAuthMode: readiness.effectiveAuthMode, + launchAllowed: readiness.launchAllowed, + launchIssueMessage: readiness.issueMessage, + launchReadinessState: readiness.state, + appServerState: 'degraded', + appServerStatusMessage: 'Codex account management has not been initialized yet.', + managedAccount: null, + apiKey, + requiresOpenaiAuth: null, + localAccountArtifactsPresent: false, + localActiveChatgptAccountPresent: false, + login: { + status: 'idle', + error: null, + startedAt: null, + }, + rateLimits: null, + updatedAt: new Date().toISOString(), + }; + } + + private async resolveCodexApiKeyValue( + env: NodeJS.ProcessEnv, + runtimeBackendOverride?: string | null + ): Promise { + const codexRuntimeBackend = this.getConfiguredCodexRuntimeBackend(runtimeBackendOverride); + const storedKey = await this.apiKeyService.lookupPreferred('OPENAI_API_KEY'); + const existingOpenAiKey = + typeof env.OPENAI_API_KEY === 'string' && env.OPENAI_API_KEY.trim() + ? env.OPENAI_API_KEY + : null; + const existingNativeKey = + typeof env[CODEX_NATIVE_API_KEY_ENV_VAR] === 'string' && + env[CODEX_NATIVE_API_KEY_ENV_VAR]?.trim() + ? env[CODEX_NATIVE_API_KEY_ENV_VAR] + : null; + + return ( + storedKey?.value.trim() || + existingOpenAiKey || + (codexRuntimeBackend === CODEX_NATIVE_BACKEND_ID ? existingNativeKey : null) + ); + } + + private mergeCodexApiKeyAvailability( + snapshot: CodexAccountSnapshotDto, + env: NodeJS.ProcessEnv + ): CodexAccountSnapshotDto { + const openAiApiKey = + typeof env.OPENAI_API_KEY === 'string' && env.OPENAI_API_KEY.trim() + ? 
env.OPENAI_API_KEY + : null; + const codexApiKey = + typeof env[CODEX_NATIVE_API_KEY_ENV_VAR] === 'string' && + env[CODEX_NATIVE_API_KEY_ENV_VAR]?.trim() + ? env[CODEX_NATIVE_API_KEY_ENV_VAR] + : null; + + if (!openAiApiKey && !codexApiKey) { + return snapshot; + } + + return { + ...snapshot, + apiKey: { + available: true, + source: 'environment', + sourceLabel: codexApiKey ? 'Detected from CODEX_API_KEY' : 'Detected from OPENAI_API_KEY', + }, + }; + } + private getExternalCredential(providerId: CliProviderId): ExternalCredential { const shellEnv = getCachedShellEnv() ?? {}; const sources = [shellEnv, process.env]; @@ -336,6 +577,14 @@ export class ProviderConnectionService { } if (providerId === 'codex') { + const nativeApiKey = findEnvValue(CODEX_NATIVE_API_KEY_ENV_VAR); + if (nativeApiKey) { + return { + label: `Detected from ${CODEX_NATIVE_API_KEY_ENV_VAR}`, + value: nativeApiKey, + }; + } + const apiKey = findEnvValue('OPENAI_API_KEY'); if (apiKey) { return { diff --git a/src/main/services/runtime/buildRuntimeBaseEnv.ts b/src/main/services/runtime/buildRuntimeBaseEnv.ts new file mode 100644 index 00000000..568d0fa3 --- /dev/null +++ b/src/main/services/runtime/buildRuntimeBaseEnv.ts @@ -0,0 +1,86 @@ +import { buildEnrichedEnv } from '@main/utils/cliEnv'; +import { getShellPreferredHome } from '@main/utils/shellEnv'; + +import { configManager } from '../infrastructure/ConfigManager'; + +import { + applyConfiguredRuntimeBackendsEnv, + applyProviderRuntimeEnv, + resolveTeamProviderId, +} from './providerRuntimeEnv'; + +import type { CliProviderId, TeamProviderId } from '@shared/types'; + +type ProviderEnvTargetId = CliProviderId | TeamProviderId | undefined; + +export interface BuildRuntimeBaseEnvOptions { + binaryPath?: string | null; + providerId?: ProviderEnvTargetId; + providerBackendId?: string | null; + shellEnv?: NodeJS.ProcessEnv | null; + env?: NodeJS.ProcessEnv; +} + +function getFirstNonEmptyEnvValue(...values: (string | null | undefined)[]): string | 
undefined { + for (const value of values) { + if (typeof value === 'string' && value.trim().length > 0) { + return value; + } + } + return undefined; +} + +export function buildRuntimeBaseEnv(options: BuildRuntimeBaseEnvOptions = {}): { + env: NodeJS.ProcessEnv; + resolvedProviderId: CliProviderId | null; +} { + const shellEnv = options.shellEnv ?? {}; + const env = { + ...buildEnrichedEnv(options.binaryPath), + ...shellEnv, + }; + + applyConfiguredRuntimeBackendsEnv(env, configManager.getConfig().runtime); + Object.assign(env, options.env ?? {}); + + const explicitHome = getFirstNonEmptyEnvValue(options.env?.HOME, options.env?.USERPROFILE); + const fallbackHome = getFirstNonEmptyEnvValue( + env.HOME, + env.USERPROFILE, + getShellPreferredHome(), + shellEnv.HOME, + process.env.HOME, + process.env.USERPROFILE + ); + + if (explicitHome) { + env.HOME = getFirstNonEmptyEnvValue(options.env?.HOME, explicitHome); + env.USERPROFILE = getFirstNonEmptyEnvValue(options.env?.USERPROFILE, explicitHome); + } else if (fallbackHome) { + env.HOME = getFirstNonEmptyEnvValue(env.HOME, fallbackHome); + env.USERPROFILE = getFirstNonEmptyEnvValue(env.USERPROFILE, fallbackHome); + } + + if (!options.providerId) { + return { + env, + resolvedProviderId: null, + }; + } + + const resolvedProviderId = resolveTeamProviderId(options.providerId); + applyProviderRuntimeEnv(env, options.providerId); + + if (resolvedProviderId === 'codex' && options.providerBackendId?.trim()) { + env.CLAUDE_CODE_CODEX_BACKEND = options.providerBackendId.trim(); + } + + if (resolvedProviderId === 'gemini' && options.providerBackendId?.trim()) { + env.CLAUDE_CODE_GEMINI_BACKEND = options.providerBackendId.trim(); + } + + return { + env, + resolvedProviderId, + }; +} diff --git a/src/main/services/runtime/providerAwareCliEnv.ts b/src/main/services/runtime/providerAwareCliEnv.ts index 2793d710..83de3a28 100644 --- a/src/main/services/runtime/providerAwareCliEnv.ts +++ 
b/src/main/services/runtime/providerAwareCliEnv.ts @@ -1,14 +1,7 @@ -import { buildEnrichedEnv } from '@main/utils/cliEnv'; -import { getCachedShellEnv, getShellPreferredHome } from '@main/utils/shellEnv'; - -import { configManager } from '../infrastructure/ConfigManager'; +import { getCachedShellEnv } from '@main/utils/shellEnv'; +import { buildRuntimeBaseEnv } from './buildRuntimeBaseEnv'; import { providerConnectionService } from './ProviderConnectionService'; -import { - applyConfiguredRuntimeBackendsEnv, - applyProviderRuntimeEnv, - resolveTeamProviderId, -} from './providerRuntimeEnv'; import type { CliProviderId, TeamProviderId } from '@shared/types'; @@ -17,6 +10,7 @@ type ProviderEnvTargetId = CliProviderId | TeamProviderId | undefined; export interface ProviderAwareCliEnvOptions { binaryPath?: string | null; providerId?: ProviderEnvTargetId; + providerBackendId?: string | null; shellEnv?: NodeJS.ProcessEnv | null; env?: NodeJS.ProcessEnv; connectionMode?: 'strict' | 'augment'; @@ -25,15 +19,7 @@ export interface ProviderAwareCliEnvOptions { export interface ProviderAwareCliEnvResult { env: NodeJS.ProcessEnv; connectionIssues: Partial>; -} - -function getFirstNonEmptyEnvValue(...values: (string | null | undefined)[]): string | undefined { - for (const value of values) { - if (typeof value === 'string' && value.trim().length > 0) { - return value; - } - } - return undefined; + providerArgs: string[]; } export async function buildProviderAwareCliEnv( @@ -41,51 +27,52 @@ export async function buildProviderAwareCliEnv( ): Promise { const connectionMode = options.connectionMode ?? 'strict'; const shellEnv = options.shellEnv ?? getCachedShellEnv() ?? {}; - const env = { - ...buildEnrichedEnv(options.binaryPath), - ...shellEnv, - }; - - applyConfiguredRuntimeBackendsEnv(env, configManager.getConfig().runtime); - - Object.assign(env, options.env ?? 
{}); - - const explicitHome = getFirstNonEmptyEnvValue(options.env?.HOME, options.env?.USERPROFILE); - const fallbackHome = getFirstNonEmptyEnvValue( - env.HOME, - env.USERPROFILE, - getShellPreferredHome(), - shellEnv.HOME, - process.env.HOME, - process.env.USERPROFILE - ); - - if (explicitHome) { - env.HOME = getFirstNonEmptyEnvValue(options.env?.HOME, explicitHome); - env.USERPROFILE = getFirstNonEmptyEnvValue(options.env?.USERPROFILE, explicitHome); - } else if (fallbackHome) { - env.HOME = getFirstNonEmptyEnvValue(env.HOME, fallbackHome); - env.USERPROFILE = getFirstNonEmptyEnvValue(env.USERPROFILE, fallbackHome); - } + const { env, resolvedProviderId } = buildRuntimeBaseEnv({ + binaryPath: options.binaryPath, + providerId: options.providerId, + providerBackendId: options.providerBackendId, + shellEnv, + env: options.env, + }); if (options.providerId) { - const resolvedProviderId = resolveTeamProviderId(options.providerId); - applyProviderRuntimeEnv(env, options.providerId); + if (!resolvedProviderId) { + throw new Error('Resolved provider id is required when providerId is set'); + } if (connectionMode === 'augment') { - await providerConnectionService.augmentConfiguredConnectionEnv(env, resolvedProviderId); + await providerConnectionService.augmentConfiguredConnectionEnv( + env, + resolvedProviderId, + options.providerBackendId + ); return { env, connectionIssues: {}, + providerArgs: [], }; } - await providerConnectionService.applyConfiguredConnectionEnv(env, resolvedProviderId); + await providerConnectionService.applyConfiguredConnectionEnv( + env, + resolvedProviderId, + options.providerBackendId + ); return { env, - connectionIssues: await providerConnectionService.getConfiguredConnectionIssues(env, [ + providerArgs: await providerConnectionService.getConfiguredConnectionLaunchArgs( + env, resolvedProviderId, - ]), + options.providerBackendId, + options.binaryPath + ), + connectionIssues: await providerConnectionService.getConfiguredConnectionIssues( + 
env, + [resolvedProviderId], + resolvedProviderId === 'codex' || resolvedProviderId === 'gemini' + ? { [resolvedProviderId]: options.providerBackendId?.trim() || undefined } + : undefined + ), }; } @@ -94,6 +81,7 @@ export async function buildProviderAwareCliEnv( return { env, connectionIssues: {}, + providerArgs: [], }; } @@ -101,5 +89,6 @@ export async function buildProviderAwareCliEnv( return { env, connectionIssues: await providerConnectionService.getConfiguredConnectionIssues(env), + providerArgs: [], }; } diff --git a/src/main/services/runtime/providerModelProbe.ts b/src/main/services/runtime/providerModelProbe.ts index 0a4b4f7b..f706539c 100644 --- a/src/main/services/runtime/providerModelProbe.ts +++ b/src/main/services/runtime/providerModelProbe.ts @@ -62,7 +62,7 @@ export function normalizeProviderModelProbeFailureReason(message: string): strin if ( /The '[^']+' model is not supported when using Codex with a ChatGPT account\./i.test(trimmed) ) { - return 'Not available with Codex ChatGPT subscription'; + return 'Not available on this Codex native runtime'; } if (/The requested model is not available for your account\./i.test(trimmed)) { return 'Not available for this account'; diff --git a/src/main/services/schedule/ScheduledTaskExecutor.ts b/src/main/services/schedule/ScheduledTaskExecutor.ts index a98bde32..d2692183 100644 --- a/src/main/services/schedule/ScheduledTaskExecutor.ts +++ b/src/main/services/schedule/ScheduledTaskExecutor.ts @@ -105,7 +105,7 @@ export class ScheduledTaskExecutor { request.config.providerId === 'codex' || request.config.providerId === 'gemini' ? 
request.config.providerId : 'anthropic'; - const { env, connectionIssues } = await buildProviderAwareCliEnv({ + const { env, connectionIssues, providerArgs } = await buildProviderAwareCliEnv({ binaryPath, providerId, shellEnv, @@ -119,6 +119,8 @@ export class ScheduledTaskExecutor { throw new Error(connectionIssue); } + args.push(...providerArgs); + const child = spawnCli(binaryPath, args, { cwd: request.config.cwd, // shellEnv spread after buildEnrichedEnv ensures freshly-resolved values diff --git a/src/main/services/team/TeamBackupService.ts b/src/main/services/team/TeamBackupService.ts index 707ae2be..932a3a09 100644 --- a/src/main/services/team/TeamBackupService.ts +++ b/src/main/services/team/TeamBackupService.ts @@ -58,6 +58,7 @@ const MAX_FILE_SIZE_BYTES = 20 * 1024 * 1024; const TEAM_ROOT_FILES = [ 'config.json', + 'team.meta.json', 'kanban-state.json', 'sentMessages.json', 'sent-cross-team.json', diff --git a/src/main/services/team/TeamDataService.ts b/src/main/services/team/TeamDataService.ts index 5d29e6db..5807ea51 100644 --- a/src/main/services/team/TeamDataService.ts +++ b/src/main/services/team/TeamDataService.ts @@ -12,9 +12,10 @@ import { classifyIdleNotificationText } from '@shared/utils/idleNotificationSema import { isLeadMember } from '@shared/utils/leadDetection'; import { createLogger } from '@shared/utils/logger'; import { getKanbanColumnFromReviewState, normalizeReviewState } from '@shared/utils/reviewState'; +import { buildTeamMemberColorMap } from '@shared/utils/teamMemberColors'; import { buildStandaloneSlashCommandMeta } from '@shared/utils/slashCommands'; import { formatTaskDisplayLabel } from '@shared/utils/taskIdentity'; -import { parseNumericSuffixName } from '@shared/utils/teamMemberName'; +import { parseNumericSuffixName, validateTeamMemberNameFormat } from '@shared/utils/teamMemberName'; import { normalizeOptionalTeamProviderId } from '@shared/utils/teamProvider'; import { extractToolPreview, formatToolSummaryFromCalls } from 
'@shared/utils/toolSummary'; import * as agentTeamsControllerModule from 'agent-teams-controller'; @@ -130,6 +131,16 @@ interface FileWatchReconcileDiagnostics { lastPressureLogAt: number; } +function applyDistinctRosterColors( + members: readonly T[] +): T[] { + const colorMap = buildTeamMemberColorMap(members, { preferProvidedColors: false }); + return members.map((member) => ({ + ...member, + color: colorMap.get(member.name) ?? member.color ?? getMemberColorByName(member.name), + })); +} + function normalizePassiveUserReplyLinkText(value: string | undefined): string { if (typeof value !== 'string') return ''; return value @@ -500,6 +511,27 @@ export class TeamDataService { return this.configReader.listTeams(); } + async listAliveProcessTeams(): Promise { + const teams = await this.listTeams(); + const alive: string[] = []; + + for (const team of teams) { + if (team.deletedAt) { + continue; + } + try { + const processes = await this.readProcesses(team.teamName); + if (processes.some((process) => !process.stoppedAt)) { + alive.push(team.teamName); + } + } catch { + // best-effort per team + } + } + + return alive.sort((left, right) => left.localeCompare(right)); + } + async getAllTasks(): Promise { const rawTasks = await this.taskReader.getAllTasks(); const teams = await this.configReader.listTeams(); @@ -1161,7 +1193,7 @@ export class TeamDataService { role: configMember.role, workflow: configMember.workflow, agentType: configMember.agentType ?? 'general-purpose', - color: configMember.color ?? getMemberColorByName(configMember.name.trim()), + color: configMember.color, joinedAt: configMember.joinedAt ?? 
Date.now(), cwd: configMember.cwd, }; @@ -1176,13 +1208,13 @@ export class TeamDataService { member = { name: memberName, agentType: 'general-purpose', - color: getMemberColorByName(memberName), joinedAt: Date.now(), }; } - members.push(member); - await this.membersMetaStore.writeMembers(teamName, members); + const nextMembers = applyDistinctRosterColors([...members, member]); + member = nextMembers.find((m) => m.name === memberName) ?? member; + await this.membersMetaStore.writeMembers(teamName, nextMembers); } return { members, member }; @@ -1193,6 +1225,13 @@ export class TeamDataService { if (!name) { throw new Error('Member name cannot be empty'); } + const formatError = validateTeamMemberNameFormat(name); + if (formatError) { + throw new Error(`Member name "${name}" is invalid: ${formatError}`); + } + if (name.toLowerCase() === 'user') { + throw new Error('Member name "user" is reserved'); + } const suffixInfo = parseNumericSuffixName(name); if (suffixInfo && suffixInfo.suffix >= 2) { throw new Error( @@ -1224,12 +1263,11 @@ export class TeamDataService { ? 
request.effort : undefined, agentType: 'general-purpose', - color: getMemberColorByName(name), joinedAt: Date.now(), }; - members.push(newMember); - await this.membersMetaStore.writeMembers(teamName, members); + const nextMembers = applyDistinctRosterColors([...members, newMember]); + await this.membersMetaStore.writeMembers(teamName, nextMembers); } async updateMemberRole( @@ -1269,36 +1307,50 @@ export class TeamDataService { const joinedAt = Date.now(); const nextByName = new Set(); - const nextActive: TeamMember[] = request.members.map((member) => { - const name = member.name.trim(); - if (!name) throw new Error('Member name cannot be empty'); - if (name.toLowerCase() === 'team-lead') { - throw new Error('Member name "team-lead" is reserved'); - } - const suffixInfo = parseNumericSuffixName(name); - if (suffixInfo && suffixInfo.suffix >= 2) { - throw new Error( - `Member name "${name}" is not allowed (reserved for Claude CLI auto-suffix). Use "${suffixInfo.base}" instead.` - ); - } - nextByName.add(name.toLowerCase()); - const prev = existingByName.get(name.toLowerCase()); - return { - name, - role: member.role?.trim() || undefined, - workflow: member.workflow?.trim() || undefined, - providerId: normalizeOptionalTeamProviderId(member.providerId), - model: member.model?.trim() || undefined, - effort: - member.effort === 'low' || member.effort === 'medium' || member.effort === 'high' - ? member.effort - : undefined, - agentType: prev?.agentType ?? 'general-purpose', - color: prev?.color ?? getMemberColorByName(name), - joinedAt: prev?.joinedAt ?? 
joinedAt, - removedAt: undefined, - }; - }); + const nextActive = applyDistinctRosterColors( + request.members.map((member) => { + const name = member.name.trim(); + if (!name) throw new Error('Member name cannot be empty'); + const formatError = validateTeamMemberNameFormat(name); + if (formatError) { + throw new Error(`Member name "${name}" is invalid: ${formatError}`); + } + if (name.toLowerCase() === 'user') { + throw new Error('Member name "user" is reserved'); + } + if (name.toLowerCase() === 'team-lead') { + throw new Error('Member name "team-lead" is reserved'); + } + if (nextByName.has(name.toLowerCase())) { + throw new Error(`Member "${name}" already exists`); + } + const suffixInfo = parseNumericSuffixName(name); + if (suffixInfo && suffixInfo.suffix >= 2) { + throw new Error( + `Member name "${name}" is not allowed (reserved for Claude CLI auto-suffix). Use "${suffixInfo.base}" instead.` + ); + } + nextByName.add(name.toLowerCase()); + const prev = existingByName.get(name.toLowerCase()); + const isSameActiveMember = Boolean(prev && prev.removedAt == null); + return { + name, + role: member.role?.trim() || undefined, + workflow: member.workflow?.trim() || undefined, + providerId: normalizeOptionalTeamProviderId(member.providerId), + model: member.model?.trim() || undefined, + effort: + member.effort === 'low' || member.effort === 'medium' || member.effort === 'high' + ? member.effort + : undefined, + agentType: prev?.agentType ?? 'general-purpose', + agentId: isSameActiveMember ? prev?.agentId : undefined, + color: prev?.color, + joinedAt: prev?.joinedAt ?? joinedAt, + removedAt: undefined, + }; + }) + ); // Preserve/mark removed members so stale inbox files don't resurrect them in the UI. 
const nextRemoved: TeamMember[] = []; @@ -1712,6 +1764,23 @@ export class TeamDataService { return result; } + async sendSystemNotificationToLead(args: { + teamName: string; + summary: string; + text: string; + taskRefs?: TaskRef[]; + }): Promise { + const leadName = await this.resolveLeadName(args.teamName); + return this.sendMessage(args.teamName, { + member: leadName, + from: 'system', + summary: args.summary, + text: args.text, + ...(args.taskRefs && args.taskRefs.length > 0 ? { taskRefs: args.taskRefs } : {}), + source: TASK_COMMENT_NOTIFICATION_SOURCE, + }); + } + private resolveLeadNameFromConfig(config: TeamConfig | null): string { if (!config) return 'team-lead'; const lead = config.members?.find((m) => m.role?.toLowerCase().includes('lead')); @@ -2319,15 +2388,22 @@ export class TeamDataService { description: request.description, color: request.color, cwd: request.cwd?.trim() || '', + providerBackendId: request.providerBackendId, createdAt: joinedAt, }); - await this.membersMetaStore.writeMembers( - request.teamName, + const membersToWrite = applyDistinctRosterColors( request.members.map((member) => ({ name: (() => { const name = member.name.trim(); if (!name) throw new Error('Member name cannot be empty'); + const formatError = validateTeamMemberNameFormat(name); + if (formatError) { + throw new Error(`Member name "${name}" is invalid: ${formatError}`); + } + if (name.toLowerCase() === 'user') { + throw new Error('Member name "user" is reserved'); + } if (name.toLowerCase() === 'team-lead') throw new Error('Member name "team-lead" is reserved'); const suffixInfo = parseNumericSuffixName(name); @@ -2346,11 +2422,13 @@ export class TeamDataService { member.effort === 'low' || member.effort === 'medium' || member.effort === 'high' ? 
member.effort : undefined, - agentType: 'general-purpose', - color: getMemberColorByName(member.name.trim()), + agentType: 'general-purpose' as const, joinedAt, })) ); + await this.membersMetaStore.writeMembers(request.teamName, membersToWrite, { + providerBackendId: request.providerBackendId, + }); } async reconcileTeamArtifacts( diff --git a/src/main/services/team/TeamLogSourceTracker.ts b/src/main/services/team/TeamLogSourceTracker.ts index 045cc007..0f99a0ce 100644 --- a/src/main/services/team/TeamLogSourceTracker.ts +++ b/src/main/services/team/TeamLogSourceTracker.ts @@ -22,7 +22,11 @@ interface TeamLogSourceSnapshot { logSourceGeneration: string | null; } -export type TeamLogSourceTrackingConsumer = 'change_presence' | 'tool_activity' | 'task_log_stream'; +export type TeamLogSourceTrackingConsumer = + | 'change_presence' + | 'tool_activity' + | 'task_log_stream' + | 'stall_monitor'; interface TrackingState { watcher: FSWatcher | null; diff --git a/src/main/services/team/TeamMemberResolver.ts b/src/main/services/team/TeamMemberResolver.ts index ec65957e..62bc8f06 100644 --- a/src/main/services/team/TeamMemberResolver.ts +++ b/src/main/services/team/TeamMemberResolver.ts @@ -3,6 +3,7 @@ import { createCliAutoSuffixNameGuard, createCliProvisionerNameGuard, } from '@shared/utils/teamMemberName'; +import { buildTeamMemberColorMap } from '@shared/utils/teamMemberColors'; import { getStableTeamOwnerId } from '@shared/utils/teamStableOwnerId'; import type { TeamConfig, TeamMember, TeamMemberSnapshot, TeamTaskWithKanban } from '@shared/types'; @@ -262,6 +263,11 @@ export class TeamMemberResolver { } return aStableId.localeCompare(bStableId); }); - return members; + + const colorMap = buildTeamMemberColorMap(members, { preferProvidedColors: false }); + return members.map((member) => ({ + ...member, + color: colorMap.get(member.name) ?? member.color ?? 
getMemberColorByName(member.name), + })); } } diff --git a/src/main/services/team/TeamMembersMetaStore.ts b/src/main/services/team/TeamMembersMetaStore.ts index 4baf9776..064b0273 100644 --- a/src/main/services/team/TeamMembersMetaStore.ts +++ b/src/main/services/team/TeamMembersMetaStore.ts @@ -9,13 +9,22 @@ import { atomicWriteAsync } from './atomicWrite'; import type { TeamMember } from '@shared/types'; -interface TeamMembersMetaFile { +export interface TeamMembersMetaFile { version: 1; + providerBackendId?: string; members: TeamMember[]; } const MAX_META_FILE_BYTES = 256 * 1024; +function normalizeOptionalBackendId(value: unknown): string | undefined { + if (typeof value !== 'string') { + return undefined; + } + const trimmed = value.trim(); + return trimmed.length > 0 ? trimmed : undefined; +} + function normalizeMember(member: TeamMember): TeamMember | null { const trimmedName = member.name?.trim(); if (!trimmedName) { @@ -45,15 +54,15 @@ export class TeamMembersMetaStore { return path.join(getTeamsBasePath(), teamName, 'members.meta.json'); } - async getMembers(teamName: string): Promise { + async getMeta(teamName: string): Promise { const metaPath = this.getMetaPath(teamName); try { const stat = await fs.promises.stat(metaPath); if (!stat.isFile()) { - return []; + return null; } if (stat.isFile() && stat.size > MAX_META_FILE_BYTES) { - return []; + return null; } } catch { // ignore - readFile below will handle ENOENT and throw on other errors @@ -63,10 +72,10 @@ export class TeamMembersMetaStore { raw = await readFileUtf8WithTimeout(metaPath, 5_000); } catch (error) { if ((error as NodeJS.ErrnoException).code === 'ENOENT') { - return []; + return null; } if (error instanceof FileReadTimeoutError) { - return []; + return null; } throw error; } @@ -75,15 +84,15 @@ export class TeamMembersMetaStore { try { parsed = JSON.parse(raw) as unknown; } catch { - return []; + return null; } if (!parsed || typeof parsed !== 'object') { - return []; + return null; } 
const file = parsed as Partial; if (!Array.isArray(file.members)) { - return []; + return null; } const deduped = new Map(); @@ -107,10 +116,22 @@ export class TeamMembersMetaStore { } } - return Array.from(deduped.values()).sort((a, b) => a.name.localeCompare(b.name)); + return { + version: 1, + providerBackendId: normalizeOptionalBackendId(file.providerBackendId), + members: Array.from(deduped.values()).sort((a, b) => a.name.localeCompare(b.name)), + }; } - async writeMembers(teamName: string, members: TeamMember[]): Promise { + async getMembers(teamName: string): Promise { + return (await this.getMeta(teamName))?.members ?? []; + } + + async writeMembers( + teamName: string, + members: TeamMember[], + options?: { providerBackendId?: string } + ): Promise { const deduped = new Map(); for (const member of members) { const normalized = normalizeMember(member); @@ -131,6 +152,7 @@ export class TeamMembersMetaStore { const payload: TeamMembersMetaFile = { version: 1, + providerBackendId: normalizeOptionalBackendId(options?.providerBackendId), members: Array.from(deduped.values()).sort((a, b) => a.name.localeCompare(b.name)), }; diff --git a/src/main/services/team/TeamMessageFeedService.ts b/src/main/services/team/TeamMessageFeedService.ts index b40d01f8..d2817917 100644 --- a/src/main/services/team/TeamMessageFeedService.ts +++ b/src/main/services/team/TeamMessageFeedService.ts @@ -1,4 +1,5 @@ import { classifyIdleNotificationText } from '@shared/utils/idleNotificationSemantics'; +import { createLogger } from '@shared/utils/logger'; import { buildStandaloneSlashCommandMeta } from '@shared/utils/slashCommands'; import { createHash } from 'crypto'; @@ -7,6 +8,8 @@ import { getEffectiveInboxMessageId } from './inboxMessageIdentity'; import type { InboxMessage, TeamConfig } from '@shared/types'; const PASSIVE_USER_REPLY_LINK_WINDOW_MS = 15_000; +const MESSAGE_FEED_CACHE_MAX_AGE_MS = 5_000; +const logger = createLogger('Service:TeamMessageFeedService'); interface 
TeamMessageFeedDeps { getConfig: (teamName: string) => Promise; @@ -18,6 +21,7 @@ interface TeamMessageFeedDeps { interface TeamMessageFeedCacheEntry { feedRevision: string; messages: InboxMessage[]; + cachedAt: number; } export interface TeamNormalizedMessageFeed { @@ -352,7 +356,10 @@ export class TeamMessageFeedService { async getFeed(teamName: string): Promise { const cached = this.cacheByTeam.get(teamName); - if (cached && !this.dirtyTeams.has(teamName)) { + const now = Date.now(); + const cacheDirty = this.dirtyTeams.has(teamName); + const cacheExpired = !cached || now - cached.cachedAt >= MESSAGE_FEED_CACHE_MAX_AGE_MS; + if (cached && !cacheDirty && !cacheExpired) { return { teamName, feedRevision: cached.feedRevision, @@ -362,7 +369,7 @@ export class TeamMessageFeedService { const config = await this.deps.getConfig(teamName); if (!config) { - const emptyEntry = { feedRevision: toFeedRevision([]), messages: [] }; + const emptyEntry = { feedRevision: toFeedRevision([]), messages: [], cachedAt: now }; this.cacheByTeam.set(teamName, emptyEntry); this.dirtyTeams.delete(teamName); return { teamName, ...emptyEntry }; @@ -389,12 +396,21 @@ export class TeamMessageFeedService { }); const feedRevision = toFeedRevision(messages); + if (cached && !cacheDirty && cacheExpired && cached.feedRevision !== feedRevision) { + logger.warn( + `[${teamName}] Message feed cache expired without dirty invalidation and recovered newer durable messages` + ); + } const nextEntry = cached?.feedRevision === feedRevision - ? cached + ? 
{ + ...cached, + cachedAt: now, + } : { feedRevision, messages, + cachedAt: now, }; this.cacheByTeam.set(teamName, nextEntry); diff --git a/src/main/services/team/TeamMetaStore.ts b/src/main/services/team/TeamMetaStore.ts index a8bce4dc..2f9553a0 100644 --- a/src/main/services/team/TeamMetaStore.ts +++ b/src/main/services/team/TeamMetaStore.ts @@ -1,4 +1,5 @@ import { FileReadTimeoutError, readFileUtf8WithTimeout } from '@main/utils/fsRead'; +import { migrateProviderBackendId } from '@shared/utils/providerBackend'; import { getTeamsBasePath } from '@main/utils/pathDecoder'; import * as fs from 'fs'; import * as path from 'path'; @@ -19,6 +20,7 @@ export interface TeamMetaFile { cwd: string; prompt?: string; providerId?: 'anthropic' | 'codex' | 'gemini'; + providerBackendId?: string; model?: string; effort?: string; skipPermissions?: boolean; @@ -30,6 +32,14 @@ export interface TeamMetaFile { const MAX_META_FILE_BYTES = 256 * 1024; +function normalizeOptionalBackendId(value: unknown): string | undefined { + if (typeof value !== 'string') { + return undefined; + } + const trimmed = value.trim(); + return trimmed.length > 0 ? trimmed : undefined; +} + export class TeamMetaStore { private getMetaPath(teamName: string): string { return path.join(getTeamsBasePath(), teamName, 'team.meta.json'); @@ -74,6 +84,11 @@ export class TeamMetaStore { return null; } + const providerId = + file.providerId === 'anthropic' || file.providerId === 'codex' || file.providerId === 'gemini' + ? file.providerId + : undefined; + return { version: 1, displayName: @@ -83,12 +98,11 @@ export class TeamMetaStore { color: typeof file.color === 'string' ? file.color.trim() || undefined : undefined, cwd: file.cwd.trim(), prompt: typeof file.prompt === 'string' ? file.prompt.trim() || undefined : undefined, - providerId: - file.providerId === 'anthropic' || - file.providerId === 'codex' || - file.providerId === 'gemini' - ? 
file.providerId - : undefined, + providerId, + providerBackendId: migrateProviderBackendId( + providerId, + normalizeOptionalBackendId(file.providerBackendId) + ), model: typeof file.model === 'string' ? file.model.trim() || undefined : undefined, effort: typeof file.effort === 'string' ? file.effort.trim() || undefined : undefined, skipPermissions: typeof file.skipPermissions === 'boolean' ? file.skipPermissions : undefined, @@ -109,6 +123,10 @@ export class TeamMetaStore { cwd: data.cwd.trim(), prompt: data.prompt?.trim() || undefined, providerId: data.providerId, + providerBackendId: migrateProviderBackendId( + data.providerId, + normalizeOptionalBackendId(data.providerBackendId) + ), model: data.model?.trim() || undefined, effort: data.effort?.trim() || undefined, skipPermissions: data.skipPermissions, diff --git a/src/main/services/team/TeamProvisioningService.ts b/src/main/services/team/TeamProvisioningService.ts index 5bf9f284..06ba0121 100644 --- a/src/main/services/team/TeamProvisioningService.ts +++ b/src/main/services/team/TeamProvisioningService.ts @@ -40,6 +40,7 @@ import { resolveLanguageName } from '@shared/utils/agentLanguage'; import { getAnthropicDefaultTeamModel } from '@shared/utils/anthropicModelDefaults'; import { parseCliArgs } from '@shared/utils/cliArgsParser'; import { deriveContextMetrics, inferContextWindowTokens } from '@shared/utils/contextMetrics'; +import { getErrorMessage } from '@shared/utils/errorHandling'; import { isInboxNoiseMessage, isMeaningfulBootstrapCheckInMessage, @@ -48,12 +49,14 @@ import { } from '@shared/utils/inboxNoise'; import { isLeadAgentType, isLeadMember } from '@shared/utils/leadDetection'; import { createLogger } from '@shared/utils/logger'; +import { migrateProviderBackendId } from '@shared/utils/providerBackend'; import { isDefaultProviderModelSelection } from '@shared/utils/providerModelSelection'; import { formatTaskDisplayLabel } from '@shared/utils/taskIdentity'; import { parseAllTeammateMessages, type 
ParsedTeammateContent, } from '@shared/utils/teammateMessageParser'; +import { buildTeamMemberColorMap } from '@shared/utils/teamMemberColors'; import { createCliAutoSuffixNameGuard, parseNumericSuffixName } from '@shared/utils/teamMemberName'; import { normalizeOptionalTeamProviderId } from '@shared/utils/teamProvider'; import { @@ -224,6 +227,16 @@ const PROBE_CACHE_TTL_MS = 36 * 60 * 60 * 1000; const PREFLIGHT_BINARY_TIMEOUT_MS = 8000; const PREFLIGHT_AUTH_RETRY_DELAY_MS = 2000; const PREFLIGHT_AUTH_MAX_RETRIES = 2; + +function applyDistinctProvisioningMemberColors< + T extends { name: string; color?: string; removedAt?: number }, +>(members: readonly T[]): T[] { + const colorMap = buildTeamMemberColorMap(members, { preferProvidedColors: false }); + return members.map((member) => ({ + ...member, + color: colorMap.get(member.name) ?? member.color ?? getMemberColorByName(member.name), + })); +} const FS_MONITOR_POLL_MS = 2000; const TASK_WAIT_FALLBACK_MS = 15_000; const STALL_CHECK_INTERVAL_MS = 10_000; @@ -390,7 +403,7 @@ function getConfiguredRuntimeBackend(providerId: TeamProviderId): string | null case 'gemini': return runtimeConfig.gemini; case 'codex': - return runtimeConfig.codex; + return migrateProviderBackendId('codex', runtimeConfig.codex) ?? 'codex-native'; case 'anthropic': default: return null; @@ -408,7 +421,7 @@ function mergeProvisioningWarnings( } function buildRuntimeLaunchWarning( - request: Pick, + request: Pick, env: NodeJS.ProcessEnv, options?: { geminiRuntimeAuth?: GeminiRuntimeAuthState | null; @@ -420,7 +433,9 @@ function buildRuntimeLaunchWarning( const providerLabel = getTeamProviderLabel(providerId); const modelLabel = request.model?.trim() || 'default'; const effortLabel = request.effort ?? 
'default'; - const backend = getConfiguredRuntimeBackend(providerId); + const backend = + migrateProviderBackendId(providerId, request.providerBackendId?.trim()) || + getConfiguredRuntimeBackend(providerId); const flags: string[] = []; if (env.CLAUDE_CODE_USE_GEMINI === '1') flags.push('USE_GEMINI'); if (env.CLAUDE_CODE_USE_OPENAI === '1') flags.push('USE_OPENAI'); @@ -455,7 +470,7 @@ function logRuntimeLaunchSnapshot( teamName: string, claudePath: string, args: string[], - request: Pick, + request: Pick, env: NodeJS.ProcessEnv, options?: { geminiRuntimeAuth?: GeminiRuntimeAuthState | null; @@ -466,9 +481,12 @@ function logRuntimeLaunchSnapshot( const providerId = resolveTeamProviderId(request.providerId); const snapshot = { providerId, + providerBackendId: migrateProviderBackendId(providerId, request.providerBackendId) ?? null, model: request.model ?? null, effort: request.effort ?? null, - configuredBackend: getConfiguredRuntimeBackend(providerId), + configuredBackend: + migrateProviderBackendId(providerId, request.providerBackendId?.trim()) || + getConfiguredRuntimeBackend(providerId), promptSize: options?.promptSize ?? null, expectedMembersCount: options?.expectedMembersCount ?? null, geminiRuntimeAuth: @@ -727,6 +745,8 @@ interface ProvisioningRun { >; /** Agent tool_use_id -> teammate name for persistent teammate spawns. */ memberSpawnToolUseIds: Map; + /** Explicit restart requests awaiting teammate rejoin or failure. */ + pendingMemberRestarts: Map; /** Per-member latest processed lead-inbox bootstrap signal cursor for the current live run. */ memberSpawnLeadInboxCursorByMember: Map; /** Highest accepted deterministic bootstrap event sequence for this run. 
*/ @@ -753,6 +773,7 @@ interface ProvisioningEnvResolution { env: NodeJS.ProcessEnv; authSource: ProvisioningAuthSource; geminiRuntimeAuth: GeminiRuntimeAuthState | null; + providerArgs?: string[]; warning?: string; } @@ -821,6 +842,78 @@ interface LiveTeamAgentRuntimeMetadata { tmuxPaneId?: string; } +function escapeRegexLiteral(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +function commandContainsCliArgValue(command: string, argName: string, value: string): boolean { + const normalizedCommand = command.trim(); + const normalizedValue = value.trim(); + if (!normalizedCommand || !normalizedValue) { + return false; + } + const pattern = new RegExp( + `(?:^|\\s)${escapeRegexLiteral(argName)}(?:=|\\s+)${escapeRegexLiteral(normalizedValue)}(?:\\s|$)` + ); + return pattern.test(normalizedCommand); +} + +function isNeverSpawnedDuringLaunchReason(reason?: string): boolean { + return reason?.trim() === 'Teammate was never spawned during launch.'; +} + +function isLaunchGraceWindowFailureReason(reason?: string): boolean { + return reason?.trim() === 'Teammate did not join within the launch grace window.'; +} + +function isConfigRegistrationFailureReason(reason?: string): boolean { + return ( + reason?.trim() === + 'Teammate was not registered in config.json during launch. Persistent spawn failed.' + ); +} + +function isTmuxNoServerRunningError(error: unknown): boolean { + const text = error instanceof Error ? error.message : String(error ?? ''); + return /no server running on /i.test(text); +} + +function isAutoClearableLaunchFailureReason(reason?: string): boolean { + return ( + isNeverSpawnedDuringLaunchReason(reason) || + isLaunchGraceWindowFailureReason(reason) || + isConfigRegistrationFailureReason(reason) + ); +} + +function buildRestartStillRunningReason(memberName: string): string { + return ( + `Restart for teammate "${memberName}" was skipped because the previous runtime still appears ` + + `to be active. 
The requested settings may not have been applied.` + ); +} + +function buildRestartDuplicateUnconfirmedReason(memberName: string, rawReason?: string): string { + const suffix = rawReason?.trim() + ? ` Agent returned duplicate_skipped with unrecognized reason "${rawReason.trim()}".` + : ' Agent returned duplicate_skipped without a reason.'; + return ( + `Restart for teammate "${memberName}" could not be confirmed and may not have applied.` + suffix + ); +} + +function buildRestartGraceTimeoutReason(memberName: string): string { + return `Teammate "${memberName}" did not rejoin within the restart grace window.`; +} + +interface PendingMemberRestartContext { + requestedAt: string; + desired: Pick< + TeamCreateRequest['members'][number], + 'name' | 'role' | 'workflow' | 'providerId' | 'model' | 'effort' + >; +} + function normalizeTeamAgentRuntimeBackendType( value: string | undefined, isLead: boolean @@ -994,19 +1087,60 @@ function sleep(ms: number): Promise { async function waitForPidsToExit( pids: readonly number[], opts: { timeoutMs: number; pollMs: number } -): Promise { +): Promise { if (pids.length === 0) { - return; + return []; } const deadline = Date.now() + opts.timeoutMs; + let remainingPids = [...new Set(pids)]; while (Date.now() < deadline) { - const remaining = pids.filter((pid) => isProcessAlive(pid)); - if (remaining.length === 0) { - return; + remainingPids = remainingPids.filter((pid) => isProcessAlive(pid)); + if (remainingPids.length === 0) { + return []; } await sleep(opts.pollMs); } + + return remainingPids; +} + +async function waitForTmuxPanesToExit( + paneIds: readonly string[], + opts: { timeoutMs: number; pollMs: number } +): Promise { + const normalizedPaneIds = [...new Set(paneIds.map((paneId) => paneId.trim()).filter(Boolean))]; + if (normalizedPaneIds.length === 0) { + return []; + } + + const deadline = Date.now() + opts.timeoutMs; + let remainingPaneIds = normalizedPaneIds; + let lastError: unknown = null; + while (Date.now() < 
deadline) { + let livePanePidById: Map; + try { + livePanePidById = await listTmuxPanePidsForCurrentPlatform(remainingPaneIds); + lastError = null; + } catch (error) { + if (isTmuxNoServerRunningError(error)) { + return []; + } + lastError = error; + await sleep(opts.pollMs); + continue; + } + remainingPaneIds = remainingPaneIds.filter((paneId) => livePanePidById.has(paneId)); + if (remainingPaneIds.length === 0) { + return []; + } + await sleep(opts.pollMs); + } + + if (lastError) { + throw lastError instanceof Error ? lastError : new Error(getErrorMessage(lastError)); + } + return remainingPaneIds; } async function waitForChildProcessToExit( @@ -1149,14 +1283,26 @@ function buildEffectiveTeamMemberSpecs( } function shouldSkipResumeForProviderRuntimeChange( - request: Pick, - config: Record + request: Pick, + config: Record, + persistedProviderBackendId?: string | null ): { skip: boolean; reason?: string } { const providerId = normalizeTeamMemberProviderId(request.providerId); if (providerId !== 'gemini' && providerId !== 'codex') { return { skip: false }; } + const requestedBackendId = + migrateProviderBackendId(providerId, request.providerBackendId?.trim()) || null; + const previousBackendId = + migrateProviderBackendId(providerId, persistedProviderBackendId?.trim()) || null; + if (requestedBackendId && previousBackendId && requestedBackendId !== previousBackendId) { + return { + skip: true, + reason: `runtime backend changed (${previousBackendId} -> ${requestedBackendId})`, + }; + } + const members = Array.isArray(config.members) ? (config.members as Record[]) : []; @@ -1628,7 +1774,9 @@ export function buildRestartMemberSpawnMessage( return ( `Teammate "${member.name}"${roleHint} was restarted from the UI. ` + `Please respawn them immediately using the **Agent** tool with team_name="${teamName}", name="${member.name}", subagent_type="general-purpose"${providerPart}${modelPart}${effortPart}, and the exact prompt below. 
` + - `This is a restart of an existing persistent teammate, not a new teammate.${workflowHint ? workflowHint : ''}\n\n` + + `This is a restart of an existing persistent teammate, not a new teammate. ` + + `If the Agent tool returns duplicate_skipped with reason bootstrap_pending, treat that as a pending restart and wait for teammate check-in. ` + + `If it returns duplicate_skipped with reason already_running, do not report success - it means the previous runtime still appears active and the restart may not have applied.${workflowHint ? workflowHint : ''}\n\n` + indentMultiline(prompt, ' ') ); } @@ -3919,6 +4067,7 @@ export class TeamProvisioningService { const spawnedMemberName = run.memberSpawnToolUseIds.get(toolUseId); if (spawnedMemberName) { run.memberSpawnToolUseIds.delete(toolUseId); + const pendingRestart = run.pendingMemberRestarts.get(spawnedMemberName); if (isError) { const resultPreview = extractToolResultPreview(resultContent); this.handleMemberSpawnFailure(run, spawnedMemberName, resultPreview); @@ -3928,8 +4077,42 @@ export class TeamProvisioningService { const detail = parsedStatus.reason === 'already_running' ? 'duplicate spawn skipped - already running' - : 'duplicate spawn skipped - teammate already online'; + : parsedStatus.reason === 'bootstrap_pending' + ? 'duplicate spawn skipped - teammate bootstrap still pending' + : parsedStatus.rawReason + ? 
`duplicate spawn skipped - unrecognized reason: ${parsedStatus.rawReason}` + : 'duplicate spawn skipped - reason unavailable'; this.appendMemberBootstrapDiagnostic(run, spawnedMemberName, detail); + if (pendingRestart && !parsedStatus.reason) { + logger.warn( + `[${run.teamName}] Restart for teammate "${spawnedMemberName}" returned duplicate_skipped without a recognized reason` + ); + run.pendingMemberRestarts.delete(spawnedMemberName); + this.setMemberSpawnStatus( + run, + spawnedMemberName, + 'error', + buildRestartDuplicateUnconfirmedReason(spawnedMemberName, parsedStatus.rawReason) + ); + return; + } + if (parsedStatus.reason === 'already_running') { + if (pendingRestart) { + run.pendingMemberRestarts.delete(spawnedMemberName); + this.setMemberSpawnStatus( + run, + spawnedMemberName, + 'error', + buildRestartStillRunningReason(spawnedMemberName) + ); + return; + } + this.agentRuntimeSnapshotCache.delete(run.teamName); + this.liveTeamAgentRuntimeMetadataCache.delete(run.teamName); + this.setMemberSpawnStatus(run, spawnedMemberName, 'online', undefined, 'process'); + } else { + this.setMemberSpawnStatus(run, spawnedMemberName, 'waiting'); + } return; } @@ -3947,11 +4130,16 @@ export class TeamProvisioningService { memberName: string, resultPreview?: string ): void { + const pendingRestart = run.pendingMemberRestarts.get(memberName); const reason = (typeof resultPreview === 'string' && resultPreview.trim().length > 0 ? resultPreview.trim() : 'Teammate spawn failed immediately after launch.') || 'Teammate spawn failed.'; - const message = `Teammate "${memberName}" failed to start: ${reason}`; + const message = pendingRestart + ? 
`Failed to restart teammate "${memberName}": ${reason}` + : `Teammate "${memberName}" failed to start: ${reason}`; + + run.pendingMemberRestarts.delete(memberName); this.setMemberSpawnStatus(run, memberName, 'error', message); @@ -4004,6 +4192,23 @@ export class TeamProvisioningService { } } + private clearMemberSpawnToolTracking(run: ProvisioningRun, memberName: string): void { + let removed = false; + for (const [toolUseId, trackedMemberName] of run.memberSpawnToolUseIds.entries()) { + if (trackedMemberName !== memberName) continue; + run.memberSpawnToolUseIds.delete(toolUseId); + removed = true; + } + + if (removed) { + this.appendMemberBootstrapDiagnostic( + run, + memberName, + 'cleared stale spawn tool tracking before manual restart' + ); + } + } + /** * Update spawn status for a specific team member and emit a change event. */ @@ -4016,6 +4221,21 @@ export class TeamProvisioningService { heartbeatAt?: string ): void { const prev = run.memberSpawnStatuses.get(memberName) ?? createInitialMemberSpawnStatusEntry(); + if ( + status === 'waiting' && + !prev.hardFailure && + (prev.bootstrapConfirmed || prev.runtimeAlive) + ) { + this.setMemberSpawnStatus( + run, + memberName, + 'online', + undefined, + prev.livenessSource, + prev.lastHeartbeatAt + ); + return; + } const updatedAt = nowIso(); const next: MemberSpawnStatusEntry = { ...prev, @@ -4024,13 +4244,26 @@ export class TeamProvisioningService { }; if (status === 'spawning') { - next.launchState = 'starting'; - } else if (status === 'waiting') { - next.agentToolAccepted = true; + next.agentToolAccepted = false; + next.runtimeAlive = false; + next.bootstrapConfirmed = false; next.hardFailure = false; next.error = undefined; next.hardFailureReason = undefined; + next.livenessSource = undefined; + next.firstSpawnAcceptedAt = undefined; + next.lastHeartbeatAt = undefined; + next.launchState = 'starting'; + } else if (status === 'waiting') { + next.agentToolAccepted = true; + next.runtimeAlive = false; + 
next.bootstrapConfirmed = false; + next.hardFailure = false; + next.error = undefined; + next.hardFailureReason = undefined; + next.livenessSource = undefined; next.firstSpawnAcceptedAt = prev.firstSpawnAcceptedAt ?? updatedAt; + next.lastHeartbeatAt = undefined; next.launchState = 'runtime_pending_bootstrap'; } else if (status === 'online') { next.agentToolAccepted = true; @@ -4058,6 +4291,11 @@ export class TeamProvisioningService { next.launchState = 'failed_to_start'; } else if (status === 'offline') { Object.assign(next, createInitialMemberSpawnStatusEntry(), { updatedAt }); + next.error = undefined; + next.hardFailureReason = undefined; + next.livenessSource = undefined; + next.firstSpawnAcceptedAt = undefined; + next.lastHeartbeatAt = undefined; } next.launchState = deriveMemberLaunchState(next); @@ -4078,6 +4316,13 @@ export class TeamProvisioningService { } run.memberSpawnStatuses.set(memberName, next); + if ( + (status === 'online' && (next.bootstrapConfirmed || livenessSource === 'process')) || + status === 'offline' || + status === 'error' + ) { + run.pendingMemberRestarts?.delete(memberName); + } this.syncMemberLaunchGraceCheck(run, memberName, next); if (status === 'spawning') { @@ -4193,6 +4438,7 @@ export class TeamProvisioningService { const updatedAt = nowIso(); const runId = this.getTrackedRunId(teamName); const run = runId ? (this.runs.get(runId) ?? null) : null; + const persistedTeamMeta = await this.teamMetaStore.getMeta(teamName).catch(() => null); let configuredMembers: TeamConfig['members'] = []; try { @@ -4294,6 +4540,10 @@ export class TeamProvisioningService { teamName, updatedAt, runId: run?.runId ?? null, + providerBackendId: migrateProviderBackendId( + run?.request.providerId ?? persistedTeamMeta?.providerId, + run?.request.providerBackendId ?? 
persistedTeamMeta?.providerBackendId + ), members: snapshotMembers, }; @@ -4314,11 +4564,38 @@ export class TeamProvisioningService { throw new Error(`Team "${teamName}" is not currently running`); } - const config = await this.configReader.getConfig(teamName); - const configuredMembers = config?.members ?? []; - const configuredMember = configuredMembers.find( - (member) => member?.name?.trim() === memberName - ); + const readCurrentConfiguredMember = async (): Promise<{ + config: TeamConfig | null; + configuredMembers: TeamConfig['members']; + metaMembers: Awaited>; + configuredMember: ReturnType; + }> => { + const config = await this.configReader.getConfig(teamName); + const configuredMembers = config?.members ?? []; + let metaMembers: Awaited> = []; + try { + metaMembers = await this.membersMetaStore.getMembers(teamName); + } catch { + metaMembers = []; + } + + return { + config, + configuredMembers, + metaMembers, + configuredMember: this.resolveEffectiveConfiguredMember( + configuredMembers, + metaMembers, + memberName + ), + }; + }; + + let { config, configuredMembers, metaMembers, configuredMember } = + await readCurrentConfiguredMember(); + if (!config) { + throw new Error(`Team "${teamName}" configuration is no longer available`); + } if (!configuredMember) { throw new Error(`Member "${memberName}" is not configured in team "${teamName}"`); } @@ -4328,6 +4605,9 @@ export class TeamProvisioningService { if (isLeadMember({ name: memberName, agentType: configuredMember.agentType })) { throw new Error('Lead restart is not supported from member controls'); } + if (run.pendingMemberRestarts.has(memberName)) { + throw new Error(`Restart for teammate "${memberName}" is already in progress`); + } const persistedRuntimeMembers = this.readPersistedRuntimeMembers(teamName).filter((member) => { const candidateName = typeof member.name === 'string' ? 
member.name.trim() : ''; @@ -4345,6 +4625,8 @@ export class TeamProvisioningService { ); } + this.agentRuntimeSnapshotCache.delete(teamName); + this.liveTeamAgentRuntimeMetadataCache.delete(teamName); const liveRuntimeByMember = await this.getLiveTeamAgentRuntimeMetadata(teamName); const livePids = new Set(); let hasAliveRuntimeWithoutPid = false; @@ -4367,6 +4649,7 @@ export class TeamProvisioningService { ); } + const tmuxPaneIdsToVerify: string[] = []; for (const persistedRuntimeMember of persistedRuntimeMembers) { const paneId = typeof persistedRuntimeMember.tmuxPaneId === 'string' @@ -4376,6 +4659,7 @@ export class TeamProvisioningService { if (!paneId || backendType !== 'tmux') { continue; } + tmuxPaneIdsToVerify.push(paneId); try { killTmuxPaneForCurrentPlatformSync(paneId); logger.info( @@ -4403,26 +4687,94 @@ export class TeamProvisioningService { } if (livePids.size > 0) { - await waitForPidsToExit(Array.from(livePids), { + const lingeringPids = await waitForPidsToExit(Array.from(livePids), { timeoutMs: 1_500, pollMs: 100, }); + if (lingeringPids.length > 0) { + throw new Error( + `Restart for teammate "${memberName}" is still waiting for the previous process to exit (${lingeringPids.join(', ')}).` + ); + } + } + + if (tmuxPaneIdsToVerify.length > 0) { + let lingeringPaneIds: string[]; + try { + lingeringPaneIds = await waitForTmuxPanesToExit(tmuxPaneIdsToVerify, { + timeoutMs: 1_500, + pollMs: 100, + }); + } catch (error) { + throw new Error( + `Restart for teammate "${memberName}" could not verify that the previous tmux pane exited: ${ + error instanceof Error ? 
error.message : String(error) + }` + ); + } + if (lingeringPaneIds.length > 0) { + throw new Error( + `Restart for teammate "${memberName}" is still waiting for the previous tmux pane to exit (${lingeringPaneIds.join(', ')}).` + ); + } + } + + this.setMemberSpawnStatus(run, memberName, 'offline'); + + const latestRunId = this.getAliveRunId(teamName); + const currentRun = this.runs.get(runId); + if ( + latestRunId !== runId || + !currentRun || + currentRun !== run || + currentRun.processKilled || + currentRun.cancelRequested + ) { + throw new Error(`Team "${teamName}" is not currently running`); + } + + ({ config, configuredMembers, metaMembers, configuredMember } = + await readCurrentConfiguredMember()); + if (!config) { + throw new Error(`Team "${teamName}" configuration disappeared while restart was in progress`); + } + if (!configuredMember) { + throw new Error( + `Member "${memberName}" is no longer configured in team "${teamName}" after restart preparation` + ); + } + if (configuredMember.removedAt) { + throw new Error(`Member "${memberName}" was removed while restart was in progress`); + } + if (isLeadMember({ name: memberName, agentType: configuredMember.agentType })) { + throw new Error('Lead restart is not supported from member controls'); } this.agentRuntimeSnapshotCache.delete(teamName); this.liveTeamAgentRuntimeMetadataCache.delete(teamName); - this.setMemberSpawnStatus(run, memberName, 'offline'); + this.resetRuntimeToolActivity(run, memberName); + this.clearMemberSpawnToolTracking(run, memberName); this.setMemberSpawnStatus(run, memberName, 'spawning'); this.appendMemberBootstrapDiagnostic(run, memberName, 'manual restart requested from UI'); + run.pendingMemberRestarts.set(memberName, { + requestedAt: nowIso(), + desired: { + name: configuredMember.name, + role: configuredMember.role, + workflow: configuredMember.workflow, + providerId: configuredMember.providerId, + model: configuredMember.model, + effort: configuredMember.effort, + }, + }); - const 
leadName = - configuredMembers.find((member) => isLeadMember(member))?.name?.trim() || 'team-lead'; + const leadName = this.resolveLeadMemberName(configuredMembers, metaMembers); const restartMessage = buildRestartMemberSpawnMessage( teamName, config?.name?.trim() || teamName, leadName, { - name: memberName, + name: configuredMember.name, role: configuredMember.role, workflow: configuredMember.workflow, providerId: configuredMember.providerId, @@ -4434,6 +4786,7 @@ export class TeamProvisioningService { try { await this.sendMessageToRun(run, restartMessage); } catch (error) { + run.pendingMemberRestarts.delete(memberName); this.setMemberSpawnStatus( run, memberName, @@ -4463,6 +4816,10 @@ export class TeamProvisioningService { return; } if (!entry.firstSpawnAcceptedAt) { + if (existing) { + clearTimeout(existing); + this.pendingTimeouts.delete(key); + } return; } const remainingMs = @@ -4510,11 +4867,17 @@ export class TeamProvisioningService { ) { return; } + const restartPending = run.pendingMemberRestarts.has(memberName); + if (restartPending) { + run.pendingMemberRestarts.delete(memberName); + } this.setMemberSpawnStatus( run, memberName, 'error', - 'Teammate did not join within the launch grace window.' + restartPending + ? buildRestartGraceTimeoutReason(memberName) + : 'Teammate did not join within the launch grace window.' 
); } @@ -5838,8 +6201,16 @@ export class TeamProvisioningService { throw new Error('Claude CLI not found; install it or provide a valid path'); } - const provisioningEnv = await this.buildProvisioningEnv(request.providerId); - const { env: shellEnv, geminiRuntimeAuth, warning: envWarning } = provisioningEnv; + const provisioningEnv = await this.buildProvisioningEnv( + request.providerId, + request.providerBackendId + ); + const { + env: shellEnv, + geminiRuntimeAuth, + providerArgs = [], + warning: envWarning, + } = provisioningEnv; if (envWarning) { throw new Error(envWarning); } @@ -5931,6 +6302,7 @@ export class TeamProvisioningService { request.members.map((m) => [m.name, createInitialMemberSpawnStatusEntry()]) ), memberSpawnToolUseIds: new Map(), + pendingMemberRestarts: new Map(), memberSpawnLeadInboxCursorByMember: new Map(), lastDeterministicBootstrapSeq: 0, lastMemberSpawnAuditAt: 0, @@ -6017,6 +6389,7 @@ export class TeamProvisioningService { ...(request.effort ? ['--effort', request.effort] : []), ...(request.worktree ? ['--worktree', request.worktree] : []), ...parseCliArgs(request.extraCliArgs), + ...providerArgs, ]; const runtimeWarning = buildRuntimeLaunchWarning(request, shellEnv, { geminiRuntimeAuth, @@ -6042,6 +6415,7 @@ export class TeamProvisioningService { cwd: request.cwd, prompt: request.prompt, providerId: request.providerId, + providerBackendId: request.providerBackendId, model: request.model, effort: request.effort, skipPermissions: request.skipPermissions, @@ -6050,8 +6424,7 @@ export class TeamProvisioningService { limitContext: request.limitContext, createdAt: Date.now(), }); - await this.membersMetaStore.writeMembers( - request.teamName, + const membersToWrite = applyDistinctProvisioningMemberColors( effectiveMemberSpecs.map((m) => ({ name: m.name.trim(), role: m.role?.trim() || undefined, @@ -6063,10 +6436,12 @@ export class TeamProvisioningService { ? 
m.effort : undefined, agentType: 'general-purpose' as const, - color: getMemberColorByName(m.name.trim()), joinedAt: Date.now(), })) ); + await this.membersMetaStore.writeMembers(request.teamName, membersToWrite, { + providerBackendId: request.providerBackendId, + }); if (request.skipPermissions === false) { await this.seedTeammateOperationalPermissionRules(request.teamName, request.cwd); } @@ -6310,7 +6685,14 @@ export class TeamProvisioningService { if (!skipResume) { try { const configParsed = JSON.parse(configRaw) as Record; - const resumeGuard = shouldSkipResumeForProviderRuntimeChange(request, configParsed); + const persistedTeamMeta = await this.teamMetaStore + .getMeta(request.teamName) + .catch(() => null); + const resumeGuard = shouldSkipResumeForProviderRuntimeChange( + request, + configParsed, + persistedTeamMeta?.providerBackendId ?? null + ); if (resumeGuard.skip) { logger.info( `[${request.teamName}] Skipping session resume — ${resumeGuard.reason ?? 'runtime changed'}` @@ -6395,8 +6777,16 @@ export class TeamProvisioningService { const runId = randomUUID(); const startedAt = nowIso(); - const provisioningEnv = await this.buildProvisioningEnv(request.providerId); - const { env: shellEnv, geminiRuntimeAuth, warning: envWarning } = provisioningEnv; + const provisioningEnv = await this.buildProvisioningEnv( + request.providerId, + request.providerBackendId + ); + const { + env: shellEnv, + geminiRuntimeAuth, + providerArgs = [], + warning: envWarning, + } = provisioningEnv; if (envWarning) { throw new Error(envWarning); } @@ -6421,6 +6811,7 @@ export class TeamProvisioningService { members: effectiveMemberSpecs, cwd: request.cwd, providerId: request.providerId, + providerBackendId: request.providerBackendId, model: request.model, effort: request.effort, skipPermissions: request.skipPermissions, @@ -6512,6 +6903,7 @@ export class TeamProvisioningService { expectedMembers.map((name) => [name, createInitialMemberSpawnStatusEntry()]) ), 
memberSpawnToolUseIds: new Map(), + pendingMemberRestarts: new Map(), memberSpawnLeadInboxCursorByMember: new Map(), lastDeterministicBootstrapSeq: 0, lastMemberSpawnAuditAt: 0, @@ -6631,6 +7023,7 @@ export class TeamProvisioningService { launchArgs.push('--worktree', request.worktree); } launchArgs.push(...parseCliArgs(request.extraCliArgs)); + launchArgs.push(...providerArgs); const runtimeWarning = buildRuntimeLaunchWarning(request, shellEnv, { geminiRuntimeAuth, promptSize, @@ -6643,6 +7036,42 @@ export class TeamProvisioningService { }); // --resume is added above when a valid previous session JSONL exists. // Without it, CLI creates a fresh session ID automatically. + await this.teamMetaStore.writeMeta(request.teamName, { + displayName: syntheticRequest.displayName, + description: syntheticRequest.description, + color: syntheticRequest.color, + cwd: request.cwd, + prompt: request.prompt, + providerId: request.providerId, + providerBackendId: request.providerBackendId, + model: request.model, + effort: request.effort, + skipPermissions: request.skipPermissions, + worktree: request.worktree, + extraCliArgs: request.extraCliArgs, + limitContext: request.limitContext, + createdAt: Date.now(), + }); + await this.membersMetaStore.writeMembers( + request.teamName, + effectiveMemberSpecs.map((member) => ({ + name: member.name.trim(), + role: member.role?.trim() || undefined, + workflow: member.workflow?.trim() || undefined, + providerId: normalizeOptionalTeamProviderId(member.providerId), + model: member.model?.trim() || undefined, + effort: + member.effort === 'low' || member.effort === 'medium' || member.effort === 'high' + ? 
member.effort + : undefined, + agentType: 'general-purpose', + color: getMemberColorByName(member.name.trim()), + joinedAt: Date.now(), + })), + { + providerBackendId: request.providerBackendId, + } + ); try { if (request.skipPermissions === false) { @@ -7979,13 +8408,29 @@ export class TeamProvisioningService { const nextStatuses = { ...statuses }; for (const [memberName, metadata] of runtimeByMember.entries()) { const current = nextStatuses[memberName]; - if (!current || !metadata.model) { + if (!current) { continue; } - nextStatuses[memberName] = { + const nextEntry: MemberSpawnStatusEntry = { ...current, - runtimeModel: metadata.model, + ...(metadata.model ? { runtimeModel: metadata.model } : {}), }; + const failureReason = current.hardFailureReason ?? current.error; + if ( + metadata.alive && + current.launchState === 'failed_to_start' && + isAutoClearableLaunchFailureReason(failureReason) + ) { + nextEntry.status = 'online'; + nextEntry.agentToolAccepted = true; + nextEntry.runtimeAlive = true; + nextEntry.hardFailure = false; + nextEntry.hardFailureReason = undefined; + nextEntry.error = undefined; + nextEntry.livenessSource = current.bootstrapConfirmed ? current.livenessSource : 'process'; + nextEntry.launchState = deriveMemberLaunchState(nextEntry); + } + nextStatuses[memberName] = nextEntry; } return nextStatuses; } @@ -8033,6 +8478,87 @@ export class TeamProvisioningService { return undefined; } + private resolveEffectiveConfiguredMember( + configuredMembers: TeamConfig['members'] | undefined, + metaMembers: Awaited>, + memberName: string + ): { + name: string; + role?: string; + workflow?: string; + providerId?: TeamProviderId; + model?: string; + effort?: EffortLevel; + agentType?: string; + removedAt?: number | string; + } | null { + const configuredMember = (configuredMembers ?? []).find((member) => { + const candidateName = typeof member?.name === 'string' ? 
member.name.trim() : ''; + return candidateName.length > 0 && matchesTeamMemberIdentity(candidateName, memberName); + }); + const metaMember = metaMembers.find((member) => { + const candidateName = member.name?.trim() ?? ''; + return candidateName.length > 0 && matchesTeamMemberIdentity(candidateName, memberName); + }); + + if (!configuredMember && !metaMember) { + return null; + } + + const name = + metaMember?.name?.trim() || configuredMember?.name?.trim() || memberName.trim() || memberName; + const role = metaMember?.role?.trim() || configuredMember?.role?.trim() || undefined; + const workflow = + metaMember?.workflow?.trim() || configuredMember?.workflow?.trim() || undefined; + const providerId = + normalizeTeamMemberProviderId(metaMember?.providerId) ?? + normalizeTeamMemberProviderId(configuredMember?.providerId); + const model = metaMember?.model?.trim() || configuredMember?.model?.trim() || undefined; + const effort = + metaMember?.effort === 'low' || + metaMember?.effort === 'medium' || + metaMember?.effort === 'high' + ? metaMember.effort + : configuredMember?.effort === 'low' || + configuredMember?.effort === 'medium' || + configuredMember?.effort === 'high' + ? configuredMember.effort + : undefined; + const agentType = + metaMember?.agentType?.trim() || configuredMember?.agentType?.trim() || undefined; + const removedAt = metaMember?.removedAt ?? configuredMember?.removedAt; + + return { + name, + ...(role ? { role } : {}), + ...(workflow ? { workflow } : {}), + ...(providerId ? { providerId } : {}), + ...(model ? { model } : {}), + ...(effort ? { effort } : {}), + ...(agentType ? { agentType } : {}), + ...(removedAt != null ? { removedAt } : {}), + }; + } + + private resolveLeadMemberName( + configuredMembers: TeamConfig['members'] | undefined, + metaMembers: Awaited> + ): string { + const configuredLead = (configuredMembers ?? 
[]).find((member) => isLeadMember(member)); + const configuredLeadName = configuredLead?.name?.trim(); + if (configuredLeadName) { + return configuredLeadName; + } + + const metaLead = metaMembers.find((member) => isLeadMember(member)); + const metaLeadName = metaLead?.name?.trim(); + if (metaLeadName) { + return metaLeadName; + } + + return 'team-lead'; + } + private findEffectiveRunMemberModel( run: ProvisioningRun | null, memberName: string @@ -8148,6 +8674,23 @@ export class TeamProvisioningService { }); } + for (const member of metaMembers) { + const memberName = typeof member?.name === 'string' ? member.name.trim() : ''; + if (!memberName || isLeadMember({ name: memberName, agentType: member.agentType })) { + continue; + } + const runtimeModel = + member.model?.trim() || + this.findConfiguredMemberModel(configuredMembers, memberName) || + this.findEffectiveRunMemberModel(run, memberName); + upsertMetadata(memberName, { + ...(runtimeModel ? { model: runtimeModel } : {}), + ...(typeof member.agentId === 'string' && member.agentId.trim() + ? { agentId: member.agentId.trim() } + : {}), + }); + } + for (const member of run?.effectiveMembers ?? []) { const memberName = member.name?.trim() ?? ''; if (!memberName || isLeadMember(member) || memberName.toLowerCase() === 'user') { @@ -8174,21 +8717,38 @@ export class TeamProvisioningService { } } + const unresolvedAgentIds = [...metadataByMember.values()] + .map((metadata) => metadata.agentId?.trim() ?? '') + .filter((agentId) => agentId.length > 0); + const processPidByAgentId = + unresolvedAgentIds.length > 0 + ? this.findLiveProcessPidByAgentId(teamName, unresolvedAgentIds) + : new Map(); + for (const [memberName, metadata] of metadataByMember.entries()) { const paneId = metadata.tmuxPaneId?.trim() ?? ''; const backendType = metadata.backendType; const panePid = paneId ? 
panePidById.get(paneId) : undefined; - const status = this.findTrackedMemberSpawnStatus(run, memberName); - const alive = + const processPid = metadata.agentId ? processPidByAgentId.get(metadata.agentId) : undefined; + const resolvedPid = typeof panePid === 'number' && panePid > 0 + ? panePid + : typeof processPid === 'number' && processPid > 0 + ? processPid + : undefined; + const status = this.findTrackedMemberSpawnStatus(run, memberName); + const mayInferAliveFromStatusOnly = status?.launchState !== 'failed_to_start'; + const alive = + typeof resolvedPid === 'number' && resolvedPid > 0 ? true : backendType === 'tmux' ? false - : Boolean(status?.runtimeAlive || status?.bootstrapConfirmed); + : mayInferAliveFromStatusOnly && + Boolean(status?.runtimeAlive || status?.bootstrapConfirmed); metadataByMember.set(memberName, { ...metadata, alive, - ...(typeof panePid === 'number' && panePid > 0 ? { pid: panePid } : {}), + ...(typeof resolvedPid === 'number' && resolvedPid > 0 ? { pid: resolvedPid } : {}), }); } @@ -8236,6 +8796,46 @@ export class TeamProvisioningService { return rows; } + private findLiveProcessPidByAgentId( + teamName: string, + agentIds: readonly string[] + ): Map { + const normalizedAgentIds = [ + ...new Set(agentIds.map((agentId) => agentId.trim()).filter(Boolean)), + ]; + if (normalizedAgentIds.length === 0) { + return new Map(); + } + + const rows = this.readUnixProcessTableRows(); + if (rows.length === 0) { + return new Map(); + } + + const pidByAgentId = new Map(); + for (const row of rows) { + if ( + !commandContainsCliArgValue(row.command, '--team-name', teamName) || + !row.command.includes('--agent-id') + ) { + continue; + } + + for (const agentId of normalizedAgentIds) { + if (!commandContainsCliArgValue(row.command, '--agent-id', agentId)) { + continue; + } + const currentPid = pidByAgentId.get(agentId); + if (!currentPid || row.pid > currentPid) { + pidByAgentId.set(agentId, row.pid); + } + break; + } + } + + return pidByAgentId; + } + 
private async readProcessRssBytesByPid(pids: readonly number[]): Promise> { const uniquePids = [...new Set(pids.filter((pid) => Number.isFinite(pid) && pid > 0))]; if (uniquePids.length === 0) { @@ -8444,6 +9044,7 @@ export class TeamProvisioningService { const nextMembers = { ...persisted.members }; const now = nowIso(); for (const expected of persisted.expectedMembers) { + const bootstrapMember = bootstrapSnapshot?.members[expected]; const current = nextMembers[expected] ?? { name: expected, launchState: 'starting', @@ -8453,6 +9054,20 @@ export class TeamProvisioningService { hardFailure: false, lastEvaluatedAt: now, }; + if (bootstrapMember?.agentToolAccepted && !current.agentToolAccepted) { + current.agentToolAccepted = true; + current.firstSpawnAcceptedAt = + current.firstSpawnAcceptedAt ?? bootstrapMember.firstSpawnAcceptedAt; + } + if (bootstrapMember?.runtimeAlive && !current.runtimeAlive) { + current.runtimeAlive = true; + current.lastRuntimeAliveAt = + current.lastRuntimeAliveAt ?? bootstrapMember.lastRuntimeAliveAt; + } + if (bootstrapMember?.bootstrapConfirmed && !current.bootstrapConfirmed) { + current.bootstrapConfirmed = true; + current.lastHeartbeatAt = current.lastHeartbeatAt ?? bootstrapMember.lastHeartbeatAt; + } const matchedRuntimeNames = [...configMembers].filter((name) => { if (name === expected) return true; const parsed = parseNumericSuffixName(name); @@ -8499,6 +9114,21 @@ export class TeamProvisioningService { : current.sources?.configDrift, inboxHeartbeat: heartbeatMessage != null ? 
true : current.sources?.inboxHeartbeat, }; + const bootstrapProvesSpawnAcceptance = + bootstrapMember?.agentToolAccepted === true || + typeof bootstrapMember?.firstSpawnAcceptedAt === 'string'; + const currentProvesSpawnAcceptance = + current.agentToolAccepted === true || typeof current.firstSpawnAcceptedAt === 'string'; + if ( + isNeverSpawnedDuringLaunchReason(current.hardFailureReason) && + (bootstrapProvesSpawnAcceptance || currentProvesSpawnAcceptance) + ) { + current.hardFailure = false; + current.hardFailureReason = undefined; + if (current.sources) { + current.sources.hardFailureSignal = undefined; + } + } if (heartbeatReason) { current.hardFailure = true; current.hardFailureReason = heartbeatReason; @@ -9331,6 +9961,16 @@ export class TeamProvisioningService { } if (outcome === 'already_running') { + if (run.pendingMemberRestarts.has(memberName)) { + run.pendingMemberRestarts.delete(memberName); + this.setMemberSpawnStatus( + run, + memberName, + 'error', + buildRestartStillRunningReason(memberName) + ); + return true; + } this.setMemberSpawnStatus(run, memberName, 'online', undefined, 'process'); return true; } @@ -11253,9 +11893,6 @@ export class TeamProvisioningService { } ); - // Clean up team.meta.json — provisioning succeeded, config.json is now authoritative. - await this.teamMetaStore.deleteMeta(run.teamName).catch(() => {}); - // Audit: flag any expected member not registered in config.json after provisioning. 
await this.refreshMemberSpawnStatusesFromLeadInbox(run); await this.maybeAuditMemberSpawnStatuses(run, { force: true }); @@ -12182,7 +12819,8 @@ export class TeamProvisioningService { } private async buildProvisioningEnv( - providerId: TeamProviderId | undefined = 'anthropic' + providerId: TeamProviderId | undefined = 'anthropic', + providerBackendId?: string | null ): Promise { const shellEnv = await resolveInteractiveShellEnv(); // getHomeDir() uses Electron's app.getPath('home') which handles Unicode @@ -12228,6 +12866,7 @@ export class TeamProvisioningService { const resolvedProviderId = resolveTeamProviderId(providerId); const providerEnvResult = await buildProviderAwareCliEnv({ providerId, + providerBackendId, shellEnv, env, }); @@ -12264,12 +12903,18 @@ export class TeamProvisioningService { env: providerEnv, authSource: 'configured_api_key_missing', geminiRuntimeAuth: null, + providerArgs: providerEnvResult.providerArgs, warning: providerConnectionIssue, }; } if (resolvedProviderId === 'codex') { - return { env: providerEnv, authSource: 'codex_runtime', geminiRuntimeAuth: null }; + return { + env: providerEnv, + authSource: 'codex_runtime', + geminiRuntimeAuth: null, + providerArgs: providerEnvResult.providerArgs, + }; } if (resolvedProviderId === 'gemini') { @@ -12277,6 +12922,7 @@ export class TeamProvisioningService { env: providerEnv, authSource: 'gemini_runtime', geminiRuntimeAuth: await resolveGeminiRuntimeAuth(providerEnv), + providerArgs: providerEnvResult.providerArgs, }; } @@ -12285,7 +12931,12 @@ export class TeamProvisioningService { typeof providerEnv.ANTHROPIC_API_KEY === 'string' && providerEnv.ANTHROPIC_API_KEY.trim().length > 0 ) { - return { env: providerEnv, authSource: 'anthropic_api_key', geminiRuntimeAuth: null }; + return { + env: providerEnv, + authSource: 'anthropic_api_key', + geminiRuntimeAuth: null, + providerArgs: providerEnvResult.providerArgs, + }; } // 2. 
Proxy token (ANTHROPIC_AUTH_TOKEN) — `-p` mode does NOT read this var, @@ -12295,7 +12946,12 @@ export class TeamProvisioningService { providerEnv.ANTHROPIC_AUTH_TOKEN.trim().length > 0 ) { providerEnv.ANTHROPIC_API_KEY = providerEnv.ANTHROPIC_AUTH_TOKEN; - return { env: providerEnv, authSource: 'anthropic_auth_token', geminiRuntimeAuth: null }; + return { + env: providerEnv, + authSource: 'anthropic_auth_token', + geminiRuntimeAuth: null, + providerArgs: providerEnvResult.providerArgs, + }; } // 3. No explicit API key — let the CLI handle its own OAuth auth. @@ -12303,7 +12959,12 @@ export class TeamProvisioningService { // tokens in-memory. Injecting CLAUDE_CODE_OAUTH_TOKEN from the // credentials file causes 401 errors because the stored token is // often stale (CLI refreshes in-memory but rarely writes back). - return { env: providerEnv, authSource: 'none', geminiRuntimeAuth: null }; + return { + env: providerEnv, + authSource: 'none', + geminiRuntimeAuth: null, + providerArgs: providerEnvResult.providerArgs, + }; } private async resolveControlApiBaseUrl(): Promise { @@ -13044,8 +13705,7 @@ export class TeamProvisioningService { const joinedAt = Date.now(); try { - await this.membersMetaStore.writeMembers( - teamName, + const membersToWrite = applyDistinctProvisioningMemberColors( teammateMembers.map((member) => ({ name: member.name.trim(), role: member.role?.trim() || undefined, @@ -13056,11 +13716,13 @@ export class TeamProvisioningService { member.effort === 'low' || member.effort === 'medium' || member.effort === 'high' ? 
member.effort : undefined, - agentType: 'general-purpose', - color: getMemberColorByName(member.name.trim()), + agentType: 'general-purpose' as const, joinedAt, })) ); + await this.membersMetaStore.writeMembers(teamName, membersToWrite, { + providerBackendId: request.providerBackendId, + }); } catch (error) { logger.warn( `[${teamName}] Failed to persist members.meta.json: ${ diff --git a/src/main/services/team/cliFlavor.ts b/src/main/services/team/cliFlavor.ts index 5936f527..e973dcba 100644 --- a/src/main/services/team/cliFlavor.ts +++ b/src/main/services/team/cliFlavor.ts @@ -26,7 +26,7 @@ export function getCliFlavorUiOptions(flavor: CliFlavor): CliFlavorUiOptions { switch (flavor) { case 'agent_teams_orchestrator': return { - displayName: 'agent_teams_orchestrator', + displayName: 'Multimodel runtime', supportsSelfUpdate: false, showVersionDetails: false, showBinaryPath: false, diff --git a/src/main/services/team/index.ts b/src/main/services/team/index.ts index f6290a2f..fda988d3 100644 --- a/src/main/services/team/index.ts +++ b/src/main/services/team/index.ts @@ -39,3 +39,12 @@ export { TeamSentMessagesStore } from './TeamSentMessagesStore'; export { TeamTaskReader } from './TeamTaskReader'; export { TeamTaskWriter } from './TeamTaskWriter'; export { countLineChanges } from './UnifiedLineCounter'; +export { ActiveTeamRegistry } from './stallMonitor/ActiveTeamRegistry'; +export { BoardTaskActivityBatchIndexer } from './stallMonitor/BoardTaskActivityBatchIndexer'; +export { TeamTaskLogFreshnessReader } from './stallMonitor/TeamTaskLogFreshnessReader'; +export { TeamTaskStallExactRowReader } from './stallMonitor/TeamTaskStallExactRowReader'; +export { TeamTaskStallJournal } from './stallMonitor/TeamTaskStallJournal'; +export { TeamTaskStallMonitor } from './stallMonitor/TeamTaskStallMonitor'; +export { TeamTaskStallNotifier } from './stallMonitor/TeamTaskStallNotifier'; +export { TeamTaskStallPolicy } from './stallMonitor/TeamTaskStallPolicy'; +export { 
TeamTaskStallSnapshotSource } from './stallMonitor/TeamTaskStallSnapshotSource'; diff --git a/src/main/services/team/stallMonitor/ActiveTeamRegistry.ts b/src/main/services/team/stallMonitor/ActiveTeamRegistry.ts new file mode 100644 index 00000000..8e838772 --- /dev/null +++ b/src/main/services/team/stallMonitor/ActiveTeamRegistry.ts @@ -0,0 +1,101 @@ +import type { TeamLogSourceTracker } from '../TeamLogSourceTracker'; +import type { TeamChangeEvent } from '@shared/types'; + +interface TeamAliveProcessesReader { + listAliveProcessTeams(): Promise; +} + +interface TeamLogSourceTrackingHandle { + enableTracking( + teamName: string, + consumer: 'stall_monitor' + ): Promise<{ projectFingerprint: string | null; logSourceGeneration: string | null }>; + disableTracking( + teamName: string, + consumer: 'stall_monitor' + ): Promise<{ projectFingerprint: string | null; logSourceGeneration: string | null }>; +} + +export class ActiveTeamRegistry { + private readonly activeTeams = new Set(); + private reconcileTimer: ReturnType | null = null; + + constructor( + private readonly teamDataService: TeamAliveProcessesReader, + private readonly teamLogSourceTracker: Pick< + TeamLogSourceTracker, + 'enableTracking' | 'disableTracking' + > & + TeamLogSourceTrackingHandle, + private readonly reconcileIntervalMs: number = 5 * 60_000 + ) {} + + noteTeamChange(event: TeamChangeEvent): void { + if ( + event.type === 'member-spawn' || + (event.type === 'lead-activity' && event.detail !== 'offline') + ) { + if (!this.activeTeams.has(event.teamName)) { + this.activeTeams.add(event.teamName); + void this.teamLogSourceTracker.enableTracking(event.teamName, 'stall_monitor'); + } + return; + } + + if (event.type === 'task-log-change' || event.type === 'log-source-change') { + if (!this.activeTeams.has(event.teamName)) { + return; + } + } + } + + async listActiveTeams(): Promise { + return [...this.activeTeams].sort((left, right) => left.localeCompare(right)); + } + + start(): void { + if 
(this.reconcileTimer) { + return; + } + void this.reconcile(); + this.reconcileTimer = setInterval(() => { + void this.reconcile(); + }, this.reconcileIntervalMs); + } + + async stop(): Promise { + if (this.reconcileTimer) { + clearInterval(this.reconcileTimer); + this.reconcileTimer = null; + } + + const teamNames = [...this.activeTeams]; + this.activeTeams.clear(); + await Promise.all( + teamNames.map((teamName) => + this.teamLogSourceTracker.disableTracking(teamName, 'stall_monitor') + ) + ); + } + + async reconcile(): Promise { + const aliveTeams = await this.teamDataService.listAliveProcessTeams(); + const aliveSet = new Set(aliveTeams); + + for (const teamName of aliveTeams) { + if (this.activeTeams.has(teamName)) { + continue; + } + this.activeTeams.add(teamName); + await this.teamLogSourceTracker.enableTracking(teamName, 'stall_monitor'); + } + + for (const teamName of [...this.activeTeams]) { + if (aliveSet.has(teamName)) { + continue; + } + this.activeTeams.delete(teamName); + await this.teamLogSourceTracker.disableTracking(teamName, 'stall_monitor'); + } + } +} diff --git a/src/main/services/team/stallMonitor/BoardTaskActivityBatchIndexer.ts b/src/main/services/team/stallMonitor/BoardTaskActivityBatchIndexer.ts new file mode 100644 index 00000000..548effb5 --- /dev/null +++ b/src/main/services/team/stallMonitor/BoardTaskActivityBatchIndexer.ts @@ -0,0 +1,30 @@ +import { BoardTaskActivityRecordBuilder } from '../taskLogs/activity/BoardTaskActivityRecordBuilder'; + +import type { BoardTaskActivityRecord } from '../taskLogs/activity/BoardTaskActivityRecord'; +import type { RawTaskActivityMessage } from '../taskLogs/activity/BoardTaskActivityTranscriptReader'; +import type { TeamTask } from '@shared/types'; + +export class BoardTaskActivityBatchIndexer { + constructor( + private readonly recordBuilder: Pick< + BoardTaskActivityRecordBuilder, + 'buildForTasks' + > = new BoardTaskActivityRecordBuilder() + ) {} + + buildIndex(args: { + teamName: string; + 
tasks: TeamTask[]; + messages: RawTaskActivityMessage[]; + }): Map { + if (args.tasks.length === 0 || args.messages.length === 0) { + return new Map(); + } + + return this.recordBuilder.buildForTasks({ + teamName: args.teamName, + tasks: args.tasks, + messages: args.messages, + }); + } +} diff --git a/src/main/services/team/stallMonitor/TeamTaskLogFreshnessReader.ts b/src/main/services/team/stallMonitor/TeamTaskLogFreshnessReader.ts new file mode 100644 index 00000000..326af24e --- /dev/null +++ b/src/main/services/team/stallMonitor/TeamTaskLogFreshnessReader.ts @@ -0,0 +1,124 @@ +import * as fs from 'fs/promises'; +import * as path from 'path'; + +import { BoardTaskActivityParseCache } from '../taskLogs/activity/BoardTaskActivityParseCache'; + +import type { TaskLogFreshnessSignal } from './TeamTaskStallTypes'; + +const BOARD_TASK_LOG_FRESHNESS_DIRNAME = '.board-task-log-freshness'; +const BOARD_TASK_LOG_FRESHNESS_FILE_SUFFIX = '.json'; + +interface ParsedFreshnessSignal { + taskId: string; + updatedAt: string; + transcriptFileBasename?: string; +} + +function encodeTaskId(taskId: string): string { + return encodeURIComponent(taskId); +} + +function isValidTimestamp(value: unknown): value is string { + return typeof value === 'string' && value.trim().length > 0 && Number.isFinite(Date.parse(value)); +} + +export class TeamTaskLogFreshnessReader { + private readonly cache = new BoardTaskActivityParseCache(); + + async readSignals( + projectDir: string, + taskIds: string[] + ): Promise> { + const uniqueTaskIds = [...new Set(taskIds)].filter((taskId) => taskId.trim().length > 0).sort(); + const signalFilePaths = uniqueTaskIds.map((taskId) => + path.join( + projectDir, + BOARD_TASK_LOG_FRESHNESS_DIRNAME, + `${encodeTaskId(taskId)}${BOARD_TASK_LOG_FRESHNESS_FILE_SUFFIX}` + ) + ); + this.cache.retainOnly(new Set(signalFilePaths)); + + const rows = await Promise.all( + uniqueTaskIds.map(async (taskId, index) => { + const filePath = signalFilePaths[index]; + const parsed 
= await this.readSignal(filePath); + if (!parsed || parsed.taskId !== taskId) { + return null; + } + return [ + taskId, + { + taskId, + updatedAt: parsed.updatedAt, + filePath, + ...(parsed.transcriptFileBasename + ? { transcriptFileBasename: parsed.transcriptFileBasename } + : {}), + } satisfies TaskLogFreshnessSignal, + ] as const; + }) + ); + + return new Map(rows.filter((row): row is NonNullable => row !== null)); + } + + private async readSignal(filePath: string): Promise { + try { + const stat = await fs.stat(filePath); + if (!stat.isFile()) { + this.cache.clearForPath(filePath); + return false; + } + + const cached = this.cache.getIfFresh(filePath, stat.mtimeMs, stat.size); + if (cached !== null) { + return cached; + } + + const inFlight = this.cache.getInFlight(filePath); + if (inFlight) { + return inFlight; + } + + const promise = this.parseSignal(filePath); + this.cache.setInFlight(filePath, promise); + try { + const parsed = await promise; + this.cache.set(filePath, stat.mtimeMs, stat.size, parsed); + return parsed; + } finally { + this.cache.clearInFlight(filePath); + } + } catch { + this.cache.clearForPath(filePath); + return false; + } + } + + private async parseSignal(filePath: string): Promise { + const raw = await fs.readFile(filePath, 'utf8'); + const parsed = JSON.parse(raw) as unknown; + if (!parsed || typeof parsed !== 'object') { + return false; + } + + const record = parsed as Record; + const taskId = + typeof record.taskId === 'string' && record.taskId.trim().length > 0 + ? record.taskId.trim() + : null; + const updatedAt = isValidTimestamp(record.updatedAt) ? record.updatedAt : null; + if (!taskId || !updatedAt) { + return false; + } + + return { + taskId, + updatedAt, + ...(typeof record.transcriptFile === 'string' && record.transcriptFile.trim().length > 0 + ? 
{ transcriptFileBasename: path.basename(record.transcriptFile.trim()) } + : {}), + }; + } +} diff --git a/src/main/services/team/stallMonitor/TeamTaskStallExactRowReader.ts b/src/main/services/team/stallMonitor/TeamTaskStallExactRowReader.ts new file mode 100644 index 00000000..b515eb3a --- /dev/null +++ b/src/main/services/team/stallMonitor/TeamTaskStallExactRowReader.ts @@ -0,0 +1,127 @@ +import { yieldToEventLoop } from '@main/utils/asyncYield'; +import { parseJsonlLine } from '@main/utils/jsonl'; +import { createLogger } from '@shared/utils/logger'; +import { createReadStream } from 'fs'; +import * as fs from 'fs/promises'; +import * as readline from 'readline'; + +import { BoardTaskActivityParseCache } from '../taskLogs/activity/BoardTaskActivityParseCache'; + +import type { TeamTaskStallExactRow } from './TeamTaskStallTypes'; + +const logger = createLogger('Service:TeamTaskStallExactRowReader'); + +function asRecord(value: unknown): Record | null { + return value && typeof value === 'object' ? (value as Record) : null; +} + +function hasStrictTimestamp(record: Record): boolean { + return typeof record.timestamp === 'string' && Number.isFinite(Date.parse(record.timestamp)); +} + +function parseSystemSubtype(record: Record): 'turn_duration' | 'init' | undefined { + return record.subtype === 'turn_duration' || record.subtype === 'init' + ? 
record.subtype + : undefined; +} + +export class TeamTaskStallExactRowReader { + private readonly cache = new BoardTaskActivityParseCache(); + + async parseFiles(filePaths: string[]): Promise> { + const uniquePaths = [...new Set(filePaths)].sort(); + this.cache.retainOnly(new Set(uniquePaths)); + + const rows = await Promise.all( + uniquePaths.map(async (filePath) => [filePath, await this.parseFile(filePath)] as const) + ); + return new Map(rows); + } + + private async parseFile(filePath: string): Promise { + try { + const stat = await fs.stat(filePath); + const cached = this.cache.getIfFresh(filePath, stat.mtimeMs, stat.size); + if (cached !== null) { + return cached; + } + + const inFlight = this.cache.getInFlight(filePath); + if (inFlight) { + return inFlight; + } + + const promise = this.readFile(filePath); + this.cache.setInFlight(filePath, promise); + try { + const parsed = await promise; + this.cache.set(filePath, stat.mtimeMs, stat.size, parsed); + return parsed; + } finally { + this.cache.clearInFlight(filePath); + } + } catch (error) { + logger.debug(`Skipping unreadable stall exact-log transcript ${filePath}: ${String(error)}`); + this.cache.clearForPath(filePath); + return []; + } + } + + private async readFile(filePath: string): Promise { + const rows: TeamTaskStallExactRow[] = []; + const stream = createReadStream(filePath, { encoding: 'utf8' }); + const rl = readline.createInterface({ + input: stream, + crlfDelay: Infinity, + }); + + let lineCount = 0; + let sourceOrder = 0; + + for await (const line of rl) { + if (!line.trim()) { + continue; + } + lineCount += 1; + + try { + const raw = JSON.parse(line) as unknown; + const record = asRecord(raw); + if (!record || !hasStrictTimestamp(record)) { + continue; + } + + const parsed = parseJsonlLine(line); + if (!parsed) { + continue; + } + + sourceOrder += 1; + const systemSubtype = parseSystemSubtype(record); + rows.push({ + filePath, + sourceOrder, + messageUuid: parsed.uuid, + timestamp: 
record.timestamp as string, + parsedMessage: parsed, + ...(parsed.requestId ? { requestId: parsed.requestId } : {}), + ...(parsed.sourceToolUseID ? { sourceToolUseId: parsed.sourceToolUseID } : {}), + ...(parsed.sourceToolAssistantUUID + ? { sourceToolAssistantUuid: parsed.sourceToolAssistantUUID } + : {}), + ...(systemSubtype ? { systemSubtype } : {}), + toolUseIds: parsed.toolCalls.map((toolCall) => toolCall.id), + toolResultIds: parsed.toolResults.map((toolResult) => toolResult.toolUseId), + }); + } catch (error) { + logger.debug(`Skipping malformed stall exact-log line in ${filePath}: ${String(error)}`); + } + + if (lineCount % 250 === 0) { + await yieldToEventLoop(); + } + } + + return rows; + } +} diff --git a/src/main/services/team/stallMonitor/TeamTaskStallJournal.ts b/src/main/services/team/stallMonitor/TeamTaskStallJournal.ts new file mode 100644 index 00000000..316796e6 --- /dev/null +++ b/src/main/services/team/stallMonitor/TeamTaskStallJournal.ts @@ -0,0 +1,145 @@ +import { getTeamsBasePath } from '@main/utils/pathDecoder'; +import * as fs from 'fs'; +import * as path from 'path'; + +import { atomicWriteAsync } from '../atomicWrite'; +import { withFileLock } from '../fileLock'; + +import type { + TaskStallEvaluation, + TaskStallJournalEntry, + TaskStallJournalState, +} from './TeamTaskStallTypes'; + +function isValidState(value: unknown): value is TaskStallJournalState { + return value === 'suspected' || value === 'alert_ready' || value === 'alerted'; +} + +export class TeamTaskStallJournal { + private getFilePath(teamName: string): string { + return path.join(getTeamsBasePath(), teamName, 'stall-monitor-journal.json'); + } + + async reconcileScan(args: { + teamName: string; + evaluations: TaskStallEvaluation[]; + activeTaskIds: string[]; + now: string; + }): Promise { + const filePath = this.getFilePath(args.teamName); + let readyEvaluations: TaskStallEvaluation[] = []; + + await withFileLock(filePath, async () => { + const entries = await 
this.readUnlocked(filePath); + const candidateByEpoch = new Map( + args.evaluations + .filter( + ( + evaluation + ): evaluation is TaskStallEvaluation & + Required> => + evaluation.status === 'alert' && + typeof evaluation.taskId === 'string' && + typeof evaluation.branch === 'string' && + typeof evaluation.signal === 'string' && + typeof evaluation.epochKey === 'string' + ) + .map((evaluation) => [evaluation.epochKey, evaluation] as const) + ); + + const activeTaskIdSet = new Set(args.activeTaskIds); + for (let i = entries.length - 1; i >= 0; i -= 1) { + const entry = entries[i]; + if (!activeTaskIdSet.has(entry.taskId) || !candidateByEpoch.has(entry.epochKey)) { + entries.splice(i, 1); + } + } + + for (const [epochKey, evaluation] of candidateByEpoch) { + const existing = entries.find((entry) => entry.epochKey === epochKey); + if (!existing) { + entries.push({ + epochKey, + teamName: args.teamName, + taskId: evaluation.taskId, + branch: evaluation.branch, + signal: evaluation.signal, + state: 'suspected', + consecutiveScans: 1, + createdAt: args.now, + updatedAt: args.now, + }); + continue; + } + + existing.updatedAt = args.now; + if (existing.state === 'alerted') { + continue; + } + + existing.consecutiveScans += 1; + if (existing.consecutiveScans >= 2) { + existing.state = 'alert_ready'; + readyEvaluations.push(evaluation); + } + } + + await atomicWriteAsync(filePath, JSON.stringify(entries, null, 2)); + }); + + return readyEvaluations; + } + + async markAlerted(teamName: string, epochKey: string, now: string): Promise { + const filePath = this.getFilePath(teamName); + await withFileLock(filePath, async () => { + const entries = await this.readUnlocked(filePath); + const target = entries.find((entry) => entry.epochKey === epochKey); + if (!target) { + return; + } + target.state = 'alerted'; + target.updatedAt = now; + target.alertedAt = now; + await atomicWriteAsync(filePath, JSON.stringify(entries, null, 2)); + }); + } + + private async readUnlocked(filePath: 
string): Promise { + try { + const raw = await fs.promises.readFile(filePath, 'utf8'); + const parsed = JSON.parse(raw) as unknown; + if (!Array.isArray(parsed)) { + return []; + } + + return parsed + .filter( + (item): item is TaskStallJournalEntry => + item != null && + typeof item === 'object' && + typeof (item as TaskStallJournalEntry).epochKey === 'string' && + typeof (item as TaskStallJournalEntry).teamName === 'string' && + typeof (item as TaskStallJournalEntry).taskId === 'string' && + ((item as TaskStallJournalEntry).branch === 'work' || + (item as TaskStallJournalEntry).branch === 'review') && + ((item as TaskStallJournalEntry).signal === 'turn_ended_after_touch' || + (item as TaskStallJournalEntry).signal === 'mid_turn_after_touch' || + (item as TaskStallJournalEntry).signal === 'touch_then_other_turns') && + isValidState((item as TaskStallJournalEntry).state) && + typeof (item as TaskStallJournalEntry).consecutiveScans === 'number' && + typeof (item as TaskStallJournalEntry).createdAt === 'string' && + typeof (item as TaskStallJournalEntry).updatedAt === 'string' + ) + .map((entry) => ({ + ...entry, + ...(entry.alertedAt ? 
{ alertedAt: entry.alertedAt } : {}), + })); + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') { + return []; + } + throw error; + } + } +} diff --git a/src/main/services/team/stallMonitor/TeamTaskStallMonitor.ts b/src/main/services/team/stallMonitor/TeamTaskStallMonitor.ts new file mode 100644 index 00000000..c5cfbe66 --- /dev/null +++ b/src/main/services/team/stallMonitor/TeamTaskStallMonitor.ts @@ -0,0 +1,246 @@ +import { createLogger } from '@shared/utils/logger'; +import { getTaskDisplayId } from '@shared/utils/taskIdentity'; + +import { ActiveTeamRegistry } from './ActiveTeamRegistry'; +import { + getTeamTaskStallActivationGraceMs, + getTeamTaskStallScanIntervalMs, + getTeamTaskStallStartupGraceMs, + isTeamTaskStallAlertsEnabled, + isTeamTaskStallMonitorEnabled, +} from './featureGates'; + +import type { TeamTaskStallSnapshotSource } from './TeamTaskStallSnapshotSource'; +import type { TeamTaskStallPolicy } from './TeamTaskStallPolicy'; +import type { TeamTaskStallJournal } from './TeamTaskStallJournal'; +import type { TeamTaskStallNotifier } from './TeamTaskStallNotifier'; +import type { TaskStallAlert, TaskStallEvaluation } from './TeamTaskStallTypes'; +import type { TeamChangeEvent } from '@shared/types'; + +const logger = createLogger('Service:TeamTaskStallMonitor'); + +interface TeamObservationState { + firstSeenAtMs: number; + lastActivationAtMs: number; +} + +export class TeamTaskStallMonitor { + private scanTimer: ReturnType | null = null; + private nudgeTimer: ReturnType | null = null; + private scanInFlight = false; + private started = false; + private readonly observationByTeam = new Map(); + + constructor( + private readonly registry: ActiveTeamRegistry, + private readonly snapshotSource: TeamTaskStallSnapshotSource, + private readonly policy: TeamTaskStallPolicy, + private readonly journal: TeamTaskStallJournal, + private readonly notifier: TeamTaskStallNotifier + ) {} + + start(): void { + if 
(!isTeamTaskStallMonitorEnabled()) { + logger.debug('Task stall monitor disabled by feature gate'); + return; + } + if (this.started) { + return; + } + this.started = true; + this.registry.start(); + this.scheduleNextScan(2_000); + } + + async stop(): Promise { + this.started = false; + if (this.scanTimer) { + clearTimeout(this.scanTimer); + this.scanTimer = null; + } + if (this.nudgeTimer) { + clearTimeout(this.nudgeTimer); + this.nudgeTimer = null; + } + await this.registry.stop(); + } + + noteTeamChange(event: TeamChangeEvent): void { + this.registry.noteTeamChange(event); + if (!isTeamTaskStallMonitorEnabled()) { + return; + } + + if ( + event.type === 'member-spawn' || + (event.type === 'lead-activity' && event.detail !== 'offline') + ) { + const now = Date.now(); + const existing = this.observationByTeam.get(event.teamName); + this.observationByTeam.set(event.teamName, { + firstSeenAtMs: existing?.firstSeenAtMs ?? now, + lastActivationAtMs: now, + }); + this.scheduleNudgedScan(); + return; + } + + if (event.type === 'task-log-change' || event.type === 'log-source-change') { + this.scheduleNudgedScan(); + } + } + + private scheduleNextScan(delayMs: number): void { + if (!this.started) { + return; + } + if (this.scanTimer) { + clearTimeout(this.scanTimer); + } + this.scanTimer = setTimeout(() => { + this.scanTimer = null; + void this.runScan(); + }, delayMs); + } + + private scheduleNudgedScan(): void { + if (!this.started || this.nudgeTimer) { + return; + } + this.nudgeTimer = setTimeout(() => { + this.nudgeTimer = null; + void this.runScan(); + }, 5_000); + } + + private async runScan(): Promise { + if (!this.started || this.scanInFlight) { + return; + } + this.scanInFlight = true; + try { + const activeTeams = await this.registry.listActiveTeams(); + const activeSet = new Set(activeTeams); + for (const teamName of [...this.observationByTeam.keys()]) { + if (!activeSet.has(teamName)) { + this.observationByTeam.delete(teamName); + } + } + + const now = new 
Date(); + for (const teamName of activeTeams) { + const observation = this.getOrCreateObservation(teamName, now.getTime()); + const startupAgeMs = now.getTime() - observation.firstSeenAtMs; + if (startupAgeMs < getTeamTaskStallStartupGraceMs()) { + continue; + } + + const activationAgeMs = now.getTime() - observation.lastActivationAtMs; + if (activationAgeMs < getTeamTaskStallActivationGraceMs()) { + continue; + } + + await this.scanTeam(teamName, now); + } + } catch (error) { + logger.warn(`Task stall monitor scan failed: ${String(error)}`); + } finally { + this.scanInFlight = false; + this.scheduleNextScan(getTeamTaskStallScanIntervalMs()); + } + } + + private getOrCreateObservation(teamName: string, nowMs: number): TeamObservationState { + const existing = this.observationByTeam.get(teamName); + if (existing) { + return existing; + } + const created = { + firstSeenAtMs: nowMs, + lastActivationAtMs: nowMs, + }; + this.observationByTeam.set(teamName, created); + return created; + } + + private async scanTeam(teamName: string, now: Date): Promise { + const snapshot = await this.snapshotSource.getSnapshot(teamName); + if (!snapshot) { + return; + } + + const evaluations: TaskStallEvaluation[] = []; + for (const task of snapshot.inProgressTasks) { + evaluations.push(this.policy.evaluateWork({ now, task, snapshot })); + } + for (const task of snapshot.reviewOpenTasks) { + evaluations.push(this.policy.evaluateReview({ now, task, snapshot })); + } + + const activeTaskIds = [ + ...new Set([...snapshot.inProgressTasks, ...snapshot.reviewOpenTasks].map((task) => task.id)), + ]; + const readyEvaluations = await this.journal.reconcileScan({ + teamName, + evaluations, + activeTaskIds, + now: now.toISOString(), + }); + + const alerts = readyEvaluations + .map((evaluation) => this.buildAlert(snapshot, evaluation)) + .filter((alert): alert is TaskStallAlert => alert !== null); + + if (alerts.length === 0) { + return; + } + + if (!isTeamTaskStallAlertsEnabled()) { + 
logger.debug(`Task stall monitor shadow-ready alerts for ${teamName}: ${alerts.length}`); + return; + } + + await this.notifier.notifyLead(teamName, alerts); + await Promise.all( + alerts.map((alert) => this.journal.markAlerted(teamName, alert.epochKey, now.toISOString())) + ); + } + + private buildAlert( + snapshot: Awaited>, + evaluation: TaskStallEvaluation + ): TaskStallAlert | null { + if ( + !snapshot || + evaluation.status !== 'alert' || + !evaluation.taskId || + !evaluation.branch || + !evaluation.signal || + !evaluation.epochKey + ) { + return null; + } + + const task = snapshot.allTasksById.get(evaluation.taskId); + if (!task) { + return null; + } + + const displayId = getTaskDisplayId(task); + return { + teamName: snapshot.teamName, + taskId: task.id, + displayId, + subject: task.subject, + branch: evaluation.branch, + signal: evaluation.signal, + reason: evaluation.reason, + epochKey: evaluation.epochKey, + taskRef: { + taskId: task.id, + displayId, + teamName: snapshot.teamName, + }, + }; + } +} diff --git a/src/main/services/team/stallMonitor/TeamTaskStallNotifier.ts b/src/main/services/team/stallMonitor/TeamTaskStallNotifier.ts new file mode 100644 index 00000000..0f00b766 --- /dev/null +++ b/src/main/services/team/stallMonitor/TeamTaskStallNotifier.ts @@ -0,0 +1,32 @@ +import { formatTaskDisplayLabel } from '@shared/utils/taskIdentity'; + +import type { TaskStallAlert } from './TeamTaskStallTypes'; +import type { TeamDataService } from '../TeamDataService'; + +function buildLeadAlertText(alerts: TaskStallAlert[]): string { + return alerts + .map( + (alert) => + `- ${formatTaskDisplayLabel({ id: alert.taskId, displayId: alert.displayId })} [${alert.branch}] ${alert.subject} - ${alert.reason}` + ) + .join('\n'); +} + +export class TeamTaskStallNotifier { + constructor( + private readonly teamDataService: Pick + ) {} + + async notifyLead(teamName: string, alerts: TaskStallAlert[]): Promise { + if (alerts.length === 0) { + return; + } + + await 
this.teamDataService.sendSystemNotificationToLead({ + teamName, + summary: 'Potential stalled tasks detected', + text: buildLeadAlertText(alerts), + taskRefs: alerts.map((alert) => alert.taskRef), + }); + } +} diff --git a/src/main/services/team/stallMonitor/TeamTaskStallPolicy.ts b/src/main/services/team/stallMonitor/TeamTaskStallPolicy.ts new file mode 100644 index 00000000..1d339dec --- /dev/null +++ b/src/main/services/team/stallMonitor/TeamTaskStallPolicy.ts @@ -0,0 +1,508 @@ +import type { + ReviewTaskContext, + TaskStallBranch, + TaskStallEvaluation, + TaskStallSignal, + TeamTaskStallExactRow, + TeamTaskStallSnapshot, + WorkTaskContext, +} from './TeamTaskStallTypes'; +import type { BoardTaskActivityRecord } from '../taskLogs/activity/BoardTaskActivityRecord'; +import type { TeamTask, TaskWorkInterval, TaskHistoryEvent } from '@shared/types'; + +const WORK_TOUCH_TOOLS = new Set(['task_start', 'task_add_comment', 'task_set_status']); +const REVIEW_TOUCH_TOOLS = new Set(['review_start', 'task_add_comment']); + +const ONE_MINUTE_MS = 60_000; +const WORK_THRESHOLDS_MS: Record = { + turn_ended_after_touch: 8 * ONE_MINUTE_MS, + touch_then_other_turns: 10 * ONE_MINUTE_MS, + mid_turn_after_touch: 20 * ONE_MINUTE_MS, +}; +const REVIEW_THRESHOLDS_MS: Record = { + turn_ended_after_touch: 10 * ONE_MINUTE_MS, + touch_then_other_turns: 10 * ONE_MINUTE_MS, + mid_turn_after_touch: 25 * ONE_MINUTE_MS, +}; + +function skip( + taskId: string, + reason: string, + skipReason: TaskStallEvaluation['skipReason'] +): TaskStallEvaluation { + return { + status: 'skip', + taskId, + reason, + skipReason, + }; +} + +function isAfterOrEqual(timestamp: string, lowerBound: string): boolean { + return Date.parse(timestamp) >= Date.parse(lowerBound); +} + +function getOpenWorkInterval(task: TeamTask): TaskWorkInterval | null { + const intervals = task.workIntervals ?? 
[]; + for (let i = intervals.length - 1; i >= 0; i -= 1) { + const interval = intervals[i]; + if (!interval.completedAt) { + return interval; + } + } + return null; +} + +function getOpenReviewWindowStart(task: TeamTask): string | null { + if (task.reviewState !== 'review' || !task.historyEvents?.length) { + return null; + } + + for (let i = task.historyEvents.length - 1; i >= 0; i -= 1) { + const event = task.historyEvents[i]; + if (event.type === 'review_started') { + return event.timestamp; + } + if ( + event.type === 'review_approved' || + event.type === 'review_changes_requested' || + (event.type === 'status_changed' && event.to === 'in_progress') + ) { + return null; + } + } + return null; +} + +function hasReviewStartedByReviewer( + historyEvents: TaskHistoryEvent[] | undefined, + reviewer: string, + windowStartedAt: string +): boolean { + if (!historyEvents?.length) { + return false; + } + + return historyEvents.some( + (event) => + event.type === 'review_started' && + event.actor === reviewer && + isAfterOrEqual(event.timestamp, windowStartedAt) + ); +} + +function isStrongReviewTouch( + record: BoardTaskActivityRecord, + reviewer: string, + hasExplicitStartedReview: boolean, + windowStartedAt: string +): boolean { + if ( + record.actor.memberName !== reviewer || + !record.action?.canonicalToolName || + !REVIEW_TOUCH_TOOLS.has(record.action.canonicalToolName) || + !isAfterOrEqual(record.timestamp, windowStartedAt) + ) { + return false; + } + + if (record.action.canonicalToolName === 'review_start') { + return true; + } + + if ( + record.actorContext.relation === 'same_task' && + record.actorContext.activePhase === 'review' + ) { + return true; + } + + return hasExplicitStartedReview; +} + +function findLastMeaningfulWorkTouch( + records: BoardTaskActivityRecord[], + owner: string, + intervalStartedAt: string +): BoardTaskActivityRecord | null { + return ( + [...records] + .filter((record) => record.actor.memberName === owner) + .filter((record) => 
isAfterOrEqual(record.timestamp, intervalStartedAt)) + .filter((record) => WORK_TOUCH_TOOLS.has(record.action?.canonicalToolName ?? '')) + .at(-1) ?? null + ); +} + +function findLastMeaningfulReviewTouch( + records: BoardTaskActivityRecord[], + reviewer: string, + windowStartedAt: string, + hasExplicitStartedReview: boolean +): BoardTaskActivityRecord | null { + return ( + [...records] + .filter((record) => + isStrongReviewTouch(record, reviewer, hasExplicitStartedReview, windowStartedAt) + ) + .at(-1) ?? null + ); +} + +function anchorEvidenceRank(row: TeamTaskStallExactRow, toolUseId: string | undefined): number { + if (!toolUseId || row.parsedMessage.type !== 'assistant') { + return 0; + } + if (row.toolUseIds.includes(toolUseId)) { + return 2; + } + if (row.sourceToolUseId === toolUseId || row.toolResultIds.includes(toolUseId)) { + return 1; + } + return 0; +} + +function deduplicateAssistantRowsByRequestId( + rows: TeamTaskStallExactRow[], + toolUseId: string | undefined +): TeamTaskStallExactRow[] { + const preferredIndexByRequestId = new Map(); + for (let i = 0; i < rows.length; i += 1) { + const row = rows[i]; + if (row.parsedMessage.type !== 'assistant' || !row.requestId) { + continue; + } + const existingIndex = preferredIndexByRequestId.get(row.requestId); + if (existingIndex === undefined) { + preferredIndexByRequestId.set(row.requestId, i); + continue; + } + const existingRank = anchorEvidenceRank(rows[existingIndex], toolUseId); + const nextRank = anchorEvidenceRank(row, toolUseId); + if (nextRank > existingRank || (nextRank === existingRank && i > existingIndex)) { + preferredIndexByRequestId.set(row.requestId, i); + } + } + + if (preferredIndexByRequestId.size === 0) { + return rows; + } + + return rows.filter((row, index) => { + if (row.parsedMessage.type !== 'assistant' || !row.requestId) { + return true; + } + return preferredIndexByRequestId.get(row.requestId) === index; + }); +} + +function findAnchorRowIndex( + rows: TeamTaskStallExactRow[], 
+ messageUuid: string, + toolUseId?: string +): number { + const candidates = rows + .map((row, index) => ({ row, index })) + .filter(({ row }) => row.messageUuid === messageUuid); + if (candidates.length === 0) { + return -1; + } + + if (toolUseId) { + const explicitToolUse = candidates.filter(({ row }) => row.toolUseIds.includes(toolUseId)); + if (explicitToolUse.length > 0) { + return explicitToolUse.at(-1)!.index; + } + + const linkedRows = candidates.filter( + ({ row }) => row.sourceToolUseId === toolUseId || row.toolResultIds.includes(toolUseId) + ); + if (linkedRows.length > 0) { + return linkedRows.at(-1)!.index; + } + } + + return candidates.at(-1)!.index; +} + +function classifyPostTouchState(args: { + rows: TeamTaskStallExactRow[]; + anchorMessageUuid: string; + anchorToolUseId?: string; +}): TaskStallSignal | 'ambiguous' { + const normalizedRows = deduplicateAssistantRowsByRequestId(args.rows, args.anchorToolUseId); + const anchorIndex = findAnchorRowIndex( + normalizedRows, + args.anchorMessageUuid, + args.anchorToolUseId + ); + if (anchorIndex < 0) { + return 'ambiguous'; + } + + let sawTurnEnd = false; + let sawLaterRows = false; + + for (let i = anchorIndex + 1; i < normalizedRows.length; i += 1) { + const row = normalizedRows[i]; + if (row.systemSubtype === 'turn_duration') { + sawTurnEnd = true; + continue; + } + + sawLaterRows = true; + if (sawTurnEnd) { + return 'touch_then_other_turns'; + } + } + + if (sawTurnEnd) { + return 'turn_ended_after_touch'; + } + if (sawLaterRows) { + return 'mid_turn_after_touch'; + } + return 'mid_turn_after_touch'; +} + +function buildEpochKey( + task: TeamTask, + branch: TaskStallBranch, + signal: TaskStallSignal, + touch: BoardTaskActivityRecord +): string { + return [ + task.id, + branch, + signal, + touch.timestamp, + touch.source.filePath, + touch.source.messageUuid, + touch.source.toolUseId ?? 
'ambient', + ].join(':'); +} + +function buildAlertEvaluation(args: { + task: TeamTask; + branch: TaskStallBranch; + signal: TaskStallSignal; + touch: BoardTaskActivityRecord; + reason: string; +}): TaskStallEvaluation { + return { + status: 'alert', + taskId: args.task.id, + branch: args.branch, + signal: args.signal, + epochKey: buildEpochKey(args.task, args.branch, args.signal, args.touch), + reason: args.reason, + }; +} + +export class TeamTaskStallPolicy { + evaluateWork(args: { + now: Date; + task: TeamTask; + snapshot: TeamTaskStallSnapshot; + }): TaskStallEvaluation { + const { task, snapshot } = args; + + if (!snapshot.activityReadsEnabled) { + return skip(task.id, 'Task activity reads are disabled', 'activity_reads_disabled'); + } + if (!snapshot.exactReadsEnabled) { + return skip(task.id, 'Exact log reads are disabled', 'exact_reads_disabled'); + } + if (task.status !== 'in_progress') { + return skip(task.id, 'Task is not in progress', 'task_not_in_progress'); + } + if (!task.owner) { + return skip(task.id, 'Task has no owner', 'owner_missing'); + } + if (task.owner === snapshot.leadName) { + return skip(task.id, 'Task owner is the lead', 'owner_is_lead'); + } + if (task.reviewState === 'review') { + return skip(task.id, 'Task is currently under review', 'review_active'); + } + if (task.blockedBy?.length) { + return skip(task.id, 'Task is blocked', 'task_blocked'); + } + if (task.needsClarification) { + return skip(task.id, 'Task is waiting for clarification', 'needs_clarification'); + } + + const openWorkInterval = getOpenWorkInterval(task); + if (!openWorkInterval?.startedAt) { + return skip(task.id, 'Task has no open work interval', 'no_open_work_interval'); + } + + const records = snapshot.recordsByTaskId.get(task.id) ?? 
[]; + if (records.length === 0 && !snapshot.freshnessByTaskId.has(task.id)) { + return skip( + task.id, + 'Task run is not instrumented enough for stall evaluation', + 'non_instrumented_run' + ); + } + + const workContext: WorkTaskContext | null = (() => { + const touch = findLastMeaningfulWorkTouch(records, task.owner!, openWorkInterval.startedAt); + if (!touch) { + return null; + } + return { + owner: task.owner!, + intervalStartedAt: openWorkInterval.startedAt, + lastMeaningfulTouch: touch, + lastMeaningfulTouchAt: touch.timestamp, + }; + })(); + + if (!workContext) { + return skip( + task.id, + 'No positive work touch found in current work interval', + 'no_positive_touch' + ); + } + + const exactRows = snapshot.exactRowsByFilePath.get( + workContext.lastMeaningfulTouch.source.filePath + ); + if (!exactRows?.length) { + return skip(task.id, 'Post-touch exact rows are unavailable', 'ambiguous_state'); + } + + const signal = classifyPostTouchState({ + rows: exactRows, + anchorMessageUuid: workContext.lastMeaningfulTouch.source.messageUuid, + anchorToolUseId: workContext.lastMeaningfulTouch.source.toolUseId, + }); + if (signal === 'ambiguous') { + return skip(task.id, 'Post-touch state is ambiguous', 'ambiguous_state'); + } + + const elapsedMs = args.now.getTime() - Date.parse(workContext.lastMeaningfulTouchAt); + const thresholdMs = WORK_THRESHOLDS_MS[signal]; + if (elapsedMs < thresholdMs) { + return skip( + task.id, + 'Work touch is still below the configured stall threshold', + 'below_threshold' + ); + } + + return buildAlertEvaluation({ + task, + branch: 'work', + signal, + touch: workContext.lastMeaningfulTouch, + reason: `Potential work stall after ${signal.replaceAll('_', ' ')}.`, + }); + } + + evaluateReview(args: { + now: Date; + task: TeamTask; + snapshot: TeamTaskStallSnapshot; + }): TaskStallEvaluation { + const { task, snapshot } = args; + + if (!snapshot.activityReadsEnabled) { + return skip(task.id, 'Task activity reads are disabled', 
'activity_reads_disabled'); + } + if (!snapshot.exactReadsEnabled) { + return skip(task.id, 'Exact log reads are disabled', 'exact_reads_disabled'); + } + if (task.reviewState !== 'review') { + return skip(task.id, 'Task is not in an open review window', 'review_terminal'); + } + if (task.needsClarification) { + return skip(task.id, 'Task is waiting for clarification', 'needs_clarification'); + } + + const reviewWindowStartedAt = getOpenReviewWindowStart(task); + if (!reviewWindowStartedAt) { + return skip(task.id, 'Task has no open review window', 'no_open_review_window'); + } + + const resolvedReviewer = snapshot.resolvedReviewersByTaskId.get(task.id) ?? { + reviewer: null, + source: 'none', + }; + if (!resolvedReviewer.reviewer) { + return skip(task.id, 'Reviewer could not be resolved safely', 'reviewer_unresolved'); + } + + const records = snapshot.recordsByTaskId.get(task.id) ?? []; + if (records.length === 0 && !snapshot.freshnessByTaskId.has(task.id)) { + return skip( + task.id, + 'Review run is not instrumented enough for stall evaluation', + 'non_instrumented_run' + ); + } + + const explicitReviewStarted = hasReviewStartedByReviewer( + task.historyEvents, + resolvedReviewer.reviewer, + reviewWindowStartedAt + ); + const reviewContext: ReviewTaskContext | null = (() => { + const touch = findLastMeaningfulReviewTouch( + records, + resolvedReviewer.reviewer!, + reviewWindowStartedAt, + explicitReviewStarted + ); + if (!touch) { + return null; + } + return { + resolvedReviewer, + reviewWindowStartedAt, + lastMeaningfulTouch: touch, + lastMeaningfulTouchAt: touch.timestamp, + }; + })(); + + if (!reviewContext) { + return skip(task.id, 'No explicit started-review evidence was found', 'no_positive_touch'); + } + + const exactRows = snapshot.exactRowsByFilePath.get( + reviewContext.lastMeaningfulTouch.source.filePath + ); + if (!exactRows?.length) { + return skip(task.id, 'Post-review exact rows are unavailable', 'ambiguous_state'); + } + + const signal = 
classifyPostTouchState({ + rows: exactRows, + anchorMessageUuid: reviewContext.lastMeaningfulTouch.source.messageUuid, + anchorToolUseId: reviewContext.lastMeaningfulTouch.source.toolUseId, + }); + if (signal === 'ambiguous') { + return skip(task.id, 'Post-review state is ambiguous', 'ambiguous_state'); + } + + const elapsedMs = args.now.getTime() - Date.parse(reviewContext.lastMeaningfulTouchAt); + const thresholdMs = REVIEW_THRESHOLDS_MS[signal]; + if (elapsedMs < thresholdMs) { + return skip( + task.id, + 'Review touch is still below the configured stall threshold', + 'below_threshold' + ); + } + + return buildAlertEvaluation({ + task, + branch: 'review', + signal, + touch: reviewContext.lastMeaningfulTouch, + reason: `Potential started-review stall after ${signal.replaceAll('_', ' ')}.`, + }); + } +} diff --git a/src/main/services/team/stallMonitor/TeamTaskStallSnapshotSource.ts b/src/main/services/team/stallMonitor/TeamTaskStallSnapshotSource.ts new file mode 100644 index 00000000..b6118f28 --- /dev/null +++ b/src/main/services/team/stallMonitor/TeamTaskStallSnapshotSource.ts @@ -0,0 +1,119 @@ +import { TeamTaskReader } from '../TeamTaskReader'; +import { TeamKanbanManager } from '../TeamKanbanManager'; +import { TeamTranscriptSourceLocator } from '../taskLogs/discovery/TeamTranscriptSourceLocator'; +import { BoardTaskActivityTranscriptReader } from '../taskLogs/activity/BoardTaskActivityTranscriptReader'; +import { isBoardTaskActivityReadEnabled } from '../taskLogs/activity/featureGates'; +import { isBoardTaskExactLogsReadEnabled } from '../taskLogs/exact/featureGates'; + +import { BoardTaskActivityBatchIndexer } from './BoardTaskActivityBatchIndexer'; +import { TeamTaskLogFreshnessReader } from './TeamTaskLogFreshnessReader'; +import { TeamTaskStallExactRowReader } from './TeamTaskStallExactRowReader'; +import { buildResolvedReviewerIndex } from './reviewerResolution'; + +import type { BoardTaskActivityRecord } from 
'../taskLogs/activity/BoardTaskActivityRecord'; +import type { TeamTaskStallSnapshot } from './TeamTaskStallTypes'; +import type { TeamConfig, TeamTask } from '@shared/types'; + +function resolveLeadNameFromConfig(config: TeamConfig): string { + const lead = config.members?.find((member) => member.role?.toLowerCase().includes('lead')); + return lead?.name ?? config.members?.[0]?.name ?? 'team-lead'; +} + +export class TeamTaskStallSnapshotSource { + constructor( + private readonly transcriptSourceLocator: TeamTranscriptSourceLocator = new TeamTranscriptSourceLocator(), + private readonly taskReader: TeamTaskReader = new TeamTaskReader(), + private readonly kanbanManager: TeamKanbanManager = new TeamKanbanManager(), + private readonly transcriptReader: BoardTaskActivityTranscriptReader = new BoardTaskActivityTranscriptReader(), + private readonly activityBatchIndexer: BoardTaskActivityBatchIndexer = new BoardTaskActivityBatchIndexer(), + private readonly freshnessReader: TeamTaskLogFreshnessReader = new TeamTaskLogFreshnessReader(), + private readonly exactRowReader: TeamTaskStallExactRowReader = new TeamTaskStallExactRowReader() + ) {} + + async getSnapshot(teamName: string): Promise { + const transcriptContext = await this.transcriptSourceLocator.getContext(teamName); + if (!transcriptContext) { + return null; + } + + const [activeTasks, deletedTasks, kanbanState] = await Promise.all([ + this.taskReader.getTasks(teamName), + this.taskReader.getDeletedTasks(teamName), + this.kanbanManager.getState(teamName), + ]); + const allTasks = [...activeTasks, ...deletedTasks]; + const allTasksById = new Map(allTasks.map((task) => [task.id, task] as const)); + const inProgressTasks = activeTasks.filter( + (task) => task.status === 'in_progress' && task.reviewState !== 'review' + ); + const reviewOpenTasks = activeTasks.filter((task) => task.reviewState === 'review'); + const resolvedReviewersByTaskId = buildResolvedReviewerIndex(activeTasks, kanbanState); + const 
activityReadsEnabled = isBoardTaskActivityReadEnabled(); + const exactReadsEnabled = isBoardTaskExactLogsReadEnabled(); + + let recordsByTaskId = new Map(); + if ( + activityReadsEnabled && + allTasks.length > 0 && + transcriptContext.transcriptFiles.length > 0 + ) { + const messages = await this.transcriptReader.readFiles(transcriptContext.transcriptFiles); + recordsByTaskId = this.activityBatchIndexer.buildIndex({ + teamName, + tasks: allTasks, + messages, + }); + } + + const relevantMonitorTasks = [...inProgressTasks, ...reviewOpenTasks]; + const relevantExactFiles = this.collectRelevantExactFiles( + relevantMonitorTasks, + recordsByTaskId + ); + const [freshnessByTaskId, exactRowsByFilePath] = await Promise.all([ + this.freshnessReader.readSignals( + transcriptContext.projectDir, + relevantMonitorTasks.map((task) => task.id) + ), + exactReadsEnabled + ? this.exactRowReader.parseFiles(relevantExactFiles) + : Promise.resolve(new Map()), + ]); + + return { + teamName, + scannedAt: new Date().toISOString(), + projectDir: transcriptContext.projectDir, + projectId: transcriptContext.projectId, + leadName: resolveLeadNameFromConfig(transcriptContext.config), + transcriptFiles: transcriptContext.transcriptFiles, + activityReadsEnabled, + exactReadsEnabled, + activeTasks, + deletedTasks, + allTasksById, + inProgressTasks, + reviewOpenTasks, + resolvedReviewersByTaskId, + recordsByTaskId, + freshnessByTaskId, + exactRowsByFilePath, + }; + } + + private collectRelevantExactFiles( + inProgressTasks: TeamTask[], + recordsByTaskId: Map + ): string[] { + const filePaths = new Set(); + + for (const task of inProgressTasks) { + const records = recordsByTaskId.get(task.id) ?? 
[]; + for (const record of records) { + filePaths.add(record.source.filePath); + } + } + + return [...filePaths].sort((left, right) => left.localeCompare(right)); + } +} diff --git a/src/main/services/team/stallMonitor/TeamTaskStallTypes.ts b/src/main/services/team/stallMonitor/TeamTaskStallTypes.ts new file mode 100644 index 00000000..46550e05 --- /dev/null +++ b/src/main/services/team/stallMonitor/TeamTaskStallTypes.ts @@ -0,0 +1,139 @@ +import type { BoardTaskActivityRecord } from '../taskLogs/activity/BoardTaskActivityRecord'; +import type { ParsedMessage } from '@main/types'; +import type { TeamTask } from '@shared/types'; + +export type TaskStallBranch = 'work' | 'review'; + +export type TaskStallSignal = + | 'turn_ended_after_touch' + | 'mid_turn_after_touch' + | 'touch_then_other_turns'; + +export type TaskStallEvaluationStatus = 'skip' | 'suspected' | 'alert'; + +export type TaskStallSkipReason = + | 'task_not_in_progress' + | 'owner_missing' + | 'owner_is_lead' + | 'task_blocked' + | 'needs_clarification' + | 'review_active' + | 'review_terminal' + | 'reviewer_unresolved' + | 'non_instrumented_run' + | 'activity_reads_disabled' + | 'exact_reads_disabled' + | 'no_positive_touch' + | 'no_open_work_interval' + | 'no_open_review_window' + | 'ambiguous_state' + | 'below_threshold' + | 'first_scan_only'; + +export type ResolvedReviewerSource = + | 'kanban_state' + | 'history_review_approved_actor' + | 'history_review_started_actor' + | 'history_review_requested_reviewer' + | 'none'; + +export interface ResolvedReviewer { + reviewer: string | null; + source: ResolvedReviewerSource; +} + +export interface TaskStallEvaluation { + status: TaskStallEvaluationStatus; + taskId?: string; + branch?: TaskStallBranch; + signal?: TaskStallSignal; + epochKey?: string; + reason: string; + skipReason?: TaskStallSkipReason; +} + +export interface TaskLogFreshnessSignal { + taskId: string; + updatedAt: string; + filePath: string; + transcriptFileBasename?: string; +} + +export 
interface TeamTaskStallExactRow { + filePath: string; + sourceOrder: number; + messageUuid: string; + timestamp: string; + parsedMessage: ParsedMessage; + requestId?: string; + sourceToolUseId?: string; + sourceToolAssistantUuid?: string; + systemSubtype?: 'turn_duration' | 'init'; + toolUseIds: string[]; + toolResultIds: string[]; +} + +export interface TeamTaskStallSnapshot { + teamName: string; + scannedAt: string; + projectDir: string; + projectId: string; + leadName: string; + transcriptFiles: string[]; + activityReadsEnabled: boolean; + exactReadsEnabled: boolean; + activeTasks: TeamTask[]; + deletedTasks: TeamTask[]; + allTasksById: Map; + inProgressTasks: TeamTask[]; + reviewOpenTasks: TeamTask[]; + resolvedReviewersByTaskId: Map; + recordsByTaskId: Map; + freshnessByTaskId: Map; + exactRowsByFilePath: Map; +} + +export interface WorkTaskContext { + owner: string; + intervalStartedAt: string; + lastMeaningfulTouch: BoardTaskActivityRecord; + lastMeaningfulTouchAt: string; +} + +export interface ReviewTaskContext { + resolvedReviewer: ResolvedReviewer; + reviewWindowStartedAt: string; + lastMeaningfulTouch: BoardTaskActivityRecord; + lastMeaningfulTouchAt: string; +} + +export interface TaskStallAlert { + teamName: string; + taskId: string; + displayId: string; + subject: string; + branch: TaskStallBranch; + signal: TaskStallSignal; + reason: string; + epochKey: string; + taskRef: { + taskId: string; + displayId: string; + teamName: string; + }; +} + +export type TaskStallJournalState = 'suspected' | 'alert_ready' | 'alerted'; + +export interface TaskStallJournalEntry { + epochKey: string; + teamName: string; + taskId: string; + branch: TaskStallBranch; + signal: TaskStallSignal; + state: TaskStallJournalState; + consecutiveScans: number; + createdAt: string; + updatedAt: string; + alertedAt?: string; +} diff --git a/src/main/services/team/stallMonitor/featureGates.ts b/src/main/services/team/stallMonitor/featureGates.ts new file mode 100644 index 
00000000..f9c24682 --- /dev/null +++ b/src/main/services/team/stallMonitor/featureGates.ts @@ -0,0 +1,42 @@ +function readEnabledFlag(value: string | undefined, defaultValue: boolean): boolean { + if (value == null) { + return defaultValue; + } + + const normalized = value.trim().toLowerCase(); + if (normalized === '0' || normalized === 'false' || normalized === 'off' || normalized === 'no') { + return false; + } + if (normalized === '1' || normalized === 'true' || normalized === 'on' || normalized === 'yes') { + return true; + } + return defaultValue; +} + +function readInt(value: string | undefined, defaultValue: number): number { + if (value == null) { + return defaultValue; + } + const parsed = Number.parseInt(value.trim(), 10); + return Number.isFinite(parsed) && parsed > 0 ? parsed : defaultValue; +} + +export function isTeamTaskStallMonitorEnabled(): boolean { + return readEnabledFlag(process.env.CLAUDE_TEAM_TASK_STALL_MONITOR_ENABLED, false); +} + +export function isTeamTaskStallAlertsEnabled(): boolean { + return readEnabledFlag(process.env.CLAUDE_TEAM_TASK_STALL_ALERTS_ENABLED, false); +} + +export function getTeamTaskStallScanIntervalMs(): number { + return readInt(process.env.CLAUDE_TEAM_TASK_STALL_SCAN_INTERVAL_MS, 60_000); +} + +export function getTeamTaskStallStartupGraceMs(): number { + return readInt(process.env.CLAUDE_TEAM_TASK_STALL_STARTUP_GRACE_MS, 180_000); +} + +export function getTeamTaskStallActivationGraceMs(): number { + return readInt(process.env.CLAUDE_TEAM_TASK_STALL_ACTIVATION_GRACE_MS, 120_000); +} diff --git a/src/main/services/team/stallMonitor/reviewerResolution.ts b/src/main/services/team/stallMonitor/reviewerResolution.ts new file mode 100644 index 00000000..962f4f84 --- /dev/null +++ b/src/main/services/team/stallMonitor/reviewerResolution.ts @@ -0,0 +1,47 @@ +import { TeamKanbanManager } from '../TeamKanbanManager'; + +import type { ResolvedReviewer } from './TeamTaskStallTypes'; +import type { TeamTask } from '@shared/types'; 
+ +export function resolveReviewerFromHistory(task: TeamTask): ResolvedReviewer { + if (!task.historyEvents?.length) { + return { reviewer: null, source: 'none' }; + } + + for (let i = task.historyEvents.length - 1; i >= 0; i -= 1) { + const event = task.historyEvents[i]; + if (event.type === 'review_approved' && event.actor) { + return { reviewer: event.actor, source: 'history_review_approved_actor' }; + } + if (event.type === 'review_started' && event.actor) { + return { reviewer: event.actor, source: 'history_review_started_actor' }; + } + if (event.type === 'review_requested' && event.reviewer) { + return { reviewer: event.reviewer, source: 'history_review_requested_reviewer' }; + } + } + + return { reviewer: null, source: 'none' }; +} + +export function buildResolvedReviewerIndex( + tasks: TeamTask[], + kanbanState: Awaited> +): Map { + const resolved = new Map(); + + for (const task of tasks) { + const kanbanReviewer = kanbanState.tasks[task.id]?.reviewer; + if (typeof kanbanReviewer === 'string' && kanbanReviewer.trim().length > 0) { + resolved.set(task.id, { + reviewer: kanbanReviewer.trim(), + source: 'kanban_state', + }); + continue; + } + + resolved.set(task.id, resolveReviewerFromHistory(task)); + } + + return resolved; +} diff --git a/src/main/services/team/taskLogs/activity/BoardTaskActivityRecordBuilder.ts b/src/main/services/team/taskLogs/activity/BoardTaskActivityRecordBuilder.ts index fc58f657..01d780a3 100644 --- a/src/main/services/team/taskLogs/activity/BoardTaskActivityRecordBuilder.ts +++ b/src/main/services/team/taskLogs/activity/BoardTaskActivityRecordBuilder.ts @@ -312,6 +312,21 @@ function compareRecords(left: BoardTaskActivityRecord, right: BoardTaskActivityR return left.id.localeCompare(right.id); } +function resolveCandidateTaskIds(locator: BoardTaskLocator, lookup: TaskLookup): string[] { + const canonicalTask = + (locator.canonicalId && lookup.byId.get(locator.canonicalId)) || + (locator.refKind === 'canonical' ? 
lookup.byId.get(locator.ref) : undefined) || + (locator.refKind === 'unknown' && looksLikeCanonicalTaskId(locator.ref) + ? lookup.byId.get(locator.ref) + : undefined); + if (canonicalTask) { + return [canonicalTask.id]; + } + + const displayCandidates = lookup.byDisplayId.get(normalizeDisplayRef(locator.ref)) ?? []; + return [...new Set(displayCandidates.map((task) => task.id))]; +} + export class BoardTaskActivityRecordBuilder { buildForTask(args: { teamName: string; @@ -319,64 +334,98 @@ export class BoardTaskActivityRecordBuilder { tasks: TeamTask[]; messages: RawTaskActivityMessage[]; }): BoardTaskActivityRecord[] { + return ( + this.buildForTasks({ + teamName: args.teamName, + tasks: args.tasks, + messages: args.messages, + }).get(args.targetTask.id) ?? [] + ); + } + + buildForTasks(args: { + teamName: string; + tasks: TeamTask[]; + messages: RawTaskActivityMessage[]; + }): Map { const lookup = buildTaskLookup(args.tasks); - const records: BoardTaskActivityRecord[] = []; - const seenIds = new Set(); + const recordsByTaskId = new Map(); + const seenIdsByTaskId = new Map>(); for (const message of args.messages) { const actionMap = buildActionMap(message.boardTaskToolActions); for (const link of message.boardTaskLinks) { const resolvedTask = resolveLocatorToTaskRef(args.teamName, link.task, lookup); - if ( - resolvedTask.taskRef?.taskId !== args.targetTask.id && - !locatorCouldMatchTask(link.task, args.targetTask, lookup) - ) { + const candidateTaskIds = resolveCandidateTaskIds(link.task, lookup); + if (candidateTaskIds.length === 0) { continue; } - const action = link.linkKind === 'execution' || !link.toolUseId ? undefined : actionMap.get(link.toolUseId); - const peerTask = resolvePeerTask( - args.teamName, - link, - message.boardTaskLinks, - args.targetTask, - lookup - ); - const record: BoardTaskActivityRecord = { - id: [ - message.uuid, - link.toolUseId ?? 
'ambient', - link.task.ref, - link.targetRole, - link.linkKind, - ].join(':'), - timestamp: message.timestamp, - task: resolvedTask, - linkKind: link.linkKind, - targetRole: link.targetRole, - actor: resolveActivityActor(message), - actorContext: buildActorContext(args.teamName, link.actorContext, lookup), - ...(action ? { action: buildAction({ action, link, peerTask }) } : {}), - source: { - messageUuid: message.uuid, - filePath: message.filePath, - ...(link.toolUseId ? { toolUseId: link.toolUseId } : {}), - sourceOrder: message.sourceOrder, - }, - }; - if (seenIds.has(record.id)) { - continue; + for (const taskId of candidateTaskIds) { + const targetTask = lookup.byId.get(taskId); + if (!targetTask) { + continue; + } + if ( + resolvedTask.taskRef?.taskId !== targetTask.id && + !locatorCouldMatchTask(link.task, targetTask, lookup) + ) { + continue; + } + + const peerTask = resolvePeerTask( + args.teamName, + link, + message.boardTaskLinks, + targetTask, + lookup + ); + const record: BoardTaskActivityRecord = { + id: [ + message.uuid, + link.toolUseId ?? 'ambient', + link.task.ref, + link.targetRole, + link.linkKind, + ].join(':'), + timestamp: message.timestamp, + task: resolvedTask, + linkKind: link.linkKind, + targetRole: link.targetRole, + actor: resolveActivityActor(message), + actorContext: buildActorContext(args.teamName, link.actorContext, lookup), + ...(action ? { action: buildAction({ action, link, peerTask }) } : {}), + source: { + messageUuid: message.uuid, + filePath: message.filePath, + ...(link.toolUseId ? { toolUseId: link.toolUseId } : {}), + sourceOrder: message.sourceOrder, + }, + }; + + const seenIds = seenIdsByTaskId.get(taskId) ?? new Set(); + if (seenIds.has(record.id)) { + continue; + } + seenIds.add(record.id); + seenIdsByTaskId.set(taskId, seenIds); + + const taskRecords = recordsByTaskId.get(taskId) ?? 
[]; + taskRecords.push(record); + recordsByTaskId.set(taskId, taskRecords); } - seenIds.add(record.id); - records.push(record); } } - return records.sort(compareRecords); + for (const [taskId, records] of recordsByTaskId) { + recordsByTaskId.set(taskId, records.sort(compareRecords)); + } + + return recordsByTaskId; } } diff --git a/src/main/types/jsonl.ts b/src/main/types/jsonl.ts index c745db54..96b9b730 100644 --- a/src/main/types/jsonl.ts +++ b/src/main/types/jsonl.ts @@ -198,9 +198,22 @@ export interface AssistantEntry extends ConversationalEntry { export interface SystemEntry extends ConversationalEntry { type: 'system'; - subtype: 'turn_duration' | 'init'; - durationMs: number; + subtype?: 'turn_duration' | 'init' | 'informational' | 'permission_retry' | 'api_retry' | string; + durationMs?: number; isMeta: boolean; + content?: string; + level?: 'info' | 'warning' | 'error' | 'suggestion' | string; + toolUseID?: string; + preventContinuation?: boolean; + codexNativeWarningSource?: string; + codexNativeThreadStatus?: string; + codexNativeThreadId?: string; + codexNativeCompletionPolicy?: 'ephemeral' | 'persistent' | string; + codexNativeHistoryCompleteness?: string; + codexNativeFinalUsageAuthority?: string; + codexNativeExecutablePath?: string; + codexNativeExecutableSource?: string; + codexNativeExecutableVersion?: string | null; } export interface SummaryEntry extends BaseEntry { diff --git a/src/main/types/messages.ts b/src/main/types/messages.ts index 12d87a2d..2865cf0c 100644 --- a/src/main/types/messages.ts +++ b/src/main/types/messages.ts @@ -109,6 +109,19 @@ export interface ParsedMessage { isCompactSummary?: boolean; /** API request ID for deduplicating streaming entries */ requestId?: string; + /** System-message severity when available in the raw transcript */ + level?: string; + /** Raw system subtype when available in the transcript */ + subtype?: string; + codexNativeWarningSource?: string; + codexNativeThreadStatus?: string; + 
codexNativeThreadId?: string; + codexNativeCompletionPolicy?: string; + codexNativeHistoryCompleteness?: string; + codexNativeFinalUsageAuthority?: string; + codexNativeExecutablePath?: string; + codexNativeExecutableSource?: string; + codexNativeExecutableVersion?: string | null; } // ============================================================================= diff --git a/src/main/utils/jsonl.ts b/src/main/utils/jsonl.ts index 17b249e6..72320061 100644 --- a/src/main/utils/jsonl.ts +++ b/src/main/utils/jsonl.ts @@ -244,6 +244,17 @@ function parseChatHistoryEntry(entry: ChatHistoryEntry): ParsedMessage | null { let gitBranch: string | undefined; let agentId: string | undefined; let agentName: string | undefined; + let level: string | undefined; + let subtype: string | undefined; + let codexNativeWarningSource: string | undefined; + let codexNativeThreadStatus: string | undefined; + let codexNativeThreadId: string | undefined; + let codexNativeCompletionPolicy: string | undefined; + let codexNativeHistoryCompleteness: string | undefined; + let codexNativeFinalUsageAuthority: string | undefined; + let codexNativeExecutablePath: string | undefined; + let codexNativeExecutableSource: string | undefined; + let codexNativeExecutableVersion: string | null | undefined; let isSidechain = false; let isMeta = false; let userType: string | undefined; @@ -283,7 +294,19 @@ function parseChatHistoryEntry(entry: ChatHistoryEntry): ParsedMessage | null { agentId = entry.agentId; requestId = entry.requestId; } else if (entry.type === 'system') { + content = entry.content ?? ''; isMeta = entry.isMeta ?? 
false; + level = entry.level; + subtype = entry.subtype; + codexNativeWarningSource = entry.codexNativeWarningSource; + codexNativeThreadStatus = entry.codexNativeThreadStatus; + codexNativeThreadId = entry.codexNativeThreadId; + codexNativeCompletionPolicy = entry.codexNativeCompletionPolicy; + codexNativeHistoryCompleteness = entry.codexNativeHistoryCompleteness; + codexNativeFinalUsageAuthority = entry.codexNativeFinalUsageAuthority; + codexNativeExecutablePath = entry.codexNativeExecutablePath; + codexNativeExecutableSource = entry.codexNativeExecutableSource; + codexNativeExecutableVersion = entry.codexNativeExecutableVersion; } } @@ -310,6 +333,17 @@ function parseChatHistoryEntry(entry: ChatHistoryEntry): ParsedMessage | null { isMeta, userType, isCompactSummary, + level, + subtype, + codexNativeWarningSource, + codexNativeThreadStatus, + codexNativeThreadId, + codexNativeCompletionPolicy, + codexNativeHistoryCompleteness, + codexNativeFinalUsageAuthority, + codexNativeExecutablePath, + codexNativeExecutableSource, + codexNativeExecutableVersion, // Tool info toolCalls, toolResults: toolResultsList, diff --git a/src/main/utils/metadataExtraction.ts b/src/main/utils/metadataExtraction.ts index f28146ff..344e565b 100644 --- a/src/main/utils/metadataExtraction.ts +++ b/src/main/utils/metadataExtraction.ts @@ -4,6 +4,7 @@ import { isCommandOutputContent, sanitizeDisplayContent } from '@shared/utils/contentSanitizer'; import { createLogger } from '@shared/utils/logger'; +import * as fs from 'fs/promises'; import * as readline from 'readline'; import { LocalFileSystemProvider } from '../services/infrastructure/LocalFileSystemProvider'; @@ -29,7 +30,7 @@ function normalizeDriveLetter(p: string): string { const defaultProvider = new LocalFileSystemProvider(); -const JSONL_HEAD_TIMEOUT_MS = 2000; +const JSONL_HEAD_TIMEOUT_MS = 5000; const JSONL_HEAD_MAX_BYTES = 256 * 1024; const JSONL_HEAD_MAX_LINES = 400; @@ -53,6 +54,41 @@ function createStreamCleanup(rl: 
readline.Interface, fileStream: Readable): () = }; } +function extractCwdFromBufferedText(text: string): string | null { + const lines = text.split(/\r?\n/, JSONL_HEAD_MAX_LINES); + for (const line of lines) { + if (!line.trim()) continue; + + let entry: ChatHistoryEntry; + try { + entry = JSON.parse(line) as ChatHistoryEntry; + } catch { + continue; + } + + if ('cwd' in entry && entry.cwd) { + return normalizeDriveLetter(translateWslMountPath(entry.cwd)); + } + } + + return null; +} + +async function extractCwdFromLocalFile(filePath: string): Promise { + const handle = await fs.open(filePath, 'r'); + try { + const buffer = Buffer.alloc(JSONL_HEAD_MAX_BYTES); + const { bytesRead } = await handle.read(buffer, 0, JSONL_HEAD_MAX_BYTES, 0); + if (bytesRead <= 0) { + return null; + } + + return extractCwdFromBufferedText(buffer.toString('utf8', 0, bytesRead)); + } finally { + await handle.close().catch(() => undefined); + } +} + /** * Extract CWD (current working directory) from the first entry. * Used to get the actual project path from encoded directory names. 
@@ -74,6 +110,15 @@ export async function extractCwd( return null; } + if (fsProvider.type === 'local') { + try { + return await extractCwdFromLocalFile(filePath); + } catch (error) { + logger.debug(`Error extracting cwd from local file ${filePath}:`, error); + return null; + } + } + const fileStream = fsProvider.createReadStream(filePath, { encoding: 'utf8' }); const rl = readline.createInterface({ input: fileStream, diff --git a/src/main/workers/team-fs-worker.ts b/src/main/workers/team-fs-worker.ts index 89c83b83..6ad828ef 100644 --- a/src/main/workers/team-fs-worker.ts +++ b/src/main/workers/team-fs-worker.ts @@ -4,6 +4,7 @@ import { parentPort } from 'node:worker_threads'; import { normalizePersistedLaunchSnapshot } from '@main/services/team/TeamLaunchStateEvaluator'; import { isLeadMember } from '@shared/utils/leadDetection'; +import { buildTeamMemberColorMap } from '@shared/utils/teamMemberColors'; interface ListTeamsPayload { teamsDir: string; @@ -593,6 +594,11 @@ async function listTeams( dropCliProvisionerMembers(memberMap); const members = Array.from(memberMap.values()); + const memberColors = buildTeamMemberColorMap(members, { preferProvidedColors: false }); + const coloredMembers = members.map((member) => ({ + ...member, + color: memberColors.get(member.name) ?? member.color, + })); const launchStateSummary = (await readLaunchState(payload.teamsDir, teamName)) ?? (() => { @@ -623,7 +629,7 @@ async function listTeams( memberCount: memberMap.size, taskCount: 0, lastActivity: null, - ...(members.length > 0 ? { members } : {}), + ...(coloredMembers.length > 0 ? { members: coloredMembers } : {}), ...(color ? { color } : {}), ...(projectPath ? { projectPath } : {}), ...(leadSessionId ? 
{ leadSessionId } : {}), diff --git a/src/preload/index.ts b/src/preload/index.ts index ac737d09..ccf85725 100644 --- a/src/preload/index.ts +++ b/src/preload/index.ts @@ -1,3 +1,4 @@ +import { createCodexAccountBridge } from '@features/codex-account/preload'; import { createRecentProjectsBridge } from '@features/recent-projects/preload'; import { createTmuxInstallerBridge } from '@features/tmux-installer/preload'; import { WINDOW_ZOOM_FACTOR_CHANGED_CHANNEL } from '@shared/constants'; @@ -121,6 +122,7 @@ import { TEAM_DELETE_DRAFT, TEAM_DELETE_TASK_ATTACHMENT, TEAM_DELETE_TEAM, + TEAM_GET_AGENT_RUNTIME, TEAM_GET_ALL_TASKS, TEAM_GET_ATTACHMENTS, TEAM_GET_CLAUDE_LOGS, @@ -147,7 +149,6 @@ import { TEAM_LEAD_CONTEXT, TEAM_LIST, TEAM_MEMBER_SPAWN_STATUSES, - TEAM_GET_AGENT_RUNTIME, TEAM_PERMANENTLY_DELETE, TEAM_PREPARE_PROVISIONING, TEAM_PROCESS_ALIVE, @@ -165,9 +166,9 @@ import { TEAM_SAVE_TASK_ATTACHMENT, TEAM_SEND_MESSAGE, TEAM_SET_CHANGE_PRESENCE_TRACKING, - TEAM_SET_TASK_LOG_STREAM_TRACKING, TEAM_SET_PROJECT_BRANCH_TRACKING, TEAM_SET_TASK_CLARIFICATION, + TEAM_SET_TASK_LOG_STREAM_TRACKING, TEAM_SET_TOOL_ACTIVITY_TRACKING, TEAM_SHOW_MESSAGE_NOTIFICATION, TEAM_SOFT_DELETE_TASK, @@ -270,7 +271,6 @@ import type { LeadContextUsageSnapshot, MemberFullStats, MemberLogSummary, - TeamAgentRuntimeSnapshot, MemberSpawnStatusesSnapshot, MessagesPage, NotificationTrigger, @@ -293,6 +293,7 @@ import type { TaskChangePresenceState, TaskChangeSetV2, TaskComment, + TeamAgentRuntimeSnapshot, TeamChangeEvent, TeamClaudeLogsQuery, TeamClaudeLogsResponse, @@ -458,6 +459,9 @@ ipcRenderer.on( // Expose protected methods that allow the renderer process to use // the ipcRenderer without exposing the entire object const electronAPI: ElectronAPI = { + ...createCodexAccountBridge({ + ipcRenderer, + }), ...createRecentProjectsBridge(), getAppVersion: () => ipcRenderer.invoke('get-app-version'), getProjects: () => ipcRenderer.invoke('get-projects'), diff --git a/src/renderer/api/httpClient.ts 
b/src/renderer/api/httpClient.ts index 7037e29e..58b5af8b 100644 --- a/src/renderer/api/httpClient.ts +++ b/src/renderer/api/httpClient.ts @@ -6,6 +6,7 @@ * to run in a regular browser connected to an HTTP server. */ +import type { CodexAccountSnapshotDto } from '@features/codex-account/contracts'; import type { DashboardRecentProjectsPayload } from '@features/recent-projects/contracts'; import type { AppConfig, @@ -219,6 +220,29 @@ export class HttpAPIClient implements ElectronAPI { getAppVersion = (): Promise => this.get('/api/version'); + getCodexAccountSnapshot = (): Promise => + Promise.reject(new Error('Codex account bridge is unavailable in browser mode')); + + refreshCodexAccountSnapshot = (_options?: { + includeRateLimits?: boolean; + forceRefreshToken?: boolean; + }): Promise => + Promise.reject(new Error('Codex account bridge is unavailable in browser mode')); + + startCodexChatgptLogin = (): Promise => + Promise.reject(new Error('Codex account bridge is unavailable in browser mode')); + + cancelCodexChatgptLogin = (): Promise => + Promise.reject(new Error('Codex account bridge is unavailable in browser mode')); + + logoutCodexAccount = (): Promise => + Promise.reject(new Error('Codex account bridge is unavailable in browser mode')); + + onCodexAccountSnapshotChanged = + (_callback: (event: unknown, snapshot: CodexAccountSnapshotDto) => void): (() => void) => + () => + undefined; + getDashboardRecentProjects = (): Promise => this.get('/api/dashboard/recent-projects'); diff --git a/src/renderer/assets/participant-avatars/01.png b/src/renderer/assets/participant-avatars/01.png new file mode 100644 index 00000000..4128d3b0 Binary files /dev/null and b/src/renderer/assets/participant-avatars/01.png differ diff --git a/src/renderer/assets/participant-avatars/02.png b/src/renderer/assets/participant-avatars/02.png new file mode 100644 index 00000000..15575859 Binary files /dev/null and b/src/renderer/assets/participant-avatars/02.png differ diff --git 
a/src/renderer/assets/participant-avatars/03.png b/src/renderer/assets/participant-avatars/03.png new file mode 100644 index 00000000..a5e00bcd Binary files /dev/null and b/src/renderer/assets/participant-avatars/03.png differ diff --git a/src/renderer/assets/participant-avatars/04.png b/src/renderer/assets/participant-avatars/04.png new file mode 100644 index 00000000..f984db69 Binary files /dev/null and b/src/renderer/assets/participant-avatars/04.png differ diff --git a/src/renderer/assets/participant-avatars/05.png b/src/renderer/assets/participant-avatars/05.png new file mode 100644 index 00000000..a9795962 Binary files /dev/null and b/src/renderer/assets/participant-avatars/05.png differ diff --git a/src/renderer/assets/participant-avatars/06.png b/src/renderer/assets/participant-avatars/06.png new file mode 100644 index 00000000..71950d32 Binary files /dev/null and b/src/renderer/assets/participant-avatars/06.png differ diff --git a/src/renderer/assets/participant-avatars/07.png b/src/renderer/assets/participant-avatars/07.png new file mode 100644 index 00000000..8f23fb86 Binary files /dev/null and b/src/renderer/assets/participant-avatars/07.png differ diff --git a/src/renderer/assets/participant-avatars/08.png b/src/renderer/assets/participant-avatars/08.png new file mode 100644 index 00000000..c7ada81e Binary files /dev/null and b/src/renderer/assets/participant-avatars/08.png differ diff --git a/src/renderer/assets/participant-avatars/09.png b/src/renderer/assets/participant-avatars/09.png new file mode 100644 index 00000000..8f4abe98 Binary files /dev/null and b/src/renderer/assets/participant-avatars/09.png differ diff --git a/src/renderer/assets/participant-avatars/10.png b/src/renderer/assets/participant-avatars/10.png new file mode 100644 index 00000000..bee2490e Binary files /dev/null and b/src/renderer/assets/participant-avatars/10.png differ diff --git a/src/renderer/assets/participant-avatars/11.png 
b/src/renderer/assets/participant-avatars/11.png new file mode 100644 index 00000000..e77da7e4 Binary files /dev/null and b/src/renderer/assets/participant-avatars/11.png differ diff --git a/src/renderer/assets/participant-avatars/12.png b/src/renderer/assets/participant-avatars/12.png new file mode 100644 index 00000000..32ee4912 Binary files /dev/null and b/src/renderer/assets/participant-avatars/12.png differ diff --git a/src/renderer/assets/participant-avatars/13.png b/src/renderer/assets/participant-avatars/13.png new file mode 100644 index 00000000..9b774e24 Binary files /dev/null and b/src/renderer/assets/participant-avatars/13.png differ diff --git a/src/renderer/components/common/CliInstallWarningBanner.tsx b/src/renderer/components/common/CliInstallWarningBanner.tsx index a87ef7aa..3a9cb1f1 100644 --- a/src/renderer/components/common/CliInstallWarningBanner.tsx +++ b/src/renderer/components/common/CliInstallWarningBanner.tsx @@ -1,6 +1,6 @@ /** * CliInstallWarningBanner — Global warning strip shown below the tab bar - * when Claude Code CLI is not installed. + * when the configured runtime is unavailable. * * Hidden on Dashboard pages (which have their own detailed CliStatusBanner). * Only rendered in Electron mode. 
@@ -13,6 +13,7 @@ import { useShallow } from 'zustand/react/shallow'; export const CliInstallWarningBanner = (): React.JSX.Element | null => { const cliStatus = useStore(useShallow((s) => s.cliStatus)); + const cliStatusLoading = useStore((s) => s.cliStatusLoading); const openDashboard = useStore((s) => s.openDashboard); // Returns a primitive boolean — minimizes re-renders @@ -24,7 +25,13 @@ export const CliInstallWarningBanner = (): React.JSX.Element | null => { }); // Hide when: not Electron, status not loaded yet, CLI installed, or dashboard is focused - if (!isElectronMode() || !cliStatus || cliStatus.installed || isDashboardFocused) { + if ( + !isElectronMode() || + cliStatusLoading || + !cliStatus || + cliStatus.installed || + isDashboardFocused + ) { return null; } @@ -40,8 +47,8 @@ export const CliInstallWarningBanner = (): React.JSX.Element | null => { {cliStatus.binaryPath && cliStatus.launchError - ? 'Claude Code was found but failed to start. Open the Dashboard to repair or reinstall it.' - : 'Claude Code is not installed. Install it from the Dashboard to enable all features.'} + ? `The configured ${cliStatus.displayName} runtime was found but failed to start. Open the Dashboard to repair or reinstall it.` + : `The configured ${cliStatus.displayName} runtime is not installed. Install it from the Dashboard to enable all features.`}
) : null} + {showInlineCodexAccessoryRow ? ( +
+ + {codexDashboardRateLimits!.map((item) => ( +
+
+ + {item.label} + + + {item.remaining} + + + • resets {item.resetsAt} + +
+
+ ))} +
+ ) : !showSkeleton && + codexDashboardRateLimits && + codexDashboardRateLimits.length > 0 ? ( +
+ {codexDashboardRateLimits.map((item) => ( +
+
+ + {item.label} + + + {item.remaining} + + + • resets {item.resetsAt} + +
+
+ ))} +
+ ) : !showSkeleton && codexDashboardHint ? ( +
+ {codexDashboardHint} +
+ ) : null}
- ) : shouldShowProviderConnectAction(provider) ? ( + ) : !showSkeleton && shouldShowProviderConnectAction(provider) ? (
- {!showSkeleton && provider.models.length > 0 && ( + {!showSkeleton && provider.models.length > 0 && !showInlineCodexAccessoryRow && (
{ const [isSwitchingFlavor, setIsSwitchingFlavor] = useState(false); const [showTroubleshoot, setShowTroubleshoot] = useState(false); const multimodelEnabled = appConfig?.general?.multimodelEnabled ?? true; - const visibleCliProviders = useMemo( - () => filterMainScreenCliProviders(cliStatus?.providers ?? []), - [cliStatus?.providers] + const loadingCliStatus = useMemo( + () => + !cliStatus && cliStatusLoading && multimodelEnabled + ? createLoadingMultimodelCliStatus() + : cliStatus, + [cliStatus, cliStatusLoading, multimodelEnabled] ); + const codexAccount = useCodexAccountSnapshot({ + enabled: + isElectron && + multimodelEnabled && + loadingCliStatus?.flavor === 'agent_teams_orchestrator' && + Boolean(loadingCliStatus?.providers.some((provider) => provider.providerId === 'codex')), + includeRateLimits: true, + }); + const visibleCliProviders = useMemo( + () => + filterMainScreenCliProviders(loadingCliStatus?.providers ?? []).map((provider) => + provider.providerId === 'codex' + ? mergeCodexProviderStatusWithSnapshot(provider, codexAccount.snapshot) + : provider + ), + [loadingCliStatus?.providers, codexAccount.snapshot] + ); + const loadingCliProviderMap = useMemo( + () => + new Map( + filterMainScreenCliProviders(loadingCliStatus?.providers ?? []).map((provider) => [ + provider.providerId, + provider, + ]) + ), + [loadingCliStatus?.providers] + ); + const codexSnapshotPending = + codexAccount.loading && + Boolean(loadingCliStatus?.providers.some((provider) => provider.providerId === 'codex')) && + !codexAccount.snapshot; + const effectiveCliStatus = useMemo( + () => + loadingCliStatus + ? 
{ + ...loadingCliStatus, + providers: visibleCliProviders, + } + : loadingCliStatus, + [loadingCliStatus, visibleCliProviders] + ); + const renderCliStatus = effectiveCliStatus; + const runtimeDisplayName = getHumanRuntimeDisplayName(renderCliStatus, multimodelEnabled); useEffect(() => { if (!isElectron) return; @@ -711,24 +1022,28 @@ export const CliStatusBanner = (): React.JSX.Element | null => { const interval = setInterval( () => { - void fetchCliStatus(); + void refreshCliStatusForCurrentMode({ + multimodelEnabled, + bootstrapCliStatus, + fetchCliStatus, + }); }, 10 * 60 * 1000 ); return () => clearInterval(interval); - }, [isElectron, cliStatus, fetchCliStatus]); + }, [bootstrapCliStatus, cliStatus, fetchCliStatus, isElectron, multimodelEnabled]); const handleInstall = useCallback(() => { installCli(); }, [installCli]); const handleRefresh = useCallback(() => { - if (multimodelEnabled) { - void bootstrapCliStatus({ multimodelEnabled: true }); - return; - } - void fetchCliStatus(); + void refreshCliStatusForCurrentMode({ + multimodelEnabled, + bootstrapCliStatus, + fetchCliStatus, + }); }, [bootstrapCliStatus, fetchCliStatus, multimodelEnabled]); const handleMultimodelToggle = useCallback( @@ -767,12 +1082,16 @@ export const CliStatusBanner = (): React.JSX.Element | null => { void (async () => { try { await invalidateCliStatus(); - await fetchCliStatus(); + await refreshCliStatusForCurrentMode({ + multimodelEnabled, + bootstrapCliStatus, + fetchCliStatus, + }); } finally { setIsVerifyingAuth(false); } })(); - }, [fetchCliStatus, invalidateCliStatus]); + }, [bootstrapCliStatus, fetchCliStatus, invalidateCliStatus, multimodelEnabled]); const handleProviderLogin = useCallback((providerId: CliProviderId) => { setProviderTerminal({ providerId, action: 'login' }); @@ -782,7 +1101,7 @@ export const CliStatusBanner = (): React.JSX.Element | null => { (providerId: CliProviderId) => { void (async () => { const provider = - cliStatus?.providers.find((entry) => 
entry.providerId === providerId) ?? null; + effectiveCliStatus?.providers.find((entry) => entry.providerId === providerId) ?? null; const disconnectAction = provider ? getProviderDisconnectAction(provider) : null; if (!disconnectAction) { return; @@ -803,7 +1122,7 @@ export const CliStatusBanner = (): React.JSX.Element | null => { setProviderTerminal({ providerId, action: 'logout' }); })(); }, - [cliStatus?.providers] + [effectiveCliStatus?.providers] ); const handleProviderManage = useCallback((providerId: CliProviderId) => { @@ -826,7 +1145,7 @@ export const CliStatusBanner = (): React.JSX.Element | null => { const currentBackends = appConfig?.runtime?.providerBackends ?? { gemini: 'auto' as const, - codex: 'auto' as const, + codex: 'codex-native' as const, }; await updateConfig('runtime', { @@ -852,29 +1171,29 @@ export const CliStatusBanner = (): React.JSX.Element | null => { if (installerState === 'error') return 'error'; if (installerState === 'completed') return 'success'; if (installerState !== 'idle') return 'info'; - if (!cliStatus) return 'loading'; - if (isCheckingMultimodelStatus(cliStatus, visibleCliProviders)) return 'info'; - if (cliStatus.authStatusChecking) return 'info'; - if (!cliStatus.installed) return 'error'; - if (isMultimodelRuntimeStatus(cliStatus) && visibleCliProviders.length === 0) { + if (!renderCliStatus) return 'loading'; + if (isCheckingMultimodelStatus(renderCliStatus, visibleCliProviders)) return 'info'; + if (renderCliStatus.authStatusChecking) return 'info'; + if (!renderCliStatus.installed) return 'error'; + if (isMultimodelRuntimeStatus(renderCliStatus) && visibleCliProviders.length === 0) { return 'warning'; } if ( - isMultimodelRuntimeStatus(cliStatus) && + isMultimodelRuntimeStatus(renderCliStatus) && visibleCliProviders.length > 0 && !hasVisibleAuthenticatedMultimodelProvider(visibleCliProviders) ) { return 'warning'; } - if (cliStatus.installed && !cliStatus.authLoggedIn) return 'warning'; - if 
(cliStatus.updateAvailable) return 'info'; + if (renderCliStatus.installed && !renderCliStatus.authLoggedIn) return 'warning'; + if (renderCliStatus.updateAvailable) return 'info'; return 'success'; }; const variant = getVariant(); const styles = VARIANT_STYLES[variant]; const activeTerminalProvider = providerTerminal - ? (cliStatus?.providers.find( + ? (effectiveCliStatus?.providers.find( (provider) => provider.providerId === providerTerminal.providerId ) ?? null) : null; @@ -885,7 +1204,7 @@ export const CliStatusBanner = (): React.JSX.Element | null => { : getProviderTerminalLogoutCommand(activeTerminalProvider) : null; const installedAuxiliaryUi = - cliStatus !== null ? ( + renderCliStatus !== null ? ( <> { : (visibleCliProviders[0]?.providerId ?? 'anthropic') } providerStatusLoading={cliProviderStatusLoading} - disabled={isBusy || cliStatusLoading || !cliStatus.binaryPath} + disabled={isBusy || cliStatusLoading || !renderCliStatus.binaryPath} onSelectBackend={handleProviderBackendChange} onRefreshProvider={(providerId) => fetchCliProviderStatus(providerId)} onRequestLogin={(providerId) => setProviderTerminal({ providerId, action: 'login' })} /> - {providerTerminal && cliStatus.binaryPath && ( + {providerTerminal && renderCliStatus.binaryPath && ( { @@ -930,7 +1249,7 @@ export const CliStatusBanner = (): React.JSX.Element | null => { ) : null; // ── Loading / fetch error state ──────────────────────────────────────── - if (!cliStatus && installerState === 'idle') { + if (!renderCliStatus && installerState === 'idle') { // Fetch failed — show error with retry if (cliStatusError && !cliStatusLoading) { return ( @@ -970,7 +1289,7 @@ export const CliStatusBanner = (): React.JSX.Element | null => { style={{ borderColor: styles.border, backgroundColor: styles.bg }} > - Claude CLI status will be checked in the background. + {runtimeDisplayName} status will be checked in the background. - {cliStatus.supportsSelfUpdate ? ( + {renderCliStatus.supportsSelfUpdate ? ( ) : (

{cliLaunchIssue - ? `The configured ${cliStatus.displayName} runtime failed its startup health check.` - : `The configured ${cliStatus.displayName} runtime was not found.`} + ? `The configured ${runtimeDisplayName} failed its startup health check.` + : `The configured ${runtimeDisplayName} was not found.`}

)}
@@ -1197,17 +1523,19 @@ export const CliStatusBanner = (): React.JSX.Element | null => { // Installed but not logged in — yellow warning banner if ( - cliStatus.installed && - cliStatus.flavor !== 'agent_teams_orchestrator' && - (cliStatus.authStatusChecking || isVerifyingAuth) + renderCliStatus.installed && + renderCliStatus.flavor !== 'agent_teams_orchestrator' && + (renderCliStatus.authStatusChecking || isVerifyingAuth) ) { - if (cliStatus.authStatusChecking || isVerifyingAuth) { + if (renderCliStatus.authStatusChecking || isVerifyingAuth) { return ( <> { } if ( - cliStatus.installed && - cliStatus.flavor !== 'agent_teams_orchestrator' && - !cliStatus.authStatusChecking && - !cliStatus.authLoggedIn + renderCliStatus.installed && + renderCliStatus.flavor !== 'agent_teams_orchestrator' && + !renderCliStatus.authStatusChecking && + !renderCliStatus.authLoggedIn ) { - const apiKeyActionRequiredProviders = getApiKeyActionRequiredProviders(cliStatus.providers); + const apiKeyActionRequiredProviders = getApiKeyActionRequiredProviders( + renderCliStatus.providers + ); const hasApiKeyModeIssue = apiKeyActionRequiredProviders.length > 0; const primaryApiKeyProvider = apiKeyActionRequiredProviders[0] ?? null; const apiKeyMissingProviders = apiKeyActionRequiredProviders.filter( @@ -1254,14 +1584,16 @@ export const CliStatusBanner = (): React.JSX.Element | null => { : apiKeyActionRequiredProviders.length === 1 && primaryApiKeyProvider ? `${primaryApiKeyProvider.displayName} is set to API key mode, but it is not connected. Open Manage Providers to review the saved key or switch the connection mode.` : 'One or more providers are set to API key mode and need attention. Open Manage Providers to review saved keys or switch the connection mode.' - : `${cliStatus.displayName} is installed but you are not authenticated. Login is required for team provisioning and AI features.`; + : `${runtimeDisplayName} is installed but you are not authenticated. 
Login is required for team provisioning and AI features.`; return ( <> {
  • Open your terminal and run:{' '} - {cliStatus.showBinaryPath && cliStatus.binaryPath - ? `"${cliStatus.binaryPath}" auth status` + {renderCliStatus.showBinaryPath && renderCliStatus.binaryPath + ? `"${renderCliStatus.binaryPath}" auth status` : 'your configured CLI auth status command'} {' '} — check if it shows "Logged in" @@ -1394,25 +1726,25 @@ export const CliStatusBanner = (): React.JSX.Element | null => {
  • If it says logged in but the app doesn't see it, try:{' '} - {cliStatus.showBinaryPath && cliStatus.binaryPath - ? `"${cliStatus.binaryPath}" auth logout` + {renderCliStatus.showBinaryPath && renderCliStatus.binaryPath + ? `"${renderCliStatus.binaryPath}" auth logout` : 'the runtime logout command'} {' '} then{' '} - {cliStatus.showBinaryPath && cliStatus.binaryPath - ? `"${cliStatus.binaryPath}" auth login` + {renderCliStatus.showBinaryPath && renderCliStatus.binaryPath + ? `"${renderCliStatus.binaryPath}" auth login` : 'the runtime login command'} {' '} again
  • Make sure the CLI in your terminal is the same runtime the app uses - {cliStatus.showBinaryPath && cliStatus.binaryPath && ( + {renderCliStatus.showBinaryPath && renderCliStatus.binaryPath && ( :{' '} - {cliStatus.binaryPath} + {renderCliStatus.binaryPath} )} @@ -1426,10 +1758,10 @@ export const CliStatusBanner = (): React.JSX.Element | null => { )} {installedAuxiliaryUi} - {showLoginTerminal && cliStatus.binaryPath && ( + {showLoginTerminal && renderCliStatus.binaryPath && ( { setShowLoginTerminal(false); @@ -1475,9 +1807,11 @@ export const CliStatusBanner = (): React.JSX.Element | null => { return ( <> { + const isElectron = useMemo(() => isElectronMode(), []); const tabId = useTabIdOptional(); const { fetchPluginCatalog, + bootstrapCliStatus, fetchCliStatus, fetchApiKeys, fetchSkillsCatalog, @@ -113,6 +129,7 @@ export const ExtensionStoreView = (): React.JSX.Element => { cliStatus, cliStatusLoading, cliProviderStatusLoading, + appConfig, openDashboard, sessions, projects, @@ -120,6 +137,7 @@ export const ExtensionStoreView = (): React.JSX.Element => { } = useStore( useShallow((s) => ({ fetchPluginCatalog: s.fetchPluginCatalog, + bootstrapCliStatus: s.bootstrapCliStatus, fetchCliStatus: s.fetchCliStatus, fetchApiKeys: s.fetchApiKeys, fetchSkillsCatalog: s.fetchSkillsCatalog, @@ -132,13 +150,58 @@ export const ExtensionStoreView = (): React.JSX.Element => { cliStatus: s.cliStatus, cliStatusLoading: s.cliStatusLoading, cliProviderStatusLoading: s.cliProviderStatusLoading, + appConfig: s.appConfig, openDashboard: s.openDashboard, sessions: s.sessions, projects: s.projects, repositoryGroups: s.repositoryGroups, })) ); - const cliInstalled = cliStatus?.installed ?? true; + const multimodelEnabled = appConfig?.general?.multimodelEnabled ?? true; + const loadingCliStatus = useMemo( + () => + !cliStatus && cliStatusLoading && multimodelEnabled + ? 
createLoadingMultimodelCliStatus() + : cliStatus, + [cliStatus, cliStatusLoading, multimodelEnabled] + ); + const codexAccount = useCodexAccountSnapshot({ + enabled: + isElectron && + multimodelEnabled && + loadingCliStatus?.flavor === 'agent_teams_orchestrator' && + Boolean( + loadingCliStatus?.providers.some( + (provider: CliProviderStatus) => provider.providerId === 'codex' + ) + ), + includeRateLimits: true, + }); + const codexSnapshotPending = + codexAccount.loading && + Boolean( + loadingCliStatus?.providers.some( + (provider: CliProviderStatus) => provider.providerId === 'codex' + ) + ) && + !codexAccount.snapshot; + const effectiveCliStatus = useMemo( + () => + loadingCliStatus + ? { + ...loadingCliStatus, + providers: loadingCliStatus.providers.map((provider: CliProviderStatus) => + provider.providerId === 'codex' + ? mergeCodexProviderStatusWithSnapshot(provider, codexAccount.snapshot) + : provider + ), + } + : loadingCliStatus, + [loadingCliStatus, codexAccount.snapshot] + ); + const effectiveCliStatusLoading = cliStatusLoading && effectiveCliStatus === null; + const runtimeDisplayName = getRuntimeDisplayName(effectiveCliStatus, multimodelEnabled); + const cliInstalled = effectiveCliStatus?.installed ?? 
true; const hasOngoingSessions = sessions.some((sess) => sess.isOngoing); const extensionsTabProjectId = useStore((s) => tabId @@ -195,8 +258,12 @@ export const ExtensionStoreView = (): React.JSX.Element => { }, [fetchPluginCatalog, projectPath]); useEffect(() => { - void fetchCliStatus(); - }, [fetchCliStatus]); + void refreshCliStatusForCurrentMode({ + multimodelEnabled, + bootstrapCliStatus, + fetchCliStatus, + }); + }, [bootstrapCliStatus, fetchCliStatus, multimodelEnabled]); // Fetch MCP installed state on mount useEffect(() => { @@ -215,42 +282,55 @@ export const ExtensionStoreView = (): React.JSX.Element => { // Refresh all data (plugins + MCP browse + installed + skills) const handleRefresh = useCallback(() => { - void fetchCliStatus(); + void refreshCliStatusForCurrentMode({ + multimodelEnabled, + bootstrapCliStatus, + fetchCliStatus, + }); void fetchApiKeys(); void fetchPluginCatalog(projectPath ?? undefined, true); void mcpBrowse(); // re-fetch first page void mcpFetchInstalled(projectPath ?? undefined); void fetchSkillsCatalog(projectPath ?? undefined); }, [ + bootstrapCliStatus, fetchApiKeys, fetchCliStatus, fetchPluginCatalog, fetchSkillsCatalog, + multimodelEnabled, mcpBrowse, mcpFetchInstalled, projectPath, ]); const isRefreshing = - cliStatusLoading || apiKeysLoading || pluginCatalogLoading || mcpBrowseLoading || skillsLoading; + effectiveCliStatusLoading || + apiKeysLoading || + pluginCatalogLoading || + mcpBrowseLoading || + skillsLoading; const mcpMutationDisableReason = useMemo( () => getExtensionActionDisableReason({ isInstalled: false, - cliStatus, - cliStatusLoading, + cliStatus: effectiveCliStatus, + cliStatusLoading: effectiveCliStatusLoading, section: 'mcp', }), - [cliStatus, cliStatusLoading] + [effectiveCliStatus, effectiveCliStatusLoading] ); const cliStatusBanner = useMemo(() => { - const providers = cliStatus?.providers ?? []; + const providers = effectiveCliStatus?.providers ?? 
[]; const visibleProviders = getVisibleMultimodelProviders(providers); - const isMultimodel = isMultimodelRuntimeStatus(cliStatus); + const isMultimodel = isMultimodelRuntimeStatus(effectiveCliStatus); const shouldShowMultimodelProviderCards = - isMultimodel && visibleProviders.length > 0 && cliStatus !== null; + isMultimodel && visibleProviders.length > 0 && effectiveCliStatus !== null; - if ((cliStatusLoading || cliStatus === null) && !shouldShowMultimodelProviderCards) { + if ( + (effectiveCliStatusLoading || effectiveCliStatus === null) && + !shouldShowMultimodelProviderCards + ) { return (
    @@ -267,8 +347,10 @@ export const ExtensionStoreView = (): React.JSX.Element => { ); } - if (!cliStatus.installed) { - const cliLaunchIssue = Boolean(cliStatus.binaryPath && cliStatus.launchError); + if (!effectiveCliStatus.installed) { + const cliLaunchIssue = Boolean( + effectiveCliStatus.binaryPath && effectiveCliStatus.launchError + ); return (
    @@ -283,9 +365,9 @@ export const ExtensionStoreView = (): React.JSX.Element => { ? 'Extensions are disabled until the runtime passes its startup health check. Open the Dashboard to repair or reinstall it.' : 'Extensions are disabled until the runtime is installed. Open the Dashboard to install it and retry.'}

    - {cliLaunchIssue && cliStatus.launchError && ( + {cliLaunchIssue && effectiveCliStatus.launchError && (

    - {cliStatus.launchError} + {effectiveCliStatus.launchError}

    )}
    @@ -296,16 +378,18 @@ export const ExtensionStoreView = (): React.JSX.Element => { ); } - if (!isMultimodel && !cliStatus.authLoggedIn) { + if (!isMultimodel && !effectiveCliStatus.authLoggedIn) { return (
    -

    Claude CLI needs sign-in

    +

    {runtimeDisplayName} needs sign-in

    - Claude CLI was found - {cliStatus.installedVersion ? ` (${cliStatus.installedVersion})` : ''}, but plugin - installs are disabled until you sign in from the Dashboard. + {runtimeDisplayName} was found + {effectiveCliStatus.installedVersion + ? ` (${effectiveCliStatus.installedVersion})` + : ''} + , but plugin installs are disabled until you sign in from the Dashboard.

    diff --git a/src/renderer/components/extensions/apikeys/ApiKeysPanel.tsx b/src/renderer/components/extensions/apikeys/ApiKeysPanel.tsx index 3352fb3c..595b06b4 100644 --- a/src/renderer/components/extensions/apikeys/ApiKeysPanel.tsx +++ b/src/renderer/components/extensions/apikeys/ApiKeysPanel.tsx @@ -4,9 +4,15 @@ import { useEffect, useMemo, useState } from 'react'; +import { + mergeCodexProviderStatusWithSnapshot, + useCodexAccountSnapshot, +} from '@features/codex-account/renderer'; +import { isElectronMode } from '@renderer/api'; import { Button } from '@renderer/components/ui/button'; import { Tooltip, TooltipContent, TooltipTrigger } from '@renderer/components/ui/tooltip'; import { useStore } from '@renderer/store'; +import { createLoadingMultimodelCliStatus } from '@renderer/store/slices/cliInstallerSlice'; import { AlertTriangle, Info, Key, Plus } from 'lucide-react'; import { useShallow } from 'zustand/react/shallow'; @@ -24,17 +30,56 @@ export const ApiKeysPanel = ({ projectPath, projectLabel, }: ApiKeysPanelProps): React.JSX.Element => { - const { apiKeys, apiKeysLoading, apiKeysError, storageStatus, fetchStorageStatus, cliStatus } = - useStore( - useShallow((s) => ({ - apiKeys: s.apiKeys, - apiKeysLoading: s.apiKeysLoading, - apiKeysError: s.apiKeysError, - storageStatus: s.apiKeyStorageStatus, - fetchStorageStatus: s.fetchApiKeyStorageStatus, - cliStatus: s.cliStatus, - })) - ); + const isElectron = useMemo(() => isElectronMode(), []); + const { + apiKeys, + apiKeysLoading, + apiKeysError, + storageStatus, + fetchStorageStatus, + cliStatus, + cliStatusLoading, + appConfig, + } = useStore( + useShallow((s) => ({ + apiKeys: s.apiKeys, + apiKeysLoading: s.apiKeysLoading, + apiKeysError: s.apiKeysError, + storageStatus: s.apiKeyStorageStatus, + fetchStorageStatus: s.fetchApiKeyStorageStatus, + cliStatus: s.cliStatus, + cliStatusLoading: s.cliStatusLoading, + appConfig: s.appConfig, + })) + ); + const multimodelEnabled = 
appConfig?.general?.multimodelEnabled ?? true; + const loadingCliStatus = useMemo( + () => + !cliStatus && cliStatusLoading && multimodelEnabled + ? createLoadingMultimodelCliStatus() + : cliStatus, + [cliStatus, cliStatusLoading, multimodelEnabled] + ); + const codexAccount = useCodexAccountSnapshot({ + enabled: + isElectron && + loadingCliStatus?.flavor === 'agent_teams_orchestrator' && + Boolean(loadingCliStatus?.providers.some((provider) => provider.providerId === 'codex')), + }); + const effectiveCliStatus = useMemo( + () => + loadingCliStatus + ? { + ...loadingCliStatus, + providers: loadingCliStatus.providers.map((provider) => + provider.providerId === 'codex' + ? mergeCodexProviderStatusWithSnapshot(provider, codexAccount.snapshot) + : provider + ), + } + : loadingCliStatus, + [loadingCliStatus, codexAccount.snapshot] + ); const [dialogOpen, setDialogOpen] = useState(false); const [editingKey, setEditingKey] = useState(null); @@ -60,7 +105,7 @@ export const ApiKeysPanel = ({ const isOsKeychain = storageStatus?.encryptionMethod === 'os-keychain'; const providerKeyCards = useMemo(() => { - if (!cliStatus?.providers?.length) { + if (!effectiveCliStatus?.providers?.length) { return []; } @@ -78,7 +123,9 @@ export const ApiKeysPanel = ({ }, ] as const ).flatMap((item) => { - const provider = cliStatus.providers.find((entry) => entry.providerId === item.providerId); + const provider = effectiveCliStatus.providers.find( + (entry) => entry.providerId === item.providerId + ); if (!provider) { return []; } @@ -93,7 +140,7 @@ export const ApiKeysPanel = ({ }, ]; }); - }, [cliStatus]); + }, [effectiveCliStatus]); return (
    diff --git a/src/renderer/components/extensions/common/InstallButton.tsx b/src/renderer/components/extensions/common/InstallButton.tsx index 78930ad7..653b9ef2 100644 --- a/src/renderer/components/extensions/common/InstallButton.tsx +++ b/src/renderer/components/extensions/common/InstallButton.tsx @@ -17,6 +17,7 @@ import { getExtensionActionDisableReason } from '@shared/utils/extensionNormaliz import { Check, Loader2, Trash2 } from 'lucide-react'; import { useShallow } from 'zustand/react/shallow'; +import type { CliInstallationStatus } from '@shared/types'; import type { ExtensionOperationState } from '@shared/types/extensions'; interface InstallButtonProps { @@ -28,6 +29,11 @@ interface InstallButtonProps { disabled?: boolean; size?: 'sm' | 'default'; errorMessage?: string; + cliStatus?: Pick< + CliInstallationStatus, + 'installed' | 'authLoggedIn' | 'binaryPath' | 'launchError' | 'flavor' | 'providers' + > | null; + cliStatusLoading?: boolean; } export const InstallButton = ({ @@ -39,13 +45,17 @@ export const InstallButton = ({ disabled, size = 'sm', errorMessage, + cliStatus: cliStatusOverride, + cliStatusLoading: cliStatusLoadingOverride, }: InstallButtonProps) => { - const { cliStatus, cliStatusLoading } = useStore( + const { cliStatus: storedCliStatus, cliStatusLoading: storedCliStatusLoading } = useStore( useShallow((s) => ({ cliStatus: s.cliStatus, cliStatusLoading: s.cliStatusLoading, })) ); + const cliStatus = cliStatusOverride ?? storedCliStatus; + const cliStatusLoading = cliStatusLoadingOverride ?? 
storedCliStatusLoading; const disableReason = getExtensionActionDisableReason({ isInstalled, cliStatus, diff --git a/src/renderer/components/extensions/mcp/CustomMcpServerDialog.tsx b/src/renderer/components/extensions/mcp/CustomMcpServerDialog.tsx index 727d2603..114f9cb8 100644 --- a/src/renderer/components/extensions/mcp/CustomMcpServerDialog.tsx +++ b/src/renderer/components/extensions/mcp/CustomMcpServerDialog.tsx @@ -33,6 +33,7 @@ import { } from '@shared/utils/mcpScopes'; import { Plus, Server, Trash2 } from 'lucide-react'; +import type { CliInstallationStatus } from '@shared/types'; import type { McpCustomInstallRequest, McpHeaderDef, @@ -45,6 +46,11 @@ interface CustomMcpServerDialogProps { open: boolean; onClose: () => void; projectPath: string | null; + cliStatus?: Pick< + CliInstallationStatus, + 'installed' | 'authLoggedIn' | 'binaryPath' | 'launchError' | 'flavor' | 'providers' + > | null; + cliStatusLoading?: boolean; } type TransportMode = 'stdio' | 'http'; @@ -66,10 +72,14 @@ export const CustomMcpServerDialog = ({ open, onClose, projectPath, + cliStatus: cliStatusOverride, + cliStatusLoading: cliStatusLoadingOverride, }: CustomMcpServerDialogProps): React.JSX.Element => { const installCustomMcpServer = useStore((s) => s.installCustomMcpServer); - const cliStatus = useStore((s) => s.cliStatus); - const cliStatusLoading = useStore((s) => s.cliStatusLoading); + const storedCliStatus = useStore((s) => s.cliStatus); + const storedCliStatusLoading = useStore((s) => s.cliStatusLoading); + const cliStatus = cliStatusOverride ?? storedCliStatus; + const cliStatusLoading = cliStatusLoadingOverride ?? 
storedCliStatusLoading; const defaultSharedScope = getDefaultMcpSharedScope(cliStatus?.flavor); const scopeOptions: { value: Scope; label: string }[] = [ { value: defaultSharedScope, label: getMcpScopeLabel(defaultSharedScope, cliStatus?.flavor) }, diff --git a/src/renderer/components/extensions/mcp/McpServerCard.tsx b/src/renderer/components/extensions/mcp/McpServerCard.tsx index 10844f74..3e6d0f6e 100644 --- a/src/renderer/components/extensions/mcp/McpServerCard.tsx +++ b/src/renderer/components/extensions/mcp/McpServerCard.tsx @@ -23,6 +23,7 @@ import { Github as GithubIcon } from 'lucide-react'; import { InstallButton } from '../common/InstallButton'; import { SourceBadge } from '../common/SourceBadge'; +import type { CliInstallationStatus } from '@shared/types'; import type { InstalledMcpEntry, McpCatalogItem, @@ -37,6 +38,11 @@ interface McpServerCardProps { diagnostic?: McpServerDiagnostic | null; diagnosticsLoading?: boolean; onClick: (serverId: string) => void; + cliStatus?: Pick< + CliInstallationStatus, + 'installed' | 'authLoggedIn' | 'binaryPath' | 'launchError' | 'flavor' | 'providers' + > | null; + cliStatusLoading?: boolean; } export const McpServerCard = ({ @@ -47,8 +53,11 @@ export const McpServerCard = ({ diagnostic, diagnosticsLoading, onClick, + cliStatus: cliStatusOverride, + cliStatusLoading, }: McpServerCardProps): React.JSX.Element => { - const cliStatus = useStore((s) => s.cliStatus); + const storedCliStatus = useStore((s) => s.cliStatus); + const cliStatus = cliStatusOverride ?? storedCliStatus; const sharedScope = getDefaultMcpSharedScope(cliStatus?.flavor); const operationKey = getMcpOperationKey(server.id, sharedScope); const installProgress = useStore((s) => s.mcpInstallProgress[operationKey] ?? 
'idle'); @@ -262,6 +271,8 @@ export const McpServerCard = ({ state={installProgress} isInstalled={isInstalled} section="mcp" + cliStatus={cliStatus} + cliStatusLoading={cliStatusLoading} onInstall={() => installMcpServer({ registryId: server.id, diff --git a/src/renderer/components/extensions/mcp/McpServerDetailDialog.tsx b/src/renderer/components/extensions/mcp/McpServerDetailDialog.tsx index 97845c01..0cd87512 100644 --- a/src/renderer/components/extensions/mcp/McpServerDetailDialog.tsx +++ b/src/renderer/components/extensions/mcp/McpServerDetailDialog.tsx @@ -42,6 +42,7 @@ import { ExternalLink, Lock, Plus, Star, Trash2, Wrench } from 'lucide-react'; import { InstallButton } from '../common/InstallButton'; import { SourceBadge } from '../common/SourceBadge'; +import type { CliInstallationStatus } from '@shared/types'; import type { InstalledMcpEntry, McpCatalogItem, @@ -59,6 +60,11 @@ interface McpServerDetailDialogProps { projectPath: string | null; open: boolean; onClose: () => void; + cliStatus?: Pick< + CliInstallationStatus, + 'installed' | 'authLoggedIn' | 'binaryPath' | 'launchError' | 'flavor' | 'providers' + > | null; + cliStatusLoading?: boolean; } type Scope = 'local' | 'user' | 'project' | 'global'; @@ -73,8 +79,11 @@ export const McpServerDetailDialog = ({ projectPath, open, onClose, + cliStatus: cliStatusOverride, + cliStatusLoading, }: McpServerDetailDialogProps): React.JSX.Element => { - const cliStatus = useStore((s) => s.cliStatus); + const storedCliStatus = useStore((s) => s.cliStatus); + const cliStatus = cliStatusOverride ?? storedCliStatus; const defaultSharedScope = getDefaultMcpSharedScope(cliStatus?.flavor); const [scope, setScope] = useState(defaultSharedScope); const operationKey = server ? 
getMcpOperationKey(server.id, scope, projectPath) : null; @@ -587,6 +596,8 @@ export const McpServerDetailDialog = ({ state={installProgress} isInstalled={isInstalledForScope} section="mcp" + cliStatus={cliStatus} + cliStatusLoading={cliStatusLoading} onInstall={handleInstall} onUninstall={handleUninstall} disabled={installDisabled} diff --git a/src/renderer/components/extensions/mcp/McpServersPanel.tsx b/src/renderer/components/extensions/mcp/McpServersPanel.tsx index ba03a024..39eb99ee 100644 --- a/src/renderer/components/extensions/mcp/McpServersPanel.tsx +++ b/src/renderer/components/extensions/mcp/McpServersPanel.tsx @@ -15,6 +15,7 @@ import { } from '@renderer/components/ui/select'; import { useStore } from '@renderer/store'; import { formatRelativeTime } from '@renderer/utils/formatters'; +import { getRuntimeDisplayName } from '@renderer/utils/runtimeDisplayName'; import { CLI_NOT_FOUND_MARKER } from '@shared/constants/cli'; import { getMcpDiagnosticKey, @@ -30,6 +31,7 @@ import { SearchInput } from '../common/SearchInput'; import { McpServerCard } from './McpServerCard'; import { McpServerDetailDialog } from './McpServerDetailDialog'; +import type { CliInstallationStatus } from '@shared/types'; import type { InstalledMcpEntry, McpCatalogItem, @@ -68,6 +70,17 @@ interface McpServersPanelProps { mcpSearchWarnings: string[]; selectedMcpServerId: string | null; setSelectedMcpServerId: (id: string | null) => void; + cliStatus?: Pick< + CliInstallationStatus, + | 'installed' + | 'authLoggedIn' + | 'binaryPath' + | 'launchError' + | 'flavor' + | 'displayName' + | 'providers' + > | null; + cliStatusLoading?: boolean; } export const McpServersPanel = ({ @@ -79,6 +92,8 @@ export const McpServersPanel = ({ mcpSearchWarnings, selectedMcpServerId, setSelectedMcpServerId, + cliStatus: cliStatusOverride, + cliStatusLoading: cliStatusLoadingOverride, }: McpServersPanelProps): React.JSX.Element => { const projectStateKey = getMcpProjectStateKey(projectPath); const { @@ 
-99,8 +114,6 @@ export const McpServersPanel = ({ mcpDiagnosticsLastCheckedAtByProjectPath, mcpDiagnosticsLastCheckedAtFallback, runMcpDiagnostics, - cliStatus, - cliStatusLoading, } = useStore( useShallow((s) => ({ browseCatalog: s.mcpBrowseCatalog, @@ -120,10 +133,12 @@ export const McpServersPanel = ({ mcpDiagnosticsLastCheckedAtByProjectPath: s.mcpDiagnosticsLastCheckedAtByProjectPath, mcpDiagnosticsLastCheckedAtFallback: s.mcpDiagnosticsLastCheckedAt, runMcpDiagnostics: s.runMcpDiagnostics, - cliStatus: s.cliStatus, - cliStatusLoading: s.cliStatusLoading, })) ); + const storedCliStatus = useStore((s) => s.cliStatus); + const storedCliStatusLoading = useStore((s) => s.cliStatusLoading); + const cliStatus = cliStatusOverride ?? storedCliStatus; + const cliStatusLoading = cliStatusLoadingOverride ?? storedCliStatusLoading; const installedServers = installedServersByProjectPath?.[projectStateKey] ?? installedServersFallback ?? []; const mcpDiagnostics = @@ -147,12 +162,8 @@ export const McpServersPanel = ({ }, [browseCatalog.length, browseError, browseLoading, mcpBrowse]); const diagnosticsDisableReason = useMemo(() => { - if (cliStatusLoading) { - return 'Checking runtime status...'; - } - if (cliStatus === null || typeof cliStatus === 'undefined') { - return 'Checking runtime availability...'; + return cliStatusLoading ? 'Checking runtime status...' : 'Checking runtime availability...'; } if (cliStatus?.installed === false) { @@ -241,8 +252,7 @@ export const McpServersPanel = ({ // Sort displayed servers const displayServers = useMemo(() => sortMcpServers(rawServers, mcpSort), [rawServers, mcpSort]); - const runtimeLabel = - cliStatus?.flavor === 'agent_teams_orchestrator' ? 'multimodel runtime' : 'Claude CLI'; + const runtimeLabel = getRuntimeDisplayName(cliStatus, true); // Find selected server (search in both lists to avoid losing selection during search toggle) const selectedServer = useMemo(() => { @@ -411,13 +421,12 @@ export const McpServersPanel = ({

    {cliStatus?.flavor === 'agent_teams_orchestrator' - ? 'Configured runtime not available' - : 'Claude CLI not installed'} + ? `${runtimeLabel} not available` + : `${runtimeLabel} not installed`}

    - {cliStatus?.flavor === 'agent_teams_orchestrator' - ? 'MCP health checks require the configured runtime. Go to the Dashboard to install or repair it.' - : 'MCP health checks require Claude CLI. Go to the Dashboard to install or repair it.'} + MCP health checks require {runtimeLabel}. Go to the Dashboard to install or repair + it.

    @@ -458,6 +467,8 @@ export const McpServersPanel = ({ diagnostic={getDiagnostic(server)} diagnosticsLoading={mcpDiagnosticsLoading} onClick={setSelectedMcpServerId} + cliStatus={cliStatus} + cliStatusLoading={cliStatusLoading} /> ))}
    @@ -488,6 +499,8 @@ export const McpServersPanel = ({ projectPath={projectPath} open={selectedMcpServerId !== null} onClose={() => setSelectedMcpServerId(null)} + cliStatus={cliStatus} + cliStatusLoading={cliStatusLoading} /> ); diff --git a/src/renderer/components/extensions/plugins/PluginCard.tsx b/src/renderer/components/extensions/plugins/PluginCard.tsx index 0f7230e1..fe1344b2 100644 --- a/src/renderer/components/extensions/plugins/PluginCard.tsx +++ b/src/renderer/components/extensions/plugins/PluginCard.tsx @@ -17,15 +17,27 @@ import { Tag } from 'lucide-react'; import { InstallButton } from '../common/InstallButton'; import { InstallCountBadge } from '../common/InstallCountBadge'; +import type { CliInstallationStatus } from '@shared/types'; import type { EnrichedPlugin } from '@shared/types/extensions'; interface PluginCardProps { plugin: EnrichedPlugin; index: number; onClick: (pluginId: string) => void; + cliStatus?: Pick< + CliInstallationStatus, + 'installed' | 'authLoggedIn' | 'binaryPath' | 'launchError' | 'flavor' | 'providers' + > | null; + cliStatusLoading?: boolean; } -export const PluginCard = ({ plugin, index, onClick }: PluginCardProps): React.JSX.Element => { +export const PluginCard = ({ + plugin, + index, + onClick, + cliStatus, + cliStatusLoading, +}: PluginCardProps): React.JSX.Element => { const capabilities = inferCapabilities(plugin); const category = normalizeCategory(plugin.category); const operationKey = getPluginOperationKey(plugin.pluginId, 'user'); @@ -120,6 +132,8 @@ export const PluginCard = ({ plugin, index, onClick }: PluginCardProps): React.J state={installProgress} isInstalled={isUserInstalled} section="plugins" + cliStatus={cliStatus} + cliStatusLoading={cliStatusLoading} onInstall={() => installPlugin({ pluginId: plugin.pluginId, scope: 'user' })} onUninstall={() => uninstallPlugin(plugin.pluginId, 'user')} size="sm" diff --git a/src/renderer/components/extensions/plugins/PluginDetailDialog.tsx 
b/src/renderer/components/extensions/plugins/PluginDetailDialog.tsx index 5b4a4274..a4ef3fa2 100644 --- a/src/renderer/components/extensions/plugins/PluginDetailDialog.tsx +++ b/src/renderer/components/extensions/plugins/PluginDetailDialog.tsx @@ -39,6 +39,7 @@ import { InstallButton } from '../common/InstallButton'; import { InstallCountBadge } from '../common/InstallCountBadge'; import { SourceBadge } from '../common/SourceBadge'; +import type { CliInstallationStatus } from '@shared/types'; import type { EnrichedPlugin, InstallScope } from '@shared/types/extensions'; interface PluginDetailDialogProps { @@ -46,6 +47,11 @@ interface PluginDetailDialogProps { open: boolean; onClose: () => void; projectPath: string | null; + cliStatus?: Pick< + CliInstallationStatus, + 'installed' | 'authLoggedIn' | 'binaryPath' | 'launchError' | 'flavor' | 'providers' + > | null; + cliStatusLoading?: boolean; } const SCOPE_OPTIONS: { value: InstallScope; label: string }[] = [ @@ -59,6 +65,8 @@ export const PluginDetailDialog = ({ open, onClose, projectPath, + cliStatus, + cliStatusLoading, }: PluginDetailDialogProps): React.JSX.Element => { const { fetchPluginReadme, readmes, readmeLoading, installPlugin, uninstallPlugin } = useStore( useShallow((s) => ({ @@ -198,6 +206,8 @@ export const PluginDetailDialog = ({ state={installProgress} isInstalled={isInstalledForScope} section="plugins" + cliStatus={cliStatus} + cliStatusLoading={cliStatusLoading} onInstall={() => installPlugin({ pluginId: plugin.pluginId, diff --git a/src/renderer/components/extensions/plugins/PluginsPanel.tsx b/src/renderer/components/extensions/plugins/PluginsPanel.tsx index 8adde28f..ac47abfc 100644 --- a/src/renderer/components/extensions/plugins/PluginsPanel.tsx +++ b/src/renderer/components/extensions/plugins/PluginsPanel.tsx @@ -28,6 +28,7 @@ import { CategoryChips } from './CategoryChips'; import { PluginCard } from './PluginCard'; import { PluginDetailDialog } from './PluginDetailDialog'; +import type { 
CliInstallationStatus } from '@shared/types'; import type { EnrichedPlugin, PluginCapability, @@ -48,6 +49,11 @@ interface PluginsPanelProps { clearFilters: () => void; hasActiveFilters: boolean; setPluginSort: (sort: { field: PluginSortField; order: 'asc' | 'desc' }) => void; + cliStatus?: Pick< + CliInstallationStatus, + 'installed' | 'authLoggedIn' | 'binaryPath' | 'launchError' | 'flavor' | 'providers' + > | null; + cliStatusLoading?: boolean; } const SORT_OPTIONS: { value: string; label: string }[] = [ @@ -125,8 +131,15 @@ export const PluginsPanel = ({ clearFilters, hasActiveFilters, setPluginSort, + cliStatus: cliStatusOverride, + cliStatusLoading, }: PluginsPanelProps): React.JSX.Element => { - const { catalog, loading, error, cliStatus } = useStore( + const { + catalog, + loading, + error, + cliStatus: storedCliStatus, + } = useStore( useShallow((s) => ({ catalog: s.pluginCatalog, loading: s.pluginCatalogLoading, @@ -134,6 +147,7 @@ export const PluginsPanel = ({ cliStatus: s.cliStatus, })) ); + const cliStatus = cliStatusOverride ?? storedCliStatus; const filtered = useMemo( () => selectFilteredPlugins(catalog, pluginFilters, pluginSort), @@ -192,8 +206,9 @@ export const PluginsPanel = ({ return (
    - In the multimodel runtime, plugins currently apply only to Anthropic sessions. Broader - plugin support across providers is in development. + In the multimodel runtime, plugins are currently guaranteed only for Anthropic + sessions. We are actively building broader plugin support for all agents, including + both universal plugins and agent-specific plugins. {capability.reason ? ` ${capability.reason}` : ''}
    ); @@ -407,6 +422,8 @@ export const PluginsPanel = ({ plugin={plugin} index={index} onClick={setSelectedPluginId} + cliStatus={cliStatus} + cliStatusLoading={cliStatusLoading} /> ))} @@ -418,6 +435,8 @@ export const PluginsPanel = ({ open={selectedPluginId !== null} onClose={() => setSelectedPluginId(null)} projectPath={projectPath} + cliStatus={cliStatus} + cliStatusLoading={cliStatusLoading} /> ); diff --git a/src/renderer/components/extensions/skills/SkillsPanel.tsx b/src/renderer/components/extensions/skills/SkillsPanel.tsx index 9c95959d..0ee032da 100644 --- a/src/renderer/components/extensions/skills/SkillsPanel.tsx +++ b/src/renderer/components/extensions/skills/SkillsPanel.tsx @@ -1,11 +1,16 @@ import { useEffect, useMemo, useRef, useState } from 'react'; +import { + mergeCodexProviderStatusWithSnapshot, + useCodexAccountSnapshot, +} from '@features/codex-account/renderer'; import { api } from '@renderer/api'; import { Badge } from '@renderer/components/ui/badge'; import { Button } from '@renderer/components/ui/button'; import { Popover, PopoverContent, PopoverTrigger } from '@renderer/components/ui/popover'; import { Tooltip, TooltipContent, TooltipTrigger } from '@renderer/components/ui/tooltip'; import { useStore } from '@renderer/store'; +import { createLoadingMultimodelCliStatus } from '@renderer/store/slices/cliInstallerSlice'; import { getVisibleMultimodelProviders } from '@renderer/utils/multimodelProviderVisibility'; import { getCliProviderExtensionCapability, @@ -149,6 +154,8 @@ export const SkillsPanel = ({ const fetchSkillsCatalog = useStore((s) => s.fetchSkillsCatalog); const fetchSkillDetail = useStore((s) => s.fetchSkillDetail); const cliStatus = useStore((s) => s.cliStatus); + const cliStatusLoading = useStore((s) => s.cliStatusLoading); + const multimodelEnabled = useStore((s) => s.appConfig?.general?.multimodelEnabled ?? true); const skillsLoading = useStore((s) => s.skillsCatalogLoadingByProjectPath[catalogKey] ?? 
false); const skillsError = useStore((s) => s.skillsCatalogErrorByProjectPath[catalogKey] ?? null); const detailById = useStore(useShallow((s) => s.skillsDetailsById)); @@ -167,28 +174,54 @@ export const SkillsPanel = ({ const selectedSkillIdRef = useRef(selectedSkillId); const selectedSkillItemRef = useRef(null); selectedSkillIdRef.current = selectedSkillId; + const loadingCliStatus = useMemo( + () => + !cliStatus && cliStatusLoading && multimodelEnabled + ? createLoadingMultimodelCliStatus() + : cliStatus, + [cliStatus, cliStatusLoading, multimodelEnabled] + ); + const codexAccount = useCodexAccountSnapshot({ + enabled: + loadingCliStatus?.flavor === 'agent_teams_orchestrator' && + Boolean(loadingCliStatus?.providers.some((provider) => provider.providerId === 'codex')), + }); + const effectiveCliStatus = useMemo( + () => + loadingCliStatus + ? { + ...loadingCliStatus, + providers: loadingCliStatus.providers.map((provider) => + provider.providerId === 'codex' + ? mergeCodexProviderStatusWithSnapshot(provider, codexAccount.snapshot) + : provider + ), + } + : loadingCliStatus, + [loadingCliStatus, codexAccount.snapshot] + ); const mergedSkills = useMemo( () => [...projectSkills, ...userSkills], [projectSkills, userSkills] ); const codexSkillOverlayAvailable = useMemo( - () => isCodexSkillOverlayAvailable(cliStatus), - [cliStatus] + () => isCodexSkillOverlayAvailable(effectiveCliStatus), + [effectiveCliStatus] ); const skillsAudienceLabel = useMemo(() => { - if (cliStatus?.flavor !== 'agent_teams_orchestrator') { + if (effectiveCliStatus?.flavor !== 'agent_teams_orchestrator') { return null; } - const providerNames = getVisibleMultimodelProviders(cliStatus.providers ?? []) + const providerNames = getVisibleMultimodelProviders(effectiveCliStatus.providers ?? 
[]) .filter((provider) => isCliExtensionCapabilityAvailable(getCliProviderExtensionCapability(provider, 'skills')) ) .map((provider) => provider.displayName); return formatRuntimeAudienceLabel(providerNames); - }, [cliStatus]); + }, [effectiveCliStatus]); const codexOnlySkillsCount = useMemo( () => mergedSkills.filter((skill) => getSkillAudience(skill.rootKind) === 'codex').length, [mergedSkills] @@ -314,7 +347,7 @@ export const SkillsPanel = ({ return (
    - {cliStatus?.flavor === 'agent_teams_orchestrator' && ( + {effectiveCliStatus?.flavor === 'agent_teams_orchestrator' && (
    Shared skills in `.claude`, `.cursor`, and `.agents` are available to{' '} {skillsAudienceLabel ?? 'the configured runtime'}. Skills stored in `.codex` stay diff --git a/src/renderer/components/runtime/ProviderRuntimeBackendSelector.tsx b/src/renderer/components/runtime/ProviderRuntimeBackendSelector.tsx index 3c90d885..ffd4b966 100644 --- a/src/renderer/components/runtime/ProviderRuntimeBackendSelector.tsx +++ b/src/renderer/components/runtime/ProviderRuntimeBackendSelector.tsx @@ -11,6 +11,7 @@ import { TooltipProvider, TooltipTrigger, } from '@renderer/components/ui/tooltip'; +import { formatProviderBackendLabel } from '@renderer/utils/providerBackendIdentity'; import type { CliProviderStatus } from '@shared/types'; @@ -20,10 +21,57 @@ interface Props { onSelect: (providerId: CliProviderStatus['providerId'], backendId: string) => void; } +export function getProviderRuntimeBackendStateLabel( + option: NonNullable[number] +): string | null { + switch (option.state) { + case 'ready': + return null; + case 'locked': + return 'Locked'; + case 'disabled': + return 'Disabled'; + case 'authentication-required': + return 'Auth required'; + case 'runtime-missing': + return 'Runtime missing'; + case 'degraded': + return 'Degraded'; + default: + if (!option.available) { + return 'Unavailable'; + } + if (option.selectable === false) { + return 'Locked'; + } + return null; + } +} + +export function getProviderRuntimeBackendAudienceLabel( + option: NonNullable[number] +): string | null { + return option.audience === 'internal' ? 'Internal' : null; +} + +export function getVisibleProviderRuntimeBackendOptions( + provider: CliProviderStatus +): NonNullable { + return provider.availableBackends ?? 
[]; +} + export function getOptionDisplayLabel( + provider: CliProviderStatus, option: NonNullable[number], resolvedOption: NonNullable[number] | null ): string { + if (provider.providerId === 'codex') { + const legacyLabel = formatProviderBackendLabel(provider.providerId, option.id); + if (legacyLabel) { + return legacyLabel; + } + } + if (option.id !== 'auto') { return option.label; } @@ -44,8 +92,18 @@ export function getProviderRuntimeBackendSummary(provider: CliProviderStatus): s const selectedBackendId = provider.selectedBackendId ?? options[0]?.id ?? ''; const selectedOption = options.find((option) => option.id === selectedBackendId) ?? options[0]; const resolvedOption = options.find((option) => option.id === provider.resolvedBackendId) ?? null; + const parts = [getOptionDisplayLabel(provider, selectedOption, resolvedOption)]; + const audienceLabel = getProviderRuntimeBackendAudienceLabel(selectedOption); + const stateLabel = getProviderRuntimeBackendStateLabel(selectedOption); - return getOptionDisplayLabel(selectedOption, resolvedOption); + if (audienceLabel) { + parts.push(audienceLabel.toLowerCase()); + } + if (stateLabel) { + parts.push(stateLabel.toLowerCase()); + } + + return parts.join(' - '); } export const ProviderRuntimeBackendSelector = ({ @@ -53,15 +111,21 @@ export const ProviderRuntimeBackendSelector = ({ disabled = false, onSelect, }: Props): React.JSX.Element | null => { - const options = provider.availableBackends ?? []; + const options = getVisibleProviderRuntimeBackendOptions(provider); if (options.length === 0) { return null; } + if (provider.providerId === 'codex' && options.length === 1) { + return null; + } + const selectedBackendId = provider.selectedBackendId ?? options[0]?.id ?? ''; const selectedOption = options.find((option) => option.id === selectedBackendId) ?? options[0]; const resolvedOption = options.find((option) => option.id === provider.resolvedBackendId) ?? 
null; - const selectedLabel = getOptionDisplayLabel(selectedOption, resolvedOption); + const selectedLabel = getOptionDisplayLabel(provider, selectedOption, resolvedOption); + const selectedStateLabel = getProviderRuntimeBackendStateLabel(selectedOption); + const selectedAudienceLabel = getProviderRuntimeBackendAudienceLabel(selectedOption); return (
    @@ -100,12 +164,17 @@ export const ProviderRuntimeBackendSelector = ({
    - {getOptionDisplayLabel(option, resolvedOption)} + + {getOptionDisplayLabel(provider, option, resolvedOption)} + {option.recommended ? ( ) : null} - {!option.available ? ( + {getProviderRuntimeBackendAudienceLabel(option) ? ( - Unavailable + {getProviderRuntimeBackendAudienceLabel(option)} + + ) : null} + {getProviderRuntimeBackendStateLabel(option) ? ( + + {getProviderRuntimeBackendStateLabel(option)} ) : null}
    @@ -160,7 +254,18 @@ export const ProviderRuntimeBackendSelector = ({ Recommended ) : null} - {!selectedOption.available ? ( + {selectedAudienceLabel ? ( + + {selectedAudienceLabel} + + ) : null} + {!selectedStateLabel && !selectedOption.available ? ( @@ -179,6 +284,33 @@ export const ProviderRuntimeBackendSelector = ({ + ) : selectedStateLabel ? ( + + + + + {selectedStateLabel} + + + + {selectedOption.detailMessage ?? + selectedOption.statusMessage ?? + 'This backend cannot be selected yet.'} + + + ) : null}
    diff --git a/src/renderer/components/runtime/ProviderRuntimeSettingsDialog.tsx b/src/renderer/components/runtime/ProviderRuntimeSettingsDialog.tsx index dcdb4855..c7e210b5 100644 --- a/src/renderer/components/runtime/ProviderRuntimeSettingsDialog.tsx +++ b/src/renderer/components/runtime/ProviderRuntimeSettingsDialog.tsx @@ -1,5 +1,17 @@ import { useEffect, useMemo, useState } from 'react'; +import { + formatCodexCreditsValue, + formatCodexRemainingPercent, + formatCodexResetWindowLabel, + formatCodexUsageExplanation, + formatCodexUsagePercent, + formatCodexUsageWindowLabel, + formatCodexWindowDurationLong, + mergeCodexProviderStatusWithSnapshot, + normalizeCodexResetTimestamp, + useCodexAccountSnapshot, +} from '@features/codex-account/renderer'; import { ProviderBrandLogo } from '@renderer/components/common/ProviderBrandLogo'; import { Button } from '@renderer/components/ui/button'; import { @@ -31,6 +43,7 @@ import { } from './providerConnectionUi'; import { getProviderRuntimeBackendSummary, + getVisibleProviderRuntimeBackendOptions, ProviderRuntimeBackendSelector, } from './ProviderRuntimeBackendSelector'; @@ -38,13 +51,8 @@ import type { CliProviderAuthMode, CliProviderId, CliProviderStatus } from '@sha import type { ApiKeyEntry } from '@shared/types/extensions'; type ApiKeyProviderId = 'anthropic' | 'codex' | 'gemini'; -type PendingConnectionAction = - | 'auto' - | 'oauth' - | 'api_key' - | 'codex-beta-on' - | 'codex-beta-off' - | null; +type PendingConnectionAction = 'auto' | 'oauth' | 'chatgpt' | 'api_key' | null; + interface ConnectionMethodCardOption { readonly authMode: CliProviderAuthMode; readonly title: string; @@ -83,10 +91,10 @@ const API_KEY_PROVIDER_CONFIG: Record< }, codex: { envVarName: 'OPENAI_API_KEY', - name: 'OpenAI API Key', + name: 'Codex API Key', title: 'API key', description: - 'Use `OPENAI_API_KEY` with the public OpenAI Responses API. 
Your Codex subscription session stays available when you switch back.', + 'Use an OpenAI API key as a secondary Codex auth path. If you switch Codex to API key mode, the app will mirror OPENAI_API_KEY into CODEX_API_KEY for native launches.', placeholder: 'sk-proj-...', }, gemini: { @@ -113,9 +121,7 @@ function getConnectionDescription(provider: CliProviderStatus): string { case 'anthropic': return 'Choose how app-launched Anthropic sessions authenticate.'; case 'codex': - return provider.connection?.apiKeyBetaEnabled - ? 'Choose whether app-launched Codex sessions use your Codex subscription or an OpenAI API key. Runtime follows this automatically.' - : 'Codex uses your subscription session by default. Enable API key mode if you want to switch Codex to OPENAI_API_KEY billing.'; + return 'Choose whether Codex should prefer your ChatGPT subscription or an API key when the native runtime launches.'; case 'gemini': return 'Configure optional API access. CLI SDK and ADC are still discovered automatically.'; } @@ -126,7 +132,7 @@ function getRuntimeDescription(provider: CliProviderStatus): string { case 'anthropic': return 'Anthropic currently has no separate runtime backend selector.'; case 'codex': - return 'Codex runtime selection follows the active connection method automatically.'; + return 'Codex now runs only through the native runtime path.'; case 'gemini': return 'Choose which Gemini runtime backend multimodel should use.'; } @@ -145,9 +151,16 @@ function getAuthModeDescription(providerId: CliProviderId, authMode: CliProvider } if (providerId === 'codex') { - return authMode === 'api_key' - ? 'Use OPENAI_API_KEY and the public OpenAI Responses API backend.' - : 'Use your Codex subscription session and the built-in Codex runtime.'; + switch (authMode) { + case 'auto': + return 'Prefer your ChatGPT account when it is available. 
Fall back to API key mode only when needed.'; + case 'chatgpt': + return 'Force native Codex launches to use your connected ChatGPT account and subscription.'; + case 'api_key': + return 'Force native Codex launches to use OPENAI_API_KEY / CODEX_API_KEY billing.'; + default: + return ''; + } } return ''; @@ -157,7 +170,6 @@ function getConnectionAlert(provider: CliProviderStatus): string | null { const authMode = provider.connection?.configuredAuthMode; const hasAnthropicSubscriptionSession = provider.authMethod === 'oauth_token' || provider.authMethod === 'claude.ai'; - const hasCodexSubscriptionSession = provider.authMethod === 'oauth_token'; if ( provider.providerId === 'anthropic' && @@ -183,31 +195,51 @@ function getConnectionAlert(provider: CliProviderStatus): string | null { return 'A saved API key is available, but app-launched Anthropic sessions use it only after you switch to API key mode.'; } - if ( - provider.providerId === 'codex' && - provider.connection?.apiKeyBetaEnabled && - authMode === 'api_key' && - !provider.connection?.apiKeyConfigured - ) { - return 'API key mode is selected, but no OPENAI_API_KEY credential is available yet.'; - } + if (provider.providerId === 'codex') { + const codex = provider.connection?.codex; + if (codex?.login.status === 'starting') { + return 'Starting ChatGPT login...'; + } - if ( - provider.providerId === 'codex' && - provider.connection?.apiKeyBetaEnabled && - authMode === 'oauth' && - !hasCodexSubscriptionSession - ) { - return 'Codex subscription mode is selected. 
Sign in with Codex to use this provider.'; - } + if (codex?.login.status === 'pending') { + return 'Waiting for ChatGPT account login to finish...'; + } - if ( - provider.providerId === 'codex' && - provider.connection?.apiKeyBetaEnabled && - authMode === 'oauth' && - provider.connection?.apiKeySource === 'stored' - ) { - return 'A saved OPENAI_API_KEY is available, but Codex uses it only after you switch to API key mode.'; + if (codex?.login.status === 'failed' && codex.login.error) { + return codex.login.error; + } + + if (provider.connection?.configuredAuthMode === 'api_key') { + if (!provider.connection?.apiKeyConfigured) { + return 'API key mode is selected, but no OPENAI_API_KEY or CODEX_API_KEY credential is available yet.'; + } + return null; + } + + if (provider.connection?.configuredAuthMode === 'chatgpt' && !codex?.managedAccount) { + const missingChatgptMessage = codex?.localActiveChatgptAccountPresent + ? 'Codex has a locally selected ChatGPT account, but the current session needs reconnect.' + : codex?.localAccountArtifactsPresent + ? 'Codex CLI currently has no active ChatGPT account. Local Codex account data exists, but no active managed session is selected.' + : 'Codex CLI currently has no active ChatGPT account. Connect ChatGPT to use your subscription.'; + return provider.connection.apiKeyConfigured + ? 
`${missingChatgptMessage} Switch to API key mode to use the detected API key.` + : missingChatgptMessage; + } + + if (!codex?.launchAllowed && codex?.launchIssueMessage) { + return codex.launchIssueMessage; + } + + if (codex?.appServerState === 'degraded' && codex.appServerStatusMessage) { + return codex.appServerStatusMessage; + } + + if (!provider.connection?.apiKeyConfigured && !codex?.managedAccount) { + return 'No ChatGPT account or API key is available yet.'; + } + + return null; } if ( @@ -220,6 +252,147 @@ function getConnectionAlert(provider: CliProviderStatus): string | null { return null; } +function getCodexAccountPanelHint( + provider: CliProviderStatus | null, + configuredAuthMode: CliProviderAuthMode | undefined +): string | null { + if (provider?.providerId !== 'codex') { + return null; + } + + const codex = provider.connection?.codex; + if (!codex || codex.login.status === 'starting' || codex.login.status === 'pending') { + return null; + } + + if (codex.managedAccount?.type === 'chatgpt') { + if (!codex.rateLimits) { + return 'Usage limits appear here after Codex reports them for the connected ChatGPT account.'; + } + + return null; + } + + const usageSentence = codex.localActiveChatgptAccountPresent + ? 'Codex has a locally selected ChatGPT account, but the current session needs reconnect before usage limits can load here.' + : codex.localAccountArtifactsPresent + ? 'Codex CLI currently reports no active ChatGPT account. Local Codex account data exists, but no active managed session is selected. Usage limits appear here only after Codex CLI sees one.' + : 'Codex CLI currently reports no active ChatGPT account. 
Usage limits appear here only after Codex CLI sees one.'; + if (configuredAuthMode === 'chatgpt' && provider.connection?.apiKeyConfigured) { + return `${usageSentence} The detected API key is only used after you switch Codex to API key mode.`; + } + + if (configuredAuthMode === 'auto' && provider.connection?.apiKeyConfigured) { + return `${usageSentence} Auto will keep using the detected API key until ChatGPT is connected.`; + } + + return usageSentence; +} + +function getCheckingStatusColor(): string { + return 'var(--color-text-secondary)'; +} + +function getProviderStatusColor(statusText: string | null, authenticated: boolean): string { + if (statusText === 'Checking...') { + return getCheckingStatusColor(); + } + + return authenticated ? '#4ade80' : 'var(--color-text-muted)'; +} + +function formatCodexResetDateTime(timestampSeconds: number | null | undefined): string { + const normalized = normalizeCodexResetTimestamp(timestampSeconds); + return normalized ? new Date(normalized).toLocaleString() : 'Unknown'; +} + +function CodexRateLimitWindowCard({ + title, + usedLabel, + usedValue, + remainingValue, + resetLabel, + resetValue, + accent, +}: Readonly<{ + title: string; + usedLabel: string; + usedValue: string; + remainingValue: string; + resetLabel: string; + resetValue: string; + accent: 'primary' | 'secondary'; +}>): React.JSX.Element { + const accentStyles = + accent === 'primary' + ? { + borderColor: 'rgba(74, 222, 128, 0.24)', + backgroundColor: 'rgba(74, 222, 128, 0.05)', + badgeColor: '#86efac', + badgeBackground: 'rgba(74, 222, 128, 0.14)', + } + : { + borderColor: 'rgba(125, 211, 252, 0.22)', + backgroundColor: 'rgba(125, 211, 252, 0.04)', + badgeColor: '#bae6fd', + badgeBackground: 'rgba(125, 211, 252, 0.14)', + }; + + return ( +
    +
    +
    + {title} +
    + + {remainingValue} + +
    + +
    +
    +
    + {usedLabel} +
    +
    + {usedValue} +
    +
    + {remainingValue} left +
    +
    + +
    +
    + {resetLabel} +
    +
    + {resetValue} +
    +
    +
    +
    + ); +} + function getConnectionMethodCardOptions( provider: CliProviderStatus ): ConnectionMethodCardOption[] | null { @@ -243,20 +416,22 @@ function getConnectionMethodCardOptions( }, ]; case 'codex': - if (!provider.connection?.apiKeyBetaEnabled) { - return null; - } - return [ { - authMode: 'oauth', - title: 'Codex subscription', - description: 'Use your Codex sign-in session and subscription access.', + authMode: 'auto', + title: 'Auto', + description: + 'Prefer your ChatGPT account and subscription. Use API key mode only if needed.', + }, + { + authMode: 'chatgpt', + title: 'ChatGPT account', + description: 'Use your connected ChatGPT account and Codex subscription.', }, { authMode: 'api_key', - title: 'OpenAI API key', - description: 'Use OPENAI_API_KEY and OpenAI API billing.', + title: 'API key', + description: 'Use OPENAI_API_KEY and CODEX_API_KEY billing for native Codex launches.', }, ]; default: @@ -266,7 +441,7 @@ function getConnectionMethodCardOptions( function getConnectionMethodCardsHint(provider: CliProviderStatus): string | null { if (provider.providerId === 'codex') { - return 'Runtime follows your connection method automatically.'; + return 'Codex always runs through the native runtime. 
Auto prefers your ChatGPT account before falling back to API-key credentials.'; } if (provider.providerId === 'anthropic') { @@ -383,6 +558,10 @@ export const ProviderRuntimeSettingsDialog = ({ const deleteApiKey = useStore((s) => s.deleteApiKey); const updateConfig = useStore((s) => s.updateConfig); const appConfig = useStore((s) => s.appConfig); + const codexAccount = useCodexAccountSnapshot({ + enabled: open && selectedProviderId === 'codex', + includeRateLimits: true, + }); useEffect(() => { if (!open) { @@ -415,6 +594,12 @@ export const ProviderRuntimeSettingsDialog = ({ setRuntimeError(null); }, [selectedProviderId]); + useEffect(() => { + if (selectedProviderId === 'codex' && codexAccount.error) { + setConnectionError(codexAccount.error); + } + }, [codexAccount.error, selectedProviderId]); + const statusSelectedProvider = useMemo(() => { return ( providers.find((provider) => provider.providerId === selectedProviderId) ?? @@ -435,27 +620,29 @@ export const ProviderRuntimeSettingsDialog = ({ : null; const selectedProvider = useMemo(() => { - if (!statusSelectedProvider?.connection) { - return statusSelectedProvider; + const mergedStatusProvider = + statusSelectedProvider?.providerId === 'codex' + ? mergeCodexProviderStatusWithSnapshot(statusSelectedProvider, codexAccount.snapshot) + : statusSelectedProvider; + + if (!mergedStatusProvider?.connection) { + return mergedStatusProvider; } const nextConnection = { - ...statusSelectedProvider.connection, + ...mergedStatusProvider.connection, }; - if (statusSelectedProvider.providerId === 'anthropic') { + if (mergedStatusProvider.providerId === 'anthropic') { nextConnection.configuredAuthMode = appConfig?.providerConnections?.anthropic.authMode ?? 
- statusSelectedProvider.connection.configuredAuthMode; + mergedStatusProvider.connection.configuredAuthMode; } - if (statusSelectedProvider.providerId === 'codex') { + if (mergedStatusProvider.providerId === 'codex') { nextConnection.configuredAuthMode = - appConfig?.providerConnections?.codex.authMode ?? - statusSelectedProvider.connection.configuredAuthMode; - nextConnection.apiKeyBetaEnabled = - appConfig?.providerConnections?.codex.apiKeyBetaEnabled ?? - statusSelectedProvider.connection.apiKeyBetaEnabled; + appConfig?.providerConnections?.codex.preferredAuthMode ?? + mergedStatusProvider.connection.configuredAuthMode; } if (statusApiKeyConfig) { @@ -471,13 +658,13 @@ export const ProviderRuntimeSettingsDialog = ({ } return { - ...statusSelectedProvider, + ...mergedStatusProvider, connection: nextConnection, }; }, [ appConfig?.providerConnections?.anthropic.authMode, - appConfig?.providerConnections?.codex.apiKeyBetaEnabled, - appConfig?.providerConnections?.codex.authMode, + appConfig?.providerConnections?.codex.preferredAuthMode, + codexAccount.snapshot, selectedApiKey, statusApiKeyConfig, statusSelectedProvider, @@ -489,6 +676,10 @@ export const ProviderRuntimeSettingsDialog = ({ const runtimeSummary = selectedProvider ? getProviderRuntimeBackendSummary(selectedProvider) : null; + const codexConnection = + selectedProvider?.providerId === 'codex' ? (selectedProvider.connection?.codex ?? null) : null; + const codexLoginPending = + codexConnection?.login.status === 'starting' || codexConnection?.login.status === 'pending'; const configurableAuthModes = selectedProvider?.connection?.configurableAuthModes ?? []; const configuredAuthMode: CliProviderAuthMode | undefined = selectedProvider?.connection?.configuredAuthMode ?? configurableAuthModes[0] ?? 
undefined; @@ -505,7 +696,10 @@ export const ProviderRuntimeSettingsDialog = ({ : false; const hideConnectionMethodMeta = showConnectionMethodCards; const canConfigureRuntime = - !connectionManagedRuntime && (selectedProvider?.availableBackends?.length ?? 0) > 0; + !connectionManagedRuntime && + (selectedProvider + ? getVisibleProviderRuntimeBackendOptions(selectedProvider).length > 1 + : false); const apiKeyConfig = selectedProvider && isApiKeyProviderId(selectedProvider.providerId) @@ -515,26 +709,35 @@ export const ProviderRuntimeSettingsDialog = ({ selectedProvider && isApiKeyProviderId(selectedProvider.providerId) && activeApiKeyFormProviderId === selectedProvider.providerId; - const codexApiKeyBetaEnabled = selectedProvider?.connection?.apiKeyBetaEnabled === true; const showApiKeySection = Boolean( - apiKeyConfig && (selectedProvider?.providerId !== 'codex' || codexApiKeyBetaEnabled) + apiKeyConfig && + (selectedProvider?.providerId !== 'codex' || !selectedProvider.connection?.supportsOAuth) ); const connectionAlert = selectedProvider ? getConnectionAlert(selectedProvider) : null; - const connectionLoading = selectedProviderLoading || connectionSaving; + const connectionLoading = + selectedProviderLoading || + connectionSaving || + Boolean(selectedProvider?.providerId === 'codex' && codexAccount.loading && !codexConnection); const connectionBusy = disabled || connectionLoading; + const codexActionBusy = + disabled || selectedProviderLoading || connectionSaving || codexAccount.loading; const runtimeBusy = disabled || selectedProviderLoading || runtimeSaving; const connectionMethodCardsHint = selectedProvider ? getConnectionMethodCardsHint(selectedProvider) : null; + const codexAccountPanelHint = getCodexAccountPanelHint( + selectedProvider ?? null, + configuredAuthMode + ); const hasSubscriptionSession = selectedProvider?.providerId === 'anthropic' ? 
selectedProvider.authMethod === 'oauth_token' || selectedProvider.authMethod === 'claude.ai' - : selectedProvider?.providerId === 'codex' - ? selectedProvider.authMethod === 'oauth_token' - : false; + : false; const canRequestSubscriptionLogin = - Boolean(selectedProvider?.connection?.supportsOAuth && onRequestLogin) && + selectedProvider?.providerId === 'anthropic' && + Boolean(selectedProvider.connection?.supportsOAuth && onRequestLogin) && configuredAuthMode !== 'api_key' && + selectedProvider.statusMessage !== 'Checking...' && (!selectedProvider?.authenticated || hasSubscriptionSession || configuredAuthMode === 'oauth'); let connectionStatusLabel: string | null = null; if (selectedProvider) { @@ -555,21 +758,6 @@ export const ProviderRuntimeSettingsDialog = ({ } if (connectionSaving) { - if (selectedProvider.providerId === 'codex') { - switch (pendingConnectionAction) { - case 'codex-beta-on': - return 'Enabling API key mode...'; - case 'codex-beta-off': - return 'Disabling API key mode...'; - case 'api_key': - return 'Switching to OpenAI API key...'; - case 'oauth': - return 'Switching to Codex subscription...'; - default: - return 'Applying connection changes...'; - } - } - if (selectedProvider.providerId === 'anthropic') { switch (pendingConnectionAction) { case 'api_key': @@ -583,6 +771,19 @@ export const ProviderRuntimeSettingsDialog = ({ } } + if (selectedProvider.providerId === 'codex') { + switch (pendingConnectionAction) { + case 'chatgpt': + return 'Switching to ChatGPT account mode...'; + case 'api_key': + return 'Switching to API key mode...'; + case 'auto': + return 'Switching to Auto...'; + default: + return 'Applying connection changes...'; + } + } + return 'Applying connection changes...'; } @@ -687,13 +888,15 @@ export const ProviderRuntimeSettingsDialog = ({ authMode: nextAuthMode, }, }); - } else { + } else if (nextAuthMode !== 'oauth') { await updateConfig('providerConnections', { codex: { - authMode: nextAuthMode === 'api_key' ? 
'api_key' : 'oauth', + preferredAuthMode: nextAuthMode, }, }); + await codexAccount.refresh({ includeRateLimits: true, forceRefreshToken: true }); } + updateSucceeded = true; } catch (error) { setConnectionError(error instanceof Error ? error.message : 'Failed to update connection'); @@ -711,46 +914,43 @@ export const ProviderRuntimeSettingsDialog = ({ } }; - const handleCodexBetaToggle = async (enabled: boolean): Promise => { - const fallbackApiKeyScope = selectedApiKey?.scope ?? 'user'; - const shouldOpenApiKeyForm = - enabled && - selectedProvider?.providerId === 'codex' && - !selectedProvider.connection?.apiKeyConfigured && - !selectedApiKey; - - setConnectionSaving(true); - setPendingConnectionAction(enabled ? 'codex-beta-on' : 'codex-beta-off'); + const handleCodexAccountRefresh = async (): Promise => { setConnectionError(null); - let updateSucceeded = false; try { - await updateConfig('providerConnections', { - codex: { - apiKeyBetaEnabled: enabled, - authMode: enabled ? 'api_key' : 'oauth', - }, - }); - updateSucceeded = true; + await codexAccount.refresh({ includeRateLimits: true, forceRefreshToken: true }); + await onRefreshProvider?.('codex'); } catch (error) { - setConnectionError(error instanceof Error ? error.message : 'Failed to update connection'); - } finally { - if (updateSucceeded) { - if (shouldOpenApiKeyForm) { - setActiveApiKeyFormProviderId('codex'); - setApiKeyScope(fallbackApiKeyScope); - setApiKeyValue(''); - setApiKeyError(null); - } + setConnectionError( + error instanceof Error ? 
error.message : 'Failed to refresh Codex account' + ); + } + }; - try { - await onRefreshProvider?.('codex'); - } catch { - setConnectionError('Connection updated, but failed to refresh provider status.'); - } - } + const handleCodexStartLogin = async (): Promise => { + setConnectionError(null); + const success = await codexAccount.startChatgptLogin(); + if (!success && codexAccount.error) { + setConnectionError(codexAccount.error); + } + }; - setConnectionSaving(false); - setPendingConnectionAction(null); + const handleCodexCancelLogin = async (): Promise => { + setConnectionError(null); + const success = await codexAccount.cancelChatgptLogin(); + if (success) { + await onRefreshProvider?.('codex'); + } else if (codexAccount.error) { + setConnectionError(codexAccount.error); + } + }; + + const handleCodexLogout = async (): Promise => { + setConnectionError(null); + const success = await codexAccount.logout(); + if (success) { + await onRefreshProvider?.('codex'); + } else if (codexAccount.error) { + setConnectionError(codexAccount.error); } }; @@ -829,7 +1029,15 @@ export const ProviderRuntimeSettingsDialog = ({ {selectedProvider.authenticated @@ -889,77 +1097,12 @@ export const ProviderRuntimeSettingsDialog = ({ {selectedProvider.authenticated && (selectedProvider.authMethod === 'oauth_token' || selectedProvider.authMethod === 'claude.ai') - ? selectedProvider.providerId === 'codex' - ? 'Reconnect Codex' - : 'Reconnect Anthropic' + ? 'Reconnect Anthropic' : getProviderConnectLabel(selectedProvider)} ) : null}
    - {selectedProvider.providerId === 'codex' && - selectedProvider.connection?.apiKeyBetaAvailable && - !selectedProvider.connection.apiKeyBetaEnabled ? ( -
    -
    -
    -
    - Codex subscription -
    -
    - Use your Codex sign-in session and subscription access. -
    -
    - Current -
    -
    -
    -
    - OpenAI API key (Beta) -
    -
    - Use OPENAI_API_KEY and OpenAI API billing for Codex. -
    -
    - -
    -
    -
    -
    - ) : null} - {showConnectionMethodCards ? (
    @@ -1043,22 +1186,283 @@ export const ProviderRuntimeSettingsDialog = ({ {selectedProvider.connection.apiKeySourceLabel} ) : null} - {selectedProvider.providerId === 'codex' && - selectedProvider.connection?.apiKeyBetaEnabled ? ( - - ) : null}
    + {selectedProvider.providerId === 'codex' ? ( +
    +
    +
    +
    + ChatGPT account +
    +
    + Manage the local Codex app-server account session that powers + subscription-backed native launches. +
    +
    +
    + + {codexLoginPending ? ( + + ) : codexConnection?.managedAccount?.type === 'chatgpt' ? ( + + ) : ( + + )} +
    +
    + +
    + + {codexConnection?.managedAccount?.type === 'chatgpt' + ? 'Connected' + : codexLoginPending + ? 'Login in progress' + : 'Not connected'} + + {codexConnection ? ( + + App-server: {codexConnection.appServerState} + + ) : null} + {codexConnection?.managedAccount?.planType ? ( + + Plan: {codexConnection.managedAccount.planType} + + ) : null} + {codexConnection?.managedAccount?.email ? ( + + {codexConnection.managedAccount.email} + + ) : null} +
    + + {codexAccountPanelHint ? ( +
    + {codexAccountPanelHint} +
    + ) : null} + + {codexConnection?.rateLimits ? ( +
    +
    + These percentages show used quota, not remaining quota.{' '} + {formatCodexUsageExplanation( + codexConnection.rateLimits.primary?.usedPercent, + codexConnection.rateLimits.primary?.windowDurationMins + )} + {codexConnection.rateLimits.secondary + ? ` Weekly limits are shown separately in the ${ + formatCodexWindowDurationLong( + codexConnection.rateLimits.secondary.windowDurationMins + ) ?? 'secondary' + } window.` + : ''} +
    + +
    +
    + + + {codexConnection.rateLimits.secondary ? ( + + ) : ( +
    +
    + Weekly window +
    +
    + Weekly used (1w) +
    +
    + Not reported +
    +
    + Codex did not return a secondary window for this account snapshot. +
    +
    + )} +
    + +
    +
    +
    +
    + Credits +
    +
    + {formatCodexCreditsValue(codexConnection.rateLimits.credits)} +
    +
    +
    + Credits are shown separately from window-based subscription usage and + may be unavailable for plan-backed ChatGPT sessions. +
    +
    +
    +
    +
    + ) : null} +
    + ) : null} + {showApiKeySection && apiKeyConfig ? (
    = { auto: 'Auto', oauth: 'Subscription / OAuth', + chatgpt: 'ChatGPT account', api_key: 'API key', }; @@ -22,10 +22,6 @@ export function formatProviderAuthModeLabelForProvider( return null; } - if (providerId === 'codex' && authMode === 'oauth') { - return CODEX_SUBSCRIPTION_LABEL; - } - if (providerId === 'anthropic' && authMode === 'oauth') { return ANTHROPIC_SUBSCRIPTION_LABEL; } @@ -58,10 +54,6 @@ export function formatProviderAuthMethodLabelForProvider( providerId: CliProviderStatus['providerId'], authMethod: string | null ): string { - if (providerId === 'codex' && authMethod === 'oauth_token') { - return CODEX_SUBSCRIPTION_LABEL; - } - if (providerId === 'anthropic' && (authMethod === 'oauth_token' || authMethod === 'claude.ai')) { return ANTHROPIC_SUBSCRIPTION_LABEL; } @@ -69,24 +61,83 @@ export function formatProviderAuthMethodLabelForProvider( return formatProviderAuthMethodLabel(authMethod); } +function isCodexNativeLane(provider: CliProviderStatus): boolean { + return ( + provider.providerId === 'codex' && + (provider.resolvedBackendId === 'codex-native' || provider.selectedBackendId === 'codex-native') + ); +} + +function getSelectedRuntimeBackendOption( + provider: CliProviderStatus +): NonNullable[number] | null { + const options = provider.availableBackends ?? []; + if (options.length === 0) { + return null; + } + + const selectedBackendId = provider.selectedBackendId ?? null; + const resolvedBackendId = provider.resolvedBackendId ?? null; + + return ( + options.find((option) => option.id === selectedBackendId) ?? + options.find((option) => option.id === resolvedBackendId) ?? + null + ); +} + export function isConnectionManagedRuntimeProvider(provider: CliProviderStatus): boolean { return provider.providerId === 'codex'; } function getCodexCurrentRuntimeLabel(provider: CliProviderStatus): string { - if (provider.authenticated) { - return provider.authMethod === 'api_key' ? 
CODEX_API_KEY_LABEL : CODEX_SUBSCRIPTION_LABEL; + return CODEX_NATIVE_LABEL; +} + +function getCodexApiKeyAvailabilitySummary(provider: CliProviderStatus): string | null { + if (provider.providerId !== 'codex' || !provider.connection?.apiKeyConfigured) { + return null; } - if (provider.connection?.configuredAuthMode === 'api_key') { - return CODEX_API_KEY_LABEL; + if (provider.connection.apiKeySource === 'stored') { + return 'Saved API key available in Manage'; } - return CODEX_SUBSCRIPTION_LABEL; + return provider.connection.apiKeySourceLabel ?? 'API key is configured'; +} + +function getCodexMissingManagedAccountStatus(provider: CliProviderStatus): string | null { + if (provider.providerId !== 'codex') { + return null; + } + + const codexConnection = provider.connection?.codex; + if (!codexConnection || codexConnection.managedAccount?.type === 'chatgpt') { + return null; + } + + if (provider.connection?.configuredAuthMode !== 'chatgpt') { + return null; + } + + if (codexConnection.requiresOpenaiAuth) { + if (codexConnection.localActiveChatgptAccountPresent) { + return 'Codex has a locally selected ChatGPT account, but the current session needs reconnect.'; + } + + return codexConnection.localAccountArtifactsPresent + ? 'Codex CLI reports no active ChatGPT login. Local Codex account data exists, but no active managed session is selected.' + : 'Codex CLI reports no active ChatGPT login'; + } + + return ( + codexConnection.launchIssueMessage ?? + 'Connect a ChatGPT account to use your Codex subscription.' 
+ ); } export function getProviderCurrentRuntimeSummary(provider: CliProviderStatus): string | null { - if (provider.providerId !== 'codex') { + if (provider.providerId !== 'codex' || !isConnectionManagedRuntimeProvider(provider)) { return null; } @@ -95,6 +146,81 @@ export function getProviderCurrentRuntimeSummary(provider: CliProviderStatus): s } export function formatProviderStatusText(provider: CliProviderStatus): string { + const selectedBackendOption = getSelectedRuntimeBackendOption(provider); + + if (provider.providerId === 'codex') { + if (provider.connection?.codex?.login.status === 'starting') { + return 'Starting ChatGPT login...'; + } + + if (provider.connection?.codex?.login.status === 'pending') { + return 'Waiting for ChatGPT account login...'; + } + + if ( + provider.connection?.codex?.login.status === 'failed' && + provider.connection.codex.login.error + ) { + return provider.connection.codex.login.error; + } + + if ( + provider.connection?.codex?.appServerState === 'degraded' && + provider.connection.codex.effectiveAuthMode === 'chatgpt' && + provider.connection.codex.launchAllowed + ) { + return ( + provider.connection.codex.launchIssueMessage ?? + 'ChatGPT account detected - account verification is currently degraded.' + ); + } + + if (provider.connection?.codex?.launchAllowed) { + if (provider.connection.codex.effectiveAuthMode === 'chatgpt') { + return 'ChatGPT account ready'; + } + + if (provider.connection.codex.effectiveAuthMode === 'api_key') { + return 'API key ready'; + } + } + + const missingManagedAccountStatus = getCodexMissingManagedAccountStatus(provider); + if (missingManagedAccountStatus) { + return missingManagedAccountStatus; + } + + if (provider.connection?.codex?.launchIssueMessage) { + return provider.connection.codex.launchIssueMessage; + } + + if (selectedBackendOption?.statusMessage) { + return selectedBackendOption.statusMessage; + } + return ( + provider.statusMessage ?? (provider.authenticated ? 
'Codex native ready' : 'Not connected') + ); + } + + if ( + isCodexNativeLane(provider) && + selectedBackendOption && + selectedBackendOption.state && + selectedBackendOption.state !== 'ready' + ) { + return ( + selectedBackendOption.statusMessage ?? provider.statusMessage ?? 'Codex native unavailable' + ); + } + + if ( + isCodexNativeLane(provider) && + selectedBackendOption?.audience === 'internal' && + selectedBackendOption.statusMessage + ) { + return selectedBackendOption.statusMessage; + } + if (!provider.supported) { return provider.statusMessage ?? 'Unavailable in current runtime'; } @@ -118,15 +244,17 @@ export function getProviderConnectionModeSummary(provider: CliProviderStatus): s return null; } - if (provider.providerId === 'codex') { - return null; + if (provider.providerId === 'anthropic') { + if (provider.authenticated) { + return null; + } + + if (provider.connection?.configuredAuthMode === 'auto') { + return null; + } } - if (provider.providerId === 'anthropic' && provider.authenticated) { - return null; - } - - if (provider.providerId === 'anthropic' && provider.connection?.configuredAuthMode === 'auto') { + if (provider.providerId === 'codex' && provider.connection?.configuredAuthMode === 'auto') { return null; } @@ -134,7 +262,13 @@ export function getProviderConnectionModeSummary(provider: CliProviderStatus): s provider.providerId, provider.connection?.configuredAuthMode ?? null ); - return authModeLabel ? `Preferred auth: ${authModeLabel}` : null; + if (!authModeLabel) { + return null; + } + + return provider.providerId === 'codex' + ? `Selected auth: ${authModeLabel}` + : `Preferred auth: ${authModeLabel}`; } export function getProviderCredentialSummary(provider: CliProviderStatus): string | null { @@ -162,16 +296,32 @@ export function getProviderCredentialSummary(provider: CliProviderStatus): strin : (provider.connection.apiKeySourceLabel ?? 
'API key is configured'); } - if (provider.providerId === 'codex' && provider.connection?.apiKeyBetaEnabled !== true) { - return provider.connection.apiKeySource === 'stored' - ? 'OpenAI API key is saved in Manage. Enable API key mode to use it.' - : 'OpenAI API key detected. Enable API key mode in Manage to use it.'; - } + if (provider.providerId === 'codex') { + const apiKeyAvailabilitySummary = getCodexApiKeyAvailabilitySummary(provider); + if (!apiKeyAvailabilitySummary) { + return null; + } - if (provider.authMethod !== 'api_key' && provider.providerId === 'codex') { - return provider.connection.apiKeySource === 'stored' - ? 'OpenAI API key is also configured in Manage' - : (provider.connection.apiKeySourceLabel ?? 'OpenAI API key is configured'); + if ( + provider.connection.codex?.managedAccount?.type === 'chatgpt' || + provider.connection.codex?.effectiveAuthMode === 'chatgpt' + ) { + return provider.connection.apiKeySource === 'stored' + ? 'API key also available in Manage as fallback' + : `${apiKeyAvailabilitySummary} - available as fallback`; + } + + if (provider.connection.configuredAuthMode === 'chatgpt') { + return provider.connection.apiKeySource === 'stored' + ? 'Saved API key available in Manage if you switch to API key mode' + : `${apiKeyAvailabilitySummary} - available if you switch to API key mode`; + } + + if (provider.connection.configuredAuthMode === 'auto') { + return `${apiKeyAvailabilitySummary} - Auto will use this until ChatGPT is connected`; + } + + return apiKeyAvailabilitySummary; } return provider.connection.apiKeySourceLabel ?? null; @@ -202,17 +352,6 @@ export function getProviderDisconnectAction(provider: CliProviderStatus): { }; } - if (provider.providerId === 'codex' && provider.authMethod === 'oauth_token') { - return { - label: 'Disconnect', - confirmLabel: 'Disconnect', - title: 'Disconnect Codex subscription?', - message: provider.connection?.apiKeyConfigured - ? 
'This removes the local Codex subscription session from the Claude CLI runtime. Saved OPENAI_API_KEY credentials in Manage stay available.' - : 'This removes the local Codex subscription session from the Claude CLI runtime.', - }; - } - if (provider.providerId === 'gemini' && provider.authMethod === 'cli_oauth_personal') { return { label: 'Disconnect', @@ -232,7 +371,7 @@ export function getProviderConnectLabel(provider: CliProviderStatus): string { } if (provider.providerId === 'codex') { - return 'Connect Codex'; + return 'Connect ChatGPT'; } if (provider.providerId === 'gemini') { @@ -243,6 +382,10 @@ export function getProviderConnectLabel(provider: CliProviderStatus): string { } export function shouldShowProviderConnectAction(provider: CliProviderStatus): boolean { + if (provider.providerId === 'codex') { + return false; + } + if (!provider.canLoginFromUi || provider.authenticated) { return false; } diff --git a/src/renderer/components/settings/hooks/useSettingsHandlers.ts b/src/renderer/components/settings/hooks/useSettingsHandlers.ts index ac027199..a4f27e8e 100644 --- a/src/renderer/components/settings/hooks/useSettingsHandlers.ts +++ b/src/renderer/components/settings/hooks/useSettingsHandlers.ts @@ -333,14 +333,13 @@ export function useSettingsHandlers({ authMode: 'auto', }, codex: { - apiKeyBetaEnabled: false, - authMode: 'oauth', + preferredAuthMode: 'auto', }, }, runtime: { providerBackends: { gemini: 'auto', - codex: 'auto', + codex: 'codex-native', }, }, display: { diff --git a/src/renderer/components/settings/sections/CliStatusSection.tsx b/src/renderer/components/settings/sections/CliStatusSection.tsx index ba98a554..9122dda6 100644 --- a/src/renderer/components/settings/sections/CliStatusSection.tsx +++ b/src/renderer/components/settings/sections/CliStatusSection.tsx @@ -7,6 +7,10 @@ import { useCallback, useEffect, useMemo, useState } from 'react'; +import { + mergeCodexProviderStatusWithSnapshot, + useCodexAccountSnapshot, +} from 
'@features/codex-account/renderer'; import { isElectronMode } from '@renderer/api'; import { confirm } from '@renderer/components/common/ConfirmDialog'; import { ProviderBrandLogo } from '@renderer/components/common/ProviderBrandLogo'; @@ -29,6 +33,8 @@ import { useCliInstaller } from '@renderer/hooks/useCliInstaller'; import { useStore } from '@renderer/store'; import { createLoadingMultimodelCliStatus } from '@renderer/store/slices/cliInstallerSlice'; import { formatBytes } from '@renderer/utils/formatters'; +import { refreshCliStatusForCurrentMode } from '@renderer/utils/refreshCliStatus'; +import { getRuntimeDisplayName } from '@renderer/utils/runtimeDisplayName'; import { AlertTriangle, CheckCircle, @@ -80,6 +86,34 @@ function isProviderCardLoading(provider: CliProviderStatus, providerLoading: boo ); } +function isCodexSnapshotPending( + provider: CliProviderStatus, + codexSnapshotPending: boolean +): boolean { + return provider.providerId === 'codex' && codexSnapshotPending; +} + +function shouldMaskCodexNegativeBootstrapState( + sourceProvider: CliProviderStatus | null, + mergedProvider: CliProviderStatus +): boolean { + return ( + sourceProvider?.providerId === 'codex' && + sourceProvider.statusMessage === 'Checking...' && + mergedProvider.providerId === 'codex' && + mergedProvider.connection?.codex?.launchReadinessState === 'missing_auth' && + mergedProvider.connection.codex.login.status === 'idle' + ); +} + +function getProviderStatusColor(statusText: string, authenticated: boolean): string { + if (statusText === 'Checking...') { + return 'var(--color-text-secondary)'; + } + + return authenticated ? 
'#4ade80' : 'var(--color-text-muted)'; +} + function getProviderLabel(providerId: CliProviderId): string { switch (providerId) { case 'anthropic': @@ -105,6 +139,15 @@ function getProviderTerminalCommand(provider: CliProviderStatus): { }; } + if (provider.providerId === 'codex') { + return { + args: ['auth', 'login', '--provider', provider.providerId], + env: { + CLAUDE_CODE_CODEX_BACKEND: provider.selectedBackendId ?? 'codex-native', + }, + }; + } + return { args: ['auth', 'login', '--provider', provider.providerId], }; @@ -124,6 +167,15 @@ function getProviderTerminalLogoutCommand(provider: CliProviderStatus): { }; } + if (provider.providerId === 'codex') { + return { + args: ['auth', 'logout', '--provider', provider.providerId], + env: { + CLAUDE_CODE_CODEX_BACKEND: provider.selectedBackendId ?? 'codex-native', + }, + }; + } + return { args: ['auth', 'logout', '--provider', provider.providerId], }; @@ -159,10 +211,43 @@ export const CliStatusSection = (): React.JSX.Element | null => { const [manageDialogOpen, setManageDialogOpen] = useState(false); const [isSwitchingFlavor, setIsSwitchingFlavor] = useState(false); const multimodelEnabled = appConfig?.general?.multimodelEnabled ?? true; - const effectiveCliStatus = + const loadingCliStatus = !cliStatus && cliStatusLoading && multimodelEnabled ? createLoadingMultimodelCliStatus() : cliStatus; + const codexAccount = useCodexAccountSnapshot({ + enabled: + isElectron && + multimodelEnabled && + loadingCliStatus?.flavor === 'agent_teams_orchestrator' && + Boolean(loadingCliStatus?.providers.some((provider) => provider.providerId === 'codex')), + includeRateLimits: true, + }); + const codexSnapshotPending = + codexAccount.loading && + Boolean(loadingCliStatus?.providers.some((provider) => provider.providerId === 'codex')) && + !codexAccount.snapshot; + const effectiveCliStatus = useMemo( + () => + loadingCliStatus + ? 
{ + ...loadingCliStatus, + providers: loadingCliStatus.providers.map((provider) => + provider.providerId === 'codex' + ? mergeCodexProviderStatusWithSnapshot(provider, codexAccount.snapshot) + : provider + ), + } + : loadingCliStatus, + [codexAccount.snapshot, loadingCliStatus] + ); + const loadingCliProviderMap = useMemo( + () => + new Map( + (loadingCliStatus?.providers ?? []).map((provider) => [provider.providerId, provider]) + ), + [loadingCliStatus?.providers] + ); const canOpenExtensions = effectiveCliStatus?.installed === true; const showInstalledControls = effectiveCliStatus !== null && (installerState === 'idle' || installerState === 'completed'); @@ -184,8 +269,12 @@ export const CliStatusSection = (): React.JSX.Element | null => { }, [installCli]); const handleRefresh = useCallback(() => { - void fetchCliStatus(); - }, [fetchCliStatus]); + void refreshCliStatusForCurrentMode({ + multimodelEnabled, + bootstrapCliStatus, + fetchCliStatus, + }); + }, [bootstrapCliStatus, fetchCliStatus, multimodelEnabled]); const handleProviderLogout = useCallback( async (providerId: CliProviderId) => { @@ -224,9 +313,13 @@ export const CliStatusSection = (): React.JSX.Element | null => { const recheckStatus = useCallback(() => { void (async () => { await invalidateCliStatus(); - await fetchCliStatus(); + await refreshCliStatusForCurrentMode({ + multimodelEnabled, + bootstrapCliStatus, + fetchCliStatus, + }); })(); - }, [fetchCliStatus, invalidateCliStatus]); + }, [bootstrapCliStatus, fetchCliStatus, invalidateCliStatus, multimodelEnabled]); const handleMultimodelToggle = useCallback( async (enabled: boolean) => { @@ -263,7 +356,7 @@ export const CliStatusSection = (): React.JSX.Element | null => { async (providerId: CliProviderId, backendId: string) => { const currentBackends = appConfig?.runtime?.providerBackends ?? 
{ gemini: 'auto' as const, - codex: 'auto' as const, + codex: 'codex-native' as const, }; if (providerId !== 'gemini' && providerId !== 'codex') { @@ -288,14 +381,15 @@ export const CliStatusSection = (): React.JSX.Element | null => { if (!isElectron) return null; + const runtimeDisplayName = getRuntimeDisplayName(effectiveCliStatus, multimodelEnabled); const runtimeLabel = effectiveCliStatus?.flavor === 'agent_teams_orchestrator' ? null : effectiveCliStatus && effectiveCliStatus.showVersionDetails && effectiveCliStatus.installedVersion - ? `${effectiveCliStatus.displayName} v${effectiveCliStatus.installedVersion ?? 'unknown'}` - : (effectiveCliStatus?.displayName ?? 'Claude CLI'); + ? `${runtimeDisplayName} v${effectiveCliStatus.installedVersion ?? 'unknown'}` + : runtimeDisplayName; const activeTerminalProvider = providerTerminal ? (effectiveCliStatus?.providers.find( @@ -445,11 +539,22 @@ export const CliStatusSection = (): React.JSX.Element | null => { {(() => { const providerLoading = cliProviderStatusLoading[provider.providerId] === true; - const showSkeleton = isProviderCardLoading(provider, providerLoading); + const showSkeleton = + isProviderCardLoading(provider, providerLoading) || + isCodexSnapshotPending(provider, codexSnapshotPending); const runtimeSummary = isConnectionManagedRuntimeProvider(provider) ? getProviderCurrentRuntimeSummary(provider) : getProviderRuntimeBackendSummary(provider); - const statusText = formatProviderStatusText(provider); + const sourceProvider = + loadingCliProviderMap.get(provider.providerId) ?? null; + const maskNegativeBootstrapState = shouldMaskCodexNegativeBootstrapState( + sourceProvider, + provider + ); + const effectiveShowSkeleton = showSkeleton || maskNegativeBootstrapState; + const statusText = effectiveShowSkeleton + ? 'Checking...' 
+ : formatProviderStatusText(provider); const connectionModeSummary = getProviderConnectionModeSummary(provider); const credentialSummary = getProviderCredentialSummary(provider); const disconnectAction = getProviderDisconnectAction(provider); @@ -480,15 +585,16 @@ export const CliStatusSection = (): React.JSX.Element | null => { {statusText}
    - {showSkeleton ? ( + {effectiveShowSkeleton ? ( ) : hasDetailContent ? (
    { {disconnectAction.label} - ) : shouldShowProviderConnectAction(provider) ? ( + ) : !effectiveShowSkeleton && + shouldShowProviderConnectAction(provider) ? (
    - {!showSkeleton && provider.models.length > 0 && ( + {!effectiveShowSkeleton && provider.models.length > 0 && (
    {
    {effectiveCliStatus.binaryPath && effectiveCliStatus.launchError - ? 'Claude CLI was found but failed to start' - : 'Claude CLI not installed'} + ? `${runtimeDisplayName} was found but failed to start` + : `${runtimeDisplayName} not installed`}
    {effectiveCliStatus.showBinaryPath && effectiveCliStatus.binaryPath && (
    @@ -646,16 +753,16 @@ export const CliStatusSection = (): React.JSX.Element | null => { > {effectiveCliStatus.binaryPath && effectiveCliStatus.launchError - ? 'Reinstall Claude CLI' - : 'Install Claude CLI'} + ? `Reinstall ${runtimeDisplayName}` + : `Install ${runtimeDisplayName}`}
    )} {!effectiveCliStatus.installed && !effectiveCliStatus.supportsSelfUpdate && (

    {effectiveCliStatus.binaryPath && effectiveCliStatus.launchError - ? `The configured ${effectiveCliStatus.displayName} runtime failed its startup health check.` - : `The configured ${effectiveCliStatus.displayName} runtime was not found.`} + ? `The configured ${runtimeDisplayName} failed its startup health check.` + : `The configured ${runtimeDisplayName} was not found.`}

    )}
    @@ -761,9 +868,9 @@ export const CliStatusSection = (): React.JSX.Element | null => {
    {providerTerminal && cliStatus?.binaryPath && ( { const colors = getTeamColorSet(color ?? ''); const { isLight } = useTheme(); + const selectedTeamName = useStore((s) => s.selectedTeamName); + const effectiveTeamName = teamName ?? selectedTeamName; + const teamMembers = useStore((s) => + effectiveTeamName ? selectResolvedMembersForTeamName(s, effectiveTeamName) : [] + ); + const avatarMap = useMemo(() => buildMemberAvatarMap(teamMembers), [teamMembers]); const avatarSize = size === 'md' ? 32 : size === 'sm' ? 24 : 18; const avatarClass = size === 'md' ? 'size-6' : size === 'sm' ? 'size-5' : 'size-4'; const textClass = size === 'md' ? 'text-xs' : size === 'sm' ? 'text-[10px]' : 'text-[9px]'; @@ -53,7 +67,7 @@ export const MemberBadge = ({ const avatar = ( { const elapsed = useElapsedTimer(startedAt, loading); - const [logsOpen, setLogsOpen] = useState( - () => defaultLogsOpen ?? (Boolean(cliLogsTail) && loading) - ); + const [logsOpen, setLogsOpen] = useState(() => defaultLogsOpen ?? false); const [liveOutputOpen, setLiveOutputOpen] = useState(defaultLiveOutputOpen); const outputScrollRef = useRef(null); const isError = tone === 'error'; @@ -192,29 +190,6 @@ export const ProvisioningProgressBlock = ({ } }, [isError, cliLogsTail]); - // Open CLI logs while loading, collapse when done (unless error). - const prevLoadingRef = useRef(loading); - const hadLogsRef = useRef(Boolean(cliLogsTail)); - useEffect(() => { - if (!isError) { - const hasLogs = Boolean(cliLogsTail); - - if (loading && hasLogs && !hadLogsRef.current) { - // Logs just appeared while loading → open - setLogsOpen(true); - } else if (loading && !prevLoadingRef.current && hasLogs) { - // Started loading with logs already present → open - setLogsOpen(true); - } else if (!loading && prevLoadingRef.current) { - // Finished loading → collapse - setLogsOpen(false); - } - - hadLogsRef.current = hasLogs; - } - prevLoadingRef.current = loading; - }, [loading, cliLogsTail, isError]); - return (
    (null); - const [pendingRepliesByMember, setPendingRepliesByMember] = useState>({}); + const [pendingRepliesByMember, setPendingRepliesByMember] = useState>(() => + getTeamPendingRepliesState(teamName) + ); const [createTaskDialog, setCreateTaskDialog] = useState({ open: false, defaultSubject: '', @@ -923,7 +933,13 @@ export const TeamDetailView = ({ const [removeMemberConfirm, setRemoveMemberConfirm] = useState(null); const [updatingRoleLoading, setUpdatingRoleLoading] = useState(false); const [editDialogOpen, setEditDialogOpen] = useState(false); - const [launchDialogOpen, setLaunchDialogOpen] = useState(false); + const [launchDialogState, setLaunchDialogState] = useState<{ + open: boolean; + mode: TeamLaunchDialogMode; + }>({ + open: false, + mode: 'launch', + }); const [editorOpen, setEditorOpen] = useState(false); const [graphOpen, setGraphOpen] = useState(false); const contentRef = useRef(null); @@ -1155,6 +1171,7 @@ export const TeamDetailView = ({ const [activeTeamsForLaunch, setActiveTeamsForLaunch] = useState< { teamName: string; displayName: string; projectPath: string }[] >([]); + const launchDialogOpen = launchDialogState.open; // Session loading and filtering state const [sessions, setSessions] = useState([]); @@ -1200,6 +1217,8 @@ export const TeamDetailView = ({ clearProvisioningError, isTeamProvisioning, refreshTeamData, + refreshTeamMessagesHead, + refreshMemberActivityMeta, syncTeamPendingReplyRefresh, kanbanFilterQuery, clearKanbanFilter, @@ -1251,6 +1270,8 @@ export const TeamDetailView = ({ loading: s.selectedTeamName === teamName ? s.selectedTeamLoading : false, error: s.selectedTeamName === teamName ? 
s.selectedTeamError : null, refreshTeamData: s.refreshTeamData, + refreshTeamMessagesHead: s.refreshTeamMessagesHead, + refreshMemberActivityMeta: s.refreshMemberActivityMeta, syncTeamPendingReplyRefresh: s.syncTeamPendingReplyRefresh, kanbanFilterQuery: s.kanbanFilterQuery, clearKanbanFilter: s.clearKanbanFilter, @@ -1274,6 +1295,7 @@ export const TeamDetailView = ({ const tabId = useTabIdOptional(); const activeTabId = useStore((s) => s.activeTabId); const isThisTabActive = tabId ? activeTabId === tabId : false; + const wasInteractiveRef = useRef(false); useEffect(() => { const now = Date.now(); @@ -1337,6 +1359,14 @@ export const TeamDetailView = ({ } }, [tabId, initTabUIState]); + useEffect(() => { + setPendingRepliesByMember(getTeamPendingRepliesState(teamName)); + }, [teamName]); + + useEffect(() => { + setTeamPendingRepliesState(teamName, pendingRepliesByMember); + }, [pendingRepliesByMember, teamName]); + useEffect(() => { const wasProvisioning = wasProvisioningRef.current; wasProvisioningRef.current = isTeamProvisioning; @@ -1375,6 +1405,32 @@ export const TeamDetailView = ({ } }, [isThisTabActive, teamName, storedTeamName, loading, selectTeam]); + useEffect(() => { + const isInteractive = isThisTabActive && isPaneFocused; + const justBecameInteractive = isInteractive && !wasInteractiveRef.current; + wasInteractiveRef.current = isInteractive; + if (!justBecameInteractive || !teamName) { + return; + } + + void (async () => { + try { + const headResult = await refreshTeamMessagesHead(teamName); + if (headResult.feedChanged) { + await refreshMemberActivityMeta(teamName); + } + } catch { + // Best-effort refresh on tab focus. 
+ } + })(); + }, [ + isPaneFocused, + isThisTabActive, + refreshMemberActivityMeta, + refreshTeamMessagesHead, + teamName, + ]); + // Fetch active teams when launch dialog opens (for conflict warning) useEffect(() => { if (!launchDialogOpen) return; @@ -1537,6 +1593,10 @@ export const TeamDetailView = ({ return nextMember; }); }, [leadBranch, members, trackedBranches]); + const resolvedMemberColorMap = useMemo( + () => buildMemberColorMap(membersWithLiveBranches), + [membersWithLiveBranches] + ); // Filter sessions to team-only using sessionHistory + leadSessionId const teamSessionIds = useMemo(() => { @@ -1661,10 +1721,49 @@ export const TeamDetailView = ({ setSendDialogOpen(true); }, []); - const handleRestartTeam = useCallback(() => { - setLaunchDialogOpen(true); + const openLaunchDialog = useCallback((mode: TeamLaunchDialogMode) => { + setLaunchDialogState({ open: true, mode }); }, []); + const closeLaunchDialog = useCallback(() => { + setLaunchDialogState((prev) => ({ ...prev, open: false })); + }, []); + + const handleRestartTeam = useCallback(() => { + openLaunchDialog('relaunch'); + }, [openLaunchDialog]); + + const handleLaunchDialogSubmit = useCallback( + async (request: TeamLaunchRequest): Promise => { + await launchTeam(request); + }, + [launchTeam] + ); + + const handleRelaunchDialogSubmit = useCallback( + async ( + request: TeamLaunchRequest, + nextMembers: TeamCreateRequest['members'] + ): Promise => { + await executeTeamRelaunch({ + teamName, + isTeamAlive: data?.isAlive === true, + request, + members: nextMembers, + stopTeam: (nextTeamName) => api.teams.stop(nextTeamName), + replaceMembers: (nextTeamName, nextRequest) => + api.teams.replaceMembers(nextTeamName, nextRequest), + launchTeam, + }); + }, + [data?.isAlive, launchTeam, teamName] + ); + + const handleChangeLeadRuntime = useCallback(() => { + setEditDialogOpen(false); + openLaunchDialog(data?.isAlive && !isTeamProvisioning ? 
'relaunch' : 'launch'); + }, [data?.isAlive, isTeamProvisioning, openLaunchDialog]); + const handleSelectMember = useCallback((member: ResolvedTeamMember) => { setSelectedMember(member); setSelectedMemberView(null); @@ -1912,6 +2011,7 @@ export const TeamDetailView = ({ onReplyToMessage: handleReplyToMessage, onRestartTeam: handleRestartTeam, onTaskIdClick: handleTaskIdClick, + inlineScrollContainerRef: contentRef, }), [ activeMembers, @@ -2010,7 +2110,7 @@ export const TeamDetailView = ({
    @@ -2027,17 +2127,16 @@ export const TeamDetailView = ({
    setLaunchDialogOpen(false)} - onLaunch={async (request) => { - await launchTeam(request); - }} + onClose={closeLaunchDialog} + onLaunch={handleLaunchDialogSubmit} + onRelaunch={handleRelaunchDialogSubmit} /> ); @@ -2168,12 +2267,17 @@ export const TeamDetailView = ({ variant="ghost" size="sm" className="h-7 gap-1 px-2 text-xs text-[var(--color-text-muted)] hover:text-[var(--color-text)]" + disabled={isTeamProvisioning} onClick={() => setEditDialogOpen(true)} > - Edit team + + {isTeamProvisioning + ? 'Edit team is unavailable while provisioning is still in progress' + : 'Edit team'} + @@ -2294,7 +2398,7 @@ export const TeamDetailView = ({ {!data.isAlive && !isTeamProvisioning ? ( setLaunchDialogOpen(true)} + onLaunch={() => openLaunchDialog('launch')} /> ) : null} @@ -2708,9 +2812,13 @@ export const TeamDetailView = ({ currentDescription={data.config.description ?? ''} currentColor={data.config.color ?? ''} currentMembers={membersWithLiveBranches.filter((m) => !isLeadMember(m))} + leadMember={membersWithLiveBranches.find((m) => isLeadMember(m)) ?? null} + resolvedMemberColorMap={resolvedMemberColorMap} isTeamAlive={data.isAlive && !isTeamProvisioning} + isTeamProvisioning={isTeamProvisioning} projectPath={data.config.projectPath} onClose={() => setEditDialogOpen(false)} + onChangeLeadRuntime={handleChangeLeadRuntime} onSaved={() => void selectTeam(teamName)} /> @@ -2801,7 +2909,7 @@ export const TeamDetailView = ({ setLaunchDialogOpen(false)} - onLaunch={async (request) => { - await launchTeam(request); - }} + onClose={closeLaunchDialog} + onLaunch={handleLaunchDialogSubmit} + onRelaunch={handleRelaunchDialogSubmit} /> [t.id, t])); const entries: ActivityEntry[] = []; @@ -115,7 +117,7 @@ export const ActiveTasksBlock = ({
    ; + /** + * Root element for IntersectionObserver-based visibility tracking. + * Typically the same node as `scrollElementRef`, but left separate so + * future code can observe a more specific inner container when needed. + */ + observerRoot?: RefObject; + /** + * Distance from the scroll container's scroll origin to the timeline root, + * measured from the DOM. Zero in this release; used by the virtualizer in a + * follow-up change. + */ + scrollMargin?: number; + /** Enable virtualization (wired in a follow-up; ignored for now). */ + virtualizationEnabled?: boolean; +} + interface ActivityTimelineProps { messages: InboxMessage[]; teamName: string; @@ -66,6 +126,14 @@ interface ActivityTimelineProps { onExpandItem?: (key: string) => void; /** Called when ExpandableContent is expanded via "Show more" in any ActivityItem. */ onExpandContent?: () => void; + /** + * Optional viewport contract. When provided, IntersectionObserver uses the + * passed `observerRoot` instead of the document viewport, which is required + * for correctness inside scrollable layouts (sidebar, bottom-sheet) where + * the row may be clipped by its scroll parent while still intersecting the + * page viewport. + */ + viewport?: TimelineViewport; } const VIEWPORT_THRESHOLD = 0.15; @@ -74,6 +142,59 @@ const COMPACT_MESSAGES_WIDTH_PX = 400; const EMPTY_TEAM_NAMES: string[] = []; const EMPTY_TEAM_COLOR_MAP = new Map(); const DEFAULT_COLLAPSE_MODE = 'default' as const; +const VIRTUALIZER_OVERSCAN = 8; +const VIRTUALIZATION_ROW_GAP_PX = 4; + +/** + * Row count above which virtualization is worth its complexity cost. Below + * this, the direct render path is both simpler and faster (no wrapper div, + * no position: absolute, no measurement churn). Chosen so conversations under + * roughly one session of activity stay on the direct path and the virtualized + * path only activates when scrolling behavior actually starts to matter. 
+ */ +const VIRTUALIZATION_ROW_THRESHOLD = 60; + +/** + * Per-kind height estimates for `estimateSize`. These are rough initial guesses + * only; the virtualizer re-measures rows as they mount via `measureElement` + * (wired in a follow-up PR), so small inaccuracies here are self-correcting. + * Sizes come from visually averaged steady-state heights in production layouts. + */ +const ROW_SIZE_ESTIMATES: Record = { + 'session-separator': 135, + 'compaction-divider': 50, + 'lead-thought-group': 220, + 'message-row': 140, +}; + +function collectScrollMarginObserverTargets( + rootElement: HTMLElement, + scrollElement: HTMLElement +): HTMLElement[] { + const targets = new Set([rootElement, scrollElement]); + + let current: HTMLElement | null = rootElement; + while (current && current !== scrollElement) { + const parentElement: HTMLElement | null = current.parentElement; + if (!parentElement) { + break; + } + + targets.add(parentElement); + + let previousSibling: Element | null = current.previousElementSibling; + while (previousSibling) { + if (previousSibling instanceof HTMLElement) { + targets.add(previousSibling); + } + previousSibling = previousSibling.previousElementSibling; + } + + current = parentElement; + } + + return [...targets]; +} function getItemSessionAnchorId(item: TimelineItem): string | undefined { if (item.type === 'lead-thoughts') { @@ -141,6 +262,7 @@ const MessageRowWithObserver = ({ onExpand, expandItemKey, onExpandContent, + observerRoot, }: { message: InboxMessage; teamName: string; @@ -170,6 +292,7 @@ const MessageRowWithObserver = ({ onExpand?: (key: string) => void; expandItemKey?: string; onExpandContent?: () => void; + observerRoot?: RefObject; }): React.JSX.Element => { const ref = useRef(null); const reportedRef = useRef(false); @@ -185,6 +308,10 @@ const MessageRowWithObserver = ({ if (!onVisible) return; const el = ref.current; if (!el) return; + // Resolve the observer root at effect-time. 
Falls back to the document + // viewport (null) when no root is provided — preserves pre-contract + // behavior for layouts without a known scroll owner. + const root = observerRoot?.current ?? null; const observer = new IntersectionObserver( ([entry]) => { if (!entry?.isIntersecting) return; @@ -195,11 +322,11 @@ const MessageRowWithObserver = ({ reportedRef.current = true; cb(msg); }, - { threshold: VIEWPORT_THRESHOLD, rootMargin: '0px' } + { root, threshold: VIEWPORT_THRESHOLD, rootMargin: '0px' } ); observer.observe(el); return () => observer.disconnect(); - }, [onVisible]); + }, [onVisible, observerRoot]); return ( @@ -265,6 +392,7 @@ const MemoizedMessageRowWithObserver = React.memo( prev.onExpand === next.onExpand && prev.expandItemKey === next.expandItemKey && prev.onExpandContent === next.onExpandContent && + prev.observerRoot === next.observerRoot && areInboxMessagesEquivalentForRender(prev.message, next.message) ); @@ -291,7 +419,9 @@ export const ActivityTimeline = React.memo(function ActivityTimeline({ onTeamClick, onExpandItem, onExpandContent, + viewport, }: ActivityTimelineProps): React.JSX.Element { + const observerRoot = viewport?.observerRoot ?? viewport?.scrollElementRef; const [visibleCount, setVisibleCount] = useState(MESSAGES_PAGE_SIZE); const rootRef = useRef(null); const [compactHeader, setCompactHeader] = useState(false); @@ -444,6 +574,129 @@ export const ActivityTimeline = React.memo(function ActivityTimeline({ const pinnedThoughtGroup = timelineItems[0]?.type === 'lead-thoughts' ? timelineItems[0] : null; const startIndex = pinnedThoughtGroup ? 1 : 0; + // Flatten timelineItems into atomic render rows. Each row maps to exactly + // one visual element — no Fragment bundles session separators with their + // owning item, because a windowing layer (landing in a follow-up PR) needs + // each row to be measurable and addressable independently. 
+ const renderRows = useMemo(() => { + const rows: TimelineRow[] = []; + if (pinnedThoughtGroup) { + rows.push({ + kind: 'lead-thought-group', + key: getThoughtGroupKey(pinnedThoughtGroup.group), + itemIndex: 0, + group: pinnedThoughtGroup.group, + isPinned: true, + }); + } + for (let i = startIndex; i < timelineItems.length; i += 1) { + const item = timelineItems[i]; + if (i > 0) { + const currSessionId = getItemSessionAnchorId(item); + const prevSessionId = previousSessionAnchorByIndex[i]; + if (prevSessionId && currSessionId && prevSessionId !== currSessionId) { + // Include itemIndex in the key so a repeated transition (e.g. lead + // sessions A→B→A→B) does not collide on key `A->B` twice — React + // treats duplicate keys as the same element and reuses state + // across unrelated separators. + rows.push({ + kind: 'session-separator', + key: `session-separator-${i}-${prevSessionId}->${currSessionId}`, + }); + } + } + if (item.type === 'lead-thoughts') { + rows.push({ + kind: 'lead-thought-group', + key: getThoughtGroupKey(item.group), + itemIndex: i, + group: item.group, + isPinned: false, + }); + continue; + } + const message = item.message; + if (isCompactionMessage(message)) { + rows.push({ + kind: 'compaction-divider', + key: `compaction-${toMessageKey(message)}`, + message, + }); + continue; + } + rows.push({ + kind: 'message-row', + key: toMessageKey(message), + itemIndex: i, + message, + }); + } + return rows; + }, [pinnedThoughtGroup, previousSessionAnchorByIndex, startIndex, timelineItems]); + + // Virtualizer gate — activates only when the parent opts in via + // `viewport.virtualizationEnabled`, the scroll element ref is present, and + // the row count is large enough for virtualization to pay for itself. Below + // the threshold the direct render path is both simpler and faster, so we + // keep it for short lists. 
+ const shouldVirtualize = + viewport?.virtualizationEnabled === true && + viewport.scrollElementRef != null && + renderRows.length >= VIRTUALIZATION_ROW_THRESHOLD; + + // DOM-measured distance from the scroll container's scroll origin to the + // timeline root. We avoid re-measuring on every scroll: the offset only + // changes when layout above the timeline changes, so observe the timeline, + // its ancestor chain, and all previous siblings that can push it down. + const [measuredScrollMargin, setMeasuredScrollMargin] = useState(0); + + useLayoutEffect(() => { + if (!shouldVirtualize) return; + const scrollEl = viewport?.scrollElementRef?.current ?? null; + const rootEl = rootRef.current; + if (!scrollEl || !rootEl) return; + + let pending = false; + let rafId: number | null = null; + const measure = (): void => { + if (pending) return; + pending = true; + rafId = requestAnimationFrame(() => { + rafId = null; + pending = false; + const scrollRect = scrollEl.getBoundingClientRect(); + const rootRect = rootEl.getBoundingClientRect(); + // Distance from top of scroll content to top of timeline root. Adding + // `scrollTop` compensates for the fact that both rects are relative + // to the viewport at measurement time, not the scrollable content. + const next = Math.max(0, rootRect.top - scrollRect.top + scrollEl.scrollTop); + setMeasuredScrollMargin((prev) => (Math.abs(prev - next) < 0.5 ? prev : next)); + }); + }; + + measure(); + const resizeObserver = new ResizeObserver(measure); + const observedTargets = collectScrollMarginObserverTargets(rootEl, scrollEl); + observedTargets.forEach((target) => resizeObserver.observe(target)); + window.addEventListener('resize', measure); + + return () => { + if (rafId !== null) cancelAnimationFrame(rafId); + resizeObserver.disconnect(); + window.removeEventListener('resize', measure); + }; + }, [shouldVirtualize, viewport?.scrollElementRef]); + + const rowVirtualizer = useVirtualizer({ + count: shouldVirtualize ? 
renderRows.length : 0, + getScrollElement: () => viewport?.scrollElementRef?.current ?? null, + estimateSize: (index) => ROW_SIZE_ESTIMATES[renderRows[index]?.kind ?? 'message-row'], + getItemKey: (index) => renderRows[index]?.key ?? `row-${index}`, + overscan: VIRTUALIZER_OVERSCAN, + gap: VIRTUALIZATION_ROW_GAP_PX, + scrollMargin: measuredScrollMargin, + }); + // Determine the index of the "newest" non-thought timeline item (for auto-expand). const newestMessageIndex = useMemo(() => { return findNewestMessageIndex(timelineItems); @@ -485,6 +738,124 @@ export const ActivityTimeline = React.memo(function ActivityTimeline({ [allCollapsed, newestMessageIndex, pinnedThoughtGroup, expandOverrides, onToggleExpandOverride] ); + // Render a single atomic row. Logic per kind mirrors the previous inline + // render path; separators and dividers are their own rows rather than + // being bundled into Fragments, which is the contract the virtualizer will + // consume in a follow-up PR. + // + // `suppressEntryAnimation` is set when the caller is the virtualized path: + // the virtualizer mounts and unmounts rows as they enter and leave the + // viewport, so relying on mount as a signal of "this item is new" would + // replay the entry animation every time the user scrolls back to an old + // row. In the direct render path the flag stays false and animation still + // runs on real data-set additions. + const renderTimelineRow = ( + row: TimelineRow, + options?: { suppressEntryAnimation?: boolean } + ): React.JSX.Element | null => { + const suppressEntry = options?.suppressEntryAnimation === true; + switch (row.kind) { + case 'session-separator': + return ( +
    +
    + + New session + +
    +
    + ); + case 'compaction-divider': + return ; + case 'lead-thought-group': { + const { group, itemIndex, isPinned, key } = row; + const firstThought = group.thoughts[0]; + const info = memberInfo.get(firstThought.from); + const collapseProps = getItemCollapseProps(key, itemIndex); + const pinnedCanBeLive = isPinned + ? currentLeadSessionId + ? firstThought.leadSessionId === currentLeadSessionId + : true + : false; + return ( + + ); + } + case 'message-row': { + const { message, itemIndex, key } = row; + const renderProps = resolveMessageRenderProps(message, ctx); + const collapseProps = getItemCollapseProps(key, itemIndex); + const isUnread = readState + ? !message.read && !readState.readSet.has(readState.getMessageKey(message)) + : !message.read; + return ( + + ); + } + } + }; + if (messages.length === 0) { return (
    @@ -496,165 +867,49 @@ export const ActivityTimeline = React.memo(function ActivityTimeline({ return (
    - {/* Pinned (newest) thought group — always at top */} - {pinnedThoughtGroup && - (() => { - const { group } = pinnedThoughtGroup; - const firstThought = group.thoughts[0]; - const pinnedCanBeLive = currentLeadSessionId - ? firstThought.leadSessionId === currentLeadSessionId - : true; - const info = memberInfo.get(firstThought.from); - const itemKey = getThoughtGroupKey(group); - const stableKey = itemKey; - const collapseProps = getItemCollapseProps(stableKey, 0); - return ( - - ); - })()} - - {/* Remaining items */} - {timelineItems.slice(startIndex).map((item, index) => { - const realIndex = index + startIndex; - - // Session boundary separator (messages sorted desc — new on top) - let sessionSeparator: React.JSX.Element | null = null; - if (realIndex > 0) { - const currSessionId = getItemSessionAnchorId(item); - const prevSessionId = previousSessionAnchorByIndex[realIndex]; - if (prevSessionId && currSessionId && prevSessionId !== currSessionId) { - sessionSeparator = ( + {shouldVirtualize ? ( +
    + {rowVirtualizer.getVirtualItems().map((virtualRow) => { + const row = renderRows[virtualRow.index]; + if (!row) return null; + return (
    -
    - - New session - -
    + {renderTimelineRow(row, { suppressEntryAnimation: true })}
    ); - } - } - - if (item.type === 'lead-thoughts') { - const { group } = item; - const firstThought = group.thoughts[0]; - const info = memberInfo.get(firstThought.from); - const itemKey = getThoughtGroupKey(group); - const stableKey = itemKey; - const collapseProps = getItemCollapseProps(stableKey, realIndex); - return ( - - {sessionSeparator} - - - ); - } - - const { message } = item; - - // Compaction boundary — render as a divider instead of a regular message card - if (isCompactionMessage(message)) { - const messageKey = toMessageKey(message); - return ( - - {sessionSeparator} - - - ); - } - - const renderProps = resolveMessageRenderProps(message, ctx); - const messageKey = toMessageKey(message); - const stableKey = messageKey; - const collapseProps = getItemCollapseProps(stableKey, realIndex); - const isUnread = readState - ? !message.read && !readState.readSet.has(readState.getMessageKey(message)) - : !message.read; - return ( - - {sessionSeparator} - - - ); - })} + })} +
    + ) : ( + renderRows.map((row) => renderTimelineRow(row)) + )} {hiddenCount > 0 && (
    {/* Bottom-up shadow gradient: darkest at bottom edge, fades upward */} diff --git a/src/renderer/components/team/activity/LeadThoughtsGroup.tsx b/src/renderer/components/team/activity/LeadThoughtsGroup.tsx index 9ee1adc1..bcb1626f 100644 --- a/src/renderer/components/team/activity/LeadThoughtsGroup.tsx +++ b/src/renderer/components/team/activity/LeadThoughtsGroup.tsx @@ -1,6 +1,7 @@ import { type JSX, memo, + type RefObject, useCallback, useEffect, useLayoutEffect, @@ -157,6 +158,14 @@ interface LeadThoughtsGroupRowProps { memberColor?: string; isNew?: boolean; onVisible?: (message: InboxMessage) => void; + /** + * Root element for IntersectionObserver-based visibility tracking. When + * omitted, the observer falls back to the document viewport — correct for + * top-level renders, incorrect when the row is inside a scroll container + * (sidebar, bottom-sheet) that can clip the row while the document + * viewport still contains it. + */ + observerRoot?: RefObject; /** When false, the live indicator is always off (for historical thought groups). */ canBeLive?: boolean; /** Whether the owning team is currently alive. */ @@ -528,6 +537,7 @@ const LeadThoughtsGroupRowComponent = ({ memberColor, isNew, onVisible, + observerRoot, canBeLive, isTeamAlive, leadActivity, @@ -637,6 +647,9 @@ const LeadThoughtsGroupRowComponent = ({ if (!onVisible) return; const el = ref.current; if (!el) return; + // Resolve observer root at effect-time. Falls back to the document + // viewport when no root is provided — preserves pre-contract behavior. + const root = observerRoot?.current ?? 
null; const observer = new IntersectionObserver( ([entry]) => { if (!entry?.isIntersecting) return; @@ -647,11 +660,11 @@ const LeadThoughtsGroupRowComponent = ({ } reportedCountRef.current = thoughts.length; }, - { threshold: VIEWPORT_THRESHOLD, rootMargin: '0px' } + { root, threshold: VIEWPORT_THRESHOLD, rootMargin: '0px' } ); observer.observe(el); return () => observer.disconnect(); - }, [onVisible, thoughts]); + }, [onVisible, observerRoot, thoughts]); const clearPendingScrollSync = useCallback(() => { if (scrollSyncFrameRef.current !== null) { @@ -1134,5 +1147,6 @@ export const LeadThoughtsGroupRow = memo( prev.compactHeader === next.compactHeader && prev.onExpand === next.onExpand && prev.expandItemKey === next.expandItemKey && + prev.observerRoot === next.observerRoot && areThoughtGroupsEquivalent(prev.group, next.group) ); diff --git a/src/renderer/components/team/activity/MessageExpandDialog.tsx b/src/renderer/components/team/activity/MessageExpandDialog.tsx index bed2dcff..e0986355 100644 --- a/src/renderer/components/team/activity/MessageExpandDialog.tsx +++ b/src/renderer/components/team/activity/MessageExpandDialog.tsx @@ -9,7 +9,7 @@ import { } from '@renderer/components/ui/dialog'; import { CARD_ICON_MUTED } from '@renderer/constants/cssVariables'; import { getTeamColorSet } from '@renderer/constants/teamColors'; -import { agentAvatarUrl } from '@renderer/utils/memberHelpers'; +import { agentAvatarUrl, buildMemberAvatarMap } from '@renderer/utils/memberHelpers'; import { MemberBadge } from '../MemberBadge'; @@ -28,6 +28,7 @@ function formatTime(timestamp: string): string { interface DialogThoughtsContentProps { group: LeadThoughtGroup; + members?: ResolvedTeamMember[]; memberColor?: string; onTaskIdClick?: (taskId: string) => void; onReply?: (message: InboxMessage) => void; @@ -39,6 +40,7 @@ interface DialogThoughtsContentProps { const DialogThoughtsContent = ({ group, + members, memberColor, onTaskIdClick, onReply, @@ -51,6 +53,7 @@ const 
DialogThoughtsContent = ({ const newest = thoughts[0]; const oldest = thoughts[thoughts.length - 1]; const colors = getTeamColorSet(memberColor ?? ''); + const avatarMap = useMemo(() => buildMemberAvatarMap(members ?? []), [members]); const chronological = useMemo(() => [...thoughts].reverse(), [thoughts]); return ( @@ -58,7 +61,7 @@ const DialogThoughtsContent = ({ {/* Header */}
    s.pendingApprovals)); const colorMap = buildMemberColorMap(members); + const avatarMap = buildMemberAvatarMap(members); const memberPending = Object.entries(pendingRepliesByMember) .map(([name, sentAtMs]) => ({ kind: 'member' as const, @@ -111,7 +113,7 @@ export const PendingRepliesBlock = ({
    s.appConfig?.general?.multimodelEnabled ?? true); const cliStatus = useStore((s) => s.cliStatus); const cliStatusLoading = useStore((s) => s.cliStatusLoading); + const bootstrapCliStatus = useStore((s) => s.bootstrapCliStatus); const fetchCliStatus = useStore((s) => s.fetchCliStatus); + const loadingCliStatus = useMemo( + () => + !cliStatus && cliStatusLoading && multimodelEnabled + ? createLoadingMultimodelCliStatus() + : cliStatus, + [cliStatus, cliStatusLoading, multimodelEnabled] + ); + const codexAccount = useCodexAccountSnapshot({ + enabled: + multimodelEnabled && + loadingCliStatus?.flavor === 'agent_teams_orchestrator' && + Boolean(loadingCliStatus?.providers.some((provider) => provider.providerId === 'codex')), + }); + const effectiveCliStatus = useMemo( + () => mergeCodexCliStatusWithSnapshot(loadingCliStatus, codexAccount.snapshot), + [loadingCliStatus, codexAccount.snapshot] + ); // ── Persisted draft state (survives tab navigation) ────────────────── const { @@ -506,7 +532,9 @@ export const CreateTeamDialog = ({ }, [members, multimodelEnabled, selectedProviderId, soloTeam, syncModelsWithLead]); const runtimeBackendSummaryByProvider = useMemo(() => { - const entries: (readonly [TeamProviderId, string | null])[] = (cliStatus?.providers ?? []).map( + const entries: (readonly [TeamProviderId, string | null])[] = ( + effectiveCliStatus?.providers ?? 
[] + ).map( (provider) => [ provider.providerId as TeamProviderId, @@ -514,7 +542,7 @@ export const CreateTeamDialog = ({ ] as const ); return new Map(entries); - }, [cliStatus?.providers]); + }, [effectiveCliStatus?.providers]); const runtimeBackendSummaryByProviderRef = useRef(runtimeBackendSummaryByProvider); const prepareChecksRef = useRef([]); const prepareModelResultsCacheRef = useRef( @@ -554,8 +582,12 @@ export const CreateTeamDialog = ({ if (!open || cliStatus || cliStatusLoading) { return; } - void fetchCliStatus(); - }, [open, cliStatus, cliStatusLoading, fetchCliStatus]); + void refreshCliStatusForCurrentMode({ + multimodelEnabled, + bootstrapCliStatus, + fetchCliStatus, + }); + }, [bootstrapCliStatus, cliStatus, cliStatusLoading, fetchCliStatus, multimodelEnabled, open]); useEffect(() => { if (!open || !canCreate || !launchTeam) { @@ -940,7 +972,14 @@ export const CreateTeamDialog = ({ const mentionSuggestions = useMemo( () => soloTeam - ? [{ id: 'team-lead', name: 'team-lead', subtitle: 'Team Lead', color: 'blue' }] + ? [ + { + id: 'team-lead', + name: 'team-lead', + subtitle: 'Team Lead', + color: resolveTeamLeadColorName(), + }, + ] : buildMemberDraftSuggestions(members, memberColorMap), [memberColorMap, members, soloTeam] ); @@ -952,9 +991,11 @@ export const CreateTeamDialog = ({ const runtimeProviderStatusById = useMemo( () => new Map( - (cliStatus?.providers ?? []).map((provider) => [provider.providerId, provider] as const) + (effectiveCliStatus?.providers ?? []).map( + (provider) => [provider.providerId, provider] as const + ) ), - [cliStatus?.providers] + [effectiveCliStatus?.providers] ); const sanitizedTeamName = sanitizeTeamName(teamName.trim()); @@ -972,6 +1013,11 @@ export const CreateTeamDialog = ({ cwd: effectiveCwd, prompt: prompt.trim() || undefined, providerId: selectedProviderId, + providerBackendId: + resolveUiOwnedProviderBackendId( + selectedProviderId, + runtimeProviderStatusById.get(selectedProviderId) + ) ?? 
undefined, model: effectiveModel, effort: (selectedEffort as EffortLevel) || undefined, limitContext, @@ -988,6 +1034,7 @@ export const CreateTeamDialog = ({ effectiveCwd, prompt, selectedProviderId, + runtimeProviderStatusById, effectiveModel, selectedEffort, limitContext, @@ -1212,7 +1259,7 @@ export const CreateTeamDialog = ({ } }} > - + {initialData ? 'Copy Team' : 'Create Team'} diff --git a/src/renderer/components/team/dialogs/EditTeamDialog.tsx b/src/renderer/components/team/dialogs/EditTeamDialog.tsx index 8cf506be..9eaa1c4c 100644 --- a/src/renderer/components/team/dialogs/EditTeamDialog.tsx +++ b/src/renderer/components/team/dialogs/EditTeamDialog.tsx @@ -1,13 +1,15 @@ -import { useEffect, useState } from 'react'; +import { useEffect, useMemo, useRef, useState } from 'react'; import { api } from '@renderer/api'; import { buildMembersFromDrafts, createMemberDraftsFromInputs, filterEditableMemberInputs, + createMemberDraft, MembersEditorSection, validateMemberNameInline, } from '@renderer/components/team/members/MembersEditorSection'; +import { MemberDraftRow } from '@renderer/components/team/members/MemberDraftRow'; import { Button } from '@renderer/components/ui/button'; import { Dialog, @@ -21,8 +23,21 @@ import { getTeamColorSet, getThemedBadge } from '@renderer/constants/teamColors' import { useFileListCacheWarmer } from '@renderer/hooks/useFileListCacheWarmer'; import { useTheme } from '@renderer/hooks/useTheme'; import { cn } from '@renderer/lib/utils'; +import { + agentAvatarUrl, + buildMemberColorMap, + displayMemberName, +} from '@renderer/utils/memberHelpers'; +import { parseNumericSuffixName } from '@shared/utils/teamMemberName'; import { Loader2 } from 'lucide-react'; +import { + buildEditTeamSourceSnapshot, + getMemberRuntimeContractKey, + getLiveRosterIdentityChanges, + getMembersRequiringRuntimeRestart, +} from './editTeamRuntimeChanges'; + import type { ResolvedTeamMember } from '@shared/types'; const TEAM_COLOR_NAMES = [ @@ -43,16 +58,73 
@@ interface EditTeamDialogProps { currentDescription: string; currentColor: string; currentMembers: ResolvedTeamMember[]; + leadMember?: ResolvedTeamMember | null; + resolvedMemberColorMap?: ReadonlyMap; isTeamAlive?: boolean; + isTeamProvisioning?: boolean; projectPath?: string | null; onClose: () => void; - onSaved: () => void; + onChangeLeadRuntime: () => void; + onSaved: () => Promise | void; } function membersToDrafts(members: ResolvedTeamMember[]) { return createMemberDraftsFromInputs(filterEditableMemberInputs(members)); } +function useEditTeamErrorReset( + setError: (value: string | null) => void, + setSaveOutcomeError: (value: string | null) => void +): () => void { + return () => { + setError(null); + setSaveOutcomeError(null); + }; +} + +function getInvalidMemberNamesError( + members: readonly { + name: string; + removedAt?: number | string | null; + }[] +): string | null { + for (const member of members) { + if (member.removedAt) { + continue; + } + const name = member.name.trim(); + if (!name) { + return 'Member name cannot be empty'; + } + if (validateMemberNameInline(name) !== null) { + return 'Member name must start with alphanumeric, use only [a-zA-Z0-9._-], max 128 chars'; + } + const lower = name.toLowerCase(); + if (lower === 'user' || lower === 'team-lead') { + return `Member name "${name}" is reserved`; + } + const suffixInfo = parseNumericSuffixName(name); + if (suffixInfo && suffixInfo.suffix >= 2) { + return `Member name "${name}" is not allowed (reserved for Claude CLI auto-suffix). 
Use "${suffixInfo.base}" instead.`; + } + } + return null; +} + +function applyRemovedMembersToSnapshot( + members: readonly ResolvedTeamMember[], + removedMemberNames: readonly string[] +): ResolvedTeamMember[] { + if (removedMemberNames.length === 0) { + return [...members]; + } + const removedKeys = new Set(removedMemberNames.map((name) => name.trim().toLowerCase())); + const removedAt = Date.now(); + return members.map((member) => + removedKeys.has(member.name.trim().toLowerCase()) ? { ...member, removedAt } : member + ); +} + export const EditTeamDialog = ({ open, teamName, @@ -60,9 +132,13 @@ export const EditTeamDialog = ({ currentDescription, currentColor, currentMembers, + leadMember = null, + resolvedMemberColorMap, isTeamAlive = false, + isTeamProvisioning = false, projectPath, onClose, + onChangeLeadRuntime, onSaved, }: EditTeamDialogProps): React.JSX.Element => { const { isLight } = useTheme(); @@ -72,39 +148,305 @@ export const EditTeamDialog = ({ const [members, setMembers] = useState(() => membersToDrafts(currentMembers)); const [saving, setSaving] = useState(false); const [error, setError] = useState(null); + const [saveOutcomeError, setSaveOutcomeError] = useState(null); + const [membersPendingRestartRetry, setMembersPendingRestartRetry] = useState< + Record + >({}); + const wasOpenRef = useRef(false); + const initializedTeamNameRef = useRef(null); + const baselineSourceSnapshotRef = useRef(null); + const pendingCommittedSourceSnapshotRef = useRef(null); useFileListCacheWarmer(projectPath ?? null); + const clearTransientErrors = useEditTeamErrorReset(setError, setSaveOutcomeError); + const effectiveResolvedMemberColorMap = useMemo( + () => resolvedMemberColorMap ?? 
buildMemberColorMap(currentMembers), + [currentMembers, resolvedMemberColorMap] + ); + const leadDraft = useMemo(() => { + if (!leadMember) return null; + return createMemberDraft({ + id: `lead:${leadMember.name}`, + name: displayMemberName(leadMember.name), + originalName: leadMember.name, + roleSelection: '', + customRole: 'Team Lead', + workflow: leadMember.workflow, + providerId: leadMember.providerId, + model: leadMember.model ?? '', + effort: leadMember.effort, + }); + }, [leadMember]); useEffect(() => { + const wasOpen = wasOpenRef.current; if (open) { - setName(currentName); - setDescription(currentDescription); - setColor(currentColor); - setMembers(membersToDrafts(currentMembers)); - setError(null); + const shouldInitialize = !wasOpen || initializedTeamNameRef.current !== teamName; + if (shouldInitialize) { + setName(currentName); + setDescription(currentDescription); + setColor(currentColor); + setMembers(membersToDrafts(currentMembers)); + setError(null); + setSaveOutcomeError(null); + setMembersPendingRestartRetry({}); + initializedTeamNameRef.current = teamName; + baselineSourceSnapshotRef.current = buildEditTeamSourceSnapshot({ + name: currentName, + description: currentDescription, + color: currentColor, + members: currentMembers, + }); + pendingCommittedSourceSnapshotRef.current = null; + } else if (pendingCommittedSourceSnapshotRef.current !== null) { + const latestSourceSnapshot = buildEditTeamSourceSnapshot({ + name: currentName, + description: currentDescription, + color: currentColor, + members: currentMembers, + }); + if (latestSourceSnapshot === pendingCommittedSourceSnapshotRef.current) { + baselineSourceSnapshotRef.current = latestSourceSnapshot; + pendingCommittedSourceSnapshotRef.current = null; + } + } + } else if (wasOpen) { + initializedTeamNameRef.current = null; + baselineSourceSnapshotRef.current = null; + pendingCommittedSourceSnapshotRef.current = null; } - }, [open, currentName, currentDescription, currentColor, 
currentMembers]); + wasOpenRef.current = open; + }, [open, teamName, currentName, currentDescription, currentColor, currentMembers]); + + const builtMembers = useMemo(() => buildMembersFromDrafts(members), [members]); + const invalidMemberNamesError = useMemo(() => getInvalidMemberNamesError(members), [members]); + const hasDuplicateMembers = useMemo(() => { + const names = members + .filter((member) => !member.removedAt) + .map((member) => member.name.trim().toLowerCase()) + .filter(Boolean); + return new Set(names).size !== names.length; + }, [members]); + const membersToRestart = useMemo( + () => + isTeamAlive + ? getMembersRequiringRuntimeRestart({ + previousMembers: currentMembers, + nextMembers: builtMembers, + }) + : [], + [builtMembers, currentMembers, isTeamAlive] + ); + const builtMembersByName = useMemo( + () => + new Map(builtMembers.map((member) => [member.name.trim().toLowerCase(), member] as const)), + [builtMembers] + ); + const effectiveMembersToRestart = useMemo(() => { + const retryMembers = Object.entries(membersPendingRestartRetry).flatMap( + ([normalizedName, expectedRuntimeContractKey]) => { + const nextMember = builtMembersByName.get(normalizedName); + if (!nextMember) { + return []; + } + return getMemberRuntimeContractKey(nextMember) === expectedRuntimeContractKey + ? [nextMember.name.trim()] + : []; + } + ); + return Array.from( + new Set( + [...membersToRestart, ...retryMembers] + .map((memberName) => memberName.trim()) + .filter(Boolean) + ) + ); + }, [builtMembersByName, membersPendingRestartRetry, membersToRestart]); + const liveIdentityChanges = useMemo( + () => + isTeamAlive + ? getLiveRosterIdentityChanges({ + previousMembers: currentMembers, + nextDrafts: members, + }) + : { renamed: [], removed: [] }, + [currentMembers, isTeamAlive, members] + ); + const hasBlockedLiveIdentityChanges = liveIdentityChanges.renamed.length > 0; + const liveRemovedExistingMembers = useMemo( + () => (isTeamAlive ? 
liveIdentityChanges.removed : []), + [isTeamAlive, liveIdentityChanges.removed] + ); + const hasNewLiveTeammates = useMemo( + () => + isTeamAlive && members.some((member) => !member.removedAt && !member.originalName?.trim()), + [isTeamAlive, members] + ); + const memberWarningById = useMemo(() => { + const restartNames = new Set( + effectiveMembersToRestart.map((memberName) => memberName.trim().toLowerCase()) + ); + if (restartNames.size === 0) { + return undefined; + } + return Object.fromEntries( + members.map((member) => [ + member.id, + restartNames.has(member.name.trim().toLowerCase()) + ? 'Saving will restart this teammate to apply role, workflow, provider, model, or effort changes.' + : null, + ]) + ); + }, [effectiveMembersToRestart, members]); const handleSave = (): void => { if (!name.trim()) { setError('Team name cannot be empty'); return; } - const builtMembers = buildMembersFromDrafts(members); + if (invalidMemberNamesError) { + setError(invalidMemberNamesError); + return; + } + if (hasDuplicateMembers) { + setError('Member names must be unique before saving'); + return; + } + const latestSourceSnapshot = buildEditTeamSourceSnapshot({ + name: currentName, + description: currentDescription, + color: currentColor, + members: currentMembers, + }); + const allowedSourceSnapshots = new Set( + [baselineSourceSnapshotRef.current, pendingCommittedSourceSnapshotRef.current].filter( + (value): value is string => value !== null + ) + ); + if (allowedSourceSnapshots.size > 0 && !allowedSourceSnapshots.has(latestSourceSnapshot)) { + setError( + 'Team settings changed while this dialog was open. Reopen it and review the latest state before saving.' + ); + return; + } + if (hasBlockedLiveIdentityChanges) { + setError( + `Existing teammates cannot be renamed while the team is live. renamed: ${liveIdentityChanges.renamed.join(', ')}` + ); + return; + } + if (isTeamProvisioning) { + setError( + 'Team settings cannot be edited while provisioning is still in progress. 
Wait for launch to finish, then try again.' + ); + return; + } + if (hasNewLiveTeammates) { + setError( + 'Add new teammates from the dedicated Add member dialog while the team is live. Edit Team only supports updating existing teammates.' + ); + return; + } setSaving(true); setError(null); + setSaveOutcomeError(null); void (async () => { + let configSaved = false; + let membersSaved = false; + let committedMembersForSnapshot: ResolvedTeamMember[] = currentMembers; try { await api.teams.updateConfig(teamName, { name: name.trim(), description: description.trim(), color, }); + configSaved = true; + for (const removedMemberName of liveRemovedExistingMembers) { + await api.teams.removeMember(teamName, removedMemberName); + committedMembersForSnapshot = applyRemovedMembersToSnapshot(committedMembersForSnapshot, [ + removedMemberName, + ]); + } await api.teams.replaceMembers(teamName, { members: builtMembers }); - onSaved(); - onClose(); + membersSaved = true; + pendingCommittedSourceSnapshotRef.current = buildEditTeamSourceSnapshot({ + name: name.trim(), + description: description.trim(), + color: color.trim(), + members: builtMembers.map((member) => ({ + name: member.name, + role: member.role, + workflow: member.workflow, + providerId: member.providerId, + model: member.model, + effort: member.effort, + })) as ResolvedTeamMember[], + }); + + const restartFailures: string[] = []; + const failedRestartMembers: string[] = []; + for (const memberName of effectiveMembersToRestart) { + try { + await api.teams.restartMember(teamName, memberName); + } catch (restartError) { + const detail = + restartError instanceof Error ? 
restartError.message : String(restartError); + failedRestartMembers.push(memberName); + restartFailures.push(`${memberName} (${detail})`); + } + } + + await Promise.resolve(onSaved()); + if (restartFailures.length === 0) { + setMembersPendingRestartRetry({}); + onClose(); + return; + } + + setMembersPendingRestartRetry( + Object.fromEntries( + failedRestartMembers.flatMap((memberName) => { + const nextMember = builtMembersByName.get(memberName.trim().toLowerCase()); + if (!nextMember) { + return []; + } + return [ + [memberName.trim().toLowerCase(), getMemberRuntimeContractKey(nextMember)] as const, + ]; + }) + ) + ); + setSaveOutcomeError( + `Team saved, but failed to restart ${restartFailures.length === 1 ? 'this teammate' : 'these teammates'}: ${restartFailures.join(', ')}` + ); } catch (e) { - setError(e instanceof Error ? e.message : 'Failed to save'); + const message = e instanceof Error ? e.message : 'Failed to save'; + if (membersSaved) { + setSaveOutcomeError( + `Team changes were saved, but failed to refresh the latest view: ${message}` + ); + } else if (configSaved) { + pendingCommittedSourceSnapshotRef.current = buildEditTeamSourceSnapshot({ + name: name.trim(), + description: description.trim(), + color: color.trim(), + members: committedMembersForSnapshot, + }); + let refreshErrorDetail: string | null = null; + try { + await Promise.resolve(onSaved()); + } catch (refreshError) { + refreshErrorDetail = + refreshError instanceof Error ? refreshError.message : String(refreshError); + } + setSaveOutcomeError( + refreshErrorDetail + ? `Team settings were saved, but member changes failed: ${message}. 
Refresh also failed: ${refreshErrorDetail}` + : `Team settings were saved, but member changes failed: ${message}` + ); + } else { + setError(message); + } } finally { setSaving(false); } @@ -113,7 +455,7 @@ export const EditTeamDialog = ({ return ( !nextOpen && onClose()}> - + Edit Team Change team name, description and color @@ -131,7 +473,10 @@ export const EditTeamDialog = ({ id="edit-team-name" type="text" value={name} - onChange={(e) => setName(e.target.value)} + onChange={(e) => { + clearTransientErrors(); + setName(e.target.value); + }} onKeyDown={(e) => { if (e.key === 'Enter' && !saving && name.trim()) handleSave(); }} @@ -149,7 +494,10 @@ export const EditTeamDialog = ({