From 2c9926c7347377c2d244b33b5890ad0b39b60dc8 Mon Sep 17 00:00:00 2001 From: iliya Date: Wed, 8 Apr 2026 16:55:01 +0300 Subject: [PATCH] perf(team): cache lead session parsing --- docs/research/lead-session-cache-plan.md | 587 +++++++++++++++++ src/main/services/team/TeamDataService.ts | 100 ++- .../team/cache/LeadSessionParseCache.ts | 162 +++++ .../services/team/TeamDataService.test.ts | 605 +++++++++++++++++- 4 files changed, 1440 insertions(+), 14 deletions(-) create mode 100644 docs/research/lead-session-cache-plan.md create mode 100644 src/main/services/team/cache/LeadSessionParseCache.ts diff --git a/docs/research/lead-session-cache-plan.md b/docs/research/lead-session-cache-plan.md new file mode 100644 index 00000000..dd5ce6ce --- /dev/null +++ b/docs/research/lead-session-cache-plan.md @@ -0,0 +1,587 @@ +# Lead Session Parsing Cache Plan + +## Goal + +Reduce repeated `team:getData` cost for active teams by avoiding reparsing the same resolved lead-session JSONL file when the file has not changed. + +Primary target: + +- repeated refreshes of a visible active team + +Not a primary target for phase 1: + +- the very first uncached team open + +Reason: + +- the first miss still runs the current extraction logic +- phase 1 is a warm-path optimization, not a cold-path extraction rewrite + +## Why This Is The Safest First Optimization + +Today, `getTeamData()` repeatedly pays for lead-session history assembly: + +1. resolve candidate project session directories +2. list `.jsonl` files +3. find the matching session file +4. parse the same JSONL twice +5. merge, sort, and dedup messages +6. repeat the same work on the next refresh even if the file is unchanged +7. still duplicate work under concurrent misses because there is no in-flight coalescing here + +Relevant code: + +- [src/main/services/team/TeamDataService.ts#L494](/Users/belief/dev/projects/claude/claude_team_freecode/src/main/services/team/TeamDataService.ts#L494) +- [src/main/services/team/TeamDataService.ts#L2150](/Users/belief/dev/projects/claude/claude_team_freecode/src/main/services/team/TeamDataService.ts#L2150) +- [src/main/services/team/TeamDataService.ts#L2324](/Users/belief/dev/projects/claude/claude_team_freecode/src/main/services/team/TeamDataService.ts#L2324) +- [src/main/services/team/leadSessionMessageExtractor.ts#L96](/Users/belief/dev/projects/claude/claude_team_freecode/src/main/services/team/leadSessionMessageExtractor.ts#L96) +- [src/main/utils/pathDecoder.ts#L5](/Users/belief/dev/projects/claude/claude_team_freecode/src/main/utils/pathDecoder.ts#L5) + +This is safer than introducing a new lightweight IPC path because it does not change: + +- what data is returned +- when refresh happens +- how live `lead_process` rows merge with persisted history +- any renderer/main truth boundary + +## Guardrails + +This work touches `main process / IPC` and team detail truth assembly. + +Main failure modes to avoid: + +- stale lead-session history shown after the file changed +- cache poisoning due to file mutation during parse +- cross-request mutation bleed from shared cached objects +- cache key mistakes caused by lossy path encoding assumptions + +The change should preserve: + +- current message shapes +- current message IDs +- current extraction semantics +- current team detail IPC contract + +## Scope + +### In scope + +- add a bounded in-memory cache for parsed lead-session results +- reuse cached results only when the exact resolved session file is unchanged +- coalesce concurrent misses for the same file signature +- keep the returned `InboxMessage[]` shape unchanged +- add focused tests for freshness, invalidation, concurrency, and mutation safety + +### Out of scope + +- new IPC endpoints +- renderer refresh strategy changes +- changing `lead_process` merge/dedup logic +- replacing file scans with watchers +- rewriting extraction into a brand-new single-pass parser in phase 1 +- caching project-dir resolution in phase 1 + +## Design Constraints + +### 1. Cache after `jsonlPath` resolution, not before + +Do not cache by `projectPath` or encoded project dir. + +Why: + +- `encodePath()` is lossy for paths containing dashes +- `getLeadProjectDirCandidates()` already exists because one project can require multiple candidate directory probes + +Relevant code: + +- [src/main/services/team/TeamDataService.ts#L2135](/Users/belief/dev/projects/claude/claude_team_freecode/src/main/services/team/TeamDataService.ts#L2135) +- [src/main/utils/pathDecoder.ts#L27](/Users/belief/dev/projects/claude/claude_team_freecode/src/main/utils/pathDecoder.ts#L27) + +Rule: + +- cache only after the final `jsonlPath` has already been concretely resolved + +### 2. No TTL-based freshness + +This cache must not use time-based correctness semantics. + +There is a TTL-based advisory cache elsewhere: + +- [src/main/services/team/TeamMemberRuntimeAdvisoryService.ts](/Users/belief/dev/projects/claude/claude_team_freecode/src/main/services/team/TeamMemberRuntimeAdvisoryService.ts) + +That pattern is not appropriate here because lead-session rows are part of user-visible truth. + +Rule: + +- freshness comes only from file signature for the exact `jsonlPath` +- timestamps may be stored for eviction bookkeeping or debug only +- no "fresh for 5 seconds" shortcuts + +### 3. Instance-scoped cache, not module-global + +Preferred ownership: + +- `TeamDataService` owns the cache instance +- optionally inject the cache into `TeamDataService` for tests + +Do not use: + +- a hidden module-global singleton + +Why: + +- the app already uses a long-lived `TeamDataService` +- tests should get fresh state with a fresh service instance +- module-global state would make isolation and failure analysis worse + +### 4. First patch optimizes warm refreshes, not cold misses + +Phase 1 still leaves one important cost untouched: + +- on the first uncached read, the file is still parsed using the existing two-pass extraction path + +That is acceptable because: + +- it keeps correctness risk low +- it still removes repeated hot-refresh cost + +If cold misses remain too slow after this patch, the next candidate optimization is a single-pass extractor, not a riskier cache broadening. + +## Proposed Design + +### 1. Cache the combined per-file extraction result + +Integration point: + +- `extractLeadSessionTextsFromJsonl(...)` in `TeamDataService` + +Why here: + +- it already defines the combined contract used by `getTeamData()` +- it merges assistant thoughts plus slash-command results +- it provides one place to attach both memoization and in-flight coalescing +- it avoids two separate caches that could drift + +Do not cache: + +- raw `projectPath -> jsonlPath` resolution +- whole-team assembled message lists + +### 2. Cache key + +Phase-1 key should include: + +- `jsonlPath` +- `leadSessionId` +- `leadName` +- requested `maxTexts` +- cache schema version + +Important tradeoff: + +- keeping `maxTexts` in the key is less optimal for hit rate +- but it is safer than canonicalizing to a larger per-session cap in the first patch + +Why this conservative choice is correct: + +- the helper is private, but its caller passes a dynamic `remaining` budget based on earlier sessions +- requested-cap keys avoid subtle slicing mistakes in phase 1 + +### 3. Freshness signature + +Primary signature: + +- `size` +- `mtimeMs` + +Optional tie-breaker: + +- `ctimeMs` + +Why not content hashing in phase 1: + +- it adds more I/O and CPU to the hot path +- it makes the "optimization" more expensive + +Accepted residual risk: + +- a same-size rewrite on a coarse-timestamp filesystem is theoretically possible +- this is rare enough to accept for phase 1 if the cache is in-memory and tied to exact `jsonlPath` + +### 4. In-flight coalescing + +Memoization alone is not enough. + +If two refreshes miss the cache at once, both can still parse the same file. + +Required behavior: + +- keep a separate `inFlightByKey` map +- attach the observed pre-parse signature to the in-flight entry +- if another request sees the same key and same observed signature, await that promise +- if another request observes a newer signature, do not await the older in-flight promise + +Do not: + +- mix settled entries and in-flight promises in one map + +### 5. Do not cache ambiguous results + +This is the most important safety rule after exact-path scoping. + +If the file changes during parse: + +1. pre-parse stat sees signature `S1` +2. parse runs +3. post-parse stat sees signature `S2` + +If `S2 !== S1`: + +- return the parsed result to the current caller +- do not populate the fulfilled cache +- clear the in-flight entry + +Why: + +- the result may still be acceptable as a best-effort snapshot for that one request +- but it is not safe enough to promote into cache + +This is stricter and safer than caching under the old signature. + +### 6. Defensive cloning + +Never return the cached array instance directly. + +Why: + +- `getTeamData()` mutates messages during enrichment and dedup flows +- shared references would create cross-request contamination + +Clone requirements: + +- clone array +- clone each message object +- deep-clone nested mutable fields used by render/equality logic: + - `taskRefs` + - `attachments` + - `toolCalls` + - `slashCommand` + - `commandOutput` + +Do not: + +- attach cache metadata to returned message objects + +Relevant renderer equality code: + +- [src/renderer/utils/messageRenderEquality.ts](/Users/belief/dev/projects/claude/claude_team_freecode/src/renderer/utils/messageRenderEquality.ts) + +### 7. Bounded storage + +Use a small cap, for example: + +- `64` fulfilled entries + +Eviction: + +- insertion-order `Map` +- evict oldest fulfilled entry when exceeding cap + +The cache is best-effort only: + +- any entry may be dropped at any time +- eviction must never affect correctness, only latency + +### 8. Do not cache failures + +If stat, open, read, or parse fails: + +- preserve current behavior +- do not cache the error +- do not leave a rejected promise in `inFlightByKey` + +Why: + +- JSONL writes are concurrent +- transient failures must not become sticky stale state + +## Detailed Implementation Plan + +### Phase 1. Add a cache helper + +Suggested file: + +- `src/main/services/team/cache/LeadSessionParseCache.ts` + +Suggested operations: + +- `getIfFresh(key, signature)` +- `getInFlight(key, signature)` +- `setInFlight(key, signature, promise)` +- `set(key, signature, messages)` +- `clearForPath(jsonlPath)` +- optional `clearAll()` + +Helper responsibilities: + +- normalize key +- compare signatures +- return defensive clones +- coalesce concurrent misses +- bound fulfilled entries +- keep settled cache and in-flight state separate + +### Phase 2. Wire it into `TeamDataService` + +Change only `extractLeadSessionTextsFromJsonl(...)`. + +Algorithm: + +1. stat file before parse +2. build key +3. try fulfilled cache hit +4. if miss, try matching in-flight entry +5. if no matching in-flight entry, create one +6. run current combined extraction using existing helpers unchanged +7. stat file again after parse +8. if post-parse signature matches pre-parse signature, populate fulfilled cache +9. otherwise skip cache population +10. clear in-flight entry in `finally` + +Strict non-goals while wiring: + +- do not change message ordering +- do not change trimming semantics +- do not change message IDs +- do not change assistant-thought extraction logic +- do not change slash-command result extraction logic +- do not change project-dir resolution logic + +### Phase 3. Tests + +Prefer focused tests at the `TeamDataService` layer because the cache integrates there. + +Potentially add a dedicated cache helper test if the helper becomes non-trivial. + +### Phase 4. Re-evaluate after profiling + +Only if needed after phase 1: + +1. optional directory-listing cache +2. optional canonical per-session cache not keyed by requested `maxTexts` +3. optional single-pass extractor for cold misses + +Those are phase 2+ because each increases semantic risk. + +## Edge Cases + +### 1. File appended during read + +Current behavior already tolerates partial trailing lines by skipping invalid JSON lines. + +Phase-1 rule: + +- if signature changed during parse, do not cache result + +### 2. File deleted or moved after a previous successful cache fill + +Rule: + +- every cache lookup must be preceded by a fresh `stat` +- no successful fresh `stat` means no cache hit + +### 3. Different requested `maxTexts` + +Phase-1 rule: + +- requested cap stays in the cache key + +Why: + +- lower hit rate is acceptable +- semantic conservatism is more important in this area + +### 4. `leadName` changes while file is unchanged + +Rule: + +- include `leadName` in the key + +### 5. Concurrent refreshes + +Rule: + +- only same-key, same-signature requests may share an in-flight promise + +### 6. Newer request races an older in-flight parse + +If request A saw signature `S1` and request B sees `S2`: + +- request B must not await A's in-flight promise + +### 7. Path ambiguity from encoded project dirs + +Rule: + +- never treat encoded project dir as canonical cache identity +- only the resolved `jsonlPath` is canonical enough for phase 1 + +### 8. Fresh service instances in tests + +Rule: + +- new `TeamDataService` instance should imply fresh cache state + +## Areas With Lower Confidence + +### 1. File signature granularity + +Confidence: medium. + +Why: + +- filesystem timestamp granularity differs +- same-size rewrites are rare but possible + +Decision: + +- use `size + mtimeMs` primarily +- optionally add `ctimeMs` as tie-breaker +- document residual risk instead of overengineering phase 1 + +### 2. Requested-cap keys vs canonical per-session cache + +Confidence: medium. + +Why: + +- canonical caching could improve hit rate +- but requested-cap keys are more obviously equivalent to today's behavior + +Decision: + +- requested-cap keys in phase 1 + +### 3. In-flight signature matching + +Confidence: medium. + +Why: + +- easy to implement incorrectly if signature is not stored with the in-flight promise + +Decision: + +- store signature alongside each in-flight promise +- require exact signature match before reuse + +### 4. Post-parse signature mismatch handling + +Confidence: medium-high. + +Why: + +- returning but not caching ambiguous results is semantically conservative +- but this still leaves one request with a best-effort snapshot rather than a strong snapshot + +Decision: + +- accept current best-effort behavior for the single request +- forbid cache population on mismatch + +### 5. Cold-load expectations + +Confidence: high. + +Why: + +- phase 1 clearly improves warm refreshes more than first opens + +Decision: + +- state that explicitly +- do not oversell the patch + +## Testing Plan + +In [test/main/services/team/TeamDataService.test.ts](/Users/belief/dev/projects/claude/claude_team_freecode/test/main/services/team/TeamDataService.test.ts) or a dedicated cache test: + +1. repeated extraction of the same unchanged JSONL uses cached results +2. append invalidates cache and returns fresh results +3. changing `leadName` does not reuse stale sender data +4. returned cached results are cloned and caller mutation does not poison the next read +5. different `maxTexts` requests do not incorrectly reuse the wrong slice +6. two concurrent requests for the same uncached file coalesce into one parse +7. missing or deleted JSONL does not return stale cached content +8. partial trailing line remains tolerated and does not get cached as a sticky failure +9. path with dashes and underscores still works once `jsonlPath` is resolved +10. if a newer request sees a newer signature, it does not await an older in-flight parse +11. if the file changes during parse, result is returned but not stored in fulfilled cache +12. fresh `TeamDataService` instances do not share hidden cache state + +In [test/main/services/team/leadSessionMessageExtractor.test.ts](/Users/belief/dev/projects/claude/claude_team_freecode/test/main/services/team/leadSessionMessageExtractor.test.ts): + +13. existing extraction semantics remain unchanged + +Existing behavior that must still hold: + +- slash-command result merging +- message ordering +- stable message IDs + +## Verification + +Targeted first: + +```bash +bun test test/main/services/team/leadSessionMessageExtractor.test.ts test/main/services/team/TeamDataService.test.ts +``` + +Then broader team-service confidence if needed: + +```bash +bun test test/main/services/team +``` + +## Success Criteria + +The change is successful if: + +1. repeated `team:getData` for an unchanged active team avoids reparsing the same session file +2. new lead-session output appears after the file changes without manual invalidation +3. concurrent refreshes do not duplicate parse work for the same signature +4. newer signatures never reuse older in-flight parse results +5. ambiguous mid-read results are never committed into fulfilled cache +6. path-resolution behavior for project session discovery remains unchanged +7. existing TeamDataService and extractor tests still pass +8. no duplicate or missing lead thoughts appear compared with current behavior + +## Recommended Order + +1. add cache helper with bounded fulfilled entries and separate in-flight map +2. make it instance-scoped or injectable into `TeamDataService` +3. integrate only at `extractLeadSessionTextsFromJsonl(...)` +4. add pre-parse and post-parse signature checks +5. add focused tests for hit, miss, invalidation, mutation safety, concurrency, and no-cache-on-ambiguous-read +6. run targeted tests +7. only then consider directory-listing cache or single-pass extraction if profiling still points there + +## Recommendation + +Recommended phase-1 approach: + +- parse cache +- in-flight coalescing +- no cache population if the file changed during parse + +Ratings: + +- Parse-cache-only first step: 🎯 9 🛡️ 9 🧠 4 +- Parse-cache plus in-flight coalescing: 🎯 9 🛡️ 9 🧠 5 +- Parse-cache plus in-flight plus no-cache-on-mid-read-mutation: 🎯 9 🛡️ 10 🧠 6 +- Plus directory-listing cache in the same patch: 🎯 5 🛡️ 6 🧠 7 +- Skip cache and jump straight to new lightweight IPC API: 🎯 8 🛡️ 7 🧠 7 + +Estimated patch size for the recommended step: + +- around 180-320 changed lines diff --git a/src/main/services/team/TeamDataService.ts b/src/main/services/team/TeamDataService.ts index 57b543c3..f0eb1de0 100644 --- a/src/main/services/team/TeamDataService.ts +++ b/src/main/services/team/TeamDataService.ts @@ -31,6 +31,12 @@ import * as path from 'path'; import { gitIdentityResolver } from '../parsing/GitIdentityResolver'; import { atomicWriteAsync } from './atomicWrite'; +import { + areLeadSessionFileSignaturesEqual, + LeadSessionParseCache, + type LeadSessionFileSignature, + type LeadSessionParseCacheKey, +} from './cache/LeadSessionParseCache'; import { extractLeadSessionMessagesFromJsonl } from './leadSessionMessageExtractor'; import { buildTaskChangePresenceDescriptor } from './taskChangePresenceUtils'; import { TeamConfigReader } from './TeamConfigReader'; @@ -84,6 +90,7 @@ const logger = createLogger('Service:TeamDataService'); const MIN_TEXT_LENGTH = 30; const MAX_LEAD_TEXTS = 150; +const LEAD_SESSION_PARSE_CACHE_SCHEMA_VERSION = 'combined-v1'; const PROCESS_HEALTH_INTERVAL_MS = 2_000; const TASK_MAP_YIELD_EVERY = 250; const TASK_COMMENT_NOTIFICATION_SOURCE = 'system_notification'; @@ -133,7 +140,8 @@ export class TeamDataService { }), private readonly taskCommentNotificationJournal: TeamTaskCommentNotificationJournal = new TeamTaskCommentNotificationJournal(), private readonly teamMetaStore: TeamMetaStore = new TeamMetaStore(), - private memberRuntimeAdvisoryService: TeamMemberRuntimeAdvisoryService = new TeamMemberRuntimeAdvisoryService() + private memberRuntimeAdvisoryService: TeamMemberRuntimeAdvisoryService = new TeamMemberRuntimeAdvisoryService(), + private readonly leadSessionParseCache: LeadSessionParseCache = new LeadSessionParseCache() ) {} private getController(teamName: string): AgentTeamsController { @@ -2327,19 +2335,85 @@ export class TeamDataService { leadSessionId: string, maxTexts: number ): Promise { - const [assistantTexts, commandResults] = await Promise.all([ - this.extractLeadAssistantTextsFromJsonl(jsonlPath, leadName, leadSessionId, maxTexts), - extractLeadSessionMessagesFromJsonl({ - jsonlPath, - leadName, - leadSessionId, - maxMessages: maxTexts, - }), - ]); + const cacheKey: LeadSessionParseCacheKey = { + jsonlPath, + leadName, + leadSessionId, + maxTexts, + schemaVersion: LEAD_SESSION_PARSE_CACHE_SCHEMA_VERSION, + }; + const preParseSignature = await this.getLeadSessionFileSignature(jsonlPath); + if (preParseSignature) { + const cached = this.leadSessionParseCache.getIfFresh(cacheKey, preParseSignature); + if (cached) { + return cached; + } - const combined = [...assistantTexts, ...commandResults]; - combined.sort((a, b) => Date.parse(a.timestamp) - Date.parse(b.timestamp)); - return combined.length > maxTexts ? combined.slice(-maxTexts) : combined; + const inFlight = this.leadSessionParseCache.getInFlight(cacheKey, preParseSignature); + if (inFlight) { + return inFlight; + } + } + + const parse = async (): Promise => { + const [assistantTexts, commandResults] = await Promise.all([ + this.extractLeadAssistantTextsFromJsonl(jsonlPath, leadName, leadSessionId, maxTexts), + extractLeadSessionMessagesFromJsonl({ + jsonlPath, + leadName, + leadSessionId, + maxMessages: maxTexts, + }), + ]); + const combined = [...assistantTexts, ...commandResults]; + combined.sort((a, b) => Date.parse(a.timestamp) - Date.parse(b.timestamp)); + return combined.length > maxTexts ? combined.slice(-maxTexts) : combined; + }; + + if (!preParseSignature) { + return parse(); + } + + let resolveInFlight!: (messages: InboxMessage[]) => void; + let rejectInFlight!: (error: unknown) => void; + const parsePromise = new Promise((resolve, reject) => { + resolveInFlight = resolve; + rejectInFlight = reject; + }); + this.leadSessionParseCache.setInFlight(cacheKey, preParseSignature, parsePromise); + void parse().then(resolveInFlight, rejectInFlight); + + try { + const combined = await parsePromise; + const postParseSignature = await this.getLeadSessionFileSignature(jsonlPath); + if ( + postParseSignature && + areLeadSessionFileSignaturesEqual(preParseSignature, postParseSignature) + ) { + this.leadSessionParseCache.set(cacheKey, postParseSignature, combined); + } + return combined; + } finally { + this.leadSessionParseCache.clearInFlight(cacheKey, preParseSignature); + } + } + + private async getLeadSessionFileSignature( + jsonlPath: string + ): Promise { + try { + const stat = await fs.promises.stat(jsonlPath); + if (!stat.isFile()) { + return null; + } + return { + size: stat.size, + mtimeMs: stat.mtimeMs, + ...(Number.isFinite(stat.ctimeMs) ? { ctimeMs: stat.ctimeMs } : {}), + }; + } catch { + return null; + } } private async extractLeadSessionTexts(config: TeamConfig): Promise { diff --git a/src/main/services/team/cache/LeadSessionParseCache.ts b/src/main/services/team/cache/LeadSessionParseCache.ts new file mode 100644 index 00000000..f3df5660 --- /dev/null +++ b/src/main/services/team/cache/LeadSessionParseCache.ts @@ -0,0 +1,162 @@ +import type { InboxMessage } from '@shared/types'; + +export interface LeadSessionParseCacheKey { + jsonlPath: string; + leadSessionId: string; + leadName: string; + maxTexts: number; + schemaVersion: string; +} + +export interface LeadSessionFileSignature { + size: number; + mtimeMs: number; + ctimeMs?: number; +} + +interface LeadSessionParseCacheEntry { + signature: LeadSessionFileSignature; + messages: InboxMessage[]; + cachedAtMs: number; +} + +interface LeadSessionParseInFlightEntry { + signature: LeadSessionFileSignature; + promise: Promise; +} + +const DEFAULT_MAX_ENTRIES = 64; + +function keyToString(key: LeadSessionParseCacheKey): string { + return JSON.stringify([ + key.schemaVersion, + key.jsonlPath, + key.leadSessionId, + key.leadName, + key.maxTexts, + ]); +} + +function inFlightKeyToString( + key: LeadSessionParseCacheKey, + signature: LeadSessionFileSignature +): string { + return `${keyToString(key)}::${signature.size}:${signature.mtimeMs}:${signature.ctimeMs ?? ''}`; +} + +export function areLeadSessionFileSignaturesEqual( + left: LeadSessionFileSignature, + right: LeadSessionFileSignature +): boolean { + return ( + left.size === right.size && + left.mtimeMs === right.mtimeMs && + (left.ctimeMs ?? undefined) === (right.ctimeMs ?? undefined) + ); +} + +function cloneMessage(message: InboxMessage): InboxMessage { + return { + ...message, + ...(message.taskRefs ? { taskRefs: message.taskRefs.map((taskRef) => ({ ...taskRef })) } : {}), + ...(message.attachments + ? { attachments: message.attachments.map((attachment) => ({ ...attachment })) } + : {}), + ...(message.toolCalls + ? { toolCalls: message.toolCalls.map((toolCall) => ({ ...toolCall })) } + : {}), + ...(message.slashCommand ? { slashCommand: { ...message.slashCommand } } : {}), + ...(message.commandOutput ? { commandOutput: { ...message.commandOutput } } : {}), + }; +} + +function cloneMessages(messages: readonly InboxMessage[]): InboxMessage[] { + return messages.map(cloneMessage); +} + +export class LeadSessionParseCache { + private readonly entries = new Map(); + private readonly inFlightEntries = new Map(); + + constructor(private readonly maxEntries: number = DEFAULT_MAX_ENTRIES) {} + + getIfFresh( + key: LeadSessionParseCacheKey, + signature: LeadSessionFileSignature + ): InboxMessage[] | null { + const entryKey = keyToString(key); + const entry = this.entries.get(entryKey); + if (!entry) { + return null; + } + if (!areLeadSessionFileSignaturesEqual(entry.signature, signature)) { + this.entries.delete(entryKey); + return null; + } + return cloneMessages(entry.messages); + } + + getInFlight( + key: LeadSessionParseCacheKey, + signature: LeadSessionFileSignature + ): Promise | null { + const entry = this.inFlightEntries.get(inFlightKeyToString(key, signature)); + if (!entry) { + return null; + } + return entry.promise.then((messages) => cloneMessages(messages)); + } + + setInFlight( + key: LeadSessionParseCacheKey, + signature: LeadSessionFileSignature, + promise: Promise + ): void { + this.inFlightEntries.set(inFlightKeyToString(key, signature), { + signature, + promise, + }); + } + + clearInFlight(key: LeadSessionParseCacheKey, signature: LeadSessionFileSignature): void { + this.inFlightEntries.delete(inFlightKeyToString(key, signature)); + } + + set( + key: LeadSessionParseCacheKey, + signature: LeadSessionFileSignature, + messages: readonly InboxMessage[] + ): void { + const entryKey = keyToString(key); + this.entries.set(entryKey, { + signature, + messages: cloneMessages(messages), + cachedAtMs: Date.now(), + }); + while (this.entries.size > this.maxEntries) { + const oldestKey = this.entries.keys().next().value; + if (!oldestKey) { + break; + } + this.entries.delete(oldestKey); + } + } + + clearForPath(jsonlPath: string): void { + for (const key of this.entries.keys()) { + if (key.includes(`"${jsonlPath}"`)) { + this.entries.delete(key); + } + } + for (const key of this.inFlightEntries.keys()) { + if (key.includes(`"${jsonlPath}"`)) { + this.inFlightEntries.delete(key); + } + } + } + + clearAll(): void { + this.entries.clear(); + this.inFlightEntries.clear(); + } +} diff --git a/test/main/services/team/TeamDataService.test.ts b/test/main/services/team/TeamDataService.test.ts index 915a8d91..d6562433 100644 --- a/test/main/services/team/TeamDataService.test.ts +++ b/test/main/services/team/TeamDataService.test.ts @@ -1,11 +1,134 @@ -import { describe, expect, it, vi } from 'vitest'; +import * as fs from 'fs/promises'; +import * as os from 'os'; +import * as path from 'path'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +import { setClaudeBasePathOverride } from '../../../../src/main/utils/pathDecoder'; import { buildTaskChangePresenceDescriptor } from '../../../../src/main/services/team/taskChangePresenceUtils'; import { TeamDataService } from '../../../../src/main/services/team/TeamDataService'; import type { TeamTask } from '../../../../src/shared/types/team'; const TASK_COMMENT_FORWARDING_ENV = 'CLAUDE_TEAM_TASK_COMMENT_FORWARDING'; +const tempPaths: string[] = []; + +function createLeadAssistantEntry( + uuid: string, + timestamp: string, + text: string +): Record { + return { + uuid, + parentUuid: null, + type: 'assistant', + timestamp, + isSidechain: false, + userType: 'external', + cwd: '/repo', + sessionId: 'lead-1', + version: '1.0.0', + gitBranch: 'main', + requestId: `req-${uuid}`, + message: { + role: 'assistant', + model: 'claude-sonnet', + id: `msg-${uuid}`, + type: 'message', + stop_reason: 'end_turn', + stop_sequence: null, + usage: { + input_tokens: 1, + output_tokens: 1, + }, + content: [{ type: 'text', text }], + }, + }; +} + +async function createTempJsonl(entries: Record[]): Promise { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'team-data-lead-session-')); + tempPaths.push(dir); + const jsonlPath = path.join(dir, 'lead-1.jsonl'); + await fs.writeFile( + jsonlPath, + `${entries.map((entry) => JSON.stringify(entry)).join('\n')}\n`, + 'utf8' + ); + return jsonlPath; +} + +async function createTempJsonlInNamedDir( + dirName: string, + entries: Record[] +): Promise { + const dir = path.join(os.tmpdir(), dirName); + await fs.mkdir(dir, { recursive: true }); + tempPaths.push(dir); + const jsonlPath = path.join(dir, 'lead-1.jsonl'); + await fs.writeFile( + jsonlPath, + `${entries.map((entry) => JSON.stringify(entry)).join('\n')}\n`, + 'utf8' + ); + return jsonlPath; +} + +function createLeadSessionCachingService(): TeamDataService { + return new TeamDataService( + { + listTeams: vi.fn(), + getConfig: vi.fn(async () => ({ + name: 'My team', + members: [{ name: 'team-lead', role: 'Lead' }], + leadSessionId: 'lead-1', + })), + } as never, + { + getTasks: vi.fn(async () => []), + } as never, + { + listInboxNames: vi.fn(async () => []), + getMessages: vi.fn(async () => []), + } as never, + {} as never, + {} as never, + { + resolveMembers: vi.fn(() => []), + } as never, + { + getState: vi.fn(async () => ({ teamName: 'my-team', reviewers: [], tasks: {} })), + } as never, + {} as never, + { + getMembers: vi.fn(async () => []), + } as never, + { + readMessages: vi.fn(async () => []), + } as never, + (() => + ({ + processes: { + listProcesses: vi.fn(() => []), + }, + }) as never) as never, + {} as never, + {} as never, + { + getMemberAdvisories: vi.fn(async () => new Map()), + } as never + ); +} + +afterEach(async () => { + setClaudeBasePathOverride(null); + vi.restoreAllMocks(); + await Promise.all( + tempPaths.splice(0).map(async (tempPath) => { + await fs.rm(tempPath, { recursive: true, force: true }); + }) + ); +}); function createForwardingJournalStore(initialEntries: Array> = []) { const journalEntries = initialEntries; @@ -2152,4 +2275,484 @@ describe('TeamDataService', () => { }, }); }); + + it('caches unchanged lead-session extraction results and returns defensive clones', async () => { + const service = createLeadSessionCachingService(); + const jsonlPath = await createTempJsonl([ + createLeadAssistantEntry( + 'assistant-1', + '2026-03-27T22:17:01.000Z', + 'This is a sufficiently long assistant thought for cache validation.' + ), + ]); + + const assistantSpy = vi.spyOn(service as never, 'extractLeadAssistantTextsFromJsonl' as never); + const extract = ( + service as unknown as { + extractLeadSessionTextsFromJsonl: ( + jsonlPath: string, + leadName: string, + leadSessionId: string, + maxTexts: number + ) => Promise>; + } + ).extractLeadSessionTextsFromJsonl.bind(service); + + const first = await extract(jsonlPath, 'team-lead', 'lead-1', 150); + first[0]!.text = 'mutated locally'; + + const second = await extract(jsonlPath, 'team-lead', 'lead-1', 150); + + expect(assistantSpy).toHaveBeenCalledTimes(1); + expect(second[0]?.text).toBe( + 'This is a sufficiently long assistant thought for cache validation.' + ); + }); + + it('coalesces concurrent lead-session parses for the same file signature', async () => { + const service = createLeadSessionCachingService(); + const jsonlPath = await createTempJsonl([ + createLeadAssistantEntry( + 'assistant-1', + '2026-03-27T22:17:01.000Z', + 'This is a sufficiently long assistant thought for in-flight coalescing.' + ), + ]); + + const originalExtract = ( + service as unknown as { + extractLeadAssistantTextsFromJsonl: ( + jsonlPath: string, + leadName: string, + leadSessionId: string, + maxTexts: number + ) => Promise>; + } + ).extractLeadAssistantTextsFromJsonl.bind(service); + const assistantSpy = vi + .spyOn(service as never, 'extractLeadAssistantTextsFromJsonl' as never) + .mockImplementation(async (...args: unknown[]) => { + const [targetPath, leadName, leadSessionId, maxTexts] = args as [ + string, + string, + string, + number, + ]; + await new Promise((resolve) => setTimeout(resolve, 25)); + return originalExtract(targetPath, leadName, leadSessionId, maxTexts); + }); + const extract = ( + service as unknown as { + extractLeadSessionTextsFromJsonl: ( + jsonlPath: string, + leadName: string, + leadSessionId: string, + maxTexts: number + ) => Promise>; + } + ).extractLeadSessionTextsFromJsonl.bind(service); + + const [first, second] = await Promise.all([ + extract(jsonlPath, 'team-lead', 'lead-1', 150), + extract(jsonlPath, 'team-lead', 'lead-1', 150), + ]); + + expect(assistantSpy).toHaveBeenCalledTimes(1); + expect(first[0]?.text).toBe(second[0]?.text); + }); + + it('does not populate the fulfilled cache when the file changes during parse', async () => { + const service = createLeadSessionCachingService(); + const jsonlPath = await createTempJsonl([ + createLeadAssistantEntry( + 'assistant-1', + '2026-03-27T22:17:01.000Z', + 'This is a sufficiently long assistant thought before mutation.' + ), + ]); + + const originalExtract = ( + service as unknown as { + extractLeadAssistantTextsFromJsonl: ( + jsonlPath: string, + leadName: string, + leadSessionId: string, + maxTexts: number + ) => Promise>; + } + ).extractLeadAssistantTextsFromJsonl.bind(service); + let appended = false; + const assistantSpy = vi + .spyOn(service as never, 'extractLeadAssistantTextsFromJsonl' as never) + .mockImplementation(async (...args: unknown[]) => { + const [targetPath, leadName, leadSessionId, maxTexts] = args as [ + string, + string, + string, + number, + ]; + if (!appended) { + appended = true; + await fs.appendFile( + targetPath, + `${JSON.stringify( + createLeadAssistantEntry( + 'assistant-2', + '2026-03-27T22:17:02.000Z', + 'This is a sufficiently long assistant thought appended during parse.' + ) + )}\n`, + 'utf8' + ); + } + return originalExtract(targetPath, leadName, leadSessionId, maxTexts); + }); + const extract = ( + service as unknown as { + extractLeadSessionTextsFromJsonl: ( + jsonlPath: string, + leadName: string, + leadSessionId: string, + maxTexts: number + ) => Promise>; + } + ).extractLeadSessionTextsFromJsonl.bind(service); + + const first = await extract(jsonlPath, 'team-lead', 'lead-1', 150); + const second = await extract(jsonlPath, 'team-lead', 'lead-1', 150); + + expect(assistantSpy).toHaveBeenCalledTimes(2); + expect(first).toHaveLength(2); + expect(second).toHaveLength(2); + }); + + it('does not reuse an older in-flight parse after the file signature changes', async () => { + const service = createLeadSessionCachingService(); + const jsonlPath = await createTempJsonl([ + createLeadAssistantEntry( + 'assistant-1', + '2026-03-27T22:17:01.000Z', + 'This is a sufficiently long assistant thought before concurrent signature change.' + ), + ]); + + const originalExtract = ( + service as unknown as { + extractLeadAssistantTextsFromJsonl: ( + jsonlPath: string, + leadName: string, + leadSessionId: string, + maxTexts: number + ) => Promise>; + } + ).extractLeadAssistantTextsFromJsonl.bind(service); + let releaseFirstInvocation = () => {}; + let firstInvocationStartedResolve: (() => void) | null = null; + const firstInvocationStarted = new Promise((resolve) => { + firstInvocationStartedResolve = resolve; + }); + const assistantSpy = vi + .spyOn(service as never, 'extractLeadAssistantTextsFromJsonl' as never) + .mockImplementation(async (...args: unknown[]) => { + const [targetPath, leadName, leadSessionId, maxTexts] = args as [ + string, + string, + string, + number, + ]; + if (assistantSpy.mock.calls.length === 1) { + firstInvocationStartedResolve?.(); + await new Promise((resolve) => { + releaseFirstInvocation = () => resolve(); + }); + } + return originalExtract(targetPath, leadName, leadSessionId, maxTexts); + }); + const extract = ( + service as unknown as { + extractLeadSessionTextsFromJsonl: ( + jsonlPath: string, + leadName: string, + leadSessionId: string, + maxTexts: number + ) => Promise>; + } + ).extractLeadSessionTextsFromJsonl.bind(service); + + const firstPromise = extract(jsonlPath, 'team-lead', 'lead-1', 150); + await firstInvocationStarted; + await fs.appendFile( + jsonlPath, + `${JSON.stringify( + createLeadAssistantEntry( + 'assistant-2', + '2026-03-27T22:17:02.000Z', + 'This is a sufficiently long assistant thought appended before the second caller.' + ) + )}\n`, + 'utf8' + ); + + const secondPromise = extract(jsonlPath, 'team-lead', 'lead-1', 150); + releaseFirstInvocation(); + + const [first, second] = await Promise.all([firstPromise, secondPromise]); + + expect(assistantSpy).toHaveBeenCalledTimes(2); + expect(first.length).toBeGreaterThan(0); + expect(second.length).toBeGreaterThan(0); + }); + + it('keeps leadName and maxTexts in the cache identity', async () => { + const service = createLeadSessionCachingService(); + const jsonlPath = await createTempJsonl([ + createLeadAssistantEntry( + 'assistant-1', + '2026-03-27T22:17:01.000Z', + 'This is a sufficiently long assistant thought for keying behavior one.' + ), + createLeadAssistantEntry( + 'assistant-2', + '2026-03-27T22:17:02.000Z', + 'This is a sufficiently long assistant thought for keying behavior two.' + ), + ]); + + const assistantSpy = vi.spyOn(service as never, 'extractLeadAssistantTextsFromJsonl' as never); + const extract = ( + service as unknown as { + extractLeadSessionTextsFromJsonl: ( + jsonlPath: string, + leadName: string, + leadSessionId: string, + maxTexts: number + ) => Promise>; + } + ).extractLeadSessionTextsFromJsonl.bind(service); + + const firstLead = await extract(jsonlPath, 'team-lead', 'lead-1', 1); + const secondLeadSameKey = await extract(jsonlPath, 'team-lead', 'lead-1', 1); + const renamedLead = await extract(jsonlPath, 'captain', 'lead-1', 1); + const widerSlice = await extract(jsonlPath, 'team-lead', 'lead-1', 2); + + expect(firstLead).toHaveLength(1); + expect(secondLeadSameKey).toHaveLength(1); + expect(renamedLead[0]?.from).toBe('captain'); + expect(widerSlice).toHaveLength(2); + expect(assistantSpy).toHaveBeenCalledTimes(3); + }); + + it('does not return stale cached content when the jsonl file is deleted', async () => { + const service = createLeadSessionCachingService(); + const jsonlPath = await createTempJsonl([ + createLeadAssistantEntry( + 'assistant-1', + '2026-03-27T22:17:01.000Z', + 'This is a sufficiently long assistant thought before file deletion.' + ), + ]); + + const assistantSpy = vi.spyOn(service as never, 'extractLeadAssistantTextsFromJsonl' as never); + const extract = ( + service as unknown as { + extractLeadSessionTextsFromJsonl: ( + jsonlPath: string, + leadName: string, + leadSessionId: string, + maxTexts: number + ) => Promise>; + } + ).extractLeadSessionTextsFromJsonl.bind(service); + + const first = await extract(jsonlPath, 'team-lead', 'lead-1', 150); + await fs.rm(jsonlPath, { force: true }); + + await expect(extract(jsonlPath, 'team-lead', 'lead-1', 150)).rejects.toThrow(); + + expect(first).toHaveLength(1); + expect(assistantSpy).toHaveBeenCalledTimes(2); + }); + + it('tolerates a partial trailing line and does not keep a sticky stale result after the file is fixed', async () => { + const service = createLeadSessionCachingService(); + const jsonlPath = await createTempJsonl([ + createLeadAssistantEntry( + 'assistant-1', + '2026-03-27T22:17:01.000Z', + 'This is a sufficiently long assistant thought before partial trailing data.' + ), + ]); + await fs.appendFile(jsonlPath, '{"type":"assistant"', 'utf8'); + + const assistantSpy = vi.spyOn(service as never, 'extractLeadAssistantTextsFromJsonl' as never); + const extract = ( + service as unknown as { + extractLeadSessionTextsFromJsonl: ( + jsonlPath: string, + leadName: string, + leadSessionId: string, + maxTexts: number + ) => Promise>; + } + ).extractLeadSessionTextsFromJsonl.bind(service); + + const partialRead = await extract(jsonlPath, 'team-lead', 'lead-1', 150); + await fs.writeFile( + jsonlPath, + `${JSON.stringify( + createLeadAssistantEntry( + 'assistant-1', + '2026-03-27T22:17:01.000Z', + 'This is a sufficiently long assistant thought before partial trailing data.' + ) + )}\n${JSON.stringify( + createLeadAssistantEntry( + 'assistant-2', + '2026-03-27T22:17:02.000Z', + 'This is a sufficiently long assistant thought after the file was fixed.' + ) + )}\n`, + 'utf8' + ); + + const repairedRead = await extract(jsonlPath, 'team-lead', 'lead-1', 150); + + expect(partialRead).toHaveLength(1); + expect(repairedRead).toHaveLength(2); + expect(assistantSpy).toHaveBeenCalledTimes(2); + }); + + it('works for resolved jsonl paths that contain both dashes and underscores', async () => { + const service = createLeadSessionCachingService(); + const jsonlPath = await createTempJsonlInNamedDir('team_data-lead-session-cache-check', [ + createLeadAssistantEntry( + 'assistant-1', + '2026-03-27T22:17:01.000Z', + 'This is a sufficiently long assistant thought for mixed path characters.' + ), + ]); + + const assistantSpy = vi.spyOn(service as never, 'extractLeadAssistantTextsFromJsonl' as never); + const extract = ( + service as unknown as { + extractLeadSessionTextsFromJsonl: ( + jsonlPath: string, + leadName: string, + leadSessionId: string, + maxTexts: number + ) => Promise>; + } + ).extractLeadSessionTextsFromJsonl.bind(service); + + const first = await extract(jsonlPath, 'team-lead', 'lead-1', 150); + const second = await extract(jsonlPath, 'team-lead', 'lead-1', 150); + + expect(first).toHaveLength(1); + expect(second).toHaveLength(1); + expect(assistantSpy).toHaveBeenCalledTimes(1); + }); + + it('does not keep a rejected in-flight parse sticky across retries', async () => { + const service = createLeadSessionCachingService(); + const jsonlPath = await createTempJsonl([ + createLeadAssistantEntry( + 'assistant-1', + '2026-03-27T22:17:01.000Z', + 'This is a sufficiently long assistant thought before retry after failure.' + ), + ]); + + const originalExtract = ( + service as unknown as { + extractLeadAssistantTextsFromJsonl: ( + jsonlPath: string, + leadName: string, + leadSessionId: string, + maxTexts: number + ) => Promise>; + } + ).extractLeadAssistantTextsFromJsonl.bind(service); + let shouldFail = true; + const assistantSpy = vi + .spyOn(service as never, 'extractLeadAssistantTextsFromJsonl' as never) + .mockImplementation(async (...args: unknown[]) => { + const [targetPath, leadName, leadSessionId, maxTexts] = args as [ + string, + string, + string, + number, + ]; + if (shouldFail) { + throw new Error('transient parse failure'); + } + return originalExtract(targetPath, leadName, leadSessionId, maxTexts); + }); + const extract = ( + service as unknown as { + extractLeadSessionTextsFromJsonl: ( + jsonlPath: string, + leadName: string, + leadSessionId: string, + maxTexts: number + ) => Promise>; + } + ).extractLeadSessionTextsFromJsonl.bind(service); + + await expect(extract(jsonlPath, 'team-lead', 'lead-1', 150)).rejects.toThrow( + 'transient parse failure' + ); + + shouldFail = false; + const retryResult = await extract(jsonlPath, 'team-lead', 'lead-1', 150); + + expect(retryResult).toHaveLength(1); + expect(assistantSpy).toHaveBeenCalledTimes(2); + }); + + it('does not share cache state across fresh TeamDataService instances', async () => { + const firstService = createLeadSessionCachingService(); + const secondService = createLeadSessionCachingService(); + const jsonlPath = await createTempJsonl([ + createLeadAssistantEntry( + 'assistant-1', + '2026-03-27T22:17:01.000Z', + 'This is a sufficiently long assistant thought for service instance isolation.' + ), + ]); + + const firstSpy = vi.spyOn( + firstService as never, + 'extractLeadAssistantTextsFromJsonl' as never + ); + const secondSpy = vi.spyOn( + secondService as never, + 'extractLeadAssistantTextsFromJsonl' as never + ); + const firstExtract = ( + firstService as unknown as { + extractLeadSessionTextsFromJsonl: ( + jsonlPath: string, + leadName: string, + leadSessionId: string, + maxTexts: number + ) => Promise>; + } + ).extractLeadSessionTextsFromJsonl.bind(firstService); + const secondExtract = ( + secondService as unknown as { + extractLeadSessionTextsFromJsonl: ( + jsonlPath: string, + leadName: string, + leadSessionId: string, + maxTexts: number + ) => Promise>; + } + ).extractLeadSessionTextsFromJsonl.bind(secondService); + + await firstExtract(jsonlPath, 'team-lead', 'lead-1', 150); + await secondExtract(jsonlPath, 'team-lead', 'lead-1', 150); + + expect(firstSpy).toHaveBeenCalledTimes(1); + expect(secondSpy).toHaveBeenCalledTimes(1); + }); });