perf: cache team transcript head metadata

2026-05-30 15:26:09 +03:00 · 2026-05-30 15:26:09 +03:00 · 61e2678a5d
commit 61e2678a5d
parent 0a750a9fa8
2 changed files with 182 additions and 66 deletions
--- a/src/main/services/team/TeamTranscriptProjectResolver.ts
+++ b/src/main/services/team/TeamTranscriptProjectResolver.ts
@@ -26,6 +26,7 @@ const TEAM_AFFINITY_SCAN_LINES = 40;
 // is decided after reading just those, not the entire (possibly huge) file.
 const TEAM_AFFINITY_READ_CHUNK_BYTES = 64 * 1024;
 const TEAM_AFFINITY_FILE_CACHE_MAX_ENTRIES = 4_096;
+const TEAM_AFFINITY_HEAD_METADATA_CACHE_MAX_ENTRIES = 4_096;
 const ROOT_DISCOVERY_CONCURRENCY = 12;
 const FAST_CONTEXT_ROOT_DISCOVERY_MTIME_GRACE_MS = 24 * 60 * 60_000;

@@ -151,23 +152,7 @@ function extractTextContent(entry: Record<string, unknown>): string | null {
  return null;
 }

-function extractDirectTeamName(entry: Record<string, unknown>): string | null {
-  if (typeof entry.teamName === 'string') {
-    return entry.teamName.trim().toLowerCase();
-  }
-
-  const process = entry.process as Record<string, unknown> | undefined;
-  const processTeam = process?.team as Record<string, unknown> | undefined;
-  if (typeof processTeam?.teamName === 'string') {
-    return processTeam.teamName.trim().toLowerCase();
-  }
-
-  return null;
-}
-
-function lineMentionsTeam(text: string, teamName: string): boolean {
-  const normalizedText = text.trim().toLowerCase();
-  const normalizedTeam = teamName.trim().toLowerCase();
+function lineMentionsNormalizedTeam(normalizedText: string, normalizedTeam: string): boolean {
  if (!normalizedText.includes(normalizedTeam)) {
    return false;
  }
@@ -183,26 +168,52 @@ function lineMentionsTeam(text: string, teamName: string): boolean {
  );
 }

-function entryContainsNestedTeamName(value: unknown, teamName: string, depth: number = 0): boolean {
+function collectNestedTeamNames(value: unknown, teamNames: Set<string>, depth: number = 0): void {
  if (!value || depth > 8 || typeof value !== 'object') {
-    return false;
+    return;
  }

  if (Array.isArray(value)) {
-    return value.some((item) => entryContainsNestedTeamName(item, teamName, depth + 1));
+    for (const item of value) {
+      collectNestedTeamNames(item, teamNames, depth + 1);
+    }
+    return;
  }

  const entry = value as Record<string, unknown>;
-  if (typeof entry.teamName === 'string' && entry.teamName.trim().toLowerCase() === teamName) {
-    return true;
+  if (typeof entry.teamName === 'string') {
+    const normalizedTeamName = entry.teamName.trim().toLowerCase();
+    if (normalizedTeamName) {
+      teamNames.add(normalizedTeamName);
+    }
  }

-  return Object.entries(entry).some(([key, nested]) => {
+  for (const [key, nested] of Object.entries(entry)) {
    if (key === 'teamName') {
-      return false;
+      continue;
    }
-    return entryContainsNestedTeamName(nested, teamName, depth + 1);
-  });
+    collectNestedTeamNames(nested, teamNames, depth + 1);
+  }
+}
+
+function parseTeamAffinityHeadLine(rawLine: string): TeamAffinityHeadLineMetadata {
+  const empty: TeamAffinityHeadLineMetadata = {
+    nestedTeamNames: new Set<string>(),
+    normalizedTextContent: null,
+  };
+
+  try {
+    const entry = JSON.parse(rawLine) as Record<string, unknown>;
+    const nestedTeamNames = new Set<string>();
+    collectNestedTeamNames(entry, nestedTeamNames);
+    const textContent = extractTextContent(entry);
+    return {
+      nestedTeamNames,
+      normalizedTextContent: textContent ? textContent.trim().toLowerCase() : null,
+    };
+  } catch {
+    return empty;
+  }
 }

 function collectKnownSessionIds(config: TeamConfig): string[] {
@@ -253,6 +264,23 @@ interface TeamAffinityFileCacheEntry {
  headWindowFull: boolean;
 }

+interface TeamAffinityHeadLineMetadata {
+  nestedTeamNames: Set<string>;
+  normalizedTextContent: string | null;
+}
+
+interface TeamAffinityHeadMetadataCacheEntry {
+  mtimeMs: number;
+  size: number;
+  inspectedLineCount: number;
+  lines: TeamAffinityHeadLineMetadata[];
+}
+
+interface TeamAffinityEvaluation {
+  belongsToTeam: boolean;
+  inspectedLineCount: number;
+}
+
 export class TeamTranscriptProjectResolver {
  private readonly contextCache = new Map<
    string,
@@ -260,6 +288,10 @@ export class TeamTranscriptProjectResolver {
  >();

  private readonly teamAffinityFileCache = new Map<string, TeamAffinityFileCacheEntry>();
+  private readonly teamAffinityHeadMetadataCache = new Map<
+    string,
+    TeamAffinityHeadMetadataCacheEntry
+  >();

  constructor(
    private readonly configReader: TeamTranscriptProjectConfigReader = new TeamConfigReader()
@@ -1070,46 +1102,63 @@ export class TeamTranscriptProjectResolver {
      }
    }

-    // Read the head window with a bounded chunked read plus a plain newline split
-    // instead of readline. readline's async line iterator runs an expensive Unicode
-    // line-break regex and stream/string-decoder machinery per chunk, which showed up
-    // as a top main-thread cost during launch. JSONL is strictly newline-delimited and
-    // each line is trim()'d (so a trailing CR from a CRLF ending is dropped), so a plain
-    // newline split is both cheaper and more correct here: it will not split on a bare
-    // CR or a Unicode line/paragraph separator that appears inside a JSON string value.
-    // A StringDecoder preserves multi-byte UTF-8 sequences that straddle a chunk
-    // boundary. Semantics are byte-identical to the old readline loop: inspect up to
-    // TEAM_AFFINITY_SCAN_LINES non-empty lines, first match wins via early break, and a
-    // final line is honored even without a trailing newline.
-    let belongsToTeam = false;
-    let inspected = 0;
+    const headMetadata = await this.getTeamAffinityHeadMetadata(filePath, fileStat);
+    if (!headMetadata) {
+      return false;
+    }
+    const evaluation = this.evaluateTeamAffinityHeadMetadata(headMetadata, normalizedTeam);

+    this.setTeamAffinityFileCacheEntry(cacheKey, {
+      mtimeMs: fileStat.mtimeMs,
+      size: fileStat.size,
+      belongsToTeam: evaluation.belongsToTeam,
+      headWindowFull: evaluation.inspectedLineCount >= TEAM_AFFINITY_SCAN_LINES,
+    });
+    return evaluation.belongsToTeam;
+  }
+
+  private evaluateTeamAffinityHeadMetadata(
+    metadata: TeamAffinityHeadMetadataCacheEntry,
+    normalizedTeam: string
+  ): TeamAffinityEvaluation {
+    let inspectedLineCount = 0;
+    for (const line of metadata.lines) {
+      inspectedLineCount += 1;
+      if (line.nestedTeamNames.has(normalizedTeam)) {
+        return { belongsToTeam: true, inspectedLineCount };
+      }
+      if (
+        line.normalizedTextContent &&
+        lineMentionsNormalizedTeam(line.normalizedTextContent, normalizedTeam)
+      ) {
+        return { belongsToTeam: true, inspectedLineCount };
+      }
+    }
+    return { belongsToTeam: false, inspectedLineCount: metadata.inspectedLineCount };
+  }
+
+  private async getTeamAffinityHeadMetadata(
+    filePath: string,
+    fileStat: { mtimeMs: number; size: number }
+  ): Promise<TeamAffinityHeadMetadataCacheEntry | null> {
+    const cached = this.teamAffinityHeadMetadataCache.get(filePath);
+    if (cached && cached.mtimeMs === fileStat.mtimeMs && cached.size === fileStat.size) {
+      return cached;
+    }
+    if (cached) {
+      this.teamAffinityHeadMetadataCache.delete(filePath);
+    }
+
+    const lines: TeamAffinityHeadLineMetadata[] = [];
+    let inspectedLineCount = 0;
    const inspectHeadLine = (rawLine: string): boolean => {
      const trimmed = rawLine.trim();
      if (!trimmed) {
        return false;
      }
-      inspected += 1;
-      try {
-        const entry = JSON.parse(trimmed) as Record<string, unknown>;
-        const directTeamName = extractDirectTeamName(entry);
-        if (directTeamName === normalizedTeam) {
-          belongsToTeam = true;
-          return true;
-        }
-        if (entryContainsNestedTeamName(entry, normalizedTeam)) {
-          belongsToTeam = true;
-          return true;
-        }
-        const textContent = extractTextContent(entry);
-        if (textContent && lineMentionsTeam(textContent, normalizedTeam)) {
-          belongsToTeam = true;
-          return true;
-        }
-      } catch {
-        // ignore malformed head lines
-      }
-      return inspected >= TEAM_AFFINITY_SCAN_LINES;
+      inspectedLineCount += 1;
+      lines.push(parseTeamAffinityHeadLine(trimmed));
+      return inspectedLineCount >= TEAM_AFFINITY_SCAN_LINES;
    };

    let handle: fs.FileHandle | null = null;
@@ -1144,18 +1193,19 @@ export class TeamTranscriptProjectResolver {
        }
      }
    } catch {
-      return false;
+      return null;
    } finally {
      await handle?.close().catch(() => undefined);
    }

-    this.setTeamAffinityFileCacheEntry(cacheKey, {
+    const entry = {
      mtimeMs: fileStat.mtimeMs,
      size: fileStat.size,
-      belongsToTeam,
-      headWindowFull: inspected >= TEAM_AFFINITY_SCAN_LINES,
-    });
-    return belongsToTeam;
+      inspectedLineCount,
+      lines,
+    };
+    this.setTeamAffinityHeadMetadataCacheEntry(filePath, entry);
+    return entry;
  }

  private buildTeamAffinityFileCacheKey(filePath: string, normalizedTeam: string): string {
@@ -1174,4 +1224,20 @@ export class TeamTranscriptProjectResolver {
    }
    this.teamAffinityFileCache.set(cacheKey, entry);
  }
+
+  private setTeamAffinityHeadMetadataCacheEntry(
+    filePath: string,
+    entry: TeamAffinityHeadMetadataCacheEntry
+  ): void {
+    if (
+      !this.teamAffinityHeadMetadataCache.has(filePath) &&
+      this.teamAffinityHeadMetadataCache.size >= TEAM_AFFINITY_HEAD_METADATA_CACHE_MAX_ENTRIES
+    ) {
+      const oldestKey = this.teamAffinityHeadMetadataCache.keys().next().value;
+      if (oldestKey) {
+        this.teamAffinityHeadMetadataCache.delete(oldestKey);
+      }
+    }
+    this.teamAffinityHeadMetadataCache.set(filePath, entry);
+  }
 }
--- a/test/main/services/team/TeamTranscriptProjectResolver.test.ts
+++ b/test/main/services/team/TeamTranscriptProjectResolver.test.ts
@@ -636,6 +636,12 @@ describe('TeamTranscriptProjectResolver', () => {
    belongsToTeam: boolean;
    headWindowFull: boolean;
  };
+  type HeadMetadataCacheEntry = {
+    mtimeMs: number;
+    size: number;
+    inspectedLineCount: number;
+    lines: unknown[];
+  };
  type ResolverProbe = {
    fileBelongsToTeam: (
      filePath: string,
@@ -644,6 +650,7 @@ describe('TeamTranscriptProjectResolver', () => {
    ) => Promise<boolean>;
    buildTeamAffinityFileCacheKey: (filePath: string, normalizedTeam: string) => string;
    teamAffinityFileCache: Map<string, AffinityCacheEntry>;
+    teamAffinityHeadMetadataCache: Map<string, HeadMetadataCacheEntry>;
  };

  it('caches a full-head-window negative and stops re-scanning a growing non-matching transcript', async () => {
@@ -750,6 +757,49 @@ describe('TeamTranscriptProjectResolver', () => {
    expect(entry?.mtimeMs).toBe(123_456);
  });

+  it('reuses parsed head metadata across different team lookups for the same file signature', async () => {
+    await setupClaudeRoot();
+    const resolver = new TeamTranscriptProjectResolver() as unknown as ResolverProbe;
+    const projectDir = path.join(tmpDir!, 'projects', encodePath('/repo/head-cache'));
+    await fs.mkdir(projectDir, { recursive: true });
+    const jsonlPath = path.join(projectDir, 'shared.jsonl');
+    await fs.writeFile(
+      jsonlPath,
+      [
+        teamTextLine('alpha-team'),
+        JSON.stringify({
+          type: 'assistant',
+          message: {
+            role: 'assistant',
+            content: [{ type: 'tool_use', input: { teamName: 'beta-team' } }],
+          },
+        }),
+      ].join('\n') + '\n',
+      'utf8'
+    );
+
+    const fileStat = await fs.stat(jsonlPath);
+    expect(await resolver.fileBelongsToTeam(jsonlPath, 'alpha-team', fileStat)).toBe(true);
+    await fs.unlink(jsonlPath);
+    expect(await resolver.fileBelongsToTeam(jsonlPath, 'beta-team', fileStat)).toBe(true);
+    expect(await resolver.fileBelongsToTeam(jsonlPath, 'missing-team', fileStat)).toBe(false);
+
+    expect(resolver.teamAffinityHeadMetadataCache.size).toBe(1);
+    expect(resolver.teamAffinityHeadMetadataCache.get(jsonlPath)?.inspectedLineCount).toBe(2);
+    expect(resolver.teamAffinityFileCache.get(`alpha-team\0${jsonlPath}`)).toMatchObject({
+      belongsToTeam: true,
+      headWindowFull: false,
+    });
+    expect(resolver.teamAffinityFileCache.get(`beta-team\0${jsonlPath}`)).toMatchObject({
+      belongsToTeam: true,
+      headWindowFull: false,
+    });
+    expect(resolver.teamAffinityFileCache.get(`missing-team\0${jsonlPath}`)).toMatchObject({
+      belongsToTeam: false,
+      headWindowFull: false,
+    });
+  });
+
  // The head-window scan reads chunks + splits on '\n' (not readline). These lock the
  // byte-exact equivalence: CRLF endings, a final line with no trailing newline, a
  // multi-byte char straddling the 64KB read boundary, and the 40-line window bound.