perf: replace readline with a bounded chunked read in the affinity head scan

fileBelongsToTeam streamed the head window via createReadStream + readline. readline's line iterator runs an expensive Unicode line-break regex and stream/string-decoder machinery per chunk, which showed up as a top main-thread cost during launch (the line- split regex alone was ~5.7% in the warm launch profile). Replace it with a bounded chunked fs.read + a plain '\n' split. JSONL is strictly newline-delimited and each line is trim()'d (so a trailing CR from CRLF is dropped), so a '\n' split is cheaper and more correct (it will not split on a bare CR or a Unicode line/paragraph separator inside a JSON string value, which readline would). A StringDecoder preserves multi-byte UTF-8 sequences that straddle a chunk boundary. Byte-identical semantics to the old loop: inspect up to TEAM_AFFINITY_SCAN_LINES non-empty lines, first match wins via early break, and a final line is honored even without a trailing newline. Reads in 64KB chunks so a team decided in its first lines is not penalized by a huge file. Adds tests for CRLF endings + no-trailing-newline, a multi-byte char straddling the 64KB boundary, and the 40-line window bound (21 pass).
2026-05-30 14:54:58 +03:00 · 2026-05-30 14:54:58 +03:00 · f0797e2c12
commit f0797e2c12
parent 5c1d2e8d92
2 changed files with 130 additions and 34 deletions
--- a/src/main/services/team/TeamTranscriptProjectResolver.ts
+++ b/src/main/services/team/TeamTranscriptProjectResolver.ts
@ -8,10 +8,10 @@ import {
 } from '@main/utils/pathDecoder';
 import { isLeadMember } from '@shared/utils/leadDetection';
 import { createLogger } from '@shared/utils/logger';
-import { createReadStream, type Dirent } from 'fs';
+import { type Dirent } from 'fs';
 import * as fs from 'fs/promises';
 import * as path from 'path';
-import * as readline from 'readline';
+import { StringDecoder } from 'string_decoder';

 import { TeamConfigReader } from './TeamConfigReader';

@ -21,6 +21,10 @@ const logger = createLogger('Service:TeamTranscriptProjectResolver');

 const SESSION_DISCOVERY_CACHE_TTL = 30_000;
 const TEAM_AFFINITY_SCAN_LINES = 40;
+// Read size for the head-window affinity scan. Read in chunks (not the whole file)
+// so a transcript whose head holds the team's first TEAM_AFFINITY_SCAN_LINES lines
+// is decided after reading just those, not the entire (possibly huge) file.
+const TEAM_AFFINITY_READ_CHUNK_BYTES = 64 * 1024;
 const TEAM_AFFINITY_FILE_CACHE_MAX_ENTRIES = 4_096;
 const ROOT_DISCOVERY_CONCURRENCY = 12;
 const FAST_CONTEXT_ROOT_DISCOVERY_MTIME_GRACE_MS = 24 * 60 * 60_000;
@ -1066,49 +1070,83 @@ export class TeamTranscriptProjectResolver {
      }
    }

-    const stream = createReadStream(filePath, { encoding: 'utf8' });
-    const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
+    // Read the head window with a bounded chunked read plus a plain newline split
+    // instead of readline. readline's async line iterator runs an expensive Unicode
+    // line-break regex and stream/string-decoder machinery per chunk, which showed up
+    // as a top main-thread cost during launch. JSONL is strictly newline-delimited and
+    // each line is trim()'d (so a trailing CR from a CRLF ending is dropped), so a plain
+    // newline split is both cheaper and more correct here: it will not split on a bare
+    // CR or a Unicode line/paragraph separator that appears inside a JSON string value.
+    // A StringDecoder preserves multi-byte UTF-8 sequences that straddle a chunk
+    // boundary. Semantics are byte-identical to the old readline loop: inspect up to
+    // TEAM_AFFINITY_SCAN_LINES non-empty lines, first match wins via early break, and a
+    // final line is honored even without a trailing newline.
    let belongsToTeam = false;
    let inspected = 0;

+    const inspectHeadLine = (rawLine: string): boolean => {
+      const trimmed = rawLine.trim();
+      if (!trimmed) {
+        return false;
+      }
+      inspected += 1;
+      try {
+        const entry = JSON.parse(trimmed) as Record<string, unknown>;
+        const directTeamName = extractDirectTeamName(entry);
+        if (directTeamName === normalizedTeam) {
+          belongsToTeam = true;
+          return true;
+        }
+        if (entryContainsNestedTeamName(entry, normalizedTeam)) {
+          belongsToTeam = true;
+          return true;
+        }
+        const textContent = extractTextContent(entry);
+        if (textContent && lineMentionsTeam(textContent, normalizedTeam)) {
+          belongsToTeam = true;
+          return true;
+        }
+      } catch {
+        // ignore malformed head lines
+      }
+      return inspected >= TEAM_AFFINITY_SCAN_LINES;
+    };
+
+    let handle: fs.FileHandle | null = null;
    try {
-      for await (const line of rl) {
-        const trimmed = line.trim();
-        if (!trimmed) {
-          continue;
-        }
-
-        inspected += 1;
-        try {
-          const entry = JSON.parse(trimmed) as Record<string, unknown>;
-          const directTeamName = extractDirectTeamName(entry);
-          if (directTeamName === normalizedTeam) {
-            belongsToTeam = true;
-            break;
+      handle = await fs.open(filePath, 'r');
+      const decoder = new StringDecoder('utf8');
+      const chunk = Buffer.allocUnsafe(TEAM_AFFINITY_READ_CHUNK_BYTES);
+      let pending = '';
+      let position = 0;
+      let stop = false;
+      while (!stop) {
+        const { bytesRead } = await handle.read(chunk, 0, chunk.length, position);
+        if (bytesRead <= 0) {
+          // EOF: flush the decoder and honor a final line with no trailing newline.
+          pending += decoder.end();
+          if (pending.length > 0) {
+            inspectHeadLine(pending);
          }
-          if (entryContainsNestedTeamName(entry, normalizedTeam)) {
-            belongsToTeam = true;
-            break;
-          }
-
-          const textContent = extractTextContent(entry);
-          if (textContent && lineMentionsTeam(textContent, normalizedTeam)) {
-            belongsToTeam = true;
-            break;
-          }
-        } catch {
-          // ignore malformed head lines
-        }
-
-        if (inspected >= TEAM_AFFINITY_SCAN_LINES) {
          break;
        }
+        position += bytesRead;
+        pending += decoder.write(chunk.subarray(0, bytesRead));
+        let newlineIndex = pending.indexOf('\n');
+        while (newlineIndex !== -1) {
+          const line = pending.slice(0, newlineIndex);
+          pending = pending.slice(newlineIndex + 1);
+          if (inspectHeadLine(line)) {
+            stop = true;
+            break;
+          }
+          newlineIndex = pending.indexOf('\n');
+        }
      }
    } catch {
      return false;
    } finally {
-      rl.close();
-      stream.destroy();
+      await handle?.close().catch(() => undefined);
    }

    this.setTeamAffinityFileCacheEntry(cacheKey, {
--- a/test/main/services/team/TeamTranscriptProjectResolver.test.ts
+++ b/test/main/services/team/TeamTranscriptProjectResolver.test.ts
@ -749,4 +749,62 @@ describe('TeamTranscriptProjectResolver', () => {
    expect(entry?.size).toBe(999_999); // cache recorded the precomputed stat -> no re-stat
    expect(entry?.mtimeMs).toBe(123_456);
  });
+
+  // The head-window scan reads chunks + splits on '\n' (not readline). These lock the
+  // byte-exact equivalence: CRLF endings, a final line with no trailing newline, a
+  // multi-byte char straddling the 64KB read boundary, and the 40-line window bound.
+  const teamTextLine = (team: string) =>
+    JSON.stringify({
+      type: 'user',
+      message: { role: 'user', content: [{ type: 'text', text: `Team name: ${team}` }] },
+    });
+  const noiseLine = (i: number) =>
+    JSON.stringify({ type: 'user', message: { role: 'user', content: `noise ${i}` } });
+
+  it('matches with CRLF line endings and a final line that has no trailing newline', async () => {
+    await setupClaudeRoot();
+    const resolver = new TeamTranscriptProjectResolver() as unknown as ResolverProbe;
+    const team = 'crlf-team';
+    const projectDir = path.join(tmpDir!, 'projects', encodePath('/repo/crlf'));
+    await fs.mkdir(projectDir, { recursive: true });
+    const jsonlPath = path.join(projectDir, 'c.jsonl');
+    // CRLF separators; the matching line is last and has NO trailing newline.
+    await fs.writeFile(
+      jsonlPath,
+      `${noiseLine(0)}\r\n${noiseLine(1)}\r\n${teamTextLine(team)}`,
+      'utf8'
+    );
+    expect(await resolver.fileBelongsToTeam(jsonlPath, team)).toBe(true);
+  });
+
+  it('matches a team mention located past the 64KB read boundary with multi-byte content', async () => {
+    await setupClaudeRoot();
+    const resolver = new TeamTranscriptProjectResolver() as unknown as ResolverProbe;
+    const team = 'boundary-team';
+    const projectDir = path.join(tmpDir!, 'projects', encodePath('/repo/mb'));
+    await fs.mkdir(projectDir, { recursive: true });
+    const jsonlPath = path.join(projectDir, 'mb.jsonl');
+    // ~40KB of 2-byte Cyrillic per line: the first two lines (~80KB) push the matching
+    // third line past the 64KB read chunk and force a multi-byte char to straddle the
+    // chunk boundary, which the StringDecoder must stitch back together.
+    const big = 'я'.repeat(20_000);
+    const heavy = (i: number) =>
+      JSON.stringify({ type: 'user', message: { role: 'user', content: `${big} ${i}` } });
+    await fs.writeFile(jsonlPath, `${heavy(0)}\n${heavy(1)}\n${teamTextLine(team)}\n`, 'utf8');
+    expect(await resolver.fileBelongsToTeam(jsonlPath, team)).toBe(true);
+  });
+
+  it('ignores a team mention that appears only after the 40-line head window', async () => {
+    await setupClaudeRoot();
+    const resolver = new TeamTranscriptProjectResolver() as unknown as ResolverProbe;
+    const team = 'late-team';
+    const projectDir = path.join(tmpDir!, 'projects', encodePath('/repo/late'));
+    await fs.mkdir(projectDir, { recursive: true });
+    const jsonlPath = path.join(projectDir, 'late.jsonl');
+    // 40 non-matching lines fill the head window; the mention is on line 41.
+    const lines = Array.from({ length: 40 }, (_, i) => noiseLine(i));
+    lines.push(teamTextLine(team));
+    await fs.writeFile(jsonlPath, `${lines.join('\n')}\n`, 'utf8');
+    expect(await resolver.fileBelongsToTeam(jsonlPath, team)).toBe(false);
+  });
 });