diff --git a/src/main/services/team/TeamTranscriptProjectResolver.ts b/src/main/services/team/TeamTranscriptProjectResolver.ts index 8064e3b7..29ea7038 100644 --- a/src/main/services/team/TeamTranscriptProjectResolver.ts +++ b/src/main/services/team/TeamTranscriptProjectResolver.ts @@ -8,10 +8,10 @@ import { } from '@main/utils/pathDecoder'; import { isLeadMember } from '@shared/utils/leadDetection'; import { createLogger } from '@shared/utils/logger'; -import { createReadStream, type Dirent } from 'fs'; +import { type Dirent } from 'fs'; import * as fs from 'fs/promises'; import * as path from 'path'; -import * as readline from 'readline'; +import { StringDecoder } from 'string_decoder'; import { TeamConfigReader } from './TeamConfigReader'; @@ -21,6 +21,10 @@ const logger = createLogger('Service:TeamTranscriptProjectResolver'); const SESSION_DISCOVERY_CACHE_TTL = 30_000; const TEAM_AFFINITY_SCAN_LINES = 40; +// Read size for the head-window affinity scan. Read in chunks (not the whole file) +// so a transcript whose head holds the team's first TEAM_AFFINITY_SCAN_LINES lines +// is decided after reading just those, not the entire (possibly huge) file. +const TEAM_AFFINITY_READ_CHUNK_BYTES = 64 * 1024; const TEAM_AFFINITY_FILE_CACHE_MAX_ENTRIES = 4_096; const ROOT_DISCOVERY_CONCURRENCY = 12; const FAST_CONTEXT_ROOT_DISCOVERY_MTIME_GRACE_MS = 24 * 60 * 60_000; @@ -1066,49 +1070,83 @@ export class TeamTranscriptProjectResolver { } } - const stream = createReadStream(filePath, { encoding: 'utf8' }); - const rl = readline.createInterface({ input: stream, crlfDelay: Infinity }); + // Read the head window with a bounded chunked read plus a plain newline split + // instead of readline. readline's async line iterator runs an expensive Unicode + // line-break regex and stream/string-decoder machinery per chunk, which showed up + // as a top main-thread cost during launch. 
JSONL is strictly newline-delimited and + // each line is trim()'d (so a trailing CR from a CRLF ending is dropped), so a plain + // newline split is both cheaper and more correct here: it will not split on a bare + // CR or a Unicode line/paragraph separator that appears inside a JSON string value. + // A StringDecoder preserves multi-byte UTF-8 sequences that straddle a chunk + // boundary. Aside from line splitting, behavior matches the old readline loop: inspect up to + // TEAM_AFFINITY_SCAN_LINES non-empty lines, first match wins via early break, and a + // final line is honored even without a trailing newline. let belongsToTeam = false; let inspected = 0; + const inspectHeadLine = (rawLine: string): boolean => { + const trimmed = rawLine.trim(); + if (!trimmed) { + return false; + } + inspected += 1; + try { + const entry = JSON.parse(trimmed) as Record; + const directTeamName = extractDirectTeamName(entry); + if (directTeamName === normalizedTeam) { + belongsToTeam = true; + return true; + } + if (entryContainsNestedTeamName(entry, normalizedTeam)) { + belongsToTeam = true; + return true; + } + const textContent = extractTextContent(entry); + if (textContent && lineMentionsTeam(textContent, normalizedTeam)) { + belongsToTeam = true; + return true; + } + } catch { + // ignore malformed head lines + } + return inspected >= TEAM_AFFINITY_SCAN_LINES; + }; + + let handle: fs.FileHandle | null = null; try { - for await (const line of rl) { - const trimmed = line.trim(); - if (!trimmed) { - continue; - } - - inspected += 1; - try { - const entry = JSON.parse(trimmed) as Record; - const directTeamName = extractDirectTeamName(entry); - if (directTeamName === normalizedTeam) { - belongsToTeam = true; - break; + handle = await fs.open(filePath, 'r'); + const decoder = new StringDecoder('utf8'); + const chunk = Buffer.allocUnsafe(TEAM_AFFINITY_READ_CHUNK_BYTES); + let pending = ''; + let position = 0; + let stop = false; + while (!stop) { + const { bytesRead } = await 
handle.read(chunk, 0, chunk.length, position); + if (bytesRead <= 0) { + // EOF: flush the decoder and honor a final line with no trailing newline. + pending += decoder.end(); + if (pending.length > 0) { + inspectHeadLine(pending); } - if (entryContainsNestedTeamName(entry, normalizedTeam)) { - belongsToTeam = true; - break; - } - - const textContent = extractTextContent(entry); - if (textContent && lineMentionsTeam(textContent, normalizedTeam)) { - belongsToTeam = true; - break; - } - } catch { - // ignore malformed head lines - } - - if (inspected >= TEAM_AFFINITY_SCAN_LINES) { break; } + position += bytesRead; + pending += decoder.write(chunk.subarray(0, bytesRead)); + let newlineIndex = pending.indexOf('\n'); + while (newlineIndex !== -1) { + const line = pending.slice(0, newlineIndex); + pending = pending.slice(newlineIndex + 1); + if (inspectHeadLine(line)) { + stop = true; + break; + } + newlineIndex = pending.indexOf('\n'); + } } } catch { return false; } finally { - rl.close(); - stream.destroy(); + await handle?.close().catch(() => undefined); } this.setTeamAffinityFileCacheEntry(cacheKey, { diff --git a/test/main/services/team/TeamTranscriptProjectResolver.test.ts b/test/main/services/team/TeamTranscriptProjectResolver.test.ts index c3b3fd23..4cad8c14 100644 --- a/test/main/services/team/TeamTranscriptProjectResolver.test.ts +++ b/test/main/services/team/TeamTranscriptProjectResolver.test.ts @@ -749,4 +749,62 @@ describe('TeamTranscriptProjectResolver', () => { expect(entry?.size).toBe(999_999); // cache recorded the precomputed stat -> no re-stat expect(entry?.mtimeMs).toBe(123_456); }); + + // The head-window scan reads chunks + splits on '\n' (not readline). These lock the + // head-scan behavior: CRLF endings, a final line with no trailing newline, a match + // past the 64KB read boundary with multi-byte content, and the 40-line window bound. 
+ const teamTextLine = (team: string) => + JSON.stringify({ + type: 'user', + message: { role: 'user', content: [{ type: 'text', text: `Team name: ${team}` }] }, + }); + const noiseLine = (i: number) => + JSON.stringify({ type: 'user', message: { role: 'user', content: `noise ${i}` } }); + + it('matches with CRLF line endings and a final line that has no trailing newline', async () => { + await setupClaudeRoot(); + const resolver = new TeamTranscriptProjectResolver() as unknown as ResolverProbe; + const team = 'crlf-team'; + const projectDir = path.join(tmpDir!, 'projects', encodePath('/repo/crlf')); + await fs.mkdir(projectDir, { recursive: true }); + const jsonlPath = path.join(projectDir, 'c.jsonl'); + // CRLF separators; the matching line is last and has NO trailing newline. + await fs.writeFile( + jsonlPath, + `${noiseLine(0)}\r\n${noiseLine(1)}\r\n${teamTextLine(team)}`, + 'utf8' + ); + expect(await resolver.fileBelongsToTeam(jsonlPath, team)).toBe(true); + }); + + it('matches a team mention located past the 64KB read boundary with multi-byte content', async () => { + await setupClaudeRoot(); + const resolver = new TeamTranscriptProjectResolver() as unknown as ResolverProbe; + const team = 'boundary-team'; + const projectDir = path.join(tmpDir!, 'projects', encodePath('/repo/mb')); + await fs.mkdir(projectDir, { recursive: true }); + const jsonlPath = path.join(projectDir, 'mb.jsonl'); + // ~40KB of 2-byte Cyrillic per line: the first two lines (~80KB) push the matching + // third line past the first 64KB read, so the match depends on a later chunk. + // NOTE(review): offset 65536 appears to land between chars (nothing is split) - confirm. 
+ const big = 'я'.repeat(20_000); + const heavy = (i: number) => + JSON.stringify({ type: 'user', message: { role: 'user', content: `${big} ${i}` } }); + await fs.writeFile(jsonlPath, `${heavy(0)}\n${heavy(1)}\n${teamTextLine(team)}\n`, 'utf8'); + expect(await resolver.fileBelongsToTeam(jsonlPath, team)).toBe(true); + }); + + it('ignores a team mention that appears only after the 40-line head window', async () => { + await setupClaudeRoot(); + const resolver = new TeamTranscriptProjectResolver() as unknown as ResolverProbe; + const team = 'late-team'; + const projectDir = path.join(tmpDir!, 'projects', encodePath('/repo/late')); + await fs.mkdir(projectDir, { recursive: true }); + const jsonlPath = path.join(projectDir, 'late.jsonl'); + // 40 non-matching lines fill the head window; the mention is on line 41. + const lines = Array.from({ length: 40 }, (_, i) => noiseLine(i)); + lines.push(teamTextLine(team)); + await fs.writeFile(jsonlPath, `${lines.join('\n')}\n`, 'utf8'); + expect(await resolver.fileBelongsToTeam(jsonlPath, team)).toBe(false); + }); });