perf: cache negative team-affinity verdicts from a full head window

fileBelongsToTeam only cached POSITIVE affinity durably; a negative verdict was
re-decided on any change, so during a launch every non-matching transcript in the
project dir that grew (mtime+size change from an active session) was re-streamed
(createReadStream+readline) and re-parsed (up to 40 head lines) on every bootstrap
poll. A live atlas-hq-5 launch profile put this whole subsystem (readline streaming
+ fileBelongsToTeam + line/team matching) at ~31% of main-thread JS, the single
largest launch cost.

A team's first 40 head lines are immutable for an append-only transcript, so a
`false` decided from a FULL inspected window (>= TEAM_AFFINITY_SCAN_LINES) stays
valid while the file only grows. Track headWindowFull on the cache entry and short-
circuit such negatives the same way positives are short-circuited (size >= cached).
Short files (partial window) are still re-scanned on growth, so a team mention that
later lands inside the head window is still detected. A shrink/rewrite (size <
cached) forces a re-scan, identical to the positive path.

Behavior-preserving for affinity correctness (no new false negatives); only removes
redundant re-streams. Adds regression tests for both the durable-negative and the
short-file-flips-to-true cases.
This commit is contained in:
777genius 2026-05-30 13:17:49 +03:00
parent 126a485477
commit c8d40be460
2 changed files with 111 additions and 1 deletions

View file

@ -241,6 +241,12 @@ interface TeamAffinityFileCacheEntry {
mtimeMs: number;
size: number;
belongsToTeam: boolean;
// True when the verdict was decided after inspecting a FULL head window
// (>= TEAM_AFFINITY_SCAN_LINES non-empty lines). For append-only transcripts the
// head is immutable, so a `false` verdict from a full window stays valid while the
// file only grows — letting us cache negatives durably instead of re-streaming
// every non-matching transcript on each bootstrap poll.
headWindowFull: boolean;
}
export class TeamTranscriptProjectResolver {
@ -1031,6 +1037,16 @@ export class TeamTranscriptProjectResolver {
if (cached.belongsToTeam && fileStat.size >= cached.size) {
return true;
}
// A `false` decided from a FULL head window is durable while the file only
// grows: the first TEAM_AFFINITY_SCAN_LINES lines of an append-only transcript
// are immutable, so growth cannot introduce a team mention inside the inspected
// window. A shrink/rewrite makes size < cached.size and falls through to a
// re-scan below, identically to the positive path. This is the main launch win:
// non-matching transcripts in the project dir are no longer re-streamed +
// re-parsed on every bootstrap poll.
if (!cached.belongsToTeam && cached.headWindowFull && fileStat.size >= cached.size) {
return false;
}
if (cached.mtimeMs === fileStat.mtimeMs && cached.size === fileStat.size) {
return cached.belongsToTeam;
}
@ -1039,9 +1055,9 @@ export class TeamTranscriptProjectResolver {
const stream = createReadStream(filePath, { encoding: 'utf8' });
const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
let belongsToTeam = false;
let inspected = 0;
try {
let inspected = 0;
for await (const line of rl) {
const trimmed = line.trim();
if (!trimmed) {
@ -1085,6 +1101,7 @@ export class TeamTranscriptProjectResolver {
mtimeMs: fileStat.mtimeMs,
size: fileStat.size,
belongsToTeam,
headWindowFull: inspected >= TEAM_AFFINITY_SCAN_LINES,
});
return belongsToTeam;
}

View file

@ -623,4 +623,97 @@ describe('TeamTranscriptProjectResolver', () => {
expect(fastContext?.sessionIds).not.toContain('old-member-session');
expect(fullContext?.sessionIds).toContain('old-member-session');
});
// Regression for the launch hot path: non-matching transcripts must not be
// re-streamed + re-parsed on every bootstrap poll. A negative verdict decided from
// a FULL head window (>= 40 inspected lines) is durable while the file only grows,
// because an append-only transcript's head is immutable. Observed via the private
// affinity cache: the durable branch returns WITHOUT re-caching, so the cached size
// stays at the first scan's size (a re-scan would update it to the grown size).
type AffinityCacheEntry = {
mtimeMs: number;
size: number;
belongsToTeam: boolean;
headWindowFull: boolean;
};
type ResolverProbe = {
fileBelongsToTeam: (filePath: string, teamName: string) => Promise<boolean>;
buildTeamAffinityFileCacheKey: (filePath: string, normalizedTeam: string) => string;
teamAffinityFileCache: Map<string, AffinityCacheEntry>;
};
it('caches a full-head-window negative and stops re-scanning a growing non-matching transcript', async () => {
await setupClaudeRoot();
const resolver = new TeamTranscriptProjectResolver() as unknown as ResolverProbe;
const team = 'absent-team';
const projectDir = path.join(tmpDir!, 'projects', encodePath('/repo/neg-durable'));
await fs.mkdir(projectDir, { recursive: true });
const jsonlPath = path.join(projectDir, 'unrelated.jsonl');
const mkLine = (i: number) =>
JSON.stringify({ type: 'user', message: { role: 'user', content: `unrelated line ${i}` } });
// 45 non-empty lines, none mentioning the team -> full head window (40) inspected.
await fs.writeFile(
jsonlPath,
`${Array.from({ length: 45 }, (_, i) => mkLine(i)).join('\n')}\n`,
'utf8'
);
expect(await resolver.fileBelongsToTeam(jsonlPath, team)).toBe(false);
const key = resolver.buildTeamAffinityFileCacheKey(jsonlPath, team.toLowerCase());
const first = resolver.teamAffinityFileCache.get(key);
expect(first?.belongsToTeam).toBe(false);
expect(first?.headWindowFull).toBe(true);
const sizeAfterFirst = first!.size;
// Append-only growth: size grows, mtime changes, but the inspected head is fixed.
await fs.appendFile(jsonlPath, `${mkLine(100)}\n`);
expect(await resolver.fileBelongsToTeam(jsonlPath, team)).toBe(false);
// Durable negative: the cache entry was NOT re-written (no re-scan), so its size
// is still the original, smaller size.
expect(resolver.teamAffinityFileCache.get(key)?.size).toBe(sizeAfterFirst);
});
// Correctness guard: a SHORT-file negative (head window not yet full) is NOT durable
// and must be re-scanned on growth, so a team mention that lands inside the first 40
// lines is still detected (the verdict flips to true).
it('re-scans a short-file negative on growth and flips to true when the head gains a team mention', async () => {
await setupClaudeRoot();
const resolver = new TeamTranscriptProjectResolver() as unknown as ResolverProbe;
const team = 'team-x';
const projectDir = path.join(tmpDir!, 'projects', encodePath('/repo/short-neg'));
await fs.mkdir(projectDir, { recursive: true });
const jsonlPath = path.join(projectDir, 'short.jsonl');
// Only 3 lines, none mentioning the team -> partial head window (not durable).
await fs.writeFile(
jsonlPath,
`${[0, 1, 2]
.map((i) => JSON.stringify({ type: 'user', message: { role: 'user', content: `hi ${i}` } }))
.join('\n')}\n`,
'utf8'
);
expect(await resolver.fileBelongsToTeam(jsonlPath, team)).toBe(false);
const key = resolver.buildTeamAffinityFileCacheKey(jsonlPath, team.toLowerCase());
const first = resolver.teamAffinityFileCache.get(key);
expect(first?.headWindowFull).toBe(false);
const sizeAfterFirst = first!.size;
// Append a line whose text content mentions the team (still within the first 40 lines).
await fs.appendFile(
jsonlPath,
`${JSON.stringify({
type: 'user',
message: {
role: 'user',
content: [
{ type: 'text', text: `Current durable team context:\n- Team name: ${team}` },
],
},
})}\n`
);
expect(await resolver.fileBelongsToTeam(jsonlPath, team)).toBe(true); // re-scanned -> flips
const second = resolver.teamAffinityFileCache.get(key);
expect(second?.belongsToTeam).toBe(true);
expect(second!.size).toBeGreaterThan(sizeAfterFirst); // re-scanned + re-cached
});
});