agent-ecosystem/test/main/utils/jsonlLineReader.test.ts
777genius 8cb44cd793 perf: replace readline with a chunked line generator in team JSONL readers
readline.createInterface runs an expensive Unicode line-break regex + extra
stream/string-decoder machinery per chunk. The main transcript parser (parseJsonlStream)
already uses a buffer + manual newline split; these per-team readers still used readline.

Add readJsonlLines(): an async generator that yields a JSONL file's lines via a chunked
utf8 stream read + a plain '\n' split (drop-in for 'for await (const line of rl)'), so the
consumers' loop bodies are unchanged. Stream is utf8-decoded before splitting, so multi-byte
chars across chunk boundaries are safe; trailing CR (CRLF) is stripped; empty lines and a
final newline-less line are yielded, matching readline; breaking out of the loop destroys
the stream via the generator's finally.

Adopt it in MemberStatsComputer, TaskBoundaryParser, and FileContentResolver (file-history
scan). Behavior-identical (their existing tests pass: 18 + 6 + 12) plus 6 new tests for the
generator (CRLF, empty lines, no-trailing-newline, empty file, early break, multi-byte chunk boundary).

Note: session-browser readline paths (jsonl metadata extractor, metadataExtraction,
SessionContentFilter) are off the launch path and left as-is for now.
2026-05-30 15:58:09 +03:00

73 lines
2.5 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import * as fs from 'fs/promises';
import * as os from 'os';
import * as path from 'path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { readJsonlLines } from '../../../src/main/utils/jsonlLineReader';
/**
 * Tests for readJsonlLines, the async line generator used in place of
 * `for await (const line of rl)` loops over a readline interface.
 *
 * Every test writes a fixture into a fresh temporary directory and reads it
 * back through the generator.
 */
describe('readJsonlLines', () => {
  // Per-test scratch directory; created in beforeEach, removed in afterEach.
  let dir: string;

  beforeEach(async () => {
    dir = await fs.mkdtemp(path.join(os.tmpdir(), 'jsonl-line-reader-'));
  });

  afterEach(async () => {
    // `force` keeps teardown from throwing if the directory is already gone.
    await fs.rm(dir, { recursive: true, force: true });
  });

  /**
   * Writes `content` as UTF-8 to `name` inside the scratch directory.
   *
   * @returns the absolute path of the written file
   */
  async function write(name: string, content: string): Promise<string> {
    const p = path.join(dir, name);
    await fs.writeFile(p, content, 'utf8');
    return p;
  }

  /**
   * Drains the generator for `filePath` and returns all yielded lines in order.
   */
  async function collect(filePath: string): Promise<string[]> {
    const out: string[] = [];
    for await (const line of readJsonlLines(filePath)) {
      out.push(line);
    }
    return out;
  }

  it('yields every line in order, including empty lines', async () => {
    // empty lines must still be yielded so callers tracking line numbers match readline
    const p = await write('a.jsonl', 'a\n\nb\nc\n');
    expect(await collect(p)).toEqual(['a', '', 'b', 'c']);
  });

  it('strips a trailing CR from CRLF endings', async () => {
    const p = await write('crlf.jsonl', 'one\r\ntwo\r\nthree\r\n');
    expect(await collect(p)).toEqual(['one', 'two', 'three']);
  });

  it('yields a final line that has no trailing newline', async () => {
    const p = await write('tail.jsonl', 'first\nlast-no-newline');
    expect(await collect(p)).toEqual(['first', 'last-no-newline']);
  });

  it('returns nothing for an empty file', async () => {
    const p = await write('empty.jsonl', '');
    expect(await collect(p)).toEqual([]);
  });

  it('stops and cleans up when the consumer breaks out of the loop', async () => {
    const p = await write('stop.jsonl', 'l1\nl2\nl3\nl4\n');
    const seen: string[] = [];
    for await (const line of readJsonlLines(p)) {
      seen.push(line);
      if (line === 'l2') break;
    }
    // NOTE(review): only the early stop is asserted here; the stream cleanup
    // itself is not directly observed by this test.
    expect(seen).toEqual(['l1', 'l2']);
  });

  it('decodes multi-byte UTF-8 that straddles a read-chunk boundary', async () => {
    // Fixture layout (bytes), with 'я' encoded as 2 bytes in UTF-8:
    //   line 1: 80,000 bytes starting at offset 0 (characters on even offsets)
    //   '\n'  : 1 byte, which shifts every later character to an odd offset
    //   line 2: 80,000 bytes starting at offset 80,001
    // Assuming the reader uses the stream's default 64 KiB chunk size (TODO confirm
    // against the implementation), the 131,072-byte boundary lands in the middle of
    // a character inside line 2, so line 2 is the one that must be compared.
    const big = 'я'.repeat(40_000); // ~80KB
    // Single source of truth for the marker so the fixture and expectation cannot drift.
    const marker = 'МАРКЕР-Ω';
    const p = await write('mb.jsonl', `${big}\n${big}\n${marker}\n`);
    const lines = await collect(p);
    expect(lines).toHaveLength(3);
    expect(lines[0]).toBe(big);
    expect(lines[1]).toBe(big);
    expect(lines[2]).toBe(marker);
  });
});