fix(jsonl): count baseline entries without materializing messages

This commit is contained in:
777genius 2026-05-25 22:30:56 +03:00
parent e64fff8af0
commit b0b2fa2d13
2 changed files with 86 additions and 7 deletions

View file

@ -150,11 +150,13 @@ export async function parseJsonlStream(
}
try {
const parsed = parseJsonlLine(normalized);
if (parsed) {
if (collectMessages) {
if (collectMessages) {
const parsed = parseJsonlLine(normalized);
if (parsed) {
messages.push(parsed);
parsedLineCount += 1;
}
} else if (isCountableJsonlEntryLine(normalized)) {
parsedLineCount += 1;
}
} catch {
@ -193,11 +195,14 @@ export async function parseJsonlStream(
const trailingLine = pending.toString('utf8');
const normalized = normalizeJsonlLine(trailingLine);
if (looksLikeJsonObjectLine(normalized)) {
const parsed = parseJsonlLine(normalized);
if (parsed) {
if (collectMessages) {
if (collectMessages) {
const parsed = parseJsonlLine(normalized);
if (parsed) {
messages.push(parsed);
parsedLineCount += 1;
consumedBytes += pending.length;
}
} else if (isCountableJsonlEntryLine(normalized)) {
parsedLineCount += 1;
consumedBytes += pending.length;
}
@ -255,6 +260,24 @@ function looksLikeJsonObjectLine(line: string): boolean {
return line.startsWith('{');
}
/**
 * Decides whether a single JSONL line should be counted as an entry, without
 * building a full message object from it.
 *
 * A line is countable when its `uuid` is a string and its `type` is accepted
 * by `parseMessageType`. Entries typed `user` or `assistant` additionally
 * need a non-null object in `message`.
 *
 * NOTE(review): presumably intended to accept the same lines that
 * `parseJsonlLine` turns into a message — confirm against that function.
 *
 * @param line - One JSONL line, expected to contain a JSON object.
 * @returns `true` when the entry should be counted, otherwise `false`.
 * @throws SyntaxError when `line` is not valid JSON (propagated from `JSON.parse`).
 */
function isCountableJsonlEntryLine(line: string): boolean {
  const candidate = JSON.parse(line) as Partial<ChatHistoryEntry> & {
    uuid?: unknown;
    type?: unknown;
    message?: unknown;
  };

  // Checked first so that `parseMessageType` is never consulted for entries without a string uuid.
  if (typeof candidate.uuid !== 'string') {
    return false;
  }
  if (!parseMessageType(String(candidate.type))) {
    return false;
  }

  const requiresMessageBody = candidate.type === 'user' || candidate.type === 'assistant';
  if (!requiresMessageBody) {
    return true;
  }

  // `!= null` rules out both null and undefined before the typeof test.
  return candidate.message != null && typeof candidate.message === 'object';
}
// =============================================================================
// Entry Parsing
// =============================================================================

View file

@ -3,13 +3,15 @@ import * as os from 'os';
import * as path from 'path';
import { describe, expect, it } from 'vitest';
import type { ParsedMessage } from '../../../src/main/types';
import {
analyzeSessionFileMetadata,
calculateMetrics,
countJsonlFileWithStats,
parseJsonlFile,
parseJsonlFileWithStats,
parseJsonlLine,
} from '../../../src/main/utils/jsonl';
import type { ParsedMessage } from '../../../src/main/types';
// Helper to create a minimal ParsedMessage
function createMessage(overrides: Partial<ParsedMessage> = {}): ParsedMessage {
@ -190,6 +192,60 @@ describe('jsonl', () => {
});
describe('tolerant parsing', () => {
it('counts parseable entries without retaining messages', async () => {
  // Scratch directory for the fixture; removed in the finally block.
  const scratchDir = fs.mkdtempSync(path.join(os.tmpdir(), 'jsonl-count-'));
  try {
    const sessionPath = path.join(scratchDir, 'session.jsonl');

    // Only the first two lines are expected to surface as parsed messages
    // (see the uuid assertion below); the rest exercise rejection paths.
    const fixtureLines = [
      // Well-formed assistant entry.
      JSON.stringify({
        type: 'assistant',
        uuid: 'a1',
        timestamp: '2026-01-01T00:00:01.000Z',
        message: {
          role: 'assistant',
          content: [{ type: 'text', text: 'hello' }],
        },
      }),
      // Well-formed system entry.
      JSON.stringify({
        type: 'system',
        uuid: 's1',
        timestamp: '2026-01-01T00:00:02.000Z',
        content: 'system line',
      }),
      // Assistant entry with no `message` field.
      JSON.stringify({
        type: 'assistant',
        uuid: 'bad-assistant',
      }),
      // Entry whose `type` is not a known message type.
      JSON.stringify({
        type: 'unknown',
        uuid: 'unknown-1',
      }),
      // Not JSON at all.
      'not json',
      // Truncated JSON object as the final, unterminated line.
      '{"type":"assistant","uuid":"a2","timestamp":"2026-01-01T00:00:03.000Z","message":{"role":"assistant","content":[{"type":"text","text":"partial"',
    ];
    fs.writeFileSync(sessionPath, fixtureLines.join('\n'), 'utf8');

    const parsed = await parseJsonlFileWithStats(sessionPath);
    const counted = await countJsonlFileWithStats(sessionPath);

    const parsedUuids = parsed.messages.map(({ uuid }) => uuid);
    expect(parsedUuids).toEqual(['a1', 's1']);

    // The count-only pass must report the same statistics as the full parse.
    expect(counted.parsedLineCount).toBe(parsed.parsedLineCount);
    expect(counted.consumedBytes).toBe(parsed.consumedBytes);
  } finally {
    fs.rmSync(scratchDir, { recursive: true, force: true });
  }
});
it('skips non-JSON garbage and ignores a partial trailing object', async () => {
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'jsonl-tolerant-'));
try {