From b0b2fa2d1395d50c869ce5a0f1f3fbc46c704bd1 Mon Sep 17 00:00:00 2001
From: 777genius
Date: Mon, 25 May 2026 22:30:56 +0300
Subject: [PATCH] fix(jsonl): count baseline entries without materializing messages

---
Review revision of the original patch. Only the third hunk of
src/main/utils/jsonl.ts differs in content:
  * isCountableJsonlEntryLine keeps the JSON.parse result as `unknown`,
    rejects non-object values, and requires `type` to be a string instead of
    coercing it with String().
  * isCountableJsonlEntryLine gained a doc comment.
The `index` line for src/main/utils/jsonl.ts still carries the blob ids of the
original revision.

 src/main/utils/jsonl.ts       | 56 +++++++++++++++++++++++++++++----
 test/main/utils/jsonl.test.ts | 58 ++++++++++++++++++++++++++++++++++-
 2 files changed, 107 insertions(+), 7 deletions(-)

diff --git a/src/main/utils/jsonl.ts b/src/main/utils/jsonl.ts
index 16852d31..3775ea46 100644
--- a/src/main/utils/jsonl.ts
+++ b/src/main/utils/jsonl.ts
@@ -150,11 +150,13 @@ export async function parseJsonlStream(
         }
 
         try {
-          const parsed = parseJsonlLine(normalized);
-          if (parsed) {
-            if (collectMessages) {
+          if (collectMessages) {
+            const parsed = parseJsonlLine(normalized);
+            if (parsed) {
               messages.push(parsed);
+              parsedLineCount += 1;
             }
+          } else if (isCountableJsonlEntryLine(normalized)) {
             parsedLineCount += 1;
           }
         } catch {
@@ -193,11 +195,14 @@ export async function parseJsonlStream(
       const trailingLine = pending.toString('utf8');
       const normalized = normalizeJsonlLine(trailingLine);
       if (looksLikeJsonObjectLine(normalized)) {
-        const parsed = parseJsonlLine(normalized);
-        if (parsed) {
-          if (collectMessages) {
+        if (collectMessages) {
+          const parsed = parseJsonlLine(normalized);
+          if (parsed) {
             messages.push(parsed);
+            parsedLineCount += 1;
+            consumedBytes += pending.length;
           }
+        } else if (isCountableJsonlEntryLine(normalized)) {
           parsedLineCount += 1;
           consumedBytes += pending.length;
         }
@@ -255,6 +260,45 @@ function looksLikeJsonObjectLine(line: string): boolean {
   return line.startsWith('{');
 }
 
+/**
+ * Decides whether a raw JSONL line counts as a parsed entry, without building
+ * a message object for it.
+ *
+ * Used by the count-only branch of `parseJsonlStream` (`collectMessages` off).
+ * The test added with this change expects its totals to equal those of the
+ * message-collecting branch, so keep these rules in step with `parseJsonlLine`.
+ *
+ * @param line - Normalized JSONL line.
+ * @returns `true` when the entry has a string `uuid`, a string `type` accepted
+ *   by `parseMessageType`, and, for `user`/`assistant`, an object `message`.
+ * @throws SyntaxError when `line` is not valid JSON.
+ */
+function isCountableJsonlEntryLine(line: string): boolean {
+  // Keep the parse result as `unknown` until its shape has been checked.
+  const entry: unknown = JSON.parse(line);
+  if (typeof entry !== 'object' || entry === null) {
+    return false;
+  }
+
+  const { uuid, type, message } = entry as {
+    uuid?: unknown;
+    type?: unknown;
+    message?: unknown;
+  };
+
+  // Require a real string `type`; coercing with String() would let a value
+  // such as ['user'] reach the type lookup as 'user'.
+  if (typeof uuid !== 'string' || typeof type !== 'string' || !parseMessageType(type)) {
+    return false;
+  }
+
+  if (type === 'user' || type === 'assistant') {
+    return message != null && typeof message === 'object';
+  }
+
+  return true;
+}
+
 // =============================================================================
 // Entry Parsing
 // =============================================================================
diff --git a/test/main/utils/jsonl.test.ts b/test/main/utils/jsonl.test.ts
index 00ea479b..952321a1 100644
--- a/test/main/utils/jsonl.test.ts
+++ b/test/main/utils/jsonl.test.ts
@@ -3,13 +3,15 @@ import * as os from 'os';
 import * as path from 'path';
 import { describe, expect, it } from 'vitest';
 
+import type { ParsedMessage } from '../../../src/main/types';
 import {
   analyzeSessionFileMetadata,
   calculateMetrics,
+  countJsonlFileWithStats,
   parseJsonlFile,
+  parseJsonlFileWithStats,
   parseJsonlLine,
 } from '../../../src/main/utils/jsonl';
-import type { ParsedMessage } from '../../../src/main/types';
 
 // Helper to create a minimal ParsedMessage
 function createMessage(overrides: Partial<ParsedMessage> = {}): ParsedMessage {
@@ -190,6 +192,60 @@ describe('jsonl', () => {
   });
 
   describe('tolerant parsing', () => {
+    it('counts parseable entries without retaining messages', async () => {
+      const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'jsonl-count-'));
+      try {
+        const filePath = path.join(tempDir, 'session.jsonl');
+        const validAssistant = JSON.stringify({
+          type: 'assistant',
+          uuid: 'a1',
+          timestamp: '2026-01-01T00:00:01.000Z',
+          message: {
+            role: 'assistant',
+            content: [{ type: 'text', text: 'hello' }],
+          },
+        });
+        const validSystem = JSON.stringify({
+          type: 'system',
+          uuid: 's1',
+          timestamp: '2026-01-01T00:00:02.000Z',
+          content: 'system line',
+        });
+        const invalidMissingMessage = JSON.stringify({
+          type: 'assistant',
+          uuid: 'bad-assistant',
+        });
+        const unknownType = JSON.stringify({
+          type: 'unknown',
+          uuid: 'unknown-1',
+        });
+        const partialJson =
+          '{"type":"assistant","uuid":"a2","timestamp":"2026-01-01T00:00:03.000Z","message":{"role":"assistant","content":[{"type":"text","text":"partial"';
+
+        fs.writeFileSync(
+          filePath,
+          [
+            validAssistant,
+            validSystem,
+            invalidMissingMessage,
+            unknownType,
+            'not json',
+            partialJson,
+          ].join('\n'),
+          'utf8'
+        );
+
+        const parsed = await parseJsonlFileWithStats(filePath);
+        const counted = await countJsonlFileWithStats(filePath);
+
+        expect(parsed.messages.map((message) => message.uuid)).toEqual(['a1', 's1']);
+        expect(counted.parsedLineCount).toBe(parsed.parsedLineCount);
+        expect(counted.consumedBytes).toBe(parsed.consumedBytes);
+      } finally {
+        fs.rmSync(tempDir, { recursive: true, force: true });
+      }
+    });
+
     it('skips non-JSON garbage and ignores a partial trailing object', async () => {
       const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'jsonl-tolerant-'));
       try {