fix(jsonl): align count-only baseline parsing

This commit is contained in:
777genius 2026-05-25 22:58:07 +03:00
parent 7518b5af1d
commit 43afc9f907
2 changed files with 70 additions and 4 deletions

View file

@ -267,17 +267,38 @@ function isCountableJsonlEntryLine(line: string): boolean {
message?: unknown;
};
if (typeof entry.uuid !== 'string' || !parseMessageType(String(entry.type))) {
const type = typeof entry.type === 'string' ? parseMessageType(entry.type) : null;
if (typeof entry.uuid !== 'string' || entry.uuid.length === 0 || !type) {
return false;
}
if (entry.type === 'user' || entry.type === 'assistant') {
return entry.message != null && typeof entry.message === 'object';
if (type === 'user') {
if (entry.message == null) {
return false;
}
const content = (entry.message as { content?: unknown }).content;
return content == null || isParserSafeContent(content);
}
if (type === 'assistant') {
if (!isJsonObjectRecord(entry.message)) {
return false;
}
const content = entry.message.content;
return isParserSafeContent(content);
}
return true;
}
/**
 * Type guard for object-shaped values that are not arrays.
 *
 * Rejects `null`, `undefined`, every primitive, functions (their `typeof`
 * is `'function'`), and arrays. No further shape checks are performed, so
 * any other non-array object is accepted.
 *
 * @param value - Candidate value of unknown shape.
 * @returns `true` when `value` can be treated as a string-keyed record.
 */
function isJsonObjectRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || value === undefined) {
    return false;
  }
  if (typeof value !== 'object') {
    return false;
  }
  // Arrays report typeof 'object' too, so they are excluded explicitly.
  return !Array.isArray(value);
}
/**
 * Checks whether a message `content` value has a shape that is safe to parse.
 *
 * Accepted shapes are a string (including the empty string) or an array in
 * which no element is `null` or `undefined`. Elements are not inspected any
 * further, and an empty array is accepted.
 *
 * @param value - Candidate content value of unknown shape.
 * @returns `true` for a string or an array without nullish elements.
 */
function isParserSafeContent(value: unknown): boolean {
  if (typeof value === 'string') {
    return true;
  }
  if (!Array.isArray(value)) {
    return false;
  }
  // A single nullish block is enough to reject the whole array.
  const hasNullishBlock = value.some((block) => block === null || block === undefined);
  return !hasNullishBlock;
}
// =============================================================================
// Entry Parsing
// =============================================================================

View file

@ -212,10 +212,49 @@ describe('jsonl', () => {
timestamp: '2026-01-01T00:00:02.000Z',
content: 'system line',
});
const validUserWithoutContent = JSON.stringify({
type: 'user',
uuid: 'u1',
timestamp: '2026-01-01T00:00:03.000Z',
message: {
role: 'user',
},
});
const validUserArrayMessage = JSON.stringify({
type: 'user',
uuid: 'u2',
timestamp: '2026-01-01T00:00:04.000Z',
message: [],
});
const invalidMissingMessage = JSON.stringify({
type: 'assistant',
uuid: 'bad-assistant',
});
const invalidEmptyUuid = JSON.stringify({
type: 'system',
uuid: '',
content: 'empty uuid',
});
const invalidAssistantMissingContent = JSON.stringify({
type: 'assistant',
uuid: 'bad-assistant-content',
message: {
role: 'assistant',
},
});
const invalidAssistantArrayMessage = JSON.stringify({
type: 'assistant',
uuid: 'bad-assistant-array',
message: [],
});
const invalidAssistantNullContentBlock = JSON.stringify({
type: 'assistant',
uuid: 'bad-assistant-null-block',
message: {
role: 'assistant',
content: [null],
},
});
const unknownType = JSON.stringify({
type: 'unknown',
uuid: 'unknown-1',
@ -228,7 +267,13 @@ describe('jsonl', () => {
[
validAssistant,
validSystem,
validUserWithoutContent,
validUserArrayMessage,
invalidMissingMessage,
invalidEmptyUuid,
invalidAssistantMissingContent,
invalidAssistantArrayMessage,
invalidAssistantNullContentBlock,
unknownType,
'not json',
partialJson,
@ -239,7 +284,7 @@ describe('jsonl', () => {
const parsed = await parseJsonlFileWithStats(filePath);
const counted = await countJsonlFileWithStats(filePath);
expect(parsed.messages.map((message) => message.uuid)).toEqual(['a1', 's1']);
expect(parsed.messages.map((message) => message.uuid)).toEqual(['a1', 's1', 'u1', 'u2']);
expect(counted.parsedLineCount).toBe(parsed.parsedLineCount);
expect(counted.consumedBytes).toBe(parsed.consumedBytes);
} finally {