From 75dfcf2d502bbbee8e3773fd4e9bb8583dd1439a Mon Sep 17 00:00:00 2001 From: matt Date: Sun, 22 Feb 2026 02:03:22 +0900 Subject: [PATCH] feat: implement SearchTextCache and SearchTextExtractor for efficient text extraction and caching - Added SearchTextCache for LRU caching of extracted search text with mtime invalidation. - Introduced SearchTextExtractor for lightweight extraction of searchable text from session messages. - Updated SessionSearcher to utilize the new extractor and cache for improved search performance. - Added tests for SearchTextCache and SearchTextExtractor to ensure functionality and correctness. --- .../services/discovery/SearchTextCache.ts | 95 ++++++++ .../services/discovery/SearchTextExtractor.ts | 159 ++++++++++++ .../services/discovery/SessionSearcher.ts | 142 +++-------- src/main/services/discovery/index.ts | 2 + .../discovery/SearchTextCache.test.ts | 119 +++++++++ .../discovery/SearchTextExtractor.test.ts | 230 ++++++++++++++++++ 6 files changed, 643 insertions(+), 104 deletions(-) create mode 100644 src/main/services/discovery/SearchTextCache.ts create mode 100644 src/main/services/discovery/SearchTextExtractor.ts create mode 100644 test/main/services/discovery/SearchTextCache.test.ts create mode 100644 test/main/services/discovery/SearchTextExtractor.test.ts diff --git a/src/main/services/discovery/SearchTextCache.ts b/src/main/services/discovery/SearchTextCache.ts new file mode 100644 index 00000000..68c61ba9 --- /dev/null +++ b/src/main/services/discovery/SearchTextCache.ts @@ -0,0 +1,95 @@ +/** + * SearchTextCache - LRU cache for extracted search text with mtime invalidation. + * + * Caches SearchTextResult per session file path. Entries are small (~1KB each, + * just text + metadata), so 200 entries is a reasonable default. + * + * Invalidation: mtime comparison on get(). If the file's mtime has changed + * since caching, the entry is considered stale and undefined is returned. + * No TTL needed — mtime check is sufficient. + */ + +import type { SearchableEntry } from './SearchTextExtractor'; + +interface CacheEntry { + entries: SearchableEntry[]; + sessionTitle: string | undefined; + mtimeMs: number; +} + +export class SearchTextCache { + private readonly cache = new Map(); + private readonly maxSize: number; + + constructor(maxSize: number = 200) { + this.maxSize = maxSize; + } + + /** + * Get cached entries for a file path if the mtime matches. + * Returns undefined if not cached or stale. + */ + get( + filePath: string, + mtimeMs: number + ): { entries: SearchableEntry[]; sessionTitle: string | undefined } | undefined { + const entry = this.cache.get(filePath); + if (!entry) return undefined; + + // Stale — file was modified since we cached it + if (entry.mtimeMs !== mtimeMs) { + this.cache.delete(filePath); + return undefined; + } + + // LRU: delete and re-insert to move to end (most recent) + this.cache.delete(filePath); + this.cache.set(filePath, entry); + + return { entries: entry.entries, sessionTitle: entry.sessionTitle }; + } + + /** + * Cache extracted entries for a file path. + */ + set( + filePath: string, + mtimeMs: number, + entries: SearchableEntry[], + sessionTitle: string | undefined + ): void { + // If already exists, delete first to update position + this.cache.delete(filePath); + + // Evict oldest if at capacity + if (this.cache.size >= this.maxSize) { + const oldest = this.cache.keys().next().value; + if (oldest !== undefined) { + this.cache.delete(oldest); + } + } + + this.cache.set(filePath, { entries, sessionTitle, mtimeMs }); + } + + /** + * Remove a specific entry from the cache. + */ + invalidate(filePath: string): void { + this.cache.delete(filePath); + } + + /** + * Clear all cached entries. + */ + clear(): void { + this.cache.clear(); + } + + /** + * Current number of cached entries. + */ + get size(): number { + return this.cache.size; + } +} diff --git a/src/main/services/discovery/SearchTextExtractor.ts b/src/main/services/discovery/SearchTextExtractor.ts new file mode 100644 index 00000000..1569eb5d --- /dev/null +++ b/src/main/services/discovery/SearchTextExtractor.ts @@ -0,0 +1,159 @@ +/** + * SearchTextExtractor - Lightweight text extraction for search. + * + * Mirrors ChunkBuilder's classification loop (classifyMessages → buffer flush) + * but only extracts searchable text + metadata, skipping all expensive operations: + * - No tool execution building + * - No semantic step extraction + * - No subagent linking + * - No timeline gap filling + * - No metrics calculation + */ + +import { classifyMessages } from '@main/services/parsing/MessageClassifier'; +import { sanitizeDisplayContent } from '@shared/utils/contentSanitizer'; + +import type { ParsedMessage } from '@main/types'; + +/** + * A lightweight entry containing only the data needed for search matching. + */ +export interface SearchableEntry { + text: string; + groupId: string; + messageType: 'user' | 'assistant'; + itemType: 'user' | 'ai'; + timestamp: number; + messageUuid: string; +} + +/** + * Result of extracting searchable text from a session's messages. + */ +export interface SearchTextResult { + entries: SearchableEntry[]; + sessionTitle: string | undefined; +} + +/** + * Extract searchable text entries from parsed messages. + * + * Algorithm mirrors ChunkBuilder.buildChunks() lines 78-151: + * - Filter to main thread (!m.isSidechain) + * - classifyMessages() — cheap type guard checks + * - Walk classified messages with an aiBuffer: + * - hardNoise → skip + * - compact / system / user → flush AI buffer, then handle + * - ai → push to buffer + * - Flush remaining buffer at end + */ +export function extractSearchableEntries(messages: ParsedMessage[]): SearchTextResult { + const entries: SearchableEntry[] = []; + let sessionTitle: string | undefined; + + // Filter to main thread messages (non-sidechain) — same as ChunkBuilder line 82 + const mainMessages = messages.filter((m) => !m.isSidechain); + const classified = classifyMessages(mainMessages); + + let aiBuffer: ParsedMessage[] = []; + + for (const { message, category } of classified) { + switch (category) { + case 'hardNoise': + // Skip — filtered out + break; + + case 'compact': + case 'system': + // Flush AI buffer, but compact/system messages have no searchable text + if (aiBuffer.length > 0) { + const aiEntry = extractAIEntry(aiBuffer); + if (aiEntry) entries.push(aiEntry); + aiBuffer = []; + } + break; + + case 'user': { + // Flush AI buffer + if (aiBuffer.length > 0) { + const aiEntry = extractAIEntry(aiBuffer); + if (aiEntry) entries.push(aiEntry); + aiBuffer = []; + } + // Extract user text + const userText = extractUserText(message); + if (userText) { + if (!sessionTitle) { + sessionTitle = userText.slice(0, 100); + } + entries.push({ + text: userText, + groupId: `user-${message.uuid}`, + messageType: 'user', + itemType: 'user', + timestamp: message.timestamp.getTime(), + messageUuid: message.uuid, + }); + } + break; + } + + case 'ai': + aiBuffer.push(message); + break; + } + } + + // Flush remaining AI buffer + if (aiBuffer.length > 0) { + const aiEntry = extractAIEntry(aiBuffer); + if (aiEntry) entries.push(aiEntry); + } + + return { entries, sessionTitle }; +} + +/** + * Extract the last text output from an AI message buffer. + * Scans backward for the last assistant message with a text content block. + */ +function extractAIEntry(buffer: ParsedMessage[]): SearchableEntry | null { + // Scan backward for last assistant message with text content + for (let i = buffer.length - 1; i >= 0; i--) { + const msg = buffer[i]; + if (msg.role !== 'assistant' || !Array.isArray(msg.content)) continue; + + // Find the last text block in this message + for (let j = msg.content.length - 1; j >= 0; j--) { + const block = msg.content[j]; + if (block.type === 'text' && block.text) { + return { + text: block.text, + groupId: `ai-${buffer[0].uuid}`, + messageType: 'assistant', + itemType: 'ai', + timestamp: msg.timestamp.getTime(), + messageUuid: msg.uuid, + }; + } + } + } + return null; +} + +/** + * Extract searchable text from a user message. + * Shared logic previously in SessionSearcher.extractUserSearchableText(). + */ +export function extractUserText(message: ParsedMessage): string { + let rawText = ''; + if (typeof message.content === 'string') { + rawText = message.content; + } else if (Array.isArray(message.content)) { + rawText = message.content + .filter((block) => block.type === 'text') + .map((block) => block.text) + .join(''); + } + return sanitizeDisplayContent(rawText); +} diff --git a/src/main/services/discovery/SessionSearcher.ts b/src/main/services/discovery/SessionSearcher.ts index 3ebe9827..a4382c98 100644 --- a/src/main/services/discovery/SessionSearcher.ts +++ b/src/main/services/discovery/SessionSearcher.ts @@ -6,21 +6,14 @@ * - Search within a single session file * - Restrict matching scope to User text + AI last text output * - Extract context around each match occurrence + * + * Uses SearchTextExtractor for lightweight text extraction (skips ChunkBuilder) + * and SearchTextCache for mtime-based caching of extracted entries. */ -import { ChunkBuilder } from '@main/services/analysis/ChunkBuilder'; import { LocalFileSystemProvider } from '@main/services/infrastructure/LocalFileSystemProvider'; -import { - isEnhancedAIChunk, - isUserChunk, - type ParsedMessage, - type SearchResult, - type SearchSessionsResult, - type SemanticStep, -} from '@main/types'; import { parseJsonlFile } from '@main/utils/jsonl'; import { extractBaseDir, extractSessionId } from '@main/utils/pathDecoder'; -import { sanitizeDisplayContent } from '@shared/utils/contentSanitizer'; import { createLogger } from '@shared/utils/logger'; import { extractMarkdownPlainText, @@ -28,36 +21,31 @@ import { } from '@shared/utils/markdownTextSearch'; import * as path from 'path'; +import { SearchTextCache } from './SearchTextCache'; +import { extractSearchableEntries } from './SearchTextExtractor'; import { subprojectRegistry } from './SubprojectRegistry'; +import type { SearchableEntry } from './SearchTextExtractor'; import type { FileSystemProvider } from '@main/services/infrastructure/FileSystemProvider'; +import type { SearchResult, SearchSessionsResult } from '@main/types'; const logger = createLogger('Discovery:SessionSearcher'); const SSH_FAST_SEARCH_STAGE_LIMITS = [40, 140, 320] as const; const SSH_FAST_SEARCH_MIN_RESULTS = 8; const SSH_FAST_SEARCH_TIME_BUDGET_MS = 4500; -interface SearchableEntry { - text: string; - groupId: string; - messageType: 'user' | 'assistant'; - itemType: 'user' | 'ai'; - timestamp: number; - messageUuid: string; -} - /** * SessionSearcher provides methods for searching sessions. */ export class SessionSearcher { private readonly projectsDir: string; - private readonly chunkBuilder: ChunkBuilder; private readonly fsProvider: FileSystemProvider; + private readonly searchCache: SearchTextCache; constructor(projectsDir: string, fsProvider?: FileSystemProvider) { this.projectsDir = projectsDir; - this.chunkBuilder = new ChunkBuilder(); this.fsProvider = fsProvider ?? new LocalFileSystemProvider(); + this.searchCache = new SearchTextCache(); } /** @@ -151,7 +139,8 @@ export class SessionSearcher { sessionId, file.filePath, normalizedQuery, - maxResults + maxResults, + file.mtimeMs ); }) ); @@ -207,11 +196,15 @@ export class SessionSearcher { /** * Searches a single session file for a query string. * + * Uses SearchTextExtractor for lightweight text extraction (no ChunkBuilder) + * and SearchTextCache for mtime-based caching. + * * @param projectId - The project ID * @param sessionId - The session ID * @param filePath - Path to the session file * @param query - Normalized search query (lowercase) * @param maxResults - Maximum number of results to return + * @param mtimeMs - File modification time for cache invalidation * @returns Array of search results */ async searchSessionFile( @@ -219,71 +212,35 @@ export class SessionSearcher { sessionId: string, filePath: string, query: string, - maxResults: number + maxResults: number, + mtimeMs: number ): Promise { const results: SearchResult[] = []; - let sessionTitle: string | undefined; - const messages = await parseJsonlFile(filePath, this.fsProvider); - const chunks = this.chunkBuilder.buildChunks(messages, []); - for (const chunk of chunks) { - if (results.length >= maxResults) { - break; - } + // Check cache first + let cached = this.searchCache.get(filePath, mtimeMs); + if (!cached) { + // Cache miss — parse and extract + const messages = await parseJsonlFile(filePath, this.fsProvider); + const extracted = extractSearchableEntries(messages); + this.searchCache.set(filePath, mtimeMs, extracted.entries, extracted.sessionTitle); + cached = extracted; + } - if (isUserChunk(chunk)) { - const userText = this.extractUserSearchableText(chunk.userMessage); - if (!sessionTitle && userText) { - sessionTitle = userText.slice(0, 100); - } - if (!userText) { - continue; - } - const searchableEntry: SearchableEntry = { - text: userText, - groupId: chunk.id, - messageType: 'user', - itemType: 'user', - timestamp: chunk.userMessage.timestamp.getTime(), - messageUuid: chunk.userMessage.uuid, - }; - this.collectMatchesForEntry( - searchableEntry, - query, - results, - maxResults, - projectId, - sessionId, - sessionTitle - ); - continue; - } + const { entries, sessionTitle } = cached; - if (isEnhancedAIChunk(chunk)) { - const lastOutputStep = this.findLastOutputTextStep(chunk.semanticSteps); - const outputText = lastOutputStep?.content.outputText; - if (!lastOutputStep || !outputText) { - continue; - } + for (const entry of entries) { + if (results.length >= maxResults) break; - const searchableEntry: SearchableEntry = { - text: outputText, - groupId: chunk.id, - messageType: 'assistant', - itemType: 'ai', - timestamp: lastOutputStep.startTime.getTime(), - messageUuid: lastOutputStep.sourceMessageId ?? chunk.responses[0]?.uuid ?? '', - }; - this.collectMatchesForEntry( - searchableEntry, - query, - results, - maxResults, - projectId, - sessionId, - sessionTitle - ); - } + this.collectMatchesForEntry( + entry, + query, + results, + maxResults, + projectId, + sessionId, + sessionTitle + ); } return results; @@ -342,29 +299,6 @@ export class SessionSearcher { } } - private extractUserSearchableText(message: ParsedMessage): string { - let rawText = ''; - if (typeof message.content === 'string') { - rawText = message.content; - } else if (Array.isArray(message.content)) { - rawText = message.content - .filter((block) => block.type === 'text') - .map((block) => block.text) - .join(''); - } - return sanitizeDisplayContent(rawText); - } - - private findLastOutputTextStep(steps: SemanticStep[]): SemanticStep | null { - for (let i = steps.length - 1; i >= 0; i--) { - const step = steps[i]; - if (step.type === 'output' && step.content.outputText) { - return step; - } - } - return null; - } - private async collectFulfilledInBatches( items: T[], batchSize: number, diff --git a/src/main/services/discovery/index.ts b/src/main/services/discovery/index.ts index 6c7b1897..815eaaff 100644 --- a/src/main/services/discovery/index.ts +++ b/src/main/services/discovery/index.ts @@ -12,6 +12,8 @@ export * from './ProjectPathResolver'; export * from './ProjectScanner'; +export * from './SearchTextCache'; +export * from './SearchTextExtractor'; export * from './SessionContentFilter'; export * from './SessionSearcher'; export * from './SubagentLocator'; diff --git a/test/main/services/discovery/SearchTextCache.test.ts b/test/main/services/discovery/SearchTextCache.test.ts new file mode 100644 index 00000000..12c510d0 --- /dev/null +++ b/test/main/services/discovery/SearchTextCache.test.ts @@ -0,0 +1,119 @@ +import { describe, expect, it } from 'vitest'; + +import { SearchTextCache } from '../../../../src/main/services/discovery/SearchTextCache'; + +import type { SearchableEntry } from '../../../../src/main/services/discovery/SearchTextExtractor'; + +function makeEntry(text: string, groupId: string): SearchableEntry { + return { + text, + groupId, + messageType: 'user', + itemType: 'user', + timestamp: Date.now(), + messageUuid: groupId, + }; +} + +describe('SearchTextCache', () => { + it('returns cached entry on mtime match', () => { + const cache = new SearchTextCache(); + const entries = [makeEntry('hello', 'user-1')]; + cache.set('/path/a.jsonl', 1000, entries, 'Title A'); + + const result = cache.get('/path/a.jsonl', 1000); + expect(result).toBeDefined(); + expect(result!.entries).toEqual(entries); + expect(result!.sessionTitle).toBe('Title A'); + }); + + it('returns undefined on mtime mismatch (stale)', () => { + const cache = new SearchTextCache(); + const entries = [makeEntry('hello', 'user-1')]; + cache.set('/path/a.jsonl', 1000, entries, 'Title A'); + + const result = cache.get('/path/a.jsonl', 2000); + expect(result).toBeUndefined(); + }); + + it('returns undefined for uncached paths', () => { + const cache = new SearchTextCache(); + const result = cache.get('/path/missing.jsonl', 1000); + expect(result).toBeUndefined(); + }); + + it('evicts oldest entry when at max capacity', () => { + const cache = new SearchTextCache(3); + + cache.set('/path/1.jsonl', 100, [makeEntry('one', 'u1')], 'One'); + cache.set('/path/2.jsonl', 200, [makeEntry('two', 'u2')], 'Two'); + cache.set('/path/3.jsonl', 300, [makeEntry('three', 'u3')], 'Three'); + + expect(cache.size).toBe(3); + + // Adding a 4th entry should evict the oldest (1.jsonl) + cache.set('/path/4.jsonl', 400, [makeEntry('four', 'u4')], 'Four'); + + expect(cache.size).toBe(3); + expect(cache.get('/path/1.jsonl', 100)).toBeUndefined(); + expect(cache.get('/path/4.jsonl', 400)).toBeDefined(); + }); + + it('LRU access moves entry to end, preserving it from eviction', () => { + const cache = new SearchTextCache(3); + + cache.set('/path/1.jsonl', 100, [makeEntry('one', 'u1')], 'One'); + cache.set('/path/2.jsonl', 200, [makeEntry('two', 'u2')], 'Two'); + cache.set('/path/3.jsonl', 300, [makeEntry('three', 'u3')], 'Three'); + + // Access entry 1, moving it to end + cache.get('/path/1.jsonl', 100); + + // Adding a 4th should now evict entry 2 (oldest after LRU access) + cache.set('/path/4.jsonl', 400, [makeEntry('four', 'u4')], 'Four'); + + expect(cache.get('/path/1.jsonl', 100)).toBeDefined(); + expect(cache.get('/path/2.jsonl', 200)).toBeUndefined(); + }); + + it('invalidate() removes a specific entry', () => { + const cache = new SearchTextCache(); + cache.set('/path/a.jsonl', 1000, [makeEntry('hello', 'u1')], 'Title'); + + cache.invalidate('/path/a.jsonl'); + expect(cache.get('/path/a.jsonl', 1000)).toBeUndefined(); + expect(cache.size).toBe(0); + }); + + it('clear() empties the cache', () => { + const cache = new SearchTextCache(); + cache.set('/path/1.jsonl', 100, [makeEntry('one', 'u1')], 'One'); + cache.set('/path/2.jsonl', 200, [makeEntry('two', 'u2')], 'Two'); + + expect(cache.size).toBe(2); + cache.clear(); + expect(cache.size).toBe(0); + }); + + it('handles undefined sessionTitle', () => { + const cache = new SearchTextCache(); + cache.set('/path/a.jsonl', 1000, [], undefined); + + const result = cache.get('/path/a.jsonl', 1000); + expect(result).toBeDefined(); + expect(result!.sessionTitle).toBeUndefined(); + expect(result!.entries).toEqual([]); + }); + + it('updates existing entry on re-set', () => { + const cache = new SearchTextCache(); + cache.set('/path/a.jsonl', 1000, [makeEntry('old', 'u1')], 'Old'); + cache.set('/path/a.jsonl', 2000, [makeEntry('new', 'u2')], 'New'); + + const result = cache.get('/path/a.jsonl', 2000); + expect(result).toBeDefined(); + expect(result!.entries[0].text).toBe('new'); + expect(result!.sessionTitle).toBe('New'); + expect(cache.size).toBe(1); + }); +}); diff --git a/test/main/services/discovery/SearchTextExtractor.test.ts b/test/main/services/discovery/SearchTextExtractor.test.ts new file mode 100644 index 00000000..1d2b122c --- /dev/null +++ b/test/main/services/discovery/SearchTextExtractor.test.ts @@ -0,0 +1,230 @@ +import { describe, expect, it } from 'vitest'; + +import { + extractSearchableEntries, + extractUserText, +} from '../../../../src/main/services/discovery/SearchTextExtractor'; + +import type { ParsedMessage } from '../../../../src/main/types'; + +function makeUserMessage( + uuid: string, + content: string, + timestamp = '2026-01-01T00:00:00.000Z' +): ParsedMessage { + return { + uuid, + type: 'user', + role: 'user', + content, + timestamp: new Date(timestamp), + isMeta: false, + isSidechain: false, + } as ParsedMessage; +} + +function makeAssistantMessage( + uuid: string, + textContent: string, + timestamp = '2026-01-01T00:00:01.000Z' +): ParsedMessage { + return { + uuid, + type: 'assistant', + role: 'assistant', + content: [{ type: 'text', text: textContent }], + timestamp: new Date(timestamp), + isMeta: false, + isSidechain: false, + } as ParsedMessage; +} + +function makeAssistantWithThinking( + uuid: string, + thinking: string, + textContent: string, + timestamp = '2026-01-01T00:00:01.000Z' +): ParsedMessage { + return { + uuid, + type: 'assistant', + role: 'assistant', + content: [ + { type: 'thinking', thinking }, + { type: 'text', text: textContent }, + ], + timestamp: new Date(timestamp), + isMeta: false, + isSidechain: false, + } as ParsedMessage; +} + +function makeToolResultMessage( + uuid: string, + timestamp = '2026-01-01T00:00:01.500Z' +): ParsedMessage { + return { + uuid, + type: 'user', + role: 'user', + content: [{ type: 'tool_result', tool_use_id: 'tool-1', content: 'result text' }], + timestamp: new Date(timestamp), + isMeta: true, + isSidechain: false, + } as ParsedMessage; +} + +describe('SearchTextExtractor', () => { + describe('extractSearchableEntries', () => { + it('produces user-{uuid} groupIds for user messages', () => { + const messages = [makeUserMessage('u1', 'hello world')]; + const result = extractSearchableEntries(messages); + + expect(result.entries).toHaveLength(1); + expect(result.entries[0].groupId).toBe('user-u1'); + expect(result.entries[0].itemType).toBe('user'); + expect(result.entries[0].messageType).toBe('user'); + expect(result.entries[0].text).toBe('hello world'); + }); + + it('produces ai-{uuid} groupIds for AI groups (using first buffer message uuid)', () => { + const messages = [ + makeUserMessage('u1', 'question'), + makeToolResultMessage('tr1', '2026-01-01T00:00:01.000Z'), + makeAssistantMessage('a1', 'thinking...', '2026-01-01T00:00:02.000Z'), + makeAssistantMessage('a2', 'final answer', '2026-01-01T00:00:03.000Z'), + ]; + const result = extractSearchableEntries(messages); + + const aiEntries = result.entries.filter((e) => e.itemType === 'ai'); + expect(aiEntries).toHaveLength(1); + // groupId uses the first message in the AI buffer + expect(aiEntries[0].groupId).toMatch(/^ai-/); + // Text is from the last assistant message with text + expect(aiEntries[0].text).toBe('final answer'); + }); + + it('extracts last AI text output correctly (backward scan)', () => { + const messages = [ + makeUserMessage('u1', 'question'), + makeAssistantMessage('a1', 'older output', '2026-01-01T00:00:01.000Z'), + makeAssistantMessage('a2', 'latest output', '2026-01-01T00:00:02.000Z'), + ]; + const result = extractSearchableEntries(messages); + + const aiEntries = result.entries.filter((e) => e.itemType === 'ai'); + expect(aiEntries).toHaveLength(1); + expect(aiEntries[0].text).toBe('latest output'); + }); + + it('handles assistant messages with thinking + text blocks', () => { + const messages = [ + makeUserMessage('u1', 'question'), + makeAssistantWithThinking('a1', 'internal reasoning', 'visible answer'), + ]; + const result = extractSearchableEntries(messages); + + const aiEntries = result.entries.filter((e) => e.itemType === 'ai'); + expect(aiEntries).toHaveLength(1); + expect(aiEntries[0].text).toBe('visible answer'); + }); + + it('skips sidechain messages', () => { + const sidechain: ParsedMessage = { + ...makeUserMessage('u-side', 'sidechain text'), + isSidechain: true, + } as ParsedMessage; + const messages = [sidechain, makeUserMessage('u1', 'main thread')]; + const result = extractSearchableEntries(messages); + + expect(result.entries).toHaveLength(1); + expect(result.entries[0].text).toBe('main thread'); + }); + + it('extracts sessionTitle from first user message (truncated to 100 chars)', () => { + const longText = 'a'.repeat(200); + const messages = [ + makeUserMessage('u1', longText), + makeUserMessage('u2', 'second message'), + ]; + const result = extractSearchableEntries(messages); + + expect(result.sessionTitle).toBe('a'.repeat(100)); + }); + + it('handles empty messages array', () => { + const result = extractSearchableEntries([]); + expect(result.entries).toHaveLength(0); + expect(result.sessionTitle).toBeUndefined(); + }); + + it('handles messages with no user messages', () => { + const messages = [ + makeAssistantMessage('a1', 'just AI talking'), + ]; + const result = extractSearchableEntries(messages); + + expect(result.sessionTitle).toBeUndefined(); + const aiEntries = result.entries.filter((e) => e.itemType === 'ai'); + expect(aiEntries).toHaveLength(1); + }); + + it('handles AI buffer with no text content', () => { + const noTextAssistant: ParsedMessage = { + uuid: 'a1', + type: 'assistant', + role: 'assistant', + content: [{ type: 'thinking', thinking: 'just thinking' }], + timestamp: new Date('2026-01-01T00:00:01.000Z'), + isMeta: false, + isSidechain: false, + } as ParsedMessage; + const messages = [makeUserMessage('u1', 'question'), noTextAssistant]; + const result = extractSearchableEntries(messages); + + const aiEntries = result.entries.filter((e) => e.itemType === 'ai'); + expect(aiEntries).toHaveLength(0); + }); + + it('flushes AI buffer on user messages', () => { + const messages = [ + makeUserMessage('u1', 'first question'), + makeAssistantMessage('a1', 'first answer', '2026-01-01T00:00:01.000Z'), + makeUserMessage('u2', 'second question', '2026-01-01T00:00:02.000Z'), + makeAssistantMessage('a2', 'second answer', '2026-01-01T00:00:03.000Z'), + ]; + const result = extractSearchableEntries(messages); + + expect(result.entries).toHaveLength(4); + const userEntries = result.entries.filter((e) => e.itemType === 'user'); + const aiEntries = result.entries.filter((e) => e.itemType === 'ai'); + expect(userEntries).toHaveLength(2); + expect(aiEntries).toHaveLength(2); + expect(aiEntries[0].text).toBe('first answer'); + expect(aiEntries[1].text).toBe('second answer'); + }); + }); + + describe('extractUserText', () => { + it('extracts string content', () => { + const msg = makeUserMessage('u1', 'hello world'); + expect(extractUserText(msg)).toBe('hello world'); + }); + + it('extracts array content with text blocks', () => { + const msg: ParsedMessage = { + uuid: 'u1', + type: 'user', + role: 'user', + content: [ + { type: 'text', text: 'part one' }, + { type: 'text', text: ' part two' }, + ], + timestamp: new Date(), + isMeta: false, + isSidechain: false, + } as ParsedMessage; + expect(extractUserText(msg)).toBe('part one part two'); + }); + }); +});