feat: implement SearchTextCache and SearchTextExtractor for efficient text extraction and caching
- Added SearchTextCache for LRU caching of extracted search text with mtime invalidation.
- Introduced SearchTextExtractor for lightweight extraction of searchable text from session messages.
- Updated SessionSearcher to utilize the new extractor and cache for improved search performance.
- Added tests for SearchTextCache and SearchTextExtractor to ensure functionality and correctness.
This commit is contained in:
parent
05b7888c17
commit
75dfcf2d50
6 changed files with 643 additions and 104 deletions
95
src/main/services/discovery/SearchTextCache.ts
Normal file
95
src/main/services/discovery/SearchTextCache.ts
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
/**
|
||||
* SearchTextCache - LRU cache for extracted search text with mtime invalidation.
|
||||
*
|
||||
* Caches SearchTextResult per session file path. Entries are small (~1KB each,
|
||||
* just text + metadata), so 200 entries is a reasonable default.
|
||||
*
|
||||
* Invalidation: mtime comparison on get(). If the file's mtime has changed
|
||||
* since caching, the entry is considered stale and undefined is returned.
|
||||
* No TTL needed — mtime check is sufficient.
|
||||
*/
|
||||
|
||||
import type { SearchableEntry } from './SearchTextExtractor';
|
||||
|
||||
/**
 * Internal record stored per file path inside the cache map.
 */
interface CacheEntry {
  /** Modification time supplied when the entry was stored; compared on lookup. */
  mtimeMs: number;
  /** Title stored alongside the entries; may be absent. */
  sessionTitle: string | undefined;
  /** Searchable entries extracted from the file. */
  entries: SearchableEntry[];
}
|
||||
|
||||
/**
 * LRU cache of extracted search text, keyed by session file path.
 *
 * Recency is tracked through Map insertion order: the first key of the map is
 * always the least recently used entry. An entry is only served when the
 * caller-supplied mtime equals the mtime recorded at insertion time.
 */
export class SearchTextCache {
  private readonly cache = new Map<string, CacheEntry>();
  private readonly maxSize: number;

  /**
   * @param maxSize - Maximum number of entries to retain (default 200).
   *   Fractions are rounded down and negative values are treated as 0.
   *   A capacity of 0 disables caching entirely. NaN falls back to the
   *   default so that eviction can never be silently switched off.
   */
  constructor(maxSize: number = 200) {
    // `size >= NaN` is always false, which would make the cache unbounded.
    this.maxSize = Number.isNaN(maxSize) ? 200 : Math.max(0, Math.floor(maxSize));
  }

  /**
   * Get cached entries for a file path if the mtime matches.
   *
   * A hit refreshes the entry's recency. A stale entry (different mtime) is
   * dropped from the cache.
   *
   * @param filePath - Cache key.
   * @param mtimeMs - Current modification time of the file.
   * @returns The cached entries and title, or undefined if not cached or stale.
   */
  get(
    filePath: string,
    mtimeMs: number
  ): { entries: SearchableEntry[]; sessionTitle: string | undefined } | undefined {
    const entry = this.cache.get(filePath);
    if (!entry) return undefined;

    // Stale — file was modified since we cached it
    if (entry.mtimeMs !== mtimeMs) {
      this.cache.delete(filePath);
      return undefined;
    }

    // LRU: delete and re-insert to move to end (most recent)
    this.cache.delete(filePath);
    this.cache.set(filePath, entry);

    return { entries: entry.entries, sessionTitle: entry.sessionTitle };
  }

  /**
   * Cache extracted entries for a file path, evicting the least recently used
   * entry when the cache is full. Does nothing when the capacity is 0.
   *
   * @param filePath - Cache key.
   * @param mtimeMs - Modification time of the file the entries were extracted from.
   * @param entries - Extracted searchable entries.
   * @param sessionTitle - Title derived during extraction, if any.
   */
  set(
    filePath: string,
    mtimeMs: number,
    entries: SearchableEntry[],
    sessionTitle: string | undefined
  ): void {
    // If already exists, delete first so the re-insert lands at the end
    this.cache.delete(filePath);

    // Capacity 0 means caching is disabled
    if (this.maxSize === 0) return;

    // Evict from the front (oldest) until there is room for the new entry
    while (this.cache.size >= this.maxSize) {
      const oldest = this.cache.keys().next().value;
      if (oldest === undefined) break;
      this.cache.delete(oldest);
    }

    this.cache.set(filePath, { entries, sessionTitle, mtimeMs });
  }

  /**
   * Remove a specific entry from the cache.
   */
  invalidate(filePath: string): void {
    this.cache.delete(filePath);
  }

  /**
   * Clear all cached entries.
   */
  clear(): void {
    this.cache.clear();
  }

  /**
   * Current number of cached entries.
   */
  get size(): number {
    return this.cache.size;
  }
}
|
||||
159
src/main/services/discovery/SearchTextExtractor.ts
Normal file
159
src/main/services/discovery/SearchTextExtractor.ts
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
/**
|
||||
* SearchTextExtractor - Lightweight text extraction for search.
|
||||
*
|
||||
* Mirrors ChunkBuilder's classification loop (classifyMessages → buffer flush)
|
||||
* but only extracts searchable text + metadata, skipping all expensive operations:
|
||||
* - No tool execution building
|
||||
* - No semantic step extraction
|
||||
* - No subagent linking
|
||||
* - No timeline gap filling
|
||||
* - No metrics calculation
|
||||
*/
|
||||
|
||||
import { classifyMessages } from '@main/services/parsing/MessageClassifier';
|
||||
import { sanitizeDisplayContent } from '@shared/utils/contentSanitizer';
|
||||
|
||||
import type { ParsedMessage } from '@main/types';
|
||||
|
||||
/**
 * Minimal record describing one searchable piece of text in a session.
 * Holds only what search matching needs.
 */
export interface SearchableEntry {
  /** Identifier of the display group the text belongs to (`user-<uuid>` or `ai-<uuid>`). */
  groupId: string;
  /** UUID of the message the text was taken from. */
  messageUuid: string;
  /** Source message time as returned by `Date.getTime()`. */
  timestamp: number;
  /** Role of the message that produced the text. */
  messageType: 'user' | 'assistant';
  /** Kind of item the entry represents. */
  itemType: 'user' | 'ai';
  /** The text that search queries are matched against. */
  text: string;
}
|
||||
|
||||
/**
 * Outcome of running text extraction over one session's messages.
 */
export interface SearchTextResult {
  /** Title derived during extraction; undefined when no user text was found. */
  sessionTitle: string | undefined;
  /** Searchable entries in the order they were produced. */
  entries: SearchableEntry[];
}
|
||||
|
||||
/**
 * Build the list of searchable entries for a session.
 *
 * Steps:
 *   1. Drop sidechain messages so only the main thread remains.
 *   2. Classify the remaining messages with classifyMessages().
 *   3. Walk the classified messages while accumulating consecutive AI
 *      messages in a pending buffer:
 *        - hardNoise       → ignored
 *        - ai              → appended to the pending buffer
 *        - compact/system  → pending buffer is flushed; nothing else is emitted
 *        - user            → pending buffer is flushed, then the user text
 *                            (if non-empty) is emitted as its own entry
 *   4. Flush whatever is still pending at the end.
 *
 * The session title is the first 100 characters of the first non-empty user text.
 *
 * @param messages - Parsed messages of a single session.
 * @returns The extracted entries together with the derived session title.
 */
export function extractSearchableEntries(messages: ParsedMessage[]): SearchTextResult {
  const collected: SearchableEntry[] = [];
  let title: string | undefined;
  let pendingAi: ParsedMessage[] = [];

  // Turn the buffered AI messages into (at most) one entry and reset the buffer.
  const flushPendingAi = (): void => {
    if (pendingAi.length === 0) return;
    const aiEntry = extractAIEntry(pendingAi);
    if (aiEntry) collected.push(aiEntry);
    pendingAi = [];
  };

  const mainThread = messages.filter((m) => !m.isSidechain);

  for (const { message, category } of classifyMessages(mainThread)) {
    // Noise is filtered out entirely and does not interrupt an AI run.
    if (category === 'hardNoise') continue;

    if (category === 'ai') {
      pendingAi.push(message);
      continue;
    }

    // compact/system messages end the current AI run but carry no searchable text.
    if (category === 'compact' || category === 'system') {
      flushPendingAi();
      continue;
    }

    if (category === 'user') {
      flushPendingAi();

      const userText = extractUserText(message);
      if (!userText) continue;

      if (!title) {
        title = userText.slice(0, 100);
      }
      collected.push({
        text: userText,
        groupId: `user-${message.uuid}`,
        messageType: 'user',
        itemType: 'user',
        timestamp: message.timestamp.getTime(),
        messageUuid: message.uuid,
      });
    }
  }

  // Trailing AI messages with no following user/system message.
  flushPendingAi();

  return { entries: collected, sessionTitle: title };
}
|
||||
|
||||
/**
 * Turn a run of buffered AI messages into a single searchable entry.
 *
 * Walks the buffer from newest to oldest, and within each assistant message
 * walks its content blocks from last to first; the first non-empty text block
 * encountered wins. The entry's groupId is derived from the first message of
 * the buffer, while timestamp and messageUuid come from the message that
 * actually supplied the text.
 *
 * @param buffer - Consecutive AI-category messages, oldest first.
 * @returns The entry, or null when no assistant message contains non-empty text.
 */
function extractAIEntry(buffer: ParsedMessage[]): SearchableEntry | null {
  for (let idx = buffer.length; idx-- > 0; ) {
    const candidate = buffer[idx];
    const blocks = candidate.content;

    // Only assistant messages with structured (array) content can carry text blocks.
    if (candidate.role === 'assistant' && Array.isArray(blocks)) {
      for (let b = blocks.length; b-- > 0; ) {
        const block = blocks[b];
        if (block.type !== 'text' || !block.text) continue;

        return {
          text: block.text,
          groupId: `ai-${buffer[0].uuid}`,
          messageType: 'assistant',
          itemType: 'ai',
          timestamp: candidate.timestamp.getTime(),
          messageUuid: candidate.uuid,
        };
      }
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Produce the searchable text of a user message.
 *
 * String content is used as-is; array content contributes the concatenation
 * (no separator) of its `text` blocks; any other content yields an empty
 * string. The raw text is always passed through sanitizeDisplayContent().
 *
 * @param message - The user message to read.
 * @returns The sanitized text.
 */
export function extractUserText(message: ParsedMessage): string {
  const { content } = message;

  if (typeof content === 'string') {
    return sanitizeDisplayContent(content);
  }

  if (!Array.isArray(content)) {
    return sanitizeDisplayContent('');
  }

  // Collect the text of every text block, preserving block order.
  const pieces: unknown[] = [];
  for (const block of content) {
    if (block.type === 'text') {
      pieces.push(block.text);
    }
  }
  return sanitizeDisplayContent(pieces.join(''));
}
|
||||
|
|
@ -6,21 +6,14 @@
|
|||
* - Search within a single session file
|
||||
* - Restrict matching scope to User text + AI last text output
|
||||
* - Extract context around each match occurrence
|
||||
*
|
||||
* Uses SearchTextExtractor for lightweight text extraction (skips ChunkBuilder)
|
||||
* and SearchTextCache for mtime-based caching of extracted entries.
|
||||
*/
|
||||
|
||||
import { ChunkBuilder } from '@main/services/analysis/ChunkBuilder';
|
||||
import { LocalFileSystemProvider } from '@main/services/infrastructure/LocalFileSystemProvider';
|
||||
import {
|
||||
isEnhancedAIChunk,
|
||||
isUserChunk,
|
||||
type ParsedMessage,
|
||||
type SearchResult,
|
||||
type SearchSessionsResult,
|
||||
type SemanticStep,
|
||||
} from '@main/types';
|
||||
import { parseJsonlFile } from '@main/utils/jsonl';
|
||||
import { extractBaseDir, extractSessionId } from '@main/utils/pathDecoder';
|
||||
import { sanitizeDisplayContent } from '@shared/utils/contentSanitizer';
|
||||
import { createLogger } from '@shared/utils/logger';
|
||||
import {
|
||||
extractMarkdownPlainText,
|
||||
|
|
@ -28,36 +21,31 @@ import {
|
|||
} from '@shared/utils/markdownTextSearch';
|
||||
import * as path from 'path';
|
||||
|
||||
import { SearchTextCache } from './SearchTextCache';
|
||||
import { extractSearchableEntries } from './SearchTextExtractor';
|
||||
import { subprojectRegistry } from './SubprojectRegistry';
|
||||
|
||||
import type { SearchableEntry } from './SearchTextExtractor';
|
||||
import type { FileSystemProvider } from '@main/services/infrastructure/FileSystemProvider';
|
||||
import type { SearchResult, SearchSessionsResult } from '@main/types';
|
||||
|
||||
const logger = createLogger('Discovery:SessionSearcher');
|
||||
const SSH_FAST_SEARCH_STAGE_LIMITS = [40, 140, 320] as const;
|
||||
const SSH_FAST_SEARCH_MIN_RESULTS = 8;
|
||||
const SSH_FAST_SEARCH_TIME_BUDGET_MS = 4500;
|
||||
|
||||
interface SearchableEntry {
|
||||
text: string;
|
||||
groupId: string;
|
||||
messageType: 'user' | 'assistant';
|
||||
itemType: 'user' | 'ai';
|
||||
timestamp: number;
|
||||
messageUuid: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* SessionSearcher provides methods for searching sessions.
|
||||
*/
|
||||
export class SessionSearcher {
|
||||
private readonly projectsDir: string;
|
||||
private readonly chunkBuilder: ChunkBuilder;
|
||||
private readonly fsProvider: FileSystemProvider;
|
||||
private readonly searchCache: SearchTextCache;
|
||||
|
||||
constructor(projectsDir: string, fsProvider?: FileSystemProvider) {
|
||||
this.projectsDir = projectsDir;
|
||||
this.chunkBuilder = new ChunkBuilder();
|
||||
this.fsProvider = fsProvider ?? new LocalFileSystemProvider();
|
||||
this.searchCache = new SearchTextCache();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -151,7 +139,8 @@ export class SessionSearcher {
|
|||
sessionId,
|
||||
file.filePath,
|
||||
normalizedQuery,
|
||||
maxResults
|
||||
maxResults,
|
||||
file.mtimeMs
|
||||
);
|
||||
})
|
||||
);
|
||||
|
|
@ -207,11 +196,15 @@ export class SessionSearcher {
|
|||
/**
|
||||
* Searches a single session file for a query string.
|
||||
*
|
||||
* Uses SearchTextExtractor for lightweight text extraction (no ChunkBuilder)
|
||||
* and SearchTextCache for mtime-based caching.
|
||||
*
|
||||
* @param projectId - The project ID
|
||||
* @param sessionId - The session ID
|
||||
* @param filePath - Path to the session file
|
||||
* @param query - Normalized search query (lowercase)
|
||||
* @param maxResults - Maximum number of results to return
|
||||
* @param mtimeMs - File modification time for cache invalidation
|
||||
* @returns Array of search results
|
||||
*/
|
||||
async searchSessionFile(
|
||||
|
|
@ -219,71 +212,35 @@ export class SessionSearcher {
|
|||
sessionId: string,
|
||||
filePath: string,
|
||||
query: string,
|
||||
maxResults: number
|
||||
maxResults: number,
|
||||
mtimeMs: number
|
||||
): Promise<SearchResult[]> {
|
||||
const results: SearchResult[] = [];
|
||||
let sessionTitle: string | undefined;
|
||||
const messages = await parseJsonlFile(filePath, this.fsProvider);
|
||||
const chunks = this.chunkBuilder.buildChunks(messages, []);
|
||||
|
||||
for (const chunk of chunks) {
|
||||
if (results.length >= maxResults) {
|
||||
break;
|
||||
}
|
||||
// Check cache first
|
||||
let cached = this.searchCache.get(filePath, mtimeMs);
|
||||
if (!cached) {
|
||||
// Cache miss — parse and extract
|
||||
const messages = await parseJsonlFile(filePath, this.fsProvider);
|
||||
const extracted = extractSearchableEntries(messages);
|
||||
this.searchCache.set(filePath, mtimeMs, extracted.entries, extracted.sessionTitle);
|
||||
cached = extracted;
|
||||
}
|
||||
|
||||
if (isUserChunk(chunk)) {
|
||||
const userText = this.extractUserSearchableText(chunk.userMessage);
|
||||
if (!sessionTitle && userText) {
|
||||
sessionTitle = userText.slice(0, 100);
|
||||
}
|
||||
if (!userText) {
|
||||
continue;
|
||||
}
|
||||
const searchableEntry: SearchableEntry = {
|
||||
text: userText,
|
||||
groupId: chunk.id,
|
||||
messageType: 'user',
|
||||
itemType: 'user',
|
||||
timestamp: chunk.userMessage.timestamp.getTime(),
|
||||
messageUuid: chunk.userMessage.uuid,
|
||||
};
|
||||
this.collectMatchesForEntry(
|
||||
searchableEntry,
|
||||
query,
|
||||
results,
|
||||
maxResults,
|
||||
projectId,
|
||||
sessionId,
|
||||
sessionTitle
|
||||
);
|
||||
continue;
|
||||
}
|
||||
const { entries, sessionTitle } = cached;
|
||||
|
||||
if (isEnhancedAIChunk(chunk)) {
|
||||
const lastOutputStep = this.findLastOutputTextStep(chunk.semanticSteps);
|
||||
const outputText = lastOutputStep?.content.outputText;
|
||||
if (!lastOutputStep || !outputText) {
|
||||
continue;
|
||||
}
|
||||
for (const entry of entries) {
|
||||
if (results.length >= maxResults) break;
|
||||
|
||||
const searchableEntry: SearchableEntry = {
|
||||
text: outputText,
|
||||
groupId: chunk.id,
|
||||
messageType: 'assistant',
|
||||
itemType: 'ai',
|
||||
timestamp: lastOutputStep.startTime.getTime(),
|
||||
messageUuid: lastOutputStep.sourceMessageId ?? chunk.responses[0]?.uuid ?? '',
|
||||
};
|
||||
this.collectMatchesForEntry(
|
||||
searchableEntry,
|
||||
query,
|
||||
results,
|
||||
maxResults,
|
||||
projectId,
|
||||
sessionId,
|
||||
sessionTitle
|
||||
);
|
||||
}
|
||||
this.collectMatchesForEntry(
|
||||
entry,
|
||||
query,
|
||||
results,
|
||||
maxResults,
|
||||
projectId,
|
||||
sessionId,
|
||||
sessionTitle
|
||||
);
|
||||
}
|
||||
|
||||
return results;
|
||||
|
|
@ -342,29 +299,6 @@ export class SessionSearcher {
|
|||
}
|
||||
}
|
||||
|
||||
private extractUserSearchableText(message: ParsedMessage): string {
|
||||
let rawText = '';
|
||||
if (typeof message.content === 'string') {
|
||||
rawText = message.content;
|
||||
} else if (Array.isArray(message.content)) {
|
||||
rawText = message.content
|
||||
.filter((block) => block.type === 'text')
|
||||
.map((block) => block.text)
|
||||
.join('');
|
||||
}
|
||||
return sanitizeDisplayContent(rawText);
|
||||
}
|
||||
|
||||
private findLastOutputTextStep(steps: SemanticStep[]): SemanticStep | null {
|
||||
for (let i = steps.length - 1; i >= 0; i--) {
|
||||
const step = steps[i];
|
||||
if (step.type === 'output' && step.content.outputText) {
|
||||
return step;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private async collectFulfilledInBatches<T, R>(
|
||||
items: T[],
|
||||
batchSize: number,
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@
|
|||
|
||||
export * from './ProjectPathResolver';
|
||||
export * from './ProjectScanner';
|
||||
export * from './SearchTextCache';
|
||||
export * from './SearchTextExtractor';
|
||||
export * from './SessionContentFilter';
|
||||
export * from './SessionSearcher';
|
||||
export * from './SubagentLocator';
|
||||
|
|
|
|||
119
test/main/services/discovery/SearchTextCache.test.ts
Normal file
119
test/main/services/discovery/SearchTextCache.test.ts
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import { SearchTextCache } from '../../../../src/main/services/discovery/SearchTextCache';
|
||||
|
||||
import type { SearchableEntry } from '../../../../src/main/services/discovery/SearchTextExtractor';
|
||||
|
||||
/**
 * Test fixture: a user-type SearchableEntry whose messageUuid equals its
 * groupId and whose timestamp is the time of creation.
 */
function makeEntry(text: string, groupId: string): SearchableEntry {
  const entry: SearchableEntry = {
    groupId,
    messageUuid: groupId,
    text,
    itemType: 'user',
    messageType: 'user',
    timestamp: Date.now(),
  };
  return entry;
}
|
||||
|
||||
describe('SearchTextCache', () => {
|
||||
it('returns cached entry on mtime match', () => {
|
||||
const cache = new SearchTextCache();
|
||||
const entries = [makeEntry('hello', 'user-1')];
|
||||
cache.set('/path/a.jsonl', 1000, entries, 'Title A');
|
||||
|
||||
const result = cache.get('/path/a.jsonl', 1000);
|
||||
expect(result).toBeDefined();
|
||||
expect(result!.entries).toEqual(entries);
|
||||
expect(result!.sessionTitle).toBe('Title A');
|
||||
});
|
||||
|
||||
it('returns undefined on mtime mismatch (stale)', () => {
|
||||
const cache = new SearchTextCache();
|
||||
const entries = [makeEntry('hello', 'user-1')];
|
||||
cache.set('/path/a.jsonl', 1000, entries, 'Title A');
|
||||
|
||||
const result = cache.get('/path/a.jsonl', 2000);
|
||||
expect(result).toBeUndefined();
|
||||
});
|
||||
|
||||
it('returns undefined for uncached paths', () => {
|
||||
const cache = new SearchTextCache();
|
||||
const result = cache.get('/path/missing.jsonl', 1000);
|
||||
expect(result).toBeUndefined();
|
||||
});
|
||||
|
||||
it('evicts oldest entry when at max capacity', () => {
|
||||
const cache = new SearchTextCache(3);
|
||||
|
||||
cache.set('/path/1.jsonl', 100, [makeEntry('one', 'u1')], 'One');
|
||||
cache.set('/path/2.jsonl', 200, [makeEntry('two', 'u2')], 'Two');
|
||||
cache.set('/path/3.jsonl', 300, [makeEntry('three', 'u3')], 'Three');
|
||||
|
||||
expect(cache.size).toBe(3);
|
||||
|
||||
// Adding a 4th entry should evict the oldest (1.jsonl)
|
||||
cache.set('/path/4.jsonl', 400, [makeEntry('four', 'u4')], 'Four');
|
||||
|
||||
expect(cache.size).toBe(3);
|
||||
expect(cache.get('/path/1.jsonl', 100)).toBeUndefined();
|
||||
expect(cache.get('/path/4.jsonl', 400)).toBeDefined();
|
||||
});
|
||||
|
||||
it('LRU access moves entry to end, preserving it from eviction', () => {
|
||||
const cache = new SearchTextCache(3);
|
||||
|
||||
cache.set('/path/1.jsonl', 100, [makeEntry('one', 'u1')], 'One');
|
||||
cache.set('/path/2.jsonl', 200, [makeEntry('two', 'u2')], 'Two');
|
||||
cache.set('/path/3.jsonl', 300, [makeEntry('three', 'u3')], 'Three');
|
||||
|
||||
// Access entry 1, moving it to end
|
||||
cache.get('/path/1.jsonl', 100);
|
||||
|
||||
// Adding a 4th should now evict entry 2 (oldest after LRU access)
|
||||
cache.set('/path/4.jsonl', 400, [makeEntry('four', 'u4')], 'Four');
|
||||
|
||||
expect(cache.get('/path/1.jsonl', 100)).toBeDefined();
|
||||
expect(cache.get('/path/2.jsonl', 200)).toBeUndefined();
|
||||
});
|
||||
|
||||
it('invalidate() removes a specific entry', () => {
|
||||
const cache = new SearchTextCache();
|
||||
cache.set('/path/a.jsonl', 1000, [makeEntry('hello', 'u1')], 'Title');
|
||||
|
||||
cache.invalidate('/path/a.jsonl');
|
||||
expect(cache.get('/path/a.jsonl', 1000)).toBeUndefined();
|
||||
expect(cache.size).toBe(0);
|
||||
});
|
||||
|
||||
it('clear() empties the cache', () => {
|
||||
const cache = new SearchTextCache();
|
||||
cache.set('/path/1.jsonl', 100, [makeEntry('one', 'u1')], 'One');
|
||||
cache.set('/path/2.jsonl', 200, [makeEntry('two', 'u2')], 'Two');
|
||||
|
||||
expect(cache.size).toBe(2);
|
||||
cache.clear();
|
||||
expect(cache.size).toBe(0);
|
||||
});
|
||||
|
||||
it('handles undefined sessionTitle', () => {
|
||||
const cache = new SearchTextCache();
|
||||
cache.set('/path/a.jsonl', 1000, [], undefined);
|
||||
|
||||
const result = cache.get('/path/a.jsonl', 1000);
|
||||
expect(result).toBeDefined();
|
||||
expect(result!.sessionTitle).toBeUndefined();
|
||||
expect(result!.entries).toEqual([]);
|
||||
});
|
||||
|
||||
it('updates existing entry on re-set', () => {
|
||||
const cache = new SearchTextCache();
|
||||
cache.set('/path/a.jsonl', 1000, [makeEntry('old', 'u1')], 'Old');
|
||||
cache.set('/path/a.jsonl', 2000, [makeEntry('new', 'u2')], 'New');
|
||||
|
||||
const result = cache.get('/path/a.jsonl', 2000);
|
||||
expect(result).toBeDefined();
|
||||
expect(result!.entries[0].text).toBe('new');
|
||||
expect(result!.sessionTitle).toBe('New');
|
||||
expect(cache.size).toBe(1);
|
||||
});
|
||||
});
|
||||
230
test/main/services/discovery/SearchTextExtractor.test.ts
Normal file
230
test/main/services/discovery/SearchTextExtractor.test.ts
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
extractSearchableEntries,
|
||||
extractUserText,
|
||||
} from '../../../../src/main/services/discovery/SearchTextExtractor';
|
||||
|
||||
import type { ParsedMessage } from '../../../../src/main/types';
|
||||
|
||||
/**
 * Test fixture: a main-thread, non-meta user message with plain string content.
 */
function makeUserMessage(
  uuid: string,
  content: string,
  timestamp = '2026-01-01T00:00:00.000Z'
): ParsedMessage {
  const when = new Date(timestamp);
  const fixture = {
    uuid,
    type: 'user',
    role: 'user',
    content,
    timestamp: when,
    isMeta: false,
    isSidechain: false,
  };
  return fixture as ParsedMessage;
}
|
||||
|
||||
/**
 * Test fixture: a main-thread assistant message holding a single text block.
 */
function makeAssistantMessage(
  uuid: string,
  textContent: string,
  timestamp = '2026-01-01T00:00:01.000Z'
): ParsedMessage {
  const textBlock = { type: 'text', text: textContent };
  const fixture = {
    uuid,
    type: 'assistant',
    role: 'assistant',
    content: [textBlock],
    timestamp: new Date(timestamp),
    isMeta: false,
    isSidechain: false,
  };
  return fixture as ParsedMessage;
}
|
||||
|
||||
/**
 * Test fixture: a main-thread assistant message whose content is a thinking
 * block followed by a text block.
 */
function makeAssistantWithThinking(
  uuid: string,
  thinking: string,
  textContent: string,
  timestamp = '2026-01-01T00:00:01.000Z'
): ParsedMessage {
  const thinkingBlock = { type: 'thinking', thinking };
  const textBlock = { type: 'text', text: textContent };
  const fixture = {
    uuid,
    type: 'assistant',
    role: 'assistant',
    content: [thinkingBlock, textBlock],
    timestamp: new Date(timestamp),
    isMeta: false,
    isSidechain: false,
  };
  return fixture as ParsedMessage;
}
|
||||
|
||||
/**
 * Test fixture: a main-thread meta message of type `user` carrying a single
 * tool_result block.
 */
function makeToolResultMessage(
  uuid: string,
  timestamp = '2026-01-01T00:00:01.500Z'
): ParsedMessage {
  const toolResult = { type: 'tool_result', tool_use_id: 'tool-1', content: 'result text' };
  const fixture = {
    uuid,
    type: 'user',
    role: 'user',
    content: [toolResult],
    timestamp: new Date(timestamp),
    isMeta: true,
    isSidechain: false,
  };
  return fixture as ParsedMessage;
}
|
||||
|
||||
describe('SearchTextExtractor', () => {
|
||||
describe('extractSearchableEntries', () => {
|
||||
it('produces user-{uuid} groupIds for user messages', () => {
|
||||
const messages = [makeUserMessage('u1', 'hello world')];
|
||||
const result = extractSearchableEntries(messages);
|
||||
|
||||
expect(result.entries).toHaveLength(1);
|
||||
expect(result.entries[0].groupId).toBe('user-u1');
|
||||
expect(result.entries[0].itemType).toBe('user');
|
||||
expect(result.entries[0].messageType).toBe('user');
|
||||
expect(result.entries[0].text).toBe('hello world');
|
||||
});
|
||||
|
||||
it('produces ai-{uuid} groupIds for AI groups (using first buffer message uuid)', () => {
|
||||
const messages = [
|
||||
makeUserMessage('u1', 'question'),
|
||||
makeToolResultMessage('tr1', '2026-01-01T00:00:01.000Z'),
|
||||
makeAssistantMessage('a1', 'thinking...', '2026-01-01T00:00:02.000Z'),
|
||||
makeAssistantMessage('a2', 'final answer', '2026-01-01T00:00:03.000Z'),
|
||||
];
|
||||
const result = extractSearchableEntries(messages);
|
||||
|
||||
const aiEntries = result.entries.filter((e) => e.itemType === 'ai');
|
||||
expect(aiEntries).toHaveLength(1);
|
||||
// groupId uses the first message in the AI buffer
|
||||
expect(aiEntries[0].groupId).toMatch(/^ai-/);
|
||||
// Text is from the last assistant message with text
|
||||
expect(aiEntries[0].text).toBe('final answer');
|
||||
});
|
||||
|
||||
it('extracts last AI text output correctly (backward scan)', () => {
|
||||
const messages = [
|
||||
makeUserMessage('u1', 'question'),
|
||||
makeAssistantMessage('a1', 'older output', '2026-01-01T00:00:01.000Z'),
|
||||
makeAssistantMessage('a2', 'latest output', '2026-01-01T00:00:02.000Z'),
|
||||
];
|
||||
const result = extractSearchableEntries(messages);
|
||||
|
||||
const aiEntries = result.entries.filter((e) => e.itemType === 'ai');
|
||||
expect(aiEntries).toHaveLength(1);
|
||||
expect(aiEntries[0].text).toBe('latest output');
|
||||
});
|
||||
|
||||
it('handles assistant messages with thinking + text blocks', () => {
|
||||
const messages = [
|
||||
makeUserMessage('u1', 'question'),
|
||||
makeAssistantWithThinking('a1', 'internal reasoning', 'visible answer'),
|
||||
];
|
||||
const result = extractSearchableEntries(messages);
|
||||
|
||||
const aiEntries = result.entries.filter((e) => e.itemType === 'ai');
|
||||
expect(aiEntries).toHaveLength(1);
|
||||
expect(aiEntries[0].text).toBe('visible answer');
|
||||
});
|
||||
|
||||
it('skips sidechain messages', () => {
|
||||
const sidechain: ParsedMessage = {
|
||||
...makeUserMessage('u-side', 'sidechain text'),
|
||||
isSidechain: true,
|
||||
} as ParsedMessage;
|
||||
const messages = [sidechain, makeUserMessage('u1', 'main thread')];
|
||||
const result = extractSearchableEntries(messages);
|
||||
|
||||
expect(result.entries).toHaveLength(1);
|
||||
expect(result.entries[0].text).toBe('main thread');
|
||||
});
|
||||
|
||||
it('extracts sessionTitle from first user message (truncated to 100 chars)', () => {
|
||||
const longText = 'a'.repeat(200);
|
||||
const messages = [
|
||||
makeUserMessage('u1', longText),
|
||||
makeUserMessage('u2', 'second message'),
|
||||
];
|
||||
const result = extractSearchableEntries(messages);
|
||||
|
||||
expect(result.sessionTitle).toBe('a'.repeat(100));
|
||||
});
|
||||
|
||||
it('handles empty messages array', () => {
|
||||
const result = extractSearchableEntries([]);
|
||||
expect(result.entries).toHaveLength(0);
|
||||
expect(result.sessionTitle).toBeUndefined();
|
||||
});
|
||||
|
||||
it('handles messages with no user messages', () => {
|
||||
const messages = [
|
||||
makeAssistantMessage('a1', 'just AI talking'),
|
||||
];
|
||||
const result = extractSearchableEntries(messages);
|
||||
|
||||
expect(result.sessionTitle).toBeUndefined();
|
||||
const aiEntries = result.entries.filter((e) => e.itemType === 'ai');
|
||||
expect(aiEntries).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('handles AI buffer with no text content', () => {
|
||||
const noTextAssistant: ParsedMessage = {
|
||||
uuid: 'a1',
|
||||
type: 'assistant',
|
||||
role: 'assistant',
|
||||
content: [{ type: 'thinking', thinking: 'just thinking' }],
|
||||
timestamp: new Date('2026-01-01T00:00:01.000Z'),
|
||||
isMeta: false,
|
||||
isSidechain: false,
|
||||
} as ParsedMessage;
|
||||
const messages = [makeUserMessage('u1', 'question'), noTextAssistant];
|
||||
const result = extractSearchableEntries(messages);
|
||||
|
||||
const aiEntries = result.entries.filter((e) => e.itemType === 'ai');
|
||||
expect(aiEntries).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('flushes AI buffer on user messages', () => {
|
||||
const messages = [
|
||||
makeUserMessage('u1', 'first question'),
|
||||
makeAssistantMessage('a1', 'first answer', '2026-01-01T00:00:01.000Z'),
|
||||
makeUserMessage('u2', 'second question', '2026-01-01T00:00:02.000Z'),
|
||||
makeAssistantMessage('a2', 'second answer', '2026-01-01T00:00:03.000Z'),
|
||||
];
|
||||
const result = extractSearchableEntries(messages);
|
||||
|
||||
expect(result.entries).toHaveLength(4);
|
||||
const userEntries = result.entries.filter((e) => e.itemType === 'user');
|
||||
const aiEntries = result.entries.filter((e) => e.itemType === 'ai');
|
||||
expect(userEntries).toHaveLength(2);
|
||||
expect(aiEntries).toHaveLength(2);
|
||||
expect(aiEntries[0].text).toBe('first answer');
|
||||
expect(aiEntries[1].text).toBe('second answer');
|
||||
});
|
||||
});
|
||||
|
||||
describe('extractUserText', () => {
|
||||
it('extracts string content', () => {
|
||||
const msg = makeUserMessage('u1', 'hello world');
|
||||
expect(extractUserText(msg)).toBe('hello world');
|
||||
});
|
||||
|
||||
it('extracts array content with text blocks', () => {
|
||||
const msg: ParsedMessage = {
|
||||
uuid: 'u1',
|
||||
type: 'user',
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'text', text: 'part one' },
|
||||
{ type: 'text', text: ' part two' },
|
||||
],
|
||||
timestamp: new Date(),
|
||||
isMeta: false,
|
||||
isSidechain: false,
|
||||
} as ParsedMessage;
|
||||
expect(extractUserText(msg)).toBe('part one part two');
|
||||
});
|
||||
});
|
||||
});
|
||||
Loading…
Reference in a new issue