From eacd7d82e187e81d63baed6cd6bebc83487ee684 Mon Sep 17 00:00:00 2001 From: iliya Date: Tue, 17 Mar 2026 11:20:04 +0200 Subject: [PATCH] feat: implement project scanning timeout and enhance metadata extraction - Added scanProjectWithTimeout method to ProjectScanner to prevent long scans from blocking the entire batch. - Introduced cleanup logic in metadata extraction functions to ensure proper resource management during timeouts. - Updated logging to provide insights on scan durations and session statistics for better performance monitoring. - Enhanced repository group fetching logic in DashboardView and DateGroupedSessions components to handle loading and error states more effectively. --- src/main/services/discovery/ProjectScanner.ts | 44 +++++++++- src/main/utils/metadataExtraction.ts | 81 ++++++++++++------- .../components/dashboard/DashboardView.tsx | 9 ++- .../sidebar/DateGroupedSessions.tsx | 13 ++- 4 files changed, 112 insertions(+), 35 deletions(-) diff --git a/src/main/services/discovery/ProjectScanner.ts b/src/main/services/discovery/ProjectScanner.ts index c9056f04..6bcd9cab 100644 --- a/src/main/services/discovery/ProjectScanner.ts +++ b/src/main/services/discovery/ProjectScanner.ts @@ -168,7 +168,7 @@ export class ProjectScanner { const projectArrays = await this.collectFulfilledInBatches( projectDirs, this.fsProvider.type === 'ssh' ? 8 : ProjectScanner.LOCAL_PROJECT_BATCH, - async (dir) => this.scanProject(dir.name) + async (dir) => this.scanProjectWithTimeout(dir.name) ); // Flatten and sort by most recent @@ -312,6 +312,31 @@ export class ProjectScanner { // Project Scanning (continued) // =========================================================================== + // Per-project scan timeout: prevents a single slow directory from blocking + // the entire scan batch (e.g. a project with 1000+ session files on slow I/O). + private static readonly SCAN_PROJECT_TIMEOUT_MS = 15_000; + + /** + * Scans a single project directory with a timeout guard. + * Returns empty array if the scan exceeds the timeout. + */ + private async scanProjectWithTimeout(encodedName: string): Promise { + let timer: ReturnType | null = null; + const timeout = new Promise((resolve) => { + timer = setTimeout(() => { + logger.warn( + `[scanProject] timeout after ${ProjectScanner.SCAN_PROJECT_TIMEOUT_MS}ms project=${encodedName}` + ); + resolve([]); + }, ProjectScanner.SCAN_PROJECT_TIMEOUT_MS); + }); + try { + return await Promise.race([this.scanProject(encodedName), timeout]); + } finally { + if (timer) clearTimeout(timer); + } + } + /** * Scans a single project directory and returns project metadata. * If sessions have different cwd values, splits into multiple projects. @@ -319,7 +344,9 @@ export class ProjectScanner { private async scanProject(encodedName: string): Promise { try { const projectPath = path.join(this.projectsDir, encodedName); + const readdirStart = Date.now(); const entries = await this.fsProvider.readdir(projectPath); + const readdirMs = Date.now() - readdirStart; // Get session files (.jsonl at root level) const sessionFiles = entries.filter( @@ -330,6 +357,12 @@ export class ProjectScanner { return []; } + if (sessionFiles.length > 200 || readdirMs > 500) { + logger.debug( + `[scanProject] ${encodedName} readdir=${readdirMs}ms entries=${entries.length} jsonl=${sessionFiles.length}` + ); + } + // Collect file stats and cwd for each session interface SessionInfo { sessionId: string; @@ -346,6 +379,8 @@ export class ProjectScanner { const MAX_CWD_SPLIT_FILES = 80; const shouldSplitByCwd = this.fsProvider.type !== 'ssh' && sessionFiles.length <= MAX_CWD_SPLIT_FILES; + + const sessionStatStart = Date.now(); const sessionInfos = await this.collectFulfilledInBatches( sessionFiles, this.fsProvider.type === 'ssh' ? 32 : ProjectScanner.LOCAL_SESSION_BATCH, @@ -377,6 +412,13 @@ export class ProjectScanner { return []; } + const sessionStatMs = Date.now() - sessionStatStart; + if (sessionFiles.length > 200 || sessionStatMs > 1000) { + logger.debug( + `[scanProject] ${encodedName} sessionStat=${sessionStatMs}ms files=${sessionFiles.length} infos=${sessionInfos.length}` + ); + } + // Group sessions by cwd const cwdGroups = new Map(); const firstCwd = sessionInfos.find((s) => s.cwd)?.cwd ?? undefined; diff --git a/src/main/utils/metadataExtraction.ts b/src/main/utils/metadataExtraction.ts index 5b717093..8ec70183 100644 --- a/src/main/utils/metadataExtraction.ts +++ b/src/main/utils/metadataExtraction.ts @@ -51,23 +51,37 @@ export async function extractCwd( } const fileStream = fsProvider.createReadStream(filePath, { encoding: 'utf8' }); - let bytes = 0; - let timedOut = false; - const timer = setTimeout(() => { - timedOut = true; - fileStream.destroy(); - }, JSONL_HEAD_TIMEOUT_MS); - fileStream.on('data', (chunk: string) => { - bytes += byteLen(chunk); - if (bytes > JSONL_HEAD_MAX_BYTES) { - fileStream.destroy(); - } - }); const rl = readline.createInterface({ input: fileStream, crlfDelay: Infinity, }); + let bytes = 0; + let timedOut = false; + let cleaned = false; + + // Close readline FIRST so `for await` exits, then destroy the stream. + // Calling only stream.destroy() can leave readline hanging when it has + // a partial line buffered (e.g. a 400KB+ JSONL line read in 64KB chunks). + const cleanup = (): void => { + if (cleaned) return; + cleaned = true; + rl.close(); + fileStream.destroy(); + }; + + const timer = setTimeout(() => { + timedOut = true; + cleanup(); + }, JSONL_HEAD_TIMEOUT_MS); + + fileStream.on('data', (chunk: string) => { + bytes += byteLen(chunk); + if (bytes > JSONL_HEAD_MAX_BYTES) { + cleanup(); + } + }); + try { let lines = 0; for await (const line of rl) { @@ -84,8 +98,6 @@ export async function extractCwd( } // Only conversational entries have cwd if ('cwd' in entry && entry.cwd) { - rl.close(); - fileStream.destroy(); return entry.cwd; } } @@ -95,8 +107,7 @@ export async function extractCwd( } } finally { clearTimeout(timer); - rl.close(); - fileStream.destroy(); + cleanup(); } return null; @@ -122,23 +133,34 @@ export async function extractFirstUserMessagePreview( } const fileStream = fsProvider.createReadStream(filePath, { encoding: 'utf8' }); - let bytes = 0; - let timedOut = false; - const timer = setTimeout(() => { - timedOut = true; - fileStream.destroy(); - }, JSONL_HEAD_TIMEOUT_MS); - fileStream.on('data', (chunk: string) => { - bytes += byteLen(chunk); - if (bytes > JSONL_HEAD_MAX_BYTES) { - fileStream.destroy(); - } - }); const rl = readline.createInterface({ input: fileStream, crlfDelay: Infinity, }); + let bytes = 0; + let timedOut = false; + let cleaned = false; + + const cleanup = (): void => { + if (cleaned) return; + cleaned = true; + rl.close(); + fileStream.destroy(); + }; + + const timer = setTimeout(() => { + timedOut = true; + cleanup(); + }, JSONL_HEAD_TIMEOUT_MS); + + fileStream.on('data', (chunk: string) => { + bytes += byteLen(chunk); + if (bytes > JSONL_HEAD_MAX_BYTES) { + cleanup(); + } + }); + let commandFallback: { text: string; timestamp: string } | null = null; let linesRead = 0; @@ -182,8 +204,7 @@ export async function extractFirstUserMessagePreview( return commandFallback; } finally { clearTimeout(timer); - rl.close(); - fileStream.destroy(); + cleanup(); } return commandFallback; diff --git a/src/renderer/components/dashboard/DashboardView.tsx b/src/renderer/components/dashboard/DashboardView.tsx index 6afa98fc..2ac49c67 100644 --- a/src/renderer/components/dashboard/DashboardView.tsx +++ b/src/renderer/components/dashboard/DashboardView.tsx @@ -518,10 +518,15 @@ const ProjectsGrid = ({ const [visibleProjects, setVisibleProjects] = useState(maxProjects); useEffect(() => { - if (repositoryGroups.length === 0 && !repositoryGroupsLoading) { + if (repositoryGroups.length === 0 && !repositoryGroupsLoading && !repositoryGroupsError) { void fetchRepositoryGroups(); } - }, [repositoryGroups.length, repositoryGroupsLoading, fetchRepositoryGroups]); + }, [ + repositoryGroups.length, + repositoryGroupsLoading, + repositoryGroupsError, + fetchRepositoryGroups, + ]); useEffect(() => { if (repositoryGroups.length > 0 && !hasFetchedTasksRef.current && !repositoryGroupsLoading) { diff --git a/src/renderer/components/sidebar/DateGroupedSessions.tsx b/src/renderer/components/sidebar/DateGroupedSessions.tsx index 620108fc..dd98b1a3 100644 --- a/src/renderer/components/sidebar/DateGroupedSessions.tsx +++ b/src/renderer/components/sidebar/DateGroupedSessions.tsx @@ -241,11 +241,18 @@ export const DateGroupedSessions = (): React.JSX.Element => { // Loading guards in the store actions prevent duplicate IPC calls // when the centralized init chain has already started a fetch. const repositoryGroupsLoading = useStore((s) => s.repositoryGroupsLoading); + const repositoryGroupsError = useStore((s) => s.repositoryGroupsError); const projectsLoading = useStore((s) => s.projectsLoading); + const projectsError = useStore((s) => s.projectsError); useEffect(() => { - if (viewMode === 'grouped' && repositoryGroups.length === 0 && !repositoryGroupsLoading) { + if ( + viewMode === 'grouped' && + repositoryGroups.length === 0 && + !repositoryGroupsLoading && + !repositoryGroupsError + ) { void fetchRepositoryGroups(); - } else if (viewMode === 'flat' && projects.length === 0 && !projectsLoading) { + } else if (viewMode === 'flat' && projects.length === 0 && !projectsLoading && !projectsError) { void fetchProjects(); } }, [ @@ -253,7 +260,9 @@ export const DateGroupedSessions = (): React.JSX.Element => { repositoryGroups.length, projects.length, repositoryGroupsLoading, + repositoryGroupsError, projectsLoading, + projectsError, fetchRepositoryGroups, fetchProjects, ]);