feat: implement project scanning timeout and enhance metadata extraction

- Added scanProjectWithTimeout method to ProjectScanner to prevent long scans from blocking the entire batch.
- Introduced cleanup logic in metadata extraction functions to ensure proper resource management during timeouts.
- Updated logging to provide insights on scan durations and session statistics for better performance monitoring.
- Guarded the repository-group and project fetch effects in DashboardView and DateGroupedSessions so they no longer re-trigger a fetch while the previous attempt is in an error state (in addition to the existing loading check).
This commit is contained in:
iliya 2026-03-17 11:20:04 +02:00
parent f9f3db7dcb
commit eacd7d82e1
4 changed files with 112 additions and 35 deletions

View file

@ -168,7 +168,7 @@ export class ProjectScanner {
const projectArrays = await this.collectFulfilledInBatches(
projectDirs,
this.fsProvider.type === 'ssh' ? 8 : ProjectScanner.LOCAL_PROJECT_BATCH,
async (dir) => this.scanProject(dir.name)
async (dir) => this.scanProjectWithTimeout(dir.name)
);
// Flatten and sort by most recent
@ -312,6 +312,31 @@ export class ProjectScanner {
// Project Scanning (continued)
// ===========================================================================
// Per-project scan timeout, in milliseconds: prevents a single slow directory
// from blocking the entire scan batch (e.g. a project with 1000+ session files
// on slow I/O). scanProjectWithTimeout resolves to an empty array for any
// project whose scan runs longer than this.
private static readonly SCAN_PROJECT_TIMEOUT_MS = 15_000;
/**
 * Deadline-guarded wrapper around `scanProject` for one project directory.
 *
 * The scan is raced against a timer of `SCAN_PROJECT_TIMEOUT_MS`. Whichever
 * settles first decides the outcome:
 * - the scan finishes in time: its projects are returned (a rejection from
 *   `scanProject` is propagated to the caller unchanged);
 * - the timer fires first: a warning is logged and an empty array is returned.
 *
 * Note that the race only stops *waiting* for the scan; nothing here cancels
 * the in-flight `scanProject` call, whose eventual result is simply ignored.
 *
 * @param encodedName - Project directory name, passed straight to `scanProject`.
 * @returns The scanned projects, or `[]` if the deadline elapsed first.
 */
private async scanProjectWithTimeout(encodedName: string): Promise<Project[]> {
  let deadlineHandle: ReturnType<typeof setTimeout> | undefined;

  // The executor runs synchronously, so the timer is armed before the scan starts.
  const deadline = new Promise<Project[]>((resolve) => {
    const onDeadline = (): void => {
      logger.warn(
        `[scanProject] timeout after ${ProjectScanner.SCAN_PROJECT_TIMEOUT_MS}ms project=${encodedName}`
      );
      resolve([]);
    };
    deadlineHandle = setTimeout(onDeadline, ProjectScanner.SCAN_PROJECT_TIMEOUT_MS);
  });

  try {
    const scan = this.scanProject(encodedName);
    return await Promise.race([scan, deadline]);
  } finally {
    // Always disarm the timer so a finished scan does not leave it pending;
    // clearTimeout is a no-op for an undefined or already-fired handle.
    clearTimeout(deadlineHandle);
  }
}
/**
* Scans a single project directory and returns project metadata.
* If sessions have different cwd values, splits into multiple projects.
@ -319,7 +344,9 @@ export class ProjectScanner {
private async scanProject(encodedName: string): Promise<Project[]> {
try {
const projectPath = path.join(this.projectsDir, encodedName);
const readdirStart = Date.now();
const entries = await this.fsProvider.readdir(projectPath);
const readdirMs = Date.now() - readdirStart;
// Get session files (.jsonl at root level)
const sessionFiles = entries.filter(
@ -330,6 +357,12 @@ export class ProjectScanner {
return [];
}
if (sessionFiles.length > 200 || readdirMs > 500) {
logger.debug(
`[scanProject] ${encodedName} readdir=${readdirMs}ms entries=${entries.length} jsonl=${sessionFiles.length}`
);
}
// Collect file stats and cwd for each session
interface SessionInfo {
sessionId: string;
@ -346,6 +379,8 @@ export class ProjectScanner {
const MAX_CWD_SPLIT_FILES = 80;
const shouldSplitByCwd =
this.fsProvider.type !== 'ssh' && sessionFiles.length <= MAX_CWD_SPLIT_FILES;
const sessionStatStart = Date.now();
const sessionInfos = await this.collectFulfilledInBatches(
sessionFiles,
this.fsProvider.type === 'ssh' ? 32 : ProjectScanner.LOCAL_SESSION_BATCH,
@ -377,6 +412,13 @@ export class ProjectScanner {
return [];
}
const sessionStatMs = Date.now() - sessionStatStart;
if (sessionFiles.length > 200 || sessionStatMs > 1000) {
logger.debug(
`[scanProject] ${encodedName} sessionStat=${sessionStatMs}ms files=${sessionFiles.length} infos=${sessionInfos.length}`
);
}
// Group sessions by cwd
const cwdGroups = new Map<string, SessionInfo[]>();
const firstCwd = sessionInfos.find((s) => s.cwd)?.cwd ?? undefined;

View file

@ -51,23 +51,37 @@ export async function extractCwd(
}
const fileStream = fsProvider.createReadStream(filePath, { encoding: 'utf8' });
let bytes = 0;
let timedOut = false;
const timer = setTimeout(() => {
timedOut = true;
fileStream.destroy();
}, JSONL_HEAD_TIMEOUT_MS);
fileStream.on('data', (chunk: string) => {
bytes += byteLen(chunk);
if (bytes > JSONL_HEAD_MAX_BYTES) {
fileStream.destroy();
}
});
const rl = readline.createInterface({
input: fileStream,
crlfDelay: Infinity,
});
let bytes = 0;
let timedOut = false;
let cleaned = false;
// Close readline FIRST so `for await` exits, then destroy the stream.
// Calling only stream.destroy() can leave readline hanging when it has
// a partial line buffered (e.g. a 400KB+ JSONL line read in 64KB chunks).
const cleanup = (): void => {
if (cleaned) return;
cleaned = true;
rl.close();
fileStream.destroy();
};
const timer = setTimeout(() => {
timedOut = true;
cleanup();
}, JSONL_HEAD_TIMEOUT_MS);
fileStream.on('data', (chunk: string) => {
bytes += byteLen(chunk);
if (bytes > JSONL_HEAD_MAX_BYTES) {
cleanup();
}
});
try {
let lines = 0;
for await (const line of rl) {
@ -84,8 +98,6 @@ export async function extractCwd(
}
// Only conversational entries have cwd
if ('cwd' in entry && entry.cwd) {
rl.close();
fileStream.destroy();
return entry.cwd;
}
}
@ -95,8 +107,7 @@ export async function extractCwd(
}
} finally {
clearTimeout(timer);
rl.close();
fileStream.destroy();
cleanup();
}
return null;
@ -122,23 +133,34 @@ export async function extractFirstUserMessagePreview(
}
const fileStream = fsProvider.createReadStream(filePath, { encoding: 'utf8' });
let bytes = 0;
let timedOut = false;
const timer = setTimeout(() => {
timedOut = true;
fileStream.destroy();
}, JSONL_HEAD_TIMEOUT_MS);
fileStream.on('data', (chunk: string) => {
bytes += byteLen(chunk);
if (bytes > JSONL_HEAD_MAX_BYTES) {
fileStream.destroy();
}
});
const rl = readline.createInterface({
input: fileStream,
crlfDelay: Infinity,
});
let bytes = 0;
let timedOut = false;
let cleaned = false;
const cleanup = (): void => {
if (cleaned) return;
cleaned = true;
rl.close();
fileStream.destroy();
};
const timer = setTimeout(() => {
timedOut = true;
cleanup();
}, JSONL_HEAD_TIMEOUT_MS);
fileStream.on('data', (chunk: string) => {
bytes += byteLen(chunk);
if (bytes > JSONL_HEAD_MAX_BYTES) {
cleanup();
}
});
let commandFallback: { text: string; timestamp: string } | null = null;
let linesRead = 0;
@ -182,8 +204,7 @@ export async function extractFirstUserMessagePreview(
return commandFallback;
} finally {
clearTimeout(timer);
rl.close();
fileStream.destroy();
cleanup();
}
return commandFallback;

View file

@ -518,10 +518,15 @@ const ProjectsGrid = ({
const [visibleProjects, setVisibleProjects] = useState(maxProjects);
useEffect(() => {
if (repositoryGroups.length === 0 && !repositoryGroupsLoading) {
if (repositoryGroups.length === 0 && !repositoryGroupsLoading && !repositoryGroupsError) {
void fetchRepositoryGroups();
}
}, [repositoryGroups.length, repositoryGroupsLoading, fetchRepositoryGroups]);
}, [
repositoryGroups.length,
repositoryGroupsLoading,
repositoryGroupsError,
fetchRepositoryGroups,
]);
useEffect(() => {
if (repositoryGroups.length > 0 && !hasFetchedTasksRef.current && !repositoryGroupsLoading) {

View file

@ -241,11 +241,18 @@ export const DateGroupedSessions = (): React.JSX.Element => {
// Loading guards in the store actions prevent duplicate IPC calls
// when the centralized init chain has already started a fetch.
const repositoryGroupsLoading = useStore((s) => s.repositoryGroupsLoading);
const repositoryGroupsError = useStore((s) => s.repositoryGroupsError);
const projectsLoading = useStore((s) => s.projectsLoading);
const projectsError = useStore((s) => s.projectsError);
useEffect(() => {
if (viewMode === 'grouped' && repositoryGroups.length === 0 && !repositoryGroupsLoading) {
if (
viewMode === 'grouped' &&
repositoryGroups.length === 0 &&
!repositoryGroupsLoading &&
!repositoryGroupsError
) {
void fetchRepositoryGroups();
} else if (viewMode === 'flat' && projects.length === 0 && !projectsLoading) {
} else if (viewMode === 'flat' && projects.length === 0 && !projectsLoading && !projectsError) {
void fetchProjects();
}
}, [
@ -253,7 +260,9 @@ export const DateGroupedSessions = (): React.JSX.Element => {
repositoryGroups.length,
projects.length,
repositoryGroupsLoading,
repositoryGroupsError,
projectsLoading,
projectsError,
fetchRepositoryGroups,
fetchProjects,
]);