From 3b0c2ed24bade174b724097f0661bb784a4771e1 Mon Sep 17 00:00:00 2001 From: 777genius Date: Fri, 29 May 2026 12:29:37 +0300 Subject: [PATCH] perf: cache runtime usage telemetry --- .../services/team/TeamProvisioningService.ts | 95 +++++++++++++++++-- .../team/TeamProvisioningService.test.ts | 16 ++++ 2 files changed, 101 insertions(+), 10 deletions(-) diff --git a/src/main/services/team/TeamProvisioningService.ts b/src/main/services/team/TeamProvisioningService.ts index a0c14bbb..ff28873b 100644 --- a/src/main/services/team/TeamProvisioningService.ts +++ b/src/main/services/team/TeamProvisioningService.ts @@ -772,7 +772,7 @@ interface RuntimeProcessLoadStats extends RuntimeProcessUsageStats { type RuntimeTelemetryProcessSource = 'native' | 'wsl' | 'windows-host'; -interface RuntimeTelemetryProcessTableRow extends RuntimeProcessTableRow { +interface RuntimeTelemetryProcessTableRow extends RuntimeProcessTableRow, RuntimeProcessUsageStats { runtimeTelemetrySource?: RuntimeTelemetryProcessSource; } @@ -3294,6 +3294,7 @@ export class TeamProvisioningService { private static readonly MAX_RUNTIME_USAGE_PIDS_PER_SNAPSHOT = 512; private static readonly RUNTIME_PROCESS_TABLE_TIMEOUT_MS = 1_500; private static readonly RUNTIME_WINDOWS_PROCESS_TABLE_TIMEOUT_MS = 1_500; + private static readonly RUNTIME_PROCESS_USAGE_CACHE_TTL_MS = 30_000; private static readonly RUNTIME_PIDUSAGE_BATCH_TIMEOUT_MS = 2_000; private static readonly RUNTIME_PIDUSAGE_SINGLE_TIMEOUT_MS = 750; private static readonly RUNTIME_PIDUSAGE_FALLBACK_CONCURRENCY = 16; @@ -3396,6 +3397,13 @@ export class TeamProvisioningService { includesWindowsHostRows: boolean; } >(); + private readonly runtimeProcessUsageStatsCacheByPid = new Map< + number, + { + expiresAtMs: number; + stats: RuntimeProcessUsageStats | null; + } + >(); private readonly agentRuntimeSnapshotInFlightByTeam = new Map< string, { @@ -14095,7 +14103,14 @@ export class TeamProvisioningService { const runtimeUsagePids = [ ...new Set([...runtimeUsageTreesByRootPid.values()].flatMap((tree) => tree.pids)), ]; - usageStatsByPid = await this.readProcessUsageStatsByPid(runtimeUsagePids); + usageStatsByPid = this.buildProcessUsageStatsFromRows(runtimeProcessRows, runtimeUsagePids); + const pidsMissingUsageStats = runtimeUsagePids.filter((pid) => !usageStatsByPid.has(pid)); + if (pidsMissingUsageStats.length > 0) { + const sampledUsageStats = await this.readProcessUsageStatsByPid(pidsMissingUsageStats); + for (const [pid, stats] of sampledUsageStats) { + usageStatsByPid.set(pid, stats); + } + } } catch (error) { logger.debug( `[${teamName}] Runtime telemetry sampling failed; continuing without resource metrics: ${ @@ -25663,10 +25678,12 @@ export class TeamProvisioningService { candidate.runtimeTelemetrySource === 'windows-host' ? candidate.runtimeTelemetrySource : undefined); + const usageStats = this.normalizeRuntimeProcessUsageStats(candidate); normalizedRows.push({ pid: Math.floor(pid), ppid: Math.floor(ppid), command, + ...(usageStats ?? {}), ...(runtimeTelemetrySource ? { runtimeTelemetrySource } : {}), }); } @@ -26038,6 +26055,28 @@ export class TeamProvisioningService { } } + private buildProcessUsageStatsFromRows( + processRows: readonly RuntimeTelemetryProcessTableRow[] | null, + pids: readonly number[] + ): Map { + const usageStatsByPid = new Map(); + const requestedPids = new Set(pids.filter((pid) => Number.isFinite(pid) && pid > 0)); + if (!Array.isArray(processRows) || requestedPids.size === 0) { + return usageStatsByPid; + } + + for (const row of processRows) { + if (!requestedPids.has(row.pid)) { + continue; + } + const usageStats = this.normalizeRuntimeProcessUsageStats(row); + if (usageStats) { + usageStatsByPid.set(row.pid, usageStats); + } + } + return usageStatsByPid; + } + private async readProcessUsageStatsByPid( pids: readonly number[] ): Promise> { @@ -26048,20 +26087,57 @@ export class TeamProvisioningService { } const usageStatsByPid = new Map(); + const pidsToRead: number[] = []; + const now = Date.now(); + for (const pid of uniquePids) { + const cached = this.runtimeProcessUsageStatsCacheByPid.get(pid); + if (cached && cached.expiresAtMs > now) { + if (cached.stats) { + usageStatsByPid.set(pid, { ...cached.stats }); + } + continue; + } + pidsToRead.push(pid); + } + if (pidsToRead.length === 0) { + return usageStatsByPid; + } + + const rememberUsageStats = ( + pid: number, + stats: RuntimeProcessUsageStats | null | undefined + ): void => { + const normalized = stats ? { ...stats } : null; + this.runtimeProcessUsageStatsCacheByPid.set(pid, { + expiresAtMs: Date.now() + TeamProvisioningService.RUNTIME_PROCESS_USAGE_CACHE_TTL_MS, + stats: normalized, + }); + if (normalized) { + usageStatsByPid.set(pid, { ...normalized }); + } + }; + const options = RUNTIME_PIDUSAGE_OPTIONS; try { const statsByPid = await this.withRuntimeTelemetryTimeout( - pidusage(uniquePids, options), + pidusage(pidsToRead, options), TeamProvisioningService.RUNTIME_PIDUSAGE_BATCH_TIMEOUT_MS, 'pidusage batch runtime telemetry' ); + const observedPids = new Set(); for (const [rawPid, stat] of Object.entries( statsByPid && typeof statsByPid === 'object' ? statsByPid : {} )) { const pid = Number.parseInt(rawPid, 10); const usageStats = this.normalizeRuntimeProcessUsageStats(stat); - if (Number.isFinite(pid) && pid > 0 && usageStats) { - usageStatsByPid.set(pid, usageStats); + if (Number.isFinite(pid) && pid > 0) { + observedPids.add(pid); + rememberUsageStats(pid, usageStats); + } + } + for (const pid of pidsToRead) { + if (!observedPids.has(pid)) { + rememberUsageStats(pid, null); } } return usageStatsByPid; @@ -26079,10 +26155,10 @@ export class TeamProvisioningService { for ( let offset = 0; - offset < uniquePids.length; + offset < pidsToRead.length; offset += TeamProvisioningService.RUNTIME_PIDUSAGE_FALLBACK_CONCURRENCY ) { - const chunk = uniquePids.slice( + const chunk = pidsToRead.slice( offset, offset + TeamProvisioningService.RUNTIME_PIDUSAGE_FALLBACK_CONCURRENCY ); @@ -26095,13 +26171,12 @@ export class TeamProvisioningService { `pidusage runtime telemetry pid=${pid}` ); const usageStats = this.normalizeRuntimeProcessUsageStats(stat); - if (usageStats) { - usageStatsByPid.set(pid, usageStats); - } + rememberUsageStats(pid, usageStats); } catch (error) { if (error instanceof RuntimeTelemetryTimeoutError) { logger.debug(error.message); } + rememberUsageStats(pid, null); // Process likely exited between discovery and sampling. } }) diff --git a/test/main/services/team/TeamProvisioningService.test.ts b/test/main/services/team/TeamProvisioningService.test.ts index dbe00b15..254d1dfe 100644 --- a/test/main/services/team/TeamProvisioningService.test.ts +++ b/test/main/services/team/TeamProvisioningService.test.ts @@ -4740,6 +4740,22 @@ describe('TeamProvisioningService', () => { expect(stats.get(333)).toEqual({ rssBytes: 123_000_000, cpuPercent: 7 }); }); + it('caches runtime process usage stats for repeated reads', async () => { + const svc = new TeamProvisioningService(); + const usageByPid: Record> = { + '111': createPidusageStat(111, 123_000_000, 7), + }; + vi.mocked(pidusage).mockResolvedValueOnce(usageByPid); + + const first = await (svc as any).readProcessUsageStatsByPid([111]); + const second = await (svc as any).readProcessUsageStatsByPid([111]); + + expect(pidusage).toHaveBeenCalledTimes(1); + expect(pidusage).toHaveBeenCalledWith([111], EXPECTED_RUNTIME_PIDUSAGE_OPTIONS); + expect(first.get(111)).toEqual({ rssBytes: 123_000_000, cpuPercent: 7 }); + expect(second.get(111)).toEqual({ rssBytes: 123_000_000, cpuPercent: 7 }); + }); + it('falls back to direct agent process lookup when tmux pane pid lookup is unavailable', async () => { const svc = new TeamProvisioningService(); (svc as any).configReader = {