readline.createInterface runs an expensive Unicode line-break regex + extra stream/string-decoder machinery per chunk. The main transcript parser (parseJsonlStream) already uses a buffer + manual newline split; these per-team readers still used readline. Add readJsonlLines(): an async generator that yields a JSONL file's lines via a chunked utf8 stream read + a plain '\n' split (drop-in for 'for await (const line of rl)'), so the consumers' loop bodies are unchanged. Stream is utf8-decoded before splitting, so multi-byte chars across chunk boundaries are safe; trailing CR (CRLF) is stripped; empty lines and a final newline-less line are yielded, matching readline; breaking out of the loop destroys the stream via the generator's finally. Adopt it in MemberStatsComputer, TaskBoundaryParser, and FileContentResolver (file-history scan). Behavior-identical (their existing tests pass: 18 + 6 + 12) plus 6 new tests for the generator (CRLF, empty lines, no-trailing-newline, early break, multi-byte chunk boundary). Note: session-browser readline paths (jsonl metadata extractor, metadataExtraction, SessionContentFilter) are off the launch path and left as-is for now.
492 lines
18 KiB
TypeScript
492 lines
18 KiB
TypeScript
import { readJsonlLines } from '@main/utils/jsonlLineReader';
|
|
import { createLogger } from '@shared/utils/logger';
|
|
|
|
import { type TeamMemberLogsFinder } from './TeamMemberLogsFinder';
|
|
import { countLineChanges } from './UnifiedLineCounter';
|
|
|
|
import type { FileLineStats, MemberFullStats } from '@shared/types';
|
|
|
|
// Module-level logger labelled 'Service:MemberStatsComputer'; used below to report log files that fail to parse.
const logger = createLogger('Service:MemberStatsComputer');
|
|
|
|
/** Characters removed from the end of a candidate path before it is validated. */
const TRAILING_PUNCT_CHARS = new Set<string>([';', '.', ',']);

/** Lower-cased literal tokens that are never accepted as a file path. */
const INVALID_NAMES = new Set<string>(['null', 'undefined', 'none', 'false', 'true', '']);

/** Matches a drive-qualified Windows null device (e.g. `c:/nul`) after slash/case normalization. */
const WINDOWS_NULL_DEVICE_RE = /^[a-z]:\/nul$/;

/**
 * Drops every trailing `;`, `.` or `,` from `s`.
 *
 * @param s - Text to clean.
 * @returns `s` itself when nothing had to be removed, otherwise the shortened copy.
 */
function stripTrailingPunct(s: string): string {
  let keep = s.length;
  for (; keep > 0; keep--) {
    if (!TRAILING_PUNCT_CHARS.has(s.charAt(keep - 1))) break;
  }
  return keep < s.length ? s.slice(0, keep) : s;
}

/**
 * Tells whether `value` names a null device (`/dev/null`, `\\.\NUL`, `\\?\NUL`, `C:\NUL`, ...).
 * Backslashes are treated as forward slashes and the comparison is case-insensitive.
 */
function isNullDevicePath(value: string): boolean {
  const forwardSlashed = value.replace(/\\/g, '/').toLowerCase();
  switch (forwardSlashed) {
    case '/dev/null':
    case '//./nul':
    case '//?/nul':
      return true;
    default:
      return WINDOWS_NULL_DEVICE_RE.test(forwardSlashed);
  }
}

/**
 * Decides whether a string extracted from a log looks like a real file path.
 *
 * The value is trimmed and stripped of trailing punctuation first. It is accepted only when the
 * remainder is longer than one character, is not one of the placeholder tokens in
 * `INVALID_NAMES`, contains at least one `/` or `\`, and does not point at a null device.
 *
 * @param value - Raw candidate path.
 * @returns `true` when the candidate passes every check.
 */
export function isValidFilePath(value: string): boolean {
  const candidate = stripTrailingPunct(value.trim());

  if (candidate.length <= 1) return false;
  if (INVALID_NAMES.has(candidate.toLowerCase())) return false;
  if (!candidate.includes('/') && !candidate.includes('\\')) return false;

  return !isNullDevicePath(candidate);
}
|
|
|
|
// Maximum age of a cached stats entry before getStats() recomputes it.
const CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
|
|
|
|
/** A previously computed stats result together with the moment it was stored. */
interface CacheEntry {
  /** `Date.now()` value (epoch milliseconds) captured when the entry was written. */
  timestamp: number;
  /** The statistics that were computed for the member. */
  stats: MemberFullStats;
}
|
|
|
|
/**
 * Aggregates per-member activity statistics (line changes, touched files, tool usage, token
 * usage, message count and session duration) from the member's JSONL log files.
 *
 * Results are cached per `team:member` pair for `CACHE_TTL_MS`.
 */
export class MemberStatsComputer {
  /** Computed stats keyed by `${teamName}:${memberName}`. */
  private readonly cache = new Map<string, CacheEntry>();

  /**
   * @param logsFinder - Locates the JSONL log files that belong to a team member.
   */
  constructor(private readonly logsFinder: TeamMemberLogsFinder) {}

  /**
   * Returns the aggregated statistics for one member, using the cache when the entry is still
   * fresh and otherwise parsing every log file reported by the logs finder.
   *
   * `costUsd` and `tasksCompleted` are always reported as 0 by this method.
   *
   * @param teamName - Team the member belongs to.
   * @param memberName - Member whose logs are analysed.
   * @returns Totals across all of the member's log files; `sessionCount` is the number of files.
   */
  async getStats(teamName: string, memberName: string): Promise<MemberFullStats> {
    const cacheKey = `${teamName}:${memberName}`;
    const cached = this.cache.get(cacheKey);
    if (cached && Date.now() - cached.timestamp < CACHE_TTL_MS) {
      return cached.stats;
    }

    const paths = await this.logsFinder.findMemberLogPaths(teamName, memberName);

    let linesAdded = 0;
    let linesRemoved = 0;
    const filesTouchedSet = new Set<string>();
    const perFileStats: Record<string, FileLineStats> = {};
    const toolUsage: Record<string, number> = {};
    let inputTokens = 0;
    let outputTokens = 0;
    let cacheReadTokens = 0;
    let messageCount = 0;
    let totalDurationMs = 0;

    // Files are parsed one at a time and folded into the running totals.
    for (const filePath of paths) {
      const parsed = await this.parseFile(filePath);
      linesAdded += parsed.linesAdded;
      linesRemoved += parsed.linesRemoved;
      for (const f of parsed.filesTouched) filesTouchedSet.add(f);
      for (const [fp, fls] of Object.entries(parsed.perFileStats)) {
        const existing = perFileStats[fp];
        if (existing) {
          existing.added += fls.added;
          existing.removed += fls.removed;
        } else {
          // Copy so later merges never mutate the per-file result object.
          perFileStats[fp] = { added: fls.added, removed: fls.removed };
        }
      }
      for (const [tool, count] of Object.entries(parsed.toolUsage)) {
        toolUsage[tool] = (toolUsage[tool] ?? 0) + count;
      }
      inputTokens += parsed.inputTokens;
      outputTokens += parsed.outputTokens;
      cacheReadTokens += parsed.cacheReadTokens;
      messageCount += parsed.messageCount;
      totalDurationMs += parsed.durationMs;
    }

    const validFiles = [...filesTouchedSet]
      .filter(isValidFilePath)
      .sort((a, b) => a.localeCompare(b));

    // Also filter perFileStats keys to exclude invalid paths
    const filteredFileStats: Record<string, FileLineStats> = {};
    for (const [fp, fls] of Object.entries(perFileStats)) {
      if (isValidFilePath(fp)) {
        filteredFileStats[fp] = fls;
      }
    }

    const stats: MemberFullStats = {
      linesAdded,
      linesRemoved,
      filesTouched: validFiles,
      fileStats: filteredFileStats,
      toolUsage,
      inputTokens,
      outputTokens,
      cacheReadTokens,
      costUsd: 0,
      tasksCompleted: 0,
      messageCount,
      totalDurationMs,
      sessionCount: paths.length,
      computedAt: new Date().toISOString(),
    };

    this.cache.set(cacheKey, { stats, timestamp: Date.now() });
    return stats;
  }

  /**
   * Streams one JSONL log file and extracts its statistics.
   *
   * Blank and malformed lines are skipped. A failure while reading the file is logged at debug
   * level and whatever was accumulated up to that point is returned.
   *
   * `durationMs` is the span between the first and the last *parseable* `timestamp` seen in the
   * file (never negative). Timestamps that `Date.parse` cannot interpret are ignored so a single
   * bad record cannot turn the duration into `NaN`.
   *
   * @param filePath - Path of the JSONL file to read.
   * @returns The statistics found in this file alone.
   */
  private async parseFile(filePath: string): Promise<{
    linesAdded: number;
    linesRemoved: number;
    filesTouched: string[];
    perFileStats: Record<string, FileLineStats>;
    toolUsage: Record<string, number>;
    inputTokens: number;
    outputTokens: number;
    cacheReadTokens: number;
    messageCount: number;
    durationMs: number;
  }> {
    let linesAdded = 0;
    let linesRemoved = 0;
    const filesTouchedSet = new Set<string>();
    const perFileStats: Record<string, FileLineStats> = {};
    const toolUsage: Record<string, number> = {};
    let inputTokens = 0;
    let outputTokens = 0;
    let cacheReadTokens = 0;
    let messageCount = 0;
    let firstTimestampMs: number | null = null;
    let lastTimestampMs: number | null = null;

    // Track last known content per file for accurate Write/NotebookEdit diffs
    const fileLastContent = new Map<string, string>();

    const cleanPath = (fp: string): string => stripTrailingPunct(fp.trim());

    /** Records `fp` as touched when it looks like a real file path. */
    const trackFile = (fp: string): void => {
      const cleaned = cleanPath(fp);
      if (isValidFilePath(cleaned)) filesTouchedSet.add(cleaned);
    };

    /** Adds line counts to the per-file totals for `fp` when it looks like a real file path. */
    const addFileLines = (fp: string, added: number, removed: number): void => {
      const cleaned = cleanPath(fp);
      if (!isValidFilePath(cleaned)) return;
      const existing = perFileStats[cleaned];
      if (existing) {
        existing.added += added;
        existing.removed += removed;
      } else {
        perFileStats[cleaned] = { added, removed };
      }
    };

    try {
      for await (const line of readJsonlLines(filePath)) {
        const trimmed = line.trim();
        if (!trimmed) continue;

        try {
          const msg = JSON.parse(trimmed) as Record<string, unknown>;

          if (typeof msg.timestamp === 'string') {
            // Only parseable timestamps take part in the duration calculation.
            const timestampMs = Date.parse(msg.timestamp);
            if (!Number.isNaN(timestampMs)) {
              if (firstTimestampMs === null) firstTimestampMs = timestampMs;
              lastTimestampMs = timestampMs;
            }
          }

          // Count messages
          const role = this.extractRole(msg);
          if (role) messageCount++;

          // Extract token usage
          const usage = this.extractUsage(msg);
          if (usage) {
            inputTokens += usage.inputTokens;
            outputTokens += usage.outputTokens;
            cacheReadTokens += usage.cacheReadTokens;
          }

          // Only assistant messages carry tool_use blocks worth inspecting.
          if (role !== 'assistant') continue;
          const content = this.extractContent(msg);
          if (content === null) continue;

          for (const block of content) {
            if (!block || typeof block !== 'object') continue;
            const toolBlock = block as Record<string, unknown>;
            if (toolBlock.type !== 'tool_use') continue;

            const rawName = typeof toolBlock.name === 'string' ? toolBlock.name : 'unknown';
            // Tools prefixed with "proxy_" are counted under their unprefixed name.
            const toolName = rawName.startsWith('proxy_') ? rawName.slice(6) : rawName;
            toolUsage[toolName] = (toolUsage[toolName] ?? 0) + 1;

            const input = toolBlock.input as Record<string, unknown> | undefined;
            if (!input) continue;

            // Track files
            if (typeof input.file_path === 'string') {
              trackFile(input.file_path);
            }
            if (typeof input.path === 'string' && toolName === 'Read') {
              trackFile(input.path);
            }

            switch (toolName) {
              // Count lines for Edit (using semantic diff for accuracy)
              case 'Edit': {
                const editPath = typeof input.file_path === 'string' ? input.file_path : '';
                const oldStr = typeof input.old_string === 'string' ? input.old_string : '';
                const newStr = typeof input.new_string === 'string' ? input.new_string : '';
                const replaceAll = input.replace_all === true;
                const { added: fileAdded, removed: fileRemoved } = countLineChanges(
                  oldStr,
                  newStr
                );
                linesAdded += fileAdded;
                linesRemoved += fileRemoved;
                if (editPath) {
                  addFileLines(editPath, fileAdded, fileRemoved);
                  // Update fileLastContent so subsequent Writes diff against correct state
                  const prev = fileLastContent.get(editPath);
                  if (prev !== undefined && oldStr) {
                    if (replaceAll) {
                      fileLastContent.set(editPath, prev.split(oldStr).join(newStr));
                    } else {
                      const idx = prev.indexOf(oldStr);
                      if (idx !== -1) {
                        fileLastContent.set(
                          editPath,
                          prev.substring(0, idx) + newStr + prev.substring(idx + oldStr.length)
                        );
                      }
                    }
                  }
                }
                break;
              }

              // Count lines for Write (track previous content for accurate diff)
              case 'Write': {
                const writeContent = typeof input.content === 'string' ? input.content : '';
                const writePath = typeof input.file_path === 'string' ? input.file_path : '';
                if (writeContent) {
                  const prevContent = fileLastContent.get(writePath) ?? '';
                  const { added: fileAdded, removed: fileRemoved } = countLineChanges(
                    prevContent,
                    writeContent
                  );
                  if (writePath) fileLastContent.set(writePath, writeContent);
                  linesAdded += fileAdded;
                  linesRemoved += fileRemoved;
                  if (writePath) {
                    addFileLines(writePath, fileAdded, fileRemoved);
                  }
                }
                break;
              }

              // Count lines for NotebookEdit (semantic diff)
              case 'NotebookEdit': {
                const src = typeof input.new_source === 'string' ? input.new_source : '';
                if (src) {
                  const nbPath =
                    typeof input.notebook_path === 'string' ? input.notebook_path : '';
                  const prevContent = fileLastContent.get(nbPath) ?? '';
                  const { added: fileAdded, removed: fileRemoved } = countLineChanges(
                    prevContent,
                    src
                  );
                  if (nbPath) fileLastContent.set(nbPath, src);
                  linesAdded += fileAdded;
                  linesRemoved += fileRemoved;
                  if (nbPath) {
                    addFileLines(nbPath, fileAdded, fileRemoved);
                  }
                }
                if (typeof input.notebook_path === 'string') {
                  trackFile(input.notebook_path);
                }
                break;
              }

              // Count lines for Bash commands that write to files
              case 'Bash': {
                const cmd = typeof input.command === 'string' ? input.command : '';
                if (cmd) {
                  const bashLines = estimateBashLinesChanged(cmd);
                  linesAdded += bashLines.added;
                  linesRemoved += bashLines.removed;
                  const touchedFiles = [...new Set(bashLines.files)];
                  for (const f of touchedFiles) {
                    trackFile(f);
                  }
                  // Only attribute per-file lines when a single file is touched;
                  // with multiple files we can't determine per-file distribution
                  if (touchedFiles.length === 1) {
                    addFileLines(touchedFiles[0], bashLines.added, bashLines.removed);
                  }
                }
                break;
              }

              default:
                break;
            }
          }
        } catch {
          // Skip malformed lines
        }
      }
    } catch (err) {
      logger.debug(`Failed to parse file ${filePath}: ${String(err)}`);
    }

    const durationMs =
      firstTimestampMs !== null && lastTimestampMs !== null
        ? Math.max(0, lastTimestampMs - firstTimestampMs)
        : 0;

    return {
      linesAdded,
      linesRemoved,
      filesTouched: [...filesTouchedSet],
      perFileStats,
      toolUsage,
      inputTokens,
      outputTokens,
      cacheReadTokens,
      messageCount,
      durationMs,
    };
  }

  /**
   * Reads the message role from either the top-level `role` field or the nested `message.role`.
   *
   * @returns The role string, or `null` when neither location holds a string.
   */
  private extractRole(msg: Record<string, unknown>): string | null {
    if (typeof msg.role === 'string') return msg.role;
    if (msg.message && typeof msg.message === 'object') {
      const inner = msg.message as Record<string, unknown>;
      if (typeof inner.role === 'string') return inner.role;
    }
    return null;
  }

  /**
   * Reads the content blocks from the top-level `content` field, falling back to
   * `message.content` when the top-level field is null or undefined.
   *
   * @returns The content array, or `null` when the selected value is not an array.
   */
  private extractContent(msg: Record<string, unknown>): unknown[] | null {
    const content = msg.content ?? (msg.message as Record<string, unknown> | undefined)?.content;
    if (Array.isArray(content)) return content as unknown[];
    return null;
  }

  /**
   * Reads token counters from the top-level `usage` object, falling back to `message.usage`.
   * Counters that are missing or not numbers are reported as 0.
   *
   * @returns The token counters, or `null` when no usage object is present.
   */
  private extractUsage(
    msg: Record<string, unknown>
  ): { inputTokens: number; outputTokens: number; cacheReadTokens: number } | null {
    const usage = (msg.usage ?? (msg.message as Record<string, unknown> | undefined)?.usage) as
      | Record<string, unknown>
      | undefined;
    if (!usage || typeof usage !== 'object') return null;

    return {
      inputTokens: typeof usage.input_tokens === 'number' ? usage.input_tokens : 0,
      outputTokens: typeof usage.output_tokens === 'number' ? usage.output_tokens : 0,
      cacheReadTokens:
        typeof usage.cache_read_input_tokens === 'number' ? usage.cache_read_input_tokens : 0,
    };
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Bash line-change heuristics
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/** Outcome of the Bash heuristics: estimated line counts plus the files the command writes to. */
interface BashLinesResult {
  /** Estimated number of lines added by the command. */
  added: number;
  /** Estimated number of lines removed by the command. */
  removed: number;
  /** Target paths in detection order; may contain duplicates. */
  files: string[];
}

/**
 * Estimates, on a best-effort basis, how many lines a Bash command adds or removes and which
 * files it writes to, by pattern-matching the command string.
 *
 * Recognized patterns, applied in this order:
 *  1. Heredocs (`<<EOF`, `<<'EOF'`, `<<-EOF`): the lines between the opener and the first
 *     following line that starts with the delimiter count as added. No file is recorded.
 *  2. `echo` / `printf` with a quoted argument redirected to a file: one added line per
 *     literal `\n`-separated segment; the redirect target is recorded.
 *  3. `sed -i`: each invocation counts as one line added and one removed; the first
 *     absolute path that ends the command or precedes `;`, `&` or `|` is recorded.
 *  4. Any other redirect to an absolute path: recorded as a file, but only when steps 1-3
 *     counted no lines at all.
 *  5. `tee` (optionally `-a`) with an absolute path: the path is recorded.
 *
 * Known limitations:
 *  - Only a minority (roughly 30-40%) of real-world Bash file-write patterns are covered.
 *  - Not handled: variable expansions (`echo "$var" > file`), piped output
 *    (`grep ... | sort > file`), `python -c`, `git apply`, `patch`, `mv`/`cp`, and `<<-`
 *    heredocs whose terminator is tab-indented.
 *  - Only the command string is available in the JSONL tool_use input; the content actually
 *    written by the shell is not captured, so exact counts are impossible from this data.
 *  - Possible future work: parse tool_result blocks for `git diff --stat` output (needs a
 *    two-pass parser), or run `git log --stat` on the repository filtered by session time.
 *
 * @param command - The raw Bash command string.
 * @returns Estimated added/removed line counts and the detected target files.
 */
export function estimateBashLinesChanged(command: string): BashLinesResult {
  const result: BashLinesResult = { added: 0, removed: 0, files: [] };

  // 1. Heredoc: cat <<'EOF' > file  OR  cat <<EOF > file
  for (const opener of command.matchAll(/<<-?\s*'?(\w+)'?/g)) {
    const delimiter = opener[1];
    const rest = command.slice((opener.index ?? 0) + opener[0].length);
    const closeAt = rest.indexOf(`\n${delimiter}`);
    if (closeAt <= 0) continue;
    // The body starts right after the first newline; when that newline is the one in front of
    // the terminator, the heredoc is empty and contributes nothing.
    const bodyStart = rest.indexOf('\n') + 1;
    if (bodyStart > closeAt) continue;
    result.added += rest.slice(bodyStart, closeAt).split('\n').length;
  }

  // 2. Echo / printf with redirect: echo "..." > /path  OR  printf "..." > /path
  const echoPattern =
    /(?:echo|printf)\s+(?:-[a-zA-Z]+\s+)?(?:"([^"]*)"|'([^']*)')\s*>{1,2}\s*(\S+)/g; // eslint-disable-line security/detect-unsafe-regex -- Fixed alternation, short command strings only
  for (const hit of command.matchAll(echoPattern)) {
    const text = hit[1] ?? hit[2] ?? '';
    if (text) {
      // Segments are separated by the two-character escape sequence \n, not a real newline.
      result.added += text.split('\\n').length;
    }
    const target = hit[3];
    if (target?.trim()) {
      result.files.push(target);
    }
  }

  // 3. sed -i: each invocation ~ 1 line changed
  // eslint-disable-next-line sonarjs/slow-regex -- Simple alternation on short command strings, no backtracking risk
  const sedPattern = /sed\s+(?:-[a-zA-Z]*i[a-zA-Z]*|-i)\s/g;
  for (const hit of command.matchAll(sedPattern)) {
    result.added += 1;
    result.removed += 1;
    const fromSed = command.slice(hit.index ?? 0);
    const target = /\s(\/\S+)\s*(?:[;&|]|$)/.exec(fromSed);
    if (target) {
      result.files.push(target[1]);
    }
  }

  // 4. Redirect to file (catch-all for remaining redirects not caught above)
  const nothingCounted = result.added === 0 && result.removed === 0;
  if (nothingCounted) {
    for (const hit of command.matchAll(/>{1,2}\s*(\/\S+)/g)) {
      const target = hit[1];
      if (target) {
        result.files.push(target);
      }
    }
  }

  // 5. tee: ... | tee /path/to/file
  const teePattern = /\btee\s+(?:-a\s+)?(\/\S+)/g; // eslint-disable-line security/detect-unsafe-regex -- Simple pattern on short command strings
  for (const hit of command.matchAll(teePattern)) {
    const target = hit[1];
    if (target) {
      result.files.push(target);
    }
  }

  return result;
}
|