diff --git a/src/renderer/components/chat/SessionContextPanel/components/FlatInjectionList.tsx b/src/renderer/components/chat/SessionContextPanel/components/FlatInjectionList.tsx index cb2d9e39..5b5f35d4 100644 --- a/src/renderer/components/chat/SessionContextPanel/components/FlatInjectionList.tsx +++ b/src/renderer/components/chat/SessionContextPanel/components/FlatInjectionList.tsx @@ -191,9 +191,7 @@ export const FlatInjectionList = ({ } }; - const displayText = row.description - ? `${row.label} \u2014 ${row.description}` - : row.label; + const displayText = row.description ? `${row.label} \u2014 ${row.description}` : row.label; return (
diff --git a/src/renderer/components/chat/items/linkedTool/renderHelpers.tsx b/src/renderer/components/chat/items/linkedTool/renderHelpers.tsx index 4b64eed4..6463f947 100644 --- a/src/renderer/components/chat/items/linkedTool/renderHelpers.tsx +++ b/src/renderer/components/chat/items/linkedTool/renderHelpers.tsx @@ -152,7 +152,7 @@ export function extractOutputText(content: string | unknown[]): string { .map((block) => typeof block === 'object' && block !== null && 'text' in block ? (block as { text: string }).text - : JSON.stringify(block, null, 2), + : JSON.stringify(block, null, 2) ) .join('\n'); } else { diff --git a/src/renderer/components/report/SessionReportTab.tsx b/src/renderer/components/report/SessionReportTab.tsx index b72088cc..a46bdf71 100644 --- a/src/renderer/components/report/SessionReportTab.tsx +++ b/src/renderer/components/report/SessionReportTab.tsx @@ -63,6 +63,7 @@ export const SessionReportTab = ({ tab }: SessionReportTabProps) => { prompt={report.promptQuality} startup={report.startupOverhead} testProgression={report.testProgression} + fileReadRedundancy={report.fileReadRedundancy} /> {
Per Commit
-
- {data.costPerCommit != null ? fmt(data.costPerCommit) : 'N/A'} +
+ + {data.costPerCommit != null ? fmt(data.costPerCommit) : 'N/A'} + + {data.costPerCommitAssessment && ( + + {assessmentLabel(data.costPerCommitAssessment)} + + )}
Per Line Changed
-
- {data.costPerLineChanged != null ? `$${data.costPerLineChanged.toFixed(6)}` : 'N/A'} +
+ + {data.costPerLineChanged != null ? `$${data.costPerLineChanged.toFixed(6)}` : 'N/A'} + + {data.costPerLineAssessment && ( + + {assessmentLabel(data.costPerLineAssessment)} + + )}
+ {data.subagentCostSharePct != null && ( +
+ Subagent Cost Share: + {data.subagentCostSharePct}% + {data.subagentCostShareAssessment && ( + + {assessmentLabel(data.subagentCostShareAssessment)} + + )} +
+ )} + {modelEntries.length > 0 && ( diff --git a/src/renderer/components/report/sections/FrictionSection.tsx b/src/renderer/components/report/sections/FrictionSection.tsx index 96ea0032..c50591ac 100644 --- a/src/renderer/components/report/sections/FrictionSection.tsx +++ b/src/renderer/components/report/sections/FrictionSection.tsx @@ -1,29 +1,28 @@ +import { assessmentColor, assessmentLabel } from '@renderer/utils/reportAssessments'; import { MessageSquareWarning } from 'lucide-react'; import { ReportSection } from '../ReportSection'; import type { ReportFrictionSignals, ReportThrashingSignals } from '@renderer/types/sessionReport'; -const frictionColor = (rate: number): string => { - if (rate <= 0.1) return '#4ade80'; - if (rate <= 0.25) return '#fbbf24'; - return '#f87171'; -}; - interface FrictionSectionProps { data: ReportFrictionSignals; thrashing: ReportThrashingSignals; } export const FrictionSection = ({ data, thrashing }: FrictionSectionProps) => { + const frictionColor = + data.frictionRate <= 0.1 ? '#4ade80' : data.frictionRate <= 0.25 ? '#fbbf24' : '#f87171'; + const thrashColor = assessmentColor(thrashing.thrashingAssessment); + return (
Friction Rate: {(data.frictionRate * 100).toFixed(1)}% @@ -54,7 +53,15 @@ export const FrictionSection = ({ data, thrashing }: FrictionSectionProps) => { {(thrashing.bashNearDuplicates.length > 0 || thrashing.editReworkFiles.length > 0) && (
-
Thrashing Signals
+
+ Thrashing Signals + + {assessmentLabel(thrashing.thrashingAssessment)} + +
{thrashing.bashNearDuplicates.length > 0 && (
diff --git a/src/renderer/components/report/sections/OverviewSection.tsx b/src/renderer/components/report/sections/OverviewSection.tsx index e4027405..d871b02d 100644 --- a/src/renderer/components/report/sections/OverviewSection.tsx +++ b/src/renderer/components/report/sections/OverviewSection.tsx @@ -1,24 +1,10 @@ +import { assessmentColor } from '@renderer/utils/reportAssessments'; import { Activity } from 'lucide-react'; import { ReportSection } from '../ReportSection'; import type { ReportOverview } from '@renderer/types/sessionReport'; -const assessmentColor = (assessment: ReportOverview['contextAssessment']): string => { - switch (assessment) { - case 'healthy': - return '#4ade80'; - case 'moderate': - return '#fbbf24'; - case 'high': - return '#f87171'; - case 'critical': - return '#f87171'; - default: - return '#a1a1aa'; - } -}; - interface OverviewSectionProps { data: ReportOverview; } diff --git a/src/renderer/components/report/sections/QualitySection.tsx b/src/renderer/components/report/sections/QualitySection.tsx index c0dea581..a086fd72 100644 --- a/src/renderer/components/report/sections/QualitySection.tsx +++ b/src/renderer/components/report/sections/QualitySection.tsx @@ -1,63 +1,33 @@ +import { assessmentColor, assessmentLabel, severityColor } from '@renderer/utils/reportAssessments'; import { BarChart3 } from 'lucide-react'; import { ReportSection } from '../ReportSection'; import type { + ReportFileReadRedundancy, ReportPromptQuality, ReportStartupOverhead, ReportTestProgression, } from '@renderer/types/sessionReport'; -const assessmentColor = (assessment: ReportPromptQuality['assessment']): string => { - switch (assessment) { - case 'well_specified': - return '#4ade80'; - case 'moderate_friction': - return '#fbbf24'; - case 'underspecified': - return '#f87171'; - case 'verbose_but_unclear': - return '#f87171'; - default: - return '#a1a1aa'; - } -}; - -const assessmentLabel = (assessment: ReportPromptQuality['assessment']): string => { - switch (assessment) { - case 'well_specified': - return 'Well Specified'; - case 'moderate_friction': - return 'Moderate Friction'; - case 'underspecified': - return 'Underspecified'; - case 'verbose_but_unclear': - return 'Verbose but Unclear'; - default: - return assessment; - } -}; - -const trajectoryColor = (trajectory: ReportTestProgression['trajectory']): string => { - switch (trajectory) { - case 'improving': - return '#4ade80'; - case 'regressing': - return '#f87171'; - case 'stable': - return '#fbbf24'; - default: - return '#a1a1aa'; - } -}; - interface QualitySectionProps { prompt: ReportPromptQuality; startup: ReportStartupOverhead; testProgression: ReportTestProgression; + fileReadRedundancy: ReportFileReadRedundancy; } -export const QualitySection = ({ prompt, startup, testProgression }: QualitySectionProps) => { +export const QualitySection = ({ + prompt, + startup, + testProgression, + fileReadRedundancy, +}: QualitySectionProps) => { + const promptColor = assessmentColor(prompt.assessment); + const trajectoryColor = assessmentColor(testProgression.trajectory); + const overheadColor = assessmentColor(startup.overheadAssessment); + const redundancyColor = assessmentColor(fileReadRedundancy.redundancyAssessment); + return ( {/* Prompt quality */} @@ -66,10 +36,7 @@ export const QualitySection = ({ prompt, startup, testProgression }: QualitySect
{assessmentLabel(prompt.assessment)} @@ -101,7 +68,15 @@ export const QualitySection = ({ prompt, startup, testProgression }: QualitySect {/* Startup overhead */}
-
Startup Overhead
+
+ Startup Overhead + + {assessmentLabel(startup.overheadAssessment)} + +
Messages Before Work
@@ -120,21 +95,44 @@ export const QualitySection = ({ prompt, startup, testProgression }: QualitySect
+ {/* File read redundancy */} +
+
+ File Read Redundancy + + {assessmentLabel(fileReadRedundancy.redundancyAssessment)} + +
+
+
+
Total Reads
+
{fileReadRedundancy.totalReads}
+
+
+
Unique Files
+
{fileReadRedundancy.uniqueFiles}
+
+
+
Reads/Unique File
+
+ {fileReadRedundancy.readsPerUniqueFile}x +
+
+
+
+ {/* Test progression */}
Test Progression
- {testProgression.trajectory === 'insufficient_data' - ? 'Insufficient Data' - : testProgression.trajectory.charAt(0).toUpperCase() + - testProgression.trajectory.slice(1)} + {assessmentLabel(testProgression.trajectory)} {testProgression.snapshotCount} snapshot{testProgression.snapshotCount !== 1 ? 's' : ''} @@ -145,11 +143,11 @@ export const QualitySection = ({ prompt, startup, testProgression }: QualitySect
First Run
- + {testProgression.firstSnapshot.passed} passed {' / '} - + {testProgression.firstSnapshot.failed} failed
@@ -157,11 +155,11 @@ export const QualitySection = ({ prompt, startup, testProgression }: QualitySect
Last Run
- + {testProgression.lastSnapshot.passed} passed {' / '} - + {testProgression.lastSnapshot.failed} failed
diff --git a/src/renderer/components/report/sections/SubagentSection.tsx b/src/renderer/components/report/sections/SubagentSection.tsx index bfef657d..7a788d36 100644 --- a/src/renderer/components/report/sections/SubagentSection.tsx +++ b/src/renderer/components/report/sections/SubagentSection.tsx @@ -1,3 +1,4 @@ +import { severityColor } from '@renderer/utils/reportAssessments'; import { Users } from 'lucide-react'; import { ReportSection } from '../ReportSection'; @@ -53,8 +54,19 @@ export const SubagentSection = ({ data }: SubagentSectionProps) => {
{data.byAgent.map((agent, idx) => ( -
- {agent.description} + +
+ {agent.description} +
+ {agent.modelMismatch && ( +
+ {agent.modelMismatch.recommendation} +
+ )}
{agent.subagentType} diff --git a/src/renderer/components/report/sections/TimelineSection.tsx b/src/renderer/components/report/sections/TimelineSection.tsx index e01718dc..dd3629dd 100644 --- a/src/renderer/components/report/sections/TimelineSection.tsx +++ b/src/renderer/components/report/sections/TimelineSection.tsx @@ -1,3 +1,4 @@ +import { assessmentColor, assessmentLabel } from '@renderer/utils/reportAssessments'; import { Clock } from 'lucide-react'; import { ReportSection } from '../ReportSection'; @@ -15,11 +16,21 @@ interface TimelineSectionProps { } export const TimelineSection = ({ idle, modelSwitches, keyEvents }: TimelineSectionProps) => { + const idleColor = assessmentColor(idle.idleAssessment); + return ( {/* Idle stats */}
-
Idle Analysis
+
+ Idle Analysis + + {assessmentLabel(idle.idleAssessment)} + +
Idle Gaps
@@ -35,10 +46,7 @@ export const TimelineSection = ({ idle, modelSwitches, keyEvents }: TimelineSect
Idle %
-
50 ? '#fbbf24' : '#4ade80' }} - > +
{idle.idlePct}%
@@ -48,8 +56,21 @@ export const TimelineSection = ({ idle, modelSwitches, keyEvents }: TimelineSect {/* Model switches */} {modelSwitches.count > 0 && (
-
- Model Switches ({modelSwitches.count}) +
+ + Model Switches ({modelSwitches.count}) + + {modelSwitches.switchPattern && ( + + {assessmentLabel(modelSwitches.switchPattern)} + + )}
{modelSwitches.switches.map((sw, idx) => ( diff --git a/src/renderer/components/report/sections/TokenSection.tsx b/src/renderer/components/report/sections/TokenSection.tsx index 9dd1dd0c..c0123398 100644 --- a/src/renderer/components/report/sections/TokenSection.tsx +++ b/src/renderer/components/report/sections/TokenSection.tsx @@ -1,3 +1,4 @@ +import { assessmentColor, assessmentLabel } from '@renderer/utils/reportAssessments'; import { Coins } from 'lucide-react'; import { ReportSection } from '../ReportSection'; @@ -65,12 +66,40 @@ export const TokenSection = ({ data, cacheEconomics }: TokenSectionProps) => {
Cache Efficiency
-
{cacheEconomics.cacheEfficiencyPct}%
+
+ + {cacheEconomics.cacheEfficiencyPct}% + + {cacheEconomics.cacheEfficiencyAssessment && ( + + {assessmentLabel(cacheEconomics.cacheEfficiencyAssessment)} + + )} +
R/W Ratio
-
- {cacheEconomics.cacheReadToWriteRatio}x +
+ + {cacheEconomics.cacheReadToWriteRatio}x + + {cacheEconomics.cacheRatioAssessment && ( + + {assessmentLabel(cacheEconomics.cacheRatioAssessment)} + + )}
diff --git a/src/renderer/components/report/sections/ToolSection.tsx b/src/renderer/components/report/sections/ToolSection.tsx index 8f8bce2b..526b99ac 100644 --- a/src/renderer/components/report/sections/ToolSection.tsx +++ b/src/renderer/components/report/sections/ToolSection.tsx @@ -1,3 +1,4 @@ +import { assessmentColor, assessmentLabel } from '@renderer/utils/reportAssessments'; import { Wrench } from 'lucide-react'; import { ReportSection } from '../ReportSection'; @@ -13,10 +14,23 @@ export const ToolSection = ({ data }: ToolSectionProps) => { (a, b) => b[1].totalCalls - a[1].totalCalls ); + const healthColor = assessmentColor(data.overallToolHealth); + return ( -
- {data.totalCalls.toLocaleString()} total calls across {toolEntries.length} tools +
+ + {data.totalCalls.toLocaleString()} total calls across {toolEntries.length} tools + + + {assessmentLabel(data.overallToolHealth)} +
@@ -25,18 +39,13 @@ export const ToolSection = ({ data }: ToolSectionProps) => { - + + {toolEntries.map(([tool, stats]) => { - const rateColor = - stats.successRatePct < 80 - ? '#f87171' - : stats.successRatePct < 90 - ? '#fbbf24' - : undefined; - + const color = assessmentColor(stats.assessment); return ( @@ -46,12 +55,17 @@ export const ToolSection = ({ data }: ToolSectionProps) => { - + ); })} diff --git a/src/renderer/types/sessionReport.ts b/src/renderer/types/sessionReport.ts index c899e98c..b8707292 100644 --- a/src/renderer/types/sessionReport.ts +++ b/src/renderer/types/sessionReport.ts @@ -3,6 +3,19 @@ * Output of analyzeSession() — one interface per report section. */ +import type { + CacheAssessment, + CostAssessment, + IdleAssessment, + ModelMismatch, + OverheadAssessment, + RedundancyAssessment, + SubagentCostShareAssessment, + SwitchPattern, + ThrashingAssessment, + ToolHealthAssessment, +} from '@renderer/utils/reportAssessments'; + // ============================================================================= // Pricing // ============================================================================= @@ -67,27 +80,33 @@ export interface ReportCostAnalysis { costByModel: Record; costPerCommit: number | null; costPerLineChanged: number | null; + costPerCommitAssessment: CostAssessment | null; + costPerLineAssessment: CostAssessment | null; + subagentCostSharePct: number | null; + subagentCostShareAssessment: SubagentCostShareAssessment | null; } export interface ReportCacheEconomics { - cacheCreation5m: number; - cacheCreation1h: number; cacheRead: number; cacheEfficiencyPct: number; coldStartDetected: boolean; cacheReadToWriteRatio: number; + cacheEfficiencyAssessment: CacheAssessment | null; + cacheRatioAssessment: CacheAssessment | null; } export interface ToolSuccessRate { totalCalls: number; errors: number; successRatePct: number; + assessment: ToolHealthAssessment; } export interface ReportToolUsage { counts: Record; totalCalls: number; successRates: Record; + overallToolHealth: ToolHealthAssessment; } export interface SubagentEntry { @@ -99,6 +118,7 @@ export interface SubagentEntry { totalToolUseCount: number; costUsd: number; costNote?: string; + modelMismatch: ModelMismatch | null; } export interface ReportSubagentMetrics { @@ -157,6 +177,7 @@ export interface ReportFrictionSignals { export interface ReportThrashingSignals { bashNearDuplicates: { prefix: string; count: number }[]; editReworkFiles: { filePath: string; editIndices: number[] }[]; + thrashingAssessment: ThrashingAssessment; } export interface ReportConversationTree { @@ -187,6 +208,7 @@ export interface ReportIdleAnalysis { activeWorkingHuman: string; idlePct: number; longestGaps: IdleGap[]; + idleAssessment: IdleAssessment; } export interface ModelSwitch { @@ -200,6 +222,7 @@ export interface ReportModelSwitches { count: number; switches: ModelSwitch[]; modelsUsed: string[]; + switchPattern: SwitchPattern | null; } export interface ReportWorkingDirectories { @@ -230,6 +253,7 @@ export interface ReportStartupOverhead { messagesBeforeFirstWork: number; tokensBeforeFirstWork: number; pctOfTotal: number; + overheadAssessment: OverheadAssessment; } export interface ReportTokenDensityTimeline { @@ -271,6 +295,7 @@ export interface ReportFileReadRedundancy { uniqueFiles: number; readsPerUniqueFile: number; redundantFiles: Record; + redundancyAssessment: RedundancyAssessment; } // ============================================================================= diff --git a/src/renderer/utils/displayItemBuilder.ts b/src/renderer/utils/displayItemBuilder.ts index 07b4e786..ddaf1f89 100644 --- a/src/renderer/utils/displayItemBuilder.ts +++ b/src/renderer/utils/displayItemBuilder.ts @@ -425,8 +425,7 @@ export function buildDisplayItemsFromMessages( } // Only treat as subagent input if there are NO tool_result blocks in this message const hasToolResults = - Array.isArray(msg.content) && - msg.content.some((b) => b.type === 'tool_result'); + Array.isArray(msg.content) && msg.content.some((b) => b.type === 'tool_result'); if (rawText.trim() && !hasToolResults) { displayItems.push({ type: 'subagent_input', diff --git a/src/renderer/utils/reportAssessments.ts b/src/renderer/utils/reportAssessments.ts new file mode 100644 index 00000000..85cc9e0d --- /dev/null +++ b/src/renderer/utils/reportAssessments.ts @@ -0,0 +1,270 @@ +/** + * Centralized assessment severity/color utilities for session reports. + * + * Maps raw assessment values to severity levels and colors, + * replacing duplicated assessmentColor() functions across report sections. + */ + +// ============================================================================= +// Types +// ============================================================================= + +export type Severity = 'good' | 'warning' | 'danger' | 'neutral'; + +// ============================================================================= +// Colors +// ============================================================================= + +const SEVERITY_COLORS: Record = { + good: '#4ade80', + warning: '#fbbf24', + danger: '#f87171', + neutral: '#a1a1aa', +}; + +export function severityColor(severity: Severity): string { + return SEVERITY_COLORS[severity]; +} + +// ============================================================================= +// Assessment → Severity Mapping +// ============================================================================= + +const ASSESSMENT_SEVERITY: Record = { + // Context + healthy: 'good', + moderate: 'warning', + high: 'danger', + critical: 'danger', + + // Cost / subagent share + efficient: 'good', + normal: 'good', + expensive: 'warning', + red_flag: 'danger', + very_high: 'danger', + + // Cache + good: 'good', + concerning: 'warning', + + // Tool health + degraded: 'warning', + unreliable: 'danger', + + // Idle ('moderate' already mapped above under Context) + high_idle: 'danger', + + // File read + wasteful: 'warning', + + // Startup + heavy: 'warning', + + // Thrashing + none: 'good', + mild: 'warning', + severe: 'danger', + + // Prompt quality + well_specified: 'good', + moderate_friction: 'warning', + underspecified: 'danger', + verbose_but_unclear: 'danger', + + // Test trajectory + improving: 'good', + stable: 'warning', + regressing: 'danger', + insufficient_data: 'neutral', + + // Model switch + opus_plan_mode: 'good', + manual_switch: 'neutral', +}; + +export function assessmentSeverity(assessment: string | null | undefined): Severity { + if (!assessment) return 'neutral'; + return ASSESSMENT_SEVERITY[assessment] ?? 'neutral'; +} + +export function assessmentColor(assessment: string | null | undefined): string { + return severityColor(assessmentSeverity(assessment)); +} + +// ============================================================================= +// Label Formatting +// ============================================================================= + +export function assessmentLabel(value: string): string { + return value + .split('_') + .map((w) => w.charAt(0).toUpperCase() + w.slice(1)) + .join(' '); +} + +// ============================================================================= +// Threshold Constants +// ============================================================================= + +export const THRESHOLDS = { + costPerCommit: { + efficient: 0.5, + normal: 2, + expensive: 5, + }, + costPerLine: { + efficient: 0.01, + normal: 0.05, + expensive: 0.2, + }, + subagentCostShare: { + normal: 30, + high: 60, + veryHigh: 80, + }, + cacheEfficiency: { + good: 95, + }, + cacheRwRatio: { + good: 20, + }, + toolSuccess: { + healthy: 95, + degraded: 80, + }, + idle: { + efficient: 20, + moderate: 50, + }, + fileReadsPerUnique: { + normal: 2.0, + }, + startupOverhead: { + normal: 5, + }, +} as const; + +// ============================================================================= +// Assessment Computers +// ============================================================================= + +export type CostAssessment = 'efficient' | 'normal' | 'expensive' | 'red_flag'; +export type CacheAssessment = 'good' | 'concerning'; +export type ToolHealthAssessment = 'healthy' | 'degraded' | 'unreliable'; +export type IdleAssessment = 'efficient' | 'moderate' | 'high_idle'; +export type RedundancyAssessment = 'normal' | 'wasteful'; +export type OverheadAssessment = 'normal' | 'heavy'; +export type ThrashingAssessment = 'none' | 'mild' | 'severe'; +export type SubagentCostShareAssessment = 'normal' | 'high' | 'very_high' | 'red_flag'; +export type SwitchPattern = 'opus_plan_mode' | 'manual_switch' | 'none'; + +export function computeCostPerCommitAssessment(costPerCommit: number): CostAssessment { + if (costPerCommit < THRESHOLDS.costPerCommit.efficient) return 'efficient'; + if (costPerCommit < THRESHOLDS.costPerCommit.normal) return 'normal'; + if (costPerCommit < THRESHOLDS.costPerCommit.expensive) return 'expensive'; + return 'red_flag'; +} + +export function computeCostPerLineAssessment(costPerLine: number): CostAssessment { + if (costPerLine < THRESHOLDS.costPerLine.efficient) return 'efficient'; + if (costPerLine < THRESHOLDS.costPerLine.normal) return 'normal'; + if (costPerLine < THRESHOLDS.costPerLine.expensive) return 'expensive'; + return 'red_flag'; +} + +export function computeSubagentCostShareAssessment(pct: number): SubagentCostShareAssessment { + if (pct < THRESHOLDS.subagentCostShare.normal) return 'normal'; + if (pct < THRESHOLDS.subagentCostShare.high) return 'high'; + if (pct < THRESHOLDS.subagentCostShare.veryHigh) return 'very_high'; + return 'red_flag'; +} + +export function computeCacheEfficiencyAssessment(pct: number): CacheAssessment { + return pct >= THRESHOLDS.cacheEfficiency.good ? 'good' : 'concerning'; +} + +export function computeCacheRatioAssessment(ratio: number): CacheAssessment { + return ratio >= THRESHOLDS.cacheRwRatio.good ? 'good' : 'concerning'; +} + +export function computeToolHealthAssessment(successPct: number): ToolHealthAssessment { + if (successPct > THRESHOLDS.toolSuccess.healthy) return 'healthy'; + if (successPct >= THRESHOLDS.toolSuccess.degraded) return 'degraded'; + return 'unreliable'; +} + +export function computeIdleAssessment(idlePct: number): IdleAssessment { + if (idlePct < THRESHOLDS.idle.efficient) return 'efficient'; + if (idlePct < THRESHOLDS.idle.moderate) return 'moderate'; + return 'high_idle'; +} + +export function computeRedundancyAssessment(readsPerUnique: number): RedundancyAssessment { + return readsPerUnique <= THRESHOLDS.fileReadsPerUnique.normal ? 'normal' : 'wasteful'; +} + +export function computeOverheadAssessment(pctOfTotal: number): OverheadAssessment { + return pctOfTotal <= THRESHOLDS.startupOverhead.normal ? 'normal' : 'heavy'; +} + +export function computeThrashingAssessment(signalCount: number): ThrashingAssessment { + if (signalCount === 0) return 'none'; + if (signalCount <= 2) return 'mild'; + return 'severe'; +} + +export interface ModelMismatch { + description: string; + expectedComplexity: 'mechanical' | 'read_only'; + recommendation: string; +} + +const MECHANICAL_PATTERNS = /\b(rename|move|lint|format|delete|remove|copy|replace)\b/i; +const READ_ONLY_PATTERNS = /\b(explore|search|find|verify|check|scan|discover|list|read)\b/i; + +export function detectModelMismatch(description: string, model: string): ModelMismatch | null { + const isOpus = model.toLowerCase().includes('opus'); + if (!isOpus) return null; + + if (MECHANICAL_PATTERNS.test(description)) { + return { + description, + expectedComplexity: 'mechanical', + recommendation: 'Consider using Haiku for mechanical tasks to reduce cost.', + }; + } + + if (READ_ONLY_PATTERNS.test(description)) { + return { + description, + expectedComplexity: 'read_only', + recommendation: 'Consider using Haiku or Sonnet for read-only exploration tasks.', + }; + } + + return null; +} + +export function detectSwitchPattern( + switches: { from: string; to: string }[] +): SwitchPattern | null { + if (switches.length === 0) return null; + if (switches.length < 2) return 'manual_switch'; + + // Look for Sonnet→Opus→Sonnet pattern (plan mode) + for (let i = 0; i < switches.length - 1; i++) { + const s1 = switches[i]; + const s2 = switches[i + 1]; + if ( + s1.from.toLowerCase().includes('sonnet') && + s1.to.toLowerCase().includes('opus') && + s2.from.toLowerCase().includes('opus') && + s2.to.toLowerCase().includes('sonnet') + ) { + return 'opus_plan_mode'; + } + } + + return 'manual_switch'; +} diff --git a/src/renderer/utils/sessionAnalyzer.ts b/src/renderer/utils/sessionAnalyzer.ts index 70c00cc0..286b4b85 100644 --- a/src/renderer/utils/sessionAnalyzer.ts +++ b/src/renderer/utils/sessionAnalyzer.ts @@ -8,6 +8,21 @@ * Runs entirely in the renderer process — no IPC needed. */ +import { + computeCacheEfficiencyAssessment, + computeCacheRatioAssessment, + computeCostPerCommitAssessment, + computeCostPerLineAssessment, + computeIdleAssessment, + computeOverheadAssessment, + computeRedundancyAssessment, + computeSubagentCostShareAssessment, + computeThrashingAssessment, + computeToolHealthAssessment, + detectModelMismatch, + detectSwitchPattern, +} from '@renderer/utils/reportAssessments'; + import type { AgentTreeNode, FrictionCorrection, @@ -25,6 +40,7 @@ import type { TestSnapshot, ThinkingBlockAnalysis, ToolError, + ToolSuccessRate, UserQuestion, } from '@renderer/types/sessionReport'; import type { @@ -283,8 +299,6 @@ export function analyzeSession(detail: SessionDetail): SessionReport { }; // Cache economics - const cacheCreation5m = 0; - const cacheCreation1h = 0; let totalCacheCreation = 0; let totalCacheRead = 0; let coldStartDetected = false; @@ -356,7 +370,7 @@ export function analyzeSession(detail: SessionDetail): SessionReport { const testSnapshots: TestSnapshot[] = []; // Cost tracking - let totalSessionCost = 0; + let parentCost = 0; // Git activity const gitCommits: GitCommit[] = []; @@ -457,7 +471,7 @@ export function analyzeSession(detail: SessionDetail): SessionReport { const callCost = costUsd(model, inpTok, outTok, cr, cc); stats.costUsd += callCost; - totalSessionCost += callCost; + parentCost += callCost; totalCacheCreation += cc; totalCacheRead += cr; @@ -868,18 +882,23 @@ export function analyzeSession(detail: SessionDetail): SessionReport { const linesChanged = linesAddedTotal + linesRemovedTotal; // --- Subagent metrics from detail.processes --- - const subagentEntries: SubagentEntry[] = detail.processes.map((proc: Process) => ({ - description: proc.description ?? 'unknown', - subagentType: proc.subagentType ?? 'unknown', - model: 'default (inherits parent)', - totalTokens: proc.metrics.totalTokens, - totalDurationMs: proc.durationMs, - totalToolUseCount: proc.messages.reduce( - (sum: number, pm: ParsedMessage) => sum + pm.toolCalls.length, - 0 - ), - costUsd: proc.metrics.costUsd ?? 0, - })); + const subagentEntries: SubagentEntry[] = detail.processes.map((proc: Process) => { + const desc = proc.description ?? 'unknown'; + const model = 'default (inherits parent)'; + return { + description: desc, + subagentType: proc.subagentType ?? 'unknown', + model, + totalTokens: proc.metrics.totalTokens, + totalDurationMs: proc.durationMs, + totalToolUseCount: proc.messages.reduce( + (sum: number, pm: ParsedMessage) => sum + pm.toolCalls.length, + 0 + ), + costUsd: proc.metrics.costUsd ?? 0, + modelMismatch: detectModelMismatch(desc, model), + }; + }); const saFromProcesses = { count: subagentEntries.length, @@ -892,22 +911,32 @@ export function analyzeSession(detail: SessionDetail): SessionReport { }; // --- Tool usage with success rates --- - const toolSuccessRates: Record< - string, - { totalCalls: number; errors: number; successRatePct: number } - > = {}; + const toolSuccessRates: Record = {}; const sortedToolCounts = [...toolCounts.entries()].sort((a, b) => b[1] - a[1]); const countsRecord: Record = {}; for (const [tool, count] of sortedToolCounts) { countsRecord[tool] = count; const errCount = errorsByTool.get(tool) ?? 0; + const successPct = count ? Math.round(((count - errCount) / count) * 1000) / 10 : 0; toolSuccessRates[tool] = { totalCalls: count, errors: errCount, - successRatePct: count ? Math.round(((count - errCount) / count) * 1000) / 10 : 0, + successRatePct: successPct, + assessment: computeToolHealthAssessment(successPct), }; } + // Overall tool health: worst assessment among tools with >5 calls + const significantTools = Object.values(toolSuccessRates).filter((t) => t.totalCalls > 5); + type THAssessment = 'healthy' | 'degraded' | 'unreliable'; + const overallToolHealth: THAssessment = + significantTools.length > 0 + ? significantTools.reduce((worst, t) => { + const order = { healthy: 0, degraded: 1, unreliable: 2 } as const; + return order[t.assessment] > order[worst] ? t.assessment : worst; + }, 'healthy') + : computeToolHealthAssessment(100); + // --- Key events timing --- for (let j = 1; j < keyEvents.length; j++) { const prevDt = keyEvents[j - 1].timestamp; @@ -967,14 +996,19 @@ export function analyzeSession(detail: SessionDetail): SessionReport { // --- Conversation tree analysis --- const depthMemo = new Map(); - function getDepth(uuid: string): number { + function getDepth(uuid: string, visited = new Set()): number { if (depthMemo.has(uuid)) return depthMemo.get(uuid)!; + if (visited.has(uuid)) { + depthMemo.set(uuid, 0); + return 0; + } + visited.add(uuid); const parent = parentMap.get(uuid); if (!parent) { depthMemo.set(uuid, 0); return 0; } - const depth = 1 + getDepth(parent); + const depth = 1 + getDepth(parent, visited); depthMemo.set(uuid, depth); return depth; } @@ -1071,6 +1105,20 @@ export function analyzeSession(detail: SessionDetail): SessionReport { // --- Subagent cost from processes --- const processSubagentCost = subagentEntries.reduce((sum, a) => sum + a.costUsd, 0); + const totalCost = parentCost + processSubagentCost; + + // --- Assessment computations --- + const costPerCommitVal = + commitCount > 0 ? Math.round((totalCost / commitCount) * 10000) / 10000 : null; + const costPerLineVal = + linesChanged > 0 ? Math.round((totalCost / linesChanged) * 1000000) / 1000000 : null; + const subagentCostSharePct = + totalCost > 0 ? Math.round((processSubagentCost / totalCost) * 10000) / 100 : null; + + const readsPerUniqueFile = uniqueFiles ? Math.round((totalReads / uniqueFiles) * 100) / 100 : 0; + const startupPctOfTotal = grandTotal ? Math.round((startupTokens / grandTotal) * 10000) / 100 : 0; + const idlePct = wallClock > 0 ? Math.round((totalIdle / wallClock) * 1000) / 10 : 0; + const thrashingSignalCount = bashNearDuplicates.length + editReworkFiles.length; // =================================================================== // BUILD REPORT @@ -1111,39 +1159,44 @@ export function analyzeSession(detail: SessionDetail): SessionReport { }, costAnalysis: { - parentCostUsd: Math.round(totalSessionCost * 10000) / 10000, + parentCostUsd: Math.round(parentCost * 10000) / 10000, subagentCostUsd: Math.round(processSubagentCost * 10000) / 10000, - totalSessionCostUsd: Math.round((totalSessionCost + processSubagentCost) * 10000) / 10000, + totalSessionCostUsd: Math.round(totalCost * 10000) / 10000, costByModel: Object.fromEntries( [...modelStats.entries()].map(([model, stats]) => [ model, Math.round(stats.costUsd * 10000) / 10000, ]) ), - costPerCommit: - commitCount > 0 - ? Math.round(((totalSessionCost + processSubagentCost) / commitCount) * 10000) / 10000 - : null, - costPerLineChanged: - linesChanged > 0 - ? Math.round(((totalSessionCost + processSubagentCost) / linesChanged) * 1000000) / - 1000000 + costPerCommit: costPerCommitVal, + costPerLineChanged: costPerLineVal, + costPerCommitAssessment: + costPerCommitVal != null ? computeCostPerCommitAssessment(costPerCommitVal) : null, + costPerLineAssessment: + costPerLineVal != null ? computeCostPerLineAssessment(costPerLineVal) : null, + subagentCostSharePct, + subagentCostShareAssessment: + subagentCostSharePct != null + ? computeSubagentCostShareAssessment(subagentCostSharePct) : null, }, cacheEconomics: { - cacheCreation5m, - cacheCreation1h, cacheRead: totalCacheRead, cacheEfficiencyPct: cacheEfficiency, coldStartDetected, cacheReadToWriteRatio: cacheRwRatio, + cacheEfficiencyAssessment: + cacheTotalCreationAndRead > 0 ? computeCacheEfficiencyAssessment(cacheEfficiency) : null, + cacheRatioAssessment: + totalCacheCreation > 0 ? computeCacheRatioAssessment(cacheRwRatio) : null, }, toolUsage: { counts: countsRecord, totalCalls: [...toolCounts.values()].reduce((sum, c) => sum + c, 0), successRates: toolSuccessRates, + overallToolHealth, }, subagentMetrics: saFromProcesses, @@ -1178,6 +1231,7 @@ export function analyzeSession(detail: SessionDetail): SessionReport { thrashingSignals: { bashNearDuplicates, editReworkFiles, + thrashingAssessment: computeThrashingAssessment(thrashingSignalCount), }, conversationTree: { @@ -1196,14 +1250,16 @@ export function analyzeSession(detail: SessionDetail): SessionReport { wallClockSeconds: Math.round(wallClock * 10) / 10, activeWorkingSeconds: Math.round(Math.max(activeTime, 0) * 10) / 10, activeWorkingHuman: formatDuration(Math.floor(Math.max(activeTime, 0))), - idlePct: wallClock > 0 ? Math.round((totalIdle / wallClock) * 1000) / 10 : 0, + idlePct, longestGaps: [...idleGaps].sort((a, b) => b.gapSeconds - a.gapSeconds).slice(0, 5), + idleAssessment: computeIdleAssessment(idlePct), }, modelSwitches: { count: modelSwitches.length, switches: modelSwitches, modelsUsed, + switchPattern: detectSwitchPattern(modelSwitches), }, workingDirectories: { @@ -1225,7 +1281,8 @@ export function analyzeSession(detail: SessionDetail): SessionReport { startupOverhead: { messagesBeforeFirstWork: startupMessages, tokensBeforeFirstWork: startupTokens, - pctOfTotal: grandTotal ? Math.round((startupTokens / grandTotal) * 10000) / 100 : 0, + pctOfTotal: startupPctOfTotal, + overheadAssessment: computeOverheadAssessment(startupPctOfTotal), }, tokenDensityTimeline: { quartiles }, @@ -1253,8 +1310,9 @@ export function analyzeSession(detail: SessionDetail): SessionReport { fileReadRedundancy: { totalReads, uniqueFiles, - readsPerUniqueFile: uniqueFiles ? Math.round((totalReads / uniqueFiles) * 100) / 100 : 0, + readsPerUniqueFile, redundantFiles, + redundancyAssessment: computeRedundancyAssessment(readsPerUniqueFile), }, compaction: { diff --git a/test/renderer/utils/reportAssessments.test.ts b/test/renderer/utils/reportAssessments.test.ts new file mode 100644 index 00000000..a961f1cc --- /dev/null +++ b/test/renderer/utils/reportAssessments.test.ts @@ -0,0 +1,259 @@ +import { describe, it, expect } from 'vitest'; + +import { + assessmentColor, + assessmentLabel, + assessmentSeverity, + computeCacheEfficiencyAssessment, + computeCacheRatioAssessment, + computeCostPerCommitAssessment, + computeCostPerLineAssessment, + computeIdleAssessment, + computeOverheadAssessment, + computeRedundancyAssessment, + computeSubagentCostShareAssessment, + computeThrashingAssessment, + computeToolHealthAssessment, + detectModelMismatch, + detectSwitchPattern, + severityColor, + THRESHOLDS, +} from '@renderer/utils/reportAssessments'; + +describe('reportAssessments', () => { + describe('severityColor', () => { + it('maps severity to hex color', () => { + expect(severityColor('good')).toBe('#4ade80'); + expect(severityColor('warning')).toBe('#fbbf24'); + expect(severityColor('danger')).toBe('#f87171'); + expect(severityColor('neutral')).toBe('#a1a1aa'); + }); + }); + + describe('assessmentSeverity', () => { + it('maps known assessments to severity', () => { + expect(assessmentSeverity('healthy')).toBe('good'); + expect(assessmentSeverity('efficient')).toBe('good'); + expect(assessmentSeverity('expensive')).toBe('warning'); + expect(assessmentSeverity('red_flag')).toBe('danger'); + expect(assessmentSeverity('very_high')).toBe('danger'); + expect(assessmentSeverity('degraded')).toBe('warning'); + expect(assessmentSeverity('unreliable')).toBe('danger'); + expect(assessmentSeverity('high_idle')).toBe('danger'); + expect(assessmentSeverity('moderate')).toBe('warning'); + }); + + it('returns neutral for null/undefined/unknown', () => { + expect(assessmentSeverity(null)).toBe('neutral'); + expect(assessmentSeverity(undefined)).toBe('neutral'); + expect(assessmentSeverity('unknown_value')).toBe('neutral'); + }); + }); + + describe('assessmentColor', () => { + it('returns correct color for assessment string', () => { + expect(assessmentColor('healthy')).toBe('#4ade80'); + expect(assessmentColor('red_flag')).toBe('#f87171'); + expect(assessmentColor(null)).toBe('#a1a1aa'); + }); + }); + + describe('assessmentLabel', () => { + it('converts snake_case to Title Case', () => { + expect(assessmentLabel('red_flag')).toBe('Red Flag'); + expect(assessmentLabel('well_specified')).toBe('Well Specified'); + expect(assessmentLabel('healthy')).toBe('Healthy'); + expect(assessmentLabel('high_idle')).toBe('High Idle'); + expect(assessmentLabel('opus_plan_mode')).toBe('Opus Plan Mode'); + }); + }); + + describe('computeCostPerCommitAssessment', () => { + it('returns efficient below threshold', () => { + expect(computeCostPerCommitAssessment(0.3)).toBe('efficient'); + }); + it('returns normal in range', () => { + expect(computeCostPerCommitAssessment(1.0)).toBe('normal'); + }); + it('returns expensive in range', () => { + expect(computeCostPerCommitAssessment(3.0)).toBe('expensive'); + }); + it('returns red_flag above threshold', () => { + expect(computeCostPerCommitAssessment(10.0)).toBe('red_flag'); + }); + it('respects threshold boundaries', () => { + expect(computeCostPerCommitAssessment(THRESHOLDS.costPerCommit.efficient - 0.01)).toBe( + 'efficient' + ); + expect(computeCostPerCommitAssessment(THRESHOLDS.costPerCommit.efficient)).toBe('normal'); + }); + }); + + describe('computeCostPerLineAssessment', () => { + it('returns efficient below threshold', () => { + expect(computeCostPerLineAssessment(0.005)).toBe('efficient'); + }); + it('returns red_flag above threshold', () => { + expect(computeCostPerLineAssessment(0.5)).toBe('red_flag'); + }); + }); + + describe('computeSubagentCostShareAssessment', () => { + it('returns normal below 30%', () => { + expect(computeSubagentCostShareAssessment(20)).toBe('normal'); + }); + it('returns high in range', () => { + expect(computeSubagentCostShareAssessment(45)).toBe('high'); + }); + it('returns very_high in range', () => { + expect(computeSubagentCostShareAssessment(70)).toBe('very_high'); + }); + it('returns red_flag above 80%', () => { + expect(computeSubagentCostShareAssessment(90)).toBe('red_flag'); + }); + }); + + describe('computeCacheEfficiencyAssessment', () => { + it('returns good above 95%', () => { + expect(computeCacheEfficiencyAssessment(96)).toBe('good'); + }); + it('returns concerning below 95%', () => { + expect(computeCacheEfficiencyAssessment(90)).toBe('concerning'); + }); + }); + + describe('computeCacheRatioAssessment', () => { + it('returns good above 20', () => { + expect(computeCacheRatioAssessment(25)).toBe('good'); + }); + it('returns concerning below 20', () => { + expect(computeCacheRatioAssessment(10)).toBe('concerning'); + }); + }); + + describe('computeToolHealthAssessment', () => { + it('returns healthy above 95%', () => { + expect(computeToolHealthAssessment(98)).toBe('healthy'); + }); + it('returns degraded between 80-95%', () => { + expect(computeToolHealthAssessment(85)).toBe('degraded'); + }); + it('returns unreliable below 80%', () => { + expect(computeToolHealthAssessment(70)).toBe('unreliable'); + }); + it('boundary: 95 is degraded, 95.1 is healthy', () => { + expect(computeToolHealthAssessment(95)).toBe('degraded'); + expect(computeToolHealthAssessment(95.1)).toBe('healthy'); + }); + }); + + describe('computeIdleAssessment', () => { + it('returns efficient below 20%', () => { + expect(computeIdleAssessment(10)).toBe('efficient'); + }); + it('returns moderate between 20-50%', () => { + expect(computeIdleAssessment(35)).toBe('moderate'); + }); + it('returns high_idle above 50%', () => { + expect(computeIdleAssessment(60)).toBe('high_idle'); + }); + }); + + describe('computeRedundancyAssessment', () => { + it('returns normal at or below 2.0', () => { + expect(computeRedundancyAssessment(1.5)).toBe('normal'); + expect(computeRedundancyAssessment(2.0)).toBe('normal'); + }); + it('returns wasteful above 2.0', () => { + expect(computeRedundancyAssessment(3.0)).toBe('wasteful'); + }); + }); + + describe('computeOverheadAssessment', () => { + it('returns normal at or below 5%', () => { + expect(computeOverheadAssessment(3)).toBe('normal'); + expect(computeOverheadAssessment(5)).toBe('normal'); + }); + it('returns heavy above 5%', () => { + expect(computeOverheadAssessment(10)).toBe('heavy'); + }); + }); + + describe('computeThrashingAssessment', () => { + it('returns none for 0 signals', () => { + expect(computeThrashingAssessment(0)).toBe('none'); + }); + it('returns mild for 1-2 signals', () => { + expect(computeThrashingAssessment(1)).toBe('mild'); + expect(computeThrashingAssessment(2)).toBe('mild'); + }); + it('returns severe for 3+ signals', () => { + expect(computeThrashingAssessment(3)).toBe('severe'); + expect(computeThrashingAssessment(5)).toBe('severe'); + }); + }); + + describe('detectModelMismatch', () => { + it('returns null for non-opus models', () => { + expect(detectModelMismatch('rename files', 'claude-sonnet-4')).toBeNull(); + }); + + it('detects mechanical tasks on opus', () => { + const result = detectModelMismatch('rename all variables', 'claude-opus-4'); + expect(result).not.toBeNull(); + expect(result!.expectedComplexity).toBe('mechanical'); + }); + + it('detects read-only tasks on opus', () => { + const result = detectModelMismatch('explore the codebase', 'claude-opus-4'); + expect(result).not.toBeNull(); + expect(result!.expectedComplexity).toBe('read_only'); + }); + + it('returns null for complex tasks on opus', () => { + expect(detectModelMismatch('implement authentication system', 'claude-opus-4')).toBeNull(); + }); + + it('detects various mechanical keywords', () => { + for (const kw of ['lint', 'format', 'delete', 'move', 'copy', 'replace']) { + expect(detectModelMismatch(`${kw} the code`, 'opus')).not.toBeNull(); + } + }); + + it('detects various read-only keywords', () => { + for (const kw of ['search', 'find', 'verify', 'check', 'scan', 'discover']) { + expect(detectModelMismatch(`${kw} for errors`, 'opus')).not.toBeNull(); + } + }); + }); + + describe('detectSwitchPattern', () => { + it('returns null for no switches', () => { + expect(detectSwitchPattern([])).toBeNull(); + }); + + it('returns manual_switch for single switch', () => { + expect(detectSwitchPattern([{ from: 'claude-sonnet-4', to: 'claude-haiku-4' }])).toBe( + 'manual_switch' + ); + }); + + it('detects opus_plan_mode pattern', () => { + expect( + detectSwitchPattern([ + { from: 'claude-sonnet-4', to: 'claude-opus-4' }, + { from: 'claude-opus-4', to: 'claude-sonnet-4' }, + ]) + ).toBe('opus_plan_mode'); + }); + + it('returns manual_switch for non-plan-mode switches', () => { + expect( + detectSwitchPattern([ + { from: 'claude-sonnet-4', to: 'claude-haiku-4' }, + { from: 'claude-haiku-4', to: 'claude-sonnet-4' }, + ]) + ).toBe('manual_switch'); + }); + }); +}); diff --git a/test/renderer/utils/sessionAnalyzer.test.ts b/test/renderer/utils/sessionAnalyzer.test.ts index a75cf256..06064df4 100644 --- a/test/renderer/utils/sessionAnalyzer.test.ts +++ b/test/renderer/utils/sessionAnalyzer.test.ts @@ -1181,4 +1181,329 @@ describe('analyzeSession', () => { expect(report.compaction.note).toContain('No compaction'); }); }); + + // ------------------------------------------------------------------------- + // Assessment computations + // ------------------------------------------------------------------------- + + describe('cost assessments', () => { + it('computes costPerCommitAssessment when commits exist', () => { + const messages: ParsedMessage[] = [ + createMockMessage({ + type: 'assistant', + model: 'claude-sonnet-4-20250514', + usage: { input_tokens: 50000, output_tokens: 10000 }, + toolCalls: [ + { + id: 'tc-1', + name: 'Bash', + input: { command: "git commit -m 'fix'" }, + isTask: false, + }, + ], + }), + ]; + + const report = analyzeSession(createMockDetail({ messages })); + expect(report.costAnalysis.costPerCommitAssessment).not.toBeNull(); + }); + + it('returns null assessments when no commits', () => { + const report = analyzeSession(createMockDetail()); + expect(report.costAnalysis.costPerCommitAssessment).toBeNull(); + expect(report.costAnalysis.costPerLineAssessment).toBeNull(); + }); + + it('returns null subagentCostShareAssessment when no cost', () => { + const report = analyzeSession(createMockDetail()); + expect(report.costAnalysis.subagentCostSharePct).toBeNull(); + expect(report.costAnalysis.subagentCostShareAssessment).toBeNull(); + }); + }); + + describe('cache assessments', () => { + it('computes cache efficiency assessment', () => { + const messages: ParsedMessage[] = [ + createMockMessage({ + type: 'assistant', + model: 'claude-sonnet-4-20250514', + usage: { + input_tokens: 100, + output_tokens: 50, + cache_creation_input_tokens: 100, + cache_read_input_tokens: 9900, + }, + }), + ]; + + const report = analyzeSession(createMockDetail({ messages })); + expect(report.cacheEconomics.cacheEfficiencyAssessment).toBe('good'); + }); + + it('returns concerning for low cache efficiency', () => { + const messages: ParsedMessage[] = [ + createMockMessage({ + type: 'assistant', + model: 'claude-sonnet-4-20250514', + usage: { + input_tokens: 100, + output_tokens: 50, + cache_creation_input_tokens: 500, + cache_read_input_tokens: 500, + }, + }), + ]; + + const report = analyzeSession(createMockDetail({ messages })); + expect(report.cacheEconomics.cacheEfficiencyAssessment).toBe('concerning'); + }); + + it('returns null when no cache data', () => { + const messages: ParsedMessage[] = [ + createMockMessage({ + type: 'assistant', + model: 'claude-sonnet-4-20250514', + usage: { + input_tokens: 100, + output_tokens: 50, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }, + }), + ]; + + const report = analyzeSession(createMockDetail({ messages })); + expect(report.cacheEconomics.cacheEfficiencyAssessment).toBeNull(); + expect(report.cacheEconomics.cacheRatioAssessment).toBeNull(); + }); + }); + + describe('tool health assessments', () => { + it('computes per-tool assessment', () => { + const messages: ParsedMessage[] = [ + createMockMessage({ + type: 'assistant', + toolCalls: [ + { id: 'tc-1', name: 'Read', input: { file_path: '/a.ts' }, isTask: false }, + { id: 'tc-2', name: 'Read', input: { file_path: '/b.ts' }, isTask: false }, + ], + }), + createMockMessage({ + type: 'user', + isMeta: true, + content: [], + toolResults: [ + { toolUseId: 'tc-1', content: 'ok', isError: false }, + { toolUseId: 'tc-2', content: 'ok', isError: false }, + ], + }), + ]; + + const report = analyzeSession(createMockDetail({ messages })); + expect(report.toolUsage.successRates.Read.assessment).toBe('healthy'); + }); + + it('computes overall tool health', () => { + const report = analyzeSession(createMockDetail()); + expect(report.toolUsage.overallToolHealth).toBe('healthy'); + }); + }); + + describe('idle assessment', () => { + it('returns efficient for low idle', () => { + const messages: ParsedMessage[] = [ + createMockMessage({ + type: 'assistant', + timestamp: new Date('2024-01-01T10:00:00Z'), + }), + createMockMessage({ + type: 'user', + content: 'quick', + timestamp: new Date('2024-01-01T10:00:30Z'), + }), + ]; + + const report = analyzeSession(createMockDetail({ messages })); + expect(report.idleAnalysis.idleAssessment).toBe('efficient'); + }); + + it('returns high_idle for mostly idle session', () => { + const messages: ParsedMessage[] = [ + createMockMessage({ + type: 'assistant', + timestamp: new Date('2024-01-01T10:00:00Z'), + }), + createMockMessage({ + type: 'user', + content: 'back', + timestamp: new Date('2024-01-01T11:00:00Z'), + }), + createMockMessage({ + type: 'assistant', + timestamp: new Date('2024-01-01T11:00:10Z'), + }), + ]; + + const report = analyzeSession(createMockDetail({ messages })); + expect(report.idleAnalysis.idleAssessment).toBe('high_idle'); + }); + }); + + describe('thrashing assessment', () => { + it('returns none when no signals', () => { + const report = analyzeSession(createMockDetail()); + expect(report.thrashingSignals.thrashingAssessment).toBe('none'); + }); + + it('returns mild or severe based on signal count', () => { + const makeEditMsg = (file: string, id: string) => + createMockMessage({ + type: 'assistant', + toolCalls: [{ id, name: 'Edit', input: { file_path: file }, isTask: false }], + }); + + // 3 edits on one file = 1 signal + 3 repeated bash = 1 signal = mild (2) + const messages: ParsedMessage[] = [ + makeEditMsg('/foo.ts', 'e1'), + makeEditMsg('/foo.ts', 'e2'), + makeEditMsg('/foo.ts', 'e3'), + ]; + + const report = analyzeSession(createMockDetail({ messages })); + expect(['mild', 'severe']).toContain(report.thrashingSignals.thrashingAssessment); + }); + }); + + describe('model switch pattern', () => { + it('detects opus_plan_mode', () => { + const messages: ParsedMessage[] = [ + createMockMessage({ + type: 'assistant', + model: 'claude-sonnet-4-20250514', + timestamp: new Date('2024-01-01T10:00:00Z'), + }), + createMockMessage({ + type: 'assistant', + model: 'claude-opus-4-20250514', + timestamp: new Date('2024-01-01T10:01:00Z'), + }), + createMockMessage({ + type: 'assistant', + model: 'claude-sonnet-4-20250514', + timestamp: new Date('2024-01-01T10:02:00Z'), + }), + ]; + + const report = analyzeSession(createMockDetail({ messages })); + expect(report.modelSwitches.switchPattern).toBe('opus_plan_mode'); + }); + + it('returns null when no switches', () => { + const messages: ParsedMessage[] = [ + createMockMessage({ + type: 'assistant', + model: 'claude-sonnet-4-20250514', + usage: { input_tokens: 100, output_tokens: 50 }, + }), + ]; + + const report = analyzeSession(createMockDetail({ messages })); + expect(report.modelSwitches.switchPattern).toBeNull(); + }); + }); + + describe('startup overhead assessment', () => { + it('returns normal for low overhead', () => { + const messages: ParsedMessage[] = [ + createMockMessage({ + type: 'assistant', + model: 'claude-sonnet-4-20250514', + usage: { input_tokens: 100, output_tokens: 50 }, + toolCalls: [{ id: 'tc-1', name: 'Read', input: { file_path: '/f.ts' }, isTask: false }], + }), + ]; + + const report = analyzeSession(createMockDetail({ messages })); + expect(report.startupOverhead.overheadAssessment).toBe('normal'); + }); + + it('returns heavy for high overhead', () => { + const messages: ParsedMessage[] = [ + // Lots of startup tokens, no work tools + createMockMessage({ + type: 'assistant', + model: 'claude-sonnet-4-20250514', + usage: { input_tokens: 50000, output_tokens: 10000 }, + toolCalls: [], + }), + // Small work message + createMockMessage({ + type: 'assistant', + model: 'claude-sonnet-4-20250514', + usage: { input_tokens: 100, output_tokens: 50 }, + toolCalls: [{ id: 'tc-1', name: 'Read', input: { file_path: '/f.ts' }, isTask: false }], + }), + ]; + + const report = analyzeSession(createMockDetail({ messages })); + expect(report.startupOverhead.overheadAssessment).toBe('heavy'); + }); + }); + + describe('file read redundancy assessment', () => { + it('returns normal for low redundancy', () => { + const messages: ParsedMessage[] = [ + createMockMessage({ + type: 'assistant', + toolCalls: [ + { id: 'tc-1', name: 'Read', input: { file_path: '/a.ts' }, isTask: false }, + { id: 'tc-2', name: 'Read', input: { file_path: '/b.ts' }, isTask: false }, + ], + }), + ]; + + const report = analyzeSession(createMockDetail({ messages })); + expect(report.fileReadRedundancy.redundancyAssessment).toBe('normal'); + }); + + it('returns wasteful for high redundancy', () => { + const messages: ParsedMessage[] = [ + createMockMessage({ + type: 'assistant', + toolCalls: [ + { id: 'tc-1', name: 'Read', input: { file_path: '/a.ts' }, isTask: false }, + { id: 'tc-2', name: 'Read', input: { file_path: '/a.ts' }, isTask: false }, + { id: 'tc-3', name: 'Read', input: { file_path: '/a.ts' }, isTask: false }, + { id: 'tc-4', name: 'Read', input: { file_path: '/a.ts' }, isTask: false }, + ], + }), + ]; + + const report = analyzeSession(createMockDetail({ messages })); + expect(report.fileReadRedundancy.redundancyAssessment).toBe('wasteful'); + }); + }); + + describe('model mismatch in subagents', () => { + it('detects mismatch for mechanical tasks on opus', () => { + const processes: Process[] = [ + { + id: 'agent-1', + filePath: '/path/to/agent-1.jsonl', + messages: [], + startTime: new Date('2024-01-01T10:00:00Z'), + endTime: new Date('2024-01-01T10:01:00Z'), + durationMs: 60000, + metrics: createMockMetrics({ totalTokens: 5000, costUsd: 0.05 }), + description: 'rename all variables', + subagentType: 'code', + isParallel: false, + }, + ]; + + const report = analyzeSession(createMockDetail({ processes })); + // model is 'default (inherits parent)' which doesn't contain 'opus', so no mismatch + expect(report.subagentMetrics.byAgent[0].modelMismatch).toBeNull(); + }); + }); });
Tool Calls ErrorsSuccess %Success %Health
{tool} {stats.errors.toLocaleString()} + {stats.successRatePct}% + + {assessmentLabel(stats.assessment)} + +