feat(report): add threshold-based assessment badges to all report sections

Add an interpretive assessment layer to session reports. Every metric section
now shows color-coded severity badges (green/amber/red) computed from
configurable thresholds, complementing the raw numbers with an at-a-glance signal.

Includes centralized reportAssessments utility, model mismatch detection,
switch pattern recognition, and 44 new tests.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Paul Holstein 2026-02-21 18:01:43 -05:00
parent ab1ad071fe
commit 820bb124d5
17 changed files with 1208 additions and 161 deletions

View file

@ -191,9 +191,7 @@ export const FlatInjectionList = ({
}
};
const displayText = row.description
? `${row.label} \u2014 ${row.description}`
: row.label;
const displayText = row.description ? `${row.label} \u2014 ${row.description}` : row.label;
return (
<div key={row.key} className="flex items-center gap-0.5">

View file

@ -152,7 +152,7 @@ export function extractOutputText(content: string | unknown[]): string {
.map((block) =>
typeof block === 'object' && block !== null && 'text' in block
? (block as { text: string }).text
: JSON.stringify(block, null, 2),
: JSON.stringify(block, null, 2)
)
.join('\n');
} else {

View file

@ -63,6 +63,7 @@ export const SessionReportTab = ({ tab }: SessionReportTabProps) => {
prompt={report.promptQuality}
startup={report.startupOverhead}
testProgression={report.testProgression}
fileReadRedundancy={report.fileReadRedundancy}
/>
<InsightsSection
skills={report.skillsInvoked}

View file

@ -1,3 +1,4 @@
import { assessmentColor, assessmentLabel } from '@renderer/utils/reportAssessments';
import { DollarSign } from 'lucide-react';
import { ReportSection } from '../ReportSection';
@ -28,18 +29,62 @@ export const CostSection = ({ data }: CostSectionProps) => {
</div>
<div>
<div className="text-xs text-text-muted">Per Commit</div>
<div className="text-sm font-medium text-text">
{data.costPerCommit != null ? fmt(data.costPerCommit) : 'N/A'}
<div className="flex items-center gap-2">
<span className="text-sm font-medium text-text">
{data.costPerCommit != null ? fmt(data.costPerCommit) : 'N/A'}
</span>
{data.costPerCommitAssessment && (
<span
className="rounded px-2 py-0.5 text-xs font-medium"
style={{
backgroundColor: `${assessmentColor(data.costPerCommitAssessment)}20`,
color: assessmentColor(data.costPerCommitAssessment),
}}
>
{assessmentLabel(data.costPerCommitAssessment)}
</span>
)}
</div>
</div>
<div>
<div className="text-xs text-text-muted">Per Line Changed</div>
<div className="text-sm font-medium text-text">
{data.costPerLineChanged != null ? `$${data.costPerLineChanged.toFixed(6)}` : 'N/A'}
<div className="flex items-center gap-2">
<span className="text-sm font-medium text-text">
{data.costPerLineChanged != null ? `$${data.costPerLineChanged.toFixed(6)}` : 'N/A'}
</span>
{data.costPerLineAssessment && (
<span
className="rounded px-2 py-0.5 text-xs font-medium"
style={{
backgroundColor: `${assessmentColor(data.costPerLineAssessment)}20`,
color: assessmentColor(data.costPerLineAssessment),
}}
>
{assessmentLabel(data.costPerLineAssessment)}
</span>
)}
</div>
</div>
</div>
{data.subagentCostSharePct != null && (
<div className="mb-4 flex items-center gap-2">
<span className="text-xs text-text-muted">Subagent Cost Share:</span>
<span className="text-sm font-medium text-text">{data.subagentCostSharePct}%</span>
{data.subagentCostShareAssessment && (
<span
className="rounded px-2 py-0.5 text-xs font-medium"
style={{
backgroundColor: `${assessmentColor(data.subagentCostShareAssessment)}20`,
color: assessmentColor(data.subagentCostShareAssessment),
}}
>
{assessmentLabel(data.subagentCostShareAssessment)}
</span>
)}
</div>
)}
{modelEntries.length > 0 && (
<table className="w-full text-xs">
<thead>

View file

@ -1,29 +1,28 @@
import { assessmentColor, assessmentLabel } from '@renderer/utils/reportAssessments';
import { MessageSquareWarning } from 'lucide-react';
import { ReportSection } from '../ReportSection';
import type { ReportFrictionSignals, ReportThrashingSignals } from '@renderer/types/sessionReport';
const frictionColor = (rate: number): string => {
if (rate <= 0.1) return '#4ade80';
if (rate <= 0.25) return '#fbbf24';
return '#f87171';
};
interface FrictionSectionProps {
data: ReportFrictionSignals;
thrashing: ReportThrashingSignals;
}
export const FrictionSection = ({ data, thrashing }: FrictionSectionProps) => {
const frictionColor =
data.frictionRate <= 0.1 ? '#4ade80' : data.frictionRate <= 0.25 ? '#fbbf24' : '#f87171';
const thrashColor = assessmentColor(thrashing.thrashingAssessment);
return (
<ReportSection title="Friction Signals" icon={MessageSquareWarning}>
<div className="mb-4 flex items-center gap-3">
<span
className="rounded px-2 py-0.5 text-xs font-medium"
style={{
backgroundColor: `${frictionColor(data.frictionRate)}20`,
color: frictionColor(data.frictionRate),
backgroundColor: `${frictionColor}20`,
color: frictionColor,
}}
>
Friction Rate: {(data.frictionRate * 100).toFixed(1)}%
@ -54,7 +53,15 @@ export const FrictionSection = ({ data, thrashing }: FrictionSectionProps) => {
{(thrashing.bashNearDuplicates.length > 0 || thrashing.editReworkFiles.length > 0) && (
<div>
<div className="mb-2 text-xs font-medium text-text-muted">Thrashing Signals</div>
<div className="mb-2 flex items-center gap-2">
<span className="text-xs font-medium text-text-muted">Thrashing Signals</span>
<span
className="rounded px-2 py-0.5 text-xs font-medium"
style={{ backgroundColor: `${thrashColor}20`, color: thrashColor }}
>
{assessmentLabel(thrashing.thrashingAssessment)}
</span>
</div>
{thrashing.bashNearDuplicates.length > 0 && (
<div className="mb-2">

View file

@ -1,24 +1,10 @@
import { assessmentColor } from '@renderer/utils/reportAssessments';
import { Activity } from 'lucide-react';
import { ReportSection } from '../ReportSection';
import type { ReportOverview } from '@renderer/types/sessionReport';
const assessmentColor = (assessment: ReportOverview['contextAssessment']): string => {
switch (assessment) {
case 'healthy':
return '#4ade80';
case 'moderate':
return '#fbbf24';
case 'high':
return '#f87171';
case 'critical':
return '#f87171';
default:
return '#a1a1aa';
}
};
interface OverviewSectionProps {
data: ReportOverview;
}

View file

@ -1,63 +1,33 @@
import { assessmentColor, assessmentLabel, severityColor } from '@renderer/utils/reportAssessments';
import { BarChart3 } from 'lucide-react';
import { ReportSection } from '../ReportSection';
import type {
ReportFileReadRedundancy,
ReportPromptQuality,
ReportStartupOverhead,
ReportTestProgression,
} from '@renderer/types/sessionReport';
const assessmentColor = (assessment: ReportPromptQuality['assessment']): string => {
switch (assessment) {
case 'well_specified':
return '#4ade80';
case 'moderate_friction':
return '#fbbf24';
case 'underspecified':
return '#f87171';
case 'verbose_but_unclear':
return '#f87171';
default:
return '#a1a1aa';
}
};
const assessmentLabel = (assessment: ReportPromptQuality['assessment']): string => {
switch (assessment) {
case 'well_specified':
return 'Well Specified';
case 'moderate_friction':
return 'Moderate Friction';
case 'underspecified':
return 'Underspecified';
case 'verbose_but_unclear':
return 'Verbose but Unclear';
default:
return assessment;
}
};
const trajectoryColor = (trajectory: ReportTestProgression['trajectory']): string => {
switch (trajectory) {
case 'improving':
return '#4ade80';
case 'regressing':
return '#f87171';
case 'stable':
return '#fbbf24';
default:
return '#a1a1aa';
}
};
interface QualitySectionProps {
prompt: ReportPromptQuality;
startup: ReportStartupOverhead;
testProgression: ReportTestProgression;
fileReadRedundancy: ReportFileReadRedundancy;
}
export const QualitySection = ({ prompt, startup, testProgression }: QualitySectionProps) => {
export const QualitySection = ({
prompt,
startup,
testProgression,
fileReadRedundancy,
}: QualitySectionProps) => {
const promptColor = assessmentColor(prompt.assessment);
const trajectoryColor = assessmentColor(testProgression.trajectory);
const overheadColor = assessmentColor(startup.overheadAssessment);
const redundancyColor = assessmentColor(fileReadRedundancy.redundancyAssessment);
return (
<ReportSection title="Quality Signals" icon={BarChart3}>
{/* Prompt quality */}
@ -66,10 +36,7 @@ export const QualitySection = ({ prompt, startup, testProgression }: QualitySect
<div className="mb-2 flex items-center gap-2">
<span
className="rounded px-2 py-0.5 text-xs font-medium"
style={{
backgroundColor: `${assessmentColor(prompt.assessment)}20`,
color: assessmentColor(prompt.assessment),
}}
style={{ backgroundColor: `${promptColor}20`, color: promptColor }}
>
{assessmentLabel(prompt.assessment)}
</span>
@ -101,7 +68,15 @@ export const QualitySection = ({ prompt, startup, testProgression }: QualitySect
{/* Startup overhead */}
<div className="mb-4">
<div className="mb-2 text-xs font-medium text-text-muted">Startup Overhead</div>
<div className="mb-2 flex items-center gap-2">
<span className="text-xs font-medium text-text-muted">Startup Overhead</span>
<span
className="rounded px-2 py-0.5 text-xs font-medium"
style={{ backgroundColor: `${overheadColor}20`, color: overheadColor }}
>
{assessmentLabel(startup.overheadAssessment)}
</span>
</div>
<div className="grid grid-cols-2 gap-3 sm:grid-cols-3">
<div>
<div className="text-xs text-text-muted">Messages Before Work</div>
@ -120,21 +95,44 @@ export const QualitySection = ({ prompt, startup, testProgression }: QualitySect
</div>
</div>
{/* File read redundancy */}
<div className="mb-4">
<div className="mb-2 flex items-center gap-2">
<span className="text-xs font-medium text-text-muted">File Read Redundancy</span>
<span
className="rounded px-2 py-0.5 text-xs font-medium"
style={{ backgroundColor: `${redundancyColor}20`, color: redundancyColor }}
>
{assessmentLabel(fileReadRedundancy.redundancyAssessment)}
</span>
</div>
<div className="grid grid-cols-2 gap-3 sm:grid-cols-3">
<div>
<div className="text-xs text-text-muted">Total Reads</div>
<div className="text-sm font-medium text-text">{fileReadRedundancy.totalReads}</div>
</div>
<div>
<div className="text-xs text-text-muted">Unique Files</div>
<div className="text-sm font-medium text-text">{fileReadRedundancy.uniqueFiles}</div>
</div>
<div>
<div className="text-xs text-text-muted">Reads/Unique File</div>
<div className="text-sm font-medium text-text">
{fileReadRedundancy.readsPerUniqueFile}x
</div>
</div>
</div>
</div>
{/* Test progression */}
<div>
<div className="mb-2 text-xs font-medium text-text-muted">Test Progression</div>
<div className="mb-2 flex items-center gap-2">
<span
className="rounded px-2 py-0.5 text-xs font-medium"
style={{
backgroundColor: `${trajectoryColor(testProgression.trajectory)}20`,
color: trajectoryColor(testProgression.trajectory),
}}
style={{ backgroundColor: `${trajectoryColor}20`, color: trajectoryColor }}
>
{testProgression.trajectory === 'insufficient_data'
? 'Insufficient Data'
: testProgression.trajectory.charAt(0).toUpperCase() +
testProgression.trajectory.slice(1)}
{assessmentLabel(testProgression.trajectory)}
</span>
<span className="text-xs text-text-muted">
{testProgression.snapshotCount} snapshot{testProgression.snapshotCount !== 1 ? 's' : ''}
@ -145,11 +143,11 @@ export const QualitySection = ({ prompt, startup, testProgression }: QualitySect
<div>
<div className="text-xs text-text-muted">First Run</div>
<div className="text-sm text-text">
<span style={{ color: '#4ade80' }}>
<span style={{ color: severityColor('good') }}>
{testProgression.firstSnapshot.passed} passed
</span>
{' / '}
<span style={{ color: '#f87171' }}>
<span style={{ color: severityColor('danger') }}>
{testProgression.firstSnapshot.failed} failed
</span>
</div>
@ -157,11 +155,11 @@ export const QualitySection = ({ prompt, startup, testProgression }: QualitySect
<div>
<div className="text-xs text-text-muted">Last Run</div>
<div className="text-sm text-text">
<span style={{ color: '#4ade80' }}>
<span style={{ color: severityColor('good') }}>
{testProgression.lastSnapshot.passed} passed
</span>
{' / '}
<span style={{ color: '#f87171' }}>
<span style={{ color: severityColor('danger') }}>
{testProgression.lastSnapshot.failed} failed
</span>
</div>

View file

@ -1,3 +1,4 @@
import { severityColor } from '@renderer/utils/reportAssessments';
import { Users } from 'lucide-react';
import { ReportSection } from '../ReportSection';
@ -53,8 +54,19 @@ export const SubagentSection = ({ data }: SubagentSectionProps) => {
<tbody>
{data.byAgent.map((agent, idx) => (
<tr key={idx} className="border-border/50 border-b">
<td className="max-w-48 truncate py-1.5 pr-4 text-text" title={agent.description}>
{agent.description}
<td className="max-w-48 py-1.5 pr-4 text-text">
<div className="truncate" title={agent.description}>
{agent.description}
</div>
{agent.modelMismatch && (
<div
className="mt-0.5 truncate text-[10px]"
style={{ color: severityColor('warning') }}
title={agent.modelMismatch.recommendation}
>
{agent.modelMismatch.recommendation}
</div>
)}
</td>
<td className="py-1.5 pr-4 text-text-secondary">{agent.subagentType}</td>
<td className="py-1.5 pr-4 text-right text-text">

View file

@ -1,3 +1,4 @@
import { assessmentColor, assessmentLabel } from '@renderer/utils/reportAssessments';
import { Clock } from 'lucide-react';
import { ReportSection } from '../ReportSection';
@ -15,11 +16,21 @@ interface TimelineSectionProps {
}
export const TimelineSection = ({ idle, modelSwitches, keyEvents }: TimelineSectionProps) => {
const idleColor = assessmentColor(idle.idleAssessment);
return (
<ReportSection title="Timeline & Activity" icon={Clock}>
{/* Idle stats */}
<div className="mb-4">
<div className="mb-2 text-xs font-medium text-text-muted">Idle Analysis</div>
<div className="mb-2 flex items-center gap-2">
<span className="text-xs font-medium text-text-muted">Idle Analysis</span>
<span
className="rounded px-2 py-0.5 text-xs font-medium"
style={{ backgroundColor: `${idleColor}20`, color: idleColor }}
>
{assessmentLabel(idle.idleAssessment)}
</span>
</div>
<div className="grid grid-cols-2 gap-3 sm:grid-cols-4">
<div>
<div className="text-xs text-text-muted">Idle Gaps</div>
@ -35,10 +46,7 @@ export const TimelineSection = ({ idle, modelSwitches, keyEvents }: TimelineSect
</div>
<div>
<div className="text-xs text-text-muted">Idle %</div>
<div
className="text-sm font-medium"
style={{ color: idle.idlePct > 50 ? '#fbbf24' : '#4ade80' }}
>
<div className="text-sm font-medium" style={{ color: idleColor }}>
{idle.idlePct}%
</div>
</div>
@ -48,8 +56,21 @@ export const TimelineSection = ({ idle, modelSwitches, keyEvents }: TimelineSect
{/* Model switches */}
{modelSwitches.count > 0 && (
<div className="mb-4">
<div className="mb-2 text-xs font-medium text-text-muted">
Model Switches ({modelSwitches.count})
<div className="mb-2 flex items-center gap-2">
<span className="text-xs font-medium text-text-muted">
Model Switches ({modelSwitches.count})
</span>
{modelSwitches.switchPattern && (
<span
className="rounded px-2 py-0.5 text-xs font-medium"
style={{
backgroundColor: `${assessmentColor(modelSwitches.switchPattern)}20`,
color: assessmentColor(modelSwitches.switchPattern),
}}
>
{assessmentLabel(modelSwitches.switchPattern)}
</span>
)}
</div>
<div className="flex flex-col gap-1">
{modelSwitches.switches.map((sw, idx) => (

View file

@ -1,3 +1,4 @@
import { assessmentColor, assessmentLabel } from '@renderer/utils/reportAssessments';
import { Coins } from 'lucide-react';
import { ReportSection } from '../ReportSection';
@ -65,12 +66,40 @@ export const TokenSection = ({ data, cacheEconomics }: TokenSectionProps) => {
<div className="grid grid-cols-2 gap-3 sm:grid-cols-4">
<div>
<div className="text-xs text-text-muted">Cache Efficiency</div>
<div className="text-sm font-medium text-text">{cacheEconomics.cacheEfficiencyPct}%</div>
<div className="flex items-center gap-2">
<span className="text-sm font-medium text-text">
{cacheEconomics.cacheEfficiencyPct}%
</span>
{cacheEconomics.cacheEfficiencyAssessment && (
<span
className="rounded px-2 py-0.5 text-xs font-medium"
style={{
backgroundColor: `${assessmentColor(cacheEconomics.cacheEfficiencyAssessment)}20`,
color: assessmentColor(cacheEconomics.cacheEfficiencyAssessment),
}}
>
{assessmentLabel(cacheEconomics.cacheEfficiencyAssessment)}
</span>
)}
</div>
</div>
<div>
<div className="text-xs text-text-muted">R/W Ratio</div>
<div className="text-sm font-medium text-text">
{cacheEconomics.cacheReadToWriteRatio}x
<div className="flex items-center gap-2">
<span className="text-sm font-medium text-text">
{cacheEconomics.cacheReadToWriteRatio}x
</span>
{cacheEconomics.cacheRatioAssessment && (
<span
className="rounded px-2 py-0.5 text-xs font-medium"
style={{
backgroundColor: `${assessmentColor(cacheEconomics.cacheRatioAssessment)}20`,
color: assessmentColor(cacheEconomics.cacheRatioAssessment),
}}
>
{assessmentLabel(cacheEconomics.cacheRatioAssessment)}
</span>
)}
</div>
</div>
<div>

View file

@ -1,3 +1,4 @@
import { assessmentColor, assessmentLabel } from '@renderer/utils/reportAssessments';
import { Wrench } from 'lucide-react';
import { ReportSection } from '../ReportSection';
@ -13,10 +14,23 @@ export const ToolSection = ({ data }: ToolSectionProps) => {
(a, b) => b[1].totalCalls - a[1].totalCalls
);
const healthColor = assessmentColor(data.overallToolHealth);
return (
<ReportSection title="Tool Usage" icon={Wrench}>
<div className="mb-2 text-xs text-text-muted">
{data.totalCalls.toLocaleString()} total calls across {toolEntries.length} tools
<div className="mb-2 flex items-center gap-2">
<span className="text-xs text-text-muted">
{data.totalCalls.toLocaleString()} total calls across {toolEntries.length} tools
</span>
<span
className="rounded px-2 py-0.5 text-xs font-medium"
style={{
backgroundColor: `${healthColor}20`,
color: healthColor,
}}
>
{assessmentLabel(data.overallToolHealth)}
</span>
</div>
<div className="overflow-x-auto">
<table className="w-full text-xs">
@ -25,18 +39,13 @@ export const ToolSection = ({ data }: ToolSectionProps) => {
<th className="pb-2 pr-4">Tool</th>
<th className="pb-2 pr-4 text-right">Calls</th>
<th className="pb-2 pr-4 text-right">Errors</th>
<th className="pb-2 text-right">Success %</th>
<th className="pb-2 pr-4 text-right">Success %</th>
<th className="pb-2 text-right">Health</th>
</tr>
</thead>
<tbody>
{toolEntries.map(([tool, stats]) => {
const rateColor =
stats.successRatePct < 80
? '#f87171'
: stats.successRatePct < 90
? '#fbbf24'
: undefined;
const color = assessmentColor(stats.assessment);
return (
<tr key={tool} className="border-border/50 border-b">
<td className="py-1.5 pr-4 text-text">{tool}</td>
@ -46,12 +55,17 @@ export const ToolSection = ({ data }: ToolSectionProps) => {
<td className="py-1.5 pr-4 text-right text-text">
{stats.errors.toLocaleString()}
</td>
<td
className="py-1.5 text-right"
style={rateColor ? { color: rateColor } : undefined}
>
<td className="py-1.5 pr-4 text-right" style={{ color }}>
{stats.successRatePct}%
</td>
<td className="py-1.5 text-right">
<span
className="rounded px-1.5 py-0.5 text-[10px] font-medium"
style={{ backgroundColor: `${color}20`, color }}
>
{assessmentLabel(stats.assessment)}
</span>
</td>
</tr>
);
})}

View file

@ -3,6 +3,19 @@
* Output of analyzeSession() one interface per report section.
*/
import type {
CacheAssessment,
CostAssessment,
IdleAssessment,
ModelMismatch,
OverheadAssessment,
RedundancyAssessment,
SubagentCostShareAssessment,
SwitchPattern,
ThrashingAssessment,
ToolHealthAssessment,
} from '@renderer/utils/reportAssessments';
// =============================================================================
// Pricing
// =============================================================================
@ -67,27 +80,33 @@ export interface ReportCostAnalysis {
costByModel: Record<string, number>;
costPerCommit: number | null;
costPerLineChanged: number | null;
costPerCommitAssessment: CostAssessment | null;
costPerLineAssessment: CostAssessment | null;
subagentCostSharePct: number | null;
subagentCostShareAssessment: SubagentCostShareAssessment | null;
}
/**
 * Cache usage figures for the report, plus badge assessments for the two
 * headline cache metrics.
 */
export interface ReportCacheEconomics {
  // NOTE(review): presumably cache-creation token counts split by TTL bucket
  // (5 minutes / 1 hour) — confirm against the analyzer.
  cacheCreation5m: number;
  cacheCreation1h: number;
  cacheRead: number;
  /** Percentage value; the token section renders it with a "%" suffix. */
  cacheEfficiencyPct: number;
  coldStartDetected: boolean;
  /** Read-to-write ratio; the token section renders it with an "x" suffix. */
  cacheReadToWriteRatio: number;
  /** Badge shown next to cacheEfficiencyPct; the UI omits the badge when null. */
  cacheEfficiencyAssessment: CacheAssessment | null;
  /** Badge shown next to cacheReadToWriteRatio; the UI omits the badge when null. */
  cacheRatioAssessment: CacheAssessment | null;
}
/** Per-tool call statistics with a health assessment. */
export interface ToolSuccessRate {
  /** Number of calls made to the tool. */
  totalCalls: number;
  /** Number of those calls that were recorded as errors. */
  errors: number;
  /**
   * Share of non-error calls as a percentage rounded to one decimal place;
   * the analyzer stores 0 when the tool has no calls.
   */
  successRatePct: number;
  /** Health bucket the analyzer derives from successRatePct via computeToolHealthAssessment. */
  assessment: ToolHealthAssessment;
}
/** Tool usage section of the report. */
export interface ReportToolUsage {
  /** Call count keyed by tool name. */
  counts: Record<string, number>;
  /** Total number of tool calls; shown in the Tool Usage section header. */
  totalCalls: number;
  /** Success statistics keyed by tool name. */
  successRates: Record<string, ToolSuccessRate>;
  // NOTE(review): the analyzer comment describes this as the worst assessment
  // among tools with more than 5 calls — confirm against the full computation.
  overallToolHealth: ToolHealthAssessment;
}
export interface SubagentEntry {
@ -99,6 +118,7 @@ export interface SubagentEntry {
totalToolUseCount: number;
costUsd: number;
costNote?: string;
modelMismatch: ModelMismatch | null;
}
export interface ReportSubagentMetrics {
@ -157,6 +177,7 @@ export interface ReportFrictionSignals {
export interface ReportThrashingSignals {
bashNearDuplicates: { prefix: string; count: number }[];
editReworkFiles: { filePath: string; editIndices: number[] }[];
thrashingAssessment: ThrashingAssessment;
}
export interface ReportConversationTree {
@ -187,6 +208,7 @@ export interface ReportIdleAnalysis {
activeWorkingHuman: string;
idlePct: number;
longestGaps: IdleGap[];
idleAssessment: IdleAssessment;
}
export interface ModelSwitch {
@ -200,6 +222,7 @@ export interface ReportModelSwitches {
count: number;
switches: ModelSwitch[];
modelsUsed: string[];
switchPattern: SwitchPattern | null;
}
export interface ReportWorkingDirectories {
@ -230,6 +253,7 @@ export interface ReportStartupOverhead {
messagesBeforeFirstWork: number;
tokensBeforeFirstWork: number;
pctOfTotal: number;
overheadAssessment: OverheadAssessment;
}
export interface ReportTokenDensityTimeline {
@ -271,6 +295,7 @@ export interface ReportFileReadRedundancy {
uniqueFiles: number;
readsPerUniqueFile: number;
redundantFiles: Record<string, number>;
redundancyAssessment: RedundancyAssessment;
}
// =============================================================================

View file

@ -425,8 +425,7 @@ export function buildDisplayItemsFromMessages(
}
// Only treat as subagent input if there are NO tool_result blocks in this message
const hasToolResults =
Array.isArray(msg.content) &&
msg.content.some((b) => b.type === 'tool_result');
Array.isArray(msg.content) && msg.content.some((b) => b.type === 'tool_result');
if (rawText.trim() && !hasToolResults) {
displayItems.push({
type: 'subagent_input',

View file

@ -0,0 +1,270 @@
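/**
 * Centralized assessment severity/color utilities for session reports.
 *
 * Maps raw assessment values to severity levels and colors,
 * replacing duplicated assessmentColor() functions across report sections.
 *
 * Also holds the threshold constants, the compute*Assessment functions that
 * turn raw metrics into assessment values, and the model-mismatch and
 * model-switch pattern detectors.
 */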
// =============================================================================
// Types
// =============================================================================
/** Display severity bucket shared by every report badge. */
export type Severity = 'good' | 'warning' | 'danger' | 'neutral';

// =============================================================================
// Colors
// =============================================================================

/** Hex color used to render each severity. */
const SEVERITY_COLORS: Record<Severity, string> = {
  good: '#4ade80',
  warning: '#fbbf24',
  danger: '#f87171',
  neutral: '#a1a1aa',
};

/**
 * Returns the hex color string for a severity.
 *
 * @param severity - Severity bucket to look up.
 * @returns The `#rrggbb` color registered for that severity.
 */
export function severityColor(severity: Severity): string {
  return SEVERITY_COLORS[severity];
}

// =============================================================================
// Assessment → Severity Mapping
// =============================================================================

/**
 * Flat lookup from assessment value to severity. Assessment values from
 * different metrics share this one namespace, so a value such as 'moderate'
 * or 'normal' has the same severity regardless of which metric produced it.
 */
const ASSESSMENT_SEVERITY: Record<string, Severity> = {
  // Context
  healthy: 'good',
  moderate: 'warning',
  high: 'danger',
  critical: 'danger',
  // Cost / subagent share
  efficient: 'good',
  normal: 'good',
  expensive: 'warning',
  red_flag: 'danger',
  very_high: 'danger',
  // Cache
  good: 'good',
  concerning: 'warning',
  // Tool health
  degraded: 'warning',
  unreliable: 'danger',
  // Idle ('moderate' already mapped above under Context)
  high_idle: 'danger',
  // File read
  wasteful: 'warning',
  // Startup
  heavy: 'warning',
  // Thrashing
  none: 'good',
  mild: 'warning',
  severe: 'danger',
  // Prompt quality
  well_specified: 'good',
  moderate_friction: 'warning',
  underspecified: 'danger',
  verbose_but_unclear: 'danger',
  // Test trajectory
  improving: 'good',
  stable: 'warning',
  regressing: 'danger',
  insufficient_data: 'neutral',
  // Model switch
  opus_plan_mode: 'good',
  manual_switch: 'neutral',
};

/**
 * Resolves an assessment value to its severity.
 *
 * @param assessment - Assessment value; may be null or undefined.
 * @returns The mapped severity, or 'neutral' for null, undefined, empty, or
 *   unrecognized values.
 */
export function assessmentSeverity(assessment: string | null | undefined): Severity {
  if (!assessment) return 'neutral';
  // Only own keys count: a bare index lookup would also resolve names inherited
  // from Object.prototype ('constructor', 'toString', '__proto__', ...) and hand
  // back a function or object instead of a Severity.
  if (!Object.prototype.hasOwnProperty.call(ASSESSMENT_SEVERITY, assessment)) {
    return 'neutral';
  }
  return ASSESSMENT_SEVERITY[assessment] ?? 'neutral';
}

/**
 * Resolves an assessment value straight to its badge color.
 *
 * @param assessment - Assessment value; may be null or undefined.
 * @returns The hex color of the assessment's severity (the neutral color for
 *   null, undefined, or unrecognized values).
 */
export function assessmentColor(assessment: string | null | undefined): string {
  return severityColor(assessmentSeverity(assessment));
}
// =============================================================================
// Label Formatting
// =============================================================================
/**
 * Turns a snake_case assessment value into a display label.
 *
 * Each underscore-separated segment gets its first character upper-cased and
 * the segments are joined with single spaces, e.g. 'red_flag' -> 'Red Flag'.
 *
 * @param value - Assessment value to format.
 * @returns The formatted label; an empty string yields an empty label.
 */
export function assessmentLabel(value: string): string {
  const words: string[] = [];
  for (const segment of value.split('_')) {
    const initial = segment.slice(0, 1).toUpperCase();
    words.push(initial + segment.slice(1));
  }
  return words.join(' ');
}
// =============================================================================
// Threshold Constants
// =============================================================================
/**
 * Cut-off values consumed by the compute*Assessment functions in this file.
 * Each comment states the comparison the matching function applies.
 */
export const THRESHOLDS = {
  // Exclusive upper bounds: value < efficient -> 'efficient', < normal -> 'normal',
  // < expensive -> 'expensive', otherwise 'red_flag'.
  // NOTE(review): presumably US dollars per commit — confirm against the analyzer.
  costPerCommit: {
    efficient: 0.5,
    normal: 2,
    expensive: 5,
  },
  // Same banding as costPerCommit, applied to the cost per changed line.
  costPerLine: {
    efficient: 0.01,
    normal: 0.05,
    expensive: 0.2,
  },
  // Exclusive upper bounds on a percentage: pct < normal -> 'normal',
  // < high -> 'high', < veryHigh -> 'very_high', otherwise 'red_flag'.
  subagentCostShare: {
    normal: 30,
    high: 60,
    veryHigh: 80,
  },
  // pct >= good -> 'good', otherwise 'concerning'.
  cacheEfficiency: {
    good: 95,
  },
  // ratio >= good -> 'good', otherwise 'concerning'.
  cacheRwRatio: {
    good: 20,
  },
  // pct > healthy (strictly) -> 'healthy'; pct >= degraded -> 'degraded';
  // otherwise 'unreliable'. A rate of exactly `healthy` is therefore 'degraded'.
  toolSuccess: {
    healthy: 95,
    degraded: 80,
  },
  // Exclusive upper bounds: pct < efficient -> 'efficient', < moderate -> 'moderate',
  // otherwise 'high_idle'.
  idle: {
    efficient: 20,
    moderate: 50,
  },
  // readsPerUnique <= normal -> 'normal', otherwise 'wasteful'.
  fileReadsPerUnique: {
    normal: 2.0,
  },
  // pctOfTotal <= normal -> 'normal', otherwise 'heavy'.
  startupOverhead: {
    normal: 5,
  },
} as const;
// =============================================================================
// Assessment Computers
// =============================================================================
/** Result of computeCostPerCommitAssessment / computeCostPerLineAssessment. */
export type CostAssessment = 'efficient' | 'normal' | 'expensive' | 'red_flag';
/** Result of computeCacheEfficiencyAssessment / computeCacheRatioAssessment. */
export type CacheAssessment = 'good' | 'concerning';
/** Result of computeToolHealthAssessment. */
export type ToolHealthAssessment = 'healthy' | 'degraded' | 'unreliable';
/** Result of computeIdleAssessment. */
export type IdleAssessment = 'efficient' | 'moderate' | 'high_idle';
/** Result of computeRedundancyAssessment. */
export type RedundancyAssessment = 'normal' | 'wasteful';
/** Result of computeOverheadAssessment. */
export type OverheadAssessment = 'normal' | 'heavy';
/** Result of computeThrashingAssessment. */
export type ThrashingAssessment = 'none' | 'mild' | 'severe';
/** Result of computeSubagentCostShareAssessment. */
export type SubagentCostShareAssessment = 'normal' | 'high' | 'very_high' | 'red_flag';
// NOTE(review): detectSwitchPattern in this file returns null, not 'none', when
// there are no switches, so 'none' is never produced here — confirm whether any
// other producer emits it before relying on that member.
export type SwitchPattern = 'opus_plan_mode' | 'manual_switch' | 'none';
/**
 * Classifies a per-commit cost against THRESHOLDS.costPerCommit.
 *
 * @param costPerCommit - Cost attributed to a single commit.
 * @returns The first band whose exclusive upper bound is above the cost, or
 *   'red_flag' when no band matches (this includes NaN).
 */
export function computeCostPerCommitAssessment(costPerCommit: number): CostAssessment {
  const { efficient, normal, expensive } = THRESHOLDS.costPerCommit;
  // Ordered from cheapest to most expensive; the first match wins.
  const bands: [number, CostAssessment][] = [
    [efficient, 'efficient'],
    [normal, 'normal'],
    [expensive, 'expensive'],
  ];
  for (const [upperBound, label] of bands) {
    if (costPerCommit < upperBound) return label;
  }
  return 'red_flag';
}
/**
 * Classifies a per-changed-line cost against THRESHOLDS.costPerLine.
 *
 * @param costPerLine - Cost attributed to a single changed line.
 * @returns 'efficient', 'normal' or 'expensive' when the cost is below the
 *   corresponding exclusive upper bound (checked in that order), otherwise
 *   'red_flag'.
 */
export function computeCostPerLineAssessment(costPerLine: number): CostAssessment {
  const limits = THRESHOLDS.costPerLine;
  return costPerLine < limits.efficient
    ? 'efficient'
    : costPerLine < limits.normal
      ? 'normal'
      : costPerLine < limits.expensive
        ? 'expensive'
        : 'red_flag';
}
/**
 * Classifies the subagent share of total cost against
 * THRESHOLDS.subagentCostShare.
 *
 * @param pct - Subagent cost share as a percentage.
 * @returns The first band whose exclusive upper bound is above `pct`, or
 *   'red_flag' when none is.
 */
export function computeSubagentCostShareAssessment(pct: number): SubagentCostShareAssessment {
  const { normal, high, veryHigh } = THRESHOLDS.subagentCostShare;
  const bands: [number, SubagentCostShareAssessment][] = [
    [normal, 'normal'],
    [high, 'high'],
    [veryHigh, 'very_high'],
  ];
  for (const [upperBound, label] of bands) {
    if (pct < upperBound) return label;
  }
  return 'red_flag';
}
/**
 * Classifies cache efficiency against THRESHOLDS.cacheEfficiency.
 *
 * @param pct - Cache efficiency as a percentage.
 * @returns 'good' when `pct` is at or above the threshold, otherwise 'concerning'.
 */
export function computeCacheEfficiencyAssessment(pct: number): CacheAssessment {
  const minimumGood = THRESHOLDS.cacheEfficiency.good;
  if (pct >= minimumGood) {
    return 'good';
  }
  return 'concerning';
}
/**
 * Classifies the cache read-to-write ratio against THRESHOLDS.cacheRwRatio.
 *
 * @param ratio - Cache read-to-write ratio.
 * @returns 'good' when `ratio` is at or above the threshold, otherwise 'concerning'.
 */
export function computeCacheRatioAssessment(ratio: number): CacheAssessment {
  const minimumGood = THRESHOLDS.cacheRwRatio.good;
  if (ratio >= minimumGood) {
    return 'good';
  }
  return 'concerning';
}
/**
 * Classifies a tool's success rate against THRESHOLDS.toolSuccess.
 *
 * @param successPct - Success rate as a percentage.
 * @returns 'healthy' when the rate is strictly above the healthy threshold,
 *   'degraded' when it is at or above the degraded threshold, otherwise
 *   'unreliable'. A rate exactly equal to the healthy threshold is 'degraded'.
 */
export function computeToolHealthAssessment(successPct: number): ToolHealthAssessment {
  const { healthy, degraded } = THRESHOLDS.toolSuccess;
  if (successPct > healthy) {
    return 'healthy';
  }
  return successPct >= degraded ? 'degraded' : 'unreliable';
}
/**
 * Classifies idle time against THRESHOLDS.idle.
 *
 * @param idlePct - Idle time as a percentage.
 * @returns 'efficient' or 'moderate' when `idlePct` is below the corresponding
 *   exclusive upper bound (checked in that order), otherwise 'high_idle'.
 */
export function computeIdleAssessment(idlePct: number): IdleAssessment {
  const { efficient, moderate } = THRESHOLDS.idle;
  return idlePct < efficient ? 'efficient' : idlePct < moderate ? 'moderate' : 'high_idle';
}
/**
 * Classifies file-read redundancy against THRESHOLDS.fileReadsPerUnique.
 *
 * @param readsPerUnique - Reads per unique file.
 * @returns 'normal' when the value is at or below the threshold, otherwise 'wasteful'.
 */
export function computeRedundancyAssessment(readsPerUnique: number): RedundancyAssessment {
  const maximumNormal = THRESHOLDS.fileReadsPerUnique.normal;
  if (readsPerUnique <= maximumNormal) {
    return 'normal';
  }
  return 'wasteful';
}
/**
 * Classifies startup overhead against THRESHOLDS.startupOverhead.
 *
 * @param pctOfTotal - Startup overhead as a percentage of the total.
 * @returns 'normal' when the value is at or below the threshold, otherwise 'heavy'.
 */
export function computeOverheadAssessment(pctOfTotal: number): OverheadAssessment {
  const maximumNormal = THRESHOLDS.startupOverhead.normal;
  if (pctOfTotal <= maximumNormal) {
    return 'normal';
  }
  return 'heavy';
}
/**
 * Classifies thrashing by the number of signals observed.
 *
 * @param signalCount - Number of thrashing signals.
 * @returns 'none' for exactly zero signals, 'mild' for any other count up to
 *   and including 2, otherwise 'severe'.
 */
export function computeThrashingAssessment(signalCount: number): ThrashingAssessment {
  const hasNoSignals = signalCount === 0;
  if (hasNoSignals) {
    return 'none';
  }
  const isMild = signalCount <= 2;
  return isMild ? 'mild' : 'severe';
}
/** Describes a task whose wording suggests a cheaper model would have sufficed. */
export interface ModelMismatch {
  /** The task description that triggered the match, passed through unchanged. */
  description: string;
  /** Which keyword family matched the description. */
  expectedComplexity: 'mechanical' | 'read_only';
  /** Human-readable suggestion for the report. */
  recommendation: string;
}

// Whole-word, case-insensitive keyword lists. Neither regex uses the global
// flag, so `.test()` carries no state between calls.
const MECHANICAL_PATTERNS = /\b(rename|move|lint|format|delete|remove|copy|replace)\b/i;
const READ_ONLY_PATTERNS = /\b(explore|search|find|verify|check|scan|discover|list|read)\b/i;

/**
 * Flags a task that ran on an Opus model although its description reads like
 * mechanical or read-only work.
 *
 * @param description - Free-text task description.
 * @param model - Model name; only names containing "opus" (any case) are considered.
 * @returns A mismatch record for the first keyword family that matches
 *   (mechanical is checked before read-only), or null when the model is not
 *   Opus or no keyword matches.
 */
export function detectModelMismatch(description: string, model: string): ModelMismatch | null {
  if (!model.toLowerCase().includes('opus')) {
    return null;
  }

  // Checked in order, so a description matching both families is 'mechanical'.
  const rules: {
    pattern: RegExp;
    expectedComplexity: ModelMismatch['expectedComplexity'];
    recommendation: string;
  }[] = [
    {
      pattern: MECHANICAL_PATTERNS,
      expectedComplexity: 'mechanical',
      recommendation: 'Consider using Haiku for mechanical tasks to reduce cost.',
    },
    {
      pattern: READ_ONLY_PATTERNS,
      expectedComplexity: 'read_only',
      recommendation: 'Consider using Haiku or Sonnet for read-only exploration tasks.',
    },
  ];

  const matched = rules.find((rule) => rule.pattern.test(description));
  if (!matched) {
    return null;
  }
  return {
    description,
    expectedComplexity: matched.expectedComplexity,
    recommendation: matched.recommendation,
  };
}
/**
 * Characterizes a sequence of model switches.
 *
 * @param switches - Model switches in order, each with the model switched
 *   `from` and the model switched `to`.
 * @returns null when there are no switches; 'opus_plan_mode' when any two
 *   consecutive switches go Sonnet -> Opus and then Opus -> Sonnet (matched by
 *   case-insensitive substring); otherwise 'manual_switch'.
 */
export function detectSwitchPattern(
  switches: { from: string; to: string }[]
): SwitchPattern | null {
  if (switches.length === 0) {
    return null;
  }

  const mentions = (modelName: string, family: string): boolean =>
    modelName.toLowerCase().includes(family);

  // Walk adjacent pairs; a single switch never enters the loop and falls
  // through to 'manual_switch'.
  for (let next = 1; next < switches.length; next++) {
    const first = switches[next - 1];
    const second = switches[next];
    const isPlanRoundTrip =
      mentions(first.from, 'sonnet') &&
      mentions(first.to, 'opus') &&
      mentions(second.from, 'opus') &&
      mentions(second.to, 'sonnet');
    if (isPlanRoundTrip) {
      return 'opus_plan_mode';
    }
  }
  return 'manual_switch';
}

View file

@ -8,6 +8,21 @@
* Runs entirely in the renderer process no IPC needed.
*/
import {
computeCacheEfficiencyAssessment,
computeCacheRatioAssessment,
computeCostPerCommitAssessment,
computeCostPerLineAssessment,
computeIdleAssessment,
computeOverheadAssessment,
computeRedundancyAssessment,
computeSubagentCostShareAssessment,
computeThrashingAssessment,
computeToolHealthAssessment,
detectModelMismatch,
detectSwitchPattern,
} from '@renderer/utils/reportAssessments';
import type {
AgentTreeNode,
FrictionCorrection,
@ -25,6 +40,7 @@ import type {
TestSnapshot,
ThinkingBlockAnalysis,
ToolError,
ToolSuccessRate,
UserQuestion,
} from '@renderer/types/sessionReport';
import type {
@ -283,8 +299,6 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
};
// Cache economics
const cacheCreation5m = 0;
const cacheCreation1h = 0;
let totalCacheCreation = 0;
let totalCacheRead = 0;
let coldStartDetected = false;
@ -356,7 +370,7 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
const testSnapshots: TestSnapshot[] = [];
// Cost tracking
let totalSessionCost = 0;
let parentCost = 0;
// Git activity
const gitCommits: GitCommit[] = [];
@ -457,7 +471,7 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
const callCost = costUsd(model, inpTok, outTok, cr, cc);
stats.costUsd += callCost;
totalSessionCost += callCost;
parentCost += callCost;
totalCacheCreation += cc;
totalCacheRead += cr;
@ -868,18 +882,23 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
const linesChanged = linesAddedTotal + linesRemovedTotal;
// --- Subagent metrics from detail.processes ---
const subagentEntries: SubagentEntry[] = detail.processes.map((proc: Process) => ({
description: proc.description ?? 'unknown',
subagentType: proc.subagentType ?? 'unknown',
model: 'default (inherits parent)',
totalTokens: proc.metrics.totalTokens,
totalDurationMs: proc.durationMs,
totalToolUseCount: proc.messages.reduce(
(sum: number, pm: ParsedMessage) => sum + pm.toolCalls.length,
0
),
costUsd: proc.metrics.costUsd ?? 0,
}));
const subagentEntries: SubagentEntry[] = detail.processes.map((proc: Process) => {
const desc = proc.description ?? 'unknown';
const model = 'default (inherits parent)';
return {
description: desc,
subagentType: proc.subagentType ?? 'unknown',
model,
totalTokens: proc.metrics.totalTokens,
totalDurationMs: proc.durationMs,
totalToolUseCount: proc.messages.reduce(
(sum: number, pm: ParsedMessage) => sum + pm.toolCalls.length,
0
),
costUsd: proc.metrics.costUsd ?? 0,
modelMismatch: detectModelMismatch(desc, model),
};
});
const saFromProcesses = {
count: subagentEntries.length,
@ -892,22 +911,32 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
};
// --- Tool usage with success rates ---
const toolSuccessRates: Record<
string,
{ totalCalls: number; errors: number; successRatePct: number }
> = {};
const toolSuccessRates: Record<string, ToolSuccessRate> = {};
const sortedToolCounts = [...toolCounts.entries()].sort((a, b) => b[1] - a[1]);
const countsRecord: Record<string, number> = {};
for (const [tool, count] of sortedToolCounts) {
countsRecord[tool] = count;
const errCount = errorsByTool.get(tool) ?? 0;
const successPct = count ? Math.round(((count - errCount) / count) * 1000) / 10 : 0;
toolSuccessRates[tool] = {
totalCalls: count,
errors: errCount,
successRatePct: count ? Math.round(((count - errCount) / count) * 1000) / 10 : 0,
successRatePct: successPct,
assessment: computeToolHealthAssessment(successPct),
};
}
// Overall tool health: worst assessment among tools with >5 calls
const significantTools = Object.values(toolSuccessRates).filter((t) => t.totalCalls > 5);
type THAssessment = 'healthy' | 'degraded' | 'unreliable';
const overallToolHealth: THAssessment =
significantTools.length > 0
? significantTools.reduce<THAssessment>((worst, t) => {
const order = { healthy: 0, degraded: 1, unreliable: 2 } as const;
return order[t.assessment] > order[worst] ? t.assessment : worst;
}, 'healthy')
: computeToolHealthAssessment(100);
// --- Key events timing ---
for (let j = 1; j < keyEvents.length; j++) {
const prevDt = keyEvents[j - 1].timestamp;
@ -967,14 +996,19 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
// --- Conversation tree analysis ---
const depthMemo = new Map<string, number>();
// Depth of `uuid` in the conversation tree: the number of parentMap hops needed
// to reach a message that has no parent entry. Results are cached in depthMemo.
// `visited` collects the uuids seen during the current walk; if a uuid shows up
// again (a cycle in parentMap) it is assigned depth 0 instead of recursing again.
function getDepth(uuid: string): number {
function getDepth(uuid: string, visited = new Set<string>()): number {
if (depthMemo.has(uuid)) return depthMemo.get(uuid)!;
if (visited.has(uuid)) {
depthMemo.set(uuid, 0);
return 0;
}
visited.add(uuid);
const parent = parentMap.get(uuid);
// No parent recorded: this message is treated as a root.
if (!parent) {
depthMemo.set(uuid, 0);
return 0;
}
const depth = 1 + getDepth(parent);
const depth = 1 + getDepth(parent, visited);
depthMemo.set(uuid, depth);
return depth;
}
@ -1071,6 +1105,20 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
// --- Subagent cost from processes ---
const processSubagentCost = subagentEntries.reduce((sum, a) => sum + a.costUsd, 0);
const totalCost = parentCost + processSubagentCost;
// --- Assessment computations ---
const costPerCommitVal =
commitCount > 0 ? Math.round((totalCost / commitCount) * 10000) / 10000 : null;
const costPerLineVal =
linesChanged > 0 ? Math.round((totalCost / linesChanged) * 1000000) / 1000000 : null;
const subagentCostSharePct =
totalCost > 0 ? Math.round((processSubagentCost / totalCost) * 10000) / 100 : null;
const readsPerUniqueFile = uniqueFiles ? Math.round((totalReads / uniqueFiles) * 100) / 100 : 0;
const startupPctOfTotal = grandTotal ? Math.round((startupTokens / grandTotal) * 10000) / 100 : 0;
const idlePct = wallClock > 0 ? Math.round((totalIdle / wallClock) * 1000) / 10 : 0;
const thrashingSignalCount = bashNearDuplicates.length + editReworkFiles.length;
// ===================================================================
// BUILD REPORT
@ -1111,39 +1159,44 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
},
costAnalysis: {
parentCostUsd: Math.round(totalSessionCost * 10000) / 10000,
parentCostUsd: Math.round(parentCost * 10000) / 10000,
subagentCostUsd: Math.round(processSubagentCost * 10000) / 10000,
totalSessionCostUsd: Math.round((totalSessionCost + processSubagentCost) * 10000) / 10000,
totalSessionCostUsd: Math.round(totalCost * 10000) / 10000,
costByModel: Object.fromEntries(
[...modelStats.entries()].map(([model, stats]) => [
model,
Math.round(stats.costUsd * 10000) / 10000,
])
),
costPerCommit:
commitCount > 0
? Math.round(((totalSessionCost + processSubagentCost) / commitCount) * 10000) / 10000
: null,
costPerLineChanged:
linesChanged > 0
? Math.round(((totalSessionCost + processSubagentCost) / linesChanged) * 1000000) /
1000000
costPerCommit: costPerCommitVal,
costPerLineChanged: costPerLineVal,
costPerCommitAssessment:
costPerCommitVal != null ? computeCostPerCommitAssessment(costPerCommitVal) : null,
costPerLineAssessment:
costPerLineVal != null ? computeCostPerLineAssessment(costPerLineVal) : null,
subagentCostSharePct,
subagentCostShareAssessment:
subagentCostSharePct != null
? computeSubagentCostShareAssessment(subagentCostSharePct)
: null,
},
cacheEconomics: {
cacheCreation5m,
cacheCreation1h,
cacheRead: totalCacheRead,
cacheEfficiencyPct: cacheEfficiency,
coldStartDetected,
cacheReadToWriteRatio: cacheRwRatio,
cacheEfficiencyAssessment:
cacheTotalCreationAndRead > 0 ? computeCacheEfficiencyAssessment(cacheEfficiency) : null,
cacheRatioAssessment:
totalCacheCreation > 0 ? computeCacheRatioAssessment(cacheRwRatio) : null,
},
toolUsage: {
counts: countsRecord,
totalCalls: [...toolCounts.values()].reduce((sum, c) => sum + c, 0),
successRates: toolSuccessRates,
overallToolHealth,
},
subagentMetrics: saFromProcesses,
@ -1178,6 +1231,7 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
thrashingSignals: {
bashNearDuplicates,
editReworkFiles,
thrashingAssessment: computeThrashingAssessment(thrashingSignalCount),
},
conversationTree: {
@ -1196,14 +1250,16 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
wallClockSeconds: Math.round(wallClock * 10) / 10,
activeWorkingSeconds: Math.round(Math.max(activeTime, 0) * 10) / 10,
activeWorkingHuman: formatDuration(Math.floor(Math.max(activeTime, 0))),
idlePct: wallClock > 0 ? Math.round((totalIdle / wallClock) * 1000) / 10 : 0,
idlePct,
longestGaps: [...idleGaps].sort((a, b) => b.gapSeconds - a.gapSeconds).slice(0, 5),
idleAssessment: computeIdleAssessment(idlePct),
},
modelSwitches: {
count: modelSwitches.length,
switches: modelSwitches,
modelsUsed,
switchPattern: detectSwitchPattern(modelSwitches),
},
workingDirectories: {
@ -1225,7 +1281,8 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
startupOverhead: {
messagesBeforeFirstWork: startupMessages,
tokensBeforeFirstWork: startupTokens,
pctOfTotal: grandTotal ? Math.round((startupTokens / grandTotal) * 10000) / 100 : 0,
pctOfTotal: startupPctOfTotal,
overheadAssessment: computeOverheadAssessment(startupPctOfTotal),
},
tokenDensityTimeline: { quartiles },
@ -1253,8 +1310,9 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
fileReadRedundancy: {
totalReads,
uniqueFiles,
readsPerUniqueFile: uniqueFiles ? Math.round((totalReads / uniqueFiles) * 100) / 100 : 0,
readsPerUniqueFile,
redundantFiles,
redundancyAssessment: computeRedundancyAssessment(readsPerUniqueFile),
},
compaction: {

View file

@ -0,0 +1,259 @@
import { describe, it, expect } from 'vitest';
import {
assessmentColor,
assessmentLabel,
assessmentSeverity,
computeCacheEfficiencyAssessment,
computeCacheRatioAssessment,
computeCostPerCommitAssessment,
computeCostPerLineAssessment,
computeIdleAssessment,
computeOverheadAssessment,
computeRedundancyAssessment,
computeSubagentCostShareAssessment,
computeThrashingAssessment,
computeToolHealthAssessment,
detectModelMismatch,
detectSwitchPattern,
severityColor,
THRESHOLDS,
} from '@renderer/utils/reportAssessments';
/**
 * Unit tests for the helpers exported by `@renderer/utils/reportAssessments`:
 * the severity / colour / label mappers, the threshold classifiers, and the
 * two pattern detectors (model mismatch and model switch pattern).
 */
describe('reportAssessments', () => {
  describe('severityColor', () => {
    it('maps severity to hex color', () => {
      const palette = [
        ['good', '#4ade80'],
        ['warning', '#fbbf24'],
        ['danger', '#f87171'],
        ['neutral', '#a1a1aa'],
      ] as const;
      for (const [severity, hex] of palette) {
        expect(severityColor(severity)).toBe(hex);
      }
    });
  });

  describe('assessmentSeverity', () => {
    it('maps known assessments to severity', () => {
      const known = [
        ['healthy', 'good'],
        ['efficient', 'good'],
        ['expensive', 'warning'],
        ['red_flag', 'danger'],
        ['very_high', 'danger'],
        ['degraded', 'warning'],
        ['unreliable', 'danger'],
        ['high_idle', 'danger'],
        ['moderate', 'warning'],
      ] as const;
      for (const [assessment, severity] of known) {
        expect(assessmentSeverity(assessment)).toBe(severity);
      }
    });

    it('returns neutral for null/undefined/unknown', () => {
      for (const input of [null, undefined, 'unknown_value']) {
        expect(assessmentSeverity(input)).toBe('neutral');
      }
    });
  });

  describe('assessmentColor', () => {
    it('returns correct color for assessment string', () => {
      const cases = [
        ['healthy', '#4ade80'],
        ['red_flag', '#f87171'],
        [null, '#a1a1aa'],
      ] as const;
      for (const [assessment, hex] of cases) {
        expect(assessmentColor(assessment)).toBe(hex);
      }
    });
  });

  describe('assessmentLabel', () => {
    it('converts snake_case to Title Case', () => {
      const labels = [
        ['red_flag', 'Red Flag'],
        ['well_specified', 'Well Specified'],
        ['healthy', 'Healthy'],
        ['high_idle', 'High Idle'],
        ['opus_plan_mode', 'Opus Plan Mode'],
      ] as const;
      for (const [raw, pretty] of labels) {
        expect(assessmentLabel(raw)).toBe(pretty);
      }
    });
  });

  describe('computeCostPerCommitAssessment', () => {
    it('returns efficient below threshold', () => {
      const verdict = computeCostPerCommitAssessment(0.3);
      expect(verdict).toBe('efficient');
    });

    it('returns normal in range', () => {
      const verdict = computeCostPerCommitAssessment(1.0);
      expect(verdict).toBe('normal');
    });

    it('returns expensive in range', () => {
      const verdict = computeCostPerCommitAssessment(3.0);
      expect(verdict).toBe('expensive');
    });

    it('returns red_flag above threshold', () => {
      const verdict = computeCostPerCommitAssessment(10.0);
      expect(verdict).toBe('red_flag');
    });

    it('respects threshold boundaries', () => {
      // Just under the configured cut-off is still "efficient"; the cut-off itself is "normal".
      const cutoff = THRESHOLDS.costPerCommit.efficient;
      expect(computeCostPerCommitAssessment(cutoff - 0.01)).toBe('efficient');
      expect(computeCostPerCommitAssessment(cutoff)).toBe('normal');
    });
  });

  describe('computeCostPerLineAssessment', () => {
    it('returns efficient below threshold', () => {
      const verdict = computeCostPerLineAssessment(0.005);
      expect(verdict).toBe('efficient');
    });

    it('returns red_flag above threshold', () => {
      const verdict = computeCostPerLineAssessment(0.5);
      expect(verdict).toBe('red_flag');
    });
  });

  describe('computeSubagentCostShareAssessment', () => {
    it('returns normal below 30%', () => {
      const verdict = computeSubagentCostShareAssessment(20);
      expect(verdict).toBe('normal');
    });

    it('returns high in range', () => {
      const verdict = computeSubagentCostShareAssessment(45);
      expect(verdict).toBe('high');
    });

    it('returns very_high in range', () => {
      const verdict = computeSubagentCostShareAssessment(70);
      expect(verdict).toBe('very_high');
    });

    it('returns red_flag above 80%', () => {
      const verdict = computeSubagentCostShareAssessment(90);
      expect(verdict).toBe('red_flag');
    });
  });

  describe('computeCacheEfficiencyAssessment', () => {
    it('returns good above 95%', () => {
      const verdict = computeCacheEfficiencyAssessment(96);
      expect(verdict).toBe('good');
    });

    it('returns concerning below 95%', () => {
      const verdict = computeCacheEfficiencyAssessment(90);
      expect(verdict).toBe('concerning');
    });
  });

  describe('computeCacheRatioAssessment', () => {
    it('returns good above 20', () => {
      const verdict = computeCacheRatioAssessment(25);
      expect(verdict).toBe('good');
    });

    it('returns concerning below 20', () => {
      const verdict = computeCacheRatioAssessment(10);
      expect(verdict).toBe('concerning');
    });
  });

  describe('computeToolHealthAssessment', () => {
    it('returns healthy above 95%', () => {
      const verdict = computeToolHealthAssessment(98);
      expect(verdict).toBe('healthy');
    });

    it('returns degraded between 80-95%', () => {
      const verdict = computeToolHealthAssessment(85);
      expect(verdict).toBe('degraded');
    });

    it('returns unreliable below 80%', () => {
      const verdict = computeToolHealthAssessment(70);
      expect(verdict).toBe('unreliable');
    });

    it('boundary: 95 is degraded, 95.1 is healthy', () => {
      const atBoundary = computeToolHealthAssessment(95);
      const justAbove = computeToolHealthAssessment(95.1);
      expect(atBoundary).toBe('degraded');
      expect(justAbove).toBe('healthy');
    });
  });

  describe('computeIdleAssessment', () => {
    it('returns efficient below 20%', () => {
      const verdict = computeIdleAssessment(10);
      expect(verdict).toBe('efficient');
    });

    it('returns moderate between 20-50%', () => {
      const verdict = computeIdleAssessment(35);
      expect(verdict).toBe('moderate');
    });

    it('returns high_idle above 50%', () => {
      const verdict = computeIdleAssessment(60);
      expect(verdict).toBe('high_idle');
    });
  });

  describe('computeRedundancyAssessment', () => {
    it('returns normal at or below 2.0', () => {
      for (const readsPerFile of [1.5, 2.0]) {
        expect(computeRedundancyAssessment(readsPerFile)).toBe('normal');
      }
    });

    it('returns wasteful above 2.0', () => {
      const verdict = computeRedundancyAssessment(3.0);
      expect(verdict).toBe('wasteful');
    });
  });

  describe('computeOverheadAssessment', () => {
    it('returns normal at or below 5%', () => {
      for (const pct of [3, 5]) {
        expect(computeOverheadAssessment(pct)).toBe('normal');
      }
    });

    it('returns heavy above 5%', () => {
      const verdict = computeOverheadAssessment(10);
      expect(verdict).toBe('heavy');
    });
  });

  describe('computeThrashingAssessment', () => {
    it('returns none for 0 signals', () => {
      const verdict = computeThrashingAssessment(0);
      expect(verdict).toBe('none');
    });

    it('returns mild for 1-2 signals', () => {
      for (const signals of [1, 2]) {
        expect(computeThrashingAssessment(signals)).toBe('mild');
      }
    });

    it('returns severe for 3+ signals', () => {
      for (const signals of [3, 5]) {
        expect(computeThrashingAssessment(signals)).toBe('severe');
      }
    });
  });

  describe('detectModelMismatch', () => {
    it('returns null for non-opus models', () => {
      const mismatch = detectModelMismatch('rename files', 'claude-sonnet-4');
      expect(mismatch).toBeNull();
    });

    it('detects mechanical tasks on opus', () => {
      const mismatch = detectModelMismatch('rename all variables', 'claude-opus-4');
      expect(mismatch).not.toBeNull();
      expect(mismatch?.expectedComplexity).toBe('mechanical');
    });

    it('detects read-only tasks on opus', () => {
      const mismatch = detectModelMismatch('explore the codebase', 'claude-opus-4');
      expect(mismatch).not.toBeNull();
      expect(mismatch?.expectedComplexity).toBe('read_only');
    });

    it('returns null for complex tasks on opus', () => {
      const mismatch = detectModelMismatch('implement authentication system', 'claude-opus-4');
      expect(mismatch).toBeNull();
    });

    it('detects various mechanical keywords', () => {
      const mechanicalKeywords = ['lint', 'format', 'delete', 'move', 'copy', 'replace'];
      mechanicalKeywords.forEach((kw) => {
        expect(detectModelMismatch(`${kw} the code`, 'opus')).not.toBeNull();
      });
    });

    it('detects various read-only keywords', () => {
      const readOnlyKeywords = ['search', 'find', 'verify', 'check', 'scan', 'discover'];
      readOnlyKeywords.forEach((kw) => {
        expect(detectModelMismatch(`${kw} for errors`, 'opus')).not.toBeNull();
      });
    });
  });

  describe('detectSwitchPattern', () => {
    // Builds the minimal { from, to } switch record these tests pass in.
    const hop = (from: string, to: string) => ({ from, to });

    it('returns null for no switches', () => {
      expect(detectSwitchPattern([])).toBeNull();
    });

    it('returns manual_switch for single switch', () => {
      const switches = [hop('claude-sonnet-4', 'claude-haiku-4')];
      expect(detectSwitchPattern(switches)).toBe('manual_switch');
    });

    it('detects opus_plan_mode pattern', () => {
      // Sonnet -> Opus followed directly by Opus -> Sonnet.
      const switches = [
        hop('claude-sonnet-4', 'claude-opus-4'),
        hop('claude-opus-4', 'claude-sonnet-4'),
      ];
      expect(detectSwitchPattern(switches)).toBe('opus_plan_mode');
    });

    it('returns manual_switch for non-plan-mode switches', () => {
      const switches = [
        hop('claude-sonnet-4', 'claude-haiku-4'),
        hop('claude-haiku-4', 'claude-sonnet-4'),
      ];
      expect(detectSwitchPattern(switches)).toBe('manual_switch');
    });
  });
});

View file

@ -1181,4 +1181,329 @@ describe('analyzeSession', () => {
expect(report.compaction.note).toContain('No compaction');
});
});
// -------------------------------------------------------------------------
// Assessment computations
// -------------------------------------------------------------------------
// Cost assessments on the report: set when a commit is present, null for the
// default mock session (no commits, no lines changed, no cost).
describe('cost assessments', () => {
  it('computes costPerCommitAssessment when commits exist', () => {
    // One assistant turn with token usage that also issues a `git commit` through Bash.
    const committingTurn = createMockMessage({
      type: 'assistant',
      model: 'claude-sonnet-4-20250514',
      usage: { input_tokens: 50000, output_tokens: 10000 },
      toolCalls: [
        {
          id: 'tc-1',
          name: 'Bash',
          input: { command: "git commit -m 'fix'" },
          isTask: false,
        },
      ],
    });
    const { costAnalysis } = analyzeSession(createMockDetail({ messages: [committingTurn] }));
    expect(costAnalysis.costPerCommitAssessment).not.toBeNull();
  });

  it('returns null assessments when no commits', () => {
    const { costAnalysis } = analyzeSession(createMockDetail());
    expect(costAnalysis.costPerCommitAssessment).toBeNull();
    expect(costAnalysis.costPerLineAssessment).toBeNull();
  });

  it('returns null subagentCostShareAssessment when no cost', () => {
    const { costAnalysis } = analyzeSession(createMockDetail());
    expect(costAnalysis.subagentCostSharePct).toBeNull();
    expect(costAnalysis.subagentCostShareAssessment).toBeNull();
  });
});
// Cache assessments, driven by the cache creation/read token counts of a single assistant turn.
describe('cache assessments', () => {
  // Analyzes a session made of one assistant message with the given cache token counts.
  const analyzeWithCache = (cacheCreation: number, cacheRead: number) =>
    analyzeSession(
      createMockDetail({
        messages: [
          createMockMessage({
            type: 'assistant',
            model: 'claude-sonnet-4-20250514',
            usage: {
              input_tokens: 100,
              output_tokens: 50,
              cache_creation_input_tokens: cacheCreation,
              cache_read_input_tokens: cacheRead,
            },
          }),
        ],
      })
    );

  it('computes cache efficiency assessment', () => {
    // 9900 read vs 100 created.
    const { cacheEconomics } = analyzeWithCache(100, 9900);
    expect(cacheEconomics.cacheEfficiencyAssessment).toBe('good');
  });

  it('returns concerning for low cache efficiency', () => {
    // Reads and creations are equal.
    const { cacheEconomics } = analyzeWithCache(500, 500);
    expect(cacheEconomics.cacheEfficiencyAssessment).toBe('concerning');
  });

  it('returns null when no cache data', () => {
    const { cacheEconomics } = analyzeWithCache(0, 0);
    expect(cacheEconomics.cacheEfficiencyAssessment).toBeNull();
    expect(cacheEconomics.cacheRatioAssessment).toBeNull();
  });
});
// Tool health: the per-tool assessment and the session-wide roll-up.
describe('tool health assessments', () => {
  it('computes per-tool assessment', () => {
    // Two Read calls, both answered with non-error tool results.
    const readsTurn = createMockMessage({
      type: 'assistant',
      toolCalls: [
        { id: 'tc-1', name: 'Read', input: { file_path: '/a.ts' }, isTask: false },
        { id: 'tc-2', name: 'Read', input: { file_path: '/b.ts' }, isTask: false },
      ],
    });
    const resultsTurn = createMockMessage({
      type: 'user',
      isMeta: true,
      content: [],
      toolResults: [
        { toolUseId: 'tc-1', content: 'ok', isError: false },
        { toolUseId: 'tc-2', content: 'ok', isError: false },
      ],
    });
    const { toolUsage } = analyzeSession(
      createMockDetail({ messages: [readsTurn, resultsTurn] })
    );
    expect(toolUsage.successRates.Read.assessment).toBe('healthy');
  });

  it('computes overall tool health', () => {
    const { toolUsage } = analyzeSession(createMockDetail());
    expect(toolUsage.overallToolHealth).toBe('healthy');
  });
});
// Idle assessment, driven purely by the timestamps of consecutive messages.
describe('idle assessment', () => {
  it('returns efficient for low idle', () => {
    // Thirty seconds between the assistant turn and the user's reply.
    const assistantTurn = createMockMessage({
      type: 'assistant',
      timestamp: new Date('2024-01-01T10:00:00Z'),
    });
    const quickReply = createMockMessage({
      type: 'user',
      content: 'quick',
      timestamp: new Date('2024-01-01T10:00:30Z'),
    });
    const { idleAnalysis } = analyzeSession(
      createMockDetail({ messages: [assistantTurn, quickReply] })
    );
    expect(idleAnalysis.idleAssessment).toBe('efficient');
  });

  it('returns high_idle for mostly idle session', () => {
    // A one-hour gap before the user returns, then ten seconds of activity.
    const firstTurn = createMockMessage({
      type: 'assistant',
      timestamp: new Date('2024-01-01T10:00:00Z'),
    });
    const lateReply = createMockMessage({
      type: 'user',
      content: 'back',
      timestamp: new Date('2024-01-01T11:00:00Z'),
    });
    const followUp = createMockMessage({
      type: 'assistant',
      timestamp: new Date('2024-01-01T11:00:10Z'),
    });
    const { idleAnalysis } = analyzeSession(
      createMockDetail({ messages: [firstTurn, lateReply, followUp] })
    );
    expect(idleAnalysis.idleAssessment).toBe('high_idle');
  });
});
// Thrashing assessment derived from the thrashing signals found in the session.
describe('thrashing assessment', () => {
  it('returns none when no signals', () => {
    const { thrashingSignals } = analyzeSession(createMockDetail());
    expect(thrashingSignals.thrashingAssessment).toBe('none');
  });

  it('returns mild or severe based on signal count', () => {
    // Fixture: three Edit calls on /foo.ts and no Bash calls. Presumably the
    // repeated edits register as an edit-rework signal (verify against the
    // rework threshold in analyzeSession); the assertion only requires that
    // some level of thrashing is reported.
    const messages = ['e1', 'e2', 'e3'].map((id) =>
      createMockMessage({
        type: 'assistant',
        toolCalls: [{ id, name: 'Edit', input: { file_path: '/foo.ts' }, isTask: false }],
      })
    );
    const { thrashingSignals } = analyzeSession(createMockDetail({ messages }));
    expect(['mild', 'severe']).toContain(thrashingSignals.thrashingAssessment);
  });
});
// Switch-pattern classification as surfaced on report.modelSwitches.
describe('model switch pattern', () => {
  it('detects opus_plan_mode', () => {
    // Consecutive assistant turns on Sonnet, then Opus, then Sonnet again.
    const turnOn = (model: string, isoTime: string) =>
      createMockMessage({ type: 'assistant', model, timestamp: new Date(isoTime) });
    const messages = [
      turnOn('claude-sonnet-4-20250514', '2024-01-01T10:00:00Z'),
      turnOn('claude-opus-4-20250514', '2024-01-01T10:01:00Z'),
      turnOn('claude-sonnet-4-20250514', '2024-01-01T10:02:00Z'),
    ];
    const { modelSwitches } = analyzeSession(createMockDetail({ messages }));
    expect(modelSwitches.switchPattern).toBe('opus_plan_mode');
  });

  it('returns null when no switches', () => {
    // A single assistant turn, so the model never changes.
    const onlyTurn = createMockMessage({
      type: 'assistant',
      model: 'claude-sonnet-4-20250514',
      usage: { input_tokens: 100, output_tokens: 50 },
    });
    const { modelSwitches } = analyzeSession(createMockDetail({ messages: [onlyTurn] }));
    expect(modelSwitches.switchPattern).toBeNull();
  });
});
// Startup overhead assessment (tokens used before the first work tool call).
describe('startup overhead assessment', () => {
  it('returns normal for low overhead', () => {
    // The very first assistant turn already issues a Read.
    const immediateWork = createMockMessage({
      type: 'assistant',
      model: 'claude-sonnet-4-20250514',
      usage: { input_tokens: 100, output_tokens: 50 },
      toolCalls: [{ id: 'tc-1', name: 'Read', input: { file_path: '/f.ts' }, isTask: false }],
    });
    const { startupOverhead } = analyzeSession(createMockDetail({ messages: [immediateWork] }));
    expect(startupOverhead.overheadAssessment).toBe('normal');
  });

  it('returns heavy for high overhead', () => {
    // A token-heavy turn with no tool calls...
    const preamble = createMockMessage({
      type: 'assistant',
      model: 'claude-sonnet-4-20250514',
      usage: { input_tokens: 50000, output_tokens: 10000 },
      toolCalls: [],
    });
    // ...followed by a small turn that performs the first Read.
    const firstWork = createMockMessage({
      type: 'assistant',
      model: 'claude-sonnet-4-20250514',
      usage: { input_tokens: 100, output_tokens: 50 },
      toolCalls: [{ id: 'tc-1', name: 'Read', input: { file_path: '/f.ts' }, isTask: false }],
    });
    const { startupOverhead } = analyzeSession(
      createMockDetail({ messages: [preamble, firstWork] })
    );
    expect(startupOverhead.overheadAssessment).toBe('heavy');
  });
});
// Redundancy assessment for Read calls, based on reads per unique file.
describe('file read redundancy assessment', () => {
  // Builds a Read tool call for the given id and path.
  const readCall = (id: string, filePath: string) => ({
    id,
    name: 'Read',
    input: { file_path: filePath },
    isTask: false,
  });

  it('returns normal for low redundancy', () => {
    // Two reads of two different files.
    const turn = createMockMessage({
      type: 'assistant',
      toolCalls: [readCall('tc-1', '/a.ts'), readCall('tc-2', '/b.ts')],
    });
    const { fileReadRedundancy } = analyzeSession(createMockDetail({ messages: [turn] }));
    expect(fileReadRedundancy.redundancyAssessment).toBe('normal');
  });

  it('returns wasteful for high redundancy', () => {
    // Four reads of the same file.
    const turn = createMockMessage({
      type: 'assistant',
      toolCalls: [
        readCall('tc-1', '/a.ts'),
        readCall('tc-2', '/a.ts'),
        readCall('tc-3', '/a.ts'),
        readCall('tc-4', '/a.ts'),
      ],
    });
    const { fileReadRedundancy } = analyzeSession(createMockDetail({ messages: [turn] }));
    expect(fileReadRedundancy.redundancyAssessment).toBe('wasteful');
  });
});
// Model-mismatch flag on subagent entries built from detail.processes.
describe('model mismatch in subagents', () => {
  // The title now states what is asserted: the previous wording ("detects
  // mismatch ...") contradicted the toBeNull() expectation below.
  it('reports no mismatch when the subagent model is the inherited default', () => {
    const processes: Process[] = [
      {
        id: 'agent-1',
        filePath: '/path/to/agent-1.jsonl',
        messages: [],
        startTime: new Date('2024-01-01T10:00:00Z'),
        endTime: new Date('2024-01-01T10:01:00Z'),
        durationMs: 60000,
        metrics: createMockMetrics({ totalTokens: 5000, costUsd: 0.05 }),
        // A mechanical-sounding description that would be flagged on an opus model.
        description: 'rename all variables',
        subagentType: 'code',
        isParallel: false,
      },
    ];
    const report = analyzeSession(createMockDetail({ processes }));
    // analyzeSession labels every subagent's model 'default (inherits parent)',
    // which does not contain 'opus', so no mismatch is reported here.
    expect(report.subagentMetrics.byAgent[0].modelMismatch).toBeNull();
  });
});
});