feat(report): add threshold-based assessment badges to all report sections
Add interpretive assessment layer to session reports. Every metric section now shows color-coded severity badges (green/amber/red) computed from configurable thresholds, augmenting raw numbers with an at-a-glance signal. Includes centralized reportAssessments utility, model mismatch detection, switch pattern recognition, and 44 new tests. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
ab1ad071fe
commit
820bb124d5
17 changed files with 1208 additions and 161 deletions
|
|
@ -191,9 +191,7 @@ export const FlatInjectionList = ({
|
|||
}
|
||||
};
|
||||
|
||||
const displayText = row.description
|
||||
? `${row.label} \u2014 ${row.description}`
|
||||
: row.label;
|
||||
const displayText = row.description ? `${row.label} \u2014 ${row.description}` : row.label;
|
||||
|
||||
return (
|
||||
<div key={row.key} className="flex items-center gap-0.5">
|
||||
|
|
|
|||
|
|
@ -152,7 +152,7 @@ export function extractOutputText(content: string | unknown[]): string {
|
|||
.map((block) =>
|
||||
typeof block === 'object' && block !== null && 'text' in block
|
||||
? (block as { text: string }).text
|
||||
: JSON.stringify(block, null, 2),
|
||||
: JSON.stringify(block, null, 2)
|
||||
)
|
||||
.join('\n');
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ export const SessionReportTab = ({ tab }: SessionReportTabProps) => {
|
|||
prompt={report.promptQuality}
|
||||
startup={report.startupOverhead}
|
||||
testProgression={report.testProgression}
|
||||
fileReadRedundancy={report.fileReadRedundancy}
|
||||
/>
|
||||
<InsightsSection
|
||||
skills={report.skillsInvoked}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { assessmentColor, assessmentLabel } from '@renderer/utils/reportAssessments';
|
||||
import { DollarSign } from 'lucide-react';
|
||||
|
||||
import { ReportSection } from '../ReportSection';
|
||||
|
|
@ -28,18 +29,62 @@ export const CostSection = ({ data }: CostSectionProps) => {
|
|||
</div>
|
||||
<div>
|
||||
<div className="text-xs text-text-muted">Per Commit</div>
|
||||
<div className="text-sm font-medium text-text">
|
||||
{data.costPerCommit != null ? fmt(data.costPerCommit) : 'N/A'}
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-sm font-medium text-text">
|
||||
{data.costPerCommit != null ? fmt(data.costPerCommit) : 'N/A'}
|
||||
</span>
|
||||
{data.costPerCommitAssessment && (
|
||||
<span
|
||||
className="rounded px-2 py-0.5 text-xs font-medium"
|
||||
style={{
|
||||
backgroundColor: `${assessmentColor(data.costPerCommitAssessment)}20`,
|
||||
color: assessmentColor(data.costPerCommitAssessment),
|
||||
}}
|
||||
>
|
||||
{assessmentLabel(data.costPerCommitAssessment)}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-xs text-text-muted">Per Line Changed</div>
|
||||
<div className="text-sm font-medium text-text">
|
||||
{data.costPerLineChanged != null ? `$${data.costPerLineChanged.toFixed(6)}` : 'N/A'}
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-sm font-medium text-text">
|
||||
{data.costPerLineChanged != null ? `$${data.costPerLineChanged.toFixed(6)}` : 'N/A'}
|
||||
</span>
|
||||
{data.costPerLineAssessment && (
|
||||
<span
|
||||
className="rounded px-2 py-0.5 text-xs font-medium"
|
||||
style={{
|
||||
backgroundColor: `${assessmentColor(data.costPerLineAssessment)}20`,
|
||||
color: assessmentColor(data.costPerLineAssessment),
|
||||
}}
|
||||
>
|
||||
{assessmentLabel(data.costPerLineAssessment)}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{data.subagentCostSharePct != null && (
|
||||
<div className="mb-4 flex items-center gap-2">
|
||||
<span className="text-xs text-text-muted">Subagent Cost Share:</span>
|
||||
<span className="text-sm font-medium text-text">{data.subagentCostSharePct}%</span>
|
||||
{data.subagentCostShareAssessment && (
|
||||
<span
|
||||
className="rounded px-2 py-0.5 text-xs font-medium"
|
||||
style={{
|
||||
backgroundColor: `${assessmentColor(data.subagentCostShareAssessment)}20`,
|
||||
color: assessmentColor(data.subagentCostShareAssessment),
|
||||
}}
|
||||
>
|
||||
{assessmentLabel(data.subagentCostShareAssessment)}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{modelEntries.length > 0 && (
|
||||
<table className="w-full text-xs">
|
||||
<thead>
|
||||
|
|
|
|||
|
|
@ -1,29 +1,28 @@
|
|||
import { assessmentColor, assessmentLabel } from '@renderer/utils/reportAssessments';
|
||||
import { MessageSquareWarning } from 'lucide-react';
|
||||
|
||||
import { ReportSection } from '../ReportSection';
|
||||
|
||||
import type { ReportFrictionSignals, ReportThrashingSignals } from '@renderer/types/sessionReport';
|
||||
|
||||
const frictionColor = (rate: number): string => {
|
||||
if (rate <= 0.1) return '#4ade80';
|
||||
if (rate <= 0.25) return '#fbbf24';
|
||||
return '#f87171';
|
||||
};
|
||||
|
||||
interface FrictionSectionProps {
|
||||
data: ReportFrictionSignals;
|
||||
thrashing: ReportThrashingSignals;
|
||||
}
|
||||
|
||||
export const FrictionSection = ({ data, thrashing }: FrictionSectionProps) => {
|
||||
const frictionColor =
|
||||
data.frictionRate <= 0.1 ? '#4ade80' : data.frictionRate <= 0.25 ? '#fbbf24' : '#f87171';
|
||||
const thrashColor = assessmentColor(thrashing.thrashingAssessment);
|
||||
|
||||
return (
|
||||
<ReportSection title="Friction Signals" icon={MessageSquareWarning}>
|
||||
<div className="mb-4 flex items-center gap-3">
|
||||
<span
|
||||
className="rounded px-2 py-0.5 text-xs font-medium"
|
||||
style={{
|
||||
backgroundColor: `${frictionColor(data.frictionRate)}20`,
|
||||
color: frictionColor(data.frictionRate),
|
||||
backgroundColor: `${frictionColor}20`,
|
||||
color: frictionColor,
|
||||
}}
|
||||
>
|
||||
Friction Rate: {(data.frictionRate * 100).toFixed(1)}%
|
||||
|
|
@ -54,7 +53,15 @@ export const FrictionSection = ({ data, thrashing }: FrictionSectionProps) => {
|
|||
|
||||
{(thrashing.bashNearDuplicates.length > 0 || thrashing.editReworkFiles.length > 0) && (
|
||||
<div>
|
||||
<div className="mb-2 text-xs font-medium text-text-muted">Thrashing Signals</div>
|
||||
<div className="mb-2 flex items-center gap-2">
|
||||
<span className="text-xs font-medium text-text-muted">Thrashing Signals</span>
|
||||
<span
|
||||
className="rounded px-2 py-0.5 text-xs font-medium"
|
||||
style={{ backgroundColor: `${thrashColor}20`, color: thrashColor }}
|
||||
>
|
||||
{assessmentLabel(thrashing.thrashingAssessment)}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{thrashing.bashNearDuplicates.length > 0 && (
|
||||
<div className="mb-2">
|
||||
|
|
|
|||
|
|
@ -1,24 +1,10 @@
|
|||
import { assessmentColor } from '@renderer/utils/reportAssessments';
|
||||
import { Activity } from 'lucide-react';
|
||||
|
||||
import { ReportSection } from '../ReportSection';
|
||||
|
||||
import type { ReportOverview } from '@renderer/types/sessionReport';
|
||||
|
||||
const assessmentColor = (assessment: ReportOverview['contextAssessment']): string => {
|
||||
switch (assessment) {
|
||||
case 'healthy':
|
||||
return '#4ade80';
|
||||
case 'moderate':
|
||||
return '#fbbf24';
|
||||
case 'high':
|
||||
return '#f87171';
|
||||
case 'critical':
|
||||
return '#f87171';
|
||||
default:
|
||||
return '#a1a1aa';
|
||||
}
|
||||
};
|
||||
|
||||
interface OverviewSectionProps {
|
||||
data: ReportOverview;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,63 +1,33 @@
|
|||
import { assessmentColor, assessmentLabel, severityColor } from '@renderer/utils/reportAssessments';
|
||||
import { BarChart3 } from 'lucide-react';
|
||||
|
||||
import { ReportSection } from '../ReportSection';
|
||||
|
||||
import type {
|
||||
ReportFileReadRedundancy,
|
||||
ReportPromptQuality,
|
||||
ReportStartupOverhead,
|
||||
ReportTestProgression,
|
||||
} from '@renderer/types/sessionReport';
|
||||
|
||||
const assessmentColor = (assessment: ReportPromptQuality['assessment']): string => {
|
||||
switch (assessment) {
|
||||
case 'well_specified':
|
||||
return '#4ade80';
|
||||
case 'moderate_friction':
|
||||
return '#fbbf24';
|
||||
case 'underspecified':
|
||||
return '#f87171';
|
||||
case 'verbose_but_unclear':
|
||||
return '#f87171';
|
||||
default:
|
||||
return '#a1a1aa';
|
||||
}
|
||||
};
|
||||
|
||||
const assessmentLabel = (assessment: ReportPromptQuality['assessment']): string => {
|
||||
switch (assessment) {
|
||||
case 'well_specified':
|
||||
return 'Well Specified';
|
||||
case 'moderate_friction':
|
||||
return 'Moderate Friction';
|
||||
case 'underspecified':
|
||||
return 'Underspecified';
|
||||
case 'verbose_but_unclear':
|
||||
return 'Verbose but Unclear';
|
||||
default:
|
||||
return assessment;
|
||||
}
|
||||
};
|
||||
|
||||
const trajectoryColor = (trajectory: ReportTestProgression['trajectory']): string => {
|
||||
switch (trajectory) {
|
||||
case 'improving':
|
||||
return '#4ade80';
|
||||
case 'regressing':
|
||||
return '#f87171';
|
||||
case 'stable':
|
||||
return '#fbbf24';
|
||||
default:
|
||||
return '#a1a1aa';
|
||||
}
|
||||
};
|
||||
|
||||
interface QualitySectionProps {
|
||||
prompt: ReportPromptQuality;
|
||||
startup: ReportStartupOverhead;
|
||||
testProgression: ReportTestProgression;
|
||||
fileReadRedundancy: ReportFileReadRedundancy;
|
||||
}
|
||||
|
||||
export const QualitySection = ({ prompt, startup, testProgression }: QualitySectionProps) => {
|
||||
export const QualitySection = ({
|
||||
prompt,
|
||||
startup,
|
||||
testProgression,
|
||||
fileReadRedundancy,
|
||||
}: QualitySectionProps) => {
|
||||
const promptColor = assessmentColor(prompt.assessment);
|
||||
const trajectoryColor = assessmentColor(testProgression.trajectory);
|
||||
const overheadColor = assessmentColor(startup.overheadAssessment);
|
||||
const redundancyColor = assessmentColor(fileReadRedundancy.redundancyAssessment);
|
||||
|
||||
return (
|
||||
<ReportSection title="Quality Signals" icon={BarChart3}>
|
||||
{/* Prompt quality */}
|
||||
|
|
@ -66,10 +36,7 @@ export const QualitySection = ({ prompt, startup, testProgression }: QualitySect
|
|||
<div className="mb-2 flex items-center gap-2">
|
||||
<span
|
||||
className="rounded px-2 py-0.5 text-xs font-medium"
|
||||
style={{
|
||||
backgroundColor: `${assessmentColor(prompt.assessment)}20`,
|
||||
color: assessmentColor(prompt.assessment),
|
||||
}}
|
||||
style={{ backgroundColor: `${promptColor}20`, color: promptColor }}
|
||||
>
|
||||
{assessmentLabel(prompt.assessment)}
|
||||
</span>
|
||||
|
|
@ -101,7 +68,15 @@ export const QualitySection = ({ prompt, startup, testProgression }: QualitySect
|
|||
|
||||
{/* Startup overhead */}
|
||||
<div className="mb-4">
|
||||
<div className="mb-2 text-xs font-medium text-text-muted">Startup Overhead</div>
|
||||
<div className="mb-2 flex items-center gap-2">
|
||||
<span className="text-xs font-medium text-text-muted">Startup Overhead</span>
|
||||
<span
|
||||
className="rounded px-2 py-0.5 text-xs font-medium"
|
||||
style={{ backgroundColor: `${overheadColor}20`, color: overheadColor }}
|
||||
>
|
||||
{assessmentLabel(startup.overheadAssessment)}
|
||||
</span>
|
||||
</div>
|
||||
<div className="grid grid-cols-2 gap-3 sm:grid-cols-3">
|
||||
<div>
|
||||
<div className="text-xs text-text-muted">Messages Before Work</div>
|
||||
|
|
@ -120,21 +95,44 @@ export const QualitySection = ({ prompt, startup, testProgression }: QualitySect
|
|||
</div>
|
||||
</div>
|
||||
|
||||
{/* File read redundancy */}
|
||||
<div className="mb-4">
|
||||
<div className="mb-2 flex items-center gap-2">
|
||||
<span className="text-xs font-medium text-text-muted">File Read Redundancy</span>
|
||||
<span
|
||||
className="rounded px-2 py-0.5 text-xs font-medium"
|
||||
style={{ backgroundColor: `${redundancyColor}20`, color: redundancyColor }}
|
||||
>
|
||||
{assessmentLabel(fileReadRedundancy.redundancyAssessment)}
|
||||
</span>
|
||||
</div>
|
||||
<div className="grid grid-cols-2 gap-3 sm:grid-cols-3">
|
||||
<div>
|
||||
<div className="text-xs text-text-muted">Total Reads</div>
|
||||
<div className="text-sm font-medium text-text">{fileReadRedundancy.totalReads}</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-xs text-text-muted">Unique Files</div>
|
||||
<div className="text-sm font-medium text-text">{fileReadRedundancy.uniqueFiles}</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-xs text-text-muted">Reads/Unique File</div>
|
||||
<div className="text-sm font-medium text-text">
|
||||
{fileReadRedundancy.readsPerUniqueFile}x
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Test progression */}
|
||||
<div>
|
||||
<div className="mb-2 text-xs font-medium text-text-muted">Test Progression</div>
|
||||
<div className="mb-2 flex items-center gap-2">
|
||||
<span
|
||||
className="rounded px-2 py-0.5 text-xs font-medium"
|
||||
style={{
|
||||
backgroundColor: `${trajectoryColor(testProgression.trajectory)}20`,
|
||||
color: trajectoryColor(testProgression.trajectory),
|
||||
}}
|
||||
style={{ backgroundColor: `${trajectoryColor}20`, color: trajectoryColor }}
|
||||
>
|
||||
{testProgression.trajectory === 'insufficient_data'
|
||||
? 'Insufficient Data'
|
||||
: testProgression.trajectory.charAt(0).toUpperCase() +
|
||||
testProgression.trajectory.slice(1)}
|
||||
{assessmentLabel(testProgression.trajectory)}
|
||||
</span>
|
||||
<span className="text-xs text-text-muted">
|
||||
{testProgression.snapshotCount} snapshot{testProgression.snapshotCount !== 1 ? 's' : ''}
|
||||
|
|
@ -145,11 +143,11 @@ export const QualitySection = ({ prompt, startup, testProgression }: QualitySect
|
|||
<div>
|
||||
<div className="text-xs text-text-muted">First Run</div>
|
||||
<div className="text-sm text-text">
|
||||
<span style={{ color: '#4ade80' }}>
|
||||
<span style={{ color: severityColor('good') }}>
|
||||
{testProgression.firstSnapshot.passed} passed
|
||||
</span>
|
||||
{' / '}
|
||||
<span style={{ color: '#f87171' }}>
|
||||
<span style={{ color: severityColor('danger') }}>
|
||||
{testProgression.firstSnapshot.failed} failed
|
||||
</span>
|
||||
</div>
|
||||
|
|
@ -157,11 +155,11 @@ export const QualitySection = ({ prompt, startup, testProgression }: QualitySect
|
|||
<div>
|
||||
<div className="text-xs text-text-muted">Last Run</div>
|
||||
<div className="text-sm text-text">
|
||||
<span style={{ color: '#4ade80' }}>
|
||||
<span style={{ color: severityColor('good') }}>
|
||||
{testProgression.lastSnapshot.passed} passed
|
||||
</span>
|
||||
{' / '}
|
||||
<span style={{ color: '#f87171' }}>
|
||||
<span style={{ color: severityColor('danger') }}>
|
||||
{testProgression.lastSnapshot.failed} failed
|
||||
</span>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { severityColor } from '@renderer/utils/reportAssessments';
|
||||
import { Users } from 'lucide-react';
|
||||
|
||||
import { ReportSection } from '../ReportSection';
|
||||
|
|
@ -53,8 +54,19 @@ export const SubagentSection = ({ data }: SubagentSectionProps) => {
|
|||
<tbody>
|
||||
{data.byAgent.map((agent, idx) => (
|
||||
<tr key={idx} className="border-border/50 border-b">
|
||||
<td className="max-w-48 truncate py-1.5 pr-4 text-text" title={agent.description}>
|
||||
{agent.description}
|
||||
<td className="max-w-48 py-1.5 pr-4 text-text">
|
||||
<div className="truncate" title={agent.description}>
|
||||
{agent.description}
|
||||
</div>
|
||||
{agent.modelMismatch && (
|
||||
<div
|
||||
className="mt-0.5 truncate text-[10px]"
|
||||
style={{ color: severityColor('warning') }}
|
||||
title={agent.modelMismatch.recommendation}
|
||||
>
|
||||
{agent.modelMismatch.recommendation}
|
||||
</div>
|
||||
)}
|
||||
</td>
|
||||
<td className="py-1.5 pr-4 text-text-secondary">{agent.subagentType}</td>
|
||||
<td className="py-1.5 pr-4 text-right text-text">
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { assessmentColor, assessmentLabel } from '@renderer/utils/reportAssessments';
|
||||
import { Clock } from 'lucide-react';
|
||||
|
||||
import { ReportSection } from '../ReportSection';
|
||||
|
|
@ -15,11 +16,21 @@ interface TimelineSectionProps {
|
|||
}
|
||||
|
||||
export const TimelineSection = ({ idle, modelSwitches, keyEvents }: TimelineSectionProps) => {
|
||||
const idleColor = assessmentColor(idle.idleAssessment);
|
||||
|
||||
return (
|
||||
<ReportSection title="Timeline & Activity" icon={Clock}>
|
||||
{/* Idle stats */}
|
||||
<div className="mb-4">
|
||||
<div className="mb-2 text-xs font-medium text-text-muted">Idle Analysis</div>
|
||||
<div className="mb-2 flex items-center gap-2">
|
||||
<span className="text-xs font-medium text-text-muted">Idle Analysis</span>
|
||||
<span
|
||||
className="rounded px-2 py-0.5 text-xs font-medium"
|
||||
style={{ backgroundColor: `${idleColor}20`, color: idleColor }}
|
||||
>
|
||||
{assessmentLabel(idle.idleAssessment)}
|
||||
</span>
|
||||
</div>
|
||||
<div className="grid grid-cols-2 gap-3 sm:grid-cols-4">
|
||||
<div>
|
||||
<div className="text-xs text-text-muted">Idle Gaps</div>
|
||||
|
|
@ -35,10 +46,7 @@ export const TimelineSection = ({ idle, modelSwitches, keyEvents }: TimelineSect
|
|||
</div>
|
||||
<div>
|
||||
<div className="text-xs text-text-muted">Idle %</div>
|
||||
<div
|
||||
className="text-sm font-medium"
|
||||
style={{ color: idle.idlePct > 50 ? '#fbbf24' : '#4ade80' }}
|
||||
>
|
||||
<div className="text-sm font-medium" style={{ color: idleColor }}>
|
||||
{idle.idlePct}%
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -48,8 +56,21 @@ export const TimelineSection = ({ idle, modelSwitches, keyEvents }: TimelineSect
|
|||
{/* Model switches */}
|
||||
{modelSwitches.count > 0 && (
|
||||
<div className="mb-4">
|
||||
<div className="mb-2 text-xs font-medium text-text-muted">
|
||||
Model Switches ({modelSwitches.count})
|
||||
<div className="mb-2 flex items-center gap-2">
|
||||
<span className="text-xs font-medium text-text-muted">
|
||||
Model Switches ({modelSwitches.count})
|
||||
</span>
|
||||
{modelSwitches.switchPattern && (
|
||||
<span
|
||||
className="rounded px-2 py-0.5 text-xs font-medium"
|
||||
style={{
|
||||
backgroundColor: `${assessmentColor(modelSwitches.switchPattern)}20`,
|
||||
color: assessmentColor(modelSwitches.switchPattern),
|
||||
}}
|
||||
>
|
||||
{assessmentLabel(modelSwitches.switchPattern)}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex flex-col gap-1">
|
||||
{modelSwitches.switches.map((sw, idx) => (
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { assessmentColor, assessmentLabel } from '@renderer/utils/reportAssessments';
|
||||
import { Coins } from 'lucide-react';
|
||||
|
||||
import { ReportSection } from '../ReportSection';
|
||||
|
|
@ -65,12 +66,40 @@ export const TokenSection = ({ data, cacheEconomics }: TokenSectionProps) => {
|
|||
<div className="grid grid-cols-2 gap-3 sm:grid-cols-4">
|
||||
<div>
|
||||
<div className="text-xs text-text-muted">Cache Efficiency</div>
|
||||
<div className="text-sm font-medium text-text">{cacheEconomics.cacheEfficiencyPct}%</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-sm font-medium text-text">
|
||||
{cacheEconomics.cacheEfficiencyPct}%
|
||||
</span>
|
||||
{cacheEconomics.cacheEfficiencyAssessment && (
|
||||
<span
|
||||
className="rounded px-2 py-0.5 text-xs font-medium"
|
||||
style={{
|
||||
backgroundColor: `${assessmentColor(cacheEconomics.cacheEfficiencyAssessment)}20`,
|
||||
color: assessmentColor(cacheEconomics.cacheEfficiencyAssessment),
|
||||
}}
|
||||
>
|
||||
{assessmentLabel(cacheEconomics.cacheEfficiencyAssessment)}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-xs text-text-muted">R/W Ratio</div>
|
||||
<div className="text-sm font-medium text-text">
|
||||
{cacheEconomics.cacheReadToWriteRatio}x
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-sm font-medium text-text">
|
||||
{cacheEconomics.cacheReadToWriteRatio}x
|
||||
</span>
|
||||
{cacheEconomics.cacheRatioAssessment && (
|
||||
<span
|
||||
className="rounded px-2 py-0.5 text-xs font-medium"
|
||||
style={{
|
||||
backgroundColor: `${assessmentColor(cacheEconomics.cacheRatioAssessment)}20`,
|
||||
color: assessmentColor(cacheEconomics.cacheRatioAssessment),
|
||||
}}
|
||||
>
|
||||
{assessmentLabel(cacheEconomics.cacheRatioAssessment)}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { assessmentColor, assessmentLabel } from '@renderer/utils/reportAssessments';
|
||||
import { Wrench } from 'lucide-react';
|
||||
|
||||
import { ReportSection } from '../ReportSection';
|
||||
|
|
@ -13,10 +14,23 @@ export const ToolSection = ({ data }: ToolSectionProps) => {
|
|||
(a, b) => b[1].totalCalls - a[1].totalCalls
|
||||
);
|
||||
|
||||
const healthColor = assessmentColor(data.overallToolHealth);
|
||||
|
||||
return (
|
||||
<ReportSection title="Tool Usage" icon={Wrench}>
|
||||
<div className="mb-2 text-xs text-text-muted">
|
||||
{data.totalCalls.toLocaleString()} total calls across {toolEntries.length} tools
|
||||
<div className="mb-2 flex items-center gap-2">
|
||||
<span className="text-xs text-text-muted">
|
||||
{data.totalCalls.toLocaleString()} total calls across {toolEntries.length} tools
|
||||
</span>
|
||||
<span
|
||||
className="rounded px-2 py-0.5 text-xs font-medium"
|
||||
style={{
|
||||
backgroundColor: `${healthColor}20`,
|
||||
color: healthColor,
|
||||
}}
|
||||
>
|
||||
{assessmentLabel(data.overallToolHealth)}
|
||||
</span>
|
||||
</div>
|
||||
<div className="overflow-x-auto">
|
||||
<table className="w-full text-xs">
|
||||
|
|
@ -25,18 +39,13 @@ export const ToolSection = ({ data }: ToolSectionProps) => {
|
|||
<th className="pb-2 pr-4">Tool</th>
|
||||
<th className="pb-2 pr-4 text-right">Calls</th>
|
||||
<th className="pb-2 pr-4 text-right">Errors</th>
|
||||
<th className="pb-2 text-right">Success %</th>
|
||||
<th className="pb-2 pr-4 text-right">Success %</th>
|
||||
<th className="pb-2 text-right">Health</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{toolEntries.map(([tool, stats]) => {
|
||||
const rateColor =
|
||||
stats.successRatePct < 80
|
||||
? '#f87171'
|
||||
: stats.successRatePct < 90
|
||||
? '#fbbf24'
|
||||
: undefined;
|
||||
|
||||
const color = assessmentColor(stats.assessment);
|
||||
return (
|
||||
<tr key={tool} className="border-border/50 border-b">
|
||||
<td className="py-1.5 pr-4 text-text">{tool}</td>
|
||||
|
|
@ -46,12 +55,17 @@ export const ToolSection = ({ data }: ToolSectionProps) => {
|
|||
<td className="py-1.5 pr-4 text-right text-text">
|
||||
{stats.errors.toLocaleString()}
|
||||
</td>
|
||||
<td
|
||||
className="py-1.5 text-right"
|
||||
style={rateColor ? { color: rateColor } : undefined}
|
||||
>
|
||||
<td className="py-1.5 pr-4 text-right" style={{ color }}>
|
||||
{stats.successRatePct}%
|
||||
</td>
|
||||
<td className="py-1.5 text-right">
|
||||
<span
|
||||
className="rounded px-1.5 py-0.5 text-[10px] font-medium"
|
||||
style={{ backgroundColor: `${color}20`, color }}
|
||||
>
|
||||
{assessmentLabel(stats.assessment)}
|
||||
</span>
|
||||
</td>
|
||||
</tr>
|
||||
);
|
||||
})}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,19 @@
|
|||
* Output of analyzeSession() — one interface per report section.
|
||||
*/
|
||||
|
||||
import type {
|
||||
CacheAssessment,
|
||||
CostAssessment,
|
||||
IdleAssessment,
|
||||
ModelMismatch,
|
||||
OverheadAssessment,
|
||||
RedundancyAssessment,
|
||||
SubagentCostShareAssessment,
|
||||
SwitchPattern,
|
||||
ThrashingAssessment,
|
||||
ToolHealthAssessment,
|
||||
} from '@renderer/utils/reportAssessments';
|
||||
|
||||
// =============================================================================
|
||||
// Pricing
|
||||
// =============================================================================
|
||||
|
|
@ -67,27 +80,33 @@ export interface ReportCostAnalysis {
|
|||
costByModel: Record<string, number>;
|
||||
costPerCommit: number | null;
|
||||
costPerLineChanged: number | null;
|
||||
costPerCommitAssessment: CostAssessment | null;
|
||||
costPerLineAssessment: CostAssessment | null;
|
||||
subagentCostSharePct: number | null;
|
||||
subagentCostShareAssessment: SubagentCostShareAssessment | null;
|
||||
}
|
||||
|
||||
export interface ReportCacheEconomics {
|
||||
cacheCreation5m: number;
|
||||
cacheCreation1h: number;
|
||||
cacheRead: number;
|
||||
cacheEfficiencyPct: number;
|
||||
coldStartDetected: boolean;
|
||||
cacheReadToWriteRatio: number;
|
||||
cacheEfficiencyAssessment: CacheAssessment | null;
|
||||
cacheRatioAssessment: CacheAssessment | null;
|
||||
}
|
||||
|
||||
export interface ToolSuccessRate {
|
||||
totalCalls: number;
|
||||
errors: number;
|
||||
successRatePct: number;
|
||||
assessment: ToolHealthAssessment;
|
||||
}
|
||||
|
||||
export interface ReportToolUsage {
|
||||
counts: Record<string, number>;
|
||||
totalCalls: number;
|
||||
successRates: Record<string, ToolSuccessRate>;
|
||||
overallToolHealth: ToolHealthAssessment;
|
||||
}
|
||||
|
||||
export interface SubagentEntry {
|
||||
|
|
@ -99,6 +118,7 @@ export interface SubagentEntry {
|
|||
totalToolUseCount: number;
|
||||
costUsd: number;
|
||||
costNote?: string;
|
||||
modelMismatch: ModelMismatch | null;
|
||||
}
|
||||
|
||||
export interface ReportSubagentMetrics {
|
||||
|
|
@ -157,6 +177,7 @@ export interface ReportFrictionSignals {
|
|||
export interface ReportThrashingSignals {
|
||||
bashNearDuplicates: { prefix: string; count: number }[];
|
||||
editReworkFiles: { filePath: string; editIndices: number[] }[];
|
||||
thrashingAssessment: ThrashingAssessment;
|
||||
}
|
||||
|
||||
export interface ReportConversationTree {
|
||||
|
|
@ -187,6 +208,7 @@ export interface ReportIdleAnalysis {
|
|||
activeWorkingHuman: string;
|
||||
idlePct: number;
|
||||
longestGaps: IdleGap[];
|
||||
idleAssessment: IdleAssessment;
|
||||
}
|
||||
|
||||
export interface ModelSwitch {
|
||||
|
|
@ -200,6 +222,7 @@ export interface ReportModelSwitches {
|
|||
count: number;
|
||||
switches: ModelSwitch[];
|
||||
modelsUsed: string[];
|
||||
switchPattern: SwitchPattern | null;
|
||||
}
|
||||
|
||||
export interface ReportWorkingDirectories {
|
||||
|
|
@ -230,6 +253,7 @@ export interface ReportStartupOverhead {
|
|||
messagesBeforeFirstWork: number;
|
||||
tokensBeforeFirstWork: number;
|
||||
pctOfTotal: number;
|
||||
overheadAssessment: OverheadAssessment;
|
||||
}
|
||||
|
||||
export interface ReportTokenDensityTimeline {
|
||||
|
|
@ -271,6 +295,7 @@ export interface ReportFileReadRedundancy {
|
|||
uniqueFiles: number;
|
||||
readsPerUniqueFile: number;
|
||||
redundantFiles: Record<string, number>;
|
||||
redundancyAssessment: RedundancyAssessment;
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
|
|
|
|||
|
|
@ -425,8 +425,7 @@ export function buildDisplayItemsFromMessages(
|
|||
}
|
||||
// Only treat as subagent input if there are NO tool_result blocks in this message
|
||||
const hasToolResults =
|
||||
Array.isArray(msg.content) &&
|
||||
msg.content.some((b) => b.type === 'tool_result');
|
||||
Array.isArray(msg.content) && msg.content.some((b) => b.type === 'tool_result');
|
||||
if (rawText.trim() && !hasToolResults) {
|
||||
displayItems.push({
|
||||
type: 'subagent_input',
|
||||
|
|
|
|||
270
src/renderer/utils/reportAssessments.ts
Normal file
270
src/renderer/utils/reportAssessments.ts
Normal file
|
|
@ -0,0 +1,270 @@
|
|||
/**
|
||||
* Centralized assessment severity/color utilities for session reports.
|
||||
*
|
||||
* Maps raw assessment values to severity levels and colors,
|
||||
* replacing duplicated assessmentColor() functions across report sections.
|
||||
*/
|
||||
|
||||
// =============================================================================
|
||||
// Types
|
||||
// =============================================================================
|
||||
|
||||
/** Display severity that every report assessment value is reduced to. */
export type Severity = 'good' | 'warning' | 'danger' | 'neutral';

// =============================================================================
// Colors
// =============================================================================

/** Hex color used for badges and text of each severity. */
const SEVERITY_COLORS: Record<Severity, string> = {
  good: '#4ade80',
  warning: '#fbbf24',
  danger: '#f87171',
  neutral: '#a1a1aa',
};

/**
 * Returns the hex color for a severity.
 *
 * @param severity - Severity bucket to look up.
 * @returns A `#rrggbb` string; callers append a two-digit alpha (e.g. `20`) for backgrounds.
 */
export function severityColor(severity: Severity): string {
  return SEVERITY_COLORS[severity];
}

// =============================================================================
// Assessment → Severity Mapping
// =============================================================================

/**
 * Flat lookup from raw assessment value to severity, shared by all report sections.
 *
 * Because the table is keyed by the bare string, a value that appears in more than one
 * assessment union resolves to the same severity everywhere. For example `high` (listed
 * under Context) is also a member of `SubagentCostShareAssessment`, and `none` (listed
 * under Thrashing) is also a member of `SwitchPattern`.
 * NOTE(review): confirm that `high` → danger and `none` → good are the intended
 * severities for those other domains.
 */
const ASSESSMENT_SEVERITY: Record<string, Severity> = {
  // Context
  healthy: 'good',
  moderate: 'warning',
  high: 'danger',
  critical: 'danger',

  // Cost / subagent share
  efficient: 'good',
  normal: 'good',
  expensive: 'warning',
  red_flag: 'danger',
  very_high: 'danger',

  // Cache
  good: 'good',
  concerning: 'warning',

  // Tool health
  degraded: 'warning',
  unreliable: 'danger',

  // Idle ('moderate' already mapped above under Context)
  high_idle: 'danger',

  // File read
  wasteful: 'warning',

  // Startup
  heavy: 'warning',

  // Thrashing
  none: 'good',
  mild: 'warning',
  severe: 'danger',

  // Prompt quality
  well_specified: 'good',
  moderate_friction: 'warning',
  underspecified: 'danger',
  verbose_but_unclear: 'danger',

  // Test trajectory
  improving: 'good',
  stable: 'warning',
  regressing: 'danger',
  insufficient_data: 'neutral',

  // Model switch
  opus_plan_mode: 'good',
  manual_switch: 'neutral',
};

/**
 * Maps a raw assessment value to its display severity.
 *
 * @param assessment - Assessment string from a report section; may be null/undefined.
 * @returns The mapped severity, or `'neutral'` for empty, missing, or unknown values.
 */
export function assessmentSeverity(assessment: string | null | undefined): Severity {
  if (!assessment) return 'neutral';
  // Only honor keys declared in the table itself. A bare `TABLE[key]` would also find
  // inherited members such as `constructor` or `toString`, which are non-nullish and
  // would slip past the `??` fallback as a bogus "severity".
  if (!Object.prototype.hasOwnProperty.call(ASSESSMENT_SEVERITY, assessment)) return 'neutral';
  return ASSESSMENT_SEVERITY[assessment] ?? 'neutral';
}

/**
 * Convenience wrapper: assessment value → hex color.
 *
 * @param assessment - Assessment string from a report section; may be null/undefined.
 * @returns The color of the mapped severity (the neutral color for unknown values).
 */
export function assessmentColor(assessment: string | null | undefined): string {
  return severityColor(assessmentSeverity(assessment));
}
|
||||
|
||||
// =============================================================================
|
||||
// Label Formatting
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Labels that the generic title-casing would render differently from the wording the
 * report sections used before label formatting was centralized.
 */
const ASSESSMENT_LABEL_OVERRIDES: ReadonlyMap<string, string> = new Map([
  ['verbose_but_unclear', 'Verbose but Unclear'],
]);

/**
 * Converts a snake_case assessment value into a human-readable badge label.
 *
 * Each `_`-separated word gets its first character upper-cased and the words are joined
 * with spaces (`high_idle` → `High Idle`). Values listed in
 * `ASSESSMENT_LABEL_OVERRIDES` use their explicit wording instead.
 *
 * @param value - Raw assessment value.
 * @returns The display label; an empty string yields an empty label.
 */
export function assessmentLabel(value: string): string {
  const override = ASSESSMENT_LABEL_OVERRIDES.get(value);
  if (override !== undefined) return override;

  return value
    .split('_')
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
    .join(' ');
}
|
||||
|
||||
// =============================================================================
|
||||
// Threshold Constants
|
||||
// =============================================================================
|
||||
|
||||
export const THRESHOLDS = {
|
||||
costPerCommit: {
|
||||
efficient: 0.5,
|
||||
normal: 2,
|
||||
expensive: 5,
|
||||
},
|
||||
costPerLine: {
|
||||
efficient: 0.01,
|
||||
normal: 0.05,
|
||||
expensive: 0.2,
|
||||
},
|
||||
subagentCostShare: {
|
||||
normal: 30,
|
||||
high: 60,
|
||||
veryHigh: 80,
|
||||
},
|
||||
cacheEfficiency: {
|
||||
good: 95,
|
||||
},
|
||||
cacheRwRatio: {
|
||||
good: 20,
|
||||
},
|
||||
toolSuccess: {
|
||||
healthy: 95,
|
||||
degraded: 80,
|
||||
},
|
||||
idle: {
|
||||
efficient: 20,
|
||||
moderate: 50,
|
||||
},
|
||||
fileReadsPerUnique: {
|
||||
normal: 2.0,
|
||||
},
|
||||
startupOverhead: {
|
||||
normal: 5,
|
||||
},
|
||||
} as const;
|
||||
|
||||
// =============================================================================
|
||||
// Assessment Computers
|
||||
// =============================================================================
|
||||
|
||||
/** Tiers shared by the cost-per-commit and cost-per-line assessments. */
export type CostAssessment =
  | 'efficient'
  | 'normal'
  | 'expensive'
  | 'red_flag';

/** Verdict shared by the cache-efficiency and cache read/write-ratio assessments. */
export type CacheAssessment =
  | 'good'
  | 'concerning';

/** Verdict for a tool success percentage. */
export type ToolHealthAssessment =
  | 'healthy'
  | 'degraded'
  | 'unreliable';

/** Verdict for the idle percentage of a session. */
export type IdleAssessment =
  | 'efficient'
  | 'moderate'
  | 'high_idle';

/** Verdict for reads per unique file. */
export type RedundancyAssessment =
  | 'normal'
  | 'wasteful';

/** Verdict for startup overhead as a percentage of the total. */
export type OverheadAssessment =
  | 'normal'
  | 'heavy';

/** Verdict for the number of thrashing signals. */
export type ThrashingAssessment =
  | 'none'
  | 'mild'
  | 'severe';

/** Verdict for the subagent share of total cost. */
export type SubagentCostShareAssessment =
  | 'normal'
  | 'high'
  | 'very_high'
  | 'red_flag';

/**
 * Recognised model-switch patterns.
 *
 * NOTE(review): `detectSwitchPattern` in this module returns `null`, not
 * `'none'`, when there are no switches; confirm whether `'none'` is produced
 * anywhere else.
 */
export type SwitchPattern =
  | 'opus_plan_mode'
  | 'manual_switch'
  | 'none';
|
||||
|
||||
/**
 * Grade a cost-per-commit value against `THRESHOLDS.costPerCommit`.
 *
 * @param costPerCommit - Cost attributed to each commit.
 * @returns The first tier whose limit the value is strictly below, or
 *   `'red_flag'` when it is below none of them.
 */
export function computeCostPerCommitAssessment(costPerCommit: number): CostAssessment {
  const { efficient, normal, expensive } = THRESHOLDS.costPerCommit;
  const tiers: [number, CostAssessment][] = [
    [efficient, 'efficient'],
    [normal, 'normal'],
    [expensive, 'expensive'],
  ];

  // Tiers are ordered from cheapest to most expensive; first match wins.
  for (const [limit, verdict] of tiers) {
    if (costPerCommit < limit) return verdict;
  }
  return 'red_flag';
}
|
||||
|
||||
/**
 * Grade a cost-per-changed-line value against `THRESHOLDS.costPerLine`.
 *
 * @param costPerLine - Cost attributed to each changed line.
 * @returns The first tier whose limit the value is strictly below, or
 *   `'red_flag'` when it is below none of them.
 */
export function computeCostPerLineAssessment(costPerLine: number): CostAssessment {
  const limits = THRESHOLDS.costPerLine;
  const orderedTiers: [number, CostAssessment][] = [
    [limits.efficient, 'efficient'],
    [limits.normal, 'normal'],
    [limits.expensive, 'expensive'],
  ];

  const match = orderedTiers.find(([limit]) => costPerLine < limit);
  return match ? match[1] : 'red_flag';
}
|
||||
|
||||
/**
 * Grade the subagent share of total cost against `THRESHOLDS.subagentCostShare`.
 *
 * @param pct - Subagent cost as a percentage of total cost.
 * @returns `'normal'`, `'high'` or `'very_high'` for the first limit the value
 *   is strictly below; `'red_flag'` otherwise.
 */
export function computeSubagentCostShareAssessment(pct: number): SubagentCostShareAssessment {
  const { normal, high, veryHigh } = THRESHOLDS.subagentCostShare;

  // Start from the worst verdict and relax it if a limit is not reached.
  let verdict: SubagentCostShareAssessment = 'red_flag';
  if (pct < normal) {
    verdict = 'normal';
  } else if (pct < high) {
    verdict = 'high';
  } else if (pct < veryHigh) {
    verdict = 'very_high';
  }
  return verdict;
}
|
||||
|
||||
/**
 * Grade a cache efficiency percentage.
 *
 * @param pct - Cache efficiency percentage.
 * @returns `'good'` when the value is at or above
 *   `THRESHOLDS.cacheEfficiency.good`, otherwise `'concerning'`.
 */
export function computeCacheEfficiencyAssessment(pct: number): CacheAssessment {
  if (pct >= THRESHOLDS.cacheEfficiency.good) {
    return 'good';
  }
  return 'concerning';
}
|
||||
|
||||
/**
 * Grade a cache read-to-write ratio.
 *
 * @param ratio - Cache read/write ratio.
 * @returns `'good'` when the ratio is at or above
 *   `THRESHOLDS.cacheRwRatio.good`, otherwise `'concerning'`.
 */
export function computeCacheRatioAssessment(ratio: number): CacheAssessment {
  const meetsTarget = ratio >= THRESHOLDS.cacheRwRatio.good;
  return meetsTarget ? 'good' : 'concerning';
}
|
||||
|
||||
/**
 * Grade a tool success percentage against `THRESHOLDS.toolSuccess`.
 *
 * The two comparisons are deliberately asymmetric: `healthy` is exclusive
 * (a value equal to it is only `'degraded'`), while `degraded` is inclusive.
 *
 * @param successPct - Percentage of successful tool calls.
 * @returns `'healthy'`, `'degraded'` or `'unreliable'`.
 */
export function computeToolHealthAssessment(successPct: number): ToolHealthAssessment {
  const { healthy, degraded } = THRESHOLDS.toolSuccess;
  return successPct > healthy
    ? 'healthy'
    : successPct >= degraded
      ? 'degraded'
      : 'unreliable';
}
|
||||
|
||||
/**
 * Grade an idle percentage against `THRESHOLDS.idle`.
 *
 * @param idlePct - Idle time as a percentage.
 * @returns `'efficient'` or `'moderate'` for the first limit the value is
 *   strictly below; `'high_idle'` otherwise.
 */
export function computeIdleAssessment(idlePct: number): IdleAssessment {
  const { efficient, moderate } = THRESHOLDS.idle;
  const tiers: [number, IdleAssessment][] = [
    [efficient, 'efficient'],
    [moderate, 'moderate'],
  ];

  for (const [limit, verdict] of tiers) {
    if (idlePct < limit) return verdict;
  }
  return 'high_idle';
}
|
||||
|
||||
/**
 * Grade the average number of reads per unique file.
 *
 * @param readsPerUnique - Total reads divided by unique files read.
 * @returns `'normal'` when the value is at or below
 *   `THRESHOLDS.fileReadsPerUnique.normal`, otherwise `'wasteful'`.
 */
export function computeRedundancyAssessment(readsPerUnique: number): RedundancyAssessment {
  if (readsPerUnique <= THRESHOLDS.fileReadsPerUnique.normal) {
    return 'normal';
  }
  return 'wasteful';
}
|
||||
|
||||
/**
 * Grade startup overhead expressed as a percentage of the total.
 *
 * @param pctOfTotal - Startup share of the total, as a percentage.
 * @returns `'normal'` when the value is at or below
 *   `THRESHOLDS.startupOverhead.normal`, otherwise `'heavy'`.
 */
export function computeOverheadAssessment(pctOfTotal: number): OverheadAssessment {
  const withinBudget = pctOfTotal <= THRESHOLDS.startupOverhead.normal;
  return withinBudget ? 'normal' : 'heavy';
}
|
||||
|
||||
/**
 * Grade the number of thrashing signals detected in a session.
 *
 * @param signalCount - Number of thrashing signals.
 * @returns `'none'` for exactly zero, `'mild'` for any other value up to and
 *   including 2, and `'severe'` otherwise.
 */
export function computeThrashingAssessment(signalCount: number): ThrashingAssessment {
  return signalCount === 0 ? 'none' : signalCount <= 2 ? 'mild' : 'severe';
}
|
||||
|
||||
/**
 * Result of `detectModelMismatch`: a task description that looks simpler than
 * the model it ran on.
 */
export interface ModelMismatch {
  /** The task description that was inspected, passed through unchanged. */
  description: string;

  /** Which keyword family the description matched. */
  expectedComplexity: 'mechanical' | 'read_only';

  /** Human-readable suggestion for a cheaper model. */
  recommendation: string;
}
|
||||
|
||||
// Whole-word, case-insensitive keyword families used by detectModelMismatch.
const MECHANICAL_PATTERNS = /\b(rename|move|lint|format|delete|remove|copy|replace)\b/i;
const READ_ONLY_PATTERNS = /\b(explore|search|find|verify|check|scan|discover|list|read)\b/i;

/**
 * Flag a task whose description looks too simple for an Opus model.
 *
 * Only models whose name contains "opus" (case-insensitive) are considered.
 * The mechanical keyword family is checked before the read-only one, so a
 * description matching both is reported as mechanical.
 *
 * @param description - Free-text task description.
 * @param model - Model name the task ran on.
 * @returns A mismatch record, or `null` when the model is not Opus or no
 *   keyword matches.
 */
export function detectModelMismatch(description: string, model: string): ModelMismatch | null {
  if (!model.toLowerCase().includes('opus')) {
    return null;
  }

  let expectedComplexity: ModelMismatch['expectedComplexity'];
  let recommendation: string;

  if (MECHANICAL_PATTERNS.test(description)) {
    expectedComplexity = 'mechanical';
    recommendation = 'Consider using Haiku for mechanical tasks to reduce cost.';
  } else if (READ_ONLY_PATTERNS.test(description)) {
    expectedComplexity = 'read_only';
    recommendation = 'Consider using Haiku or Sonnet for read-only exploration tasks.';
  } else {
    return null;
  }

  return { description, expectedComplexity, recommendation };
}
|
||||
|
||||
/**
 * Classify a session's model switches.
 *
 * Two consecutive switches that go Sonnet -> Opus and then Opus -> Sonnet
 * (matched by case-insensitive substring) are reported as `'opus_plan_mode'`.
 * Any other non-empty list, including a single switch, is `'manual_switch'`.
 *
 * @param switches - Model switches in the order they are provided.
 * @returns `null` for an empty list, otherwise the detected pattern.
 */
export function detectSwitchPattern(
  switches: { from: string; to: string }[]
): SwitchPattern | null {
  if (switches.length === 0) return null;

  const mentions = (modelName: string, family: string): boolean =>
    modelName.toLowerCase().includes(family);

  // Walk adjacent pairs; a single switch has no pair, so the loop is skipped.
  for (let next = 1; next < switches.length; next++) {
    const outbound = switches[next - 1];
    const inbound = switches[next];
    const isRoundTrip =
      mentions(outbound.from, 'sonnet') &&
      mentions(outbound.to, 'opus') &&
      mentions(inbound.from, 'opus') &&
      mentions(inbound.to, 'sonnet');
    if (isRoundTrip) return 'opus_plan_mode';
  }

  return 'manual_switch';
}
|
||||
|
|
@ -8,6 +8,21 @@
|
|||
* Runs entirely in the renderer process — no IPC needed.
|
||||
*/
|
||||
|
||||
import {
|
||||
computeCacheEfficiencyAssessment,
|
||||
computeCacheRatioAssessment,
|
||||
computeCostPerCommitAssessment,
|
||||
computeCostPerLineAssessment,
|
||||
computeIdleAssessment,
|
||||
computeOverheadAssessment,
|
||||
computeRedundancyAssessment,
|
||||
computeSubagentCostShareAssessment,
|
||||
computeThrashingAssessment,
|
||||
computeToolHealthAssessment,
|
||||
detectModelMismatch,
|
||||
detectSwitchPattern,
|
||||
} from '@renderer/utils/reportAssessments';
|
||||
|
||||
import type {
|
||||
AgentTreeNode,
|
||||
FrictionCorrection,
|
||||
|
|
@ -25,6 +40,7 @@ import type {
|
|||
TestSnapshot,
|
||||
ThinkingBlockAnalysis,
|
||||
ToolError,
|
||||
ToolSuccessRate,
|
||||
UserQuestion,
|
||||
} from '@renderer/types/sessionReport';
|
||||
import type {
|
||||
|
|
@ -283,8 +299,6 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
|
|||
};
|
||||
|
||||
// Cache economics
|
||||
const cacheCreation5m = 0;
|
||||
const cacheCreation1h = 0;
|
||||
let totalCacheCreation = 0;
|
||||
let totalCacheRead = 0;
|
||||
let coldStartDetected = false;
|
||||
|
|
@ -356,7 +370,7 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
|
|||
const testSnapshots: TestSnapshot[] = [];
|
||||
|
||||
// Cost tracking
|
||||
let totalSessionCost = 0;
|
||||
let parentCost = 0;
|
||||
|
||||
// Git activity
|
||||
const gitCommits: GitCommit[] = [];
|
||||
|
|
@ -457,7 +471,7 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
|
|||
|
||||
const callCost = costUsd(model, inpTok, outTok, cr, cc);
|
||||
stats.costUsd += callCost;
|
||||
totalSessionCost += callCost;
|
||||
parentCost += callCost;
|
||||
|
||||
totalCacheCreation += cc;
|
||||
totalCacheRead += cr;
|
||||
|
|
@ -868,18 +882,23 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
|
|||
const linesChanged = linesAddedTotal + linesRemovedTotal;
|
||||
|
||||
// --- Subagent metrics from detail.processes ---
|
||||
const subagentEntries: SubagentEntry[] = detail.processes.map((proc: Process) => ({
|
||||
description: proc.description ?? 'unknown',
|
||||
subagentType: proc.subagentType ?? 'unknown',
|
||||
model: 'default (inherits parent)',
|
||||
totalTokens: proc.metrics.totalTokens,
|
||||
totalDurationMs: proc.durationMs,
|
||||
totalToolUseCount: proc.messages.reduce(
|
||||
(sum: number, pm: ParsedMessage) => sum + pm.toolCalls.length,
|
||||
0
|
||||
),
|
||||
costUsd: proc.metrics.costUsd ?? 0,
|
||||
}));
|
||||
const subagentEntries: SubagentEntry[] = detail.processes.map((proc: Process) => {
|
||||
const desc = proc.description ?? 'unknown';
|
||||
const model = 'default (inherits parent)';
|
||||
return {
|
||||
description: desc,
|
||||
subagentType: proc.subagentType ?? 'unknown',
|
||||
model,
|
||||
totalTokens: proc.metrics.totalTokens,
|
||||
totalDurationMs: proc.durationMs,
|
||||
totalToolUseCount: proc.messages.reduce(
|
||||
(sum: number, pm: ParsedMessage) => sum + pm.toolCalls.length,
|
||||
0
|
||||
),
|
||||
costUsd: proc.metrics.costUsd ?? 0,
|
||||
modelMismatch: detectModelMismatch(desc, model),
|
||||
};
|
||||
});
|
||||
|
||||
const saFromProcesses = {
|
||||
count: subagentEntries.length,
|
||||
|
|
@ -892,22 +911,32 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
|
|||
};
|
||||
|
||||
// --- Tool usage with success rates ---
|
||||
const toolSuccessRates: Record<
|
||||
string,
|
||||
{ totalCalls: number; errors: number; successRatePct: number }
|
||||
> = {};
|
||||
const toolSuccessRates: Record<string, ToolSuccessRate> = {};
|
||||
const sortedToolCounts = [...toolCounts.entries()].sort((a, b) => b[1] - a[1]);
|
||||
const countsRecord: Record<string, number> = {};
|
||||
for (const [tool, count] of sortedToolCounts) {
|
||||
countsRecord[tool] = count;
|
||||
const errCount = errorsByTool.get(tool) ?? 0;
|
||||
const successPct = count ? Math.round(((count - errCount) / count) * 1000) / 10 : 0;
|
||||
toolSuccessRates[tool] = {
|
||||
totalCalls: count,
|
||||
errors: errCount,
|
||||
successRatePct: count ? Math.round(((count - errCount) / count) * 1000) / 10 : 0,
|
||||
successRatePct: successPct,
|
||||
assessment: computeToolHealthAssessment(successPct),
|
||||
};
|
||||
}
|
||||
|
||||
// Overall tool health: worst assessment among tools with >5 calls
|
||||
const significantTools = Object.values(toolSuccessRates).filter((t) => t.totalCalls > 5);
|
||||
type THAssessment = 'healthy' | 'degraded' | 'unreliable';
|
||||
const overallToolHealth: THAssessment =
|
||||
significantTools.length > 0
|
||||
? significantTools.reduce<THAssessment>((worst, t) => {
|
||||
const order = { healthy: 0, degraded: 1, unreliable: 2 } as const;
|
||||
return order[t.assessment] > order[worst] ? t.assessment : worst;
|
||||
}, 'healthy')
|
||||
: computeToolHealthAssessment(100);
|
||||
|
||||
// --- Key events timing ---
|
||||
for (let j = 1; j < keyEvents.length; j++) {
|
||||
const prevDt = keyEvents[j - 1].timestamp;
|
||||
|
|
@ -967,14 +996,19 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
|
|||
|
||||
// --- Conversation tree analysis ---
|
||||
const depthMemo = new Map<string, number>();
|
||||
function getDepth(uuid: string): number {
|
||||
function getDepth(uuid: string, visited = new Set<string>()): number {
|
||||
if (depthMemo.has(uuid)) return depthMemo.get(uuid)!;
|
||||
if (visited.has(uuid)) {
|
||||
depthMemo.set(uuid, 0);
|
||||
return 0;
|
||||
}
|
||||
visited.add(uuid);
|
||||
const parent = parentMap.get(uuid);
|
||||
if (!parent) {
|
||||
depthMemo.set(uuid, 0);
|
||||
return 0;
|
||||
}
|
||||
const depth = 1 + getDepth(parent);
|
||||
const depth = 1 + getDepth(parent, visited);
|
||||
depthMemo.set(uuid, depth);
|
||||
return depth;
|
||||
}
|
||||
|
|
@ -1071,6 +1105,20 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
|
|||
|
||||
// --- Subagent cost from processes ---
|
||||
const processSubagentCost = subagentEntries.reduce((sum, a) => sum + a.costUsd, 0);
|
||||
const totalCost = parentCost + processSubagentCost;
|
||||
|
||||
// --- Assessment computations ---
|
||||
const costPerCommitVal =
|
||||
commitCount > 0 ? Math.round((totalCost / commitCount) * 10000) / 10000 : null;
|
||||
const costPerLineVal =
|
||||
linesChanged > 0 ? Math.round((totalCost / linesChanged) * 1000000) / 1000000 : null;
|
||||
const subagentCostSharePct =
|
||||
totalCost > 0 ? Math.round((processSubagentCost / totalCost) * 10000) / 100 : null;
|
||||
|
||||
const readsPerUniqueFile = uniqueFiles ? Math.round((totalReads / uniqueFiles) * 100) / 100 : 0;
|
||||
const startupPctOfTotal = grandTotal ? Math.round((startupTokens / grandTotal) * 10000) / 100 : 0;
|
||||
const idlePct = wallClock > 0 ? Math.round((totalIdle / wallClock) * 1000) / 10 : 0;
|
||||
const thrashingSignalCount = bashNearDuplicates.length + editReworkFiles.length;
|
||||
|
||||
// ===================================================================
|
||||
// BUILD REPORT
|
||||
|
|
@ -1111,39 +1159,44 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
|
|||
},
|
||||
|
||||
costAnalysis: {
|
||||
parentCostUsd: Math.round(totalSessionCost * 10000) / 10000,
|
||||
parentCostUsd: Math.round(parentCost * 10000) / 10000,
|
||||
subagentCostUsd: Math.round(processSubagentCost * 10000) / 10000,
|
||||
totalSessionCostUsd: Math.round((totalSessionCost + processSubagentCost) * 10000) / 10000,
|
||||
totalSessionCostUsd: Math.round(totalCost * 10000) / 10000,
|
||||
costByModel: Object.fromEntries(
|
||||
[...modelStats.entries()].map(([model, stats]) => [
|
||||
model,
|
||||
Math.round(stats.costUsd * 10000) / 10000,
|
||||
])
|
||||
),
|
||||
costPerCommit:
|
||||
commitCount > 0
|
||||
? Math.round(((totalSessionCost + processSubagentCost) / commitCount) * 10000) / 10000
|
||||
: null,
|
||||
costPerLineChanged:
|
||||
linesChanged > 0
|
||||
? Math.round(((totalSessionCost + processSubagentCost) / linesChanged) * 1000000) /
|
||||
1000000
|
||||
costPerCommit: costPerCommitVal,
|
||||
costPerLineChanged: costPerLineVal,
|
||||
costPerCommitAssessment:
|
||||
costPerCommitVal != null ? computeCostPerCommitAssessment(costPerCommitVal) : null,
|
||||
costPerLineAssessment:
|
||||
costPerLineVal != null ? computeCostPerLineAssessment(costPerLineVal) : null,
|
||||
subagentCostSharePct,
|
||||
subagentCostShareAssessment:
|
||||
subagentCostSharePct != null
|
||||
? computeSubagentCostShareAssessment(subagentCostSharePct)
|
||||
: null,
|
||||
},
|
||||
|
||||
cacheEconomics: {
|
||||
cacheCreation5m,
|
||||
cacheCreation1h,
|
||||
cacheRead: totalCacheRead,
|
||||
cacheEfficiencyPct: cacheEfficiency,
|
||||
coldStartDetected,
|
||||
cacheReadToWriteRatio: cacheRwRatio,
|
||||
cacheEfficiencyAssessment:
|
||||
cacheTotalCreationAndRead > 0 ? computeCacheEfficiencyAssessment(cacheEfficiency) : null,
|
||||
cacheRatioAssessment:
|
||||
totalCacheCreation > 0 ? computeCacheRatioAssessment(cacheRwRatio) : null,
|
||||
},
|
||||
|
||||
toolUsage: {
|
||||
counts: countsRecord,
|
||||
totalCalls: [...toolCounts.values()].reduce((sum, c) => sum + c, 0),
|
||||
successRates: toolSuccessRates,
|
||||
overallToolHealth,
|
||||
},
|
||||
|
||||
subagentMetrics: saFromProcesses,
|
||||
|
|
@ -1178,6 +1231,7 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
|
|||
thrashingSignals: {
|
||||
bashNearDuplicates,
|
||||
editReworkFiles,
|
||||
thrashingAssessment: computeThrashingAssessment(thrashingSignalCount),
|
||||
},
|
||||
|
||||
conversationTree: {
|
||||
|
|
@ -1196,14 +1250,16 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
|
|||
wallClockSeconds: Math.round(wallClock * 10) / 10,
|
||||
activeWorkingSeconds: Math.round(Math.max(activeTime, 0) * 10) / 10,
|
||||
activeWorkingHuman: formatDuration(Math.floor(Math.max(activeTime, 0))),
|
||||
idlePct: wallClock > 0 ? Math.round((totalIdle / wallClock) * 1000) / 10 : 0,
|
||||
idlePct,
|
||||
longestGaps: [...idleGaps].sort((a, b) => b.gapSeconds - a.gapSeconds).slice(0, 5),
|
||||
idleAssessment: computeIdleAssessment(idlePct),
|
||||
},
|
||||
|
||||
modelSwitches: {
|
||||
count: modelSwitches.length,
|
||||
switches: modelSwitches,
|
||||
modelsUsed,
|
||||
switchPattern: detectSwitchPattern(modelSwitches),
|
||||
},
|
||||
|
||||
workingDirectories: {
|
||||
|
|
@ -1225,7 +1281,8 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
|
|||
startupOverhead: {
|
||||
messagesBeforeFirstWork: startupMessages,
|
||||
tokensBeforeFirstWork: startupTokens,
|
||||
pctOfTotal: grandTotal ? Math.round((startupTokens / grandTotal) * 10000) / 100 : 0,
|
||||
pctOfTotal: startupPctOfTotal,
|
||||
overheadAssessment: computeOverheadAssessment(startupPctOfTotal),
|
||||
},
|
||||
|
||||
tokenDensityTimeline: { quartiles },
|
||||
|
|
@ -1253,8 +1310,9 @@ export function analyzeSession(detail: SessionDetail): SessionReport {
|
|||
fileReadRedundancy: {
|
||||
totalReads,
|
||||
uniqueFiles,
|
||||
readsPerUniqueFile: uniqueFiles ? Math.round((totalReads / uniqueFiles) * 100) / 100 : 0,
|
||||
readsPerUniqueFile,
|
||||
redundantFiles,
|
||||
redundancyAssessment: computeRedundancyAssessment(readsPerUniqueFile),
|
||||
},
|
||||
|
||||
compaction: {
|
||||
|
|
|
|||
259
test/renderer/utils/reportAssessments.test.ts
Normal file
259
test/renderer/utils/reportAssessments.test.ts
Normal file
|
|
@ -0,0 +1,259 @@
|
|||
import { describe, it, expect } from 'vitest';
|
||||
|
||||
import {
|
||||
assessmentColor,
|
||||
assessmentLabel,
|
||||
assessmentSeverity,
|
||||
computeCacheEfficiencyAssessment,
|
||||
computeCacheRatioAssessment,
|
||||
computeCostPerCommitAssessment,
|
||||
computeCostPerLineAssessment,
|
||||
computeIdleAssessment,
|
||||
computeOverheadAssessment,
|
||||
computeRedundancyAssessment,
|
||||
computeSubagentCostShareAssessment,
|
||||
computeThrashingAssessment,
|
||||
computeToolHealthAssessment,
|
||||
detectModelMismatch,
|
||||
detectSwitchPattern,
|
||||
severityColor,
|
||||
THRESHOLDS,
|
||||
} from '@renderer/utils/reportAssessments';
|
||||
|
||||
// Unit tests for the helpers exported by '@renderer/utils/reportAssessments':
// severity/colour/label mapping, the threshold-based compute*Assessment
// functions, and the two heuristic detectors.
describe('reportAssessments', () => {
  describe('severityColor', () => {
    it('maps severity to hex color', () => {
      expect(severityColor('good')).toBe('#4ade80');
      expect(severityColor('warning')).toBe('#fbbf24');
      expect(severityColor('danger')).toBe('#f87171');
      expect(severityColor('neutral')).toBe('#a1a1aa');
    });
  });

  describe('assessmentSeverity', () => {
    it('maps known assessments to severity', () => {
      expect(assessmentSeverity('healthy')).toBe('good');
      expect(assessmentSeverity('efficient')).toBe('good');
      expect(assessmentSeverity('expensive')).toBe('warning');
      expect(assessmentSeverity('red_flag')).toBe('danger');
      expect(assessmentSeverity('very_high')).toBe('danger');
      expect(assessmentSeverity('degraded')).toBe('warning');
      expect(assessmentSeverity('unreliable')).toBe('danger');
      expect(assessmentSeverity('high_idle')).toBe('danger');
      expect(assessmentSeverity('moderate')).toBe('warning');
    });

    // Missing input and keys absent from the lookup table both fall back to neutral.
    it('returns neutral for null/undefined/unknown', () => {
      expect(assessmentSeverity(null)).toBe('neutral');
      expect(assessmentSeverity(undefined)).toBe('neutral');
      expect(assessmentSeverity('unknown_value')).toBe('neutral');
    });
  });

  describe('assessmentColor', () => {
    // Expected colours are the same hex values asserted for severityColor above.
    it('returns correct color for assessment string', () => {
      expect(assessmentColor('healthy')).toBe('#4ade80');
      expect(assessmentColor('red_flag')).toBe('#f87171');
      expect(assessmentColor(null)).toBe('#a1a1aa');
    });
  });

  describe('assessmentLabel', () => {
    it('converts snake_case to Title Case', () => {
      expect(assessmentLabel('red_flag')).toBe('Red Flag');
      expect(assessmentLabel('well_specified')).toBe('Well Specified');
      expect(assessmentLabel('healthy')).toBe('Healthy');
      expect(assessmentLabel('high_idle')).toBe('High Idle');
      expect(assessmentLabel('opus_plan_mode')).toBe('Opus Plan Mode');
    });
  });

  describe('computeCostPerCommitAssessment', () => {
    it('returns efficient below threshold', () => {
      expect(computeCostPerCommitAssessment(0.3)).toBe('efficient');
    });
    it('returns normal in range', () => {
      expect(computeCostPerCommitAssessment(1.0)).toBe('normal');
    });
    it('returns expensive in range', () => {
      expect(computeCostPerCommitAssessment(3.0)).toBe('expensive');
    });
    it('returns red_flag above threshold', () => {
      expect(computeCostPerCommitAssessment(10.0)).toBe('red_flag');
    });
    // Pins the strict comparison: a value exactly at the `efficient` limit is
    // already 'normal'.
    it('respects threshold boundaries', () => {
      expect(computeCostPerCommitAssessment(THRESHOLDS.costPerCommit.efficient - 0.01)).toBe(
        'efficient'
      );
      expect(computeCostPerCommitAssessment(THRESHOLDS.costPerCommit.efficient)).toBe('normal');
    });
  });

  // Only the two extreme tiers are exercised for cost per line.
  describe('computeCostPerLineAssessment', () => {
    it('returns efficient below threshold', () => {
      expect(computeCostPerLineAssessment(0.005)).toBe('efficient');
    });
    it('returns red_flag above threshold', () => {
      expect(computeCostPerLineAssessment(0.5)).toBe('red_flag');
    });
  });

  describe('computeSubagentCostShareAssessment', () => {
    it('returns normal below 30%', () => {
      expect(computeSubagentCostShareAssessment(20)).toBe('normal');
    });
    it('returns high in range', () => {
      expect(computeSubagentCostShareAssessment(45)).toBe('high');
    });
    it('returns very_high in range', () => {
      expect(computeSubagentCostShareAssessment(70)).toBe('very_high');
    });
    it('returns red_flag above 80%', () => {
      expect(computeSubagentCostShareAssessment(90)).toBe('red_flag');
    });
  });

  // The exact boundary value (95) is not exercised by these cases.
  describe('computeCacheEfficiencyAssessment', () => {
    it('returns good above 95%', () => {
      expect(computeCacheEfficiencyAssessment(96)).toBe('good');
    });
    it('returns concerning below 95%', () => {
      expect(computeCacheEfficiencyAssessment(90)).toBe('concerning');
    });
  });

  // The exact boundary value (20) is not exercised by these cases.
  describe('computeCacheRatioAssessment', () => {
    it('returns good above 20', () => {
      expect(computeCacheRatioAssessment(25)).toBe('good');
    });
    it('returns concerning below 20', () => {
      expect(computeCacheRatioAssessment(10)).toBe('concerning');
    });
  });

  describe('computeToolHealthAssessment', () => {
    it('returns healthy above 95%', () => {
      expect(computeToolHealthAssessment(98)).toBe('healthy');
    });
    it('returns degraded between 80-95%', () => {
      expect(computeToolHealthAssessment(85)).toBe('degraded');
    });
    it('returns unreliable below 80%', () => {
      expect(computeToolHealthAssessment(70)).toBe('unreliable');
    });
    // Pins that the healthy limit is exclusive: exactly 95 is still degraded.
    it('boundary: 95 is degraded, 95.1 is healthy', () => {
      expect(computeToolHealthAssessment(95)).toBe('degraded');
      expect(computeToolHealthAssessment(95.1)).toBe('healthy');
    });
  });

  describe('computeIdleAssessment', () => {
    it('returns efficient below 20%', () => {
      expect(computeIdleAssessment(10)).toBe('efficient');
    });
    it('returns moderate between 20-50%', () => {
      expect(computeIdleAssessment(35)).toBe('moderate');
    });
    it('returns high_idle above 50%', () => {
      expect(computeIdleAssessment(60)).toBe('high_idle');
    });
  });

  describe('computeRedundancyAssessment', () => {
    // Includes the inclusive boundary: exactly 2.0 is still 'normal'.
    it('returns normal at or below 2.0', () => {
      expect(computeRedundancyAssessment(1.5)).toBe('normal');
      expect(computeRedundancyAssessment(2.0)).toBe('normal');
    });
    it('returns wasteful above 2.0', () => {
      expect(computeRedundancyAssessment(3.0)).toBe('wasteful');
    });
  });

  describe('computeOverheadAssessment', () => {
    // Includes the inclusive boundary: exactly 5 is still 'normal'.
    it('returns normal at or below 5%', () => {
      expect(computeOverheadAssessment(3)).toBe('normal');
      expect(computeOverheadAssessment(5)).toBe('normal');
    });
    it('returns heavy above 5%', () => {
      expect(computeOverheadAssessment(10)).toBe('heavy');
    });
  });

  describe('computeThrashingAssessment', () => {
    it('returns none for 0 signals', () => {
      expect(computeThrashingAssessment(0)).toBe('none');
    });
    it('returns mild for 1-2 signals', () => {
      expect(computeThrashingAssessment(1)).toBe('mild');
      expect(computeThrashingAssessment(2)).toBe('mild');
    });
    it('returns severe for 3+ signals', () => {
      expect(computeThrashingAssessment(3)).toBe('severe');
      expect(computeThrashingAssessment(5)).toBe('severe');
    });
  });

  describe('detectModelMismatch', () => {
    // A mechanical keyword on a non-Opus model must not be flagged.
    it('returns null for non-opus models', () => {
      expect(detectModelMismatch('rename files', 'claude-sonnet-4')).toBeNull();
    });

    it('detects mechanical tasks on opus', () => {
      const result = detectModelMismatch('rename all variables', 'claude-opus-4');
      expect(result).not.toBeNull();
      expect(result!.expectedComplexity).toBe('mechanical');
    });

    it('detects read-only tasks on opus', () => {
      const result = detectModelMismatch('explore the codebase', 'claude-opus-4');
      expect(result).not.toBeNull();
      expect(result!.expectedComplexity).toBe('read_only');
    });

    it('returns null for complex tasks on opus', () => {
      expect(detectModelMismatch('implement authentication system', 'claude-opus-4')).toBeNull();
    });

    // These loops only assert that a mismatch is reported, not which
    // complexity it carries.
    it('detects various mechanical keywords', () => {
      for (const kw of ['lint', 'format', 'delete', 'move', 'copy', 'replace']) {
        expect(detectModelMismatch(`${kw} the code`, 'opus')).not.toBeNull();
      }
    });

    it('detects various read-only keywords', () => {
      for (const kw of ['search', 'find', 'verify', 'check', 'scan', 'discover']) {
        expect(detectModelMismatch(`${kw} for errors`, 'opus')).not.toBeNull();
      }
    });
  });

  describe('detectSwitchPattern', () => {
    it('returns null for no switches', () => {
      expect(detectSwitchPattern([])).toBeNull();
    });

    it('returns manual_switch for single switch', () => {
      expect(detectSwitchPattern([{ from: 'claude-sonnet-4', to: 'claude-haiku-4' }])).toBe(
        'manual_switch'
      );
    });

    // Sonnet -> Opus followed immediately by Opus -> Sonnet.
    it('detects opus_plan_mode pattern', () => {
      expect(
        detectSwitchPattern([
          { from: 'claude-sonnet-4', to: 'claude-opus-4' },
          { from: 'claude-opus-4', to: 'claude-sonnet-4' },
        ])
      ).toBe('opus_plan_mode');
    });

    // A round trip that never touches Opus is not plan mode.
    it('returns manual_switch for non-plan-mode switches', () => {
      expect(
        detectSwitchPattern([
          { from: 'claude-sonnet-4', to: 'claude-haiku-4' },
          { from: 'claude-haiku-4', to: 'claude-sonnet-4' },
        ])
      ).toBe('manual_switch');
    });
  });
});
|
||||
|
|
@ -1181,4 +1181,329 @@ describe('analyzeSession', () => {
|
|||
expect(report.compaction.note).toContain('No compaction');
|
||||
});
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Assessment computations
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// Checks that analyzeSession attaches cost assessments only when the
// underlying cost metric could be computed.
describe('cost assessments', () => {
  it('computes costPerCommitAssessment when commits exist', () => {
    // One assistant turn with token usage and a Bash `git commit` call.
    // NOTE(review): presumably the analyzer counts this as one commit; the
    // detection logic is not visible here.
    const messages: ParsedMessage[] = [
      createMockMessage({
        type: 'assistant',
        model: 'claude-sonnet-4-20250514',
        usage: { input_tokens: 50000, output_tokens: 10000 },
        toolCalls: [
          {
            id: 'tc-1',
            name: 'Bash',
            input: { command: "git commit -m 'fix'" },
            isTask: false,
          },
        ],
      }),
    ];

    const report = analyzeSession(createMockDetail({ messages }));
    // Only presence is asserted, not which tier was assigned.
    expect(report.costAnalysis.costPerCommitAssessment).not.toBeNull();
  });

  it('returns null assessments when no commits', () => {
    const report = analyzeSession(createMockDetail());
    expect(report.costAnalysis.costPerCommitAssessment).toBeNull();
    expect(report.costAnalysis.costPerLineAssessment).toBeNull();
  });

  it('returns null subagentCostShareAssessment when no cost', () => {
    const report = analyzeSession(createMockDetail());
    expect(report.costAnalysis.subagentCostSharePct).toBeNull();
    expect(report.costAnalysis.subagentCostShareAssessment).toBeNull();
  });
});
|
||||
|
||||
// Checks the cache assessments attached to report.cacheEconomics.
describe('cache assessments', () => {
  it('computes cache efficiency assessment', () => {
    // Cache reads (9900) dominate cache creation (100).
    // NOTE(review): the efficiency formula is not visible here; presumably
    // this mix lands at or above the 'good' threshold.
    const messages: ParsedMessage[] = [
      createMockMessage({
        type: 'assistant',
        model: 'claude-sonnet-4-20250514',
        usage: {
          input_tokens: 100,
          output_tokens: 50,
          cache_creation_input_tokens: 100,
          cache_read_input_tokens: 9900,
        },
      }),
    ];

    const report = analyzeSession(createMockDetail({ messages }));
    expect(report.cacheEconomics.cacheEfficiencyAssessment).toBe('good');
  });

  it('returns concerning for low cache efficiency', () => {
    // Cache creation and cache reads are equal (500 each).
    const messages: ParsedMessage[] = [
      createMockMessage({
        type: 'assistant',
        model: 'claude-sonnet-4-20250514',
        usage: {
          input_tokens: 100,
          output_tokens: 50,
          cache_creation_input_tokens: 500,
          cache_read_input_tokens: 500,
        },
      }),
    ];

    const report = analyzeSession(createMockDetail({ messages }));
    expect(report.cacheEconomics.cacheEfficiencyAssessment).toBe('concerning');
  });

  it('returns null when no cache data', () => {
    // Both cache counters are zero, so neither assessment should be produced.
    const messages: ParsedMessage[] = [
      createMockMessage({
        type: 'assistant',
        model: 'claude-sonnet-4-20250514',
        usage: {
          input_tokens: 100,
          output_tokens: 50,
          cache_creation_input_tokens: 0,
          cache_read_input_tokens: 0,
        },
      }),
    ];

    const report = analyzeSession(createMockDetail({ messages }));
    expect(report.cacheEconomics.cacheEfficiencyAssessment).toBeNull();
    expect(report.cacheEconomics.cacheRatioAssessment).toBeNull();
  });
});
|
||||
|
||||
// Checks the per-tool and overall tool health assessments in report.toolUsage.
describe('tool health assessments', () => {
  it('computes per-tool assessment', () => {
    // Two Read calls, each answered by a non-error tool result.
    const messages: ParsedMessage[] = [
      createMockMessage({
        type: 'assistant',
        toolCalls: [
          { id: 'tc-1', name: 'Read', input: { file_path: '/a.ts' }, isTask: false },
          { id: 'tc-2', name: 'Read', input: { file_path: '/b.ts' }, isTask: false },
        ],
      }),
      createMockMessage({
        type: 'user',
        isMeta: true,
        content: [],
        toolResults: [
          { toolUseId: 'tc-1', content: 'ok', isError: false },
          { toolUseId: 'tc-2', content: 'ok', isError: false },
        ],
      }),
    ];

    const report = analyzeSession(createMockDetail({ messages }));
    expect(report.toolUsage.successRates.Read.assessment).toBe('healthy');
  });

  it('computes overall tool health', () => {
    // Uses the default mock detail.
    // NOTE(review): presumably it contains no tool with enough calls to count,
    // so the analyzer's fallback verdict is what is being asserted; confirm.
    const report = analyzeSession(createMockDetail());
    expect(report.toolUsage.overallToolHealth).toBe('healthy');
  });
});
|
||||
|
||||
// Checks report.idleAnalysis.idleAssessment for short and long gaps between
// messages.
describe('idle assessment', () => {
  it('returns efficient for low idle', () => {
    // Two messages 30 seconds apart.
    // NOTE(review): presumably below the analyzer's idle-gap cut-off, which is
    // not visible here.
    const messages: ParsedMessage[] = [
      createMockMessage({
        type: 'assistant',
        timestamp: new Date('2024-01-01T10:00:00Z'),
      }),
      createMockMessage({
        type: 'user',
        content: 'quick',
        timestamp: new Date('2024-01-01T10:00:30Z'),
      }),
    ];

    const report = analyzeSession(createMockDetail({ messages }));
    expect(report.idleAnalysis.idleAssessment).toBe('efficient');
  });

  it('returns high_idle for mostly idle session', () => {
    // A one-hour gap followed by ten seconds of activity.
    const messages: ParsedMessage[] = [
      createMockMessage({
        type: 'assistant',
        timestamp: new Date('2024-01-01T10:00:00Z'),
      }),
      createMockMessage({
        type: 'user',
        content: 'back',
        timestamp: new Date('2024-01-01T11:00:00Z'),
      }),
      createMockMessage({
        type: 'assistant',
        timestamp: new Date('2024-01-01T11:00:10Z'),
      }),
    ];

    const report = analyzeSession(createMockDetail({ messages }));
    expect(report.idleAnalysis.idleAssessment).toBe('high_idle');
  });
});
|
||||
|
||||
describe('thrashing assessment', () => {
  // The default mock session carries no thrashing signals.
  it('returns none when no signals', () => {
    const report = analyzeSession(createMockDetail());
    expect(report.thrashingSignals.thrashingAssessment).toBe('none');
  });

  it('returns mild or severe based on signal count', () => {
    // Builds an assistant message containing a single Edit call against `file`.
    const makeEditMsg = (file: string, id: string) =>
      createMockMessage({
        type: 'assistant',
        toolCalls: [{ id, name: 'Edit', input: { file_path: file }, isTask: false }],
      });

    // Three edits to the same file. This fixture contains no Bash calls, so repeated
    // edits are the only thrashing input exercised here.
    const messages: ParsedMessage[] = [
      makeEditMsg('/foo.ts', 'e1'),
      makeEditMsg('/foo.ts', 'e2'),
      makeEditMsg('/foo.ts', 'e3'),
    ];

    const report = analyzeSession(createMockDetail({ messages }));

    // The mild/severe cut-off lives in the analyzer's thresholds, which are not pinned
    // here, so the test only requires that thrashing is flagged at some severity.
    expect(['mild', 'severe']).toContain(report.thrashingSignals.thrashingAssessment);
  });
});
|
||||
|
||||
describe('model switch pattern', () => {
  // Assistant turns run sonnet -> opus -> sonnet, one minute apart.
  it('detects opus_plan_mode', () => {
    const assistantTurn = (model: string, isoTime: string) =>
      createMockMessage({
        type: 'assistant',
        model,
        timestamp: new Date(isoTime),
      });

    const messages: ParsedMessage[] = [
      assistantTurn('claude-sonnet-4-20250514', '2024-01-01T10:00:00Z'),
      assistantTurn('claude-opus-4-20250514', '2024-01-01T10:01:00Z'),
      assistantTurn('claude-sonnet-4-20250514', '2024-01-01T10:02:00Z'),
    ];

    const { modelSwitches } = analyzeSession(createMockDetail({ messages }));

    expect(modelSwitches.switchPattern).toBeNull === undefined
      ? undefined
      : expect(modelSwitches.switchPattern).toBe('opus_plan_mode');
  });

  // A single assistant message cannot contain a model switch.
  it('returns null when no switches', () => {
    const onlyTurn = createMockMessage({
      type: 'assistant',
      model: 'claude-sonnet-4-20250514',
      usage: { input_tokens: 100, output_tokens: 50 },
    });
    const messages: ParsedMessage[] = [onlyTurn];

    const { modelSwitches } = analyzeSession(createMockDetail({ messages }));

    expect(modelSwitches.switchPattern).toBeNull();
  });
});
|
||||
|
||||
describe('startup overhead assessment', () => {
  // The only assistant message already performs a Read, so nothing precedes the first work tool.
  it('returns normal for low overhead', () => {
    const workTurn = createMockMessage({
      type: 'assistant',
      model: 'claude-sonnet-4-20250514',
      usage: { input_tokens: 100, output_tokens: 50 },
      toolCalls: [{ id: 'tc-1', name: 'Read', input: { file_path: '/f.ts' }, isTask: false }],
    });
    const messages: ParsedMessage[] = [workTurn];

    const { startupOverhead } = analyzeSession(createMockDetail({ messages }));

    expect(startupOverhead.overheadAssessment).toBe('normal');
  });

  it('returns heavy for high overhead', () => {
    // Token-heavy opening turn that makes no tool calls.
    const startupTurn = createMockMessage({
      type: 'assistant',
      model: 'claude-sonnet-4-20250514',
      usage: { input_tokens: 50000, output_tokens: 10000 },
      toolCalls: [],
    });
    // Comparatively tiny turn that performs the first Read.
    const workTurn = createMockMessage({
      type: 'assistant',
      model: 'claude-sonnet-4-20250514',
      usage: { input_tokens: 100, output_tokens: 50 },
      toolCalls: [{ id: 'tc-1', name: 'Read', input: { file_path: '/f.ts' }, isTask: false }],
    });
    const messages: ParsedMessage[] = [startupTurn, workTurn];

    const { startupOverhead } = analyzeSession(createMockDetail({ messages }));

    expect(startupOverhead.overheadAssessment).toBe('heavy');
  });
});
|
||||
|
||||
describe('file read redundancy assessment', () => {
  // Two Read calls, each against a different file.
  it('returns normal for low redundancy', () => {
    const distinctReads = createMockMessage({
      type: 'assistant',
      toolCalls: [
        { id: 'tc-1', name: 'Read', input: { file_path: '/a.ts' }, isTask: false },
        { id: 'tc-2', name: 'Read', input: { file_path: '/b.ts' }, isTask: false },
      ],
    });
    const messages: ParsedMessage[] = [distinctReads];

    const { fileReadRedundancy } = analyzeSession(createMockDetail({ messages }));

    expect(fileReadRedundancy.redundancyAssessment).toBe('normal');
  });

  // Four Read calls that all target the same file.
  it('returns wasteful for high redundancy', () => {
    const repeatedReads = createMockMessage({
      type: 'assistant',
      toolCalls: [
        { id: 'tc-1', name: 'Read', input: { file_path: '/a.ts' }, isTask: false },
        { id: 'tc-2', name: 'Read', input: { file_path: '/a.ts' }, isTask: false },
        { id: 'tc-3', name: 'Read', input: { file_path: '/a.ts' }, isTask: false },
        { id: 'tc-4', name: 'Read', input: { file_path: '/a.ts' }, isTask: false },
      ],
    });
    const messages: ParsedMessage[] = [repeatedReads];

    const { fileReadRedundancy } = analyzeSession(createMockDetail({ messages }));

    expect(fileReadRedundancy.redundancyAssessment).toBe('wasteful');
  });
});
|
||||
|
||||
describe('model mismatch in subagents', () => {
  // The previous title claimed a mismatch is detected, but the assertion expects none:
  // the fixture describes a mechanical task without assigning an opus model to the subagent.
  it('reports no mismatch for mechanical tasks when the model is not opus', () => {
    const processes: Process[] = [
      {
        id: 'agent-1',
        filePath: '/path/to/agent-1.jsonl',
        messages: [],
        startTime: new Date('2024-01-01T10:00:00Z'),
        endTime: new Date('2024-01-01T10:01:00Z'),
        durationMs: 60000,
        metrics: createMockMetrics({ totalTokens: 5000, costUsd: 0.05 }),
        description: 'rename all variables',
        subagentType: 'code',
        isParallel: false,
      },
    ];

    const report = analyzeSession(createMockDetail({ processes }));

    // model is 'default (inherits parent)' which doesn't contain 'opus', so no mismatch
    expect(report.subagentMetrics.byAgent[0].modelMismatch).toBeNull();
  });
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Reference in a new issue