diff --git a/CHANGELOG.md b/CHANGELOG.md index 06ae374..22f01ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,37 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +- **Parallel-subagent benefit insight.** The subagent phase is aggregated by wall-clock + union (overlapping fan-out isn't double-counted), but the *sum* of agent durations still + carries signal: `sum − union` = time saved by running them concurrently, `sum / union` = + effective concurrency. Adds an Insights line — e.g. `⚡ Parallel subagents: 5 ran in 38m + wall (53m of work · 1.4× concurrent · saved 15m vs sequential)` — shown only when ≥2 + subagents actually overlapped, so sequential sessions get no spurious line. + +### Fixed + +- **Time breakdown: cap "Claude thinking" gaps so mid-turn suspensions aren't counted as thinking.** + Previously only the assistant-end→user gap was capped by `IDLE_THRESHOLD_MS`; the + user→assistant and tool_result→assistant gaps (both attributed to `claudeThink`) were + uncapped. So any long pause that landed *mid-turn* — an overnight gap after a tool result, + a credit stall, a remote-control handoff — was reported as hours of "Claude thinking." + These gaps are now capped at `THINK_CAP_MS` (10 min); the remainder is booked as `humanAway`. + On a real 16 h session with overnight gaps this moved ~9 h out of "thinking" + (11 h 34 m → 2 h 35 m) into away time, where it belongs. +- **Time Breakdown: count parallel subagents/tools by wall-clock union, not sum.** + `computeEnhancedStats` emitted one segment per tool/agent and summed them, so + tools and subagents fanned out in a single assistant turn (e.g. a panel of + review subagents) were double-counted. The `Subagents` and `Tool execution` + bars are now aggregated by wall-clock interval union (subagent wins on + cross-kind overlap), so they reflect real elapsed time and the active-time + percentages stay a true partition that sums to <=100% (previously could read + e.g. 109%). On a fan-out-heavy session this dropped reported subagent time from + ~53m (sum of 19 overlapping agents) to ~38m (true elapsed). + ## [1.0.0] - 2026-02-18 ### Added diff --git a/src/analyzer.test.ts b/src/analyzer.test.ts index b0810d5..5faff72 100644 --- a/src/analyzer.test.ts +++ b/src/analyzer.test.ts @@ -444,3 +444,168 @@ describe('analyzer: warmup cost', () => { expect(result.warmupCost.turnCount).toBe(1); }); }); + +describe('analyzer: segment-union ≤ wall-clock invariant', () => { + // Local copy of the analyzer's wall-clock union (it isn't exported). Time + // accounting must aggregate possibly-overlapping spans by UNION, not SUM — + // overlapping spans can never cover more wall-clock than exists. This guards + // against a regression that double-counts parallel tool/subagent spans. + function unionMs(spans: Array<[number, number]>): number { + if (spans.length === 0) return 0; + const sorted = [...spans].sort((a, b) => a[0] - b[0]); + let total = 0; + let [curStart, curEnd] = sorted[0]; + for (let i = 1; i < sorted.length; i++) { + const [s, e] = sorted[i]; + if (s <= curEnd) { + if (e > curEnd) curEnd = e; + } else { + total += curEnd - curStart; + curStart = s; + curEnd = e; + } + } + return total + (curEnd - curStart); + } + + // seconds within one minute → ISO timestamp (matches the literal style above) + const T = (s: number) => `2026-01-01T00:00:${String(s).padStart(2, '0')}Z`; + + it('holds with overlapping parallel-tool spans (union ≤ span < naive sum)', () => { + // Two tool_use in ONE assistant message → both start at the assistant ts, so + // their toolExec segments overlap. A naive sum overcounts; the union must not. + const result = analyzeSession('s1', [ + userMsg(T(0)), + assistantMsg(T(2), { toolUses: [{ id: 'tu1', name: 'Bash' }, { id: 'tu2', name: 'Bash' }] }), + toolResultMsg(T(32), ['tu1']), // toolExec [2,32] = 30s + toolResultMsg(T(42), ['tu2']), // toolExec [2,42] = 40s, overlaps tu1 + assistantMsg(T(50)), // claudeThink [42,50] = 8s + ]); + + const segs = result.enhancedSegments; + expect(segs.length).toBeGreaterThan(0); + + const minStart = Math.min(...segs.map(s => s.startTime)); + const maxEnd = Math.max(...segs.map(s => s.endTime)); + const span = maxEnd - minStart; + const union = unionMs(segs.map(s => [s.startTime, s.endTime] as [number, number])); + const naiveSum = segs.reduce((acc, s) => acc + s.durationMs, 0); + + // The invariant: union of all phase segments never exceeds the wall-clock span. + expect(union).toBeLessThanOrEqual(span); + // The parallel tools genuinely overlap, so the naive sum DOES exceed the span — + // demonstrating why aggregation must union (the bug class this guards). + expect(naiveSum).toBeGreaterThan(span); + + // Each segment is a valid, non-negative interval consistent with its duration. + for (const s of segs) { + expect(s.endTime).toBeGreaterThanOrEqual(s.startTime); + expect(s.durationMs).toBe(s.endTime - s.startTime); + } + }); + + it('holds for a simple sequential session (no overlap)', () => { + const result = analyzeSession('s2', [ + userMsg(T(0)), + assistantMsg(T(5), { toolUses: [{ id: 'tu1', name: 'Read' }] }), + toolResultMsg(T(8), ['tu1']), + assistantMsg(T(10)), + ]); + const segs = result.enhancedSegments; + const span = Math.max(...segs.map(s => s.endTime)) - Math.min(...segs.map(s => s.startTime)); + const union = unionMs(segs.map(s => [s.startTime, s.endTime] as [number, number])); + expect(union).toBeLessThanOrEqual(span); + }); +}); + +describe('analyzer: parallel subagents counted by wall-clock union', () => { + it('does not sum overlapping (fanned-out) subagent durations', () => { + // 3 Agents dispatched in ONE assistant turn (parallel fan-out), all starting + // at 00:00:10, returning staggered at +60s / +120s / +180s. + // Per-agent durations sum to 360s; true wall-clock elapsed is 180s. + const messages: SessionMessage[] = [ + userMsg('2026-01-01T00:00:00Z', 'go'), + assistantMsg('2026-01-01T00:00:10Z', { + toolUses: [ + { id: 'a1', name: 'Agent' }, + { id: 'a2', name: 'Agent' }, + { id: 'a3', name: 'Agent' }, + ], + }), + toolResultMsg('2026-01-01T00:01:10Z', ['a1']), + toolResultMsg('2026-01-01T00:02:10Z', ['a2']), + toolResultMsg('2026-01-01T00:03:10Z', ['a3']), + assistantMsg('2026-01-01T00:03:20Z'), + ]; + const a = analyzeSession('s', messages); + expect(a.enhancedStats.subagent).toBe(180_000); // union, NOT 360_000 sum + expect(a.enhancedStats.toolExec).toBe(0); + }); + + it('does not sum overlapping parallel non-agent tool calls either', () => { + // 2 Bash calls in one turn, overlapping: spans 60s + 120s (sum 180s), union 120s. + const messages: SessionMessage[] = [ + userMsg('2026-01-01T00:00:00Z', 'go'), + assistantMsg('2026-01-01T00:00:10Z', { + toolUses: [ + { id: 'b1', name: 'Bash' }, + { id: 'b2', name: 'Bash' }, + ], + }), + toolResultMsg('2026-01-01T00:01:10Z', ['b1']), + toolResultMsg('2026-01-01T00:02:10Z', ['b2']), + assistantMsg('2026-01-01T00:02:20Z'), + ]; + const a = analyzeSession('s', messages); + expect(a.enhancedStats.toolExec).toBe(120_000); // union, NOT 180_000 sum + expect(a.enhancedStats.subagent).toBe(0); + }); + + it('sequential subagents still add up (no spurious union collapse)', () => { + // Two NON-overlapping agents: 60s then 60s = 120s union == 120s sum. + const messages: SessionMessage[] = [ + userMsg('2026-01-01T00:00:00Z', 'go'), + assistantMsg('2026-01-01T00:00:10Z', { toolUses: [{ id: 'a1', name: 'Agent' }] }), + toolResultMsg('2026-01-01T00:01:10Z', ['a1']), + assistantMsg('2026-01-01T00:01:20Z', { toolUses: [{ id: 'a2', name: 'Agent' }] }), + toolResultMsg('2026-01-01T00:02:20Z', ['a2']), + assistantMsg('2026-01-01T00:02:30Z'), + ]; + const a = analyzeSession('s', messages); + expect(a.enhancedStats.subagent).toBe(120_000); + }); +}); + +describe('analyzer: thinking gaps are capped (mid-turn suspension ≠ thinking)', () => { + const CAP = 10 * 60 * 1000; // THINK_CAP_MS + + it('caps a long tool_result→assistant gap: 10min think + remainder humanAway', () => { + const result = analyzeSession('s1', [ + userMsg('2026-01-01T00:00:00Z'), + assistantMsg('2026-01-01T00:00:02Z', { toolUses: [{ id: 'tu1', name: 'Bash' }] }), + toolResultMsg('2026-01-01T00:00:12Z', ['tu1']), + assistantMsg('2026-01-01T02:00:12Z'), // 2h gap after the tool result (session suspended) + ]); + // 2s legit first-response think (user→assistant) + 10min cap on the suspended gap + expect(result.enhancedStats.claudeThink).toBe(2000 + CAP); + expect(result.enhancedStats.humanAway).toBe(2 * 60 * 60 * 1000 - CAP); // 1h50m booked as away + }); + + it('caps a long user→assistant first-response gap the same way', () => { + const result = analyzeSession('s1', [ + userMsg('2026-01-01T00:00:00Z'), + assistantMsg('2026-01-01T03:00:00Z'), // 3h before first response (suspended) + ]); + expect(result.enhancedStats.claudeThink).toBe(CAP); + expect(result.enhancedStats.humanAway).toBe(3 * 60 * 60 * 1000 - CAP); + }); + + it('leaves a normal short think gap untouched (no spurious humanAway)', () => { + const result = analyzeSession('s1', [ + userMsg('2026-01-01T00:00:00Z'), + assistantMsg('2026-01-01T00:05:00Z'), // 5min < cap + ]); + expect(result.enhancedStats.claudeThink).toBe(5 * 60 * 1000); + expect(result.enhancedStats.humanAway).toBe(0); + }); +}); diff --git a/src/analyzer.ts b/src/analyzer.ts index a201f8e..f8a7888 100644 --- a/src/analyzer.ts +++ b/src/analyzer.ts @@ -6,6 +6,14 @@ import type { import { estimateCost } from './pricing.js'; const IDLE_THRESHOLD_MS = 2 * 60 * 1000; // 2 minutes +// A single uninterrupted model response effectively never exceeds this. A longer +// gap before an assistant message means the session was SUSPENDED mid-turn +// (overnight pause, credit stall, remote-control handoff) — not the model +// "thinking" for hours. We cap the thinking slice at this and attribute the +// remainder to humanAway, so long-pause sessions aren't reported as huge +// "Claude thinking" time. (Only the assistant-end→user gap was capped before; +// the user→assistant and tool_result→assistant gaps were not.) +const THINK_CAP_MS = 10 * 60 * 1000; // 10 minutes const SUBAGENT_TOOLS = new Set(['Task', 'Agent']); export function analyzeSession(sessionId: string, messages: SessionMessage[]): SessionAnalysis { @@ -176,6 +184,18 @@ function detectEnhancedPhases(messages: SessionMessage[]): EnhancedTimeSegment[] segments.push({ phase, startTime: start, endTime: end, durationMs: end - start, toolName }); } + // Emit a "Claude thinking" (or planning) slice, capped at THINK_CAP_MS. Any + // excess is a mid-turn suspension, not thinking, so it's booked as humanAway. + function emitThink(start: number, end: number) { + const phase: EnhancedPhaseType = planModeActive ? 'planning' : 'claudeThink'; + if (end - start <= THINK_CAP_MS) { + emit(phase, start, end); + } else { + emit(phase, start, start + THINK_CAP_MS); + emit('humanAway', start + THINK_CAP_MS, end); + } + } + for (const msg of messages) { const ts = Date.parse(msg.timestamp); if (isNaN(ts)) continue; @@ -225,14 +245,12 @@ function detectEnhancedPhases(messages: SessionMessage[]): EnhancedTimeSegment[] // Gap from last external user message → this assistant = Claude thinking (first response) if (lastExternalUserTs !== null) { - const phase: EnhancedPhaseType = planModeActive ? 'planning' : 'claudeThink'; - emit(phase, lastExternalUserTs, ts); + emitThink(lastExternalUserTs, ts); lastExternalUserTs = null; } // Gap from last tool_result → this assistant = Claude thinking (mid-turn, processing results) else if (lastToolResultTs !== null) { - const phase: EnhancedPhaseType = planModeActive ? 'planning' : 'claudeThink'; - emit(phase, lastToolResultTs, ts); + emitThink(lastToolResultTs, ts); } lastAssistantEndTs = ts; @@ -258,11 +276,52 @@ function detectEnhancedPhases(messages: SessionMessage[]): EnhancedTimeSegment[] return segments; } +/** Wall-clock union (ms) of possibly-overlapping [start, end] spans. */ +export function unionMs(spans: Array<[number, number]>): number { + if (spans.length === 0) return 0; + const sorted = [...spans].sort((a, b) => a[0] - b[0]); + let total = 0; + let [curStart, curEnd] = sorted[0]; + for (let i = 1; i < sorted.length; i++) { + const [s, e] = sorted[i]; + if (s <= curEnd) { + if (e > curEnd) curEnd = e; + } else { + total += curEnd - curStart; + curStart = s; + curEnd = e; + } + } + return total + (curEnd - curStart); +} + function computeEnhancedStats(segments: EnhancedTimeSegment[]): EnhancedStats { const stats: EnhancedStats = { humanWait: 0, humanAway: 0, claudeThink: 0, toolExec: 0, subagent: 0, planning: 0 }; + + // claudeThink / planning / humanWait / humanAway are emitted as sequential, + // non-overlapping slices, so summing them is correct. But toolExec and + // subagent emit one segment PER tool/agent — when calls are fanned out in + // parallel (e.g. 5 review subagents from a single assistant turn) those + // segments overlap, and summing them double-counts the concurrency. Aggregate + // those two by WALL-CLOCK UNION instead so the breakdown reflects real elapsed + // time (and the active-time percentages stay a true partition that sums to + // <=100%, instead of e.g. 109%). + const subSpans: Array<[number, number]> = []; + const toolSpans: Array<[number, number]> = []; for (const seg of segments) { - stats[seg.phase] += seg.durationMs; + if (seg.phase === 'subagent') subSpans.push([seg.startTime, seg.endTime]); + else if (seg.phase === 'toolExec') toolSpans.push([seg.startTime, seg.endTime]); + else stats[seg.phase] += seg.durationMs; } + + // subagent = wall-clock with >=1 subagent open. + // toolExec = wall-clock busy with a tool but NOT already counted as subagent + // (subagent wins on the rare cross-kind overlap, e.g. Agent + Bash + // dispatched in the same turn), so the two never double-count. + const subagentMs = unionMs(subSpans); + const busyMs = unionMs([...subSpans, ...toolSpans]); + stats.subagent = subagentMs; + stats.toolExec = Math.max(0, busyMs - subagentMs); return stats; } diff --git a/src/formatter.test.ts b/src/formatter.test.ts index e5e3453..49b7c99 100644 --- a/src/formatter.test.ts +++ b/src/formatter.test.ts @@ -413,3 +413,34 @@ describe('formatter: insights', () => { expect(output).toContain('Warmup overhead'); }); }); + +describe('formatSession: parallel-subagent benefit insight', () => { + const seg = (start: number, end: number) => ({ + phase: 'subagent' as const, startTime: start, endTime: end, durationMs: end - start, + }); + + it('shows concurrency + time saved when subagents overlapped', () => { + // Two agents in one fan-out: [0,180s] and [0,120s] → sum 300s, wall(union) 180s, saved 120s. + const output = strip(formatSession(makeAnalysis({ + enhancedSegments: [seg(0, 180_000), seg(0, 120_000)], + }))); + expect(output).toContain('Parallel subagents'); + expect(output).toMatch(/2 ran in 3m wall/); + expect(output).toMatch(/saved 2m vs sequential/); + }); + + it('shows nothing for a single subagent (no overlap to report)', () => { + const output = strip(formatSession(makeAnalysis({ + enhancedSegments: [seg(0, 180_000)], + }))); + expect(output).not.toContain('Parallel subagents'); + }); + + it('shows nothing when subagents ran sequentially (no time saved)', () => { + // [0,60s] then [60s,120s] — adjacent, no overlap → sum == union, nothing saved. + const output = strip(formatSession(makeAnalysis({ + enhancedSegments: [seg(0, 60_000), seg(60_000, 120_000)], + }))); + expect(output).not.toContain('Parallel subagents'); + }); +}); diff --git a/src/formatter.ts b/src/formatter.ts index b226a59..b35f71e 100644 --- a/src/formatter.ts +++ b/src/formatter.ts @@ -1,5 +1,6 @@ import chalk from 'chalk'; -import type { SessionAnalysis, EnhancedStats, AggregateJson, StuckLoop, WarmupCost } from './types.js'; +import type { SessionAnalysis, EnhancedStats, EnhancedTimeSegment, AggregateJson, StuckLoop, WarmupCost } from './types.js'; +import { unionMs } from './analyzer.js'; const BAR_WIDTH = 20; @@ -212,7 +213,7 @@ export function formatSession(analysis: SessionAnalysis): string { lines.push(` Context: ${sparkline}`); // Insights section - const insightLines = formatInsights(analysis.stuckLoops, analysis.warmupCost); + const insightLines = formatInsights(analysis.stuckLoops, analysis.warmupCost, analysis.enhancedSegments); if (insightLines.length > 0) { lines.push(''); lines.push(hr('Insights')); @@ -720,9 +721,27 @@ export function formatJsonAggregate(analyses: SessionAnalysis[]): AggregateJson // ── Insight helpers ── -function formatInsights(stuckLoops: StuckLoop[], warmupCost: WarmupCost): string[] { +function formatInsights(stuckLoops: StuckLoop[], warmupCost: WarmupCost, segments?: EnhancedTimeSegment[]): string[] { const lines: string[] = []; + // Parallel-subagent benefit: subagent segments are aggregated by wall-clock + // union (overlapping fan-out isn't double-counted), but the SUM still carries + // signal \u2014 sum vs union is the speedup from running agents concurrently. + // Shown only when \u22652 subagents actually overlapped (saved >1s), so sequential + // sessions get no spurious line. + if (segments && segments.length > 0) { + const sub = segments.filter(s => s.phase === 'subagent'); + if (sub.length >= 2) { + const sumMs = sub.reduce((acc, s) => acc + s.durationMs, 0); + const wallMs = unionMs(sub.map(s => [s.startTime, s.endTime] as [number, number])); + const savedMs = sumMs - wallMs; + if (wallMs > 0 && savedMs > 1000) { + const factor = sumMs / wallMs; + lines.push(` ${chalk.magenta('\u26a1')} Parallel subagents: ${sub.length} ran in ${formatDuration(wallMs)} wall (${formatDuration(sumMs)} of work \u00b7 ${factor.toFixed(1)}\u00d7 concurrent \u00b7 saved ${formatDuration(savedMs)} vs sequential)`); + } + } + } + // Stuck loop warnings if (stuckLoops && stuckLoops.length > 0) { for (const loop of stuckLoops) {