Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,37 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added

- **Parallel-subagent benefit insight.** The subagent phase is aggregated by wall-clock
union (overlapping fan-out isn't double-counted), but the *sum* of agent durations still
carries signal: `sum − union` = time saved by running them concurrently, `sum / union` =
effective concurrency. Adds an Insights line — e.g. `⚡ Parallel subagents: 5 ran in 38m
wall (53m of work · 1.4× concurrent · saved 15m vs sequential)` — shown only when ≥2
subagents actually overlapped, so sequential sessions get no spurious line.

### Fixed

- **Time breakdown: cap "Claude thinking" gaps so mid-turn suspensions aren't counted as thinking.**
Previously only the assistant-end→user gap was capped by `IDLE_THRESHOLD_MS`; the
user→assistant and tool_result→assistant gaps (both attributed to `claudeThink`) were
uncapped. So any long pause that landed *mid-turn* — an overnight gap after a tool result,
a credit stall, a remote-control handoff — was reported as hours of "Claude thinking."
These gaps are now capped at `THINK_CAP_MS` (10 min); the remainder is booked as `humanAway`.
On a real 16 h session with overnight gaps this moved ~9 h out of "thinking"
(11 h 34 m → 2 h 35 m) into away time, where it belongs.
- **Time Breakdown: count parallel subagents/tools by wall-clock union, not sum.**
`computeEnhancedStats` emitted one segment per tool/agent and summed them, so
tools and subagents fanned out in a single assistant turn (e.g. a panel of
review subagents) were double-counted. The `Subagents` and `Tool execution`
bars are now aggregated by wall-clock interval union (subagent wins on
cross-kind overlap), so they reflect real elapsed time and the active-time
percentages stay a true partition that sums to <=100% (previously could read
e.g. 109%). On a fan-out-heavy session this dropped reported subagent time from
~53m (sum of 19 overlapping agents) to ~38m (true elapsed).

## [1.0.0] - 2026-02-18

### Added
Expand Down
165 changes: 165 additions & 0 deletions src/analyzer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -444,3 +444,168 @@ describe('analyzer: warmup cost', () => {
expect(result.warmupCost.turnCount).toBe(1);
});
});

describe('analyzer: segment-union ≤ wall-clock invariant', () => {
// Local copy of the analyzer's wall-clock union (it isn't exported). Time
// accounting must aggregate possibly-overlapping spans by UNION, not SUM —
// overlapping spans can never cover more wall-clock than exists. This guards
// against a regression that double-counts parallel tool/subagent spans.
function unionMs(spans: Array<[number, number]>): number {
if (spans.length === 0) return 0;
const sorted = [...spans].sort((a, b) => a[0] - b[0]);
let total = 0;
let [curStart, curEnd] = sorted[0];
for (let i = 1; i < sorted.length; i++) {
const [s, e] = sorted[i];
if (s <= curEnd) {
if (e > curEnd) curEnd = e;
} else {
total += curEnd - curStart;
curStart = s;
curEnd = e;
}
}
return total + (curEnd - curStart);
}

// seconds within one minute → ISO timestamp (matches the literal style above)
const T = (s: number) => `2026-01-01T00:00:${String(s).padStart(2, '0')}Z`;

it('holds with overlapping parallel-tool spans (union ≤ span < naive sum)', () => {
// Two tool_use in ONE assistant message → both start at the assistant ts, so
// their toolExec segments overlap. A naive sum overcounts; the union must not.
const result = analyzeSession('s1', [
userMsg(T(0)),
assistantMsg(T(2), { toolUses: [{ id: 'tu1', name: 'Bash' }, { id: 'tu2', name: 'Bash' }] }),
toolResultMsg(T(32), ['tu1']), // toolExec [2,32] = 30s
toolResultMsg(T(42), ['tu2']), // toolExec [2,42] = 40s, overlaps tu1
assistantMsg(T(50)), // claudeThink [42,50] = 8s
]);

const segs = result.enhancedSegments;
expect(segs.length).toBeGreaterThan(0);

const minStart = Math.min(...segs.map(s => s.startTime));
const maxEnd = Math.max(...segs.map(s => s.endTime));
const span = maxEnd - minStart;
const union = unionMs(segs.map(s => [s.startTime, s.endTime] as [number, number]));
const naiveSum = segs.reduce((acc, s) => acc + s.durationMs, 0);

// The invariant: union of all phase segments never exceeds the wall-clock span.
expect(union).toBeLessThanOrEqual(span);
// The parallel tools genuinely overlap, so the naive sum DOES exceed the span —
// demonstrating why aggregation must union (the bug class this guards).
expect(naiveSum).toBeGreaterThan(span);

// Each segment is a valid, non-negative interval consistent with its duration.
for (const s of segs) {
expect(s.endTime).toBeGreaterThanOrEqual(s.startTime);
expect(s.durationMs).toBe(s.endTime - s.startTime);
}
});

it('holds for a simple sequential session (no overlap)', () => {
const result = analyzeSession('s2', [
userMsg(T(0)),
assistantMsg(T(5), { toolUses: [{ id: 'tu1', name: 'Read' }] }),
toolResultMsg(T(8), ['tu1']),
assistantMsg(T(10)),
]);
const segs = result.enhancedSegments;
const span = Math.max(...segs.map(s => s.endTime)) - Math.min(...segs.map(s => s.startTime));
const union = unionMs(segs.map(s => [s.startTime, s.endTime] as [number, number]));
expect(union).toBeLessThanOrEqual(span);
});
});

describe('analyzer: parallel subagents counted by wall-clock union', () => {
it('does not sum overlapping (fanned-out) subagent durations', () => {
// 3 Agents dispatched in ONE assistant turn (parallel fan-out), all starting
// at 00:00:10, returning staggered at +60s / +120s / +180s.
// Per-agent durations sum to 360s; true wall-clock elapsed is 180s.
const messages: SessionMessage[] = [
userMsg('2026-01-01T00:00:00Z', 'go'),
assistantMsg('2026-01-01T00:00:10Z', {
toolUses: [
{ id: 'a1', name: 'Agent' },
{ id: 'a2', name: 'Agent' },
{ id: 'a3', name: 'Agent' },
],
}),
toolResultMsg('2026-01-01T00:01:10Z', ['a1']),
toolResultMsg('2026-01-01T00:02:10Z', ['a2']),
toolResultMsg('2026-01-01T00:03:10Z', ['a3']),
assistantMsg('2026-01-01T00:03:20Z'),
];
const a = analyzeSession('s', messages);
expect(a.enhancedStats.subagent).toBe(180_000); // union, NOT 360_000 sum
expect(a.enhancedStats.toolExec).toBe(0);
});

it('does not sum overlapping parallel non-agent tool calls either', () => {
// 2 Bash calls in one turn, overlapping: spans 60s + 120s (sum 180s), union 120s.
const messages: SessionMessage[] = [
userMsg('2026-01-01T00:00:00Z', 'go'),
assistantMsg('2026-01-01T00:00:10Z', {
toolUses: [
{ id: 'b1', name: 'Bash' },
{ id: 'b2', name: 'Bash' },
],
}),
toolResultMsg('2026-01-01T00:01:10Z', ['b1']),
toolResultMsg('2026-01-01T00:02:10Z', ['b2']),
assistantMsg('2026-01-01T00:02:20Z'),
];
const a = analyzeSession('s', messages);
expect(a.enhancedStats.toolExec).toBe(120_000); // union, NOT 180_000 sum
expect(a.enhancedStats.subagent).toBe(0);
});

it('sequential subagents still add up (no spurious union collapse)', () => {
// Two NON-overlapping agents: 60s then 60s = 120s union == 120s sum.
const messages: SessionMessage[] = [
userMsg('2026-01-01T00:00:00Z', 'go'),
assistantMsg('2026-01-01T00:00:10Z', { toolUses: [{ id: 'a1', name: 'Agent' }] }),
toolResultMsg('2026-01-01T00:01:10Z', ['a1']),
assistantMsg('2026-01-01T00:01:20Z', { toolUses: [{ id: 'a2', name: 'Agent' }] }),
toolResultMsg('2026-01-01T00:02:20Z', ['a2']),
assistantMsg('2026-01-01T00:02:30Z'),
];
const a = analyzeSession('s', messages);
expect(a.enhancedStats.subagent).toBe(120_000);
});
});

describe('analyzer: thinking gaps are capped (mid-turn suspension ≠ thinking)', () => {
const CAP = 10 * 60 * 1000; // THINK_CAP_MS

it('caps a long tool_result→assistant gap: 10min think + remainder humanAway', () => {
const result = analyzeSession('s1', [
userMsg('2026-01-01T00:00:00Z'),
assistantMsg('2026-01-01T00:00:02Z', { toolUses: [{ id: 'tu1', name: 'Bash' }] }),
toolResultMsg('2026-01-01T00:00:12Z', ['tu1']),
assistantMsg('2026-01-01T02:00:12Z'), // 2h gap after the tool result (session suspended)
]);
// 2s legit first-response think (user→assistant) + 10min cap on the suspended gap
expect(result.enhancedStats.claudeThink).toBe(2000 + CAP);
expect(result.enhancedStats.humanAway).toBe(2 * 60 * 60 * 1000 - CAP); // 1h50m booked as away
});

it('caps a long user→assistant first-response gap the same way', () => {
const result = analyzeSession('s1', [
userMsg('2026-01-01T00:00:00Z'),
assistantMsg('2026-01-01T03:00:00Z'), // 3h before first response (suspended)
]);
expect(result.enhancedStats.claudeThink).toBe(CAP);
expect(result.enhancedStats.humanAway).toBe(3 * 60 * 60 * 1000 - CAP);
});

it('leaves a normal short think gap untouched (no spurious humanAway)', () => {
const result = analyzeSession('s1', [
userMsg('2026-01-01T00:00:00Z'),
assistantMsg('2026-01-01T00:05:00Z'), // 5min < cap
]);
expect(result.enhancedStats.claudeThink).toBe(5 * 60 * 1000);
expect(result.enhancedStats.humanAway).toBe(0);
});
});
69 changes: 64 additions & 5 deletions src/analyzer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ import type {
import { estimateCost } from './pricing.js';

const IDLE_THRESHOLD_MS = 2 * 60 * 1000; // 2 minutes
// A single uninterrupted model response effectively never exceeds this. A longer
// gap before an assistant message means the session was SUSPENDED mid-turn
// (overnight pause, credit stall, remote-control handoff) — not the model
// "thinking" for hours. We cap the thinking slice at this and attribute the
// remainder to humanAway, so long-pause sessions aren't reported as huge
// "Claude thinking" time. (Only the assistant-end→user gap was capped before;
// the user→assistant and tool_result→assistant gaps were not.)
const THINK_CAP_MS = 10 * 60 * 1000; // 10 minutes
const SUBAGENT_TOOLS = new Set(['Task', 'Agent']);

export function analyzeSession(sessionId: string, messages: SessionMessage[]): SessionAnalysis {
Expand Down Expand Up @@ -176,6 +184,18 @@ function detectEnhancedPhases(messages: SessionMessage[]): EnhancedTimeSegment[]
segments.push({ phase, startTime: start, endTime: end, durationMs: end - start, toolName });
}

// Emit a "Claude thinking" (or planning) slice, capped at THINK_CAP_MS. Any
// excess is a mid-turn suspension, not thinking, so it's booked as humanAway.
function emitThink(start: number, end: number) {
const phase: EnhancedPhaseType = planModeActive ? 'planning' : 'claudeThink';
if (end - start <= THINK_CAP_MS) {
emit(phase, start, end);
} else {
emit(phase, start, start + THINK_CAP_MS);
emit('humanAway', start + THINK_CAP_MS, end);
}
}

for (const msg of messages) {
const ts = Date.parse(msg.timestamp);
if (isNaN(ts)) continue;
Expand Down Expand Up @@ -225,14 +245,12 @@ function detectEnhancedPhases(messages: SessionMessage[]): EnhancedTimeSegment[]

// Gap from last external user message → this assistant = Claude thinking (first response)
if (lastExternalUserTs !== null) {
const phase: EnhancedPhaseType = planModeActive ? 'planning' : 'claudeThink';
emit(phase, lastExternalUserTs, ts);
emitThink(lastExternalUserTs, ts);
lastExternalUserTs = null;
}
// Gap from last tool_result → this assistant = Claude thinking (mid-turn, processing results)
else if (lastToolResultTs !== null) {
const phase: EnhancedPhaseType = planModeActive ? 'planning' : 'claudeThink';
emit(phase, lastToolResultTs, ts);
emitThink(lastToolResultTs, ts);
}

lastAssistantEndTs = ts;
Expand All @@ -258,11 +276,52 @@ function detectEnhancedPhases(messages: SessionMessage[]): EnhancedTimeSegment[]
return segments;
}

/** Wall-clock union (ms) of possibly-overlapping [start, end] spans. */
export function unionMs(spans: Array<[number, number]>): number {
if (spans.length === 0) return 0;
const sorted = [...spans].sort((a, b) => a[0] - b[0]);
let total = 0;
let [curStart, curEnd] = sorted[0];
for (let i = 1; i < sorted.length; i++) {
const [s, e] = sorted[i];
if (s <= curEnd) {
if (e > curEnd) curEnd = e;
} else {
total += curEnd - curStart;
curStart = s;
curEnd = e;
}
}
return total + (curEnd - curStart);
}

function computeEnhancedStats(segments: EnhancedTimeSegment[]): EnhancedStats {
const stats: EnhancedStats = { humanWait: 0, humanAway: 0, claudeThink: 0, toolExec: 0, subagent: 0, planning: 0 };

// claudeThink / planning / humanWait / humanAway are emitted as sequential,
// non-overlapping slices, so summing them is correct. But toolExec and
// subagent emit one segment PER tool/agent — when calls are fanned out in
// parallel (e.g. 5 review subagents from a single assistant turn) those
// segments overlap, and summing them double-counts the concurrency. Aggregate
// those two by WALL-CLOCK UNION instead so the breakdown reflects real elapsed
// time (and the active-time percentages stay a true partition that sums to
// <=100%, instead of e.g. 109%).
const subSpans: Array<[number, number]> = [];
const toolSpans: Array<[number, number]> = [];
for (const seg of segments) {
stats[seg.phase] += seg.durationMs;
if (seg.phase === 'subagent') subSpans.push([seg.startTime, seg.endTime]);
else if (seg.phase === 'toolExec') toolSpans.push([seg.startTime, seg.endTime]);
else stats[seg.phase] += seg.durationMs;
}

// subagent = wall-clock with >=1 subagent open.
// toolExec = wall-clock busy with a tool but NOT already counted as subagent
// (subagent wins on the rare cross-kind overlap, e.g. Agent + Bash
// dispatched in the same turn), so the two never double-count.
const subagentMs = unionMs(subSpans);
const busyMs = unionMs([...subSpans, ...toolSpans]);
stats.subagent = subagentMs;
stats.toolExec = Math.max(0, busyMs - subagentMs);
return stats;
}

Expand Down
31 changes: 31 additions & 0 deletions src/formatter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -413,3 +413,34 @@ describe('formatter: insights', () => {
expect(output).toContain('Warmup overhead');
});
});

describe('formatSession: parallel-subagent benefit insight', () => {
const seg = (start: number, end: number) => ({
phase: 'subagent' as const, startTime: start, endTime: end, durationMs: end - start,
});

it('shows concurrency + time saved when subagents overlapped', () => {
// Two agents in one fan-out: [0,180s] and [0,120s] → sum 300s, wall(union) 180s, saved 120s.
const output = strip(formatSession(makeAnalysis({
enhancedSegments: [seg(0, 180_000), seg(0, 120_000)],
})));
expect(output).toContain('Parallel subagents');
expect(output).toMatch(/2 ran in 3m wall/);
expect(output).toMatch(/saved 2m vs sequential/);
});

it('shows nothing for a single subagent (no overlap to report)', () => {
const output = strip(formatSession(makeAnalysis({
enhancedSegments: [seg(0, 180_000)],
})));
expect(output).not.toContain('Parallel subagents');
});

it('shows nothing when subagents ran sequentially (no time saved)', () => {
// [0,60s] then [60s,120s] — adjacent, no overlap → sum == union, nothing saved.
const output = strip(formatSession(makeAnalysis({
enhancedSegments: [seg(0, 60_000), seg(60_000, 120_000)],
})));
expect(output).not.toContain('Parallel subagents');
});
});
25 changes: 22 additions & 3 deletions src/formatter.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import chalk from 'chalk';
import type { SessionAnalysis, EnhancedStats, AggregateJson, StuckLoop, WarmupCost } from './types.js';
import type { SessionAnalysis, EnhancedStats, EnhancedTimeSegment, AggregateJson, StuckLoop, WarmupCost } from './types.js';
import { unionMs } from './analyzer.js';

const BAR_WIDTH = 20;

Expand Down Expand Up @@ -212,7 +213,7 @@ export function formatSession(analysis: SessionAnalysis): string {
lines.push(` Context: ${sparkline}`);

// Insights section
const insightLines = formatInsights(analysis.stuckLoops, analysis.warmupCost);
const insightLines = formatInsights(analysis.stuckLoops, analysis.warmupCost, analysis.enhancedSegments);
if (insightLines.length > 0) {
lines.push('');
lines.push(hr('Insights'));
Expand Down Expand Up @@ -720,9 +721,27 @@ export function formatJsonAggregate(analyses: SessionAnalysis[]): AggregateJson

// ── Insight helpers ──

function formatInsights(stuckLoops: StuckLoop[], warmupCost: WarmupCost): string[] {
function formatInsights(stuckLoops: StuckLoop[], warmupCost: WarmupCost, segments?: EnhancedTimeSegment[]): string[] {
const lines: string[] = [];

// Parallel-subagent benefit: subagent segments are aggregated by wall-clock
// union (overlapping fan-out isn't double-counted), but the SUM still carries
// signal \u2014 sum vs union is the speedup from running agents concurrently.
// Shown only when \u22652 subagents actually overlapped (saved >1s), so sequential
// sessions get no spurious line.
if (segments && segments.length > 0) {
const sub = segments.filter(s => s.phase === 'subagent');
if (sub.length >= 2) {
const sumMs = sub.reduce((acc, s) => acc + s.durationMs, 0);
const wallMs = unionMs(sub.map(s => [s.startTime, s.endTime] as [number, number]));
const savedMs = sumMs - wallMs;
if (wallMs > 0 && savedMs > 1000) {
const factor = sumMs / wallMs;
lines.push(` ${chalk.magenta('\u26a1')} Parallel subagents: ${sub.length} ran in ${formatDuration(wallMs)} wall (${formatDuration(sumMs)} of work \u00b7 ${factor.toFixed(1)}\u00d7 concurrent \u00b7 saved ${formatDuration(savedMs)} vs sequential)`);
}
}
}

// Stuck loop warnings
if (stuckLoops && stuckLoops.length > 0) {
for (const loop of stuckLoops) {
Expand Down