diff --git a/CHANGELOG.md b/CHANGELOG.md index 06ae374..f445707 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,19 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Fixed + +- **Time breakdown: cap "Claude thinking" gaps so mid-turn suspensions aren't counted as thinking.** + Previously only the assistant-end→user gap was capped by `IDLE_THRESHOLD_MS`; the + user→assistant and tool_result→assistant gaps (attributed to `claudeThink`, or to `planning` while plan mode is active) were + uncapped. So any long pause that landed *mid-turn* — an overnight gap after a tool result, + a credit stall, a remote-control handoff — was reported as hours of "Claude thinking." + These gaps are now capped at `THINK_CAP_MS` (10 min); the remainder is booked as `humanAway`. + On a real 16 h session with overnight gaps this moved ~9 h out of "thinking" + (11 h 34 m → 2 h 35 m) into away time, where it belongs. 
+ ## [1.0.0] - 2026-02-18 ### Added diff --git a/src/analyzer.test.ts b/src/analyzer.test.ts index b0810d5..24983f0 100644 --- a/src/analyzer.test.ts +++ b/src/analyzer.test.ts @@ -444,3 +444,37 @@ describe('analyzer: warmup cost', () => { expect(result.warmupCost.turnCount).toBe(1); }); }); + +describe('analyzer: thinking gaps are capped (mid-turn suspension ≠ thinking)', () => { + const CAP = 10 * 60 * 1000; // THINK_CAP_MS + + it('caps a long tool_result→assistant gap: 10min think + remainder humanAway', () => { + const result = analyzeSession('s1', [ + userMsg('2026-01-01T00:00:00Z'), + assistantMsg('2026-01-01T00:00:02Z', { toolUses: [{ id: 'tu1', name: 'Bash' }] }), + toolResultMsg('2026-01-01T00:00:12Z', ['tu1']), + assistantMsg('2026-01-01T02:00:12Z'), // 2h gap after the tool result (session suspended) + ]); + // 2s legit first-response think (user→assistant) + 10min cap on the suspended gap + expect(result.enhancedStats.claudeThink).toBe(2000 + CAP); + expect(result.enhancedStats.humanAway).toBe(2 * 60 * 60 * 1000 - CAP); // 1h50m booked as away + }); + + it('caps a long user→assistant first-response gap the same way', () => { + const result = analyzeSession('s1', [ + userMsg('2026-01-01T00:00:00Z'), + assistantMsg('2026-01-01T03:00:00Z'), // 3h before first response (suspended) + ]); + expect(result.enhancedStats.claudeThink).toBe(CAP); + expect(result.enhancedStats.humanAway).toBe(3 * 60 * 60 * 1000 - CAP); + }); + + it('leaves a normal short think gap untouched (no spurious humanAway)', () => { + const result = analyzeSession('s1', [ + userMsg('2026-01-01T00:00:00Z'), + assistantMsg('2026-01-01T00:05:00Z'), // 5min < cap + ]); + expect(result.enhancedStats.claudeThink).toBe(5 * 60 * 1000); + expect(result.enhancedStats.humanAway).toBe(0); + }); +}); diff --git a/src/analyzer.ts b/src/analyzer.ts index a201f8e..2309da1 100644 --- a/src/analyzer.ts +++ b/src/analyzer.ts @@ -6,6 +6,14 @@ import type { import { estimateCost } from './pricing.js'; const 
IDLE_THRESHOLD_MS = 2 * 60 * 1000; // 2 minutes +// A single uninterrupted model response effectively never exceeds this. A longer +// gap before an assistant message means the session was SUSPENDED mid-turn +// (overnight pause, credit stall, remote-control handoff) — not the model +// "thinking" for hours. We cap the thinking slice at this and attribute the +// remainder to humanAway, so long-pause sessions aren't reported as huge +// "Claude thinking" time. (Only the assistant-end→user gap was capped before; +// the user→assistant and tool_result→assistant gaps were not.) +const THINK_CAP_MS = 10 * 60 * 1000; // 10 minutes const SUBAGENT_TOOLS = new Set(['Task', 'Agent']); export function analyzeSession(sessionId: string, messages: SessionMessage[]): SessionAnalysis { @@ -176,6 +184,18 @@ function detectEnhancedPhases(messages: SessionMessage[]): EnhancedTimeSegment[] segments.push({ phase, startTime: start, endTime: end, durationMs: end - start, toolName }); } + // Emit a "Claude thinking" (or planning) slice, capped at THINK_CAP_MS. Any + // excess is a mid-turn suspension, not thinking, so it's booked as humanAway. + function emitThink(start: number, end: number) { + const phase: EnhancedPhaseType = planModeActive ? 'planning' : 'claudeThink'; + if (end - start <= THINK_CAP_MS) { + emit(phase, start, end); + } else { + emit(phase, start, start + THINK_CAP_MS); + emit('humanAway', start + THINK_CAP_MS, end); + } + } + for (const msg of messages) { const ts = Date.parse(msg.timestamp); if (isNaN(ts)) continue; @@ -225,14 +245,12 @@ function detectEnhancedPhases(messages: SessionMessage[]): EnhancedTimeSegment[] // Gap from last external user message → this assistant = Claude thinking (first response) if (lastExternalUserTs !== null) { - const phase: EnhancedPhaseType = planModeActive ? 
'planning' : 'claudeThink'; - emit(phase, lastExternalUserTs, ts); + emitThink(lastExternalUserTs, ts); lastExternalUserTs = null; } // Gap from last tool_result → this assistant = Claude thinking (mid-turn, processing results) else if (lastToolResultTs !== null) { - const phase: EnhancedPhaseType = planModeActive ? 'planning' : 'claudeThink'; - emit(phase, lastToolResultTs, ts); + emitThink(lastToolResultTs, ts); } lastAssistantEndTs = ts;