Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Fixed

- **Time breakdown: cap "Claude thinking" gaps so mid-turn suspensions aren't counted as thinking.**
Previously only the assistant-end→user gap was capped by `IDLE_THRESHOLD_MS`; the
user→assistant and tool_result→assistant gaps (both attributed to `claudeThink`, or to
`planning` while plan mode is active) were
uncapped. So any long pause that landed *mid-turn* — an overnight gap after a tool result,
a credit stall, a remote-control handoff — was reported as hours of "Claude thinking."
These gaps are now capped at `THINK_CAP_MS` (10 min); the remainder is booked as `humanAway`.
On a real 16 h session with overnight gaps this moved ~9 h out of "thinking"
(11 h 34 m → 2 h 35 m) into away time, where it belongs.

## [1.0.0] - 2026-02-18

### Added
Expand Down
34 changes: 34 additions & 0 deletions src/analyzer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -444,3 +444,37 @@ describe('analyzer: warmup cost', () => {
expect(result.warmupCost.turnCount).toBe(1);
});
});

// Regression coverage for the thinking-gap cap: a gap before an assistant
// message that exceeds the cap is split into a capped thinking slice plus
// humanAway for the remainder.
describe('analyzer: thinking gaps are capped (mid-turn suspension ≠ thinking)', () => {
  const MINUTE_MS = 60 * 1000;
  const HOUR_MS = 60 * MINUTE_MS;
  const CAP = 10 * MINUTE_MS; // same value as THINK_CAP_MS in analyzer.ts

  it('caps a long tool_result→assistant gap: 10min think + remainder humanAway', () => {
    const { enhancedStats } = analyzeSession('s1', [
      userMsg('2026-01-01T00:00:00Z'),
      assistantMsg('2026-01-01T00:00:02Z', { toolUses: [{ id: 'tu1', name: 'Bash' }] }),
      toolResultMsg('2026-01-01T00:00:12Z', ['tu1']),
      // The next assistant message arrives 2h after the tool result, i.e. the
      // session was suspended mid-turn.
      assistantMsg('2026-01-01T02:00:12Z'),
    ]);

    // Thinking = the genuine 2s user→assistant response + the capped 10min slice.
    expect(enhancedStats.claudeThink).toBe(2000 + CAP);
    // Everything beyond the cap (1h50m) is accounted as away time.
    expect(enhancedStats.humanAway).toBe(2 * HOUR_MS - CAP);
  });

  it('caps a long user→assistant first-response gap the same way', () => {
    const { enhancedStats } = analyzeSession('s1', [
      userMsg('2026-01-01T00:00:00Z'),
      // First response only shows up 3h later (suspended session).
      assistantMsg('2026-01-01T03:00:00Z'),
    ]);

    expect(enhancedStats.claudeThink).toBe(CAP);
    expect(enhancedStats.humanAway).toBe(3 * HOUR_MS - CAP);
  });

  it('leaves a normal short think gap untouched (no spurious humanAway)', () => {
    const { enhancedStats } = analyzeSession('s1', [
      userMsg('2026-01-01T00:00:00Z'),
      // A 5min gap stays below the cap, so nothing is split off.
      assistantMsg('2026-01-01T00:05:00Z'),
    ]);

    expect(enhancedStats.claudeThink).toBe(5 * MINUTE_MS);
    expect(enhancedStats.humanAway).toBe(0);
  });
});
26 changes: 22 additions & 4 deletions src/analyzer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ import type {
import { estimateCost } from './pricing.js';

// Idle threshold in milliseconds.
// NOTE(review): its usage is outside this excerpt; presumably it caps the
// assistant-end→user gap — confirm at the usage site.
const IDLE_THRESHOLD_MS = 2 * 60 * 1000; // 2 minutes
// Upper bound, in milliseconds, on one "thinking" slice.
// A single uninterrupted model response effectively never exceeds this. A longer
// gap before an assistant message means the session was SUSPENDED mid-turn
// (overnight pause, credit stall, remote-control handoff) — not the model
// "thinking" for hours. We cap the thinking slice at this and attribute the
// remainder to humanAway, so long-pause sessions aren't reported as huge
// "Claude thinking" time. This applies to both the user→assistant and the
// tool_result→assistant gaps.
const THINK_CAP_MS = 10 * 60 * 1000; // 10 minutes
// Tool names in this set are presumably treated as sub-agent invocations —
// verify against the usage sites, which are outside this excerpt.
const SUBAGENT_TOOLS = new Set(['Task', 'Agent']);

export function analyzeSession(sessionId: string, messages: SessionMessage[]): SessionAnalysis {
Expand Down Expand Up @@ -176,6 +184,18 @@ function detectEnhancedPhases(messages: SessionMessage[]): EnhancedTimeSegment[]
segments.push({ phase, startTime: start, endTime: end, durationMs: end - start, toolName });
}

// Record the gap [start, end] as model time: 'planning' while plan mode is
// active, 'claudeThink' otherwise. At most THINK_CAP_MS of the gap is booked
// under that phase; whatever lies beyond the cap is treated as a mid-turn
// suspension and recorded as 'humanAway' instead.
function emitThink(start: number, end: number) {
  const phase: EnhancedPhaseType = planModeActive ? 'planning' : 'claudeThink';

  // Common case: the whole gap fits under the cap, so emit it unchanged.
  const fitsUnderCap = end - start <= THINK_CAP_MS;
  if (fitsUnderCap) {
    emit(phase, start, end);
    return;
  }

  // Over the cap: split at the cutoff so the two slices are contiguous.
  const cutoff = start + THINK_CAP_MS;
  emit(phase, start, cutoff);
  emit('humanAway', cutoff, end);
}

for (const msg of messages) {
const ts = Date.parse(msg.timestamp);
if (isNaN(ts)) continue;
Expand Down Expand Up @@ -225,14 +245,12 @@ function detectEnhancedPhases(messages: SessionMessage[]): EnhancedTimeSegment[]

// Gap from last external user message → this assistant = Claude thinking (first response)
if (lastExternalUserTs !== null) {
const phase: EnhancedPhaseType = planModeActive ? 'planning' : 'claudeThink';
emit(phase, lastExternalUserTs, ts);
emitThink(lastExternalUserTs, ts);
lastExternalUserTs = null;
}
// Gap from last tool_result → this assistant = Claude thinking (mid-turn, processing results)
else if (lastToolResultTs !== null) {
const phase: EnhancedPhaseType = planModeActive ? 'planning' : 'claudeThink';
emit(phase, lastToolResultTs, ts);
emitThink(lastToolResultTs, ts);
}

lastAssistantEndTs = ts;
Expand Down