
Commit 26d5519

feat: add per-request visualizer token toggle
Co-authored-by: openhands <[email protected]>
1 parent 4ffaa97 commit 26d5519

File tree

2 files changed: +164 −14 lines changed

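For context, here is a minimal usage sketch of the toggle this commit adds. The import path, constructor argument, and environment variable come from the diff below; the surrounding setup is illustrative only.

import os

# Option 1: opt in via the environment flag (read when the visualizer is
# constructed). _env_flag() accepts "1", "true", "yes", or "on".
os.environ["OPENHANDS_VISUALIZER_PER_REQUEST_TOKENS"] = "true"

from openhands.sdk.conversation.visualizer import ConversationVisualizer

# Option 2: opt in per instance. An explicit bool wins; the environment flag
# is consulted only when per_request_token_stats is left as None.
visualizer = ConversationVisualizer(per_request_token_stats=True)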

openhands/sdk/conversation/visualizer.py

Lines changed: 116 additions & 14 deletions
@@ -1,3 +1,4 @@
+import os
 import re
 from typing import TYPE_CHECKING
 
@@ -16,6 +17,7 @@
 )
 from openhands.sdk.event.base import Event
 from openhands.sdk.event.condenser import Condensation
+from openhands.sdk.llm.utils.metrics import TokenUsage
 
 
 if TYPE_CHECKING:
@@ -47,6 +49,18 @@
     r"\*(.*?)\*": "italic",
 }
 
+
+_PER_REQUEST_TOKENS_ENV = "OPENHANDS_VISUALIZER_PER_REQUEST_TOKENS"
+_TRUTHY_ENV_VALUES = {"1", "true", "yes", "on"}
+
+
+def _env_flag(name: str) -> bool:
+    value = os.getenv(name)
+    if value is None:
+        return False
+    return value.strip().lower() in _TRUTHY_ENV_VALUES
+
+
 _PANEL_PADDING = (1, 1)
 
 
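Worth noting about the helper above: _env_flag returns True only when the variable is set to "1", "true", "yes", or "on" (whitespace-trimmed and lowercased first); an unset variable or any other value, including "0" and "false", leaves the toggle off.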
@@ -61,6 +75,7 @@ def __init__(
         highlight_regex: dict[str, str] | None = None,
         skip_user_messages: bool = False,
         conversation_stats: "ConversationStats | None" = None,
+        per_request_token_stats: bool | None = None,
     ):
         """Initialize the visualizer.
 
@@ -72,11 +87,20 @@ def __init__(
             skip_user_messages: If True, skip displaying user messages. Useful for
                 scenarios where user input is not relevant to show.
             conversation_stats: ConversationStats object to display metrics information.
+            per_request_token_stats: Whether to display per-request token usage instead
+                of accumulated totals. When None, uses the
+                OPENHANDS_VISUALIZER_PER_REQUEST_TOKENS
+                environment flag.
         """
         self._console = Console()
         self._skip_user_messages = skip_user_messages
         self._highlight_patterns: dict[str, str] = highlight_regex or {}
         self._conversation_stats = conversation_stats
+        if per_request_token_stats is None:
+            per_request_token_stats = _env_flag(_PER_REQUEST_TOKENS_ENV)
+        self._use_per_request_token_stats = per_request_token_stats
+        self._last_accumulated_usage: TokenUsage | None = None
+        self._last_request_usage: TokenUsage | None = None
 
     def on_event(self, event: Event) -> None:
         """Main event handler that displays events with Rich formatting."""
@@ -227,6 +251,78 @@ def _create_event_panel(self, event: Event) -> Panel | None:
             expand=True,
         )
 
+    @staticmethod
+    def _clone_usage(usage: TokenUsage) -> TokenUsage:
+        return TokenUsage.model_validate(usage.model_dump())
+
+    def _get_per_request_usage(self, usage: TokenUsage) -> TokenUsage | None:
+        current = self._clone_usage(usage)
+
+        if self._last_accumulated_usage is None:
+            self._last_accumulated_usage = current
+            self._last_request_usage = self._clone_usage(current)
+            return self._last_request_usage
+
+        prev = self._last_accumulated_usage
+        tracked_fields = (
+            "prompt_tokens",
+            "completion_tokens",
+            "cache_read_tokens",
+            "cache_write_tokens",
+            "reasoning_tokens",
+        )
+
+        decreased = any(
+            getattr(current, field) < getattr(prev, field) for field in tracked_fields
+        )
+        if decreased:
+            self._last_accumulated_usage = current
+            self._last_request_usage = self._clone_usage(current)
+            return self._last_request_usage
+
+        changed = any(
+            getattr(current, field) != getattr(prev, field) for field in tracked_fields
+        )
+        if changed:
+            prompt_delta = max(current.prompt_tokens - prev.prompt_tokens, 0)
+            completion_delta = max(
+                current.completion_tokens - prev.completion_tokens, 0
+            )
+            cache_read_delta = max(
+                current.cache_read_tokens - prev.cache_read_tokens, 0
+            )
+            cache_write_delta = max(
+                current.cache_write_tokens - prev.cache_write_tokens, 0
+            )
+            reasoning_delta = max(current.reasoning_tokens - prev.reasoning_tokens, 0)
+            self._last_request_usage = TokenUsage(
+                model=current.model,
+                prompt_tokens=prompt_delta,
+                completion_tokens=completion_delta,
+                cache_read_tokens=cache_read_delta,
+                cache_write_tokens=cache_write_delta,
+                reasoning_tokens=reasoning_delta,
+                context_window=current.context_window,
+                per_turn_token=prompt_delta + completion_delta,
+                response_id=current.response_id or prev.response_id,
+            )
+
+        if self._last_request_usage is None:
+            self._last_request_usage = TokenUsage(
+                model=current.model,
+                prompt_tokens=0,
+                completion_tokens=0,
+                cache_read_tokens=0,
+                cache_write_tokens=0,
+                reasoning_tokens=0,
+                context_window=current.context_window,
+                per_turn_token=0,
+                response_id=current.response_id,
+            )
+
+        self._last_accumulated_usage = current
+        return self._last_request_usage
+
     def _format_metrics_subtitle(self) -> str | None:
         """Format LLM metrics as a visually appealing subtitle string with icons,
         colors, and k/m abbreviations using conversation stats."""
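To make the delta bookkeeping above concrete, here is a behavior sketch. The numbers mirror the test added in this commit; the snippet assumes TokenUsage's counter fields default to 0 when omitted, so treat it as illustrative rather than part of the change.

from openhands.sdk.conversation.visualizer import ConversationVisualizer
from openhands.sdk.llm.utils.metrics import TokenUsage

viz = ConversationVisualizer(per_request_token_stats=True)

# First accumulated snapshot: no baseline yet, so it is returned as-is.
first = TokenUsage(
    model="test-model", prompt_tokens=150, completion_tokens=50,
    cache_read_tokens=30, reasoning_tokens=20, context_window=8000,
)
baseline = viz._get_per_request_usage(first)
assert baseline is not None and baseline.prompt_tokens == 150

# Second snapshot: totals grew, so field-wise deltas are reported
# (350 - 150 = 200 prompt, 125 - 50 = 75 completion, 20 - 20 = 0 reasoning).
second = TokenUsage(
    model="test-model", prompt_tokens=350, completion_tokens=125,
    cache_read_tokens=55, reasoning_tokens=20, context_window=8000,
)
delta = viz._get_per_request_usage(second)
assert delta is not None
assert delta.prompt_tokens == 200 and delta.completion_tokens == 75

# If any tracked counter decreases (e.g. the metrics were reset), the new
# snapshot is treated as a fresh baseline and returned unchanged.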
@@ -240,7 +336,12 @@ def _format_metrics_subtitle(self) -> str | None:
         usage = combined_metrics.accumulated_token_usage
         cost = combined_metrics.accumulated_cost or 0.0
 
-        # helper: 1234 -> "1.2K", 1200000 -> "1.2M"
+        display_usage = usage
+        if self._use_per_request_token_stats:
+            per_request_usage = self._get_per_request_usage(usage)
+            if per_request_usage is not None:
+                display_usage = per_request_usage
+
         def abbr(n: int | float) -> str:
             n = int(n or 0)
             if n >= 1_000_000_000:
@@ -253,28 +354,29 @@ def abbr(n: int | float) -> str:
                 return str(n)
             return s.replace(".0", "")
 
-        input_tokens = abbr(usage.prompt_tokens or 0)
-        output_tokens = abbr(usage.completion_tokens or 0)
-
-        # Cache hit rate (prompt + cache)
-        prompt = usage.prompt_tokens or 0
-        cache_read = usage.cache_read_tokens or 0
-        cache_rate = f"{(cache_read / prompt * 100):.2f}%" if prompt > 0 else "N/A"
-        reasoning_tokens = usage.reasoning_tokens or 0
+        prompt_value = display_usage.prompt_tokens or 0
+        output_value = display_usage.completion_tokens or 0
+        cache_prompt = prompt_value
+        cache_read = display_usage.cache_read_tokens or 0
+        cache_rate = (
+            f"{(cache_read / cache_prompt * 100):.2f}%" if cache_prompt > 0 else "N/A"
+        )
+        reasoning_tokens = display_usage.reasoning_tokens or 0
 
-        # Cost
         cost_str = f"{cost:.4f}" if cost > 0 else "$0.00"
 
-        # Build with fixed color scheme
         parts: list[str] = []
-        parts.append(f"[cyan]↑ input {input_tokens}[/cyan]")
+        parts.append(f"[cyan]↑ input {abbr(prompt_value)}[/cyan]")
         parts.append(f"[magenta]cache hit {cache_rate}[/magenta]")
         if reasoning_tokens > 0:
             parts.append(f"[yellow] reasoning {abbr(reasoning_tokens)}[/yellow]")
-        parts.append(f"[blue]↓ output {output_tokens}[/blue]")
+        parts.append(f"[blue]↓ output {abbr(output_value)}[/blue]")
         parts.append(f"[green]$ {cost_str}[/green]")
 
-        return "Tokens: " + " • ".join(parts)
+        label = (
+            "Tokens (last request)" if self._use_per_request_token_stats else "Tokens"
+        )
+        return f"{label}: " + " • ".join(parts)
 
 
 def create_default_visualizer(
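Reading the final hunks together: only the token figures switch to per-request deltas when the toggle is on; the dollar amount still comes from combined_metrics.accumulated_cost, so cost stays cumulative either way. Stripped of Rich color markup, the second request in the test below would render along the lines of:

Tokens (last request): ↑ input 200 • cache hit 12.50% • ↓ output 75 • $ 0.1500

The reasoning segment is omitted there because the per-request reasoning delta is zero, and the cache-hit rate (25 / 200 = 12.50%) is computed against the per-request prompt tokens.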

tests/sdk/conversation/test_visualizer.py

Lines changed: 48 additions & 0 deletions
@@ -340,6 +340,54 @@ def test_metrics_formatting():
     assert "0.0234" in subtitle  # Cost
 
 
+def test_metrics_formatting_per_request_toggle(monkeypatch):
+    """Per-request metrics should be shown when the env flag is enabled."""
+    from openhands.sdk.conversation.conversation_stats import ConversationStats
+    from openhands.sdk.llm.utils.metrics import Metrics
+
+    monkeypatch.setenv("OPENHANDS_VISUALIZER_PER_REQUEST_TOKENS", "true")
+
+    conversation_stats = ConversationStats()
+    metrics = Metrics(model_name="test-model")
+    metrics.add_cost(0.1)
+    metrics.add_token_usage(
+        prompt_tokens=150,
+        completion_tokens=50,
+        cache_read_tokens=30,
+        cache_write_tokens=0,
+        reasoning_tokens=20,
+        context_window=8000,
+        response_id="first",
+    )
+    conversation_stats.service_to_metrics["test_service"] = metrics
+
+    visualizer = ConversationVisualizer(conversation_stats=conversation_stats)
+
+    subtitle = visualizer._format_metrics_subtitle()
+    assert subtitle is not None
+    assert "Tokens (last request)" in subtitle
+    assert "input 150" in subtitle
+    assert "reasoning 20" in subtitle
+    assert "0.1000" in subtitle
+
+    metrics.add_cost(0.05)
+    metrics.add_token_usage(
+        prompt_tokens=200,
+        completion_tokens=75,
+        cache_read_tokens=25,
+        cache_write_tokens=0,
+        reasoning_tokens=0,
+        context_window=8000,
+        response_id="second",
+    )
+
+    subtitle_second = visualizer._format_metrics_subtitle()
+    assert subtitle_second is not None
+    assert "input 200" in subtitle_second
+    assert "output 75" in subtitle_second
+    assert "0.1500" in subtitle_second
+
+
 def test_event_base_fallback_visualize():
     """Test that Event provides fallback visualization."""
     from openhands.sdk.event.base import Event