+import os
 import re
 from typing import TYPE_CHECKING

 )
 from openhands.sdk.event.base import Event
 from openhands.sdk.event.condenser import Condensation
+from openhands.sdk.llm.utils.metrics import TokenUsage


 if TYPE_CHECKING:
     r"\*(.*?)\*": "italic",
 }

+
+_PER_REQUEST_TOKENS_ENV = "OPENHANDS_VISUALIZER_PER_REQUEST_TOKENS"
+_TRUTHY_ENV_VALUES = {"1", "true", "yes", "on"}
+
+
+def _env_flag(name: str) -> bool:
+    value = os.getenv(name)
+    if value is None:
+        return False
+    return value.strip().lower() in _TRUTHY_ENV_VALUES
+
+
 _PANEL_PADDING = (1, 1)


@@ -61,6 +75,7 @@ def __init__(
         highlight_regex: dict[str, str] | None = None,
         skip_user_messages: bool = False,
         conversation_stats: "ConversationStats | None" = None,
+        per_request_token_stats: bool | None = None,
     ):
         """Initialize the visualizer.

@@ -72,11 +87,20 @@ def __init__(
             skip_user_messages: If True, skip displaying user messages. Useful for
                 scenarios where user input is not relevant to show.
             conversation_stats: ConversationStats object to display metrics information.
+            per_request_token_stats: Whether to display per-request token usage instead
+                of accumulated totals. When None, uses the
+                OPENHANDS_VISUALIZER_PER_REQUEST_TOKENS
+                environment flag.
         """
         self._console = Console()
         self._skip_user_messages = skip_user_messages
         self._highlight_patterns: dict[str, str] = highlight_regex or {}
         self._conversation_stats = conversation_stats
+        if per_request_token_stats is None:
+            per_request_token_stats = _env_flag(_PER_REQUEST_TOKENS_ENV)
+        self._use_per_request_token_stats = per_request_token_stats
+        self._last_accumulated_usage: TokenUsage | None = None
+        self._last_request_usage: TokenUsage | None = None

     def on_event(self, event: Event) -> None:
         """Main event handler that displays events with Rich formatting."""
@@ -227,6 +251,78 @@ def _create_event_panel(self, event: Event) -> Panel | None:
             expand=True,
         )

+    @staticmethod
+    def _clone_usage(usage: TokenUsage) -> TokenUsage:
+        return TokenUsage.model_validate(usage.model_dump())
+
+    def _get_per_request_usage(self, usage: TokenUsage) -> TokenUsage | None:
+        current = self._clone_usage(usage)
+
+        if self._last_accumulated_usage is None:
+            self._last_accumulated_usage = current
+            self._last_request_usage = self._clone_usage(current)
+            return self._last_request_usage
+
+        prev = self._last_accumulated_usage
+        tracked_fields = (
+            "prompt_tokens",
+            "completion_tokens",
+            "cache_read_tokens",
+            "cache_write_tokens",
+            "reasoning_tokens",
+        )
+
+        decreased = any(
+            getattr(current, field) < getattr(prev, field) for field in tracked_fields
+        )
+        if decreased:
+            self._last_accumulated_usage = current
+            self._last_request_usage = self._clone_usage(current)
+            return self._last_request_usage
+
+        changed = any(
+            getattr(current, field) != getattr(prev, field) for field in tracked_fields
+        )
+        if changed:
+            prompt_delta = max(current.prompt_tokens - prev.prompt_tokens, 0)
+            completion_delta = max(
+                current.completion_tokens - prev.completion_tokens, 0
+            )
+            cache_read_delta = max(
+                current.cache_read_tokens - prev.cache_read_tokens, 0
+            )
+            cache_write_delta = max(
+                current.cache_write_tokens - prev.cache_write_tokens, 0
+            )
+            reasoning_delta = max(current.reasoning_tokens - prev.reasoning_tokens, 0)
+            self._last_request_usage = TokenUsage(
+                model=current.model,
+                prompt_tokens=prompt_delta,
+                completion_tokens=completion_delta,
+                cache_read_tokens=cache_read_delta,
+                cache_write_tokens=cache_write_delta,
+                reasoning_tokens=reasoning_delta,
+                context_window=current.context_window,
+                per_turn_token=prompt_delta + completion_delta,
+                response_id=current.response_id or prev.response_id,
+            )
+
+        if self._last_request_usage is None:
+            self._last_request_usage = TokenUsage(
+                model=current.model,
+                prompt_tokens=0,
+                completion_tokens=0,
+                cache_read_tokens=0,
+                cache_write_tokens=0,
+                reasoning_tokens=0,
+                context_window=current.context_window,
+                per_turn_token=0,
+                response_id=current.response_id,
+            )
+
+        self._last_accumulated_usage = current
+        return self._last_request_usage
+
     def _format_metrics_subtitle(self) -> str | None:
         """Format LLM metrics as a visually appealing subtitle string with icons,
         colors, and k/m abbreviations using conversation stats."""
@@ -240,7 +336,12 @@ def _format_metrics_subtitle(self) -> str | None:
         usage = combined_metrics.accumulated_token_usage
         cost = combined_metrics.accumulated_cost or 0.0

-        # helper: 1234 -> "1.2K", 1200000 -> "1.2M"
+        display_usage = usage
+        if self._use_per_request_token_stats:
+            per_request_usage = self._get_per_request_usage(usage)
+            if per_request_usage is not None:
+                display_usage = per_request_usage
+
         def abbr(n: int | float) -> str:
             n = int(n or 0)
             if n >= 1_000_000_000:
@@ -253,28 +354,29 @@ def abbr(n: int | float) -> str:
                 return str(n)
             return s.replace(".0", "")

-        input_tokens = abbr(usage.prompt_tokens or 0)
-        output_tokens = abbr(usage.completion_tokens or 0)
-
-        # Cache hit rate (prompt + cache)
-        prompt = usage.prompt_tokens or 0
-        cache_read = usage.cache_read_tokens or 0
-        cache_rate = f"{(cache_read / prompt * 100):.2f}%" if prompt > 0 else "N/A"
-        reasoning_tokens = usage.reasoning_tokens or 0
+        prompt_value = display_usage.prompt_tokens or 0
+        output_value = display_usage.completion_tokens or 0
+        cache_prompt = prompt_value
+        cache_read = display_usage.cache_read_tokens or 0
+        cache_rate = (
+            f"{(cache_read / cache_prompt * 100):.2f}%" if cache_prompt > 0 else "N/A"
+        )
+        reasoning_tokens = display_usage.reasoning_tokens or 0

-        # Cost
         cost_str = f"{cost:.4f}" if cost > 0 else "$0.00"

-        # Build with fixed color scheme
         parts: list[str] = []
-        parts.append(f"[cyan]↑ input {input_tokens}[/cyan]")
+        parts.append(f"[cyan]↑ input {abbr(prompt_value)}[/cyan]")
         parts.append(f"[magenta]cache hit {cache_rate}[/magenta]")
         if reasoning_tokens > 0:
             parts.append(f"[yellow] reasoning {abbr(reasoning_tokens)}[/yellow]")
-        parts.append(f"[blue]↓ output {output_tokens}[/blue]")
+        parts.append(f"[blue]↓ output {abbr(output_value)}[/blue]")
         parts.append(f"[green]$ {cost_str}[/green]")

-        return "Tokens: " + " • ".join(parts)
+        label = (
+            "Tokens (last request)" if self._use_per_request_token_stats else "Tokens"
+        )
+        return f"{label}: " + " • ".join(parts)


 def create_default_visualizer(
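
A minimal usage sketch of the new option (the `ConversationVisualizer` class name is an assumption here, since only the `__init__` hunk is visible; the keyword argument and environment flag are taken from the diff above):

    import os

    # Opt in via the environment flag; "1", "true", "yes", or "on"
    # (case-insensitive, surrounding whitespace ignored) all count as truthy.
    os.environ["OPENHANDS_VISUALIZER_PER_REQUEST_TOKENS"] = "true"

    # Or opt in explicitly; the env var is only consulted when the argument is None,
    # so an explicit True/False always wins. (ConversationVisualizer is an assumed name.)
    # visualizer = ConversationVisualizer(per_request_token_stats=True)

With the option enabled, the metrics subtitle is labeled "Tokens (last request)" and shows per-request deltas: if the accumulated prompt tokens grow from 1,000 to 2,500 between two renders, the subtitle reports ↑ input 1.5K instead of the 2.5K running total.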