32 changes: 14 additions & 18 deletions openhands/sdk/conversation/visualizer.py
@@ -234,14 +234,13 @@ def _format_metrics_subtitle(self) -> str | None:
             return None
 
         combined_metrics = self._conversation_stats.get_combined_metrics()
-        if not combined_metrics or not combined_metrics.accumulated_token_usage:
+        if not combined_metrics or not combined_metrics.token_usages:
             return None
 
-        usage = combined_metrics.accumulated_token_usage
+        latest_usage = combined_metrics.token_usages[-1]
         cost = combined_metrics.accumulated_cost or 0.0
 
-        # helper: 1234 -> "1.2K", 1200000 -> "1.2M"
-        def abbr(n: int | float) -> str:
+        def abbr(n: int | float) -> str:  # helper: 1234 -> "1.2K", 1200000 -> "1.2M"
             n = int(n or 0)
             if n >= 1_000_000_000:
                 s = f"{n / 1_000_000_000:.2f}B"
@@ -253,26 +252,23 @@ def abbr(n: int | float) -> str:
                 return str(n)
             return s.replace(".0", "")
 
-        input_tokens = abbr(usage.prompt_tokens or 0)
-        output_tokens = abbr(usage.completion_tokens or 0)
+        input_tokens = latest_usage.prompt_tokens or 0
+        output_tokens = latest_usage.completion_tokens or 0
+        cache_read = latest_usage.cache_read_tokens or 0
+        cache_rate = (
+            f"{(cache_read / input_tokens * 100):.2f}%" if input_tokens > 0 else "N/A"
+        )
+        reasoning_tokens = latest_usage.reasoning_tokens or 0
 
-        # Cache hit rate (prompt + cache)
-        prompt = usage.prompt_tokens or 0
-        cache_read = usage.cache_read_tokens or 0
-        cache_rate = f"{(cache_read / prompt * 100):.2f}%" if prompt > 0 else "N/A"
-        reasoning_tokens = usage.reasoning_tokens or 0
-
-        # Cost
-        cost_str = f"{cost:.4f}" if cost > 0 else "$0.00"
+        cost_str = f"{cost:.4f}"
 
         # Build with fixed color scheme
         parts: list[str] = []
-        parts.append(f"[cyan]↑ input {input_tokens}[/cyan]")
+        parts.append(f"[cyan]↑ input {abbr(input_tokens)}[/cyan]")
         parts.append(f"[magenta]cache hit {cache_rate}[/magenta]")
         if reasoning_tokens > 0:
             parts.append(f"[yellow] reasoning {abbr(reasoning_tokens)}[/yellow]")
-        parts.append(f"[blue]↓ output {output_tokens}[/blue]")
-        parts.append(f"[green]$ {cost_str}[/green]")
+        parts.append(f"[blue]↓ output {abbr(output_tokens)}[/blue]")
+        parts.append(f"[green]$ {cost_str} (total)[/green]")
 
         return "Tokens: " + " • ".join(parts)

42 changes: 41 additions & 1 deletion tests/sdk/conversation/test_visualizer.py
@@ -337,7 +337,47 @@ def test_metrics_formatting():
     assert "500" in subtitle  # Output tokens
     assert "20.00%" in subtitle  # Cache hit rate
     assert "200" in subtitle  # Reasoning tokens
-    assert "0.0234" in subtitle  # Cost
+    assert "$ 0.0234 (total)" in subtitle
 
 
+def test_metrics_formatting_uses_latest_request():
+    """Tokens should reflect the latest request while cost stays cumulative."""
+    from openhands.sdk.conversation.conversation_stats import ConversationStats
+    from openhands.sdk.llm.utils.metrics import Metrics
+
+    conversation_stats = ConversationStats()
+    metrics = Metrics(model_name="test-model")
+    metrics.add_cost(0.1)
+    metrics.add_token_usage(
+        prompt_tokens=120,
+        completion_tokens=40,
+        cache_read_tokens=12,
+        cache_write_tokens=0,
+        reasoning_tokens=5,
+        context_window=8000,
+        response_id="first",
+    )
+    metrics.add_cost(0.05)
+    metrics.add_token_usage(
+        prompt_tokens=200,
+        completion_tokens=75,
+        cache_read_tokens=25,
+        cache_write_tokens=0,
+        reasoning_tokens=0,
+        context_window=8000,
+        response_id="second",
+    )
+    conversation_stats.service_to_metrics["test_service"] = metrics
+
+    visualizer = ConversationVisualizer(conversation_stats=conversation_stats)
+
+    subtitle = visualizer._format_metrics_subtitle()
+    assert subtitle is not None
+    assert "input 200" in subtitle
+    assert "output 75" in subtitle
+    assert "cache hit 10.00%" not in subtitle  # ensure using latest cache values
+    assert "cache hit 12.50%" in subtitle
+    assert "$ 0.1500 (total)" in subtitle
+
+
 def test_event_base_fallback_visualize():