Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ If you are an agent working in this repo: **do not improvise architecture**. Fol

### Current Focus: Phase 4A

**Phase 5.2 is complete** — Costs page now ships per-task and per-agent breakdowns (#558) on top of the spend summary (#557). Backend: `GET /api/v2/costs/tasks?days=N&limit=M` (top-N tasks with titles, agent, tokens, cost) and `GET /api/v2/costs/by-agent?days=N` (per-agent rollup + total input/output tokens), backed by `TokenRepository.get_top_tasks_by_cost` and `TokenRepository.get_costs_by_agent` respectively. Task board cards show an inline `MoneyBag02Icon` cost badge with token-breakdown tooltip when cost data exists. Fixed a v2 data-loss bug where `react_agent` tried to cast UUID task IDs to `int`, fell back to `None` when the cast failed, and therefore stored NULL as the `task_id` in `token_usage`.

**Phase 5.1 is complete** — Settings page now ships three working tabs: Agent (#554), API Keys (#555), and PROOF9 Defaults + Workspace Config (#556). Backend: `GET/PUT /api/v2/proof/config` and `/api/v2/workspaces/config`, plus `run_proof()` now honors `enabled_gates` filtering and `strictness` (`strict` vs `warn`). Atomic JSON writes via `codeframe/ui/routers/_helpers.atomic_write_json`. The 9-gate canonical order and `proof_config.json` filename live in `codeframe/core/proof/models.py`.

**Phase 3.5C is complete** — `CaptureGlitchModal` form (description/markdown, source, scope, gate obligations, severity, expiry) reachable from the PROOF9 page and the persistent sidebar "Capture Glitch" button. REQ detail view (`/proof/[req_id]`) ships markdown description rendering, `ProofScope` metadata display, obligations table with `Latest Run` column, sortable/filterable evidence history, and empty-state CTA. Backend: `ScopeOut` model on `RequirementResponse`. Issues #568, #569.
Expand Down
4 changes: 3 additions & 1 deletion codeframe/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -879,7 +879,9 @@ class TokenUsage(BaseModel):
"""Token usage record for a single LLM call (Sprint 10)."""

id: Optional[int] = None
task_id: Optional[int] = None # None for non-task calls
# Tasks use integer PKs in the v1 schema and UUID strings in v2 workspaces;
# SQLite is type-flexible, so we accept either at the model boundary.
task_id: Optional[Union[int, str]] = None # None for non-task calls
agent_id: str
project_id: int
model_name: str = Field(..., description="e.g., claude-sonnet-4-5")
Expand Down
12 changes: 8 additions & 4 deletions codeframe/core/react_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,15 +359,19 @@ def _persist_token_usage(self, task_id: str) -> None:
db.initialize()
tracker = MetricsTracker(db=db)

# Cast task_id to int for the persistence layer (core uses str, DB uses int).
# v1 tasks have integer PKs; v2 workspaces use UUID strings.
# Pass the raw value — SQLite preserves the type, and downstream
# analytics (issue #558) group by whatever was stored. Forcing
# int() here used to drop every v2 record's task linkage.
persist_task_id: int | str
try:
task_id_int: int | None = int(task_id)
persist_task_id = int(task_id)
except (ValueError, TypeError):
task_id_int = None
persist_task_id = str(task_id)

for record in self._token_records:
tracker.record_token_usage_sync(
task_id=task_id_int,
task_id=persist_task_id,
agent_id="react-agent",
project_id=0,
model_name=record["model"],
Expand Down
6 changes: 3 additions & 3 deletions codeframe/lib/metrics_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
import logging
import re
from datetime import datetime, timedelta, timezone
from typing import Dict, Any, List, Optional
from typing import Any, Dict, List, Optional, Union
from codeframe.core.models import CallType, TokenUsage
from codeframe.persistence.database import Database

Expand Down Expand Up @@ -163,7 +163,7 @@ def calculate_cost(model_name: str, input_tokens: int, output_tokens: int) -> fl

async def record_token_usage(
self,
task_id: Optional[int],
task_id: Optional[Union[int, str]],
agent_id: str,
project_id: int,
model_name: str,
Expand Down Expand Up @@ -238,7 +238,7 @@ async def record_token_usage(

def record_token_usage_sync(
self,
task_id: Optional[int],
task_id: Optional[Union[int, str]],
agent_id: str,
project_id: int,
model_name: str,
Expand Down
165 changes: 165 additions & 0 deletions codeframe/persistence/repositories/token_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,171 @@ def get_costs_summary(self, days: int) -> Dict[str, Any]:
"daily": daily,
}

def _window_iso_bounds(self, days: int) -> tuple[str, str]:
    """Compute the ``[start, end)`` timestamp strings for a trailing window.

    The window ends with today's UTC calendar date and spans ``days``
    calendar days in total, so ``days=1`` covers only today. Both bounds
    are rendered at midnight as ``YYYY-MM-DD HH:MM:SS`` (space-separated,
    no UTC offset) so they can be compared as plain strings against
    stored timestamps.

    Args:
        days: Number of calendar days in the window; must be positive.

    Returns:
        ``(start_iso, end_iso)`` where ``start_iso`` is inclusive and
        ``end_iso`` (midnight of the day after today) is exclusive.

    Raises:
        ValueError: If ``days`` is zero or negative.
    """
    if days <= 0:
        raise ValueError("days must be a positive integer")

    stamp_format = "%Y-%m-%d %H:%M:%S"
    today_utc = datetime.now(timezone.utc).date()
    # Today counts as day one, so step back only (days - 1) days.
    first_day = today_utc - timedelta(days=days - 1)
    # The upper bound is exclusive: midnight at the start of tomorrow.
    day_after = today_utc + timedelta(days=1)
    return first_day.strftime(stamp_format), day_after.strftime(stamp_format)

def get_top_tasks_by_cost(
    self,
    days: int,
    limit: int = 10,
) -> List[Dict[str, Any]]:
    """Aggregate spend per task and return the top N by cost.

    Rows with a NULL ``task_id`` are excluded. Results are ordered by
    ``total_cost_usd`` descending; tasks with equal cost are ordered by
    ``task_id`` ascending so the ``LIMIT`` cut-off is reproducible between
    calls instead of depending on SQLite's internal row order.

    Args:
        days: Trailing window in days (bounds come from
            ``self._window_iso_bounds``).
        limit: Maximum number of tasks to return; must be positive.

    Returns:
        List of dicts, sorted as described above:
            {
                "task_id": <native value from token_usage.task_id>,
                "agent_id": str,
                "input_tokens": int,
                "output_tokens": int,
                "total_cost_usd": float,
            }
        ``agent_id`` is the agent with the most calls for that task inside
        the same window; equal call counts resolve to the lowest
        ``agent_id`` so the answer is stable. ``task_id`` is returned as
        stored, so UUID strings come back as strings and integers come
        back as integers.

    Raises:
        ValueError: If ``limit`` is not positive, or if
            ``self._window_iso_bounds`` rejects ``days``.
    """
    if limit <= 0:
        raise ValueError("limit must be a positive integer")
    start_iso, end_iso = self._window_iso_bounds(days)

    # NOTE: rows are read by column name, so the connection is assumed to
    # use a name-addressable row factory (e.g. sqlite3.Row).
    cursor = self.conn.cursor()
    cursor.execute(
        """
        SELECT
            task_id,
            COALESCE(SUM(input_tokens), 0) AS input_tokens,
            COALESCE(SUM(output_tokens), 0) AS output_tokens,
            COALESCE(SUM(estimated_cost_usd), 0.0) AS total_cost_usd
        FROM token_usage
        WHERE task_id IS NOT NULL
          AND timestamp >= ?
          AND timestamp < ?
        GROUP BY task_id
        ORDER BY total_cost_usd DESC, task_id ASC
        LIMIT ?
        """,
        (start_iso, end_iso, limit),
    )
    rows = cursor.fetchall()

    # Loop-invariant SQL, built once. The secondary sort key makes the
    # "dominant agent" deterministic when two agents made equally many calls.
    dominant_agent_sql = """
        SELECT agent_id, COUNT(*) AS calls
        FROM token_usage
        WHERE task_id = ?
          AND timestamp >= ?
          AND timestamp < ?
        GROUP BY agent_id
        ORDER BY calls DESC, agent_id ASC
        LIMIT 1
    """

    # TODO(perf): the dominant-agent lookup is N+1 against the limit.
    # Acceptable at limit=10 (analytics view) and even limit=1000 (badge
    # map for a board). Fold into a single CTE if the cap grows further.
    result: List[Dict[str, Any]] = []
    for row in rows:
        task_id = row["task_id"]
        cursor.execute(dominant_agent_sql, (task_id, start_iso, end_iso))
        agent_row = cursor.fetchone()
        agent_id = agent_row["agent_id"] if agent_row else ""

        result.append({
            "task_id": task_id,
            "agent_id": agent_id,
            "input_tokens": int(row["input_tokens"] or 0),
            "output_tokens": int(row["output_tokens"] or 0),
            "total_cost_usd": float(row["total_cost_usd"] or 0.0),
        })

    return result

def get_costs_by_agent(self, days: int) -> Dict[str, Any]:
    """Roll up token usage and cost per agent for a trailing window.

    Every ``token_usage`` row inside the window is counted, including rows
    whose ``task_id`` is NULL, since a call made outside any task still
    belongs to an agent.

    Args:
        days: Trailing window in days (bounds come from
            ``self._window_iso_bounds``).

    Returns:
        {
            "by_agent": [
                {
                    "agent_id": str,
                    "input_tokens": int,
                    "output_tokens": int,
                    "total_cost_usd": float,
                    "call_count": int,
                },
                ...
            ],
            "total_input_tokens": int,
            "total_output_tokens": int,
        }
        ``by_agent`` is ordered by ``total_cost_usd`` descending; the two
        totals are the sums of the per-agent token counts.
    """
    window_start, window_end = self._window_iso_bounds(days)

    rollup_sql = """
        SELECT
            agent_id,
            COALESCE(SUM(input_tokens), 0) AS input_tokens,
            COALESCE(SUM(output_tokens), 0) AS output_tokens,
            COALESCE(SUM(estimated_cost_usd), 0.0) AS total_cost_usd,
            COUNT(*) AS call_count
        FROM token_usage
        WHERE timestamp >= ? AND timestamp < ?
        GROUP BY agent_id
        ORDER BY total_cost_usd DESC
    """
    cur = self.conn.cursor()
    cur.execute(rollup_sql, (window_start, window_end))

    # Normalise each aggregate to a plain Python number; `or 0` guards
    # against a NULL slipping through.
    breakdown: List[Dict[str, Any]] = [
        {
            "agent_id": rec["agent_id"],
            "input_tokens": int(rec["input_tokens"] or 0),
            "output_tokens": int(rec["output_tokens"] or 0),
            "total_cost_usd": float(rec["total_cost_usd"] or 0.0),
            "call_count": int(rec["call_count"] or 0),
        }
        for rec in cur.fetchall()
    ]

    return {
        "by_agent": breakdown,
        "total_input_tokens": sum(entry["input_tokens"] for entry in breakdown),
        "total_output_tokens": sum(entry["output_tokens"] for entry in breakdown),
    }

def get_project_costs_aggregate(self, project_id: int) -> Dict[str, Any]:
"""Get aggregated cost statistics for a project.

Expand Down
Loading
Loading