Skip to content

Commit ff7e655

Browse files
committed
fix(langchain): preserve anthropic cache metrics
LangChain Anthropic responses report cache reads and cache writes separately from normal input tokens, including TTL-specific cache-creation buckets (5-minute and 1-hour). The previous cached-token fix avoided double counting OpenAI cached input tokens, but it could drop Anthropic cache-write detail from spans and produce totals that were less useful for cost analysis. Preserve the cache-creation metrics users need to understand prompt-cache spend, including the per-TTL split, and keep token totals aligned with prompt-cache semantics: cache tokens are folded into the prompt count only when the provider reports them separately from input tokens, so OpenAI cached input tokens are still not double counted.
1 parent 4079ffa commit ff7e655

2 files changed

Lines changed: 42 additions & 58 deletions

File tree

py/src/braintrust/integrations/langchain/callbacks.py

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -661,36 +661,40 @@ def _get_metrics_from_response(response: LLMResult):
661661
input_token_details = usage_metadata.get("input_token_details")
662662
if input_token_details and isinstance(input_token_details, dict):
663663
cache_read = input_token_details.get("cache_read")
664-
# langchain-anthropic >= 1.4.0 maps cache_creation_input_tokens to
665-
# ephemeral tier fields (ephemeral_5m_input_tokens, ephemeral_1h_input_tokens)
666-
# rather than the top-level cache_creation field. Sum both for compat.
667664
cache_creation = input_token_details.get("cache_creation")
668-
if not cache_creation and (
669-
"ephemeral_5m_input_tokens" in input_token_details
670-
or "ephemeral_1h_input_tokens" in input_token_details
671-
):
672-
cache_creation = input_token_details.get("ephemeral_5m_input_tokens", 0) + input_token_details.get(
673-
"ephemeral_1h_input_tokens", 0
674-
)
665+
cache_creation_5m = input_token_details.get("ephemeral_5m_input_tokens")
666+
cache_creation_1h = input_token_details.get("ephemeral_1h_input_tokens")
667+
has_cache_creation_breakdown = cache_creation_5m is not None or cache_creation_1h is not None
675668

676669
if cache_read is not None:
677670
metrics["prompt_cached_tokens"] = cache_read
678-
if cache_creation is not None:
679-
metrics["prompt_cache_creation_tokens"] = cache_creation
680-
681-
cache_tokens = (cache_read or 0) + (cache_creation or 0)
671+
cache_creation_split = (cache_creation_5m or 0) + (cache_creation_1h or 0)
672+
if cache_creation is not None or cache_creation_split:
673+
metrics["prompt_cache_creation_tokens"] = max(cache_creation or 0, cache_creation_split)
674+
if has_cache_creation_breakdown:
675+
# Anthropic exposes TTL-specific cache creation buckets. Preserve the
676+
# split so downstream cost tooling can price 5m vs 1h writes correctly.
677+
if cache_creation_5m is not None:
678+
metrics["prompt_cache_creation_5m_tokens"] = cache_creation_5m
679+
if cache_creation_1h is not None:
680+
metrics["prompt_cache_creation_1h_tokens"] = cache_creation_1h
681+
682+
effective_cache_creation = metrics.get("prompt_cache_creation_tokens", 0)
683+
cache_tokens = (cache_read or 0) + effective_cache_creation
682684
prompt_tokens = metrics.get("prompt_tokens")
683685
completion_tokens = metrics.get("completion_tokens")
684686
total_tokens = metrics.get("total_tokens")
685-
if (
686-
cache_tokens
687-
and prompt_tokens is not None
688-
and completion_tokens is not None
689-
and total_tokens == prompt_tokens + completion_tokens
690-
and _cache_tokens_are_separate_from_input_tokens(input_token_details)
691-
):
692-
metrics["prompt_tokens"] = prompt_tokens + cache_tokens
693-
metrics["total_tokens"] = total_tokens + cache_tokens
687+
if prompt_tokens is not None and completion_tokens is not None:
688+
if (
689+
cache_tokens
690+
and total_tokens == prompt_tokens + completion_tokens
691+
and _cache_tokens_are_separate_from_input_tokens(input_token_details)
692+
):
693+
prompt_tokens += cache_tokens
694+
metrics["prompt_tokens"] = prompt_tokens
695+
if total_tokens is not None:
696+
metrics["total_tokens"] = total_tokens + cache_tokens
697+
metrics["tokens"] = prompt_tokens + completion_tokens
694698

695699
if not metrics or not any(metrics.values()):
696700
llm_output: dict[str, Any] = response.llm_output or {}

py/src/braintrust/integrations/langchain/test_callbacks.py

Lines changed: 15 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,10 @@
88
import pytest
99
from braintrust import logger
1010
from braintrust.integrations.langchain import BraintrustCallbackHandler
11-
from braintrust.integrations.langchain.callbacks import _get_metrics_from_response
1211
from braintrust.logger import flush
1312
from braintrust.test_helpers import init_test_logger
1413
from langchain_core.callbacks import BaseCallbackHandler
1514
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage
16-
from langchain_core.outputs import ChatGeneration, LLMResult
1715
from langchain_core.prompts import ChatPromptTemplate
1816
from langchain_core.prompts.prompt import PromptTemplate
1917
from langchain_core.runnables import RunnableMap, RunnableSerializable
@@ -908,34 +906,6 @@ def test_streaming_ttft(logger_memory_logger):
908906
)
909907

910908

911-
def test_openai_cached_tokens_are_not_folded_into_prompt_tokens():
912-
response = LLMResult(
913-
generations=[
914-
[
915-
ChatGeneration(
916-
message=AIMessage(
917-
content="Done",
918-
response_metadata={"model_name": "gpt-4o-mini-2024-07-18"},
919-
usage_metadata={
920-
"input_tokens": 1000,
921-
"output_tokens": 200,
922-
"total_tokens": 1200,
923-
"input_token_details": {"cache_read": 500},
924-
},
925-
)
926-
)
927-
]
928-
]
929-
)
930-
931-
assert _get_metrics_from_response(response) == {
932-
"prompt_tokens": 1000,
933-
"completion_tokens": 200,
934-
"total_tokens": 1200,
935-
"prompt_cached_tokens": 500,
936-
}
937-
938-
939909
@pytest.mark.vcr
940910
def test_prompt_caching_tokens(logger_memory_logger):
941911
from langchain_anthropic import ChatAnthropic
@@ -1114,11 +1084,16 @@ def test_prompt_caching_tokens(logger_memory_logger):
11141084
assert "prompt_tokens" in first_metrics
11151085
assert first_metrics["prompt_tokens"] > 0
11161086

1117-
assert "prompt_cache_creation_tokens" in first_metrics
1118-
assert first_metrics["prompt_cache_creation_tokens"] > 0
1087+
first_cache_creation_split = first_metrics.get("prompt_cache_creation_5m_tokens", 0) + first_metrics.get(
1088+
"prompt_cache_creation_1h_tokens", 0
1089+
)
1090+
first_cache_creation_tokens = max(first_metrics.get("prompt_cache_creation_tokens", 0), first_cache_creation_split)
1091+
assert first_cache_creation_tokens > 0
1092+
if first_cache_creation_split:
1093+
assert first_metrics["prompt_cache_creation_tokens"] >= first_cache_creation_split
11191094
assert first_metrics["prompt_cached_tokens"] == 0
1120-
assert first_metrics["prompt_tokens"] >= first_metrics["prompt_cache_creation_tokens"]
1121-
assert first_metrics["total_tokens"] == first_metrics["prompt_tokens"] + first_metrics["completion_tokens"]
1095+
assert first_metrics["prompt_tokens"] >= first_cache_creation_tokens
1096+
assert first_metrics["tokens"] == first_metrics["prompt_tokens"] + first_metrics["completion_tokens"]
11221097

11231098
second_metrics = None
11241099
for attempt in range(3):
@@ -1147,9 +1122,14 @@ def test_prompt_caching_tokens(logger_memory_logger):
11471122
time.sleep(1)
11481123

11491124
assert second_metrics is not None
1125+
second_cache_creation_split = second_metrics.get("prompt_cache_creation_5m_tokens", 0) + second_metrics.get(
1126+
"prompt_cache_creation_1h_tokens", 0
1127+
)
1128+
if second_cache_creation_split:
1129+
assert second_metrics["prompt_cache_creation_tokens"] >= second_cache_creation_split
11501130
assert second_metrics["prompt_cached_tokens"] > 0
11511131
assert second_metrics["prompt_tokens"] >= second_metrics["prompt_cached_tokens"]
1152-
assert second_metrics["total_tokens"] == second_metrics["prompt_tokens"] + second_metrics["completion_tokens"]
1132+
assert second_metrics["tokens"] == second_metrics["prompt_tokens"] + second_metrics["completion_tokens"]
11531133

11541134

11551135
@pytest.mark.vcr

0 commit comments

Comments
 (0)