diff --git a/src/ragas/cost.py b/src/ragas/cost.py
index 144f66a12..f33f33d27 100644
--- a/src/ragas/cost.py
+++ b/src/ragas/cost.py
@@ -22,6 +22,7 @@ def __add__(self, y: "TokenUsage") -> "TokenUsage":
             return TokenUsage(
                 input_tokens=self.input_tokens + y.input_tokens,
                 output_tokens=self.output_tokens + y.output_tokens,
+                model=self.model
             )
         else:
             raise ValueError("Cannot add TokenUsage objects with different models")
@@ -67,8 +68,9 @@ def get_token_usage_for_openai(
         return TokenUsage(input_tokens=0, output_tokens=0)
     output_tokens = get_from_dict(llm_output, "token_usage.completion_tokens", 0)
     input_tokens = get_from_dict(llm_output, "token_usage.prompt_tokens", 0)
+    model = get_from_dict(llm_output, "model_name", "")
 
-    return TokenUsage(input_tokens=input_tokens, output_tokens=output_tokens)
+    return TokenUsage(input_tokens=input_tokens, output_tokens=output_tokens, model=model)
 
 
 def get_token_usage_for_anthropic(
@@ -92,10 +94,16 @@ def get_token_usage_for_anthropic(
                             "usage.output_tokens",
                             0,
                         ),
+                        model=get_from_dict(
+                            g.message.response_metadata,
+                            "model",
+                            "")
                     )
                 )
-
-        return sum(token_usages, TokenUsage(input_tokens=0, output_tokens=0))
+        model = next(
+            (usage.model for usage in token_usages if usage.model), ""
+        )
+        return sum(token_usages, TokenUsage(input_tokens=0, output_tokens=0, model=model))
     else:
         return TokenUsage(input_tokens=0, output_tokens=0)
 
@@ -120,10 +128,17 @@ def get_token_usage_for_bedrock(
                         "usage.completion_tokens",
                         0,
                     ),
+                    model=get_from_dict(
+                        g.message.response_metadata,
+                        "model_id",
+                        ""
+                    )
                 )
             )
-
-    return sum(token_usages, TokenUsage(input_tokens=0, output_tokens=0))
+    model = next(
+        (usage.model for usage in token_usages if usage.model), ""
+    )
+    return sum(token_usages, TokenUsage(input_tokens=0, output_tokens=0, model=model))
 
     return TokenUsage(input_tokens=0, output_tokens=0)
 
diff --git a/tests/unit/test_cost.py b/tests/unit/test_cost.py
index 715f28f94..1f525dc8f 100644
--- a/tests/unit/test_cost.py
+++ b/tests/unit/test_cost.py
@@ -133,19 +133,19 @@ def test_token_usage_cost():
 def test_parse_llm_results():
     # openai
     token_usage = get_token_usage_for_openai(openai_llm_result)
-    assert token_usage == TokenUsage(input_tokens=10, output_tokens=10)
+    assert token_usage == TokenUsage(input_tokens=10, output_tokens=10, model="gpt-4o")
 
     # anthropic
     token_usage = get_token_usage_for_anthropic(anthropic_llm_result)
-    assert token_usage == TokenUsage(input_tokens=9, output_tokens=12)
+    assert token_usage == TokenUsage(input_tokens=9, output_tokens=12, model="claude-3-opus-20240229")
 
     # Bedrock LLaMa
     token_usage = get_token_usage_for_bedrock(bedrock_llama_result)
-    assert token_usage == TokenUsage(input_tokens=10, output_tokens=10)
+    assert token_usage == TokenUsage(input_tokens=10, output_tokens=10, model="us.meta.llama3-1-70b-instruct-v1:0")
 
     # Bedrock Claude
     token_usage = get_token_usage_for_bedrock(bedrock_claude_result)
-    assert token_usage == TokenUsage(input_tokens=10, output_tokens=10)
+    assert token_usage == TokenUsage(input_tokens=10, output_tokens=10, model="us.anthropic.claude-3-5-sonnet-20240620-v1:0")
 
 
 def test_cost_callback_handler():
@@ -153,7 +153,7 @@ def test_cost_callback_handler():
     cost_cb.on_llm_end(openai_llm_result)
 
     # cost
-    assert cost_cb.total_tokens() == TokenUsage(input_tokens=10, output_tokens=10)
+    assert cost_cb.total_tokens() == TokenUsage(input_tokens=10, output_tokens=10, model="gpt-4o")
     assert cost_cb.total_cost(0.1) == 2.0
 
     assert (