Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
629cbb1
fix(ai): add message truncation to openai-agents
shellmayr Oct 21, 2025
5a16b2e
normalize data in messages
shellmayr Oct 24, 2025
0afac42
tests(huggingface): Support 1.0.0rc7 (#4979)
alexander-alderman-webb Oct 22, 2025
75d0cfe
Make logger template format safer to missing kwargs (#4981)
sl0thentr0py Oct 22, 2025
68bdbda
feat: Officially support 3.14 & run integration tests on 3.14 (#4974)
sentrivana Oct 22, 2025
b751725
ci: Run `common` test suite on Python 3.14t (#4969)
alexander-alderman-webb Oct 22, 2025
4a44dfc
feat(langchain): Support v1 (#4874)
sentrivana Oct 23, 2025
7cfd63b
fix(ai): add message truncation to litellm (#4973)
shellmayr Oct 23, 2025
f645e97
feat(integrations): pydantic-ai integration (#4906)
constantinius Oct 23, 2025
26e6b22
feat(integrations): MCP Python SDK (#4964)
constantinius Oct 23, 2025
eaa8d42
fix(ai): truncate messages for google genai (#4992)
shellmayr Oct 23, 2025
03ddc83
ci: Prepare for new major branch, remove potel-base from actions (#5003)
sentrivana Oct 24, 2025
ef02d11
build(deps): bump actions/upload-artifact from 4 to 5 (#5032)
dependabot[bot] Oct 28, 2025
7c4d41c
ci: 🤖 Update test matrix with new releases (10/27) (#5033)
github-actions[bot] Oct 28, 2025
fd32188
fix(strawberry): Remove autodetection, always use sync extension (#4984)
sentrivana Oct 28, 2025
85dbd04
fix starlette deprecation warning (#5034)
DeoLeung Oct 28, 2025
b43ad35
fix(tracemetrics): Bump metric buffer size to 1k (#5031)
k-fish Oct 28, 2025
24b8f15
chore(metrics): Rename _metrics to metrics (#5035)
alexander-alderman-webb Oct 28, 2025
02a2e13
fix(django): Improve logic for classifying cache hits and misses (#5029)
alexander-alderman-webb Oct 29, 2025
4e6b9d9
fix(integrations): hooking into error tracing function to find out if…
constantinius Oct 29, 2025
a391d62
fix(google-genai): Set agent name (#5038)
sentrivana Oct 29, 2025
2320823
release: 2.43.0
getsentry-bot Oct 29, 2025
f3b57b4
Update CHANGELOG.md
sentrivana Oct 29, 2025
2dfa9c1
docs: Elaborate on Strawberry autodetection in changelog (#5039)
sentrivana Oct 29, 2025
b6fe7d9
ci: Run integration tests on Python 3.14t (#4995)
alexander-alderman-webb Oct 29, 2025
2409cb3
fix(ai): add message truncation to openai-agents
shellmayr Oct 21, 2025
9815f2b
make tests useful
shellmayr Oct 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
get_start_span_function,
set_data_normalized,
normalize_message_roles,
truncate_and_annotate_messages,
)
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.scope import should_send_default_pii
Expand Down Expand Up @@ -61,12 +62,17 @@ def invoke_agent_span(context, agent, kwargs):

if len(messages) > 0:
normalized_messages = normalize_message_roles(messages)
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalized_messages,
unpack=False,
scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(
normalized_messages, span, scope
)
if messages_data is not None:
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
messages_data,
unpack=False,
)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Missing Data Normalization in Agent Span

The invoke_agent_span function is missing a call to _normalize_data() before passing normalized_messages to truncate_and_annotate_messages(). This differs from utils.py and means the truncation function receives Python objects instead of the expected serialized data, preventing it from working correctly. _normalize_data also needs to be imported.

Fix in Cursor Fix in Web


_set_agent_data(span, agent)

Expand Down
19 changes: 13 additions & 6 deletions sentry_sdk/integrations/openai_agents/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
normalize_message_roles,
set_data_normalized,
normalize_message_role,
truncate_and_annotate_messages,
_normalize_data,
)
from sentry_sdk.consts import SPANDATA, SPANSTATUS, OP
from sentry_sdk.integrations import DidNotEnable
Expand Down Expand Up @@ -135,12 +137,17 @@ def _set_input_data(span, get_response_kwargs):
}
)

set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalize_message_roles(request_messages),
unpack=False,
)
normalized_messages = normalize_message_roles(request_messages)
serializable_messages = _normalize_data(normalized_messages, unpack=False)
scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(serializable_messages, span, scope)
if messages_data is not None:
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
messages_data,
unpack=False,
)


def _set_output_data(span, result):
Expand Down
102 changes: 90 additions & 12 deletions tests/integrations/openai_agents/test_openai_agents.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,33 @@
import asyncio
import json
import os
import re
import pytest
from unittest.mock import MagicMock, patch
import os

from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration
from sentry_sdk.integrations.openai_agents.utils import safe_serialize
from sentry_sdk.utils import parse_version

import agents
import pytest
from agents import (
Agent,
ModelResponse,
Usage,
ModelSettings,
Usage,
)
from agents.items import (
McpCall,
ResponseFunctionToolCall,
ResponseOutputMessage,
ResponseOutputText,
ResponseFunctionToolCall,
)
from agents.version import __version__ as OPENAI_AGENTS_VERSION

from openai.types.responses.response_usage import (
InputTokensDetails,
OutputTokensDetails,
)

from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration
from sentry_sdk.integrations.openai_agents.utils import safe_serialize
from sentry_sdk.utils import parse_version

test_run_config = agents.RunConfig(tracing_disabled=True)


Expand Down Expand Up @@ -1051,8 +1051,8 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events):

get_response_kwargs = {"input": test_input}

from sentry_sdk.integrations.openai_agents.utils import _set_input_data
from sentry_sdk import start_span
from sentry_sdk.integrations.openai_agents.utils import _set_input_data

with start_span(op="test") as span:
_set_input_data(span, get_response_kwargs)
Expand All @@ -1061,8 +1061,6 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events):
from sentry_sdk.consts import SPANDATA

if SPANDATA.GEN_AI_REQUEST_MESSAGES in span._data:
import json

stored_messages = json.loads(span._data[SPANDATA.GEN_AI_REQUEST_MESSAGES])

# Verify roles were properly mapped
Expand All @@ -1077,3 +1075,83 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events):
# Verify no "ai" roles remain in any message
for message in stored_messages:
assert message["role"] != "ai"


@pytest.mark.asyncio
async def test_openai_agents_message_truncation(
    sentry_init, capture_events, test_agent, mock_usage
):
    """Test that large request messages are truncated in the OpenAI Agents integration.

    The previous version of this test only asserted ``len(parsed_messages) >= 1``,
    which is true even when no truncation happens (and the oversized content was
    in the mocked *response*, so the request-message truncation path was never
    exercised). Instead, feed ``_set_input_data`` a conversation whose serialized
    size far exceeds any reasonable per-span budget and verify that:

      * the stored ``gen_ai.request.messages`` payload is valid JSON,
      * the payload is substantially smaller than the raw input, and
      * at least one message was dropped by truncation.

    ``test_agent`` and ``mock_usage`` are unused but kept so the fixture
    signature stays compatible with the rest of the suite.
    """
    from sentry_sdk import start_span
    from sentry_sdk.consts import SPANDATA
    from sentry_sdk.integrations.openai_agents.utils import _set_input_data

    sentry_init(
        integrations=[OpenAIAgentsIntegration()],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )

    # ~63KB per message, 4 oversized messages (~250KB total): guaranteed to
    # exceed the truncation budget regardless of its exact configured value.
    large_content = (
        "This is a very long message that will exceed our size limits. " * 1000
    )
    request_messages = [
        {"role": "system", "content": large_content},
        {"role": "user", "content": large_content},
        {"role": "assistant", "content": large_content},
        {"role": "user", "content": large_content},
        {"role": "user", "content": "small final message"},
    ]

    with start_span(op="gen_ai.chat") as span:
        _set_input_data(span, {"input": request_messages})

        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span._data
        messages_data = span._data[SPANDATA.GEN_AI_REQUEST_MESSAGES]
        assert isinstance(messages_data, str)

        parsed_messages = json.loads(messages_data)
        assert isinstance(parsed_messages, list)

        # Truncation must actually have removed data: the serialized payload
        # has to be much smaller than the raw (~250KB) input...
        assert len(messages_data) < len(safe_serialize(request_messages))
        # ...and at least one oversized message must have been dropped.
        # NOTE(review): assumes truncation works by dropping whole messages
        # (as in the litellm/google-genai truncation helpers) — confirm
        # against truncate_and_annotate_messages.
        assert len(parsed_messages) < len(request_messages)
Comment on lines +1149 to +1159
Copy link
Contributor

@alexander-alderman-webb alexander-alderman-webb Oct 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this checking that truncation is applied?

I would have thought len(parsed_messages) >= 1 is always true, even without truncation.