diff --git a/.gitignore b/.gitignore index f7637f87..8b16fd17 100644 --- a/.gitignore +++ b/.gitignore @@ -66,5 +66,8 @@ examples/internal/email-evals/email-evals # Added by goreleaser init: dist/ +# btx spec cache +btx/.spec-cache/ + # emacs *~ diff --git a/.golangci.yml b/.golangci.yml index 86cdc213..c0206a63 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -14,6 +14,8 @@ linters: - text: "package-comments:" linters: [revive] path: examples/ + - linters: [forcetypeassert] + path: _test\.go diff --git a/Makefile b/Makefile index 6dfcd7ab..d75b1ddc 100644 --- a/Makefile +++ b/Makefile @@ -33,6 +33,7 @@ local-braintrust-replaces: build: go build ./... for dir in $(NESTED_MODULE_DIRS); do go build -C $$dir ./...; done + go build -C btx ./... clean: go clean @@ -41,24 +42,29 @@ clean: test: VCR_MODE=replay go test ./... for dir in $(NESTED_MODULE_DIRS); do VCR_MODE=replay go test -C $$dir ./...; done + VCR_MODE=replay go test -C btx ./... test-quiet: VCR_MODE=replay go test ./... | grep -v -E "^ok|no test files|^\\?" || true for dir in $(NESTED_MODULE_DIRS); do VCR_MODE=replay go test -C $$dir ./... | grep -v -E "^ok|no test files|^\\?" || true; done + VCR_MODE=replay go test -C btx ./... | grep -v -E "^ok|no test files|^\\?" || true test-vcr-off: VCR_MODE=off go test ./... for dir in $(NESTED_MODULE_DIRS); do VCR_MODE=off go test -C $$dir ./...; done + VCR_MODE=off go test -C btx ./... test-vcr-record: VCR_MODE=record go test ./... for dir in $(NESTED_MODULE_DIRS); do VCR_MODE=record go test -C $$dir ./...; done + VCR_MODE=record go test -C btx ./... # Verify that VCR cassettes work without API keys # This ensures VCR-enabled tests can run in CI/CD without credentials test-vcr-verify: env -u BRAINTRUST_API_KEY VCR_MODE=replay go test ./... for dir in $(NESTED_MODULE_DIRS); do env -u BRAINTRUST_API_KEY VCR_MODE=replay go test -C $$dir ./...; done + env -u BRAINTRUST_API_KEY VCR_MODE=replay go test -C btx ./... cover: go test $$(go list ./... 
| grep -v /examples/) -coverpkg=./... -coverprofile=coverage.out @@ -70,9 +76,11 @@ lint: ./scripts/apply_local_braintrust_replaces.sh golangci-lint fmt -d golangci-lint run ./... + cd btx && golangci-lint fmt -d && golangci-lint run ./... fmt: golangci-lint fmt + cd btx && golangci-lint fmt mod-verify: ./scripts/apply_local_braintrust_replaces.sh @@ -81,10 +89,13 @@ mod-verify: # This preserves explicit version pins in nested go.mod files (e.g. set by # prepare_release.sh before tags exist) rather than resetting them to v0.0.0. for dir in $(NESTED_MODULE_DIRS); do GOWORK=off go mod tidy -C $$dir; done + GOWORK=off go mod tidy -C btx go mod verify for dir in $(NESTED_MODULE_DIRS); do (cd $$dir && go mod verify); done + (cd btx && go mod verify) git diff --exit-code go.mod go.sum \ - $(foreach dir,$(NESTED_MODULE_DIRS),$(dir)/go.mod $(dir)/go.sum) + $(foreach dir,$(NESTED_MODULE_DIRS),$(dir)/go.mod $(dir)/go.sum) \ + btx/go.mod btx/go.sum ./scripts/check_nested_modules.sh check-nested-modules: diff --git a/btx/btx_test.go b/btx/btx_test.go new file mode 100644 index 00000000..42acc39f --- /dev/null +++ b/btx/btx_test.go @@ -0,0 +1,220 @@ +package btx + +import ( + "context" + "fmt" + "net/http" + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/require" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + + braintrust "github.com/braintrustdata/braintrust-sdk-go" + "github.com/braintrustdata/braintrust-sdk-go/internal/oteltest" + "github.com/braintrustdata/braintrust-sdk-go/internal/vcr" + "github.com/braintrustdata/braintrust-sdk-go/trace/attachmentprocessor" +) + +// skipSpecs lists spec display names that should be skipped. +// Add specs here when they test features not yet supported by the Go SDK. +var skipSpecs = map[string]string{ + // Prompt caching metrics (prompt_cache_creation_5m_tokens etc.) are not yet + // tracked by the Go SDK's Anthropic instrumentation. 
+ "anthropic/prompt_caching_5m": "prompt caching metrics not yet supported", + "anthropic/prompt_caching_1h": "prompt caching metrics not yet supported", + // Bedrock attachment format: the Go SDK puts the braintrust_attachment + // reference under source (Anthropic-style), but the spec expects it under + // image.source.bytes (Bedrock-native nesting). + "bedrock/attachments": "attachment nesting format differs from spec", +} + +// specRoot is set by TestMain after fetching specs. +var specRoot string + +func TestMain(m *testing.M) { + root, err := fetchSpec() + if err != nil { + fmt.Fprintf(os.Stderr, "btx: failed to fetch spec: %v\n", err) + os.Exit(1) + } + specRoot = root + + // Default BRAINTRUST_AUTO_CONVERT_AI_ATTACHMENTS based on VCR mode, + // but only if the user hasn't explicitly set it. + if os.Getenv("BRAINTRUST_AUTO_CONVERT_AI_ATTACHMENTS") == "" { + val := "false" + if vcr.GetVCRMode() == vcr.ModeReplay { + val = "true" + } + _ = os.Setenv("BRAINTRUST_AUTO_CONVERT_AI_ATTACHMENTS", val) + } + + os.Exit(m.Run()) +} + +func TestBTXSpec(t *testing.T) { + providers := []string{"openai", "anthropic", "google", "bedrock"} + specs, err := loadSpecs(specRoot, providers) + require.NoError(t, err, "failed to load specs") + require.NotEmpty(t, specs, "no specs found") + + for _, spec := range specs { + t.Run(spec.DisplayName, func(t *testing.T) { + if reason, ok := skipSpecs[spec.DisplayName]; ok { + t.Skipf("skipped: %s", reason) + } + + runSpec(t, spec) + }) + } +} + +func runSpec(t *testing.T, spec LlmSpanSpec) { + t.Helper() + + mode := vcr.GetVCRMode() + httpClient := newBTXHTTPClient(t, spec) + ctx := t.Context() + + var spans []map[string]any + + if mode == vcr.ModeReplay { + // Replay mode: capture spans in-memory, no network calls. 
+ tp, exporter := oteltest.Setup(t) + + traceID, err := executeSpec(ctx, spec, tp, httpClient) + require.NoError(t, err, "spec execution failed") + require.NotEmpty(t, traceID, "empty trace ID") + + spans = convertExportedSpans(exporter) + } else { + // Record and off modes: hit real APIs and export spans to the real + // Braintrust backend, then fetch them back via BTQL for validation. + tp := sdktrace.NewTracerProvider() + projectName := btxProjectName() + _, err := braintrust.New(tp, braintrust.WithProject(projectName)) + require.NoError(t, err, "failed to create Braintrust client") + + traceID, err := executeSpec(ctx, spec, tp, httpClient) + require.NoError(t, err, "spec execution failed") + require.NotEmpty(t, traceID, "empty trace ID") + + // Shut down to flush all spans to the backend. + require.NoError(t, tp.Shutdown(context.Background()), "failed to shutdown tracer provider") + + projectID := btxProjectID(t) + spans, err = fetchSpansBTQL(traceID, projectID, len(spec.ExpectedBrainstoreSpans)) + require.NoError(t, err, "failed to fetch spans from BTQL") + } + + err := validateSpans(spans, spec) + if err != nil { + t.Fatal(err) + } +} + +// convertExportedSpans converts in-memory OTel spans to brainstore format. +// It also runs the attachment processor to transform inline base64 data URLs +// into braintrust_attachment references, mirroring what the Braintrust span +// processor does in production. +func convertExportedSpans(exporter *oteltest.Exporter) []map[string]any { + otelSpans := exporter.Flush() + + // Create an attachment processor with a no-op uploader so that base64 + // data URLs are replaced with braintrust_attachment references without + // actually uploading anything. + ap := attachmentprocessor.NewProcessor(&attachmentprocessor.NoopUploader{}, nil) + + var result []map[string]any + for _, span := range otelSpans { + // Extract all string attributes into a map. 
+ attrs := make(map[string]string) + hasBraintrustAttr := false + for _, a := range span.Stub.Attributes { + if a.Value.Type().String() == "STRING" { + key := string(a.Key) + attrs[key] = a.Value.AsString() + if !hasBraintrustAttr && len(key) > 11 && key[:11] == "braintrust." { + hasBraintrustAttr = true + } + } + } + + // Only include spans that have braintrust attributes. + if !hasBraintrustAttr { + continue + } + + // Process attachments in input and output JSON, converting inline + // base64 data to braintrust_attachment references. + for _, key := range []string{"braintrust.input_json", "braintrust.output_json"} { + if v, ok := attrs[key]; ok { + attrs[key] = ap.ProcessAndUpload(v) + } + } + + brainstoreSpan := spanFromOTel(span.Name(), attrs) + result = append(result, brainstoreSpan) + } + + return result +} + +// newBTXHTTPClient creates an HTTP client with VCR support using custom cassette paths. +// Cassettes are stored at testdata/cassettes//.yaml. +func newBTXHTTPClient(t *testing.T, spec LlmSpanSpec) *http.Client { + t.Helper() + + mode := vcr.GetVCRMode() + if mode == vcr.ModeOff { + return &http.Client{Timeout: 120 * time.Second} + } + + // Build cassette path: testdata/cassettes// + // (go-vcr appends .yaml automatically) + cassettePath := filepath.Join("testdata", "cassettes", spec.Provider, spec.Name) + + r, err := vcr.NewVCRRecorder(t, cassettePath) + require.NoError(t, err, "failed to create VCR recorder for %s", spec.DisplayName) + + t.Cleanup(func() { + if err := r.Stop(); err != nil { + t.Errorf("failed to stop VCR recorder: %v", err) + } + }) + + return &http.Client{ + Transport: r, + Timeout: 30 * time.Second, + } +} + +// btxProjectName returns the Braintrust project name for live/record mode. 
+func btxProjectName() string { + if name := os.Getenv("BRAINTRUST_PROJECT"); name != "" { + return name + } + if name := os.Getenv("BRAINTRUST_DEFAULT_PROJECT_NAME"); name != "" { + return name + } + return "go-unit-test" +} + +// btxProjectID returns the Braintrust project ID for live mode BTQL queries. +// It checks ID env vars first, then falls back to resolving the project name +// to an ID via the API. +func btxProjectID(t *testing.T) string { + t.Helper() + if id := os.Getenv("BRAINTRUST_PROJECT_ID"); id != "" { + return id + } + if id := os.Getenv("BRAINTRUST_DEFAULT_PROJECT_ID"); id != "" { + return id + } + id, err := resolveProjectID(btxProjectName()) + require.NoError(t, err, "failed to resolve project %q to ID", btxProjectName()) + return id +} diff --git a/btx/go.mod b/btx/go.mod new file mode 100644 index 00000000..3e134698 --- /dev/null +++ b/btx/go.mod @@ -0,0 +1,84 @@ +module github.com/braintrustdata/braintrust-sdk-go/btx + +go 1.25.0 + +toolchain go1.26.1 + +require ( + github.com/anthropics/anthropic-sdk-go v1.23.0 + github.com/aws/aws-sdk-go-v2/config v1.32.15 + github.com/aws/aws-sdk-go-v2/credentials v1.19.14 + github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.50.4 + github.com/braintrustdata/braintrust-sdk-go v0.7.0 + github.com/braintrustdata/braintrust-sdk-go/trace/contrib/anthropic v0.6.1 + github.com/braintrustdata/braintrust-sdk-go/trace/contrib/bedrockruntime v0.6.1 + github.com/braintrustdata/braintrust-sdk-go/trace/contrib/genai v0.6.1 + github.com/braintrustdata/braintrust-sdk-go/trace/contrib/openai v0.6.1 + github.com/google/uuid v1.6.0 + github.com/openai/openai-go v1.12.0 + github.com/stretchr/testify v1.11.1 + go.opentelemetry.io/otel/sdk v1.43.0 + go.opentelemetry.io/otel/trace v1.43.0 + google.golang.org/genai v1.41.0 + gopkg.in/yaml.v3 v3.0.1 +) + +require ( + cloud.google.com/go v0.123.0 // indirect + cloud.google.com/go/auth v0.17.0 // indirect + cloud.google.com/go/compute/metadata v0.9.0 // indirect + 
github.com/aws/aws-sdk-go-v2 v1.41.5 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.8 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.21 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.21 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.21 // indirect + github.com/aws/aws-sdk-go-v2/service/signin v1.0.9 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.30.15 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.41.10 // indirect + github.com/aws/smithy-go v1.25.0 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/google/go-cmp v0.7.0 // indirect + github.com/google/s2a-go v0.1.9 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect + github.com/googleapis/gax-go/v2 v2.15.0 // indirect + github.com/gorilla/websocket v1.5.3 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/tidwall/gjson v1.18.0 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.1 // indirect + github.com/tidwall/sjson v1.2.5 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 // indirect + go.opentelemetry.io/otel v1.43.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 // indirect + 
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 // indirect + go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.43.0 // indirect + go.opentelemetry.io/otel/metric v1.43.0 // indirect + go.opentelemetry.io/proto/otlp v1.10.0 // indirect + golang.org/x/crypto v0.49.0 // indirect + golang.org/x/net v0.52.0 // indirect + golang.org/x/sys v0.42.0 // indirect + golang.org/x/text v0.35.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260406210006-6f92a3bedf2d // indirect + google.golang.org/grpc v1.81.0 // indirect + google.golang.org/protobuf v1.36.11 // indirect + gopkg.in/dnaeon/go-vcr.v3 v3.2.0 // indirect +) + +replace ( + github.com/braintrustdata/braintrust-sdk-go => .. + github.com/braintrustdata/braintrust-sdk-go/trace/contrib/anthropic => ../trace/contrib/anthropic + github.com/braintrustdata/braintrust-sdk-go/trace/contrib/bedrockruntime => ../trace/contrib/bedrockruntime + github.com/braintrustdata/braintrust-sdk-go/trace/contrib/genai => ../trace/contrib/genai + github.com/braintrustdata/braintrust-sdk-go/trace/contrib/openai => ../trace/contrib/openai +) diff --git a/btx/go.sum b/btx/go.sum new file mode 100644 index 00000000..5916109f --- /dev/null +++ b/btx/go.sum @@ -0,0 +1,142 @@ +cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE= +cloud.google.com/go v0.123.0/go.mod h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU= +cloud.google.com/go/auth v0.17.0 h1:74yCm7hCj2rUyyAocqnFzsAYXgJhrG26XCFimrc/Kz4= +cloud.google.com/go/auth v0.17.0/go.mod h1:6wv/t5/6rOPAX4fJiRjKkJCvswLwdet7G8+UGXt7nCQ= +cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= +cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= +github.com/anthropics/anthropic-sdk-go v1.23.0 h1:YVNnxfVVPJM+zvQ1oDgTJUBtLttGpBHe1WtJBr0QeAs= 
+github.com/anthropics/anthropic-sdk-go v1.23.0/go.mod h1:WTz31rIUHUHqai2UslPpw5CwXrQP3geYBioRV4WOLvE= +github.com/aws/aws-sdk-go-v2 v1.41.5 h1:dj5kopbwUsVUVFgO4Fi5BIT3t4WyqIDjGKCangnV/yY= +github.com/aws/aws-sdk-go-v2 v1.41.5/go.mod h1:mwsPRE8ceUUpiTgF7QmQIJ7lgsKUPQOUl3o72QBrE1o= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.8 h1:eBMB84YGghSocM7PsjmmPffTa+1FBUeNvGvFou6V/4o= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.8/go.mod h1:lyw7GFp3qENLh7kwzf7iMzAxDn+NzjXEAGjKS2UOKqI= +github.com/aws/aws-sdk-go-v2/config v1.32.15 h1:i7rHbaySnBXGvCkDndaBU8f3EAlRVgViwNfkwFUrXgE= +github.com/aws/aws-sdk-go-v2/config v1.32.15/go.mod h1:yLJzL0IkI9+4BwjPSOueyHzppJj3t0dhK5tbmmcFk5Q= +github.com/aws/aws-sdk-go-v2/credentials v1.19.14 h1:n+UcGWAIZHkXzYt87uMFBv/l8THYELoX6gVcUvgl6fI= +github.com/aws/aws-sdk-go-v2/credentials v1.19.14/go.mod h1:cJKuyWB59Mqi0jM3nFYQRmnHVQIcgoxjEMAbLkpr62w= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21 h1:NUS3K4BTDArQqNu2ih7yeDLaS3bmHD0YndtA6UP884g= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21/go.mod h1:YWNWJQNjKigKY1RHVJCuupeWDrrHjRqHm0N9rdrWzYI= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.21 h1:Rgg6wvjjtX8bNHcvi9OnXWwcE0a2vGpbwmtICOsvcf4= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.21/go.mod h1:A/kJFst/nm//cyqonihbdpQZwiUhhzpqTsdbhDdRF9c= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.21 h1:PEgGVtPoB6NTpPrBgqSE5hE/o47Ij9qk/SEZFbUOe9A= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.21/go.mod h1:p+hz+PRAYlY3zcpJhPwXlLC4C+kqn70WIHwnzAfs6ps= +github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.50.4 h1:W6tKfa/s37faUnwJ71pGqsBO7/wfUX1L7tVprupQGo4= +github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.50.4/go.mod h1:BZ+9thH0QOTDUwE8KAv/ZwUzsNC7CSMJXj/wtnZMs5k= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7 h1:5EniKhLZe4xzL7a+fU3C2tfUN4nWIqlLesfrjkuPFTY= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7/go.mod 
h1:x0nZssQ3qZSnIcePWLvcoFisRXJzcTVvYpAAdYX8+GI= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.21 h1:c31//R3xgIJMSC8S6hEVq+38DcvUlgFY0FM6mSI5oto= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.21/go.mod h1:r6+pf23ouCB718FUxaqzZdbpYFyDtehyZcmP5KL9FkA= +github.com/aws/aws-sdk-go-v2/service/signin v1.0.9 h1:QKZH0S178gCmFEgst8hN0mCX1KxLgHBKKY/CLqwP8lg= +github.com/aws/aws-sdk-go-v2/service/signin v1.0.9/go.mod h1:7yuQJoT+OoH8aqIxw9vwF+8KpvLZ8AWmvmUWHsGQZvI= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.15 h1:lFd1+ZSEYJZYvv9d6kXzhkZu07si3f+GQ1AaYwa2LUM= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.15/go.mod h1:WSvS1NLr7JaPunCXqpJnWk1Bjo7IxzZXrZi1QQCkuqM= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19 h1:dzztQ1YmfPrxdrOiuZRMF6fuOwWlWpD2StNLTceKpys= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19/go.mod h1:YO8TrYtFdl5w/4vmjL8zaBSsiNp3w0L1FfKVKenZT7w= +github.com/aws/aws-sdk-go-v2/service/sts v1.41.10 h1:p8ogvvLugcR/zLBXTXrTkj0RYBUdErbMnAFFp12Lm/U= +github.com/aws/aws-sdk-go-v2/service/sts v1.41.10/go.mod h1:60dv0eZJfeVXfbT1tFJinbHrDfSJ2GZl4Q//OSSNAVw= +github.com/aws/smithy-go v1.25.0 h1:Sz/XJ64rwuiKtB6j98nDIPyYrV1nVNJ4YU74gttcl5U= +github.com/aws/smithy-go v1.25.0/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop 
v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= +github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/enterprise-certificate-proxy v0.3.6 h1:GW/XbdyBFQ8Qe+YAmFU9uHLo7OnF5tL52HFAgMmyrf4= +github.com/googleapis/enterprise-certificate-proxy v0.3.6/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA= +github.com/googleapis/gax-go/v2 v2.15.0 h1:SyjDc1mGgZU5LncH8gimWo9lW1DtIfPibOG81vgd/bo= +github.com/googleapis/gax-go/v2 v2.15.0/go.mod h1:zVVkkxAQHa1RQpg9z2AUCMnKhi0Qld9rcmyfL1OZhoc= +github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= +github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= +github.com/kr/pretty v0.3.1 
h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0= +github.com/openai/openai-go v1.12.0/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= +go.opentelemetry.io/auto/sdk v1.2.1 
h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 h1:CqXxU8VOmDefoh0+ztfGaymYbhdB/tT3zs79QaZTNGY= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0/go.mod h1:BuhAPThV8PBHBvg8ZzZ/Ok3idOdhWIodywz2xEcRbJo= +go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= +go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bTWkw0ICGcOLCAI5l6zsD1j20k= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0/go.mod h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 h1:3iZJKlCZufyRzPzlQhUIWVmfltrXuGyfjREgGP3UUjc= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0/go.mod h1:/G+nUPfhq2e+qiXMGxMwumDrP5jtzU+mWN7/sjT2rak= +go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.43.0 h1:mS47AX77OtFfKG4vtp+84kuGSFZHTyxtXIN269vChY0= +go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.43.0/go.mod h1:PJnsC41lAGncJlPUniSwM81gc80GkgWJWr3cu2nKEtU= +go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= +go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= +go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= +go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg= +go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw= +go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A= +go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= +go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= 
+go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g= +go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= +golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= +golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= +golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= +golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= +gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= +gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= +google.golang.org/genai v1.41.0 h1:ayXl75LjTmqTu0y94yr96d17gIb4zF8gWVzX2TgioEY= +google.golang.org/genai v1.41.0/go.mod h1:A3kkl0nyBjyFlNjgxIwKq70julKbIxpSxqKO5gw/gmk= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260406210006-6f92a3bedf2d h1:wT2n40TBqFY6wiwazVK9/iTWbsQrgk5ZfCSVFLO9LQA= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260406210006-6f92a3bedf2d/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= +google.golang.org/grpc 
v1.81.0 h1:W3G9N3KQf3BU+YuCtGKJk0CmxQNbAISICD/9AORxLIw= +google.golang.org/grpc v1.81.0/go.mod h1:xGH9GfzOyMTGIOXBJmXt+BX/V0kcdQbdcuwQ/zNw42I= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/dnaeon/go-vcr.v3 v3.2.0 h1:Rltp0Vf+Aq0u4rQXgmXgtgoRDStTnFN83cWgSGSoRzM= +gopkg.in/dnaeon/go-vcr.v3 v3.2.0/go.mod h1:2IMOnnlx9I6u9x+YBsM3tAMx6AlOxnJ0pWxQAzZ79Ag= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/btx/span_fetcher.go b/btx/span_fetcher.go new file mode 100644 index 00000000..10831a37 --- /dev/null +++ b/btx/span_fetcher.go @@ -0,0 +1,239 @@ +package btx + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "time" +) + +const ( + btqlRetryInterval = 30 * time.Second + btqlMaxWait = 600 * time.Second +) + +// spanFromOTel converts an in-memory OTel span (as captured by oteltest.Exporter) +// into the brainstore span format expected by the spec validator. +// +// The input is a tracetest.SpanStub (wrapped in oteltest.Span). We extract the +// braintrust.* attributes and build a map matching the brainstore schema. +func spanFromOTel(spanName string, attrs map[string]string) map[string]any { + result := make(map[string]any) + + // Parse JSON attributes. 
+ jsonFields := map[string]string{ + "braintrust.input_json": "input", + "braintrust.output_json": "output", + "braintrust.metadata": "metadata", + "braintrust.metrics": "metrics", + "braintrust.span_attributes": "span_attributes", + "braintrust.tags": "tags", + } + + for attrKey, fieldName := range jsonFields { + if raw, ok := attrs[attrKey]; ok { + var val any + if err := json.Unmarshal([]byte(raw), &val); err == nil { + result[fieldName] = val + } + } + } + + // Inject the OTel span name into span_attributes.name, and default + // type to "llm" when not already set by the middleware. + sa, ok := result["span_attributes"].(map[string]any) + if !ok { + sa = make(map[string]any) + result["span_attributes"] = sa + } + sa["name"] = spanName + if _, ok := sa["type"]; !ok { + sa["type"] = "llm" + } + + return result +} + +// resolveProjectID looks up a project by name and returns its ID. +func resolveProjectID(projectName string) (string, error) { + apiKey := os.Getenv("BRAINTRUST_API_KEY") + if apiKey == "" { + return "", fmt.Errorf("BRAINTRUST_API_KEY not set") + } + apiURL := os.Getenv("BRAINTRUST_API_URL") + if apiURL == "" { + apiURL = "https://api.braintrust.dev" + } + + req, err := http.NewRequest(http.MethodGet, apiURL+"/v1/project?project_name="+projectName, nil) + if err != nil { + return "", err + } + req.Header.Set("Authorization", "Bearer "+apiKey) + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return "", fmt.Errorf("fetching project: %w", err) + } + defer func() { _ = resp.Body.Close() }() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("project lookup failed (HTTP %d): %s", resp.StatusCode, string(body)) + } + + var result struct { + Objects []struct { + ID string `json:"id"` + } `json:"objects"` + } + if err := json.Unmarshal(body, &result); err != nil { + return "", fmt.Errorf("parsing project response: %w", err) + } + if len(result.Objects) 
== 0 { + return "", fmt.Errorf("project %q not found", projectName) + } + return result.Objects[0].ID, nil +} + +// fetchSpansBTQL fetches spans from the Braintrust API via BTQL. +// It retries until the expected number of spans are available. +func fetchSpansBTQL(rootSpanID, projectID string, numExpected int) ([]map[string]any, error) { + apiKey := os.Getenv("BRAINTRUST_API_KEY") + if apiKey == "" { + return nil, fmt.Errorf("BRAINTRUST_API_KEY not set (required for live mode)") + } + apiURL := os.Getenv("BRAINTRUST_API_URL") + if apiURL == "" { + apiURL = "https://api.braintrust.dev" + } + + query := buildBTQLQuery(rootSpanID, projectID) + + var totalWait time.Duration + for totalWait < btqlMaxWait { + spans, err := executeBTQL(apiURL, apiKey, query) + if err != nil { + return nil, err + } + + // Filter out scorer spans. + filtered := filterScorerSpans(spans) + + // Check if we have enough spans. + if len(filtered) > numExpected { + return nil, fmt.Errorf("too many spans: expected %d, got %d", numExpected, len(filtered)) + } + + if len(filtered) == numExpected && allSpansReady(filtered) { + return filtered, nil + } + + fmt.Printf("btx: waiting for spans (%d/%d ready), retrying in %v...\n", + len(filtered), numExpected, btqlRetryInterval) + time.Sleep(btqlRetryInterval) + totalWait += btqlRetryInterval + } + + return nil, fmt.Errorf("timed out waiting for %d spans after %v", numExpected, btqlMaxWait) +} + +// buildBTQLQuery constructs the BTQL query JSON. 
// The query selects every column from project_logs(projectID), keeps rows
// whose root_span_id equals rootSpanID and whose span_parents is not null,
// orders them by created ascending, and caps the result at 1000 rows.
func buildBTQLQuery(rootSpanID, projectID string) map[string]any {
	// Constructors for the two leaf node shapes used throughout the query.
	ident := func(name string) map[string]any {
		return map[string]any{"op": "ident", "name": []any{name}}
	}
	literal := func(value any) map[string]any {
		return map[string]any{"op": "literal", "value": value}
	}

	source := map[string]any{
		"op":   "function",
		"name": ident("project_logs"),
		"args": []any{literal(projectID)},
	}
	inTrace := map[string]any{
		"op":    "eq",
		"left":  ident("root_span_id"),
		"right": literal(rootSpanID),
	}
	hasParents := map[string]any{
		"op":    "ne",
		"left":  ident("span_parents"),
		"right": literal(nil),
	}
	oldestFirst := map[string]any{"expr": ident("created"), "dir": "asc"}

	return map[string]any{
		"query": map[string]any{
			"select": []any{map[string]any{"op": "star"}},
			"from":   source,
			"filter": map[string]any{"op": "and", "left": inTrace, "right": hasParents},
			"sort":   []any{oldestFirst},
			"limit":  1000,
		},
		"use_columnstore":     true,
		"use_brainstore":      true,
		"brainstore_realtime": true,
	}
}

// executeBTQL sends a BTQL query and returns the result rows.
// The query is JSON-encoded and POSTed to apiURL+"/btql" with a bearer token.
// Any non-200 status is returned as an error that includes the response body;
// on success the "data" array of the response is returned.
func executeBTQL(apiURL, apiKey string, query map[string]any) ([]map[string]any, error) {
	// Applied only when http.DefaultClient has no timeout of its own, so a
	// stalled connection cannot block the caller's retry loop forever.
	const requestTimeout = 30 * time.Second

	body, err := json.Marshal(query)
	if err != nil {
		return nil, fmt.Errorf("marshaling BTQL query: %w", err)
	}

	req, err := http.NewRequest(http.MethodPost, apiURL+"/btql", bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("creating BTQL request: %w", err)
	}
	req.Header.Set("Authorization", "Bearer "+apiKey)
	req.Header.Set("Content-Type", "application/json")

	// Copy the default client so its transport and settings are kept while
	// the timeout is added without mutating shared global state.
	client := *http.DefaultClient
	if client.Timeout == 0 {
		client.Timeout = requestTimeout
	}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("executing BTQL query: %w", err)
	}
	defer func() { _ = resp.Body.Close() }()

	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading BTQL response: %w", err)
	}

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("BTQL query failed (HTTP %d): %s", resp.StatusCode, string(respBody))
	}

	var result struct {
		Data []map[string]any `json:"data"`
	}
	if err := json.Unmarshal(respBody, &result); err != nil {
		return nil, fmt.Errorf("parsing BTQL response: %w", err)
	}

	return result.Data, nil
}

// filterScorerSpans removes spans where span_attributes.purpose == "scorer".
// Spans without a span_attributes map are kept. The result is nil when no
// span survives the filter.
func filterScorerSpans(spans []map[string]any) []map[string]any {
	var result []map[string]any
	for _, span := range spans {
		if sa, ok := span["span_attributes"].(map[string]any); ok && sa["purpose"] == "scorer" {
			continue
		}
		result = append(result, span)
	}
	return result
}

// allSpansReady checks that all spans have output or metrics populated.
+func allSpansReady(spans []map[string]any) bool { + for _, span := range spans { + if span["output"] == nil && span["metrics"] == nil { + return false + } + } + return true +} diff --git a/btx/span_validator.go b/btx/span_validator.go new file mode 100644 index 00000000..0e67e4ee --- /dev/null +++ b/btx/span_validator.go @@ -0,0 +1,316 @@ +package btx + +import ( + "encoding/json" + "fmt" + "math" + "sort" + "strings" +) + +// validateSpans compares actual spans against expected spans from the spec. +// It collects all errors and returns them together for easier debugging. +func validateSpans(actualSpans []map[string]any, spec LlmSpanSpec) error { + // Filter to LLM spans only. + var llmSpans []map[string]any + for _, span := range actualSpans { + sa, ok := span["span_attributes"].(map[string]any) + if !ok { + continue + } + if sa["type"] == "llm" { + llmSpans = append(llmSpans, span) + } + } + + // Sort by exec_counter for deterministic ordering. + sort.SliceStable(llmSpans, func(i, j int) bool { + return execCounter(llmSpans[i]) < execCounter(llmSpans[j]) + }) + + expected := spec.ExpectedBrainstoreSpans + + if len(llmSpans) < len(expected) { + return fmt.Errorf("%s: expected at least %d LLM spans, got %d", + spec.DisplayName, len(expected), len(llmSpans)) + } + + var allErrors []string + + for i, exp := range expected { + actual := llmSpans[i] + var errors []string + validateValue(actual, exp, fmt.Sprintf("span[%d]", i), &errors) + + if len(errors) > 0 { + spanJSON, _ := json.MarshalIndent(actual, "", " ") + header := fmt.Sprintf("--- Span %d ---", i) + if name := spanName(actual); name != "" { + header = fmt.Sprintf("--- Span %d (%s) ---", i, name) + } + allErrors = append(allErrors, header) + allErrors = append(allErrors, errors...) 
+ allErrors = append(allErrors, fmt.Sprintf("\nFull span JSON:\n%s", string(spanJSON))) + } + } + + if len(allErrors) > 0 { + return fmt.Errorf("%s: span validation failed:\n\n%s", + spec.DisplayName, strings.Join(allErrors, "\n")) + } + + return nil +} + +// validateValue recursively validates an actual value against an expected value. +func validateValue(actual, expected any, path string, errors *[]string) { + switch exp := expected.(type) { + case OrMatcher: + validateOrMatcher(actual, exp, path, errors) + + case FnMatcher: + validateFnMatcher(actual, exp, path, errors) + + case StartsWithMatcher: + actualStr, ok := actual.(string) + if !ok { + *errors = append(*errors, fmt.Sprintf("%s: expected string for starts_with, got %T (%v)", path, actual, actual)) + return + } + if !strings.HasPrefix(actualStr, exp.Prefix) { + *errors = append(*errors, fmt.Sprintf("%s: expected to start with %q, got %q", path, exp.Prefix, actualStr)) + } + + case nil: + // nil expected = don't care, always passes. + + case map[string]any: + actualMap, ok := actual.(map[string]any) + if !ok { + // Reverse single-item list vs object: actual is [dict], expected is dict. + if actualList, isList := actual.([]any); isList && len(actualList) == 1 { + if innerMap, isMap := actualList[0].(map[string]any); isMap { + validateValue(innerMap, expected, path, errors) + return + } + } + *errors = append(*errors, fmt.Sprintf("%s: expected map, got %T (%v)", path, actual, actual)) + return + } + for key, expVal := range exp { + actualVal, exists := actualMap[key] + if !exists { + // If expected is undefined_or_null, missing key is acceptable. + if fn, ok := expVal.(FnMatcher); ok && fn.Expr == "undefined_or_null" { + continue + } + *errors = append(*errors, fmt.Sprintf("%s.%s: key missing in actual", path, key)) + continue + } + validateValue(actualVal, expVal, path+"."+key, errors) + } + + case []any: + // Single-item list vs object special case. 
+ if len(exp) == 1 { + if _, isMap := exp[0].(map[string]any); isMap { + if actualMap, isActualMap := actual.(map[string]any); isActualMap { + validateValue(actualMap, exp[0], path, errors) + return + } + } + } + + actualList, ok := actual.([]any) + if !ok { + *errors = append(*errors, fmt.Sprintf("%s: expected list, got %T (%v)", path, actual, actual)) + return + } + if len(actualList) < len(exp) { + *errors = append(*errors, fmt.Sprintf("%s: expected at least %d elements, got %d", path, len(exp), len(actualList))) + return + } + for i, expItem := range exp { + validateValue(actualList[i], expItem, fmt.Sprintf("%s[%d]", path, i), errors) + } + + default: + // Scalar comparison. + if !scalarEqual(actual, expected) { + *errors = append(*errors, fmt.Sprintf("%s: expected=%v (%T), actual=%v (%T)", path, expected, expected, actual, actual)) + } + } +} + +// validateOrMatcher validates that actual matches at least one alternative. +func validateOrMatcher(actual any, matcher OrMatcher, path string, errors *[]string) { + var allSubErrors [][]string + + for _, alt := range matcher.Alternatives { + var subErrors []string + validateValue(actual, alt, path, &subErrors) + if len(subErrors) == 0 { + return // One alternative matched. + } + allSubErrors = append(allSubErrors, subErrors) + } + + // None matched — report all alternatives' errors. + *errors = append(*errors, fmt.Sprintf("%s: none of %d alternatives matched:", path, len(matcher.Alternatives))) + for i, subErrors := range allSubErrors { + *errors = append(*errors, fmt.Sprintf(" Alternative %d:", i)) + for _, e := range subErrors { + *errors = append(*errors, " "+e) + } + } +} + +// validateFnMatcher validates using a named predicate or lambda expression. 
+func validateFnMatcher(actual any, matcher FnMatcher, path string, errors *[]string) { + switch matcher.Expr { + case "is_non_negative_number": + if !isNumber(actual) { + *errors = append(*errors, fmt.Sprintf("%s: expected number for is_non_negative_number, got %T (%v)", path, actual, actual)) + return + } + if toFloat64(actual) < 0 { + *errors = append(*errors, fmt.Sprintf("%s: expected non-negative number, got %v", path, actual)) + } + + case "is_positive_number": + if !isNumber(actual) { + *errors = append(*errors, fmt.Sprintf("%s: expected number for is_positive_number, got %T (%v)", path, actual, actual)) + return + } + if toFloat64(actual) <= 0 { + *errors = append(*errors, fmt.Sprintf("%s: expected positive number, got %v", path, actual)) + } + + case "is_non_empty_string": + str, ok := actual.(string) + if !ok || str == "" { + *errors = append(*errors, fmt.Sprintf("%s: expected non-empty string, got %T (%v)", path, actual, actual)) + } + + case "is_reasoning_message": + validateReasoningMessage(actual, path, errors) + + case "undefined_or_null": + if actual != nil { + *errors = append(*errors, fmt.Sprintf("%s: expected null/undefined, got %T (%v)", path, actual, actual)) + } + + default: + // Lambda expressions or unknown predicates. + // In Go we can't evaluate Python lambdas, so treat as "non-null and non-empty". + if actual == nil { + *errors = append(*errors, fmt.Sprintf("%s: expected non-null for fn %q, got nil", path, matcher.Expr)) + return + } + if str, ok := actual.(string); ok && str == "" { + *errors = append(*errors, fmt.Sprintf("%s: expected non-empty for fn %q, got empty string", path, matcher.Expr)) + } + } +} + +// validateReasoningMessage validates that the value is a list of {type: "summary_text", text: } dicts, +// or an empty list. 
+func validateReasoningMessage(actual any, path string, errors *[]string) { + list, ok := actual.([]any) + if !ok { + *errors = append(*errors, fmt.Sprintf("%s: expected list for is_reasoning_message, got %T", path, actual)) + return + } + // Empty list is valid. + for i, item := range list { + m, ok := item.(map[string]any) + if !ok { + *errors = append(*errors, fmt.Sprintf("%s[%d]: expected map in reasoning message, got %T", path, i, item)) + continue + } + if m["type"] != "summary_text" { + *errors = append(*errors, fmt.Sprintf("%s[%d].type: expected 'summary_text', got %v", path, i, m["type"])) + } + text, ok := m["text"].(string) + if !ok || text == "" { + *errors = append(*errors, fmt.Sprintf("%s[%d].text: expected non-empty string, got %v", path, i, m["text"])) + } + } +} + +// scalarEqual compares two scalar values with numeric tolerance. +func scalarEqual(actual, expected any) bool { + // Handle numeric comparisons with epsilon. + if isNumber(actual) && isNumber(expected) { + return math.Abs(toFloat64(actual)-toFloat64(expected)) < 1e-9 + } + + return fmt.Sprintf("%v", actual) == fmt.Sprintf("%v", expected) +} + +// isNumber returns true if the value is a numeric type. +func isNumber(v any) bool { + switch v.(type) { + case int, int8, int16, int32, int64, + uint, uint8, uint16, uint32, uint64, + float32, float64: + return true + } + return false +} + +// toFloat64 converts a numeric value to float64. 
+func toFloat64(v any) float64 { + switch n := v.(type) { + case int: + return float64(n) + case int8: + return float64(n) + case int16: + return float64(n) + case int32: + return float64(n) + case int64: + return float64(n) + case uint: + return float64(n) + case uint8: + return float64(n) + case uint16: + return float64(n) + case uint32: + return float64(n) + case uint64: + return float64(n) + case float32: + return float64(n) + case float64: + return n + default: + return 0 + } +} + +// execCounter extracts span_attributes.exec_counter as a float64 for sorting. +func execCounter(span map[string]any) float64 { + sa, ok := span["span_attributes"].(map[string]any) + if !ok { + return 0 + } + if ec, ok := sa["exec_counter"]; ok { + return toFloat64(ec) + } + return 0 +} + +// spanName extracts span_attributes.name from a span. +func spanName(span map[string]any) string { + sa, ok := span["span_attributes"].(map[string]any) + if !ok { + return "" + } + if name, ok := sa["name"].(string); ok { + return name + } + return "" +} diff --git a/btx/spec_executor.go b/btx/spec_executor.go new file mode 100644 index 00000000..1ed7db3a --- /dev/null +++ b/btx/spec_executor.go @@ -0,0 +1,864 @@ +package btx + +import ( + "context" + "fmt" + "net/http" + "os" + + "encoding/base64" + "strings" + + "github.com/anthropics/anthropic-sdk-go" + anthropicoption "github.com/anthropics/anthropic-sdk-go/option" + awsconfig "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/service/bedrockruntime" + brtypes "github.com/aws/aws-sdk-go-v2/service/bedrockruntime/types" + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/packages/param" + "github.com/openai/openai-go/responses" + "github.com/openai/openai-go/shared" + "google.golang.org/genai" + + oteltrace "go.opentelemetry.io/otel/trace" + + "github.com/braintrustdata/braintrust-sdk-go/internal/vcr" + traceanthropic 
"github.com/braintrustdata/braintrust-sdk-go/trace/contrib/anthropic" + tracebedrockruntime "github.com/braintrustdata/braintrust-sdk-go/trace/contrib/bedrockruntime" + tracegenai "github.com/braintrustdata/braintrust-sdk-go/trace/contrib/genai" + traceopenai "github.com/braintrustdata/braintrust-sdk-go/trace/contrib/openai" +) + +// executeSpec runs all requests in a spec under a parent OTel span and returns +// the trace ID (hex string). The httpClient should be VCR-wrapped for replay. +func executeSpec(ctx context.Context, spec LlmSpanSpec, tp oteltrace.TracerProvider, httpClient *http.Client) (string, error) { + tracer := tp.Tracer("btx") + ctx, rootSpan := tracer.Start(ctx, spec.Name) + defer rootSpan.End() + + traceID := rootSpan.SpanContext().TraceID().String() + + switch spec.Provider { + case "openai": + if err := executeOpenAI(ctx, spec, tp, httpClient); err != nil { + return traceID, err + } + case "anthropic": + if err := executeAnthropic(ctx, spec, tp, httpClient); err != nil { + return traceID, err + } + case "google": + if err := executeGoogle(ctx, spec, tp, httpClient); err != nil { + return traceID, err + } + case "bedrock": + if err := executeBedrock(ctx, spec, tp, httpClient); err != nil { + return traceID, err + } + default: + return traceID, fmt.Errorf("unsupported provider: %s", spec.Provider) + } + + return traceID, nil +} + +// executeOpenAI dispatches to the correct OpenAI executor based on endpoint. 
+func executeOpenAI(ctx context.Context, spec LlmSpanSpec, tp oteltrace.TracerProvider, httpClient *http.Client) error { + apiKey := os.Getenv("OPENAI_API_KEY") + if vcr.GetVCRMode() == vcr.ModeReplay { + apiKey = "dummy-openai-key" + } + + mw := traceopenai.NewMiddleware(traceopenai.WithTracerProvider(tp)) //nolint:bodyclose // false positive: middleware factory + client := openai.NewClient( + option.WithAPIKey(apiKey), + option.WithHTTPClient(httpClient), + option.WithMiddleware(mw), + ) + + switch spec.Endpoint { + case "/v1/chat/completions": + return executeChatCompletions(ctx, spec, client) + case "/v1/responses": + return executeResponses(ctx, spec, client) + default: + return fmt.Errorf("unsupported OpenAI endpoint: %s", spec.Endpoint) + } +} + +// executeChatCompletions handles OpenAI chat completions (streaming and non-streaming). +func executeChatCompletions(ctx context.Context, spec LlmSpanSpec, client openai.Client) error { + var history []openai.ChatCompletionMessageParamUnion + + for _, req := range spec.Requests { + messages := buildChatMessages(req) + // Prepend history from previous turns. + allMessages := append(history, messages...) + + params := openai.ChatCompletionNewParams{ + Model: openai.ChatModel(stringFromMap(req, "model")), + Messages: allMessages, + } + + if temp, ok := req["temperature"]; ok { + params.Temperature = openai.Float(toFloat64(temp)) + } + if mt, ok := req["max_tokens"]; ok { + params.MaxTokens = openai.Int(int64(toFloat64(mt))) + } + if tools, ok := req["tools"].([]any); ok { + params.Tools = buildChatTools(tools) + } + + isStreaming := boolFromMap(req, "stream") + + if isStreaming { + // Set stream_options if present. 
+ if so, ok := req["stream_options"].(map[string]any); ok { + if boolFromMap(so, "include_usage") { + params.StreamOptions = openai.ChatCompletionStreamOptionsParam{ + IncludeUsage: param.Opt[bool]{Value: true}, + } + } + } + + stream := client.Chat.Completions.NewStreaming(ctx, params) + for stream.Next() { + // Consume stream to trigger instrumentation. + } + if err := stream.Err(); err != nil { + return fmt.Errorf("streaming chat completions: %w", err) + } + // For multi-turn streaming, we don't accumulate assistant responses into history. + // The spec tests don't require it for streaming chat completions. + } else { + resp, err := client.Chat.Completions.New(ctx, params) + if err != nil { + return fmt.Errorf("chat completions: %w", err) + } + // Accumulate assistant response for multi-turn. + if len(resp.Choices) > 0 { + history = append(allMessages, openai.AssistantMessage(resp.Choices[0].Message.Content)) + } + } + } + + return nil +} + +// executeResponses handles the OpenAI Responses API (multi-turn, reasoning). +func executeResponses(ctx context.Context, spec LlmSpanSpec, client openai.Client) error { + var historyItems []responses.ResponseInputItemUnionParam + + for _, req := range spec.Requests { + inputItems := buildResponsesInput(req) + // Prepend history from previous turns. + allInput := append(historyItems, inputItems...) 
+ + params := responses.ResponseNewParams{ + Model: shared.ResponsesModel(stringFromMap(req, "model")), + Input: responses.ResponseNewParamsInputUnion{ + OfInputItemList: allInput, + }, + } + + if reasoning, ok := req["reasoning"].(map[string]any); ok { + var rp shared.ReasoningParam + if effort, ok := reasoning["effort"].(string); ok { + rp.Effort = shared.ReasoningEffort(effort) + } + if summary, ok := reasoning["summary"].(string); ok { + rp.Summary = shared.ReasoningSummary(summary) + } + params.Reasoning = rp + } + + resp, err := client.Responses.New(ctx, params) + if err != nil { + return fmt.Errorf("responses API: %w", err) + } + + // Accumulate the response output items for the next turn. + historyItems = append(allInput, responsesToInputItems(resp)...) + } + + return nil +} + +// executeAnthropic dispatches to the Anthropic executor. +func executeAnthropic(ctx context.Context, spec LlmSpanSpec, tp oteltrace.TracerProvider, httpClient *http.Client) error { + apiKey := os.Getenv("ANTHROPIC_API_KEY") + if vcr.GetVCRMode() == vcr.ModeReplay { + apiKey = "dummy-anthropic-key" + } + + //nolint:bodyclose // false positive: middleware factory, not HTTP response + opts := []anthropicoption.RequestOption{ + anthropicoption.WithAPIKey(apiKey), + anthropicoption.WithHTTPClient(httpClient), + anthropicoption.WithMiddleware(traceanthropic.NewMiddleware(traceanthropic.WithTracerProvider(tp))), + } + + client := anthropic.NewClient(opts...) + + switch spec.Endpoint { + case "/v1/messages": + return executeAnthropicMessages(ctx, spec, client) + default: + return fmt.Errorf("unsupported Anthropic endpoint: %s", spec.Endpoint) + } +} + +// executeAnthropicMessages handles Anthropic messages (streaming and non-streaming). 
+func executeAnthropicMessages(ctx context.Context, spec LlmSpanSpec, client anthropic.Client) error { + var history []anthropic.MessageParam + + for _, req := range spec.Requests { + messages := buildAnthropicMessages(req) + allMessages := append(history, messages...) + + params := anthropic.MessageNewParams{ + Model: anthropic.Model(stringFromMap(req, "model")), + Messages: allMessages, + } + + if mt, ok := req["max_tokens"]; ok { + params.MaxTokens = int64(toFloat64(mt)) + } + if temp, ok := req["temperature"]; ok { + params.Temperature = anthropic.Float(toFloat64(temp)) + } + + // Handle system prompt. + if sys := buildAnthropicSystem(req); len(sys) > 0 { + params.System = sys + } + + isStreaming := boolFromMap(req, "stream") + + // Build extra headers from spec. + var extraOpts []anthropicoption.RequestOption + for k, v := range spec.Headers { + extraOpts = append(extraOpts, anthropicoption.WithHeader(k, v)) + } + + if isStreaming { + stream := client.Messages.NewStreaming(ctx, params, extraOpts...) + for stream.Next() { + // Consume stream to trigger instrumentation. + } + if err := stream.Err(); err != nil { + return fmt.Errorf("streaming anthropic messages: %w", err) + } + } else { + resp, err := client.Messages.New(ctx, params, extraOpts...) + if err != nil { + return fmt.Errorf("anthropic messages: %w", err) + } + // Accumulate for multi-turn. + assistantContent := make([]anthropic.ContentBlockParamUnion, 0, len(resp.Content)) + for _, block := range resp.Content { + if block.Type == "text" { + assistantContent = append(assistantContent, anthropic.NewTextBlock(block.Text)) + } + } + if len(assistantContent) > 0 { + history = append(allMessages, anthropic.NewAssistantMessage(assistantContent...)) + } + } + } + + return nil +} + +// --- Message building helpers --- + +// buildChatMessages converts a spec request's messages to OpenAI ChatCompletionMessageParamUnion. 
+func buildChatMessages(req map[string]any) []openai.ChatCompletionMessageParamUnion { + rawMsgs, ok := req["messages"].([]any) + if !ok { + return nil + } + + var messages []openai.ChatCompletionMessageParamUnion + for _, raw := range rawMsgs { + msg, ok := raw.(map[string]any) + if !ok { + continue + } + role := stringFromMap(msg, "role") + content := msg["content"] + + switch role { + case "system": + messages = append(messages, openai.SystemMessage(fmt.Sprintf("%v", content))) + case "user": + parts := buildChatContentParts(content) + if parts != nil { + messages = append(messages, openai.UserMessage(parts)) + } else { + messages = append(messages, openai.UserMessage(fmt.Sprintf("%v", content))) + } + case "assistant": + messages = append(messages, openai.AssistantMessage(fmt.Sprintf("%v", content))) + } + } + return messages +} + +// buildChatContentParts builds multipart content (text + images) for chat completions. +func buildChatContentParts(content any) []openai.ChatCompletionContentPartUnionParam { + parts, ok := content.([]any) + if !ok { + return nil + } + + var result []openai.ChatCompletionContentPartUnionParam + for _, part := range parts { + pm, ok := part.(map[string]any) + if !ok { + continue + } + switch pm["type"] { + case "text": + result = append(result, openai.TextContentPart(stringFromMap(pm, "text"))) + case "image_url": + if iu, ok := pm["image_url"].(map[string]any); ok { + result = append(result, openai.ImageContentPart(openai.ChatCompletionContentPartImageImageURLParam{ + URL: stringFromMap(iu, "url"), + })) + } + case "file": + if f, ok := pm["file"].(map[string]any); ok { + fp := openai.ChatCompletionContentPartFileFileParam{} + if fd := stringFromMap(f, "file_data"); fd != "" { + fp.FileData = param.Opt[string]{Value: fd} + } + if fn := stringFromMap(f, "filename"); fn != "" { + fp.Filename = param.Opt[string]{Value: fn} + } + result = append(result, openai.FileContentPart(fp)) + } + } + } + if len(result) == 0 { + return nil + } + 
return result +} + +// buildChatTools converts spec tool definitions to OpenAI tool params. +func buildChatTools(tools []any) []openai.ChatCompletionToolParam { + var result []openai.ChatCompletionToolParam + for _, tool := range tools { + tm, ok := tool.(map[string]any) + if !ok { + continue + } + fn, ok := tm["function"].(map[string]any) + if !ok { + continue + } + + param := openai.ChatCompletionToolParam{ + Type: "function", + Function: openai.FunctionDefinitionParam{ + Name: stringFromMap(fn, "name"), + }, + } + if desc, ok := fn["description"].(string); ok { + param.Function.Description = openai.String(desc) + } + if params, ok := fn["parameters"].(map[string]any); ok { + param.Function.Parameters = openai.FunctionParameters(params) + } + result = append(result, param) + } + return result +} + +// buildResponsesInput converts spec request input items to OpenAI Responses API params. +func buildResponsesInput(req map[string]any) []responses.ResponseInputItemUnionParam { + rawInput, ok := req["input"].([]any) + if !ok { + return nil + } + + var items []responses.ResponseInputItemUnionParam + for _, raw := range rawInput { + msg, ok := raw.(map[string]any) + if !ok { + continue + } + role := stringFromMap(msg, "role") + content := stringFromMap(msg, "content") + if role != "" && content != "" { + items = append(items, responses.ResponseInputItemParamOfMessage(content, responses.EasyInputMessageRole(role))) + } + } + return items +} + +// responsesToInputItems converts a Responses API response into input items +// for accumulating multi-turn history. The output items must be fed back +// as properly typed input items (not ID references) so the middleware logs +// full context. 
+func responsesToInputItems(resp *responses.Response) []responses.ResponseInputItemUnionParam { + var items []responses.ResponseInputItemUnionParam + for _, output := range resp.Output { + switch output.Type { + case "message": + var contentParams []responses.ResponseOutputMessageContentUnionParam + for _, c := range output.Content { + if c.Type == "output_text" { + contentParams = append(contentParams, responses.ResponseOutputMessageContentUnionParam{ + OfOutputText: &responses.ResponseOutputTextParam{ + Text: c.Text, + Annotations: []responses.ResponseOutputTextAnnotationUnionParam{}, + }, + }) + } + } + items = append(items, responses.ResponseInputItemParamOfOutputMessage( + contentParams, + output.ID, + responses.ResponseOutputMessageStatus(output.Status), + )) + case "reasoning": + var summaries []responses.ResponseReasoningItemSummaryParam + for _, s := range output.Summary { + summaries = append(summaries, responses.ResponseReasoningItemSummaryParam{ + Text: s.Text, + }) + } + items = append(items, responses.ResponseInputItemParamOfReasoning(output.ID, summaries)) + } + } + return items +} + +// buildAnthropicMessages converts spec request messages to Anthropic MessageParam. 
+func buildAnthropicMessages(req map[string]any) []anthropic.MessageParam { + rawMsgs, ok := req["messages"].([]any) + if !ok { + return nil + } + + var messages []anthropic.MessageParam + for _, raw := range rawMsgs { + msg, ok := raw.(map[string]any) + if !ok { + continue + } + role := stringFromMap(msg, "role") + content := msg["content"] + + switch role { + case "user": + blocks := buildAnthropicContentBlocks(content) + if blocks != nil { + messages = append(messages, anthropic.NewUserMessage(blocks...)) + } else { + messages = append(messages, anthropic.NewUserMessage(anthropic.NewTextBlock(fmt.Sprintf("%v", content)))) + } + case "assistant": + messages = append(messages, anthropic.NewAssistantMessage(anthropic.NewTextBlock(fmt.Sprintf("%v", content)))) + } + } + return messages +} + +// buildAnthropicContentBlocks builds content blocks for multipart Anthropic messages. +func buildAnthropicContentBlocks(content any) []anthropic.ContentBlockParamUnion { + parts, ok := content.([]any) + if !ok { + return nil + } + + var blocks []anthropic.ContentBlockParamUnion + for _, part := range parts { + pm, ok := part.(map[string]any) + if !ok { + continue + } + switch pm["type"] { + case "text": + blocks = append(blocks, anthropic.NewTextBlock(stringFromMap(pm, "text"))) + case "image": + if src, ok := pm["source"].(map[string]any); ok { + mediaType := stringFromMap(src, "media_type") + data := stringFromMap(src, "data") + blocks = append(blocks, anthropic.NewImageBlockBase64(mediaType, data)) + } + case "document": + if src, ok := pm["source"].(map[string]any); ok { + data := stringFromMap(src, "data") + blocks = append(blocks, anthropic.NewDocumentBlock(anthropic.Base64PDFSourceParam{ + Data: data, + })) + } + } + } + if len(blocks) == 0 { + return nil + } + return blocks +} + +// buildAnthropicSystem builds the system prompt from a spec request. 
+func buildAnthropicSystem(req map[string]any) []anthropic.TextBlockParam { + sys := req["system"] + if sys == nil { + return nil + } + + switch s := sys.(type) { + case string: + return []anthropic.TextBlockParam{{Text: s}} + case []any: + // List of system message blocks. + var blocks []anthropic.TextBlockParam + for _, item := range s { + if m, ok := item.(map[string]any); ok { + block := anthropic.TextBlockParam{ + Text: stringFromMap(m, "text"), + } + // Handle cache_control if present. + if cc, ok := m["cache_control"].(map[string]any); ok { + _ = stringFromMap(cc, "type") // always "ephemeral" + ccParam := anthropic.NewCacheControlEphemeralParam() + if ttl, ok := cc["ttl"].(string); ok { + ccParam.TTL = anthropic.CacheControlEphemeralTTL(ttl) + } + block.CacheControl = ccParam + } + blocks = append(blocks, block) + } + } + return blocks + default: + return nil + } +} + +// --- Map helpers --- + +func stringFromMap(m map[string]any, key string) string { + if v, ok := m[key]; ok { + if s, ok := v.(string); ok { + return s + } + } + return "" +} + +func boolFromMap(m map[string]any, key string) bool { + if v, ok := m[key]; ok { + if b, ok := v.(bool); ok { + return b + } + } + return false +} + +// --- AWS Bedrock executor --- + +// bedrockRegion is the AWS region used for Bedrock cassettes. Both record and +// replay must use the same region because go-vcr matches on method+URL, and +// the URL includes the regional hostname. +const bedrockRegion = "us-east-2" + +// executeBedrock dispatches to the correct Bedrock executor based on endpoint. 
+func executeBedrock(ctx context.Context, spec LlmSpanSpec, tp oteltrace.TracerProvider, httpClient *http.Client) error { + region := os.Getenv("AWS_REGION") + if region == "" { + region = bedrockRegion + } + + opts := []func(*awsconfig.LoadOptions) error{ + awsconfig.WithRegion(region), + awsconfig.WithHTTPClient(httpClient), + } + if vcr.GetVCRMode() == vcr.ModeReplay { + opts = append(opts, awsconfig.WithCredentialsProvider( + credentials.NewStaticCredentialsProvider("AKIAFAKE", "fake", ""), + )) + } + + // Disable retries — VCR records a single request/response pair, and + // retries would produce requests that don't match the cassette. + opts = append(opts, awsconfig.WithRetryMaxAttempts(1)) + + cfg, err := awsconfig.LoadDefaultConfig(ctx, opts...) + if err != nil { + return fmt.Errorf("loading AWS config: %w", err) + } + + client := bedrockruntime.NewFromConfig(cfg, + tracebedrockruntime.NewMiddleware(tracebedrockruntime.WithTracerProvider(tp)), + ) + + switch { + case strings.HasSuffix(spec.Endpoint, "/converse"): + return executeBedrockConverse(ctx, spec, client) + case strings.HasSuffix(spec.Endpoint, "/converse-stream"): + return executeBedrockConverseStream(ctx, spec, client) + default: + return fmt.Errorf("unsupported Bedrock endpoint: %s", spec.Endpoint) + } +} + +// executeBedrockConverse handles non-streaming Bedrock Converse calls. +func executeBedrockConverse(ctx context.Context, spec LlmSpanSpec, client *bedrockruntime.Client) error { + for _, req := range spec.Requests { + modelID := stringFromMap(req, "modelId") + messages := buildBedrockMessages(req) + + input := &bedrockruntime.ConverseInput{ + ModelId: &modelID, + Messages: messages, + } + + _, err := client.Converse(ctx, input) + if err != nil { + return fmt.Errorf("bedrock converse: %w", err) + } + } + return nil +} + +// executeBedrockConverseStream handles streaming Bedrock ConverseStream calls. 
+func executeBedrockConverseStream(ctx context.Context, spec LlmSpanSpec, client *bedrockruntime.Client) error { + for _, req := range spec.Requests { + modelID := stringFromMap(req, "modelId") + messages := buildBedrockMessages(req) + + input := &bedrockruntime.ConverseStreamInput{ + ModelId: &modelID, + Messages: messages, + } + + out, err := client.ConverseStream(ctx, input) + if err != nil { + return fmt.Errorf("bedrock converse stream: %w", err) + } + + // Consume the stream to trigger span finalization. + for ev := range out.GetStream().Events() { + _ = ev // Drain all events. + } + if err := out.GetStream().Close(); err != nil { + return fmt.Errorf("closing bedrock stream: %w", err) + } + } + return nil +} + +// buildBedrockMessages converts spec request messages to Bedrock typed messages. +func buildBedrockMessages(req map[string]any) []brtypes.Message { + rawMsgs, ok := req["messages"].([]any) + if !ok { + return nil + } + + var messages []brtypes.Message + for _, rm := range rawMsgs { + mm, ok := rm.(map[string]any) + if !ok { + continue + } + msg := brtypes.Message{ + Role: brtypes.ConversationRole(stringFromMap(mm, "role")), + } + if rawContent, ok := mm["content"].([]any); ok { + for _, rc := range rawContent { + block := buildBedrockContentBlock(rc) + if block != nil { + msg.Content = append(msg.Content, block) + } + } + } + messages = append(messages, msg) + } + return messages +} + +// buildBedrockContentBlock converts a spec content block to a Bedrock typed content block. 
+func buildBedrockContentBlock(raw any) brtypes.ContentBlock { + m, ok := raw.(map[string]any) + if !ok { + return nil + } + + // Text block: {text: "..."} + if text, ok := m["text"].(string); ok { + return &brtypes.ContentBlockMemberText{Value: text} + } + + // Image block: {image: {format: "png", source: {bytes: ""}}} + if img, ok := m["image"].(map[string]any); ok { + format := stringFromMap(img, "format") + if src, ok := img["source"].(map[string]any); ok { + if b64, ok := src["bytes"].(string); ok { + data, err := base64.StdEncoding.DecodeString(b64) + if err != nil { + return nil + } + return &brtypes.ContentBlockMemberImage{ + Value: brtypes.ImageBlock{ + Format: brtypes.ImageFormat(format), + Source: &brtypes.ImageSourceMemberBytes{Value: data}, + }, + } + } + } + } + + return nil +} + +// --- Google/Gemini executor --- + +// executeGoogle dispatches to the correct Google executor based on endpoint. +func executeGoogle(ctx context.Context, spec LlmSpanSpec, tp oteltrace.TracerProvider, httpClient *http.Client) error { + apiKey := os.Getenv("GOOGLE_API_KEY") + if apiKey == "" { + apiKey = os.Getenv("GEMINI_API_KEY") + } + if vcr.GetVCRMode() == vcr.ModeReplay { + apiKey = "dummy-google-key" + } + + // Wrap the HTTP client with Gemini tracing. + tracedClient := tracegenai.WrapClient(httpClient, tracegenai.WithTracerProvider(tp)) + + client, err := genai.NewClient(ctx, &genai.ClientConfig{ + HTTPClient: tracedClient, + APIKey: apiKey, + Backend: genai.BackendGeminiAPI, + }) + if err != nil { + return fmt.Errorf("creating genai client: %w", err) + } + + // The endpoint contains the model name and operation, e.g. + // "/v1/models/gemini-3.1-flash-lite-preview:generateContent" + if strings.Contains(spec.Endpoint, ":generateContent") { + return executeGenerateContent(ctx, spec, client) + } + + return fmt.Errorf("unsupported Google endpoint: %s", spec.Endpoint) +} + +// extractModelFromEndpoint extracts the model name from a Gemini endpoint path. +// e.g. 
"/v1/models/gemini-3.1-flash-lite-preview:generateContent" → "gemini-3.1-flash-lite-preview" +func extractModelFromEndpoint(endpoint string) string { + // Find the model segment between "/models/" and ":" + const prefix = "/models/" + idx := strings.Index(endpoint, prefix) + if idx < 0 { + return endpoint + } + rest := endpoint[idx+len(prefix):] + if colonIdx := strings.Index(rest, ":"); colonIdx >= 0 { + return rest[:colonIdx] + } + return rest +} + +// executeGenerateContent handles Google Gemini generateContent calls. +func executeGenerateContent(ctx context.Context, spec LlmSpanSpec, client *genai.Client) error { + model := extractModelFromEndpoint(spec.Endpoint) + + for _, req := range spec.Requests { + contents := buildGeminiContents(req) + + var config *genai.GenerateContentConfig + if gc, ok := req["generationConfig"].(map[string]any); ok { + config = buildGeminiConfig(gc) + } + + _, err := client.Models.GenerateContent(ctx, model, contents, config) + if err != nil { + return fmt.Errorf("generateContent: %w", err) + } + } + + return nil +} + +// buildGeminiContents converts spec request contents to genai.Content structs. +func buildGeminiContents(req map[string]any) []*genai.Content { + rawContents, ok := req["contents"].([]any) + if !ok { + return nil + } + + var contents []*genai.Content + for _, rc := range rawContents { + cm, ok := rc.(map[string]any) + if !ok { + continue + } + content := &genai.Content{ + Role: stringFromMap(cm, "role"), + } + if rawParts, ok := cm["parts"].([]any); ok { + for _, rp := range rawParts { + pm, ok := rp.(map[string]any) + if !ok { + continue + } + part := buildGeminiPart(pm) + if part != nil { + content.Parts = append(content.Parts, part) + } + } + } + contents = append(contents, content) + } + return contents +} + +// buildGeminiPart converts a spec part map to a genai.Part. +func buildGeminiPart(pm map[string]any) *genai.Part { + // Text part. 
+ if text, ok := pm["text"].(string); ok { + return &genai.Part{Text: text} + } + + // Inline data (base64 image/binary). + if id, ok := pm["inline_data"].(map[string]any); ok { + mimeType := stringFromMap(id, "mime_type") + dataStr := stringFromMap(id, "data") + data, err := base64.StdEncoding.DecodeString(dataStr) + if err != nil { + return nil + } + return &genai.Part{ + InlineData: &genai.Blob{ + MIMEType: mimeType, + Data: data, + }, + } + } + + return nil +} + +// buildGeminiConfig converts a spec generationConfig map to genai.GenerateContentConfig. +func buildGeminiConfig(gc map[string]any) *genai.GenerateContentConfig { + config := &genai.GenerateContentConfig{} + if temp, ok := gc["temperature"]; ok { + config.Temperature = genai.Ptr(float32(toFloat64(temp))) + } + if topP, ok := gc["topP"]; ok { + config.TopP = genai.Ptr(float32(toFloat64(topP))) + } + if topK, ok := gc["topK"]; ok { + config.TopK = genai.Ptr(float32(toFloat64(topK))) + } + return config +} diff --git a/btx/spec_fetch.go b/btx/spec_fetch.go new file mode 100644 index 00000000..c4211e5c --- /dev/null +++ b/btx/spec_fetch.go @@ -0,0 +1,125 @@ +// Package btx implements the BTX (Braintrust Cross-language) test runner for the Go SDK. +// It validates SDK instrumentation against YAML spec files from braintrustdata/braintrust-spec. +package btx + +import ( + "archive/tar" + "compress/gzip" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" +) + +const specCacheDir = ".spec-cache" + +// fetchSpec downloads and extracts the spec tarball from GitHub if not already cached. +// It returns the path to the spec root directory (containing test/llm_span/). +// If BTX_SPEC_ROOT is set, it is returned directly without any download. 
+func fetchSpec() (string, error) { + if root := os.Getenv("BTX_SPEC_ROOT"); root != "" { + return root, nil + } + + ref, err := readSpecRef() + if err != nil { + return "", fmt.Errorf("reading spec ref: %w", err) + } + + cacheDir := filepath.Join(specCacheDir, ref) + marker := filepath.Join(cacheDir, "test", "llm_span") + + // Idempotent: skip if already cached. + if info, err := os.Stat(marker); err == nil && info.IsDir() { + return cacheDir, nil + } + + url := fmt.Sprintf("https://github.com/braintrustdata/braintrust-spec/archive/%s.tar.gz", ref) + fmt.Printf("btx: fetching spec from %s\n", url) + + resp, err := http.Get(url) //nolint:gosec // URL is constructed from a pinned ref. + if err != nil { + return "", fmt.Errorf("downloading spec: %w", err) + } + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("downloading spec: HTTP %d", resp.StatusCode) + } + + if err := extractTarGz(resp.Body, cacheDir); err != nil { + return "", fmt.Errorf("extracting spec: %w", err) + } + + return cacheDir, nil +} + +// readSpecRef reads the pinned ref from spec_ref.txt. +func readSpecRef() (string, error) { + data, err := os.ReadFile("spec_ref.txt") + if err != nil { + return "", err + } + return strings.TrimSpace(string(data)), nil +} + +// extractTarGz extracts a tar.gz stream into destDir, stripping the top-level +// directory from the archive (e.g. "braintrust-spec-v0.0.7/" is removed). +func extractTarGz(r io.Reader, destDir string) error { + gz, err := gzip.NewReader(r) + if err != nil { + return err + } + defer func() { _ = gz.Close() }() + + tr := tar.NewReader(gz) + for { + header, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return err + } + + // Strip the top-level directory. 
+ parts := strings.SplitN(header.Name, "/", 2) + if len(parts) < 2 || parts[1] == "" { + continue + } + relPath := parts[1] + target := filepath.Join(destDir, filepath.FromSlash(relPath)) + + switch header.Typeflag { + case tar.TypeDir: + if err := os.MkdirAll(target, 0o755); err != nil { + return err + } + case tar.TypeReg: + if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { + return err + } + if err := extractFile(target, tr); err != nil { + return err + } + } + } + return nil +} + +// extractFile writes a single file from the tar reader. +func extractFile(target string, r io.Reader) (retErr error) { + f, err := os.Create(target) + if err != nil { + return err + } + defer func() { + if closeErr := f.Close(); retErr == nil { + retErr = closeErr + } + }() + _, err = io.Copy(f, r) //nolint:gosec // Trusted archive from GitHub. + return err +} diff --git a/btx/spec_loader.go b/btx/spec_loader.go new file mode 100644 index 00000000..1379dfe6 --- /dev/null +++ b/btx/spec_loader.go @@ -0,0 +1,303 @@ +package btx + +import ( + "fmt" + "os" + "path/filepath" + "sort" + "strings" + + "github.com/google/uuid" + "gopkg.in/yaml.v3" + + "github.com/braintrustdata/braintrust-sdk-go/internal/vcr" +) + +// LlmSpanSpec represents a parsed YAML spec file for an LLM span test. +type LlmSpanSpec struct { + Name string + Type string + Provider string + Endpoint string + Headers map[string]string + Requests []map[string]any + ExpectedBrainstoreSpans []map[string]any + SourcePath string + DisplayName string // "provider/name" +} + +// FnMatcher represents a !fn custom YAML tag. +type FnMatcher struct { + Expr string +} + +// StartsWithMatcher represents a !starts_with custom YAML tag. +type StartsWithMatcher struct { + Prefix string +} + +// OrMatcher represents a !or custom YAML tag. +type OrMatcher struct { + Alternatives []any +} + +// loadSpecs walks the spec directory and returns all specs for the given providers. 
+// specRoot can point to the repository root (containing test/llm_span/) or +// directly to the llm_span directory itself. +func loadSpecs(specRoot string, providers []string) ([]LlmSpanSpec, error) { + llmSpanDir := filepath.Join(specRoot, "test", "llm_span") + if _, err := os.Stat(llmSpanDir); err != nil { + // Try using specRoot directly as the llm_span directory. + llmSpanDir = specRoot + } + + providerSet := make(map[string]bool, len(providers)) + for _, p := range providers { + providerSet[p] = true + } + + var specs []LlmSpanSpec + err := filepath.Walk(llmSpanDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() || !strings.HasSuffix(path, ".yaml") { + return nil + } + + // Determine provider from directory structure: llm_span//.yaml + rel, err := filepath.Rel(llmSpanDir, path) + if err != nil { + return err + } + parts := strings.SplitN(filepath.ToSlash(rel), "/", 2) + if len(parts) < 2 { + return nil + } + provider := parts[0] + if !providerSet[provider] { + return nil + } + + spec, err := loadSpec(path, provider) + if err != nil { + return fmt.Errorf("loading spec %s: %w", path, err) + } + specs = append(specs, spec) + + return nil + }) + if err != nil { + return nil, err + } + + sort.Slice(specs, func(i, j int) bool { + return specs[i].DisplayName < specs[j].DisplayName + }) + + return specs, nil +} + +// loadSpec parses a single YAML spec file. +func loadSpec(path, provider string) (LlmSpanSpec, error) { + data, err := os.ReadFile(path) + if err != nil { + return LlmSpanSpec{}, err + } + + // Parse into a yaml.Node tree to handle custom tags. 
+ var doc yaml.Node + if err := yaml.Unmarshal(data, &doc); err != nil { + return LlmSpanSpec{}, fmt.Errorf("parsing YAML: %w", err) + } + + if doc.Kind != yaml.DocumentNode || len(doc.Content) == 0 { + return LlmSpanSpec{}, fmt.Errorf("unexpected YAML structure in %s", path) + } + + raw := resolveNode(doc.Content[0]) + rawMap, ok := raw.(map[string]any) + if !ok { + return LlmSpanSpec{}, fmt.Errorf("expected map at top level of %s", path) + } + + // Resolve variables and template substitution. + variables := resolveVariables(rawMap) + if len(variables) > 0 { + substituted, ok := substituteTemplates(rawMap, variables).(map[string]any) + if !ok { + return LlmSpanSpec{}, fmt.Errorf("template substitution did not produce a map in %s", path) + } + rawMap = substituted + } + + spec := LlmSpanSpec{ + Name: stringField(rawMap, "name"), + Type: stringField(rawMap, "type"), + Provider: provider, + Endpoint: stringField(rawMap, "endpoint"), + SourcePath: path, + } + spec.DisplayName = spec.Provider + "/" + spec.Name + + // Parse headers. + if h, ok := rawMap["headers"]; ok { + if hm, ok := h.(map[string]any); ok { + spec.Headers = make(map[string]string, len(hm)) + for k, v := range hm { + spec.Headers[k] = fmt.Sprintf("%v", v) + } + } + } + + // Parse requests. + if reqs, ok := rawMap["requests"]; ok { + if reqList, ok := reqs.([]any); ok { + for _, r := range reqList { + if rm, ok := r.(map[string]any); ok { + spec.Requests = append(spec.Requests, rm) + } + } + } + } + + // Parse expected spans. + if spans, ok := rawMap["expected_brainstore_spans"]; ok { + if spanList, ok := spans.([]any); ok { + for _, s := range spanList { + if sm, ok := s.(map[string]any); ok { + spec.ExpectedBrainstoreSpans = append(spec.ExpectedBrainstoreSpans, sm) + } + } + } + } + + return spec, nil +} + +// resolveNode converts a yaml.Node tree into Go values, handling custom tags. 
+func resolveNode(node *yaml.Node) any { + switch node.Tag { + case "!fn": + return FnMatcher{Expr: node.Value} + case "!starts_with": + return StartsWithMatcher{Prefix: node.Value} + case "!or": + // !or is applied to a sequence. + if node.Kind == yaml.SequenceNode { + items := make([]any, len(node.Content)) + for i, child := range node.Content { + items[i] = resolveNode(child) + } + return OrMatcher{Alternatives: items} + } + return OrMatcher{} + case "!gen": + return resolveGenerator(node.Value) + } + + switch node.Kind { + case yaml.MappingNode: + m := make(map[string]any, len(node.Content)/2) + for i := 0; i < len(node.Content)-1; i += 2 { + key := node.Content[i].Value + val := resolveNode(node.Content[i+1]) + m[key] = val + } + return m + + case yaml.SequenceNode: + items := make([]any, len(node.Content)) + for i, child := range node.Content { + items[i] = resolveNode(child) + } + return items + + case yaml.ScalarNode: + return resolveScalar(node) + + case yaml.AliasNode: + return resolveNode(node.Alias) + + default: + return node.Value + } +} + +// resolveScalar converts a YAML scalar node into the appropriate Go type. +func resolveScalar(node *yaml.Node) any { + // Unmarshal using yaml.v3's type inference for scalars. + var val any + if err := node.Decode(&val); err != nil { + return node.Value + } + return val +} + +// resolveGenerator resolves a !gen tag value. +func resolveGenerator(name string) string { + switch name { + case "test_runner_client": + return "go-btx" + case "vcr_nonce": + if vcr.GetVCRMode() == vcr.ModeReplay { + return "replay-nonce" + } + return uuid.New().String()[:8] + default: + return name + } +} + +// resolveVariables extracts and resolves the "variables" map from a raw spec. 
func resolveVariables(rawMap map[string]any) map[string]string {
	varMap, ok := rawMap["variables"].(map[string]any)
	if !ok {
		// Key missing, or present but not a map.
		return nil
	}
	result := make(map[string]string, len(varMap))
	for k, v := range varMap {
		// Non-string values are rendered with their default formatting.
		result[k] = fmt.Sprintf("%v", v)
	}
	return result
}

// substituteTemplates recursively replaces {{var}} placeholders in strings.
//
// Strings are rewritten; maps (map[string]any) and lists ([]any) are copied
// with their values substituted, leaving the input untouched; every other
// value is returned as-is. Placeholders with no matching variable are left in
// place.
//
// Variables are applied in sorted-name order. Ranging over vars directly would
// use Go's randomized map order, so a variable whose value contains another
// variable's placeholder could resolve differently from one run to the next.
func substituteTemplates(val any, vars map[string]string) any {
	names := make([]string, 0, len(vars))
	for name := range vars {
		names = append(names, name)
	}
	sort.Strings(names)
	return substituteTemplatesOrdered(val, vars, names)
}

// substituteTemplatesOrdered is the recursive worker for substituteTemplates;
// names lists the keys of vars in the order the replacements are applied.
func substituteTemplatesOrdered(val any, vars map[string]string, names []string) any {
	switch v := val.(type) {
	case string:
		result := v
		for _, name := range names {
			result = strings.ReplaceAll(result, "{{"+name+"}}", vars[name])
		}
		return result
	case map[string]any:
		m := make(map[string]any, len(v))
		for key, value := range v {
			m[key] = substituteTemplatesOrdered(value, vars, names)
		}
		return m
	case []any:
		items := make([]any, len(v))
		for i, item := range v {
			items[i] = substituteTemplatesOrdered(item, vars, names)
		}
		return items
	default:
		return val
	}
}

// stringField extracts a string field from a map, returning "" if missing.
+func stringField(m map[string]any, key string) string { + if v, ok := m[key]; ok { + if s, ok := v.(string); ok { + return s + } + } + return "" +} diff --git a/btx/spec_ref.txt b/btx/spec_ref.txt new file mode 100644 index 00000000..41a28195 --- /dev/null +++ b/btx/spec_ref.txt @@ -0,0 +1 @@ +v0.0.7 diff --git a/btx/testdata/cassettes/anthropic/attachments.yaml b/btx/testdata/cassettes/anthropic/attachments.yaml new file mode 100644 index 00000000..da005897 --- /dev/null +++ b/btx/testdata/cassettes/anthropic/attachments.yaml @@ -0,0 +1,106 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 337 + transfer_encoding: [] + trailer: {} + host: "" + remote_addr: "" + request_uri: "" + body: '{"max_tokens":128,"messages":[{"content":[{"text":"What color is this image?","type":"text"},{"source":{"data":"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==","media_type":"image/png","type":"base64"},"type":"image"}],"role":"user"}],"model":"claude-haiku-4-5-20251001","temperature":0}' + form: {} + headers: + Accept: + - application/json + Anthropic-Version: + - "2023-06-01" + Content-Type: + - application/json + User-Agent: + - Anthropic/Go 1.23.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - go + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 1.23.0 + X-Stainless-Retry-Count: + - "0" + X-Stainless-Runtime: + - go + X-Stainless-Runtime-Version: + - go1.26.3 + X-Stainless-Timeout: + - "600" + url: https://api.anthropic.com/v1/messages + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"model":"claude-haiku-4-5-20251001","id":"msg_01M8mYj6B1hkuvMYo2U6TF3s","type":"message","role":"assistant","content":[{"type":"text","text":"This image is **red**. 
It appears to be a small red dot or circular shape against a white background."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":17,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":26,"service_tier":"standard","inference_geo":"not_available"}}' + headers: + Anthropic-Ratelimit-Input-Tokens-Limit: + - "4000000" + Anthropic-Ratelimit-Input-Tokens-Remaining: + - "4000000" + Anthropic-Ratelimit-Input-Tokens-Reset: + - "2026-05-21T04:26:08Z" + Anthropic-Ratelimit-Output-Tokens-Limit: + - "800000" + Anthropic-Ratelimit-Output-Tokens-Remaining: + - "800000" + Anthropic-Ratelimit-Output-Tokens-Reset: + - "2026-05-21T04:26:09Z" + Anthropic-Ratelimit-Requests-Limit: + - "20000" + Anthropic-Ratelimit-Requests-Remaining: + - "19999" + Anthropic-Ratelimit-Requests-Reset: + - "2026-05-21T04:26:08Z" + Anthropic-Ratelimit-Tokens-Limit: + - "4800000" + Anthropic-Ratelimit-Tokens-Remaining: + - "4800000" + Anthropic-Ratelimit-Tokens-Reset: + - "2026-05-21T04:26:08Z" + Cf-Cache-Status: + - DYNAMIC + Cf-Ray: + - 9ff0d23a1e808691-SEA + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 21 May 2026 04:26:09 GMT + Request-Id: + - req_011CbF5bSRAo6cP1RVUaGQ87 + Server: + - cloudflare + Set-Cookie: + - _cfuvid=.tCnp8PRS.MJ6NGnqhgx1IWSFtf6bAt_6GF_Gjcpr2Q-1779337568.3377452-1.0.1.1-iIkSKggr3QDIGHCdaUT5pQeqagV4Br29LfBPhTLGvGY; HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Traceresponse: + - 00-32d1a1419d2f3c0d0e1a043ba13ac9d4-c1ede22db6b12faa-01 + Vary: + - Accept-Encoding + X-Envoy-Upstream-Service-Time: + - "864" + X-Robots-Tag: + - none + status: 200 OK + code: 200 + duration: 1.105611083s diff --git a/btx/testdata/cassettes/anthropic/messages.yaml 
b/btx/testdata/cassettes/anthropic/messages.yaml new file mode 100644 index 00000000..363b9102 --- /dev/null +++ b/btx/testdata/cassettes/anthropic/messages.yaml @@ -0,0 +1,106 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 232 + transfer_encoding: [] + trailer: {} + host: "" + remote_addr: "" + request_uri: "" + body: '{"max_tokens":128,"messages":[{"content":[{"text":"What is the capital of France?","type":"text"}],"role":"user"}],"model":"claude-haiku-4-5-20251001","temperature":0,"system":[{"text":"You are a helpful assistant.","type":"text"}]}' + form: {} + headers: + Accept: + - application/json + Anthropic-Version: + - "2023-06-01" + Content-Type: + - application/json + User-Agent: + - Anthropic/Go 1.23.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - go + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 1.23.0 + X-Stainless-Retry-Count: + - "0" + X-Stainless-Runtime: + - go + X-Stainless-Runtime-Version: + - go1.26.3 + X-Stainless-Timeout: + - "600" + url: https://api.anthropic.com/v1/messages + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"model":"claude-haiku-4-5-20251001","id":"msg_016h886hcjVsv71vC9gJDGyo","type":"message","role":"assistant","content":[{"type":"text","text":"The capital of France is Paris."}],"stop_reason":"end_turn","stop_sequence":null,"stop_details":null,"usage":{"input_tokens":20,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":10,"service_tier":"standard","inference_geo":"not_available"}}' + headers: + Anthropic-Ratelimit-Input-Tokens-Limit: + - "4000000" + Anthropic-Ratelimit-Input-Tokens-Remaining: + - "4000000" + Anthropic-Ratelimit-Input-Tokens-Reset: + - "2026-05-21T04:26:11Z" + 
Anthropic-Ratelimit-Output-Tokens-Limit: + - "800000" + Anthropic-Ratelimit-Output-Tokens-Remaining: + - "800000" + Anthropic-Ratelimit-Output-Tokens-Reset: + - "2026-05-21T04:26:11Z" + Anthropic-Ratelimit-Requests-Limit: + - "20000" + Anthropic-Ratelimit-Requests-Remaining: + - "19999" + Anthropic-Ratelimit-Requests-Reset: + - "2026-05-21T04:26:11Z" + Anthropic-Ratelimit-Tokens-Limit: + - "4800000" + Anthropic-Ratelimit-Tokens-Remaining: + - "4800000" + Anthropic-Ratelimit-Tokens-Reset: + - "2026-05-21T04:26:11Z" + Cf-Cache-Status: + - DYNAMIC + Cf-Ray: + - 9ff0d24c3e288691-SEA + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Thu, 21 May 2026 04:26:11 GMT + Request-Id: + - req_011CbF5beoZ6sEE1M4RmEVHX + Server: + - cloudflare + Set-Cookie: + - _cfuvid=YElUVazmLwof99qfTTNNaIRUBBLx8.q8ULk0imAl7Jw-1779337571.234837-1.0.1.1-.wMOd7SIfhOAQQuedEWH1sg_z4FSY1pZYjM_tZCYKhk; HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Traceresponse: + - 00-3cdce7558c22cb834149cb8fbd76e28e-288340abbe20c310-01 + Vary: + - Accept-Encoding + X-Envoy-Upstream-Service-Time: + - "393" + X-Robots-Tag: + - none + status: 200 OK + code: 200 + duration: 542.257416ms diff --git a/btx/testdata/cassettes/anthropic/streaming.yaml b/btx/testdata/cassettes/anthropic/streaming.yaml new file mode 100644 index 00000000..0c1c08c8 --- /dev/null +++ b/btx/testdata/cassettes/anthropic/streaming.yaml @@ -0,0 +1,127 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 234 + transfer_encoding: [] + trailer: {} + host: "" + remote_addr: "" + request_uri: "" + body: '{"max_tokens":128,"messages":[{"content":[{"text":"Count from 1 to 5.","type":"text"}],"role":"user"}],"model":"claude-haiku-4-5-20251001","temperature":0,"system":[{"text":"You are a helpful 
assistant.","type":"text"}],"stream":true}' + form: {} + headers: + Accept: + - application/json + Anthropic-Version: + - "2023-06-01" + Content-Type: + - application/json + User-Agent: + - Anthropic/Go 1.23.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - go + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 1.23.0 + X-Stainless-Retry-Count: + - "0" + X-Stainless-Runtime: + - go + X-Stainless-Runtime-Version: + - go1.26.3 + url: https://api.anthropic.com/v1/messages + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: |+ + event: message_start + data: {"type":"message_start","message":{"model":"claude-haiku-4-5-20251001","id":"msg_01PsZbf871hmDixkBUBvWRYx","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"stop_details":null,"usage":{"input_tokens":22,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard","inference_geo":"not_available"}} } + + event: content_block_start + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + event: ping + data: {"type": "ping"} + + event: content_block_delta + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"1\n2\n3\n4\n5"} } + + event: content_block_stop + data: {"type":"content_block_stop","index":0 } + + event: message_delta + data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null,"stop_details":null},"usage":{"input_tokens":22,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":13} } + + event: message_stop + data: {"type":"message_stop" } + + headers: + Anthropic-Ratelimit-Input-Tokens-Limit: + - "4000000" + Anthropic-Ratelimit-Input-Tokens-Remaining: + - "4000000" + 
Anthropic-Ratelimit-Input-Tokens-Reset: + - "2026-05-21T04:26:12Z" + Anthropic-Ratelimit-Output-Tokens-Limit: + - "800000" + Anthropic-Ratelimit-Output-Tokens-Remaining: + - "800000" + Anthropic-Ratelimit-Output-Tokens-Reset: + - "2026-05-21T04:26:12Z" + Anthropic-Ratelimit-Requests-Limit: + - "20000" + Anthropic-Ratelimit-Requests-Remaining: + - "19999" + Anthropic-Ratelimit-Requests-Reset: + - "2026-05-21T04:26:12Z" + Anthropic-Ratelimit-Tokens-Limit: + - "4800000" + Anthropic-Ratelimit-Tokens-Remaining: + - "4800000" + Anthropic-Ratelimit-Tokens-Reset: + - "2026-05-21T04:26:12Z" + Cache-Control: + - no-cache + Cf-Cache-Status: + - DYNAMIC + Cf-Ray: + - 9ff0d255deac8691-SEA + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Thu, 21 May 2026 04:26:13 GMT + Request-Id: + - req_011CbF5bmPS5YCHXzJNQSv9q + Server: + - cloudflare + Set-Cookie: + - _cfuvid=fQcqdy30HC0QSEW2AVBhEFtRqO8nRQglireKzQCvrzQ-1779337572.7732944-1.0.1.1-5uge4xXRxqqfR2yAGO6LvPBDjpzVr3tyPEf.NjtqKGI; HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Traceresponse: + - 00-68b5d08b4ac4fe06e60d2fcb465f84fe-68fa561f7dec267d-01 + Vary: + - Accept-Encoding + X-Envoy-Upstream-Service-Time: + - "373" + X-Robots-Tag: + - none + status: 200 OK + code: 200 + duration: 609.740125ms diff --git a/btx/testdata/cassettes/bedrock/attachments.yaml b/btx/testdata/cassettes/bedrock/attachments.yaml new file mode 100644 index 00000000..beb1d33d --- /dev/null +++ b/btx/testdata/cassettes/bedrock/attachments.yaml @@ -0,0 +1,50 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: "" + proto_major: 0 + proto_minor: 0 + content_length: 224 + transfer_encoding: [] + trailer: {} + host: "" + remote_addr: "" + request_uri: "" + body: '{"messages":[{"content":[{"text":"What color is this 
image?"},{"image":{"format":"png","source":{"bytes":"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="}}}],"role":"user"}]}' + form: {} + headers: + Amz-Sdk-Invocation-Id: + - c1d89c2c-c182-4353-a8f9-5dc733535e81 + Amz-Sdk-Request: + - attempt=1; max=3 + Content-Type: + - application/json + User-Agent: + - aws-sdk-go-v2/1.41.5 ua/2.1 os/macos lang/go#1.26.3 md/GOOS#darwin md/GOARCH#arm64 api/bedrockruntime#1.50.4 m/E,g + X-Amz-Date: + - 20260519T215705Z + url: https://bedrock-runtime.us-east-2.amazonaws.com/model/us.amazon.nova-lite-v1%3A0/converse + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 350 + uncompressed: false + body: '{"metrics":{"latencyMs":525},"output":{"message":{"content":[{"text":"I can''t see any image in the provided information. Could you please provide more context or a description of the image so I can assist you better?"}],"role":"assistant"}},"stopReason":"end_turn","usage":{"inputTokens":536,"outputTokens":32,"serverToolUsage":{},"totalTokens":568}}' + headers: + Content-Length: + - "350" + Content-Type: + - application/json + Date: + - Tue, 19 May 2026 21:57:06 GMT + X-Amzn-Requestid: + - cc12a1c9-0b74-4796-8981-55e8bbf69ce5 + status: 200 OK + code: 200 + duration: 918.572167ms diff --git a/btx/testdata/cassettes/bedrock/converse.yaml b/btx/testdata/cassettes/bedrock/converse.yaml new file mode 100644 index 00000000..3fe291cd --- /dev/null +++ b/btx/testdata/cassettes/bedrock/converse.yaml @@ -0,0 +1,50 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: "" + proto_major: 0 + proto_minor: 0 + content_length: 84 + transfer_encoding: [] + trailer: {} + host: "" + remote_addr: "" + request_uri: "" + body: '{"messages":[{"content":[{"text":"What is the capital of France?"}],"role":"user"}]}' + form: {} + headers: + Amz-Sdk-Invocation-Id: + - 591a200e-ad13-4267-bbfd-6747f01d1ce1 + Amz-Sdk-Request: 
+ - attempt=1; max=3 + Content-Type: + - application/json + User-Agent: + - aws-sdk-go-v2/1.41.5 ua/2.1 os/macos lang/go#1.26.3 md/GOOS#darwin md/GOARCH#arm64 api/bedrockruntime#1.50.4 m/E,g + X-Amz-Date: + - 20260521T042614Z + url: https://bedrock-runtime.us-east-2.amazonaws.com/model/us.amazon.nova-lite-v1%3A0/converse + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: 966 + uncompressed: false + body: '{"metrics":{"latencyMs":955},"output":{"message":{"content":[{"text":"The capital of France is Paris. Paris is not only the capital but also the largest city in France. It is situated in the northern central part of the country, along the Seine River. The city is famous for its rich history, culture, and landmarks, such as the Eiffel Tower, Notre-Dame Cathedral, the Louvre Museum, and the Champs-Élysées. Paris is a global center for art, fashion, gastronomy, and culture, and it is well-known for its significant contributions to various fields, including philosophy, science, and politics. The city is divided into 20 arrondissements (districts), each with its own unique character and attractions. 
Paris is also a major hub for transportation, education, and business, making it one of the most important cities in the world."}],"role":"assistant"}},"stopReason":"end_turn","usage":{"inputTokens":7,"outputTokens":164,"serverToolUsage":{},"totalTokens":171}}' + headers: + Content-Length: + - "966" + Content-Type: + - application/json + Date: + - Thu, 21 May 2026 04:26:15 GMT + X-Amzn-Requestid: + - f54ee992-aef1-4b63-82d2-9376602077bd + status: 200 OK + code: 200 + duration: 1.276778958s diff --git a/btx/testdata/cassettes/bedrock/converse_stream.yaml b/btx/testdata/cassettes/bedrock/converse_stream.yaml new file mode 100644 index 00000000..95152d45 --- /dev/null +++ b/btx/testdata/cassettes/bedrock/converse_stream.yaml @@ -0,0 +1,133 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: "" + proto_major: 0 + proto_minor: 0 + content_length: 72 + transfer_encoding: [] + trailer: {} + host: "" + remote_addr: "" + request_uri: "" + body: '{"messages":[{"content":[{"text":"count to 10 slowly"}],"role":"user"}]}' + form: {} + headers: + Amz-Sdk-Invocation-Id: + - cfce2a80-973a-4857-9088-ef97e0b24b44 + Amz-Sdk-Request: + - attempt=1; max=3 + Content-Type: + - application/json + User-Agent: + - aws-sdk-go-v2/1.41.5 ua/2.1 os/macos lang/go#1.26.3 md/GOOS#darwin md/GOARCH#arm64 api/bedrockruntime#1.50.4 m/E,g + X-Amz-Date: + - 20260521T042616Z + url: https://bedrock-runtime.us-east-2.amazonaws.com/model/us.amazon.nova-lite-v1%3A0/converse-stream + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: false + body: !!binary | + AAAAoAAAAFKQYHAnCzpldmVudC10eXBlBwAMbWVzc2FnZVN0YXJ0DTpjb250ZW50LXR5cG + UHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2Rl + ZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJIiwicm9sZSI6ImFzc2lzdGFudCJ9Cd + eWBQAAANUAAABX0TjrFws6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250 + 
ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY2 + 9udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiJTdXJlIn0sInAiOiJhYmNk + ZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFlaMDEyMy + J9f0iu/QAAAL8AAABXArqE+ws6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpj + b250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudH + siY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiIsIEkifSwicCI6ImFi + Y2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJIn3NzCXNAAAA3AAAAFfcKIlmCz + pldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxp + Y2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleC + I6MCwiZGVsdGEiOnsidGV4dCI6IidsbCBjb3VudCJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5v + cHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NSJ9v5+cwgAAAJ + 8AAABXw3ur/ws6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5 + cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudE + Jsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiIgdG8gIn0sInAiOiJhYiJ9MUucigAA + AMsAAABXDujC9As6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LX + R5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVu + dEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiIxMCBzbG93bHkifSwicCI6ImFiY2 + RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PIn02umjoAAAAwwAAAFc+ + mIk1CzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEG + FwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJ + bmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiBmb3IgeW91OlxuXG4ifSwicCI6ImFiY2RlZm + doaWprbG1ub3BxcnN0dXZ3eHl6QUJDIn0nIF3sAAAAnwAAAFfDe6v/CzpldmVudC10eXBl + BwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb2 + 4NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEi + OnsidGV4dCI6IjEuLi4ifSwicCI6ImFiIn22wrCWAAAAygAAAFcziOtECzpldmVudC10eX + BlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pz + b24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdG + 
EiOnsidGV4dCI6IiAoIn0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RF + RkdISUpLTE1OT1BRUlNUVSJ9Z8nMrQAAANEAAABXJLhN1ws6ZXZlbnQtdHlwZQcAEWNvbn + RlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNz + YWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleH + QiOiJwYXVzZSJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElK + S0xNTk9QUVJTVFVWV1hZIn0SqTFgAAAAuQAAAFeN+nFbCzpldmVudC10eXBlBwARY29udG + VudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3Nh + Z2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dC + I6IilcbiJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkMifYyY6vwAAACj + AAAAV6eq/ngLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eX + BlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRC + bG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoiMi4uLiJ9LCJwIjoiYWJjZGVmIn0O+7 + j6AAAA3AAAAFfcKIlmCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRl + bnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb2 + 50ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiAocGF1c2UpXG4zLi4uIn0s + InAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OT1BRUlNUVV + ZXWFlaMCJ9YcsptwAAAKsAAABXl9q1uQs6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0Rl + bHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAA + VldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiIgKHBhdXNl + KVxuNC4uLiJ9LCJwIjoiYWJjZCJ9NYm6QAAAAMkAAABXdCiRlAs6ZXZlbnQtdHlwZQcAEW + NvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTpt + ZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7In + RleHQiOiIgKHBhdXNlKVxuNS4uLiAoIn0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2 + d3h5ekFCQ0RFRiJ9Z2sFFwAAANQAAABX7FjCpws6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG + 9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5 + cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiJwYX + VzZSlcbjYuLi4gKHBhdXNlIn0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFC + 
Q0RFRkdISUpLTE1OIn3oy+T1AAAAtQAAAFdICpxaCzpldmVudC10eXBlBwARY29udGVudE + Jsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2Ut + dHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6Ii + lcbjcuLi4gKHBhdXNlKVxuIn0sInAiOiJhYmNkZWZnaGlqayJ9mkf4DQAAAM8AAABX+2hk + NAs6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcH + BsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5k + ZXgiOjAsImRlbHRhIjp7InRleHQiOiI4Li4uIChwYXVzZSlcbjkifSwicCI6ImFiY2RlZm + doaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTSJ9hZetowAAAK0AAABXGJpAGQs6 + ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaW + NhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgi + OjAsImRlbHRhIjp7InRleHQiOiIuLi4gKHBhdXNlKVxuMTAifSwicCI6ImFiY2RlIn0OvZ + amAAAAuQAAAFeN+nFbCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRl + bnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb2 + 50ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6Ii4ifSwicCI6ImFiY2RlZmdo + aWprbG1ub3BxcnN0dXZ3eHl6QUJDREUifWMWT5YAAACvAAAAVhVdI+8LOmV2ZW50LXR5cG + UHABBjb250ZW50QmxvY2tTdG9wDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29u + DTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsInAiOiJhYm + NkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OT1BRIn1K+3IPAAAAswAA + AFEuKczPCzpldmVudC10eXBlBwALbWVzc2FnZVN0b3ANOmNvbnRlbnQtdHlwZQcAEGFwcG + xpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJwIjoiYWJjZGVmZ2hpamts + bW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFVWV1giLCJzdG9wUmVhc29uIj + oiZW5kX3R1cm4ifRZXc3YAAAD+AAAATgNCXcILOmV2ZW50LXR5cGUHAAhtZXRhZGF0YQ06 + Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbn + R7Im1ldHJpY3MiOnsibGF0ZW5jeU1zIjo1NjJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFy + c3R1dnd4eXpBQkNERUZHSElKS0xNTk9QIiwidXNhZ2UiOnsiaW5wdXRUb2tlbnMiOjYsIm + 91dHB1dFRva2VucyI6NjMsInNlcnZlclRvb2xVc2FnZSI6e30sInRvdGFsVG9rZW5zIjo2 + OX19Qqf76w== + headers: + Content-Type: + - application/vnd.amazon.eventstream + Date: + - Thu, 21 
May 2026 04:26:16 GMT + X-Amzn-Requestid: + - 959700b8-5976-49ff-a3bd-b604f7ada051 + status: 200 OK + code: 200 + duration: 509.692125ms diff --git a/btx/testdata/cassettes/google/attachments.yaml b/btx/testdata/cassettes/google/attachments.yaml new file mode 100644 index 00000000..b287b27f --- /dev/null +++ b/btx/testdata/cassettes/google/attachments.yaml @@ -0,0 +1,98 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 261 + transfer_encoding: [] + trailer: {} + host: generativelanguage.googleapis.com + remote_addr: "" + request_uri: "" + body: | + {"contents":[{"parts":[{"text":"What color is this image?"},{"inlineData":{"data":"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==","mimeType":"image/png"}}],"role":"user"}],"generationConfig":{"temperature":0}} + form: {} + headers: + Content-Type: + - application/json + User-Agent: + - google-genai-sdk/1.41.0 gl-go/go1.26.3 + X-Goog-Api-Client: + - google-genai-sdk/1.41.0 gl-go/go1.26.3 + X-Server-Timeout: + - "30" + url: https://generativelanguage.googleapis.com/v1beta/models/gemini-3.1-flash-lite-preview:generateContent + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: | + { + "candidates": [ + { + "content": { + "parts": [ + { + "text": "The color of the image is red.", + "thoughtSignature": "EjQKMgEMOdbHnM18UV2cSxC6K+nzWGGY+YuA69GEp2hTs954u/O1cyhwokU9prkQoYO2XR4b" + } + ], + "role": "model" + }, + "finishReason": "STOP", + "index": 0 + } + ], + "usageMetadata": { + "promptTokenCount": 1096, + "candidatesTokenCount": 8, + "totalTokenCount": 1104, + "promptTokensDetails": [ + { + "modality": "IMAGE", + "tokenCount": 1089 + }, + { + "modality": "TEXT", + "tokenCount": 7 + } + ], + "serviceTier": "standard" + }, + "modelVersion": "gemini-3.1-flash-lite", + "responseId": 
"aokOasWeFPfgz7IPtpToAQ" + } + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Type: + - application/json; charset=UTF-8 + Date: + - Thu, 21 May 2026 04:26:18 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=824 + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Gemini-Service-Tier: + - standard + X-Xss-Protection: + - "0" + status: 200 OK + code: 200 + duration: 984.814417ms diff --git a/btx/testdata/cassettes/google/generate_content.yaml b/btx/testdata/cassettes/google/generate_content.yaml new file mode 100644 index 00000000..60b080d2 --- /dev/null +++ b/btx/testdata/cassettes/google/generate_content.yaml @@ -0,0 +1,94 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 120 + transfer_encoding: [] + trailer: {} + host: generativelanguage.googleapis.com + remote_addr: "" + request_uri: "" + body: | + {"contents":[{"parts":[{"text":"What is the capital of France?"}],"role":"user"}],"generationConfig":{"temperature":0}} + form: {} + headers: + Content-Type: + - application/json + User-Agent: + - google-genai-sdk/1.41.0 gl-go/go1.26.3 + X-Goog-Api-Client: + - google-genai-sdk/1.41.0 gl-go/go1.26.3 + X-Server-Timeout: + - "30" + url: https://generativelanguage.googleapis.com/v1beta/models/gemini-3.1-flash-lite-preview:generateContent + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: | + { + "candidates": [ + { + "content": { + "parts": [ + { + "text": "The capital of France is Paris.", + "thoughtSignature": "EjQKMgEMOdbH97nYeIlnQB7to1yne75zWcBxfVBCB0jJ77el87h0p0K9TTM7RijkSkZ6Zj76" + } + ], + "role": "model" + }, + "finishReason": "STOP", + "index": 0 + } + ], + "usageMetadata": { + "promptTokenCount": 8, + "candidatesTokenCount": 7, + 
"totalTokenCount": 15, + "promptTokensDetails": [ + { + "modality": "TEXT", + "tokenCount": 8 + } + ], + "serviceTier": "standard" + }, + "modelVersion": "gemini-3.1-flash-lite", + "responseId": "bIkOarfCELjRz7IP0erfWA" + } + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Type: + - application/json; charset=UTF-8 + Date: + - Thu, 21 May 2026 04:26:20 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=610 + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Gemini-Service-Tier: + - standard + X-Xss-Protection: + - "0" + status: 200 OK + code: 200 + duration: 814.640625ms diff --git a/btx/testdata/cassettes/openai/attachments.yaml b/btx/testdata/cassettes/openai/attachments.yaml new file mode 100644 index 00000000..11be7469 --- /dev/null +++ b/btx/testdata/cassettes/openai/attachments.yaml @@ -0,0 +1,140 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 351 + transfer_encoding: [] + trailer: {} + host: "" + remote_addr: "" + request_uri: "" + body: '{"messages":[{"content":"you are a helpful assistant","role":"system"},{"content":[{"text":"What color is this image?","type":"text"},{"image_url":{"url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="},"type":"image_url"}],"role":"user"}],"model":"gpt-4o-mini","temperature":0}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + User-Agent: + - OpenAI/Go 1.12.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - go + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 1.12.0 + X-Stainless-Retry-Count: + - "0" + X-Stainless-Runtime: + - go + X-Stainless-Runtime-Version: + - go1.26.3 + url: https://api.openai.com/v1/chat/completions + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + 
proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: | + { + "id": "chatcmpl-DhpHCsG356A2cf8gVzpeyXxVLwm8W", + "object": "chat.completion", + "created": 1779337582, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "The image is red.", + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 8522, + "completion_tokens": 5, + "total_tokens": 8527, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_7947799c5e" + } + headers: + Access-Control-Expose-Headers: + - X-Request-ID + - CF-Ray + - CF-Ray + Alt-Svc: + - h3=":443"; ma=86400 + Cf-Cache-Status: + - DYNAMIC + Cf-Ray: + - 9ff0d2915a6f9b6d-SEA + Content-Type: + - application/json + Date: + - Thu, 21 May 2026 04:26:23 GMT + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - "621" + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - "2020-10-01" + Server: + - cloudflare + Set-Cookie: + - __cf_bm=oEkmKL2D2cr3R6XYwUo4xs69uYulw6dGy1TkgTvwfG4-1779337582.299653-1.0.1.1-cXne4H6qjsJScLGgGCs8INyl1haPyNy8UwsPng7eVR8NzSwpw_7yxEBrJfCYcD7EQkcl8WjGwEoJkfiRfjYZ4tJjNK0skEc.Lk210M0iM8kf4jVoHvMlhwV3T5EwxZMn; HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Thu, 21 May 2026 04:56:23 GMT + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + X-Openai-Proxy-Wasm: + - v0.1 + X-Ratelimit-Limit-Input-Images: + - "50000" + X-Ratelimit-Limit-Requests: + - "30000" + X-Ratelimit-Limit-Tokens: + - "150000000" + X-Ratelimit-Remaining-Input-Images: + - "49999" + X-Ratelimit-Remaining-Requests: + - 
"29999" + X-Ratelimit-Remaining-Tokens: + - "149999220" + X-Ratelimit-Reset-Input-Images: + - 1ms + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_b415a1455bf74d109a3333153bc11bbb + status: 200 OK + code: 200 + duration: 1.122033792s diff --git a/btx/testdata/cassettes/openai/completions.yaml b/btx/testdata/cassettes/openai/completions.yaml new file mode 100644 index 00000000..761108b3 --- /dev/null +++ b/btx/testdata/cassettes/openai/completions.yaml @@ -0,0 +1,134 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 169 + transfer_encoding: [] + trailer: {} + host: "" + remote_addr: "" + request_uri: "" + body: '{"messages":[{"content":"you are a helpful assistant","role":"system"},{"content":"What is the capital of France?","role":"user"}],"model":"gpt-4o-mini","temperature":0}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + User-Agent: + - OpenAI/Go 1.12.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - go + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 1.12.0 + X-Stainless-Retry-Count: + - "0" + X-Stainless-Runtime: + - go + X-Stainless-Runtime-Version: + - go1.26.3 + url: https://api.openai.com/v1/chat/completions + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: | + { + "id": "chatcmpl-DhpHEWbJYNCNBEaT2DDRd2GzV9HEd", + "object": "chat.completion", + "created": 1779337584, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "The capital of France is Paris.", + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 23, + "completion_tokens": 7, + "total_tokens": 30, + "prompt_tokens_details": { + "cached_tokens": 0, + 
"audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_8f0dac2a97" + } + headers: + Access-Control-Expose-Headers: + - X-Request-ID + - CF-Ray + - CF-Ray + Alt-Svc: + - h3=":443"; ma=86400 + Cf-Cache-Status: + - DYNAMIC + Cf-Ray: + - 9ff0d2a00a9d9b6d-SEA + Content-Type: + - application/json + Date: + - Thu, 21 May 2026 04:26:25 GMT + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - "413" + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - "2020-10-01" + Server: + - cloudflare + Set-Cookie: + - __cf_bm=mAEmnsKaUH0J5K.9QmT83qzDXSs_40ncF1sUXmIzVBQ-1779337584.6511145-1.0.1.1-SMtLGT_arcp2jOaZBXXzFnGYNT_0qOd_w1xsZ0Ixc3IAD4s_yIMFn4KBatVfruwMshAoRBTLH5b22IsuSRhVvR1yNqk_78ZpTN6eiz.S1ZMhPV_kAF6NxtkFyafeeVMK; HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Thu, 21 May 2026 04:56:25 GMT + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + X-Openai-Proxy-Wasm: + - v0.1 + X-Ratelimit-Limit-Requests: + - "30000" + X-Ratelimit-Limit-Tokens: + - "150000000" + X-Ratelimit-Remaining-Requests: + - "29999" + X-Ratelimit-Remaining-Tokens: + - "149999982" + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_43dced4ab4254f91844a1f4d0348c68d + status: 200 OK + code: 200 + duration: 814.79975ms diff --git a/btx/testdata/cassettes/openai/reasoning.yaml b/btx/testdata/cassettes/openai/reasoning.yaml new file mode 100644 index 00000000..1652b491 --- /dev/null +++ b/btx/testdata/cassettes/openai/reasoning.yaml @@ -0,0 +1,361 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 219 + transfer_encoding: [] + trailer: {} + host: "" + remote_addr: "" + request_uri: "" + body: 
'{"input":[{"content":"Look at this sequence: 2, 6, 12, 20, 30. What is the pattern and what would be the formula for the nth term?\n","role":"user"}],"model":"o4-mini","reasoning":{"effort":"high","summary":"detailed"}}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + User-Agent: + - OpenAI/Go 1.12.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - go + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 1.12.0 + X-Stainless-Retry-Count: + - "0" + X-Stainless-Runtime: + - go + X-Stainless-Runtime-Version: + - go1.26.3 + url: https://api.openai.com/v1/responses + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: |- + { + "id": "resp_0c2cd20eb299ac8d006a0e8973115881999ccd340d31018ba1", + "object": "response", + "created_at": 1779337587, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1779337594, + "error": null, + "frequency_penalty": 0.0, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "o4-mini-2025-04-16", + "moderation": null, + "output": [ + { + "id": "rs_0c2cd20eb299ac8d006a0e89736de08199af93a017faf6f1e0", + "type": "reasoning", + "summary": [ + { + "type": "summary_text", + "text": "**Exploring the sequence pattern**\n\nThe user's sequence is 2, 6, 12, 20, 30. I notice that these numbers can be expressed as n(n+1) \u2014 for example, 2 = 1*2, 6 = 2*3, and so on. This shows the pattern follows the formula a_n = n(n+1), so they\u2019re also pronic numbers. Alternatively, I observe that the differences between terms increase consistently: +4, +6, +8, +10, indicating that the second differences are constant at 2, which points to a quadratic nature." 
+ }, + { + "type": "summary_text", + "text": "**Clarifying the nth term formula**\n\nI'm working on the sequence formula, realizing that the nth term, a_n, can be expressed as n(n+1) or equivalently n^2 + n. This pattern connects to pronic numbers, where each term results from multiplying two consecutive integers. Starting from n=1 gives us 2, 6, and so on. \n\nThe differences between terms increase consistently by 2, indicating a quadratic nature. So, my final conclusion is that the nth term formula is a_n = n(n+1), identifying these numbers as pronic." + } + ] + }, + { + "id": "msg_0c2cd20eb299ac8d006a0e897a3d648199a583c5e5b3a90739", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": "The easiest way to see what\u2019s going on is to look at the differences:\n\n 2, 6, 12, 20, 30 \n \u21974 \u21976 \u21978 \u219710 \n\nSince the increments go up by 2 each time, the second difference is constant (2), so the sequence is given by a quadratic in n. A more direct observation is:\n\n 2 = 1\u00b72 \n 6 = 2\u00b73 \n 12 = 3\u00b74 \n 20 = 4\u00b75 \n 30 = 5\u00b76 \n\nIn other words the nth term is the product of two consecutive integers:\n\n a\u2099 = n\u2009(n + 1) \n\nEquivalently,\n\n a\u2099 = n\u00b2 + n." 
+ } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "presence_penalty": 0.0, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": "in_memory", + "reasoning": { + "effort": "high", + "summary": "detailed" + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 41, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 1003, + "output_tokens_details": { + "reasoning_tokens": 768 + }, + "total_tokens": 1044 + }, + "user": null, + "metadata": {} + } + headers: + Access-Control-Expose-Headers: + - X-Request-ID + - CF-Ray + - CF-Ray + Alt-Svc: + - h3=":443"; ma=86400 + Cf-Cache-Status: + - DYNAMIC + Cf-Ray: + - 9ff0d2ac38bd9b6d-SEA + Content-Type: + - application/json + Date: + - Thu, 21 May 2026 04:26:34 GMT + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - "8061" + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - "2020-10-01" + Server: + - cloudflare + Set-Cookie: + - __cf_bm=YffoGhR.CQ1MJ4LxzaM3nxMZ0JDjQkDtk6RgJDt_eYQ-1779337586.5951757-1.0.1.1-Tc82esciUit8iigxGSc4NXOP0OL23t4zLu6kI5P.gyXbbG54UKt41_e_KbWUTqwCIUFOE7iYP1kCYLzBFF.wjeP3xoofxy3Lep0b.CMWDyVcA_XZfJMEVCVV8J7jzLOS; HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Thu, 21 May 2026 04:56:34 GMT + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + X-Ratelimit-Limit-Requests: + - "30000" + X-Ratelimit-Limit-Tokens: + - "150000000" + X-Ratelimit-Remaining-Requests: + - "29999" + X-Ratelimit-Remaining-Tokens: + - "149999752" + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_79e4895b3da040ab99ce78d99f1a5034 + 
status: 200 OK + code: 200 + duration: 8.495933791s + - id: 1 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 2191 + transfer_encoding: [] + trailer: {} + host: "" + remote_addr: "" + request_uri: "" + body: '{"input":[{"content":"Look at this sequence: 2, 6, 12, 20, 30. What is the pattern and what would be the formula for the nth term?\n","role":"user"},{"id":"rs_0c2cd20eb299ac8d006a0e89736de08199af93a017faf6f1e0","summary":[{"text":"**Exploring the sequence pattern**\n\nThe user''s sequence is 2, 6, 12, 20, 30. I notice that these numbers can be expressed as n(n+1) — for example, 2 = 1*2, 6 = 2*3, and so on. This shows the pattern follows the formula a_n = n(n+1), so they’re also pronic numbers. Alternatively, I observe that the differences between terms increase consistently: +4, +6, +8, +10, indicating that the second differences are constant at 2, which points to a quadratic nature.","type":"summary_text"},{"text":"**Clarifying the nth term formula**\n\nI''m working on the sequence formula, realizing that the nth term, a_n, can be expressed as n(n+1) or equivalently n^2 + n. This pattern connects to pronic numbers, where each term results from multiplying two consecutive integers. Starting from n=1 gives us 2, 6, and so on. \n\nThe differences between terms increase consistently by 2, indicating a quadratic nature. So, my final conclusion is that the nth term formula is a_n = n(n+1), identifying these numbers as pronic.","type":"summary_text"}],"type":"reasoning"},{"id":"msg_0c2cd20eb299ac8d006a0e897a3d648199a583c5e5b3a90739","content":[{"annotations":[],"text":"The easiest way to see what’s going on is to look at the differences:\n\n 2, 6, 12, 20, 30 \n ↗4 ↗6 ↗8 ↗10 \n\nSince the increments go up by 2 each time, the second difference is constant (2), so the sequence is given by a quadratic in n. 
A more direct observation is:\n\n 2 = 1·2 \n 6 = 2·3 \n 12 = 3·4 \n 20 = 4·5 \n 30 = 5·6 \n\nIn other words the nth term is the product of two consecutive integers:\n\n aₙ = n (n + 1) \n\nEquivalently,\n\n aₙ = n² + n.","type":"output_text"}],"status":"completed","role":"assistant","type":"message"},{"content":"Using the pattern you discovered, what would be the 10th term? And can you find the sum of the first 10 terms?","role":"user"}],"model":"o4-mini","reasoning":{"effort":"high","summary":"detailed"}}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + User-Agent: + - OpenAI/Go 1.12.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - go + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 1.12.0 + X-Stainless-Retry-Count: + - "0" + X-Stainless-Runtime: + - go + X-Stainless-Runtime-Version: + - go1.26.3 + url: https://api.openai.com/v1/responses + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: |- + { + "id": "resp_0c2cd20eb299ac8d006a0e897b27288199a8ec347091012ef2", + "object": "response", + "created_at": 1779337595, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1779337602, + "error": null, + "frequency_penalty": 0.0, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "o4-mini-2025-04-16", + "moderation": null, + "output": [ + { + "id": "rs_0c2cd20eb299ac8d006a0e897bb9fc8199ae7e583d3970e8dc", + "type": "reasoning", + "summary": [ + { + "type": "summary_text", + "text": "**Calculating terms and sums**\n\nThe user asks to find the 10th term using the pattern \\( a_n = n(n+1) \\). For the 10th term, I calculate \\( a_{10} = 10 \\times 11 = 110 \\). Then, I find the sum of the first 10 terms. 
The sum \\( S_{10} = \\sum_{n=1}^{10} n(n+1) = \\sum n^2 + \\sum n \\) equals 440 when calculated. I also mention the closed form for \\( S_n = \\frac{n(n+1)(n+2)}{3} \\)." + }, + { + "type": "summary_text", + "text": "**Presenting results**\n\nI calculate the 10th term as \\(10 \\times 11 = 110\\) and find the sum of the first ten terms to be 440. Additionally, I should mention the closed-form expression for the sum, which is \\(S_n = \\frac{n(n+1)(n+2)}{3}\\). So, the final answers are the 10th term, 110, and the sum, 440, along with the formula for future reference. Keeping these points clear is key!" + } + ] + }, + { + "id": "msg_0c2cd20eb299ac8d006a0e8981fcc88199ba398ddb74e142ad", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": "The nth term is \n a\u2099 = n(n + 1). \n\nSo for n = 10: \n a\u2081\u2080 = 10\u00b711 = 110. \n\nThe sum of the first 10 terms is \n S\u2081\u2080 = \u2211\u2096\u208c\u2081\u00b9\u2070 k(k + 1) \n = \u2211k\u00b2 + \u2211k \n = [10\u00b711\u00b721/6] + [10\u00b711/2] \n = 385 + 55 \n = 440. 
\n\n(Equivalently, S\u2099 = n(n + 1)(n + 2)/3, so S\u2081\u2080 = 10\u00b711\u00b712/3 = 440.)" + } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "presence_penalty": 0.0, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": "in_memory", + "reasoning": { + "effort": "high", + "summary": "detailed" + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 252, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 552, + "output_tokens_details": { + "reasoning_tokens": 320 + }, + "total_tokens": 804 + }, + "user": null, + "metadata": {} + } + headers: + Access-Control-Expose-Headers: + - X-Request-ID + - CF-Ray + - CF-Ray + Alt-Svc: + - h3=":443"; ma=86400 + Cf-Cache-Status: + - DYNAMIC + Cf-Ray: + - 9ff0d2e15d939b6d-SEA + Content-Type: + - application/json + Date: + - Thu, 21 May 2026 04:26:42 GMT + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - "7359" + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - "2020-10-01" + Server: + - cloudflare + Set-Cookie: + - __cf_bm=Y9PzXOtB7xeKcN1_PXJQYfPdvVHfQ_yxsVMIEyjUw94-1779337595.0944118-1.0.1.1-R0lfO9ih0qDZ_BPwDI.yDQMGZ5HS52qQ7kR5FwurWcEd4yvZPj_ReiYzuyUsyXxtLj41JvzQ4yyXDvHjMt3BOho8sFeuJbdlXVBMEMch.qaGKaZj2LfMETX6EJpQaZVU; HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Thu, 21 May 2026 04:56:42 GMT + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + X-Ratelimit-Limit-Requests: + - "30000" + X-Ratelimit-Limit-Tokens: + - "150000000" + X-Ratelimit-Remaining-Requests: + - "29999" + X-Ratelimit-Remaining-Tokens: + - "149999540" + X-Ratelimit-Reset-Requests: + - 2ms + 
X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_7bd871fd031544edb26d5c708f3a8bc7 + status: 200 OK + code: 200 + duration: 7.67716125s diff --git a/btx/testdata/cassettes/openai/streaming.yaml b/btx/testdata/cassettes/openai/streaming.yaml new file mode 100644 index 00000000..7cf30596 --- /dev/null +++ b/btx/testdata/cassettes/openai/streaming.yaml @@ -0,0 +1,186 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 239 + transfer_encoding: [] + trailer: {} + host: "" + remote_addr: "" + request_uri: "" + body: '{"messages":[{"content":"you are a thoughtful assistant","role":"system"},{"content":"Count from 1 to 10 slowly.","role":"user"}],"model":"gpt-4o-mini","max_tokens":800,"temperature":0,"stream_options":{"include_usage":true},"stream":true}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + User-Agent: + - OpenAI/Go 1.12.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - go + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 1.12.0 + X-Stainless-Retry-Count: + - "0" + X-Stainless-Runtime: + - go + X-Stainless-Runtime-Version: + - go1.26.3 + url: https://api.openai.com/v1/chat/completions + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: false + body: |+ + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"asDfIQux8"} + + data: 
{"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"Sure"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"X59S07B"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"PwfvZImKrQ"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":" Here"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"p6pfem"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":" we"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"F3D1ktGQ"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":" go"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"f8fg0Lxa"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":":\n\n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"hFvnfQ"} + + data: 
{"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"1"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"gUMqJcuJxS"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"EesTUq4G"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Ir4gK7m"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"l9qbQT17Dg"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"qfzrb5lF"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"GgVLuuz"} + + data: 
{"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"3"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"8bvfv8lN2V"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"o12HbxcB"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"2RctJVk"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"4"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"aojIpPsRNC"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"XOVg4UMf"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"NOieOLd"} + + data: 
{"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"5"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Ru1WTGMfui"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"o1IWVx7w"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"99Xt5C3"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"6"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"pcp1CKiCP2"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"d4ehQwqb"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"mzyeQfl"} + + data: 
{"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"7"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"c0GCb0Ks3O"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"6nXqnGc2"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"kbmRlBo"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"8"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"V6M8eyfjCe"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"tT3iij2t"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"FRM8e35"} + + data: 
{"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"9"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"SaCPFi7iqK"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"HIxk6XKu"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"50U3Qmi"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"10"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"VYAJVCqbA"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"..."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"BlBf7ylt"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":" \n\n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"FYcGM"} + + data: 
{"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"Take"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"7mOq6lb"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":" your"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"3PTzMW"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":" time"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"wBO2Yb"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"GtRdQXG6hV"} + + data: {"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"JoNd0"} + + data: 
{"id":"chatcmpl-DhpHYgdDv0uqctcgkThvo4PYmKasS","object":"chat.completion.chunk","created":1779337604,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0ed05801bb","choices":[],"usage":{"prompt_tokens":25,"completion_tokens":40,"total_tokens":65,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"soKjQi73B0"} + + data: [DONE] + + headers: + Access-Control-Expose-Headers: + - X-Request-ID + - CF-Ray + - CF-Ray + Alt-Svc: + - h3=":443"; ma=86400 + Cf-Cache-Status: + - DYNAMIC + Cf-Ray: + - 9ff0d318cd6f9b6d-SEA + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Thu, 21 May 2026 04:26:44 GMT + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - "418" + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - "2020-10-01" + Server: + - cloudflare + Set-Cookie: + - __cf_bm=OBBJ5BX4gpY5tyMW9Toms_wCzdLR8TZAWdhZCqZsN3s-1779337603.965605-1.0.1.1-NK9GUTTbMEEJ1T3SWn.l8jodATq_Hdv53WzUmyr6pzAj08.eHsEc4UClQN.csPrzCeb2QBxJg.Pk4X4EtmcX4d8BY2TOtuLuaWsWuZRIS4o53uSPouOOUkuxHGDiKJTE; HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Thu, 21 May 2026 04:56:44 GMT + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + X-Openai-Proxy-Wasm: + - v0.1 + X-Ratelimit-Limit-Requests: + - "30000" + X-Ratelimit-Limit-Tokens: + - "150000000" + X-Ratelimit-Remaining-Requests: + - "29999" + X-Ratelimit-Remaining-Tokens: + - "149999985" + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_c436221dcc5f450c88ae67ca4d63f959 + status: 200 OK + code: 200 + duration: 648.877417ms diff --git a/btx/testdata/cassettes/openai/tools.yaml b/btx/testdata/cassettes/openai/tools.yaml new file mode 100644 index 00000000..6d72731e --- /dev/null +++ 
b/btx/testdata/cassettes/openai/tools.yaml @@ -0,0 +1,144 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 509 + transfer_encoding: [] + trailer: {} + host: "" + remote_addr: "" + request_uri: "" + body: '{"messages":[{"content":"What is the weather like in Paris, France?","role":"user"}],"model":"gpt-4o","max_tokens":500,"temperature":0,"tools":[{"function":{"name":"get_weather","description":"Get the current weather for a location","parameters":{"properties":{"location":{"description":"The city and state, e.g. San Francisco, CA","type":"string"},"unit":{"description":"The unit of temperature","enum":["celsius","fahrenheit"],"type":"string"}},"required":["location"],"type":"object"}},"type":"function"}]}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + User-Agent: + - OpenAI/Go 1.12.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Lang: + - go + X-Stainless-Os: + - MacOS + X-Stainless-Package-Version: + - 1.12.0 + X-Stainless-Retry-Count: + - "0" + X-Stainless-Runtime: + - go + X-Stainless-Runtime-Version: + - go1.26.3 + url: https://api.openai.com/v1/chat/completions + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: | + { + "id": "chatcmpl-DhpHaWRnxhfCdgTsxeMFschubWC3e", + "object": "chat.completion", + "created": 1779337606, + "model": "gpt-4o-2024-08-06", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_3hp1Gw0cVY5ACWqyP7Klzb7U", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"location\":\"Paris, France\"}" + } + } + ], + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "tool_calls" + } + ], + "usage": { + "prompt_tokens": 85, + "completion_tokens": 16, + "total_tokens": 101, + 
"prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_eb58b86ca5" + } + headers: + Access-Control-Expose-Headers: + - X-Request-ID + - CF-Ray + - CF-Ray + Alt-Svc: + - h3=":443"; ma=86400 + Cf-Cache-Status: + - DYNAMIC + Cf-Ray: + - 9ff0d3253c0f9b6d-SEA + Content-Type: + - application/json + Date: + - Thu, 21 May 2026 04:26:47 GMT + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - "1083" + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - "2020-10-01" + Server: + - cloudflare + Set-Cookie: + - __cf_bm=H.SjIwIuDgmhVpwSXh9x7789HqjSW2yUPCWaqn8oPzo-1779337605.9577239-1.0.1.1-7EwXUn8RWT3BCwqnechxBoCSevVK6rZdISC2M6DFd_HVxkeodfLd.fcHHhzNRuCU3DvDzQHK48QHkaaF2C7ITrpeU3vxeY3u7wRS5XImDsLTGb5n6ZfqbcuBqK6evXR_; HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Thu, 21 May 2026 04:56:47 GMT + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + X-Openai-Proxy-Wasm: + - v0.1 + X-Ratelimit-Limit-Requests: + - "10000" + X-Ratelimit-Limit-Tokens: + - "30000000" + X-Ratelimit-Remaining-Requests: + - "9999" + X-Ratelimit-Remaining-Tokens: + - "29999987" + X-Ratelimit-Reset-Requests: + - 6ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_2790b6dd6a9246d7ac148efaf0f70c98 + status: 200 OK + code: 200 + duration: 1.183771333s diff --git a/client.go b/client.go index 9216ab2d..25e5b8f2 100644 --- a/client.go +++ b/client.go @@ -116,14 +116,15 @@ func New(tp *trace.TracerProvider, opts ...Option) (*Client, error) { func (c *Client) setupTracing() error { // Build trace config from client config traceConfig := bttrace.Config{ - DefaultProjectID: c.config.DefaultProjectID, - DefaultProjectName: c.config.DefaultProjectName, - 
FilterAISpans: c.config.FilterAISpans, - EnableBuiltinAdkTraces: c.config.EnableBuiltinAdkTraces, - SpanFilterFuncs: convertSpanFilters(c.config.SpanFilterFuncs), - EnableTraceConsoleLog: c.config.EnableTraceConsoleLog, - Exporter: c.config.Exporter, - Logger: c.logger, + DefaultProjectID: c.config.DefaultProjectID, + DefaultProjectName: c.config.DefaultProjectName, + FilterAISpans: c.config.FilterAISpans, + EnableBuiltinAdkTraces: c.config.EnableBuiltinAdkTraces, + SpanFilterFuncs: convertSpanFilters(c.config.SpanFilterFuncs), + EnableTraceConsoleLog: c.config.EnableTraceConsoleLog, + AutoConvertAIAttachments: c.config.AutoConvertAIAttachments, + Exporter: c.config.Exporter, + Logger: c.logger, } // Add Braintrust span processor to the provided TracerProvider diff --git a/config/config.go b/config/config.go index f66503d6..a69e3d01 100644 --- a/config/config.go +++ b/config/config.go @@ -22,11 +22,12 @@ type Config struct { BlockingLogin bool // Tracing configuration - FilterAISpans bool - EnableBuiltinAdkTraces bool // if false (default), drop Google ADK native spans to avoid duplicates - EnableTraceConsoleLog bool // log traces to stdout for debugging - SpanFilterFuncs []SpanFilterFunc - Exporter trace.SpanExporter + FilterAISpans bool + EnableBuiltinAdkTraces bool // if false (default), drop Google ADK native spans to avoid duplicates + EnableTraceConsoleLog bool // log traces to stdout for debugging + AutoConvertAIAttachments bool // scan spans for base64 attachments and upload them (default: true) + SpanFilterFuncs []SpanFilterFunc + Exporter trace.SpanExporter // Logger Logger logger.Logger @@ -49,18 +50,20 @@ type SpanFilterFunc func(span trace.ReadOnlySpan) int // - BRAINTRUST_ENABLE_TRACE_CONSOLE_LOG: Log traces to stdout for debugging (default: false) // - BRAINTRUST_OTEL_FILTER_AI_SPANS: Filter to keep only AI-related spans (default: false) // - BRAINTRUST_OTEL_ENABLE_BUILTIN_ADK_TRACES: Enable exporting spans from Google ADK's built-in telemetry 
(default: false) +// - BRAINTRUST_AUTO_CONVERT_AI_ATTACHMENTS: Scan spans for base64 attachments and upload them (default: true) func FromEnv() *Config { return &Config{ - APIKey: getEnvString("BRAINTRUST_API_KEY", ""), - APIURL: getEnvString("BRAINTRUST_API_URL", "https://api.braintrust.dev"), - AppURL: getEnvString("BRAINTRUST_APP_URL", "https://www.braintrust.dev"), - OrgName: getEnvString("BRAINTRUST_ORG_NAME", ""), - DefaultProjectID: getEnvString("BRAINTRUST_DEFAULT_PROJECT_ID", ""), - DefaultProjectName: getEnvString("BRAINTRUST_DEFAULT_PROJECT", "default-go-project"), - BlockingLogin: getEnvBool("BRAINTRUST_BLOCKING_LOGIN", false), - FilterAISpans: getEnvBool("BRAINTRUST_OTEL_FILTER_AI_SPANS", false), - EnableTraceConsoleLog: getEnvBool("BRAINTRUST_ENABLE_TRACE_CONSOLE_LOG", false), - EnableBuiltinAdkTraces: getEnvBool("BRAINTRUST_OTEL_ENABLE_BUILTIN_ADK_TRACES", false), + APIKey: getEnvString("BRAINTRUST_API_KEY", ""), + APIURL: getEnvString("BRAINTRUST_API_URL", "https://api.braintrust.dev"), + AppURL: getEnvString("BRAINTRUST_APP_URL", "https://www.braintrust.dev"), + OrgName: getEnvString("BRAINTRUST_ORG_NAME", ""), + DefaultProjectID: getEnvString("BRAINTRUST_DEFAULT_PROJECT_ID", ""), + DefaultProjectName: getEnvString("BRAINTRUST_DEFAULT_PROJECT", "default-go-project"), + BlockingLogin: getEnvBool("BRAINTRUST_BLOCKING_LOGIN", false), + FilterAISpans: getEnvBool("BRAINTRUST_OTEL_FILTER_AI_SPANS", false), + EnableTraceConsoleLog: getEnvBool("BRAINTRUST_ENABLE_TRACE_CONSOLE_LOG", false), + EnableBuiltinAdkTraces: getEnvBool("BRAINTRUST_OTEL_ENABLE_BUILTIN_ADK_TRACES", false), + AutoConvertAIAttachments: getEnvBool("BRAINTRUST_AUTO_CONVERT_AI_ATTACHMENTS", true), } } diff --git a/examples/internal/load-test/main.go b/examples/internal/load-test/main.go new file mode 100644 index 00000000..7d36b6f9 --- /dev/null +++ b/examples/internal/load-test/main.go @@ -0,0 +1,321 @@ +// Package main runs a heavy synthetic load test that exercises the Braintrust +// 
SDK's span pipeline (including the attachment processor / uploader) without +// requiring any real LLM API calls. +// +// Each "LLM call" is a manually-created span that sets braintrust.input_json +// and braintrust.output_json containing a base64 image attachment in the +// standard message format. With BRAINTRUST_AUTO_CONVERT_AI_ATTACHMENTS enabled +// (the default), the trace processor will detect the base64 payload, upload +// it as an attachment, and replace it with an attachment reference. +// +// Tunable via environment variables: +// +// BRAINTRUST_API_KEY - required, real API key +// LOAD_TEST_SPANS - total number of "LLM" spans (default 200) +// LOAD_TEST_CONCURRENCY - max goroutines in flight (default 16) +// LOAD_TEST_ATTACHMENTS_PER_SPAN - attachments per input (default 2) +// LOAD_TEST_ATTACHMENT_SIZE_KB - per-attachment payload KB (default 32) +// LOAD_TEST_REAL_LLM - if "1", also make a real +// OpenAI call per span (default off) +// OPENAI_API_KEY - required only when LOAD_TEST_REAL_LLM=1 +// +// To run: +// +// LOAD_TEST_SPANS=50 LOAD_TEST_CONCURRENCY=8 LOAD_TEST_ATTACHMENTS_PER_SPAN=2 LOAD_TEST_ATTACHMENT_SIZE_KB=16 go run ./examples/internal/load-test +package main + +import ( + "bytes" + "context" + "encoding/base64" + "encoding/json" + "fmt" + "image" + "image/color" + "image/png" + "log" + "math" + "math/rand" + "os" + "strconv" + "sync" + "sync/atomic" + "time" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/trace" + oteltrace "go.opentelemetry.io/otel/trace" + + "github.com/braintrustdata/braintrust-sdk-go" + traceopenai "github.com/braintrustdata/braintrust-sdk-go/trace/contrib/openai" +) + +type config struct { + totalSpans int + concurrency int + attachmentsPerSpan int + attachmentSizeKB int + realLLM bool +} + +func loadConfig() config { + return config{ + totalSpans: envInt("LOAD_TEST_SPANS", 200), + concurrency: 
envInt("LOAD_TEST_CONCURRENCY", 16), + attachmentsPerSpan: envInt("LOAD_TEST_ATTACHMENTS_PER_SPAN", 2), + attachmentSizeKB: envInt("LOAD_TEST_ATTACHMENT_SIZE_KB", 32), + realLLM: os.Getenv("LOAD_TEST_REAL_LLM") == "1", + } +} + +func main() { + if os.Getenv("BRAINTRUST_API_KEY") == "" { + log.Fatal("BRAINTRUST_API_KEY is required") + } + cfg := loadConfig() + if cfg.realLLM && os.Getenv("OPENAI_API_KEY") == "" { + log.Fatal("LOAD_TEST_REAL_LLM=1 requires OPENAI_API_KEY") + } + + fmt.Println("=== Braintrust Load Test ===") + fmt.Printf(" spans=%d concurrency=%d attachments/span=%d size=%dKB realLLM=%v\n\n", + cfg.totalSpans, cfg.concurrency, cfg.attachmentsPerSpan, cfg.attachmentSizeKB, cfg.realLLM) + + tp := trace.NewTracerProvider() + defer func() { + shutdownCtx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() + if err := tp.Shutdown(shutdownCtx); err != nil { + log.Printf("tracer provider shutdown error: %v", err) + } + }() + otel.SetTracerProvider(tp) + + bt, err := braintrust.New(tp, + braintrust.WithProject("go-sdk-examples"), + braintrust.WithBlockingLogin(true), + ) + if err != nil { + log.Fatalf("braintrust.New: %v", err) + } + + tracer := otel.Tracer("load-test") + rootCtx, rootSpan := tracer.Start(context.Background(), "load-test/run") + rootSpan.SetAttributes( + attribute.Int("load_test.total_spans", cfg.totalSpans), + attribute.Int("load_test.concurrency", cfg.concurrency), + attribute.Int("load_test.attachments_per_span", cfg.attachmentsPerSpan), + attribute.Int("load_test.attachment_size_kb", cfg.attachmentSizeKB), + attribute.Bool("load_test.real_llm", cfg.realLLM), + ) + + var oaClient openai.Client + if cfg.realLLM { + oaClient = openai.NewClient(option.WithMiddleware(traceopenai.NewMiddleware())) + } + + start := time.Now() + runLoad(rootCtx, tracer, cfg, &oaClient) + elapsed := time.Since(start) + + rootSpan.SetAttributes(attribute.Int64("load_test.elapsed_ms", elapsed.Milliseconds())) + + fmt.Printf("\n✓ 
produced %d spans in %s (%.1f spans/s)\n", + cfg.totalSpans, elapsed.Round(time.Millisecond), + float64(cfg.totalSpans)/elapsed.Seconds()) + + // Capture the permalink while the root span is still live (recording). + permalink := bt.Permalink(rootSpan) + rootSpan.End() + + flushStart := time.Now() + flushCtx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + if err := tp.ForceFlush(flushCtx); err != nil { + log.Printf("ForceFlush warning: %v", err) + } + fmt.Printf("✓ flushed in %s\n", time.Since(flushStart).Round(time.Millisecond)) + + fmt.Printf("🔗 View trace: %s\n", permalink) +} + +func runLoad(ctx context.Context, tracer oteltrace.Tracer, cfg config, oaClient *openai.Client) { + sem := make(chan struct{}, cfg.concurrency) + var wg sync.WaitGroup + var completed atomic.Int64 + + // Progress reporter + progressDone := make(chan struct{}) + go func() { + ticker := time.NewTicker(1 * time.Second) + defer ticker.Stop() + for { + select { + case <-progressDone: + return + case <-ticker.C: + done := completed.Load() + fmt.Printf(" ... %d/%d spans started\n", done, cfg.totalSpans) + } + } + }() + + for i := 0; i < cfg.totalSpans; i++ { + sem <- struct{}{} + wg.Add(1) + go func(idx int) { + defer wg.Done() + defer func() { <-sem }() + simulateLLMCall(ctx, tracer, cfg, oaClient, idx) + completed.Add(1) + }(i) + } + + wg.Wait() + close(progressDone) +} + +// simulateLLMCall creates a span that looks like a chat-completion call with +// image attachments, populating braintrust.input_json and braintrust.output_json. +// When cfg.realLLM is true, it also makes an actual OpenAI call (which produces +// its own auto-instrumented child span). +func simulateLLMCall( + ctx context.Context, + tracer oteltrace.Tracer, + cfg config, + oaClient *openai.Client, + idx int, +) { + ctx, span := tracer.Start(ctx, "llm.chat") + defer span.End() + + // Build a chat-completion-style input with N base64 image attachments. 
+ content := []any{ + map[string]any{"type": "text", "text": fmt.Sprintf("Describe the %d attached images. (request #%d)", cfg.attachmentsPerSpan, idx)}, + } + for i := 0; i < cfg.attachmentsPerSpan; i++ { + content = append(content, map[string]any{ + "type": "image_url", + "image_url": map[string]any{ + "url": makeBase64DataURL(cfg.attachmentSizeKB), + }, + }) + } + input := []map[string]any{ + {"role": "system", "content": "You are a helpful assistant that describes images."}, + {"role": "user", "content": content}, + } + + // Synthetic output (text only; unlike the input, it carries no base64 attachment). + output := []map[string]any{ + { + "role": "assistant", + "content": []any{ + map[string]any{"type": "text", "text": fmt.Sprintf("Synthetic description for request #%d.", idx)}, + }, + }, + } + + inputJSON, err := json.Marshal(input) + if err != nil { + log.Printf("marshal input: %v", err) + return + } + outputJSON, err := json.Marshal(output) + if err != nil { + log.Printf("marshal output: %v", err) + return + } + + span.SetAttributes( + attribute.String("braintrust.input_json", string(inputJSON)), + attribute.String("braintrust.output_json", string(outputJSON)), + attribute.String("braintrust.span_attributes", `{"type":"llm","name":"chat"}`), + attribute.String("model", "gpt-4o-mini"), + attribute.Int("load_test.idx", idx), + ) + + // Optionally make a real LLM call to also exercise the OpenAI middleware. + if cfg.realLLM && oaClient != nil { + _, err := oaClient.Chat.Completions.New(ctx, openai.ChatCompletionNewParams{ + Model: openai.ChatModelGPT4oMini, + Messages: []openai.ChatCompletionMessageParamUnion{ + openai.UserMessage(fmt.Sprintf("Reply with the number %d.", idx)), + }, + }) + if err != nil { + log.Printf("openai call %d: %v", idx, err) + } + } +} + +// makeBase64DataURL returns a `data:image/png;base64,...` URL containing a +// real, decodable PNG whose encoded size is roughly `sizeKB` KB.
Random pixel +// colors prevent the PNG encoder from deflating away most of the bytes and +// also defeat any content-hash dedup in the attachment uploader. +func makeBase64DataURL(sizeKB int) string { + if sizeKB <= 0 { + sizeKB = 1 + } + return "data:image/png;base64," + base64.StdEncoding.EncodeToString(makePNG(sizeKB)) +} + +// makePNG returns a valid PNG byte stream of roughly `sizeKB` KB. We pick a +// square image whose raw pixel count, after PNG's overhead and compression of +// random RGB noise, lands near the target. Random pixels barely compress, so +// a square of side ~= sqrt(sizeKB*1024/3) is a good first cut. +func makePNG(sizeKB int) []byte { + side := int(math.Round(math.Sqrt(float64(sizeKB) * 1024.0 / 3.0))) + if side < 4 { + side = 4 + } + img := image.NewRGBA(image.Rect(0, 0, side, side)) + //nolint:gosec // not cryptographic; we just want non-compressible pixel noise + r := rand.New(rand.NewSource(time.Now().UnixNano())) + for y := 0; y < side; y++ { + for x := 0; x < side; x++ { + img.SetRGBA(x, y, color.RGBA{ + R: uint8(r.Intn(256)), + G: uint8(r.Intn(256)), + B: uint8(r.Intn(256)), + A: 255, + }) + } + } + var buf bytes.Buffer + enc := png.Encoder{CompressionLevel: png.NoCompression} + if err := enc.Encode(&buf, img); err != nil { + // Should never happen for an in-memory RGBA, but fall back to a tiny + // known-good PNG rather than returning garbage. + return tinyPNG() + } + return buf.Bytes() +} + +// tinyPNG is a minimal 10x10 red PNG used as a fallback if encoding fails. 
+func tinyPNG() []byte { + return []byte{ + 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00, 0x00, 0x0d, + 0x49, 0x48, 0x44, 0x52, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, + 0x08, 0x02, 0x00, 0x00, 0x00, 0x02, 0x50, 0x58, 0xea, 0x00, 0x00, 0x00, + 0x12, 0x49, 0x44, 0x41, 0x54, 0x78, 0xda, 0x63, 0xf8, 0xcf, 0xc0, 0x80, + 0x07, 0x31, 0x8c, 0x4a, 0x63, 0x43, 0x00, 0xb7, 0xca, 0x63, 0x9d, 0xd6, + 0xd5, 0xef, 0x74, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, 0x44, 0xae, + 0x42, 0x60, 0x82, + } +} + +func envInt(key string, def int) int { + v := os.Getenv(key) + if v == "" { + return def + } + n, err := strconv.Atoi(v) + if err != nil || n <= 0 { + return def + } + return n +} diff --git a/go.mod b/go.mod index fc379b07..a7eafb5a 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.25.0 toolchain go1.26.1 require ( + github.com/google/uuid v1.6.0 github.com/stretchr/testify v1.11.1 go.opentelemetry.io/otel v1.43.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 @@ -22,7 +23,6 @@ require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect - github.com/google/uuid v1.6.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect diff --git a/go.work b/go.work index ed035f87..74d20b6f 100644 --- a/go.work +++ b/go.work @@ -4,6 +4,7 @@ toolchain go1.26.1 use ( . + ./btx ./examples ./trace/contrib/adk ./trace/contrib/all diff --git a/options.go b/options.go index 1c28cae0..c44b868d 100644 --- a/options.go +++ b/options.go @@ -112,3 +112,13 @@ func WithSpanFilterFuncs(filterFuncs ...config.SpanFilterFunc) Option { c.SpanFilterFuncs = append(c.SpanFilterFuncs, filterFuncs...) 
} } + +// WithAutoConvertAIAttachments controls whether the SDK automatically scans +// spans for base64 LLM attachments and replaces them with uploaded +// references. Defaults to true. +// Environment variable: BRAINTRUST_AUTO_CONVERT_AI_ATTACHMENTS +func WithAutoConvertAIAttachments(enabled bool) Option { + return func(c *config.Config) { + c.AutoConvertAIAttachments = enabled + } +} diff --git a/trace/attachmentprocessor/formats.go b/trace/attachmentprocessor/formats.go new file mode 100644 index 00000000..4d619d38 --- /dev/null +++ b/trace/attachmentprocessor/formats.go @@ -0,0 +1,421 @@ +package attachmentprocessor + +import ( + "encoding/base64" + "regexp" + "strings" +) + +// minBase64Len is the minimum length of a base64 string to consider it a +// real attachment (avoids false positives on short strings). +const minBase64Len = 20 + +// dataURIPrefix matches "data:;base64,". +const dataURIPrefix = `data:([\w/\-.+]+);base64,` + +// base64Str matches a base64 string of at least minBase64Len characters. +const base64Str = `([A-Za-z0-9+/=]{20,})` + +var dataURIPattern = regexp.MustCompile(dataURIPrefix + base64Str) + +// heuristic patterns for the fast-path check against raw JSON strings. +var ( + // Matches a quoted data URI (OpenAI format). + dataURIHeuristic = `"` + dataURIPrefix + base64Str + `"` + // Matches "bytes" or "data" keys with a base64 value (Bedrock/Anthropic/Gemini). + byteValueHeuristic = `"(?:bytes|data)"\s*:\s*"` + base64Str + `"` +) + +// Format describes how to detect and replace a provider-specific base64 +// attachment structure. Each vendor is a self-contained unit; adding a new +// provider is a matter of appending an entry to Formats. +type Format struct { + // Name is a human-readable label for logging and test-coverage tracking. + Name string + // HeuristicFragment is a regex fragment for the combined fast-path + // heuristic. Duplicate fragments across formats are de-duplicated. 
+ HeuristicFragment string + // Match returns true if this format should handle the given JSON node. + // parentKey is the key of this node within its parent (empty for root/array items). + Match func(parentKey string, node any) bool + // Replace builds the replacement value. It receives the matched node and + // an Uploader to enqueue the binary data. It returns the replacement value + // and the decoded binary data, or nil if replacement should be skipped. + Replace func(node any, upload UploadFunc) (replacement any, ok bool) +} + +// UploadFunc enqueues an attachment for background upload. Returns false if +// the uploader has shut down or the queue is full. +type UploadFunc func(ref Reference, data []byte) bool + +// Formats lists all supported vendor attachment formats, checked in order +// during tree traversal. +var Formats = []Format{ + openAIFormat, + bedrockFormat, + anthropicFormat, + geminiFormat, +} + +// BuildHeuristic compiles a single regex from all format heuristic fragments. +func BuildHeuristic(formats []Format) *regexp.Regexp { + seen := make(map[string]bool) + var fragments []string + for _, f := range formats { + if !seen[f.HeuristicFragment] { + seen[f.HeuristicFragment] = true + fragments = append(fragments, f.HeuristicFragment) + } + } + return regexp.MustCompile(strings.Join(fragments, "|")) +} + +// ── OpenAI ───────────────────────────────────────────────────────── + +// openAIFormat handles data URIs in text node values. +// e.g. image_url.url = "data:image/png;base64,..." 
// isEntirelyDataURI reports whether value, after trimming surrounding
// whitespace, starts with "data:" and contains no double quote, backslash, or
// ASCII whitespace (space, tab, CR, LF, VT, FF).
//
// The whitespace check matters because the data-URI regex is unanchored: a
// string such as "data:image/png;base64,AAAA...\nsome text" would otherwise
// be accepted, only its leading base64 run would be uploaded, and the whole
// string — including the trailing text — would be replaced by the reference.
func isEntirelyDataURI(value string) bool {
	t := strings.TrimSpace(value)
	return strings.HasPrefix(t, "data:") &&
		!strings.ContainsAny(t, "\"\\ \t\r\n\v\f")
}
+var converseBlockTypeFormats = map[string]map[string]string{ + "image": { + "gif": "image/gif", + "jpeg": "image/jpeg", + "png": "image/png", + "webp": "image/webp", + }, + "video": { + "flv": "video/x-flv", + "mkv": "video/x-matroska", + "mov": "video/quicktime", + "mp4": "video/mp4", + "mpeg": "video/mpeg", + "mpg": "video/mpeg", + "three_gp": "video/3gpp", + "webm": "video/webm", + "wmv": "video/x-ms-wmv", + }, + "audio": { + "aac": "audio/aac", + "flac": "audio/flac", + "m4a": "audio/mp4", + "mka": "audio/x-matroska", + "mkv": "audio/x-matroska", + "mp3": "audio/mpeg", + "mp4": "audio/mp4", + "mpeg": "audio/mpeg", + "mpga": "audio/mpeg", + "ogg": "audio/ogg", + "opus": "audio/opus", + "pcm": "audio/pcm", + "wav": "audio/wav", + "webm": "audio/webm", + "x-aac": "audio/aac", + }, + "document": { + "csv": "text/csv", + "doc": "application/msword", + "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "html": "text/html", + "md": "text/markdown", + "pdf": "application/pdf", + "txt": "text/plain", + "xls": "application/vnd.ms-excel", + "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + }, +} + +type converseBlock struct { + blockTypeKey string + inner map[string]any + formatMap map[string]string +} + +func getConverseBlock(obj map[string]any) *converseBlock { + for blockKey, fmtMap := range converseBlockTypeFormats { + inner, ok := obj[blockKey] + if !ok { + continue + } + innerMap, ok := inner.(map[string]any) + if !ok { + continue + } + fmtVal, ok := innerMap["format"] + if !ok { + continue + } + fmtStr, ok := fmtVal.(string) + if !ok { + continue + } + if _, found := fmtMap[strings.ToLower(fmtStr)]; !found { + continue + } + src, ok := innerMap["source"] + if !ok { + continue + } + srcMap, ok := src.(map[string]any) + if !ok { + continue + } + bytesVal, ok := srcMap["bytes"] + if !ok { + continue + } + bytesStr, ok := bytesVal.(string) + if !ok || len(bytesStr) < minBase64Len { + continue + } + return 
&converseBlock{ + blockTypeKey: blockKey, + inner: innerMap, + formatMap: fmtMap, + } + } + return nil +} + +var bedrockFormat = Format{ + Name: "bedrock", + HeuristicFragment: byteValueHeuristic, + Match: func(_ string, node any) bool { + obj, ok := node.(map[string]any) + if !ok { + return false + } + return getConverseBlock(obj) != nil + }, + Replace: func(node any, upload UploadFunc) (any, bool) { + obj, ok := node.(map[string]any) + if !ok { + return nil, false + } + block := getConverseBlock(obj) + if block == nil { + return nil, false + } + + fmtStr, ok := block.inner["format"].(string) + if !ok { + return nil, false + } + contentType, ok := block.formatMap[strings.ToLower(fmtStr)] + if !ok { + return nil, false + } + + srcMap, ok := block.inner["source"].(map[string]any) + if !ok { + return nil, false + } + b64Data, ok := srcMap["bytes"].(string) + if !ok { + return nil, false + } + refVal, ok := uploadAndCreateRef(contentType, b64Data, upload) + if !ok { + return nil, false + } + + // Rebuild: copy all fields, replace source.bytes in the matched block. + result := make(map[string]any, len(obj)) + for k, v := range obj { + if k == block.blockTypeKey { + newInner := make(map[string]any, len(block.inner)) + for ik, iv := range block.inner { + if ik == "source" { + origSource, ok := iv.(map[string]any) + if !ok { + newInner[ik] = iv + continue + } + newSource := make(map[string]any, len(origSource)) + for sk, sv := range origSource { + if sk == "bytes" { + newSource[sk] = refVal + } else { + newSource[sk] = sv + } + } + newInner[ik] = newSource + } else { + newInner[ik] = iv + } + } + result[k] = newInner + } else { + result[k] = v + } + } + return result, true + }, +} + +// ── Anthropic ────────────────────────────────────────────────────── + +// anthropicFormat handles {"type":"base64","media_type":"image/png","data":""}. +// The entire source object is replaced with the attachment reference. 
+var anthropicFormat = Format{ + Name: "anthropic", + HeuristicFragment: byteValueHeuristic, + Match: func(_ string, node any) bool { + obj, ok := node.(map[string]any) + if !ok { + return false + } + typ, _ := obj["type"].(string) + if typ != "base64" { + return false + } + mediaType, ok := obj["media_type"].(string) + if !ok || mediaType == "" { + return false + } + data, ok := obj["data"].(string) + return ok && len(data) >= minBase64Len + }, + Replace: func(node any, upload UploadFunc) (any, bool) { + obj, ok := node.(map[string]any) + if !ok { + return nil, false + } + contentType, ok := obj["media_type"].(string) + if !ok { + return nil, false + } + b64Data, ok := obj["data"].(string) + if !ok { + return nil, false + } + return uploadAndCreateRef(contentType, b64Data, upload) + }, +} + +// ── Gemini ───────────────────────────────────────────────────────── + +// geminiFormat handles {"inlineData": {"mimeType":"image/png","data":""}}. +// Images → image_url: {url: ref}; non-images → file: {file_data: ref}. 
+var geminiFormat = Format{ + Name: "gemini", + HeuristicFragment: byteValueHeuristic, + Match: func(_ string, node any) bool { + obj, ok := node.(map[string]any) + if !ok { + return false + } + inlineData, ok := obj["inlineData"] + if !ok { + return false + } + idMap, ok := inlineData.(map[string]any) + if !ok { + return false + } + mimeType, ok := idMap["mimeType"].(string) + if !ok || mimeType == "" { + return false + } + data, ok := idMap["data"].(string) + return ok && len(data) >= minBase64Len + }, + Replace: func(node any, upload UploadFunc) (any, bool) { + obj, ok := node.(map[string]any) + if !ok { + return nil, false + } + idMap, ok := obj["inlineData"].(map[string]any) + if !ok { + return nil, false + } + contentType, ok := idMap["mimeType"].(string) + if !ok { + return nil, false + } + b64Data, ok := idMap["data"].(string) + if !ok { + return nil, false + } + + refVal, ok := uploadAndCreateRef(contentType, b64Data, upload) + if !ok { + return nil, false + } + + isImage := strings.HasPrefix(contentType, "image/") + + // Rebuild: swap inlineData for the appropriate wrapper. + result := make(map[string]any, len(obj)) + for k, v := range obj { + if k == "inlineData" { + if isImage { + result["image_url"] = map[string]any{"url": refVal} + } else { + result["file"] = map[string]any{"file_data": refVal} + } + } else { + result[k] = v + } + } + return result, true + }, +} + +// ── Shared helpers ───────────────────────────────────────────────── + +// uploadAndCreateRef decodes base64 data, creates a reference, and enqueues +// the upload. Returns the reference as a map (for JSON embedding) and true, +// or nil, false on failure. 
import (
	"encoding/json"
	"errors"
	"regexp"
	"strings"

	"github.com/braintrustdata/braintrust-sdk-go/logger"
)
+// +// JSON parse errors on individual spans are silently skipped — they don't +// affect processing of subsequent spans. The uploader shuts itself down on +// upload failures (network errors, auth errors, etc.), which causes all +// future calls to bail out via IsShutdown(). +func (p *Processor) ProcessAndUpload(jsonStr string) string { + if jsonStr == "" || p.uploader.IsShutdown() { + return jsonStr + } + + if !p.heuristic.MatchString(jsonStr) { + return jsonStr + } + + result, err := p.processJSON(jsonStr) + if err != nil { + // Per-span errors (malformed JSON, etc.) — skip this span, don't + // kill the processor. Upload failures are handled by the uploader + // itself (it sets rejectNewJobs, so IsShutdown() returns true). + p.log.Debug("attachment processing skipped for span", "error", err) + return jsonStr + } + return result +} + +// maxWalkDepth is the maximum JSON nesting depth we'll recurse into. +// Go's goroutine stack overflow is fatal (not recoverable), so we cap +// depth to avoid crashing the process on pathological input. +const maxWalkDepth = 128 + +func (p *Processor) processJSON(jsonStr string) (string, error) { + var root any + if err := json.Unmarshal([]byte(jsonStr), &root); err != nil { + return "", err + } + + modified := false + failed := false + result, _ := p.walkAndReplace(root, "", &modified, &failed, 0) + if failed || !modified { + return jsonStr, nil + } + + out, err := json.Marshal(result) + if err != nil { + return "", err + } + return string(out), nil +} + +// walkAndReplace traverses the JSON tree. For each node it checks all +// registered formats. The first format whose matcher returns true handles the +// replacement — no further recursion into that subtree. +// +// Returns the (possibly replaced) node and whether anything in this subtree +// was modified. Containers are only copied when a descendant was replaced. +// +// If an enqueue fails mid-walk, *failed is set to true. 
The caller should +// discard the partially-rewritten tree and return the original JSON to avoid +// a mix of replaced references and inline base64 data. +func (p *Processor) walkAndReplace(node any, parentKey string, modified *bool, failed *bool, depth int) (any, bool) { + if depth >= maxWalkDepth || *failed { + return node, false + } + + uploadFn := func(ref Reference, data []byte) bool { + ok := p.uploader.Enqueue(ref, data) + if !ok { + *failed = true + } + return ok + } + + // Check each registered format. + for _, fmt := range p.formats { + if fmt.Match(parentKey, node) { + replacement, ok := fmt.Replace(node, uploadFn) + if ok { + *modified = true + return replacement, true + } + } + } + + // No format matched — recurse into children. Only allocate a new + // container when a child was actually replaced. + switch v := node.(type) { + case map[string]any: + var result map[string]any + for k, child := range v { + newChild, changed := p.walkAndReplace(child, k, modified, failed, depth+1) + if *failed { + return node, false + } + if changed { + if result == nil { + result = make(map[string]any, len(v)) + for k2, v2 := range v { + result[k2] = v2 + } + } + result[k] = newChild + } + } + if result != nil { + return result, true + } + return node, false + case []any: + var result []any + for i, child := range v { + newChild, changed := p.walkAndReplace(child, "", modified, failed, depth+1) + if *failed { + return node, false + } + if changed { + if result == nil { + result = make([]any, len(v)) + copy(result, v) + } + result[i] = newChild + } + } + if result != nil { + return result, true + } + return node, false + default: + return node, false + } +} diff --git a/trace/attachmentprocessor/processor_test.go b/trace/attachmentprocessor/processor_test.go new file mode 100644 index 00000000..cf170d4e --- /dev/null +++ b/trace/attachmentprocessor/processor_test.go @@ -0,0 +1,364 @@ +package attachmentprocessor + +import ( + "encoding/json" + "testing" + + 
"github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// 1×1 red PNG pixel, valid base64. +const base64PNG = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==" + +// Fake base64 content standing in for a PDF document. +const base64PDF = "JVBERi0xLjQKMSAwIG9iago=" + +// formatTestCase is a single parameterised test entry. Every Format in +// Formats MUST have at least one test case. The TestAllFormatsHaveTestCases +// test below enforces this. +type formatTestCase struct { + name string + format string // must match Format.Name + inputJSON string + assertions func(t *testing.T, root any) +} + +var formatTestCases = []formatTestCase{ + // ── OpenAI ────────────────────────────────────────────────────── + { + name: "openai-image", + format: "openai", + inputJSON: `[{"role":"user","content":[{"type":"text","text":"describe this"},` + + `{"type":"image_url","image_url":{"url":"data:image/png;base64,` + base64PNG + `"}}]}]`, + assertions: func(t *testing.T, root any) { + t.Helper() + arr := root.([]any) + content := arr[0].(map[string]any)["content"].([]any) + imageURL := content[1].(map[string]any)["image_url"].(map[string]any) + url := imageURL["url"] + assertAttachmentRef(t, url, "image/png") + }, + }, + // ── Bedrock image ─────────────────────────────────────────────── + { + name: "bedrock-image", + format: "bedrock", + inputJSON: `[{"role":"user","content":[{"type":"text","text":"describe this"},` + + `{"type":"image","image":{"format":"png","source":{"bytes":"` + base64PNG + `"}}}]}]`, + assertions: func(t *testing.T, root any) { + t.Helper() + arr := root.([]any) + content := arr[0].(map[string]any)["content"].([]any) + image := content[1].(map[string]any)["image"].(map[string]any) + assert.Equal(t, "png", image["format"]) + source := image["source"].(map[string]any) + assertAttachmentRef(t, source["bytes"], "image/png") + }, + }, + // ── Bedrock document 
──────────────────────────────────────────── + { + name: "bedrock-document", + format: "bedrock", + inputJSON: `[{"role":"user","content":[{"type":"text","text":"summarize this"},` + + `{"type":"document","document":{"format":"pdf","name":"report","source":{"bytes":"` + base64PDF + `"}}}]}]`, + assertions: func(t *testing.T, root any) { + t.Helper() + arr := root.([]any) + content := arr[0].(map[string]any)["content"].([]any) + doc := content[1].(map[string]any)["document"].(map[string]any) + assert.Equal(t, "pdf", doc["format"]) + assert.Equal(t, "report", doc["name"]) + source := doc["source"].(map[string]any) + assertAttachmentRef(t, source["bytes"], "application/pdf") + }, + }, + // ── Bedrock audio (ambiguous mp4 → audio/mp4, not video/mp4) ── + { + name: "bedrock-audio", + format: "bedrock", + inputJSON: `[{"role":"user","content":[{"type":"text","text":"transcribe this"},` + + `{"type":"audio","audio":{"format":"mp4","source":{"bytes":"` + base64PDF + `"}}}]}]`, + assertions: func(t *testing.T, root any) { + t.Helper() + arr := root.([]any) + content := arr[0].(map[string]any)["content"].([]any) + audio := content[1].(map[string]any)["audio"].(map[string]any) + assert.Equal(t, "mp4", audio["format"]) + source := audio["source"].(map[string]any) + assertAttachmentRef(t, source["bytes"], "audio/mp4") + }, + }, + // ── Anthropic image ───────────────────────────────────────────── + { + name: "anthropic-image", + format: "anthropic", + inputJSON: `[{"role":"user","content":[{"type":"text","text":"describe this"},` + + `{"type":"image","source":{"type":"base64","media_type":"image/png","data":"` + base64PNG + `"}}]}]`, + assertions: func(t *testing.T, root any) { + t.Helper() + arr := root.([]any) + content := arr[0].(map[string]any)["content"].([]any) + source := content[1].(map[string]any)["source"] + assertAttachmentRef(t, source, "image/png") + }, + }, + // ── Anthropic document ────────────────────────────────────────── + { + name: "anthropic-document", + 
format: "anthropic", + inputJSON: `[{"role":"user","content":[{"type":"text","text":"summarize this"},` + + `{"type":"document","source":{"type":"base64","media_type":"application/pdf","data":"` + base64PDF + `"}}]}]`, + assertions: func(t *testing.T, root any) { + t.Helper() + arr := root.([]any) + content := arr[0].(map[string]any)["content"].([]any) + source := content[1].(map[string]any)["source"] + assertAttachmentRef(t, source, "application/pdf") + }, + }, + // ── Gemini image ──────────────────────────────────────────────── + { + name: "gemini-image", + format: "gemini", + inputJSON: `{"contents":[{"role":"user","parts":[{"text":"describe this"},` + + `{"inlineData":{"mimeType":"image/png","data":"` + base64PNG + `"}}]}]}`, + assertions: func(t *testing.T, root any) { + t.Helper() + obj := root.(map[string]any) + contents := obj["contents"].([]any) + parts := contents[0].(map[string]any)["parts"].([]any) + part := parts[1].(map[string]any) + assert.Nil(t, part["inlineData"], "inlineData should be removed") + imageURL := part["image_url"].(map[string]any) + assertAttachmentRef(t, imageURL["url"], "image/png") + }, + }, + // ── Gemini document (non-image → file.file_data) ──────────────── + { + name: "gemini-document", + format: "gemini", + inputJSON: `{"contents":[{"role":"user","parts":[{"text":"summarize this"},` + + `{"inlineData":{"mimeType":"application/pdf","data":"` + base64PDF + `"}}]}]}`, + assertions: func(t *testing.T, root any) { + t.Helper() + obj := root.(map[string]any) + contents := obj["contents"].([]any) + parts := contents[0].(map[string]any)["parts"].([]any) + part := parts[1].(map[string]any) + assert.Nil(t, part["inlineData"], "inlineData should be removed") + assert.Nil(t, part["image_url"], "non-image should not use image_url") + file := part["file"].(map[string]any) + assertAttachmentRef(t, file["file_data"], "application/pdf") + }, + }, +} + +func TestAttachmentFormatReplacesBase64WithRef(t *testing.T) { + for _, tc := range 
formatTestCases { + t.Run(tc.name, func(t *testing.T) { + // Verify heuristic matches. + heuristic := BuildHeuristic(Formats) + assert.True(t, heuristic.MatchString(tc.inputJSON), + "BASE64_HEURISTIC should match test data for %s", tc.name) + + // Run processor. + p := NewProcessor(&NoopUploader{}, nil) + result := p.ProcessAndUpload(tc.inputJSON) + require.NotEqual(t, tc.inputJSON, result, + "base64 data should have been replaced for %s", tc.name) + + var root any + require.NoError(t, json.Unmarshal([]byte(result), &root)) + tc.assertions(t, root) + }) + } +} + +// TestAllFormatsHaveTestCases ensures that adding a new format without test +// data causes a test failure (as required by the spec). +func TestAllFormatsHaveTestCases(t *testing.T) { + covered := make(map[string]bool) + for _, tc := range formatTestCases { + covered[tc.format] = true + } + for _, f := range Formats { + assert.True(t, covered[f.Name], + "format %q has no test cases in formatTestCases — add at least one", f.Name) + } +} + +// ── Negative cases ───────────────────────────────────────────────── + +func TestNonDataURIInputIsUnchanged(t *testing.T) { + inputJSON := `[{"role":"user","content":"Hello, how are you?"}]` + p := NewProcessor(&NoopUploader{}, nil) + result := p.ProcessAndUpload(inputJSON) + assert.Equal(t, inputJSON, result) +} + +func TestPartialDataURIInTextIsNotReplaced(t *testing.T) { + inputJSON := `[{"role":"user","content":"Check this: data:image/png;base64,` + base64PNG + ` please"}]` + p := NewProcessor(&NoopUploader{}, nil) + result := p.ProcessAndUpload(inputJSON) + assert.Equal(t, inputJSON, result) +} + +func TestShortBase64IsNotReplaced(t *testing.T) { + // Short base64 string (< 20 chars) should not trigger replacement. 
+ inputJSON := `[{"role":"user","content":[{"type":"image","source":{"type":"base64","media_type":"image/png","data":"abc123"}}]}]` + p := NewProcessor(&NoopUploader{}, nil) + result := p.ProcessAndUpload(inputJSON) + assert.Equal(t, inputJSON, result) +} + +func TestMalformedJSONDoesNotKillProcessor(t *testing.T) { + uploader := &NoopUploader{} + p := NewProcessor(uploader, nil) + + // First call: malformed JSON that passes the heuristic but fails to parse. + badJSON := `{"data":"` + base64PNG + `" INVALID` + result := p.ProcessAndUpload(badJSON) + assert.Equal(t, badJSON, result, "should return original on parse error") + assert.False(t, uploader.IsShutdown(), "uploader should NOT be shut down by a JSON parse error") + + // Second call: valid JSON with an attachment — should still work. + goodJSON := `[{"role":"user","content":[{"type":"image_url","image_url":{"url":"data:image/png;base64,` + base64PNG + `"}}]}]` + result = p.ProcessAndUpload(goodJSON) + assert.NotEqual(t, goodJSON, result, "subsequent valid spans should still be processed") + assert.Contains(t, result, "braintrust_attachment") +} + +func TestEmptyInputReturnsEmpty(t *testing.T) { + p := NewProcessor(&NoopUploader{}, nil) + assert.Equal(t, "", p.ProcessAndUpload("")) +} + +func TestHeuristicSkipsPlainText(t *testing.T) { + // JSON with no base64 patterns at all. 
+ inputJSON := `{"messages":[{"role":"user","content":"just text"}]}` + p := NewProcessor(&NoopUploader{}, nil) + result := p.ProcessAndUpload(inputJSON) + assert.Equal(t, inputJSON, result) +} + +func TestUploaderShutdownSkipsProcessing(t *testing.T) { + uploader := &NoopUploader{} + uploader.Shutdown() + p := NewProcessor(uploader, nil) + inputJSON := `[{"role":"user","content":[{"type":"image_url","image_url":{"url":"data:image/png;base64,` + base64PNG + `"}}]}]` + result := p.ProcessAndUpload(inputJSON) + assert.Equal(t, inputJSON, result) +} + +func TestUploaderRejectionReturnsOriginal(t *testing.T) { + // An uploader that rejects all enqueue calls. + uploader := &rejectingUploader{} + p := NewProcessor(uploader, nil) + inputJSON := `[{"role":"user","content":[{"type":"image_url","image_url":{"url":"data:image/png;base64,` + base64PNG + `"}}]}]` + result := p.ProcessAndUpload(inputJSON) + // Should return original since the upload was rejected. + assert.Equal(t, inputJSON, result) +} + +func TestPartialEnqueueFailureReturnsOriginal(t *testing.T) { + // Uploader that succeeds N times then rejects. With 2 attachments in the + // payload and a limit of 1, the second enqueue fails mid-walk. + uploader := &limitedUploader{remaining: 1} + p := NewProcessor(uploader, nil) + + // Two OpenAI-format images in one message. 
+ inputJSON := `[{"role":"user","content":[` + + `{"type":"image_url","image_url":{"url":"data:image/png;base64,` + base64PNG + `"}},` + + `{"type":"image_url","image_url":{"url":"data:image/png;base64,` + base64PNG + `"}}` + + `]}]` + + result := p.ProcessAndUpload(inputJSON) + assert.Equal(t, inputJSON, result, + "should return original JSON unchanged when an enqueue fails mid-walk") +} + +func TestAlwaysRejectingUploaderReturnsOriginal(t *testing.T) { + uploader := &rejectingUploader{} + p := NewProcessor(uploader, nil) + + inputJSON := `[{"role":"user","content":[` + + `{"type":"image_url","image_url":{"url":"data:image/png;base64,` + base64PNG + `"}},` + + `{"type":"image_url","image_url":{"url":"data:image/png;base64,` + base64PNG + `"}}` + + `]}]` + + result := p.ProcessAndUpload(inputJSON) + assert.Equal(t, inputJSON, result, + "should return original JSON unchanged when uploader rejects all enqueues") +} + +// ── isEntirelyDataURI ────────────────────────────────────────────── + +func TestIsEntirelyDataURI(t *testing.T) { + tests := []struct { + input string + expected bool + }{ + {"data:image/png;base64,abc123", true}, + {" data:image/png;base64,abc123 ", true}, + {`Check this: data:image/png;base64,abc123 please`, false}, // space + {`"data:image/png;base64,abc123"`, false}, // quotes + {`data:image/png;base64,abc\n123`, false}, // backslash + {"not-a-data-uri", false}, + } + for _, tt := range tests { + assert.Equal(t, tt.expected, isEntirelyDataURI(tt.input), "input: %q", tt.input) + } +} + +// ── Content type to extension ────────────────────────────────────── + +func TestContentTypeToExtension(t *testing.T) { + tests := []struct { + contentType string + expected string + }{ + {"image/png", ".png"}, + {"image/jpeg", ".jpg"}, + {"application/pdf", ".pdf"}, + {"video/mp4", ".mp4"}, + {"audio/mpeg", ".mp3"}, + {"application/octet-stream", ".octet"}, + } + for _, tt := range tests { + assert.Equal(t, tt.expected, contentTypeToExtension(tt.contentType), + 
"contentType: %q", tt.contentType) + } +} + +// ── Helpers ───────────────────────────────────────────────────────── + +func assertAttachmentRef(t *testing.T, node any, expectedContentType string) { + t.Helper() + require.NotNil(t, node, "attachment ref node should not be nil") + ref, ok := node.(map[string]any) + require.True(t, ok, "attachment ref should be a map, got %T", node) + assert.Equal(t, "braintrust_attachment", ref["type"]) + assert.Equal(t, expectedContentType, ref["content_type"]) + assert.NotEmpty(t, ref["filename"]) + assert.NotEmpty(t, ref["key"]) +} + +// rejectingUploader is an uploader that rejects all enqueue calls. +type rejectingUploader struct{ NoopUploader } + +func (u *rejectingUploader) Enqueue(_ Reference, _ []byte) bool { return false } + +// limitedUploader accepts the first N enqueue calls, then rejects. +type limitedUploader struct { + NoopUploader + remaining int +} + +func (u *limitedUploader) Enqueue(_ Reference, _ []byte) bool { + if u.remaining <= 0 { + return false + } + u.remaining-- + return true +} diff --git a/trace/attachmentprocessor/reference.go b/trace/attachmentprocessor/reference.go new file mode 100644 index 00000000..de93edb3 --- /dev/null +++ b/trace/attachmentprocessor/reference.go @@ -0,0 +1,88 @@ +// Package attachmentprocessor scans span attributes for base64-encoded LLM +// attachments and replaces them with Braintrust attachment references after +// uploading the data to object storage. +package attachmentprocessor + +import ( + "fmt" + "strings" + + "github.com/google/uuid" +) + +// Reference is the JSON-serialisable object that replaces inline base64 +// attachment data on a span. Its shape is the cross-SDK Braintrust attachment +// reference format. +type Reference struct { + Type string `json:"type"` + ContentType string `json:"content_type"` + Filename string `json:"filename"` + Key string `json:"key"` +} + +// NewReference creates a reference with a freshly-generated UUID key. 
// contentTypeToExtension maps a MIME type to a file extension, including the
// leading dot, or returns "" when no extension can be derived.
//
// The input is normalised before lookup: parameters (everything from the
// first ';'), surrounding whitespace, and letter case are ignored, so
// "Image/PNG; charset=binary" maps to ".png". Well-known types use a fixed
// table. Any other "type/subtype" falls back to the subtype, truncated at the
// first character that is not a lower-case letter, digit, '.', '+' or '_'
// ("application/octet-stream" → ".octet"). That truncation also keeps path
// separators and spaces out of the generated filename. An input without a
// '/' or with an empty usable subtype yields "".
func contentTypeToExtension(contentType string) string {
	mediaType, _, _ := strings.Cut(contentType, ";")
	mediaType = strings.ToLower(strings.TrimSpace(mediaType))

	switch mediaType {
	case "image/png":
		return ".png"
	case "image/jpeg", "image/jpg":
		return ".jpg"
	case "image/gif":
		return ".gif"
	case "image/webp":
		return ".webp"
	case "image/svg+xml":
		return ".svg"
	case "application/pdf":
		return ".pdf"
	case "text/plain":
		return ".txt"
	case "text/csv":
		return ".csv"
	case "text/html":
		return ".html"
	case "text/markdown":
		return ".md"
	case "application/json":
		return ".json"
	case "application/msword":
		return ".doc"
	case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
		return ".docx"
	case "application/vnd.ms-excel":
		return ".xls"
	case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
		return ".xlsx"
	case "video/mp4":
		return ".mp4"
	case "video/webm":
		return ".webm"
	case "video/quicktime":
		return ".mov"
	case "audio/mpeg", "audio/mp3":
		return ".mp3"
	case "audio/wav":
		return ".wav"
	}

	_, sub, found := strings.Cut(mediaType, "/")
	if !found {
		return ""
	}
	// Keep the leading run of extension-safe characters; this subsumes the
	// old cut at ';' or '-'.
	unsafeAt := strings.IndexFunc(sub, func(r rune) bool {
		switch {
		case r >= 'a' && r <= 'z', r >= '0' && r <= '9', r == '.', r == '+', r == '_':
			return false
		}
		return true
	})
	if unsafeAt >= 0 {
		sub = sub[:unsafeAt]
	}
	if sub == "" {
		return ""
	}
	return fmt.Sprintf(".%s", sub)
}
+ for k, v := range overrides { + if !seen[k] { + newAttrs = append(newAttrs, attribute.String(string(k), v)) + } + } + return transformedSpan{ + ReadOnlySpan: delegate, + attrs: newAttrs, + } +} + +// The methods below forward explicitly to the embedded ReadOnlySpan. This is +// equivalent to method promotion and adds no nil safety: a nil delegate still +// panics on any forwarded call. Only Attributes() differs, returning the +// overridden slice. + +func (s transformedSpan) Name() string { return s.ReadOnlySpan.Name() } +func (s transformedSpan) SpanContext() trace.SpanContext { return s.ReadOnlySpan.SpanContext() } +func (s transformedSpan) Parent() trace.SpanContext { return s.ReadOnlySpan.Parent() } +func (s transformedSpan) SpanKind() trace.SpanKind { return s.ReadOnlySpan.SpanKind() } +func (s transformedSpan) StartTime() time.Time { return s.ReadOnlySpan.StartTime() } +func (s transformedSpan) EndTime() time.Time { return s.ReadOnlySpan.EndTime() } +func (s transformedSpan) Attributes() []attribute.KeyValue { return s.attrs } +func (s transformedSpan) Links() []sdktrace.Link { return s.ReadOnlySpan.Links() } +func (s transformedSpan) Events() []sdktrace.Event { return s.ReadOnlySpan.Events() } +func (s transformedSpan) Status() sdktrace.Status { return s.ReadOnlySpan.Status() } +func (s transformedSpan) DroppedAttributes() int { return s.ReadOnlySpan.DroppedAttributes() } +func (s transformedSpan) DroppedLinks() int { return s.ReadOnlySpan.DroppedLinks() } +func (s transformedSpan) DroppedEvents() int { return s.ReadOnlySpan.DroppedEvents() } +func (s transformedSpan) ChildSpanCount() int { return s.ReadOnlySpan.ChildSpanCount() } +func (s transformedSpan) Resource() *resource.Resource { return s.ReadOnlySpan.Resource() } +func (s transformedSpan) InstrumentationScope() instrumentation.Scope { + return s.ReadOnlySpan.InstrumentationScope() +} + +//nolint:staticcheck // Required by ReadOnlySpan interface for backward compatibility. 
+func (s transformedSpan) InstrumentationLibrary() instrumentation.Library { + return s.ReadOnlySpan.InstrumentationLibrary() +} diff --git a/trace/attachmentprocessor/span_test.go b/trace/attachmentprocessor/span_test.go new file mode 100644 index 00000000..5d535d4c --- /dev/null +++ b/trace/attachmentprocessor/span_test.go @@ -0,0 +1,101 @@ +package attachmentprocessor + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/otel/attribute" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/sdk/trace/tracetest" +) + +// makeReadOnlySpan creates a real ReadOnlySpan with the given attributes for testing. +func makeReadOnlySpan(t *testing.T, attrs ...attribute.KeyValue) sdktrace.ReadOnlySpan { + t.Helper() + exporter := tracetest.NewInMemoryExporter() + tp := sdktrace.NewTracerProvider(sdktrace.WithSyncer(exporter)) + _, span := tp.Tracer("test").Start(context.Background(), "test-span") + span.SetAttributes(attrs...) + span.End() + + stubs := exporter.GetSpans() + if len(stubs) != 1 { + t.Fatalf("expected 1 span, got %d", len(stubs)) + } + return stubs[0].Snapshot() +} + +func TestNewTransformedSpan_OverridesExistingKey(t *testing.T) { + orig := makeReadOnlySpan(t, + attribute.String("braintrust.input_json", "original"), + attribute.String("other.attr", "keep-me"), + ) + + transformed := NewTransformedSpan(orig, map[attribute.Key]string{ + "braintrust.input_json": "replaced", + }) + + attrs := transformed.Attributes() + got := make(map[string]string) + for _, a := range attrs { + got[string(a.Key)] = a.Value.AsString() + } + + assert.Equal(t, "replaced", got["braintrust.input_json"]) + assert.Equal(t, "keep-me", got["other.attr"]) + assert.Len(t, attrs, 2) +} + +func TestNewTransformedSpan_AppendsNewKey(t *testing.T) { + orig := makeReadOnlySpan(t, + attribute.String("existing", "value"), + ) + + transformed := NewTransformedSpan(orig, map[attribute.Key]string{ + "new.key": "new-value", + }) + + 
attrs := transformed.Attributes() + got := make(map[string]string) + for _, a := range attrs { + got[string(a.Key)] = a.Value.AsString() + } + + // The new key should be appended, not silently dropped. + assert.Equal(t, "value", got["existing"]) + assert.Equal(t, "new-value", got["new.key"]) + assert.Len(t, attrs, 2) +} + +func TestNewTransformedSpan_MixedOverrideAndAppend(t *testing.T) { + orig := makeReadOnlySpan(t, + attribute.String("braintrust.input_json", "orig-in"), + attribute.String("other", "preserved"), + ) + + transformed := NewTransformedSpan(orig, map[attribute.Key]string{ + "braintrust.input_json": "new-in", + "braintrust.output_json": "new-out", // not on original + }) + + got := make(map[string]string) + for _, a := range transformed.Attributes() { + got[string(a.Key)] = a.Value.AsString() + } + + assert.Equal(t, "new-in", got["braintrust.input_json"]) + assert.Equal(t, "new-out", got["braintrust.output_json"]) + assert.Equal(t, "preserved", got["other"]) + assert.Len(t, transformed.Attributes(), 3) +} + +func TestNewTransformedSpan_PreservesDelegateMethods(t *testing.T) { + orig := makeReadOnlySpan(t, attribute.String("k", "v")) + transformed := NewTransformedSpan(orig, map[attribute.Key]string{}) + + assert.Equal(t, orig.Name(), transformed.Name()) + assert.Equal(t, orig.SpanContext(), transformed.SpanContext()) + assert.Equal(t, orig.StartTime(), transformed.StartTime()) + assert.Equal(t, orig.EndTime(), transformed.EndTime()) +} diff --git a/trace/attachmentprocessor/uploader.go b/trace/attachmentprocessor/uploader.go new file mode 100644 index 00000000..7e0afeee --- /dev/null +++ b/trace/attachmentprocessor/uploader.go @@ -0,0 +1,560 @@ +package attachmentprocessor + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/braintrustdata/braintrust-sdk-go/logger" +) + +// Uploader enqueues attachment data for background upload. 
+type Uploader interface { + // Enqueue adds an upload job. Returns false if the uploader is shut down + // or the queue is full. + Enqueue(ref Reference, data []byte) bool + // ForceFlush blocks until all currently-enqueued uploads complete or + // timeout expires. + ForceFlush(timeout time.Duration) bool + // Shutdown stops the uploader, waiting up to a generous timeout for + // pending uploads. + Shutdown() + // IsShutdown returns true if the uploader has been shut down. + IsShutdown() bool +} + +// UploaderConfig holds configuration for the S3 uploader. +type UploaderConfig struct { + APIURL string + APIKey string + OrgID string // If empty, resolved via login endpoint. + HTTPClient *http.Client + Logger logger.Logger + MaxRetries int + InitialBackoff time.Duration + RequestTimeout time.Duration + QueueSize int + ShutdownTimeout time.Duration + LoginURL string // App URL for login endpoint (e.g. "https://www.braintrust.dev"). +} + +func (c *UploaderConfig) defaults() { + if c.MaxRetries <= 0 { + c.MaxRetries = 8 + } + if c.InitialBackoff <= 0 { + c.InitialBackoff = 500 * time.Millisecond + } + if c.RequestTimeout <= 0 { + c.RequestTimeout = 60 * time.Second + } + if c.QueueSize <= 0 { + c.QueueSize = 1024 + } + if c.ShutdownTimeout <= 0 { + c.ShutdownTimeout = 120 * time.Second + } + if c.HTTPClient == nil { + c.HTTPClient = &http.Client{Timeout: c.RequestTimeout} + } + if c.Logger == nil { + c.Logger = logger.Discard() + } +} + +// uploadJob is an item in the upload queue. +type uploadJob struct { + ref Reference + data []byte +} + +// S3Uploader uploads attachments in the background via signed URLs. 
+type S3Uploader struct { + cfg UploaderConfig + log logger.Logger + + queue chan uploadJob + stop chan struct{} // closed by Shutdown to tell the worker to drain and exit + done chan struct{} // closed when the worker exits + + mu sync.Mutex + rejectNewJobs bool + workerStarted bool + + // orgID resolution: orgIDOnce ensures resolveOrgID runs at most once, + // even if multiple goroutines call getOrgID concurrently (defensive + // against future concurrency — today only the single worker calls it). + orgIDOnce sync.Once + orgID string + orgIDErr error + + shutdownOnce sync.Once + + // idleMu/idleCond track when the worker is idle (queue empty AND no + // in-flight upload). ForceFlush waits on idleCond. + idleMu sync.Mutex + idleCond *sync.Cond + inflight int // number of jobs currently being processed +} + +// NewS3Uploader creates and returns a new S3 uploader. The background worker +// starts lazily on the first Enqueue call. +func NewS3Uploader(cfg UploaderConfig) *S3Uploader { + cfg.defaults() + u := &S3Uploader{ + cfg: cfg, + log: cfg.Logger, + queue: make(chan uploadJob, cfg.QueueSize), + stop: make(chan struct{}), + done: make(chan struct{}), + } + u.idleCond = sync.NewCond(&u.idleMu) + return u +} + +// Enqueue adds an upload job. Returns false if the uploader is shut down or the queue is full. +func (u *S3Uploader) Enqueue(ref Reference, data []byte) bool { + u.mu.Lock() + if u.rejectNewJobs { + u.mu.Unlock() + return false + } + u.ensureWorkerStartedLocked() + + // Track this job as pending before sending to the channel so that + // ForceFlush can't observe an idle state between the channel send + // and the worker picking it up. + u.idleMu.Lock() + u.inflight++ + u.idleMu.Unlock() + + // Hold mu across the send so this can't race with Shutdown setting + // rejectNewJobs. The send is non-blocking to avoid holding mu while + // the queue is full. 
The queue channel is never closed (Shutdown uses + // a separate stop channel instead), so there's no send-on-closed risk. + // + // Lock ordering note: mu is acquired first, then idleMu. Other paths + // that touch both locks must follow this order to avoid deadlock. + select { + case u.queue <- uploadJob{ref: ref, data: data}: + u.mu.Unlock() + return true + default: + u.mu.Unlock() + // Queue full — undo the inflight bump. + u.idleMu.Lock() + u.inflight-- + u.idleMu.Unlock() + u.idleCond.Broadcast() + return false + } +} + +// ForceFlush blocks until all currently-enqueued uploads complete or timeout expires. +func (u *S3Uploader) ForceFlush(timeout time.Duration) bool { + timer := time.NewTimer(timeout) + defer timer.Stop() + + // cancelled is read/written under idleMu so the timeout path can't lose + // a Broadcast to the waiter (sync.Cond missed-signal race). + cancelled := false + done := make(chan struct{}) + go func() { + u.idleMu.Lock() + for u.inflight > 0 && !cancelled { + u.idleCond.Wait() + } + u.idleMu.Unlock() + close(done) + }() + + select { + case <-done: + return true + case <-timer.C: + // Hold idleMu while setting cancelled+broadcasting so the waiter + // can't miss the wakeup: if the waiter is between its check and + // Wait(), it's still holding idleMu and we'll block here until it + // enters Wait(); if it's already in Wait(), Broadcast wakes it. + u.idleMu.Lock() + cancelled = true + u.idleMu.Unlock() + u.idleCond.Broadcast() + return false + } +} + +// Shutdown stops the uploader, waiting up to a generous timeout for pending +// uploads. Safe to call multiple times. +func (u *S3Uploader) Shutdown() { + u.shutdownOnce.Do(func() { + u.mu.Lock() + u.rejectNewJobs = true + started := u.workerStarted + u.mu.Unlock() + + if !started { + close(u.done) + return + } + + // Signal the worker to drain remaining jobs and exit. + close(u.stop) + + // Wait for worker to finish with timeout. 
+ select { + case <-u.done: + case <-time.After(u.cfg.ShutdownTimeout): + u.log.Warn("attachment uploader shutdown timed out") + } + }) +} + +// IsShutdown returns true if the uploader has been shut down. +func (u *S3Uploader) IsShutdown() bool { + u.mu.Lock() + defer u.mu.Unlock() + return u.rejectNewJobs +} + +// ensureWorkerStartedLocked starts the worker goroutine if not already running. +// Must be called with u.mu held. +func (u *S3Uploader) ensureWorkerStartedLocked() { + if u.workerStarted { + return + } + u.workerStarted = true + go u.workerLoop() +} + +func (u *S3Uploader) workerLoop() { + defer close(u.done) + u.log.Debug("attachment uploader worker started") + + for { + select { + case job := <-u.queue: + u.processJob(job) + case <-u.stop: + // Drain remaining jobs before exiting. + for { + select { + case job := <-u.queue: + u.processJob(job) + default: + u.idleCond.Broadcast() + u.log.Debug("attachment uploader worker stopped") + return + } + } + } + } +} + +// processJob runs a single upload with panic recovery, then accounts for +// the completed job. An unrecovered panic in a goroutine terminates the whole +// program, so a panic in upload code is recovered here, logged, and the +// uploader is switched to rejecting new jobs instead. +func (u *S3Uploader) processJob(job uploadJob) { + defer func() { + if r := recover(); r != nil { + u.log.Error("attachment upload panicked", "key", job.ref.Key, "panic", r) + u.failAndReject() + } + // Always account for the job, even on panic, so ForceFlush can + // make progress. 
+ u.idleMu.Lock() + u.inflight-- + u.idleMu.Unlock() + u.idleCond.Broadcast() + }() + u.upload(job) +} + +func (u *S3Uploader) upload(job uploadJob) { + orgID, err := u.getOrgID() + if err != nil { + u.log.Warn("failed to resolve org ID for attachment upload", "error", err) + u.reportStatus(job.ref.Key, "error", err.Error()) + u.failAndReject() + return + } + + signedURL, headers, err := u.requestUploadURL(orgID, job.ref) + if err != nil { + u.log.Warn("failed to request upload URL", "key", job.ref.Key, "error", err) + u.reportStatus(job.ref.Key, "error", err.Error()) + u.failAndReject() + return + } + + if err := u.uploadToSignedURL(signedURL, headers, job.ref.ContentType, job.data); err != nil { + u.log.Warn("failed to upload to signed URL", "key", job.ref.Key, "error", err) + u.reportStatus(job.ref.Key, "error", err.Error()) + u.failAndReject() + return + } + + u.reportStatus(job.ref.Key, "done", "") +} + +func (u *S3Uploader) failAndReject() { + u.mu.Lock() + u.rejectNewJobs = true + u.mu.Unlock() +} + +func (u *S3Uploader) getOrgID() (string, error) { + u.orgIDOnce.Do(func() { + if u.cfg.OrgID != "" { + u.orgID = u.cfg.OrgID + return + } + u.orgID, u.orgIDErr = u.resolveOrgID() + }) + return u.orgID, u.orgIDErr +} + +func (u *S3Uploader) resolveOrgID() (string, error) { + loginURL := u.cfg.LoginURL + if loginURL == "" { + loginURL = u.cfg.APIURL + } + reqURL := strings.TrimRight(loginURL, "/") + "/api/apikey/login" + + req, err := http.NewRequestWithContext(context.Background(), http.MethodPost, reqURL, nil) + if err != nil { + return "", fmt.Errorf("create login request: %w", err) + } + req.Header.Set("Authorization", "Bearer "+u.cfg.APIKey) + + resp, err := u.cfg.HTTPClient.Do(req) + if err != nil { + return "", fmt.Errorf("login request failed: %w", err) + } + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return "", fmt.Errorf("login returned status %d: %s", resp.StatusCode, 
string(body)) + } + + var loginResp struct { + OrgInfo []struct { + ID string `json:"id"` + } `json:"org_info"` + } + if err := json.NewDecoder(resp.Body).Decode(&loginResp); err != nil { + return "", fmt.Errorf("decode login response: %w", err) + } + if len(loginResp.OrgInfo) == 0 { + return "", fmt.Errorf("no org info returned from login") + } + return loginResp.OrgInfo[0].ID, nil +} + +// ── S3 HTTP operations ───────────────────────────────────────────── + +func (u *S3Uploader) requestUploadURL(orgID string, ref Reference) (signedURL string, headers map[string]string, err error) { + body, err := json.Marshal(map[string]string{ + "key": ref.Key, + "filename": ref.Filename, + "content_type": ref.ContentType, + "org_id": orgID, + }) + if err != nil { + return "", nil, err + } + + reqURL := strings.TrimRight(u.cfg.APIURL, "/") + "/attachment" + respBody, err := u.doWithRetry(http.MethodPost, reqURL, "application/json", body, true) + if err != nil { + return "", nil, err + } + + var result struct { + SignedURL string `json:"signedUrl"` + Headers map[string]string `json:"headers"` + } + if err := json.Unmarshal(respBody, &result); err != nil { + return "", nil, fmt.Errorf("decode upload URL response: %w", err) + } + if result.SignedURL == "" { + return "", nil, fmt.Errorf("signed URL response missing signedUrl") + } + if result.Headers == nil { + result.Headers = map[string]string{} + } + return result.SignedURL, result.Headers, nil +} + +func (u *S3Uploader) uploadToSignedURL(signedURL string, headers map[string]string, contentType string, data []byte) error { + req, err := http.NewRequestWithContext(context.Background(), http.MethodPut, signedURL, bytes.NewReader(data)) + if err != nil { + return err + } + req.Header.Set("Content-Type", contentType) + for k, v := range headers { + req.Header.Set(k, v) + } + addProviderSpecificHeaders(signedURL, req) + + resp, err := u.doRequestWithRetry(req) + if err != nil { + return err + } + _ = resp.Body.Close() + + if 
resp.StatusCode < 200 || resp.StatusCode >= 300 { + return fmt.Errorf("upload to object store: HTTP %d", resp.StatusCode) + } + return nil +} + +func (u *S3Uploader) reportStatus(key, status, errMsg string) { + orgID, err := u.getOrgID() + if err != nil { + u.log.Warn("failed to get org ID for status report", "error", err) + return + } + + statusMap := map[string]any{"upload_status": status} + if errMsg != "" { + statusMap["error_message"] = errMsg + } + + body, err := json.Marshal(map[string]any{ + "key": key, + "org_id": orgID, + "status": statusMap, + }) + if err != nil { + u.log.Warn("failed to marshal status report", "error", err) + return + } + + reqURL := strings.TrimRight(u.cfg.APIURL, "/") + "/attachment/status" + if _, err := u.doWithRetry(http.MethodPost, reqURL, "application/json", body, true); err != nil { + u.log.Warn("failed to report attachment status", "key", key, "status", status, "error", err) + } +} + +// ── HTTP helpers ─────────────────────────────────────────────────── + +func (u *S3Uploader) doWithRetry(method, reqURL, contentType string, body []byte, auth bool) ([]byte, error) { + req, err := http.NewRequestWithContext(context.Background(), method, reqURL, bytes.NewReader(body)) + if err != nil { + return nil, err + } + req.Header.Set("Content-Type", contentType) + if auth { + req.Header.Set("Authorization", "Bearer "+u.cfg.APIKey) + } + + resp, err := u.doRequestWithRetry(req) + if err != nil { + return nil, err + } + defer func() { _ = resp.Body.Close() }() + + respBody, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("read response body: %w", err) + } + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(respBody)) + } + return respBody, nil +} + +func (u *S3Uploader) doRequestWithRetry(req *http.Request) (*http.Response, error) { + var lastErr error + backoff := u.cfg.InitialBackoff + + for attempt := 0; attempt <= u.cfg.MaxRetries; attempt++ { + 
if attempt > 0 { + u.log.Debug("retrying request", "url", req.URL.String(), "attempt", attempt) + // Sleep cancellable by Shutdown so retry backoff doesn't keep + // the worker (and thus Shutdown) running far past the user's + // deadline. + timer := time.NewTimer(backoff) + select { + case <-timer.C: + case <-u.stop: + timer.Stop() + return nil, fmt.Errorf("request to %s cancelled during retry backoff", req.URL.String()) + } + backoff *= 2 + } + + // Clone body for retry. + if req.GetBody != nil { + body, err := req.GetBody() + if err != nil { + return nil, err + } + req.Body = body + } + + resp, err := u.cfg.HTTPClient.Do(req) + if err != nil { + lastErr = err + continue + } + + // Don't retry client errors (4xx) or successes. + if resp.StatusCode < 500 { + return resp, nil + } + + // Server error (5xx) — retry. + _ = resp.Body.Close() + lastErr = fmt.Errorf("server error: HTTP %d", resp.StatusCode) + } + + return nil, fmt.Errorf("request to %s failed after %d retries: %w", + req.URL.String(), u.cfg.MaxRetries, lastErr) +} + +// addProviderSpecificHeaders inspects the signed URL host and adds any +// headers required by that specific cloud storage provider. Braintrust's +// backend may issue signed URLs for different providers depending on the +// org's configuration. +func addProviderSpecificHeaders(signedURL string, req *http.Request) { + u, err := url.Parse(signedURL) + if err != nil { + return + } + // Azure Blob Storage requires this header on PUT uploads or the + // request fails with HTTP 400. + if strings.HasSuffix(u.Host, ".blob.core.windows.net") { + req.Header.Set("x-ms-blob-type", "BlockBlob") + } +} + +// NoopUploader is an uploader that accepts all jobs but does nothing. +// Useful for testing the processor in isolation. +type NoopUploader struct { + shutdown atomic.Bool +} + +// Enqueue accepts the job but does nothing. 
+func (u *NoopUploader) Enqueue(_ Reference, _ []byte) bool { return !u.shutdown.Load() } + +// ForceFlush is a no-op that always succeeds. +func (u *NoopUploader) ForceFlush(_ time.Duration) bool { return true } + +// Shutdown marks the uploader as shut down. +func (u *NoopUploader) Shutdown() { u.shutdown.Store(true) } + +// IsShutdown returns true if the uploader has been shut down. +func (u *NoopUploader) IsShutdown() bool { return u.shutdown.Load() } diff --git a/trace/attachmentprocessor/uploader_test.go b/trace/attachmentprocessor/uploader_test.go new file mode 100644 index 00000000..0a7052af --- /dev/null +++ b/trace/attachmentprocessor/uploader_test.go @@ -0,0 +1,280 @@ +package attachmentprocessor + +import ( + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/braintrustdata/braintrust-sdk-go/logger" +) + +func TestS3UploaderEndToEnd(t *testing.T) { + var mu sync.Mutex + var uploadedData []byte + var uploadedContentType string + var statusReported string + var attachmentReqBody map[string]string + var serverURL string // set after server starts + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + mu.Lock() + defer mu.Unlock() + + switch { + case r.URL.Path == "/api/apikey/login": + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"org_info":[{"id":"org-123","name":"test-org"}]}`)) + + case r.URL.Path == "/attachment" && r.Method == http.MethodPost: + body, _ := io.ReadAll(r.Body) + _ = json.Unmarshal(body, &attachmentReqBody) + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"signedUrl":"` + serverURL + `/upload","headers":{"x-custom":"val"}}`)) + + case r.URL.Path == "/upload" && r.Method == http.MethodPut: + uploadedData, _ = io.ReadAll(r.Body) + uploadedContentType = r.Header.Get("Content-Type") + 
w.WriteHeader(http.StatusOK) + + case r.URL.Path == "/attachment/status" && r.Method == http.MethodPost: + body, _ := io.ReadAll(r.Body) + var req map[string]any + _ = json.Unmarshal(body, &req) + status := req["status"].(map[string]any) + statusReported = status["upload_status"].(string) + w.WriteHeader(http.StatusOK) + + default: + w.WriteHeader(http.StatusNotFound) + } + })) + defer server.Close() + serverURL = server.URL + + u := NewS3Uploader(UploaderConfig{ + APIURL: server.URL, + APIKey: "test-key", + LoginURL: server.URL, + HTTPClient: server.Client(), + Logger: logger.Discard(), + QueueSize: 16, + }) + + ref := Reference{ + Type: "braintrust_attachment", + ContentType: "image/png", + Filename: "attachment.png", + Key: "test-key-123", + } + + ok := u.Enqueue(ref, []byte("fake-png-data")) + require.True(t, ok, "enqueue should succeed") + + flushed := u.ForceFlush(5 * time.Second) + assert.True(t, flushed, "flush should succeed") + + mu.Lock() + defer mu.Unlock() + + assert.Equal(t, []byte("fake-png-data"), uploadedData) + assert.Equal(t, "image/png", uploadedContentType) + assert.Equal(t, "done", statusReported) + assert.Equal(t, "test-key-123", attachmentReqBody["key"]) + assert.Equal(t, "attachment.png", attachmentReqBody["filename"]) + assert.Equal(t, "image/png", attachmentReqBody["content_type"]) + assert.Equal(t, "org-123", attachmentReqBody["org_id"]) +} + +func TestS3UploaderShutdown(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/apikey/login": + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"org_info":[{"id":"org-123","name":"test-org"}]}`)) + default: + w.WriteHeader(http.StatusOK) + } + })) + defer server.Close() + + u := NewS3Uploader(UploaderConfig{ + APIURL: server.URL, + APIKey: "test-key", + LoginURL: server.URL, + HTTPClient: server.Client(), + Logger: logger.Discard(), + }) + + u.Shutdown() + assert.True(t, 
u.IsShutdown()) + + ok := u.Enqueue(Reference{Key: "k"}, []byte("data")) + assert.False(t, ok, "enqueue after shutdown should return false") +} + +func TestS3UploaderEnqueueDuringShutdown(t *testing.T) { + var srvURL string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/apikey/login": + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"org_info":[{"id":"org-1"}]}`)) + case "/attachment": + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"signedUrl":"` + srvURL + `/upload","headers":{}}`)) + default: + w.WriteHeader(http.StatusOK) + } + })) + defer server.Close() + srvURL = server.URL + + u := NewS3Uploader(UploaderConfig{ + APIURL: server.URL, + APIKey: "k", + LoginURL: server.URL, + HTTPClient: server.Client(), + Logger: logger.Discard(), + QueueSize: 4, + }) + + // Warm up the worker so queue is open. + u.Enqueue(NewReference("image/png"), []byte("warm")) + + // Race Enqueue against Shutdown — must not panic. + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + u.Shutdown() + }() + go func() { + defer wg.Done() + // May return true or false, but must not panic. + u.Enqueue(NewReference("image/png"), []byte("race")) + }() + wg.Wait() + assert.True(t, u.IsShutdown()) +} + +func TestS3UploaderDoubleShutdown(t *testing.T) { + u := NewS3Uploader(UploaderConfig{ + APIURL: "http://localhost", + APIKey: "test-key", + Logger: logger.Discard(), + }) + + // First shutdown should succeed; second should not panic. + u.Shutdown() + assert.True(t, u.IsShutdown()) + u.Shutdown() // must not panic +} + +// panickyTransport panics on the first request. Used to verify the worker +// goroutine survives a panic in the upload path and continues running. 
+type panickyTransport struct { + calls atomic.Int32 +} + +func (t *panickyTransport) RoundTrip(req *http.Request) (*http.Response, error) { + if t.calls.Add(1) == 1 { + panic("simulated panic in upload") + } + // Subsequent calls return a generic 500 so the test can assert + // the worker is still alive. + return &http.Response{ + StatusCode: 500, + Body: http.NoBody, + Header: make(http.Header), + }, nil +} + +func TestS3UploaderPanicRecovery(t *testing.T) { + transport := &panickyTransport{} + u := NewS3Uploader(UploaderConfig{ + APIURL: "http://test", + APIKey: "k", + OrgID: "org-1", // skip login + HTTPClient: &http.Client{Transport: transport}, + Logger: logger.Discard(), + MaxRetries: 1, + InitialBackoff: 1 * time.Millisecond, + }) + + // First enqueue triggers the panic. The worker must not die. + ok := u.Enqueue(NewReference("image/png"), []byte("data")) + require.True(t, ok) + + // Wait until the panic has been processed (inflight back to 0). Use + // ForceFlush which waits on the idle condition. + assert.True(t, u.ForceFlush(2*time.Second), + "worker should account for the panicked job and become idle") + + // After a panic, failAndReject is called → uploader rejects further work. + assert.True(t, u.IsShutdown(), "uploader should reject new jobs after a panic") + + // Cleanup. + u.Shutdown() +} + +func TestProviderSpecificHeaders(t *testing.T) { + // Azure Blob Storage URL should get the x-ms-blob-type header. + req, _ := http.NewRequest(http.MethodPut, "https://myaccount.blob.core.windows.net/container/blob?sig=xxx", nil) + addProviderSpecificHeaders("https://myaccount.blob.core.windows.net/container/blob?sig=xxx", req) + assert.Equal(t, "BlockBlob", req.Header.Get("x-ms-blob-type")) + + // S3 URL should not get the Azure-specific header. 
+ req2, _ := http.NewRequest(http.MethodPut, "https://s3.amazonaws.com/bucket/key", nil) + addProviderSpecificHeaders("https://s3.amazonaws.com/bucket/key", req2) + assert.Empty(t, req2.Header.Get("x-ms-blob-type")) +} + +func TestS3UploaderPreConfiguredOrgID(t *testing.T) { + var capturedOrgID string + var srvURL string + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.URL.Path == "/attachment" && r.Method == http.MethodPost: + body, _ := io.ReadAll(r.Body) + var req map[string]string + _ = json.Unmarshal(body, &req) + capturedOrgID = req["org_id"] + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"signedUrl":"` + srvURL + `/upload","headers":{}}`)) + case r.URL.Path == "/upload": + w.WriteHeader(http.StatusOK) + case r.URL.Path == "/attachment/status": + w.WriteHeader(http.StatusOK) + default: + t.Errorf("unexpected request to %s", r.URL.Path) + w.WriteHeader(http.StatusNotFound) + } + })) + defer server.Close() + srvURL = server.URL + + u := NewS3Uploader(UploaderConfig{ + APIURL: server.URL, + APIKey: "test-key", + OrgID: "pre-configured-org", + HTTPClient: server.Client(), + Logger: logger.Discard(), + }) + + ref := NewReference("image/png") + ok := u.Enqueue(ref, []byte("data")) + require.True(t, ok) + + u.ForceFlush(5 * time.Second) + + assert.Equal(t, "pre-configured-org", capturedOrgID) +} diff --git a/trace/contrib/anthropic/messages.go b/trace/contrib/anthropic/messages.go index 7d4ad258..1700516e 100644 --- a/trace/contrib/anthropic/messages.go +++ b/trace/contrib/anthropic/messages.go @@ -37,14 +37,11 @@ func newMessagesTracer(cfg *middlewareConfig) *messagesTracer { func (mt *messagesTracer) StartSpan(ctx context.Context, t time.Time, request io.Reader) (context.Context, trace.Span, error) { mt.startTime = t - ctx, span := mt.cfg.tracer().Start( - ctx, - "anthropic.messages.create", - trace.WithTimestamp(t), - ) var raw map[string]interface{} if err := 
json.NewDecoder(request).Decode(&raw); err != nil { + // Fall back to default span name on parse error. + ctx, span := mt.cfg.tracer().Start(ctx, "anthropic.messages.create", trace.WithTimestamp(t)) return ctx, span, err } @@ -78,22 +75,37 @@ func (mt *messagesTracer) StartSpan(ctx context.Context, t time.Time, request io } } - // Build input messages array, prepending system prompt if present + // Use a distinct span name for streaming calls. + spanName := "anthropic.messages.create" + if mt.streaming { + spanName = "anthropic.messages.stream" + } + + ctx, span := mt.cfg.tracer().Start(ctx, spanName, trace.WithTimestamp(t)) + + // Build input messages array, appending system prompt last var msgs []any - // Prepend system prompt as a message if present + // Add user/assistant messages first, normalizing content blocks + if messages, ok := raw["messages"].([]any); ok { + for _, m := range messages { + if msg, ok := m.(map[string]any); ok { + msgs = append(msgs, normalizeMessageContent(msg)) + } else { + msgs = append(msgs, m) + } + } + } + + // Append system prompt as a message at the end. + // The system field can be a string or a list of content blocks. if system, ok := raw["system"]; ok { msgs = append(msgs, map[string]any{ "role": "system", - "content": system, + "content": simplifyContentBlocks(system), }) } - // Add user/assistant messages - if messages, ok := raw["messages"].([]any); ok { - msgs = append(msgs, messages...) 
- } - if len(msgs) > 0 { if err := internal.SetJSONAttr(span, "braintrust.input_json", msgs); err != nil { return ctx, span, err @@ -104,6 +116,10 @@ func (mt *messagesTracer) StartSpan(ctx context.Context, t time.Time, request io return ctx, span, err } + if err := internal.SetJSONAttr(span, "braintrust.span_attributes", map[string]string{"type": "llm"}); err != nil { + return ctx, span, err + } + return ctx, span, nil } @@ -407,3 +423,51 @@ func (mt *messagesTracer) handleMessageResponse(span trace.Span, rawMsg map[stri return nil } + +// normalizeMessageContent simplifies a message's content field when it is a +// single text block with no extra fields (e.g. cache_control), converting +// [{type: "text", text: "hello"}] to just "hello". +func normalizeMessageContent(msg map[string]any) map[string]any { + content := msg["content"] + // Only attempt simplification for list content. + if _, isList := content.([]any); !isList { + return msg + } + simplified := simplifyContentBlocks(content) + // If simplifyContentBlocks returned a string, it was simplified. + if _, isStr := simplified.(string); !isStr { + return msg + } + // Shallow-copy to avoid mutating the original map. + out := make(map[string]any, len(msg)) + for k, v := range msg { + out[k] = v + } + out["content"] = simplified + return out +} + +// simplifyContentBlocks converts a list of content blocks to a plain string +// when the list contains exactly one text block with only type+text fields. +func simplifyContentBlocks(content any) any { + blocks, ok := content.([]any) + if !ok || len(blocks) != 1 { + return content + } + block, ok := blocks[0].(map[string]any) + if !ok { + return content + } + if block["type"] != "text" { + return content + } + text, ok := block["text"].(string) + if !ok { + return content + } + // Only simplify if there are no extra fields (e.g. cache_control). 
+ if len(block) > 2 { + return content + } + return text +} diff --git a/trace/contrib/anthropic/traceanthropic_test.go b/trace/contrib/anthropic/traceanthropic_test.go index 2175b02a..00644cdd 100644 --- a/trace/contrib/anthropic/traceanthropic_test.go +++ b/trace/contrib/anthropic/traceanthropic_test.go @@ -329,7 +329,7 @@ func TestMiddlewareIntegrationStreaming(t *testing.T) { // Validate spans were generated correctly span := exporter.FlushOne() - assertSpanValid(t, span, timeRange) + assertStreamingSpanValid(t, span, timeRange) // Verify span content input := span.Attr("braintrust.input_json").String() @@ -388,13 +388,24 @@ func setUpTest(t *testing.T) (anthropic.Client, *oteltest.Exporter) { return client, exporter } -// assertSpanValid asserts all the common properties of an Anthropic span are valid. +// assertSpanValid asserts all the common properties of a non-streaming Anthropic span. func assertSpanValid(t *testing.T, span oteltest.Span, timeRange oteltest.TimeRange) { + t.Helper() + assertSpanValidWithName(t, span, timeRange, "anthropic.messages.create") +} + +// assertStreamingSpanValid asserts all the common properties of a streaming Anthropic span. 
+func assertStreamingSpanValid(t *testing.T, span oteltest.Span, timeRange oteltest.TimeRange) { + t.Helper() + assertSpanValidWithName(t, span, timeRange, "anthropic.messages.stream") +} + +func assertSpanValidWithName(t *testing.T, span oteltest.Span, timeRange oteltest.TimeRange, expectedName string) { t.Helper() assert := assert.New(t) span.AssertInTimeRange(timeRange) - span.AssertNameIs("anthropic.messages.create") + span.AssertNameIs(expectedName) assert.Equal(codes.Unset, span.Stub.Status.Code) metadata := span.Metadata() @@ -476,7 +487,7 @@ func TestStreamingWithThinking(t *testing.T) { // Validate span span := exporter.FlushOne() - assertSpanValid(t, span, timeRange) + assertStreamingSpanValid(t, span, timeRange) // Verify the span output contains both thinking and text blocks outputStr := span.Attr("braintrust.output_json").String() @@ -544,7 +555,7 @@ func TestStreamingWithCitations(t *testing.T) { require.Greater(t, citationDeltas, 0, "expected citations_delta events in streaming response") span := exporter.FlushOne() - assertSpanValid(t, span, timeRange) + assertStreamingSpanValid(t, span, timeRange) output := span.Output() messages, ok := output.([]any) @@ -720,7 +731,7 @@ func TestStreamingWithTools(t *testing.T) { // Validate span span := exporter.FlushOne() - assertSpanValid(t, span, timeRange) + assertStreamingSpanValid(t, span, timeRange) // Verify metadata metadata := span.Metadata() diff --git a/trace/contrib/bedrockruntime/stream.go b/trace/contrib/bedrockruntime/stream.go index 8059e8a7..98b5ed20 100644 --- a/trace/contrib/bedrockruntime/stream.go +++ b/trace/contrib/bedrockruntime/stream.go @@ -22,11 +22,11 @@ type converseStreamTracer struct { } func (t *converseStreamTracer) StartSpan(ctx context.Context, start time.Time, in any) (context.Context, trace.Span) { - ctx, span := t.cfg.tracer().Start(ctx, "bedrock.converse_stream", trace.WithTimestamp(start)) + ctx, span := t.cfg.tracer().Start(ctx, "bedrock.converse-stream", 
trace.WithTimestamp(start)) t.metadata = map[string]any{ "provider": "bedrock", - "endpoint": "converse_stream", + "endpoint": "converse-stream", } params, ok := in.(*bedrockruntime.ConverseStreamInput) diff --git a/trace/contrib/bedrockruntime/tracebedrockruntime_test.go b/trace/contrib/bedrockruntime/tracebedrockruntime_test.go index 6217abd9..b7f16a2e 100644 --- a/trace/contrib/bedrockruntime/tracebedrockruntime_test.go +++ b/trace/contrib/bedrockruntime/tracebedrockruntime_test.go @@ -303,10 +303,10 @@ func TestConverseStream(t *testing.T) { assert.Contains(t, gotText.String(), "Paris") span := exporter.FlushOne() - assertSpanValid(t, span, timeRange, "bedrock.converse_stream") + assertSpanValid(t, span, timeRange, "bedrock.converse-stream") metadata := span.Metadata() - assert.Equal(t, "converse_stream", metadata["endpoint"]) + assert.Equal(t, "converse-stream", metadata["endpoint"]) assert.Equal(t, true, metadata["stream"]) assert.Equal(t, "end_turn", metadata["stop_reason"]) diff --git a/trace/contrib/openai/chatcompletions.go b/trace/contrib/openai/chatcompletions.go index 5198f01f..9564d01c 100644 --- a/trace/contrib/openai/chatcompletions.go +++ b/trace/contrib/openai/chatcompletions.go @@ -98,6 +98,10 @@ func (ct *chatCompletionsTracer) StartSpan(ctx context.Context, t time.Time, req return ctx, span, err } + if err := internal.SetJSONAttr(span, "braintrust.span_attributes", map[string]string{"type": "llm"}); err != nil { + return ctx, span, err + } + return ctx, span, nil } diff --git a/trace/contrib/openai/responses.go b/trace/contrib/openai/responses.go index e3d17803..5e97e5cc 100644 --- a/trace/contrib/openai/responses.go +++ b/trace/contrib/openai/responses.go @@ -96,6 +96,10 @@ func (rt *responsesTracer) StartSpan(ctx context.Context, t time.Time, request i } span.SetAttributes(attribute.String("braintrust.metadata", string(b))) + if err := internal.SetJSONAttr(span, "braintrust.span_attributes", map[string]string{"type": "llm"}); err != nil { + 
return ctx, span, err + } + return ctx, span, nil } diff --git a/trace/trace.go b/trace/trace.go index ab501406..b6fa527c 100644 --- a/trace/trace.go +++ b/trace/trace.go @@ -30,6 +30,7 @@ import ( "net/url" "strings" "sync" + "time" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/baggage" @@ -41,6 +42,7 @@ import ( "github.com/braintrustdata/braintrust-sdk-go/internal/auth" "github.com/braintrustdata/braintrust-sdk-go/logger" + "github.com/braintrustdata/braintrust-sdk-go/trace/attachmentprocessor" ) // Config holds configuration for Braintrust tracing @@ -54,12 +56,18 @@ type Config struct { EnableBuiltinAdkTraces bool // if false (default), drop spans from Google ADK (gcp.vertex.agent) to avoid duplicates SpanFilterFuncs []SpanFilterFunc + // Attachment processing + AutoConvertAIAttachments bool // scan spans for base64 attachments and upload them + // Debug EnableTraceConsoleLog bool // Test override: provide custom exporter (e.g., memory exporter for tests) Exporter sdktrace.SpanExporter + // Test override: custom attachment uploader (e.g., noop for tests) + AttachmentUploader attachmentprocessor.Uploader + // Logger Logger logger.Logger } @@ -121,6 +129,24 @@ func GetSpanProcessor(session *auth.Session, cfg Config) (sdktrace.SpanProcessor log.Debug("AI span filtering enabled") } + // Set up attachment processor if enabled. + var ap *attachmentprocessor.Processor + var uploader attachmentprocessor.Uploader + if cfg.AutoConvertAIAttachments { + if cfg.AttachmentUploader != nil { + uploader = cfg.AttachmentUploader + } else { + uploader = attachmentprocessor.NewS3Uploader(attachmentprocessor.UploaderConfig{ + APIURL: apiInfo.APIURL, + APIKey: apiInfo.APIKey, + LoginURL: session.AppPublicURL(), + Logger: log, + }) + } + ap = attachmentprocessor.NewProcessor(uploader, log) + log.Debug("attachment processing enabled") + } + // Wrap with Braintrust span processor (adds parent labels, filtering, etc.) 
// The processor will get endpoints and org name from session dynamically btProcessor, err := newSpanProcessor( @@ -130,6 +156,8 @@ func GetSpanProcessor(session *auth.Session, cfg Config) (sdktrace.SpanProcessor rootFilters, session, log, + ap, + uploader, ) if err != nil { return nil, err @@ -342,13 +370,21 @@ func (o *otelAttrs) makeAttrs() { o.mu.Unlock() } +// OTel attribute keys for Braintrust span input/output. +const ( + inputJSONAttrKey = attribute.Key("braintrust.input_json") + outputJSONAttrKey = attribute.Key("braintrust.output_json") +) + type spanProcessor struct { - wrapped sdktrace.SpanProcessor - filters []SpanFilterFunc - rootFilters []SpanFilterFunc - otelAttrs *otelAttrs - session *auth.Session // Session provides endpoints and org name - logger logger.Logger + wrapped sdktrace.SpanProcessor + filters []SpanFilterFunc + rootFilters []SpanFilterFunc + otelAttrs *otelAttrs + session *auth.Session // Session provides endpoints and org name + logger logger.Logger + attachmentProcessor *attachmentprocessor.Processor // nil when attachment processing is disabled + attachmentUploader attachmentprocessor.Uploader // nil when attachment processing is disabled } // newSpanProcessor creates a new span processor that wraps another processor and adds parent labeling. 
@@ -359,6 +395,8 @@ func newSpanProcessor( rootFilters []SpanFilterFunc, session *auth.Session, log logger.Logger, + ap *attachmentprocessor.Processor, + uploader attachmentprocessor.Uploader, ) (*spanProcessor, error) { // Get app URL from session appURL := session.AppPublicURL() @@ -367,12 +405,14 @@ func newSpanProcessor( attrs := newOtelAttrs(defaultParent, "", appURL) sp := &spanProcessor{ - wrapped: proc, - filters: filters, - rootFilters: rootFilters, - otelAttrs: attrs, - session: session, - logger: log, + wrapped: proc, + filters: filters, + rootFilters: rootFilters, + otelAttrs: attrs, + session: session, + logger: log, + attachmentProcessor: ap, + attachmentUploader: uploader, } return sp, nil @@ -419,9 +459,46 @@ func (sp *spanProcessor) OnStart(ctx context.Context, span sdktrace.ReadWriteSpa // OnEnd is called when a span ends. func (sp *spanProcessor) OnEnd(span sdktrace.ReadOnlySpan) { // Apply filters to determine if we should forward this span - if sp.shouldForwardSpan(span) { - sp.wrapped.OnEnd(span) + if !sp.shouldForwardSpan(span) { + return + } + + // Process attachments if enabled. + if sp.attachmentProcessor != nil { + span = sp.processAttachments(span) + } + + sp.wrapped.OnEnd(span) +} + +// processAttachments scans input_json and output_json for base64 attachments, +// uploads them, and returns a transformed span with replacement references. 
+func (sp *spanProcessor) processAttachments(span sdktrace.ReadOnlySpan) sdktrace.ReadOnlySpan { + var inputJSON, outputJSON string + for _, a := range span.Attributes() { + switch a.Key { + case inputJSONAttrKey: + inputJSON = a.Value.AsString() + case outputJSONAttrKey: + outputJSON = a.Value.AsString() + } } + + newInputJSON := sp.attachmentProcessor.ProcessAndUpload(inputJSON) + newOutputJSON := sp.attachmentProcessor.ProcessAndUpload(outputJSON) + + if newInputJSON == inputJSON && newOutputJSON == outputJSON { + return span + } + + overrides := make(map[attribute.Key]string) + if newInputJSON != inputJSON { + overrides[inputJSONAttrKey] = newInputJSON + } + if newOutputJSON != outputJSON { + overrides[outputJSONAttrKey] = newOutputJSON + } + return attachmentprocessor.NewTransformedSpan(span, overrides) } // shouldForwardSpan applies filter functions to determine if a span should be forwarded. @@ -452,14 +529,54 @@ func (sp *spanProcessor) shouldForwardSpan(span sdktrace.ReadOnlySpan) bool { return true } -// Shutdown shuts down the span processor. +// Shutdown shuts down the span processor. It is bounded by ctx's deadline +// (if any) so a stuck uploader cannot block process exit beyond the caller's +// budget. func (sp *spanProcessor) Shutdown(ctx context.Context) error { - return sp.wrapped.Shutdown(ctx) + // Shut down the span exporter first so all buffered spans (including + // those with attachment references) are flushed to the collector. + err := sp.wrapped.Shutdown(ctx) + + if sp.attachmentUploader == nil { + return err + } + + // Shutdown drains any remaining queued uploads and exits, bounded by + // the uploader's internal ShutdownTimeout (default 120s). Run it in a + // goroutine so we can give up early if ctx expires. 
+ done := make(chan struct{}) + go func() { + sp.attachmentUploader.Shutdown() + close(done) + }() + + select { + case <-done: + case <-ctx.Done(): + sp.logger.Warn("attachment uploader shutdown abandoned: context done", "error", ctx.Err()) + } + return err } // ForceFlush forces a flush of the span processor. func (sp *spanProcessor) ForceFlush(ctx context.Context) error { - return sp.wrapped.ForceFlush(ctx) + err := sp.wrapped.ForceFlush(ctx) + if sp.attachmentUploader != nil { + sp.attachmentUploader.ForceFlush(timeoutFromContext(ctx, 30*time.Second)) + } + return err +} + +// timeoutFromContext returns the time remaining until ctx's deadline, or +// fallback if ctx has no deadline. +func timeoutFromContext(ctx context.Context, fallback time.Duration) time.Duration { + if deadline, ok := ctx.Deadline(); ok { + if remaining := time.Until(deadline); remaining > 0 { + return remaining + } + return 0 + } + return fallback } var _ sdktrace.SpanProcessor = &spanProcessor{} diff --git a/trace/trace_test.go b/trace/trace_test.go index 38f6e164..12de777a 100644 --- a/trace/trace_test.go +++ b/trace/trace_test.go @@ -17,6 +17,7 @@ import ( "github.com/braintrustdata/braintrust-sdk-go/internal/auth" "github.com/braintrustdata/braintrust-sdk-go/logger" + "github.com/braintrustdata/braintrust-sdk-go/trace/attachmentprocessor" ) // Test helper: create a session for testing with proper auth info @@ -700,3 +701,138 @@ func TestHTTPOtelOptsEnableGzipCompression(t *testing.T) { assert.Equal(t, "gzip", <-contentEncoding) assert.NoError(t, tp.Shutdown(context.Background())) } + +func TestAttachmentProcessing_ReplacesBase64InSpan(t *testing.T) { + assert := assert.New(t) + + tp := sdktrace.NewTracerProvider() + exporter := tracetest.NewInMemoryExporter() + + session := newTestSession() + cfg := Config{ + DefaultProjectID: "attachment-test", + AutoConvertAIAttachments: true, + AttachmentUploader: &attachmentprocessor.NoopUploader{}, + Exporter: exporter, + Logger: logger.Discard(), + 
} + + err := AddSpanProcessor(tp, session, cfg) + assert.NoError(err) + + tracer := tp.Tracer("test") + + // Create a span with OpenAI-format base64 image data + base64PNG := "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==" + inputJSON := `[{"role":"user","content":[{"type":"image_url","image_url":{"url":"data:image/png;base64,` + base64PNG + `"}}]}]` + + _, span := tracer.Start(context.Background(), "chat-completion") + span.SetAttributes(attribute.String("braintrust.input_json", inputJSON)) + span.End() + + _ = tp.ForceFlush(context.Background()) + spans := exporter.GetSpans() + + assert.Len(spans, 1) + assert.Equal("chat-completion", spans[0].Name) + + // Find the input_json attribute + var exportedInputJSON string + for _, a := range spans[0].Attributes { + if string(a.Key) == "braintrust.input_json" { + exportedInputJSON = a.Value.AsString() + break + } + } + + assert.NotEmpty(exportedInputJSON) + assert.NotEqual(inputJSON, exportedInputJSON, "base64 data should have been replaced") + assert.NotContains(exportedInputJSON, base64PNG, "base64 data should not appear in exported span") + assert.Contains(exportedInputJSON, "braintrust_attachment", "should contain attachment reference") +} + +func TestAttachmentProcessing_DisabledByConfig(t *testing.T) { + assert := assert.New(t) + + tp := sdktrace.NewTracerProvider() + exporter := tracetest.NewInMemoryExporter() + + session := newTestSession() + cfg := Config{ + DefaultProjectID: "no-attachment-test", + AutoConvertAIAttachments: false, // disabled + Exporter: exporter, + Logger: logger.Discard(), + } + + err := AddSpanProcessor(tp, session, cfg) + assert.NoError(err) + + tracer := tp.Tracer("test") + + base64PNG := "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==" + inputJSON := `[{"role":"user","content":[{"type":"image_url","image_url":{"url":"data:image/png;base64,` + base64PNG + `"}}]}]` + + _, span := 
tracer.Start(context.Background(), "chat-completion") + span.SetAttributes(attribute.String("braintrust.input_json", inputJSON)) + span.End() + + _ = tp.ForceFlush(context.Background()) + spans := exporter.GetSpans() + + assert.Len(spans, 1) + + // Find the input_json attribute — should be unchanged + var exportedInputJSON string + for _, a := range spans[0].Attributes { + if string(a.Key) == "braintrust.input_json" { + exportedInputJSON = a.Value.AsString() + break + } + } + + assert.Equal(inputJSON, exportedInputJSON, "input should be unchanged when attachment processing is disabled") +} + +func TestAttachmentProcessing_NoAttachmentsPassThrough(t *testing.T) { + assert := assert.New(t) + + tp := sdktrace.NewTracerProvider() + exporter := tracetest.NewInMemoryExporter() + + session := newTestSession() + cfg := Config{ + DefaultProjectID: "passthrough-test", + AutoConvertAIAttachments: true, + AttachmentUploader: &attachmentprocessor.NoopUploader{}, + Exporter: exporter, + Logger: logger.Discard(), + } + + err := AddSpanProcessor(tp, session, cfg) + assert.NoError(err) + + tracer := tp.Tracer("test") + + // Create a span with no base64 data + inputJSON := `[{"role":"user","content":"Hello, world!"}]` + + _, span := tracer.Start(context.Background(), "plain-chat") + span.SetAttributes(attribute.String("braintrust.input_json", inputJSON)) + span.End() + + _ = tp.ForceFlush(context.Background()) + spans := exporter.GetSpans() + + assert.Len(spans, 1) + + var exportedInputJSON string + for _, a := range spans[0].Attributes { + if string(a.Key) == "braintrust.input_json" { + exportedInputJSON = a.Value.AsString() + break + } + } + + assert.Equal(inputJSON, exportedInputJSON, "non-attachment spans should pass through unchanged") +}