From 4072a03c59bfe4bfbc83365323532909a9943f00 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Wed, 27 May 2026 10:12:10 +0200 Subject: [PATCH 1/2] test(tools): trim aijson re-tests, keep docker-agent integration Follow-up to #2899. The repair_test.go suite shipped in that PR duplicates github.com/docker/aijson's own test cases through the NewHandler facade. Per rumpl's review comment, the docker-agent-specific surface area worth pinning is narrower: - NewHandler delegates to aijson.Unmarshal (wiring canary) - the OnRepair hook emits tool_input_repaired with tool + repairs fields - the hot path stays quiet for well-formed calls Drop pkg/tools/repair_test.go and add those three tests to the existing tools_test.go. aijson covers the repair semantics themselves. --- pkg/tools/repair_test.go | 146 --------------------------------------- pkg/tools/tools_test.go | 86 +++++++++++++++++++++++ 2 files changed, 86 insertions(+), 146 deletions(-) delete mode 100644 pkg/tools/repair_test.go diff --git a/pkg/tools/repair_test.go b/pkg/tools/repair_test.go deleted file mode 100644 index e2d3c3d33..000000000 --- a/pkg/tools/repair_test.go +++ /dev/null @@ -1,146 +0,0 @@ -package tools - -import ( - "context" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -// These tests pin the shape-repair behavior NewHandler relies on. The -// repair logic itself lives in github.com/docker/aijson; what's tested here -// is the integration: that arguments NewHandler observes in practice are -// repaired before the typed handler runs, and that unrepairable input -// surfaces the original error rather than a synthesised one. - -// argsWithStrings exercises the slice-of-string repair path that's by far -// the most commonly broken in real LLM tool calls (paths, urls, patterns). -type argsWithStrings struct { - Paths []string `json:"paths"` - JSON bool `json:"json,omitempty"` -} - -type argsWithInt struct { - N int `json:"n"` - Tags []string `json:"tags,omitempty"` -} - -func runHandler[T any](t *testing.T, args string) (T, error) { - t.Helper() - var got T - handler := NewHandler(func(_ context.Context, a T) (*ToolCallResult, error) { - got = a - return ResultSuccess("ok"), nil - }) - _, err := handler(t.Context(), ToolCall{ - ID: "call_1", - Type: "function", - Function: FunctionCall{ - Name: "test_tool", - Arguments: args, - }, - }) - return got, err -} - -func TestNewHandler_UnwrapsStringifiedArray(t *testing.T) { - // Common DeepSeek/Qwen mistake: send an array as a JSON string. - got, err := runHandler[argsWithStrings](t, `{"paths":"[\"a.txt\",\"b.txt\"]"}`) - require.NoError(t, err) - assert.Equal(t, []string{"a.txt", "b.txt"}, got.Paths) -} - -func TestNewHandler_WrapsBareString(t *testing.T) { - // Single-string-instead-of-array, the most common shape mistake. - got, err := runHandler[argsWithStrings](t, `{"paths":"only.txt"}`) - require.NoError(t, err) - assert.Equal(t, []string{"only.txt"}, got.Paths) -} - -func TestNewHandler_WrapsSingleObjectPlaceholder(t *testing.T) { - // Some models wrap a single argument in an object. - got, err := runHandler[argsWithStrings](t, `{"paths":{"path":"only.txt"}}`) - require.NoError(t, err) - assert.Equal(t, []string{"only.txt"}, got.Paths) -} - -func TestNewHandler_OrderingPreventsDoubleWrap(t *testing.T) { - // If the bare-string-wrap fired before the unwrap-stringified-array - // repair, this input would become [`["a","b"]`] instead of ["a","b"]. - // The clean two-element slice is the load-bearing assertion. - got, err := runHandler[argsWithStrings](t, `{"paths":"[\"a\",\"b\"]"}`) - require.NoError(t, err) - assert.Equal(t, []string{"a", "b"}, got.Paths) -} - -func TestNewHandler_DropsNullForPrimitive(t *testing.T) { - // Some custom UnmarshalJSON impls trip on null where a primitive is - // expected. Dropping the field lets the type's zero value win. - got, err := runHandler[argsWithInt](t, `{"n":null}`) - require.NoError(t, err) - assert.Equal(t, 0, got.N) -} - -func TestNewHandler_LeavesValidArrayUntouched(t *testing.T) { - got, err := runHandler[argsWithStrings](t, `{"paths":["a","b"]}`) - require.NoError(t, err) - assert.Equal(t, []string{"a", "b"}, got.Paths) -} - -// TestNewHandler_VisitsPromotedFieldsFromEmbeddedStruct mirrors the shape -// of LSP arg structs in pkg/tools/builtin/lsp (ReferencesArgs and friends -// embed PositionArgs). Repairs must reach promoted fields, otherwise LSP -// tools regress silently when models send a single file instead of an -// array. -func TestNewHandler_VisitsPromotedFieldsFromEmbeddedStruct(t *testing.T) { - type Base struct { - Files []string `json:"files"` - } - type WithEmbedding struct { - Base - - Extra string `json:"extra,omitempty"` - } - got, err := runHandler[WithEmbedding](t, `{"files":"only.txt"}`) - require.NoError(t, err) - assert.Equal(t, []string{"only.txt"}, got.Files) -} - -func TestNewHandler_RepairsMultipleFieldsInOneCall(t *testing.T) { - type combo struct { - Paths []string `json:"paths"` - Tags []string `json:"tags"` - } - got, err := runHandler[combo](t, `{"paths":"only.txt","tags":"[\"go\",\"ai\"]"}`) - require.NoError(t, err) - assert.Equal(t, []string{"only.txt"}, got.Paths) - assert.Equal(t, []string{"go", "ai"}, got.Tags) -} - -func TestNewHandler_RefusesMultiKeyObjectAsArray(t *testing.T) { - // Two keys in the placeholder object — too ambiguous to safely wrap. - // Surface the original schema error rather than guessing. - _, err := runHandler[argsWithStrings](t, `{"paths":{"path":"a.txt","extra":"ignore"}}`) - require.Error(t, err) -} - -func TestNewHandler_UnrepairableInputReturnsOriginalError(t *testing.T) { - type fileArgs struct { - Paths []string `json:"paths"` - } - handler := NewHandler(func(_ context.Context, _ fileArgs) (*ToolCallResult, error) { - t.Fatal("handler should not be called for unrepairable input") - return nil, nil - }) - - _, err := handler(t.Context(), ToolCall{ - ID: "call_1", - Type: "function", - Function: FunctionCall{ - Name: "read_multiple_files", - Arguments: `{not even json`, - }, - }) - require.Error(t, err) -} diff --git a/pkg/tools/tools_test.go b/pkg/tools/tools_test.go index 0ada71e0d..7927f30c8 100644 --- a/pkg/tools/tools_test.go +++ b/pkg/tools/tools_test.go @@ -1,7 +1,9 @@ package tools import ( + "bytes" "context" + "log/slog" "testing" "github.com/stretchr/testify/assert" @@ -79,6 +81,90 @@ func TestNewHandler_InvalidArguments(t *testing.T) { require.Error(t, err) } +// The next three tests pin the docker-agent-specific behavior of NewHandler. +// Repair semantics themselves are covered by github.com/docker/aijson; what +// is local to this package is (a) that NewHandler actually delegates to +// aijson, and (b) that the OnRepair hook fans out to the tool_input_repaired +// slog event with the expected fields (and stays quiet on the hot path). + +// TestNewHandler_DelegatesToAijson is the wiring canary: if someone swaps +// aijson.Unmarshal back to encoding/json.Unmarshal, this test catches it. +func TestNewHandler_DelegatesToAijson(t *testing.T) { + type args struct { + Paths []string `json:"paths"` + } + var got args + handler := NewHandler(func(_ context.Context, a args) (*ToolCallResult, error) { + got = a + return ResultSuccess("ok"), nil + }) + + _, err := handler(t.Context(), ToolCall{ + Type: "function", + Function: FunctionCall{ + Name: "read_multiple_files", + Arguments: `{"paths":"only.txt"}`, + }, + }) + require.NoError(t, err) + assert.Equal(t, []string{"only.txt"}, got.Paths) +} + +func TestNewHandler_EmitsRepairTelemetry(t *testing.T) { + var buf bytes.Buffer + prev := slog.Default() + slog.SetDefault(slog.New(slog.NewTextHandler(&buf, &slog.HandlerOptions{Level: slog.LevelInfo}))) + t.Cleanup(func() { slog.SetDefault(prev) }) + + type args struct { + Paths []string `json:"paths"` + } + handler := NewHandler(func(_ context.Context, _ args) (*ToolCallResult, error) { + return ResultSuccess("ok"), nil + }) + + _, err := handler(t.Context(), ToolCall{ + Type: "function", + Function: FunctionCall{ + Name: "read_multiple_files", + Arguments: `{"paths":"only.txt"}`, + }, + }) + require.NoError(t, err) + + out := buf.String() + assert.Contains(t, out, "tool_input_repaired") + assert.Contains(t, out, "tool=read_multiple_files") + assert.Contains(t, out, "wrap_in_array") +} + +// TestNewHandler_NoTelemetryOnValidInput pins the hot-path contract: a +// well-formed tool call must NOT emit tool_input_repaired. Without this, +// a regression in aijson's strict-first ordering would silently flood logs. +func TestNewHandler_NoTelemetryOnValidInput(t *testing.T) { + var buf bytes.Buffer + prev := slog.Default() + slog.SetDefault(slog.New(slog.NewTextHandler(&buf, &slog.HandlerOptions{Level: slog.LevelInfo}))) + t.Cleanup(func() { slog.SetDefault(prev) }) + + type args struct { + Paths []string `json:"paths"` + } + handler := NewHandler(func(_ context.Context, _ args) (*ToolCallResult, error) { + return ResultSuccess("ok"), nil + }) + + _, err := handler(t.Context(), ToolCall{ + Type: "function", + Function: FunctionCall{ + Name: "read_multiple_files", + Arguments: `{"paths":["a.txt","b.txt"]}`, + }, + }) + require.NoError(t, err) + assert.NotContains(t, buf.String(), "tool_input_repaired") +} + func TestToolCallResultWithoutPayload(t *testing.T) { result := &ToolCallResult{ Output: "large output", From f8ba9feef9ea77fc82df5b707779fd1219dc4a7b Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Wed, 27 May 2026 10:31:29 +0200 Subject: [PATCH 2/2] test(tools): use aijson.KindWrapInArray constant in telemetry assertion Track the exported Kind constant rather than its underlying string value so the assertion follows the library if aijson ever renames the symbol. Addresses bot review on PR #2905. --- pkg/tools/tools_test.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/tools/tools_test.go b/pkg/tools/tools_test.go index 7927f30c8..540a4a6f2 100644 --- a/pkg/tools/tools_test.go +++ b/pkg/tools/tools_test.go @@ -6,6 +6,7 @@ import ( "log/slog" "testing" + "github.com/docker/aijson" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -135,7 +136,9 @@ func TestNewHandler_EmitsRepairTelemetry(t *testing.T) { out := buf.String() assert.Contains(t, out, "tool_input_repaired") assert.Contains(t, out, "tool=read_multiple_files") - assert.Contains(t, out, "wrap_in_array") + // Track the exported aijson constant rather than the underlying string + // so the assertion follows the library if it ever renames the value. + assert.Contains(t, out, string(aijson.KindWrapInArray)) } // TestNewHandler_NoTelemetryOnValidInput pins the hot-path contract: a