diff --git a/internal/providers/anthropic_cache_split_test.go b/internal/providers/anthropic_cache_split_test.go index 83c4285194..d20d197b22 100644 --- a/internal/providers/anthropic_cache_split_test.go +++ b/internal/providers/anthropic_cache_split_test.go @@ -8,7 +8,7 @@ import ( // the cache boundary marker is split into 2 blocks: stable (cached) + dynamic. func TestAnthropicSystemBlocksSplit(t *testing.T) { prompt := "stable content\n" + CacheBoundaryMarker + "\ndynamic content" - blocks := splitSystemPromptForCache(prompt) + blocks := SplitSystemPromptForCache(prompt) if len(blocks) != 2 { t.Fatalf("expected 2 blocks, got %d", len(blocks)) } @@ -30,7 +30,7 @@ func TestAnthropicSystemBlocksSplit(t *testing.T) { // marker → single block with cache_control. func TestAnthropicSingleBlockFallback(t *testing.T) { prompt := "no boundary here" - blocks := splitSystemPromptForCache(prompt) + blocks := SplitSystemPromptForCache(prompt) if len(blocks) != 1 { t.Fatalf("expected 1 block, got %d", len(blocks)) } @@ -46,7 +46,7 @@ func TestAnthropicSingleBlockFallback(t *testing.T) { // after the boundary produces only 1 block (no empty block appended). func TestAnthropicEmptyDynamic(t *testing.T) { prompt := "stable only\n" + CacheBoundaryMarker + "\n" - blocks := splitSystemPromptForCache(prompt) + blocks := SplitSystemPromptForCache(prompt) if len(blocks) != 1 { t.Fatalf("expected 1 block for empty dynamic, got %d", len(blocks)) } @@ -59,7 +59,7 @@ func TestAnthropicEmptyDynamic(t *testing.T) { // (empty stable section) still produces valid blocks without empty text. func TestAnthropicEmptyStable(t *testing.T) { prompt := CacheBoundaryMarker + "\ndynamic only" - blocks := splitSystemPromptForCache(prompt) + blocks := SplitSystemPromptForCache(prompt) // Stable is empty string after TrimSpace — should still produce a block // (Anthropic API handles empty text blocks gracefully). if len(blocks) < 1 { diff --git a/internal/providers/anthropic_request.go b/internal/providers/anthropic_request.go index 58dfb3a08b..aa2c0599e1 100644 --- a/internal/providers/anthropic_request.go +++ b/internal/providers/anthropic_request.go @@ -10,10 +10,11 @@ import ( // to avoid circular import; agent_test verifies they match). const CacheBoundaryMarker = "" -// splitSystemPromptForCache splits a system prompt at the cache boundary marker. +// SplitSystemPromptForCache splits a system prompt at CacheBoundaryMarker. // Returns 2 blocks if boundary found: stable (with cache_control) + dynamic (without). // Returns 1 block with cache_control if no boundary (backwards compat). -func splitSystemPromptForCache(content string) []map[string]any { +// Used by both Anthropic and DashScope cache middleware (identical wire format). +func SplitSystemPromptForCache(content string) []map[string]any { ephemeral := map[string]any{"type": "ephemeral"} before, after, ok := strings.Cut(content, CacheBoundaryMarker) if !ok { @@ -95,7 +96,7 @@ func (p *AnthropicProvider) buildRequestBody(model string, req ChatRequest, stre for _, msg := range req.Messages { switch msg.Role { case "system": - systemBlocks = append(systemBlocks, splitSystemPromptForCache(msg.Content)...) + systemBlocks = append(systemBlocks, SplitSystemPromptForCache(msg.Content)...) case "user": if len(msg.Images) > 0 { diff --git a/internal/providers/dashscope_cache_middleware.go b/internal/providers/dashscope_cache_middleware.go new file mode 100644 index 0000000000..c873284195 --- /dev/null +++ b/internal/providers/dashscope_cache_middleware.go @@ -0,0 +1,62 @@ +package providers + +import "os" + +// wrapSystemForDashScopeCache transforms a system message string content into +// Anthropic-style content blocks with cache_control:ephemeral markers. +// +// DashScope verified 2026-05-08 to accept and process this wire format on +// coding-intl.dashscope.aliyuncs.com. Result: 90% discount on cached prefix +// tokens, 5min sliding TTL. +// +// Non-system messages and non-string content pass through unchanged +// (idempotent, supports already-blocked input). +func wrapSystemForDashScopeCache(msg map[string]any) map[string]any { + if msg["role"] != "system" { + return msg + } + content, ok := msg["content"].(string) + if !ok { + return msg + } + msg["content"] = SplitSystemPromptForCache(content) + return msg +} + +// applyDashScopeToolPrefixCache adds cache_control:ephemeral to the last tool +// definition, caching the entire tool prefix (descriptions, schemas). +// +// alreadyMarked: cache markers already consumed by system message blocks. +// DashScope limits 4 markers/request; skip tool marker if limit reached. +func applyDashScopeToolPrefixCache(tools []map[string]any, alreadyMarked int) []map[string]any { + if len(tools) == 0 || alreadyMarked >= 4 { + return tools + } + last := tools[len(tools)-1] + last["cache_control"] = map[string]any{"type": "ephemeral"} + return tools +} + +// countCacheControlMarkers counts cache_control fields in a message's content +// blocks. Used to track marker budget across system + tools. +func countCacheControlMarkers(msg map[string]any) int { + blocks, ok := msg["content"].([]map[string]any) + if !ok { + return 0 + } + count := 0 + for _, b := range blocks { + if b["cache_control"] != nil { + count++ + } + } + return count +} + +// dashScopeCacheDisabled returns true when env var GOCLAW_DISABLE_DASHSCOPE_CACHE +// is set to a truthy value. Provides runtime escape hatch without requiring +// code redeploy or config change. +func dashScopeCacheDisabled() bool { + v := os.Getenv("GOCLAW_DISABLE_DASHSCOPE_CACHE") + return v == "true" || v == "1" || v == "yes" +} diff --git a/internal/providers/dashscope_cache_middleware_test.go b/internal/providers/dashscope_cache_middleware_test.go new file mode 100644 index 0000000000..ab401668e8 --- /dev/null +++ b/internal/providers/dashscope_cache_middleware_test.go @@ -0,0 +1,182 @@ +package providers + +import ( + "encoding/json" + "reflect" + "testing" +) + +func TestWrapSystemForDashScopeCache_NoBoundary(t *testing.T) { + msg := map[string]any{ + "role": "system", + "content": "You are a helpful assistant.", + } + out := wrapSystemForDashScopeCache(msg) + blocks, ok := out["content"].([]map[string]any) + if !ok { + t.Fatalf("expected []map content, got %T", out["content"]) + } + if len(blocks) != 1 { + t.Fatalf("expected 1 block, got %d", len(blocks)) + } + if blocks[0]["cache_control"] == nil { + t.Error("block[0] missing cache_control") + } +} + +func TestWrapSystemForDashScopeCache_WithBoundary(t *testing.T) { + msg := map[string]any{ + "role": "system", + "content": "Stable prefix\n" + CacheBoundaryMarker + "\nDynamic suffix", + } + out := wrapSystemForDashScopeCache(msg) + blocks := out["content"].([]map[string]any) + if len(blocks) != 2 { + t.Fatalf("expected 2 blocks, got %d", len(blocks)) + } + if blocks[0]["cache_control"] == nil { + t.Error("stable block missing cache_control") + } + if blocks[1]["cache_control"] != nil { + t.Error("dynamic block should not have cache_control") + } +} + +func TestWrapSystemForDashScopeCache_NonSystemUntouched(t *testing.T) { + msg := map[string]any{ + "role": "user", + "content": "Hello", + } + out := wrapSystemForDashScopeCache(msg) + if !reflect.DeepEqual(out, msg) { + t.Error("user message should pass through unchanged") + } +} + +func TestWrapSystemForDashScopeCache_NonStringContentUntouched(t *testing.T) { + blocks := []map[string]any{{"type": "text", "text": "x", "cache_control": map[string]any{"type": "ephemeral"}}} + msg := map[string]any{"role": "system", "content": blocks} + out := wrapSystemForDashScopeCache(msg) + got, _ := json.Marshal(out["content"]) + want, _ := json.Marshal(blocks) + if string(got) != string(want) { + t.Errorf("idempotent fail: got %s want %s", got, want) + } +} + +func TestBuildRequestBody_DashScopeEndpoint_WrapsSystem(t *testing.T) { + p := NewOpenAIProvider("test", "key", "https://coding-intl.dashscope.aliyuncs.com/v1", "qwen3.6-plus") + req := ChatRequest{ + Messages: []Message{ + {Role: "system", Content: "You are an assistant."}, + {Role: "user", Content: "Hi"}, + }, + } + body := p.buildRequestBody("qwen3.6-plus", req, false) + msgs := body["messages"].([]map[string]any) + sysContent := msgs[0]["content"] + if _, ok := sysContent.([]map[string]any); !ok { + t.Fatalf("expected DashScope system content as []block, got %T", sysContent) + } +} + +func TestBuildRequestBody_OpenAINative_DoesNotWrap(t *testing.T) { + p := NewOpenAIProvider("test", "key", "https://api.openai.com/v1", "gpt-4o") + req := ChatRequest{Messages: []Message{{Role: "system", Content: "..."}, {Role: "user", Content: "Hi"}}} + body := p.buildRequestBody("gpt-4o", req, false) + msgs := body["messages"].([]map[string]any) + if _, ok := msgs[0]["content"].(string); !ok { + t.Errorf("OpenAI native should keep string content, got %T", msgs[0]["content"]) + } +} + +func TestApplyDashScopeToolPrefixCache_AddMarkerOnLast(t *testing.T) { + tools := []map[string]any{ + {"type": "function", "function": map[string]any{"name": "tool_a"}}, + {"type": "function", "function": map[string]any{"name": "tool_b"}}, + {"type": "function", "function": map[string]any{"name": "tool_c"}}, + } + out := applyDashScopeToolPrefixCache(tools, 1) + if len(out) != 3 { + t.Fatalf("len changed: got %d", len(out)) + } + if out[0]["cache_control"] != nil || out[1]["cache_control"] != nil { + t.Error("non-last tools should not have cache_control") + } + if out[2]["cache_control"] == nil { + t.Error("last tool missing cache_control") + } +} + +func TestApplyDashScopeToolPrefixCache_EmptyArray(t *testing.T) { + out := applyDashScopeToolPrefixCache([]map[string]any{}, 0) + if len(out) != 0 { + t.Errorf("expected empty, got %d", len(out)) + } +} + +func TestApplyDashScopeToolPrefixCache_RespectsMarkerLimit(t *testing.T) { + tools := []map[string]any{{"type": "function", "function": map[string]any{"name": "x"}}} + out := applyDashScopeToolPrefixCache(tools, 4) + if out[0]["cache_control"] != nil { + t.Error("should skip tool marker when limit reached") + } +} + +func TestCountCacheControlMarkers(t *testing.T) { + msg := map[string]any{ + "role": "system", + "content": []map[string]any{ + {"type": "text", "text": "x", "cache_control": map[string]any{"type": "ephemeral"}}, + {"type": "text", "text": "y"}, + }, + } + if got := countCacheControlMarkers(msg); got != 1 { + t.Errorf("got %d, want 1", got) + } +} + +func TestBuildRequestBody_DashScopeWithTools_AppliesToolCache(t *testing.T) { + p := NewOpenAIProvider("test", "key", "https://coding-intl.dashscope.aliyuncs.com/v1", "qwen3.6-plus") + req := ChatRequest{ + Messages: []Message{{Role: "system", Content: "..."}, {Role: "user", Content: "Hi"}}, + Tools: []ToolDefinition{ + {Type: "function", Function: &ToolFunctionSchema{Name: "search", Description: "search docs", Parameters: map[string]any{}}}, + {Type: "function", Function: &ToolFunctionSchema{Name: "fetch", Description: "fetch url", Parameters: map[string]any{}}}, + }, + } + body := p.buildRequestBody("qwen3.6-plus", req, false) + tools := body["tools"].([]map[string]any) + if tools[0]["cache_control"] != nil { + t.Error("first tool should not have cache_control") + } + if tools[len(tools)-1]["cache_control"] == nil { + t.Error("last tool should have cache_control") + } +} + +func TestBuildRequestBody_OpenAINativeWithTools_NoToolCache(t *testing.T) { + p := NewOpenAIProvider("test", "key", "https://api.openai.com/v1", "gpt-4o") + req := ChatRequest{ + Messages: []Message{{Role: "system", Content: "..."}, {Role: "user", Content: "Hi"}}, + Tools: []ToolDefinition{ + {Type: "function", Function: &ToolFunctionSchema{Name: "x", Description: "y", Parameters: map[string]any{}}}, + }, + } + body := p.buildRequestBody("gpt-4o", req, false) + tools, ok := body["tools"].([]map[string]any) + if ok && len(tools) > 0 && tools[0]["cache_control"] != nil { + t.Error("OpenAI native should not have tool cache_control") + } +} + +func TestBuildRequestBody_DashScopeWithEnvDisable_DoesNotWrap(t *testing.T) { + t.Setenv("GOCLAW_DISABLE_DASHSCOPE_CACHE", "true") + p := NewOpenAIProvider("test", "key", "https://coding-intl.dashscope.aliyuncs.com/v1", "qwen3.6-plus") + req := ChatRequest{Messages: []Message{{Role: "system", Content: "..."}, {Role: "user", Content: "Hi"}}} + body := p.buildRequestBody("qwen3.6-plus", req, false) + msgs := body["messages"].([]map[string]any) + if _, ok := msgs[0]["content"].(string); !ok { + t.Errorf("env disable should keep string content, got %T", msgs[0]["content"]) + } +} diff --git a/internal/providers/dashscope_usage_test.go b/internal/providers/dashscope_usage_test.go new file mode 100644 index 0000000000..0d9d43e615 --- /dev/null +++ b/internal/providers/dashscope_usage_test.go @@ -0,0 +1,57 @@ +package providers + +import ( + "encoding/json" + "testing" +) + +func TestOpenAIUsage_DashScopeCacheHit_Unmarshal(t *testing.T) { + raw := `{ + "prompt_tokens": 2318, + "completion_tokens": 195, + "total_tokens": 2513, + "prompt_tokens_details": { + "text_tokens": 2318, + "cache_creation_input_tokens": 0, + "cached_tokens": 2304 + } + }` + var u openAIUsage + if err := json.Unmarshal([]byte(raw), &u); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if u.PromptTokensDetails.CachedTokens != 2304 { + t.Errorf("cached_tokens: got %d want 2304", u.PromptTokensDetails.CachedTokens) + } + if u.PromptTokensDetails.CacheCreationInputTokens != 0 { + t.Errorf("cache_creation: got %d want 0", u.PromptTokensDetails.CacheCreationInputTokens) + } +} + +func TestOpenAIUsage_DashScopeCacheCreate_Unmarshal(t *testing.T) { + raw := `{ + "prompt_tokens": 2318, + "prompt_tokens_details": { + "cache_creation_input_tokens": 2304, + "cached_tokens": 0 + } + }` + var u openAIUsage + if err := json.Unmarshal([]byte(raw), &u); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if u.PromptTokensDetails.CacheCreationInputTokens != 2304 { + t.Errorf("got %d want 2304", u.PromptTokensDetails.CacheCreationInputTokens) + } +} + +func TestOpenAIUsage_NoDetails_OK(t *testing.T) { + raw := `{"prompt_tokens": 100, "completion_tokens": 20, "total_tokens": 120}` + var u openAIUsage + if err := json.Unmarshal([]byte(raw), &u); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if u.PromptTokensDetails != nil { + t.Errorf("expected nil PromptTokensDetails, got %+v", u.PromptTokensDetails) + } +} diff --git a/internal/providers/openai_chat.go b/internal/providers/openai_chat.go index b3991990d2..547ed0ef6f 100644 --- a/internal/providers/openai_chat.go +++ b/internal/providers/openai_chat.go @@ -111,6 +111,7 @@ func (p *OpenAIProvider) ChatStream(ctx context.Context, req ChatRequest, onChun } if chunk.Usage.PromptTokensDetails != nil { result.Usage.CacheReadTokens = chunk.Usage.PromptTokensDetails.CachedTokens + result.Usage.CacheCreationTokens = chunk.Usage.PromptTokensDetails.CacheCreationInputTokens } if chunk.Usage.CompletionTokensDetails != nil && chunk.Usage.CompletionTokensDetails.ReasoningTokens > 0 { result.Usage.ThinkingTokens = chunk.Usage.CompletionTokensDetails.ReasoningTokens diff --git a/internal/providers/openai_endpoints.go b/internal/providers/openai_endpoints.go index 34366301e3..74f8eb36ef 100644 --- a/internal/providers/openai_endpoints.go +++ b/internal/providers/openai_endpoints.go @@ -54,3 +54,26 @@ func (p *OpenAIProvider) dashScopePassthroughKeys() bool { } return false } + +// isDashScope returns true when this provider routes requests to DashScope/Bailian +// (supports cache_control:ephemeral wire format - verified live 2026-05-08). +// Uses 3-source detection (URL + providerType + name) to handle reverse-proxied +// DashScope endpoints. Includes "bailian" because live qwen-richard provider has +// provider_type=bailian. +// +// Used by buildRequestBody to wrap system content with Anthropic-style +// cache_control blocks for prompt caching (90% discount on cached prefix tokens). +func (p *OpenAIProvider) isDashScope() bool { + if isDashScopeAPIBase(p.apiBase) { + return true + } + pt := strings.ToLower(strings.TrimSpace(p.providerType)) + if strings.Contains(pt, "dashscope") || strings.Contains(pt, "bailian") { + return true + } + name := strings.ToLower(p.name) + if strings.Contains(name, "dashscope") || strings.Contains(name, "bailian") { + return true + } + return false +} diff --git a/internal/providers/openai_endpoints_dashscope_test.go b/internal/providers/openai_endpoints_dashscope_test.go new file mode 100644 index 0000000000..bfdb78602a --- /dev/null +++ b/internal/providers/openai_endpoints_dashscope_test.go @@ -0,0 +1,32 @@ +package providers + +import "testing" + +func TestOpenAIProvider_isDashScope(t *testing.T) { + cases := []struct { + name string + apiBase string + ptype string + pname string + want bool + }{ + {"coding-intl URL", "https://coding-intl.dashscope.aliyuncs.com/v1", "openai_compat", "qwen-richard", true}, + {"providerType=bailian", "https://custom-proxy.example.com/v1", "bailian", "internal-qwen", true}, + {"providerType=dashscope", "https://proxy.example.com/v1", "dashscope", "x", true}, + {"name contains dashscope", "https://proxy.com/v1", "openai_compat", "my-dashscope-mirror", true}, + {"name contains bailian", "https://proxy.com/v1", "openai_compat", "company-bailian-relay", true}, + {"openai native", "https://api.openai.com/v1", "openai", "gpt", false}, + {"anthropic", "https://api.anthropic.com", "anthropic", "claude", false}, + {"openrouter", "https://openrouter.ai/api/v1", "openai_compat", "openrouter", false}, + {"empty", "", "", "", false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + p := &OpenAIProvider{apiBase: tc.apiBase, providerType: tc.ptype, name: tc.pname} + if got := p.isDashScope(); got != tc.want { + t.Errorf("isDashScope() = %v, want %v (apiBase=%q ptype=%q name=%q)", + got, tc.want, tc.apiBase, tc.ptype, tc.pname) + } + }) + } +} diff --git a/internal/providers/openai_http.go b/internal/providers/openai_http.go index 80a069a91c..bf0b1ac970 100644 --- a/internal/providers/openai_http.go +++ b/internal/providers/openai_http.go @@ -126,6 +126,7 @@ func (p *OpenAIProvider) parseResponse(resp *openAIResponse) *ChatResponse { } if resp.Usage.PromptTokensDetails != nil { result.Usage.CacheReadTokens = resp.Usage.PromptTokensDetails.CachedTokens + result.Usage.CacheCreationTokens = resp.Usage.PromptTokensDetails.CacheCreationInputTokens } if resp.Usage.CompletionTokensDetails != nil && resp.Usage.CompletionTokensDetails.ReasoningTokens > 0 { result.Usage.ThinkingTokens = resp.Usage.CompletionTokensDetails.ReasoningTokens diff --git a/internal/providers/openai_request.go b/internal/providers/openai_request.go index 84cc264a3a..d699c4a341 100644 --- a/internal/providers/openai_request.go +++ b/internal/providers/openai_request.go @@ -129,6 +129,15 @@ func (p *OpenAIProvider) buildRequestBody(model string, req ChatRequest, stream msgs = append(msgs, msg) } + // Apply DashScope cache_control wrapping (verified live 2026-05-08). + // Uses 3-source detection from p.isDashScope() (URL + providerType + name) + // to handle reverse-proxied endpoints. No-op for non-DashScope endpoints + // or when env disabled. For native OpenAI, role mapping above renames + // "system"→"developer" so wrap is a no-op (role guard). + if p.isDashScope() && !dashScopeCacheDisabled() && len(msgs) > 0 { + msgs[0] = wrapSystemForDashScopeCache(msgs[0]) + } + // Safety net: strip trailing assistant message to prevent HTTP 400 from // proxy providers (LiteLLM, OpenRouter) that don't support assistant prefill. // This should rarely trigger — the agent loop ensures user message is last. @@ -151,6 +160,19 @@ func (p *OpenAIProvider) buildRequestBody(model string, req ChatRequest, stream body["tool_choice"] = "auto" } + // DashScope tool prefix cache: cache_control on last tool definition + // caches the entire tools array (descriptions + schemas, ~5-10K tokens). + // Combined with system block cache: 2/4 markers used, 99.5% hit rate verified. + if p.isDashScope() && !dashScopeCacheDisabled() { + if t, ok := body["tools"].([]map[string]any); ok && len(t) > 0 { + markersFromSystem := 0 + if len(msgs) > 0 { + markersFromSystem = countCacheControlMarkers(msgs[0]) + } + body["tools"] = applyDashScopeToolPrefixCache(t, markersFromSystem) + } + } + // Together returns HTTP 400 on some requests when stream_options is present. if stream && !p.isTogetherEndpoint() { body["stream_options"] = map[string]any{ diff --git a/internal/providers/openai_test_helpers.go b/internal/providers/openai_test_helpers.go new file mode 100644 index 0000000000..ecdf91a9dd --- /dev/null +++ b/internal/providers/openai_test_helpers.go @@ -0,0 +1,8 @@ +package providers + +// BuildRequestBodyForTest is a test-only export of buildRequestBody for +// integration smoke tests in the tests/integration package. NOT part of the +// public API - do not call from production code paths. +func (p *OpenAIProvider) BuildRequestBodyForTest(model string, req ChatRequest, stream bool) map[string]any { + return p.buildRequestBody(model, req, stream) +} diff --git a/internal/providers/openai_types.go b/internal/providers/openai_types.go index 57361e1cd5..0387b41aad 100644 --- a/internal/providers/openai_types.go +++ b/internal/providers/openai_types.go @@ -54,7 +54,8 @@ type openAIUsage struct { } type openAIPromptDetails struct { - CachedTokens int `json:"cached_tokens"` + CachedTokens int `json:"cached_tokens"` + CacheCreationInputTokens int `json:"cache_creation_input_tokens,omitempty"` } type openAICompletionDetails struct { diff --git a/tests/integration/qwen_cache_smoke_test.go b/tests/integration/qwen_cache_smoke_test.go new file mode 100644 index 0000000000..7f787e1c90 --- /dev/null +++ b/tests/integration/qwen_cache_smoke_test.go @@ -0,0 +1,149 @@ +//go:build integration + +package integration + +import ( + "context" + "fmt" + "os" + "strings" + "testing" + "time" + + "github.com/nextlevelbuilder/goclaw/internal/providers" +) + +// TestQwenCacheSmoke verifies cache_control:ephemeral is honored by DashScope +// for multiple Qwen model variants. Uses a fresh per-run salt so call 1 always +// creates a new cache entry; call 2 hits it. +// +// Run: +// +// DASHSCOPE_API_KEY= \ +// go test -tags=integration ./tests/integration/ -run TestQwenCacheSmoke -v -timeout 5m +// +// Optional: +// +// DASHSCOPE_API_BASE=... (default coding-intl) +// DASHSCOPE_MODELS=a,b,c (comma-list; defaults to a curated set) +func TestQwenCacheSmoke(t *testing.T) { + apiKey := os.Getenv("DASHSCOPE_API_KEY") + if apiKey == "" { + t.Skip("set DASHSCOPE_API_KEY") + } + apiBase := os.Getenv("DASHSCOPE_API_BASE") + if apiBase == "" { + apiBase = "https://coding-intl.dashscope.aliyuncs.com/v1" + } + + models := []string{"qwen3-coder-plus", "qwen3-max", "qwen-plus", "qwen-turbo"} + if v := os.Getenv("DASHSCOPE_MODELS"); v != "" { + models = strings.Split(v, ",") + } + + // Per-run salt so the cache prefix is unique to this test run; call 1 will + // always be a cache miss (cache_creation > 0), call 2 a hit. + salt := time.Now().Format("20060102T150405.000000000") + + type result struct { + model string + ok bool + err string + create int + readHit int + prompt2 int + hitRate float64 + } + var results []result + + for _, model := range models { + t.Run(model, func(t *testing.T) { + r := result{model: model} + defer func() { results = append(results, r) }() + + p := providers.NewOpenAIProvider("qwen-smoke", apiKey, apiBase, model). + WithProviderType("bailian") + + // ~6K-token stable prefix, salted per run + per model so cache is fresh. + stableSys := fmt.Sprintf("You are an expert assistant for run=%s model=%s. ", salt, model) + + strings.Repeat("Provide thorough technically accurate answers about software engineering. Discuss architecture trade-offs performance security observability and maintenance. Cite design patterns and explain why they apply. ", 200) + + providers.CacheBoundaryMarker + + "\nDynamic suffix: " + time.Now().Format(time.RFC3339Nano) + + req := providers.ChatRequest{ + Model: model, + Messages: []providers.Message{ + {Role: "system", Content: stableSys}, + {Role: "user", Content: "Reply with one word: ok"}, + }, + Options: map[string]any{ + providers.OptMaxTokens: 8, + }, + } + + ctx, cancel := context.WithTimeout(context.Background(), 90*time.Second) + defer cancel() + + // Verify wire format wraps system content. + body := p.BuildRequestBodyForTest(model, req, false) + if msgs, ok := body["messages"].([]map[string]any); ok && len(msgs) > 0 { + if _, isBlocks := msgs[0]["content"].([]map[string]any); !isBlocks { + t.Fatalf("system content not wrapped as blocks; isDashScope() likely false") + } + } + + t.Logf("call 1 (cache create) model=%s...", model) + resp1, err := p.Chat(ctx, req) + if err != nil { + r.err = "call1: " + err.Error() + t.Fatalf("call 1: %v", err) + } + r.create = resp1.Usage.CacheCreationTokens + t.Logf(" call 1 prompt=%d cached=%d create=%d", + resp1.Usage.PromptTokens, resp1.Usage.CacheReadTokens, resp1.Usage.CacheCreationTokens) + + time.Sleep(3 * time.Second) + + t.Logf("call 2 (expected hit)...") + resp2, err := p.Chat(ctx, req) + if err != nil { + r.err = "call2: " + err.Error() + t.Fatalf("call 2: %v", err) + } + r.readHit = resp2.Usage.CacheReadTokens + r.prompt2 = resp2.Usage.PromptTokens + t.Logf(" call 2 prompt=%d cached=%d create=%d", + resp2.Usage.PromptTokens, resp2.Usage.CacheReadTokens, resp2.Usage.CacheCreationTokens) + + if r.prompt2 > 0 { + r.hitRate = float64(r.readHit) / float64(r.prompt2) + } + + // Per-model assertions: at least one of (create > 0 on call 1) OR + // (cached > 0 on call 2 with hit rate >= 80%) must hold. Both + // indicate cache_control plumbing works for this model. + if r.create == 0 && r.readHit == 0 { + t.Errorf("model=%s: cache appears UNSUPPORTED (no create, no hit)", model) + return + } + if r.readHit > 0 && r.hitRate < 0.80 { + t.Errorf("model=%s: hit rate %.1f%% < 80%%", model, r.hitRate*100) + return + } + r.ok = true + }) + } + + // Final summary table for easy copy/paste into validation report. + t.Log("\n=== Qwen cache support summary ===") + t.Logf("%-22s %-6s %-12s %-12s %-12s %-10s", "MODEL", "OK", "CREATE_TOK", "HIT_TOK", "PROMPT2", "HIT_RATE") + for _, r := range results { + ok := "PASS" + if !r.ok { + ok = "FAIL" + } + t.Logf("%-22s %-6s %-12d %-12d %-12d %-9.1f%%", + r.model, ok, r.create, r.readHit, r.prompt2, r.hitRate*100) + } +} +