Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions internal/providers/anthropic_cache_split_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
// the cache boundary marker is split into 2 blocks: stable (cached) + dynamic.
func TestAnthropicSystemBlocksSplit(t *testing.T) {
prompt := "stable content\n" + CacheBoundaryMarker + "\ndynamic content"
blocks := splitSystemPromptForCache(prompt)
blocks := SplitSystemPromptForCache(prompt)
if len(blocks) != 2 {
t.Fatalf("expected 2 blocks, got %d", len(blocks))
}
Expand All @@ -30,7 +30,7 @@ func TestAnthropicSystemBlocksSplit(t *testing.T) {
// marker → single block with cache_control.
func TestAnthropicSingleBlockFallback(t *testing.T) {
prompt := "no boundary here"
blocks := splitSystemPromptForCache(prompt)
blocks := SplitSystemPromptForCache(prompt)
if len(blocks) != 1 {
t.Fatalf("expected 1 block, got %d", len(blocks))
}
Expand All @@ -46,7 +46,7 @@ func TestAnthropicSingleBlockFallback(t *testing.T) {
// after the boundary produces only 1 block (no empty block appended).
func TestAnthropicEmptyDynamic(t *testing.T) {
prompt := "stable only\n" + CacheBoundaryMarker + "\n"
blocks := splitSystemPromptForCache(prompt)
blocks := SplitSystemPromptForCache(prompt)
if len(blocks) != 1 {
t.Fatalf("expected 1 block for empty dynamic, got %d", len(blocks))
}
Expand All @@ -59,7 +59,7 @@ func TestAnthropicEmptyDynamic(t *testing.T) {
// (empty stable section) still produces valid blocks without empty text.
func TestAnthropicEmptyStable(t *testing.T) {
prompt := CacheBoundaryMarker + "\ndynamic only"
blocks := splitSystemPromptForCache(prompt)
blocks := SplitSystemPromptForCache(prompt)
// Stable is empty string after TrimSpace — should still produce a block
// (Anthropic API handles empty text blocks gracefully).
if len(blocks) < 1 {
Expand Down
7 changes: 4 additions & 3 deletions internal/providers/anthropic_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ import (
// to avoid circular import; agent_test verifies they match).
const CacheBoundaryMarker = "<!-- GOCLAW_CACHE_BOUNDARY -->"

// splitSystemPromptForCache splits a system prompt at the cache boundary marker.
// SplitSystemPromptForCache splits a system prompt at CacheBoundaryMarker.
// Returns 2 blocks if boundary found: stable (with cache_control) + dynamic (without).
// Returns 1 block with cache_control if no boundary (backwards compat).
func splitSystemPromptForCache(content string) []map[string]any {
// Used by both Anthropic and DashScope cache middleware (identical wire format).
func SplitSystemPromptForCache(content string) []map[string]any {
ephemeral := map[string]any{"type": "ephemeral"}
before, after, ok := strings.Cut(content, CacheBoundaryMarker)
if !ok {
Expand Down Expand Up @@ -95,7 +96,7 @@ func (p *AnthropicProvider) buildRequestBody(model string, req ChatRequest, stre
for _, msg := range req.Messages {
switch msg.Role {
case "system":
systemBlocks = append(systemBlocks, splitSystemPromptForCache(msg.Content)...)
systemBlocks = append(systemBlocks, SplitSystemPromptForCache(msg.Content)...)

case "user":
if len(msg.Images) > 0 {
Expand Down
62 changes: 62 additions & 0 deletions internal/providers/dashscope_cache_middleware.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package providers

import (
	"os"
	"strings"
)

// wrapSystemForDashScopeCache rewrites the string content of a system message
// as Anthropic-style content blocks carrying cache_control:ephemeral markers,
// using SplitSystemPromptForCache to build the blocks.
//
// DashScope was verified on 2026-05-08 to accept and process this wire format
// on coding-intl.dashscope.aliyuncs.com, giving a 90% discount on cached
// prefix tokens with a 5-minute sliding TTL.
//
// The message is modified in place and the same map is returned. Messages
// whose role is not "system", and messages whose content is not a string
// (for example content that is already a block list), are returned untouched,
// which makes repeated application a no-op.
func wrapSystemForDashScopeCache(msg map[string]any) map[string]any {
	if msg["role"] == "system" {
		// Only plain-string content is converted; anything else is left as is.
		if text, isString := msg["content"].(string); isString {
			msg["content"] = SplitSystemPromptForCache(text)
		}
	}
	return msg
}

// applyDashScopeToolPrefixCache marks the final tool definition with
// cache_control:ephemeral so that the whole tool prefix (descriptions and
// schemas) is cached.
//
// alreadyMarked is the number of cache markers already spent on system
// message blocks. DashScope allows 4 markers per request, so the tool marker
// is skipped once that budget is used up. An empty tool list is returned
// as is.
//
// The last tool map is modified in place and the input slice is returned.
func applyDashScopeToolPrefixCache(tools []map[string]any, alreadyMarked int) []map[string]any {
	// Per-request marker budget shared between system blocks and tools.
	const markerLimit = 4
	if n := len(tools); n > 0 && alreadyMarked < markerLimit {
		tools[n-1]["cache_control"] = map[string]any{"type": "ephemeral"}
	}
	return tools
}

// countCacheControlMarkers reports how many content blocks of msg carry a
// non-nil cache_control field. It is used to track the marker budget shared
// by system blocks and tools.
//
// Only content of type []map[string]any is inspected; any other content
// (a plain string, a missing key, a different slice type) yields 0.
func countCacheControlMarkers(msg map[string]any) int {
	marked := 0
	// A failed assertion leaves blocks nil, so the loop below simply does not run.
	blocks, _ := msg["content"].([]map[string]any)
	for i := range blocks {
		if blocks[i]["cache_control"] != nil {
			marked++
		}
	}
	return marked
}

// dashScopeCacheDisabled reports whether the environment variable
// GOCLAW_DISABLE_DASHSCOPE_CACHE is set to a truthy value. It provides a
// runtime escape hatch that needs neither a code redeploy nor a config change.
//
// Accepted truthy values are "true", "1" and "yes". The comparison ignores
// letter case and surrounding whitespace, so values such as "TRUE" or " Yes "
// written by an operator or a deployment template also disable the cache.
// Any other value, including an unset or empty variable, returns false.
func dashScopeCacheDisabled() bool {
	v := strings.ToLower(strings.TrimSpace(os.Getenv("GOCLAW_DISABLE_DASHSCOPE_CACHE")))
	switch v {
	case "true", "1", "yes":
		return true
	default:
		return false
	}
}
182 changes: 182 additions & 0 deletions internal/providers/dashscope_cache_middleware_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
package providers

import (
"encoding/json"
"reflect"
"testing"
)

// TestWrapSystemForDashScopeCache_NoBoundary checks that a system prompt
// without a cache boundary marker is wrapped into exactly one content block
// and that this block carries cache_control.
func TestWrapSystemForDashScopeCache_NoBoundary(t *testing.T) {
	wrapped := wrapSystemForDashScopeCache(map[string]any{
		"role":    "system",
		"content": "You are a helpful assistant.",
	})

	content := wrapped["content"]
	blocks, isBlocks := content.([]map[string]any)
	if !isBlocks {
		t.Fatalf("expected []map content, got %T", content)
	}
	if n := len(blocks); n != 1 {
		t.Fatalf("expected 1 block, got %d", n)
	}
	if only := blocks[0]; only["cache_control"] == nil {
		t.Error("block[0] missing cache_control")
	}
}

// TestWrapSystemForDashScopeCache_WithBoundary checks that a system prompt
// containing CacheBoundaryMarker is wrapped into two blocks: the first with
// cache_control and the second without.
func TestWrapSystemForDashScopeCache_WithBoundary(t *testing.T) {
	prompt := "Stable prefix\n" + CacheBoundaryMarker + "\nDynamic suffix"
	wrapped := wrapSystemForDashScopeCache(map[string]any{
		"role":    "system",
		"content": prompt,
	})

	blocks := wrapped["content"].([]map[string]any)
	if n := len(blocks); n != 2 {
		t.Fatalf("expected 2 blocks, got %d", n)
	}

	stable, dynamic := blocks[0], blocks[1]
	if stable["cache_control"] == nil {
		t.Error("stable block missing cache_control")
	}
	if dynamic["cache_control"] != nil {
		t.Error("dynamic block should not have cache_control")
	}
}

// TestWrapSystemForDashScopeCache_NonSystemUntouched checks that a message
// whose role is not "system" comes back with its role and content unchanged.
func TestWrapSystemForDashScopeCache_NonSystemUntouched(t *testing.T) {
	msg := map[string]any{
		"role":    "user",
		"content": "Hello",
	}
	// Independent expected value. The function under test may hand back the
	// very map it received, in which case comparing the result with msg would
	// compare a map with itself and could never fail.
	want := map[string]any{
		"role":    "user",
		"content": "Hello",
	}

	out := wrapSystemForDashScopeCache(msg)
	if !reflect.DeepEqual(out, want) {
		t.Error("user message should pass through unchanged")
	}
	// The input must not have been modified as a side effect either.
	if !reflect.DeepEqual(msg, want) {
		t.Error("input user message should not be modified")
	}
}

// TestWrapSystemForDashScopeCache_NonStringContentUntouched checks that a
// system message whose content is already a block list is left as is, i.e.
// that wrapping is idempotent.
func TestWrapSystemForDashScopeCache_NonStringContentUntouched(t *testing.T) {
	blocks := []map[string]any{{"type": "text", "text": "x", "cache_control": map[string]any{"type": "ephemeral"}}}

	// Snapshot the expected encoding before the call: the message content
	// aliases blocks, so encoding it afterwards would hide in-place changes.
	want, err := json.Marshal(blocks)
	if err != nil {
		t.Fatalf("marshal expected content: %v", err)
	}

	msg := map[string]any{"role": "system", "content": blocks}
	out := wrapSystemForDashScopeCache(msg)

	got, err := json.Marshal(out["content"])
	if err != nil {
		t.Fatalf("marshal actual content: %v", err)
	}
	if string(got) != string(want) {
		t.Errorf("idempotent fail: got %s want %s", got, want)
	}
}

// TestBuildRequestBody_DashScopeEndpoint_WrapsSystem builds a request for a
// provider pointed at a DashScope API base and checks that the system message
// content is emitted as a block list rather than a plain string.
func TestBuildRequestBody_DashScopeEndpoint_WrapsSystem(t *testing.T) {
	provider := NewOpenAIProvider("test", "key", "https://coding-intl.dashscope.aliyuncs.com/v1", "qwen3.6-plus")
	body := provider.buildRequestBody("qwen3.6-plus", ChatRequest{
		Messages: []Message{
			{Role: "system", Content: "You are an assistant."},
			{Role: "user", Content: "Hi"},
		},
	}, false)

	messages := body["messages"].([]map[string]any)
	systemContent := messages[0]["content"]
	if _, wrapped := systemContent.([]map[string]any); wrapped {
		return
	}
	t.Fatalf("expected DashScope system content as []block, got %T", systemContent)
}

// TestBuildRequestBody_OpenAINative_DoesNotWrap builds a request for a
// provider pointed at api.openai.com and checks that the system message
// content stays a plain string.
func TestBuildRequestBody_OpenAINative_DoesNotWrap(t *testing.T) {
	provider := NewOpenAIProvider("test", "key", "https://api.openai.com/v1", "gpt-4o")
	request := ChatRequest{
		Messages: []Message{
			{Role: "system", Content: "..."},
			{Role: "user", Content: "Hi"},
		},
	}

	body := provider.buildRequestBody("gpt-4o", request, false)
	messages := body["messages"].([]map[string]any)
	systemContent := messages[0]["content"]
	if _, isString := systemContent.(string); !isString {
		t.Errorf("OpenAI native should keep string content, got %T", systemContent)
	}
}

// TestApplyDashScopeToolPrefixCache_AddMarkerOnLast checks that, with marker
// budget remaining, only the last of three tools receives cache_control and
// the number of tools is unchanged.
func TestApplyDashScopeToolPrefixCache_AddMarkerOnLast(t *testing.T) {
	names := []string{"tool_a", "tool_b", "tool_c"}
	tools := make([]map[string]any, 0, len(names))
	for _, name := range names {
		tools = append(tools, map[string]any{
			"type":     "function",
			"function": map[string]any{"name": name},
		})
	}

	out := applyDashScopeToolPrefixCache(tools, 1)
	if len(out) != 3 {
		t.Fatalf("len changed: got %d", len(out))
	}

	// Report a stray marker once, no matter how many leading tools carry it.
	strayMarker := false
	for _, tool := range out[:2] {
		if tool["cache_control"] != nil {
			strayMarker = true
		}
	}
	if strayMarker {
		t.Error("non-last tools should not have cache_control")
	}
	if out[2]["cache_control"] == nil {
		t.Error("last tool missing cache_control")
	}
}

// TestApplyDashScopeToolPrefixCache_EmptyArray checks that an empty tool
// list is returned empty.
func TestApplyDashScopeToolPrefixCache_EmptyArray(t *testing.T) {
	var noTools = []map[string]any{}
	if n := len(applyDashScopeToolPrefixCache(noTools, 0)); n != 0 {
		t.Errorf("expected empty, got %d", n)
	}
}

// TestApplyDashScopeToolPrefixCache_RespectsMarkerLimit checks that no tool
// marker is added when four markers have already been used.
func TestApplyDashScopeToolPrefixCache_RespectsMarkerLimit(t *testing.T) {
	single := map[string]any{
		"type":     "function",
		"function": map[string]any{"name": "x"},
	}
	result := applyDashScopeToolPrefixCache([]map[string]any{single}, 4)
	if marker := result[0]["cache_control"]; marker != nil {
		t.Error("should skip tool marker when limit reached")
	}
}

// TestCountCacheControlMarkers checks that only blocks carrying cache_control
// are counted: one marked and one unmarked block must yield 1.
func TestCountCacheControlMarkers(t *testing.T) {
	marked := map[string]any{"type": "text", "text": "x", "cache_control": map[string]any{"type": "ephemeral"}}
	unmarked := map[string]any{"type": "text", "text": "y"}
	msg := map[string]any{
		"role":    "system",
		"content": []map[string]any{marked, unmarked},
	}

	got := countCacheControlMarkers(msg)
	if got != 1 {
		t.Errorf("got %d, want 1", got)
	}
}

// TestBuildRequestBody_DashScopeWithTools_AppliesToolCache builds a request
// with two tools for a provider pointed at a DashScope API base and checks
// that only the last tool carries cache_control.
func TestBuildRequestBody_DashScopeWithTools_AppliesToolCache(t *testing.T) {
	provider := NewOpenAIProvider("test", "key", "https://coding-intl.dashscope.aliyuncs.com/v1", "qwen3.6-plus")
	request := ChatRequest{
		Messages: []Message{
			{Role: "system", Content: "..."},
			{Role: "user", Content: "Hi"},
		},
		Tools: []ToolDefinition{
			{Type: "function", Function: &ToolFunctionSchema{Name: "search", Description: "search docs", Parameters: map[string]any{}}},
			{Type: "function", Function: &ToolFunctionSchema{Name: "fetch", Description: "fetch url", Parameters: map[string]any{}}},
		},
	}

	body := provider.buildRequestBody("qwen3.6-plus", request, false)
	tools := body["tools"].([]map[string]any)

	first := tools[0]
	if first["cache_control"] != nil {
		t.Error("first tool should not have cache_control")
	}
	last := tools[len(tools)-1]
	if last["cache_control"] == nil {
		t.Error("last tool should have cache_control")
	}
}

// TestBuildRequestBody_OpenAINativeWithTools_NoToolCache builds a request
// with one tool for a provider pointed at api.openai.com and checks that the
// tool does not carry cache_control. The check is skipped when the body has
// no tools entry of the expected type or the list is empty.
func TestBuildRequestBody_OpenAINativeWithTools_NoToolCache(t *testing.T) {
	provider := NewOpenAIProvider("test", "key", "https://api.openai.com/v1", "gpt-4o")
	request := ChatRequest{
		Messages: []Message{
			{Role: "system", Content: "..."},
			{Role: "user", Content: "Hi"},
		},
		Tools: []ToolDefinition{
			{Type: "function", Function: &ToolFunctionSchema{Name: "x", Description: "y", Parameters: map[string]any{}}},
		},
	}

	body := provider.buildRequestBody("gpt-4o", request, false)
	tools, isToolList := body["tools"].([]map[string]any)
	if !isToolList || len(tools) == 0 {
		return
	}
	if tools[0]["cache_control"] != nil {
		t.Error("OpenAI native should not have tool cache_control")
	}
}

// TestBuildRequestBody_DashScopeWithEnvDisable_DoesNotWrap sets
// GOCLAW_DISABLE_DASHSCOPE_CACHE=true and checks that a provider pointed at a
// DashScope API base then keeps the system content as a plain string.
func TestBuildRequestBody_DashScopeWithEnvDisable_DoesNotWrap(t *testing.T) {
	t.Setenv("GOCLAW_DISABLE_DASHSCOPE_CACHE", "true")

	provider := NewOpenAIProvider("test", "key", "https://coding-intl.dashscope.aliyuncs.com/v1", "qwen3.6-plus")
	request := ChatRequest{
		Messages: []Message{
			{Role: "system", Content: "..."},
			{Role: "user", Content: "Hi"},
		},
	}

	body := provider.buildRequestBody("qwen3.6-plus", request, false)
	messages := body["messages"].([]map[string]any)
	systemContent := messages[0]["content"]
	if _, isString := systemContent.(string); !isString {
		t.Errorf("env disable should keep string content, got %T", systemContent)
	}
}
57 changes: 57 additions & 0 deletions internal/providers/dashscope_usage_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package providers

import (
"encoding/json"
"testing"
)

// TestOpenAIUsage_DashScopeCacheHit_Unmarshal decodes a usage payload whose
// prompt_tokens_details reports cached tokens and no cache creation, and
// checks that both counters land in openAIUsage.PromptTokensDetails.
func TestOpenAIUsage_DashScopeCacheHit_Unmarshal(t *testing.T) {
	raw := `{
"prompt_tokens": 2318,
"completion_tokens": 195,
"total_tokens": 2513,
"prompt_tokens_details": {
"text_tokens": 2318,
"cache_creation_input_tokens": 0,
"cached_tokens": 2304
}
}`
	var u openAIUsage
	if err := json.Unmarshal([]byte(raw), &u); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	// PromptTokensDetails is a pointer; fail with a clear message instead of
	// panicking on a nil dereference if decoding left it unset.
	if u.PromptTokensDetails == nil {
		t.Fatal("PromptTokensDetails is nil, expected decoded details")
	}
	if u.PromptTokensDetails.CachedTokens != 2304 {
		t.Errorf("cached_tokens: got %d want 2304", u.PromptTokensDetails.CachedTokens)
	}
	if u.PromptTokensDetails.CacheCreationInputTokens != 0 {
		t.Errorf("cache_creation: got %d want 0", u.PromptTokensDetails.CacheCreationInputTokens)
	}
}

// TestOpenAIUsage_DashScopeCacheCreate_Unmarshal decodes a usage payload
// whose prompt_tokens_details reports cache creation tokens and checks that
// the value lands in PromptTokensDetails.CacheCreationInputTokens.
func TestOpenAIUsage_DashScopeCacheCreate_Unmarshal(t *testing.T) {
	raw := `{
"prompt_tokens": 2318,
"prompt_tokens_details": {
"cache_creation_input_tokens": 2304,
"cached_tokens": 0
}
}`
	var u openAIUsage
	if err := json.Unmarshal([]byte(raw), &u); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	// PromptTokensDetails is a pointer; fail with a clear message instead of
	// panicking on a nil dereference if decoding left it unset.
	if u.PromptTokensDetails == nil {
		t.Fatal("PromptTokensDetails is nil, expected decoded details")
	}
	if u.PromptTokensDetails.CacheCreationInputTokens != 2304 {
		t.Errorf("got %d want 2304", u.PromptTokensDetails.CacheCreationInputTokens)
	}
}

// TestOpenAIUsage_NoDetails_OK decodes a usage payload that has no
// prompt_tokens_details object and checks that PromptTokensDetails stays nil.
func TestOpenAIUsage_NoDetails_OK(t *testing.T) {
	const payload = `{"prompt_tokens": 100, "completion_tokens": 20, "total_tokens": 120}`

	var usage openAIUsage
	err := json.Unmarshal([]byte(payload), &usage)
	if err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	if details := usage.PromptTokensDetails; details != nil {
		t.Errorf("expected nil PromptTokensDetails, got %+v", details)
	}
}
1 change: 1 addition & 0 deletions internal/providers/openai_chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ func (p *OpenAIProvider) ChatStream(ctx context.Context, req ChatRequest, onChun
}
if chunk.Usage.PromptTokensDetails != nil {
result.Usage.CacheReadTokens = chunk.Usage.PromptTokensDetails.CachedTokens
result.Usage.CacheCreationTokens = chunk.Usage.PromptTokensDetails.CacheCreationInputTokens
}
if chunk.Usage.CompletionTokensDetails != nil && chunk.Usage.CompletionTokensDetails.ReasoningTokens > 0 {
result.Usage.ThinkingTokens = chunk.Usage.CompletionTokensDetails.ReasoningTokens
Expand Down
23 changes: 23 additions & 0 deletions internal/providers/openai_endpoints.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,26 @@ func (p *OpenAIProvider) dashScopePassthroughKeys() bool {
}
return false
}

// isDashScope reports whether this provider routes requests to
// DashScope/Bailian, which supports the cache_control:ephemeral wire format
// (verified live 2026-05-08).
//
// Three sources are consulted so that reverse-proxied DashScope endpoints are
// still recognised: the API base URL (via isDashScopeAPIBase), the provider
// type, and the provider name. The latter two match when they contain
// "dashscope" or "bailian", ignoring case; "bailian" is included because the
// live qwen-richard provider has provider_type=bailian.
//
// buildRequestBody uses the result to wrap system content in Anthropic-style
// cache_control blocks for prompt caching (90% discount on cached prefix
// tokens).
func (p *OpenAIProvider) isDashScope() bool {
	if isDashScopeAPIBase(p.apiBase) {
		return true
	}

	// mentionsDashScope expects an already lower-cased string.
	mentionsDashScope := func(lowered string) bool {
		return strings.Contains(lowered, "dashscope") || strings.Contains(lowered, "bailian")
	}

	providerType := strings.ToLower(strings.TrimSpace(p.providerType))
	return mentionsDashScope(providerType) || mentionsDashScope(strings.ToLower(p.name))
}
Loading
Loading