diff --git a/internal/proxy/addon.go b/internal/proxy/addon.go index 235b521..a9afefb 100644 --- a/internal/proxy/addon.go +++ b/internal/proxy/addon.go @@ -637,20 +637,21 @@ func (a *SluiceAddon) Request(f *mitmproxy.Flow) { // Pass 2+3 on body. if len(f.Request.Body) > 0 { - f.Request.Body = a.swapPhantomBytes(f.Request.Body, pairs, host, port, "body") + f.Request.Body = a.swapPhantomBytes(f.Request.Body, pairs, host, port, "body", false) } // Pass 2+3 on URL query. - if rawQ := f.Request.URL.RawQuery; bytes.Contains([]byte(rawQ), phantomPrefix) { + if rawQ := f.Request.URL.RawQuery; bytesContainsAnyPhantomPrefix([]byte(rawQ)) { f.Request.URL.RawQuery = string( - a.swapPhantomBytes([]byte(rawQ), pairs, host, port, "URL query"), + a.swapPhantomBytes([]byte(rawQ), pairs, host, port, "URL query", false), ) } - // Pass 2+3 on URL path. - if rawP := f.Request.URL.Path; bytes.Contains([]byte(rawP), phantomPrefix) { + // Pass 2+3 on URL path. pathContext=true selects path escaping so + // secrets containing spaces get %20, not '+'. + if rawP := f.Request.URL.Path; bytesContainsAnyPhantomPrefix([]byte(rawP)) { f.Request.URL.Path = string( - a.swapPhantomBytes([]byte(rawP), pairs, host, port, "URL path"), + a.swapPhantomBytes([]byte(rawP), pairs, host, port, "URL path", true), ) f.Request.URL.RawPath = "" } @@ -1187,9 +1188,13 @@ func (a *SluiceAddon) buildPhantomPairs(host string, port int, proto string) []p pairs = append(pairs, oauthPairs...) continue } + phantom := []byte(PhantomToken(name)) + encoded := encodePhantomForPair(phantom) pairs = append(pairs, phantomPair{ - phantom: []byte(PhantomToken(name)), - secret: secret, + phantom: phantom, + encodedPhantom: encoded, + encodedPhantomLower: encodePhantomLowerForPair(encoded), + secret: secret, }) } @@ -1211,34 +1216,96 @@ func releasePhantomPairs(pairs []phantomPair) { // hasPhantomPrefix checks whether the request body, headers, or URL // contain the phantom prefix bytes. func (a *SluiceAddon) hasPhantomPrefix(f *mitmproxy.Flow) bool { - if bytes.Contains(f.Request.Body, phantomPrefix) { + if bytesContainsAnyPhantomPrefix(f.Request.Body) { return true } for _, vals := range f.Request.Header { for _, v := range vals { - if bytes.Contains([]byte(v), phantomPrefix) { + if bytesContainsAnyPhantomPrefix([]byte(v)) { return true } } } - if bytes.Contains([]byte(f.Request.URL.RawQuery), phantomPrefix) { + if bytesContainsAnyPhantomPrefix([]byte(f.Request.URL.RawQuery)) { return true } - if bytes.Contains([]byte(f.Request.URL.Path), phantomPrefix) { + if bytesContainsAnyPhantomPrefix([]byte(f.Request.URL.Path)) { return true } return false } +// bytesContainsAnyPhantomPrefix reports whether the data contains the +// literal phantom prefix or either case of the URL-encoded prefix (%3A or +// %3a). Form-urlencoded request bodies and URL query/path components +// percent-encode the colon in phantom tokens, and RFC 3986 §2.1 makes the +// hex digits case-insensitive, so a scan that only checks one case would +// miss phantoms emitted by clients that lowercase their percent escapes. +func bytesContainsAnyPhantomPrefix(data []byte) bool { + return bytes.Contains(data, phantomPrefix) || + bytes.Contains(data, urlEncodedPhantomPrefix) || + bytes.Contains(data, urlEncodedPhantomPrefixLower) +} + // swapPhantomBytes performs Pass 2 (scoped replacement) and Pass 3 (strip // unbound) on a byte slice. -func (a *SluiceAddon) swapPhantomBytes(data []byte, pairs []phantomPair, host string, port int, location string) []byte { +// +// Each pair is matched in both its literal form (`SLUICE_PHANTOM:`, +// the shape used in JSON bodies and raw header values) and its URL-encoded +// form (`SLUICE_PHANTOM%3A`, the shape used in +// application/x-www-form-urlencoded request bodies and URL query strings). +// The encoded path is what makes OAuth refresh round-trips work: refresh +// POSTs to providers like Anthropic and Google use form-urlencoded bodies, +// so the colon in the phantom token gets percent-encoded on the wire. +// Without the encoded scan the upstream receives `SLUICE_PHANTOM%3A...` +// literally, returns `invalid_grant`, and the agent falls back to a fresh +// interactive OAuth — every time tokens expire. +// +// The encoded phantom is precomputed once per pair (in encodePhantomForPair) +// and stored on phantomPair.encodedPhantom so we don't re-allocate it on +// every body, query, or header scan. The encoded secret is computed on +// demand once per swap call, only when the encoded phantom actually appears. +// +// pathContext chooses between query escaping (false; body, URL query, +// header) and path escaping (true; URL path). The two differ in how +// spaces are encoded: QueryEscape uses '+', PathEscape uses '%20'. Using +// query escaping for a path substitution would turn a space in the +// secret into a literal '+' in the URL path, which the server reads as +// a plus character, not a space — corrupting the request. The boolean is +// passed in explicitly so the type system enforces the choice; callers +// cannot accidentally pick path escaping by typo-ing the location label. +// location is still passed for the audit log message but never drives +// behavior. +func (a *SluiceAddon) swapPhantomBytes(data []byte, pairs []phantomPair, host string, port int, location string, pathContext bool) []byte { for _, p := range pairs { if bytes.Contains(data, p.phantom) { data = bytes.ReplaceAll(data, p.phantom, p.secret.Bytes()) } + // Encoded swap covers both uppercase (%3A, the canonical form Go + // emits) and lowercase (%3a, valid per RFC 3986 §2.1). The + // replacement secret is escaped once on first hit and reused so + // the cost stays linear in number-of-encoded-forms, not pairs. + var encodedSecret []byte + ensureEncodedSecret := func() { + if encodedSecret != nil { + return + } + if pathContext { + encodedSecret = pathEscapeBytes(p.secret.Bytes()) + } else { + encodedSecret = queryEscapeBytes(p.secret.Bytes()) + } + } + if len(p.encodedPhantom) > 0 && bytes.Contains(data, p.encodedPhantom) { + ensureEncodedSecret() + data = bytes.ReplaceAll(data, p.encodedPhantom, encodedSecret) + } + if len(p.encodedPhantomLower) > 0 && bytes.Contains(data, p.encodedPhantomLower) { + ensureEncodedSecret() + data = bytes.ReplaceAll(data, p.encodedPhantomLower, encodedSecret) + } } - if bytes.Contains(data, phantomPrefix) { + if bytesContainsAnyPhantomPrefix(data) { data = stripUnboundPhantomsFromProvider(data, a.provider) log.Printf("[ADDON-INJECT] stripped unbound phantom token from %s for %s:%d", location, host, port) } @@ -1246,6 +1313,10 @@ func (a *SluiceAddon) swapPhantomBytes(data []byte, pairs []phantomPair, host st } // swapPhantomHeaders performs Pass 2+3 on all request headers. +// +// Each pair is matched in both its literal and URL-encoded forms so phantom +// tokens carried in percent-encoded header values (custom cookie schemes, +// query-style header payloads) cannot bypass the swap. func (a *SluiceAddon) swapPhantomHeaders(f *mitmproxy.Flow, pairs []phantomPair, host string, port int) { for key, vals := range f.Request.Header { for i, v := range vals { @@ -1256,8 +1327,24 @@ func (a *SluiceAddon) swapPhantomHeaders(f *mitmproxy.Flow, pairs []phantomPair, vb = bytes.ReplaceAll(vb, p.phantom, p.secret.Bytes()) changed = true } + var encodedSecret []byte + ensureEncodedSecret := func() { + if encodedSecret == nil { + encodedSecret = queryEscapeBytes(p.secret.Bytes()) + } + } + if len(p.encodedPhantom) > 0 && bytes.Contains(vb, p.encodedPhantom) { + ensureEncodedSecret() + vb = bytes.ReplaceAll(vb, p.encodedPhantom, encodedSecret) + changed = true + } + if len(p.encodedPhantomLower) > 0 && bytes.Contains(vb, p.encodedPhantomLower) { + ensureEncodedSecret() + vb = bytes.ReplaceAll(vb, p.encodedPhantomLower, encodedSecret) + changed = true + } } - if bytes.Contains(vb, phantomPrefix) { + if bytesContainsAnyPhantomPrefix(vb) { vb = stripUnboundPhantomsFromProvider(vb, a.provider) changed = true log.Printf("[ADDON-INJECT] stripped unbound phantom token from header %q for %s:%d", key, host, port) @@ -1285,15 +1372,30 @@ type phantomSwapReader struct { // maxPhantomLen returns the length of the longest phantom token in the // pairs list. Used to determine how much data to hold back from the // output buffer to handle tokens that span read boundaries. +// +// The result accounts for both literal phantom tokens (SLUICE_PHANTOM:name) +// and their URL-encoded forms (SLUICE_PHANTOM%3Aname). The encoded form is +// strictly longer because the colon expands to %3A, so a holdback sized for +// the literal form alone would lose URL-encoded phantoms that straddle a +// read boundary. Uses the precomputed encodedPhantom on each pair so no +// per-chunk allocation is required. func maxPhantomLen(pairs []phantomPair) int { m := 0 for _, p := range pairs { if len(p.phantom) > m { m = len(p.phantom) } + if len(p.encodedPhantom) > m { + m = len(p.encodedPhantom) + } + if len(p.encodedPhantomLower) > m { + m = len(p.encodedPhantomLower) + } } - // Also account for the generic phantom prefix pattern. - if pLen := len(phantomPrefix) + maxCredNameLen; pLen > m { + // Also account for the generic phantom prefix pattern. Uppercase and + // lowercase encoded prefixes are the same length, so either works as + // the lower bound. + if pLen := len(urlEncodedPhantomPrefix) + maxCredNameLen; pLen > m { m = pLen } return m @@ -1340,14 +1442,32 @@ func (r *phantomSwapReader) Read(p []byte) (int, error) { toProcess := r.pending[:safe] r.pending = append([]byte(nil), r.pending[safe:]...) - // Pass 2: scoped replacement. + // Pass 2: scoped replacement, in both literal and URL-encoded forms + // (both case variants of %3A). The encoded phantom is precomputed + // once per pair so this hot path only allocates when an encoded + // phantom is actually present and we need the encoded form of the + // real secret. for _, pp := range r.pairs { if bytes.Contains(toProcess, pp.phantom) { toProcess = bytes.ReplaceAll(toProcess, pp.phantom, pp.secret.Bytes()) } + var encodedSecret []byte + ensureEncodedSecret := func() { + if encodedSecret == nil { + encodedSecret = queryEscapeBytes(pp.secret.Bytes()) + } + } + if len(pp.encodedPhantom) > 0 && bytes.Contains(toProcess, pp.encodedPhantom) { + ensureEncodedSecret() + toProcess = bytes.ReplaceAll(toProcess, pp.encodedPhantom, encodedSecret) + } + if len(pp.encodedPhantomLower) > 0 && bytes.Contains(toProcess, pp.encodedPhantomLower) { + ensureEncodedSecret() + toProcess = bytes.ReplaceAll(toProcess, pp.encodedPhantomLower, encodedSecret) + } } - // Pass 3: strip unbound. - if bytes.Contains(toProcess, phantomPrefix) { + // Pass 3: strip unbound, including URL-encoded phantoms. + if bytesContainsAnyPhantomPrefix(toProcess) { toProcess = stripUnboundPhantomsFromProvider(toProcess, r.provider) } diff --git a/internal/proxy/addon_test.go b/internal/proxy/addon_test.go index d7c7394..5c672b7 100644 --- a/internal/proxy/addon_test.go +++ b/internal/proxy/addon_test.go @@ -726,6 +726,250 @@ func TestRequest_StripUnboundPhantoms(t *testing.T) { } } +// TestRequest_PhantomSwapInFormUrlencodedBody covers the OAuth refresh path: +// the agent posts application/x-www-form-urlencoded data and the phantom's +// colon is percent-encoded on the wire (SLUICE_PHANTOM%3Aapi_key). The +// scanner must recognize that form and substitute the URL-encoded secret so +// the upstream still receives a well-formed form body. +func TestRequest_PhantomSwapInFormUrlencodedBody(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{"api_key": "real value/with+special&chars"}, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + client := setupAddonConn(addon, "api.example.com:443") + + f := newTestFlow(client, "POST", "https://api.example.com/oauth/token") + f.Request.Header.Set("Content-Type", "application/x-www-form-urlencoded") + f.Request.Body = []byte("grant_type=refresh_token&refresh_token=SLUICE_PHANTOM%3Aapi_key") + + addon.Requestheaders(f) + addon.Request(f) + + body := string(f.Request.Body) + wantValue := "real+value%2Fwith%2Bspecial%26chars" + if !strings.Contains(body, wantValue) { + t.Fatalf("body = %q, want url-encoded secret %q", body, wantValue) + } + if strings.Contains(body, "SLUICE_PHANTOM%3A") { + t.Fatalf("url-encoded phantom should have been replaced, got %q", body) + } + if strings.Contains(body, "SLUICE_PHANTOM:") { + t.Fatalf("literal phantom should not appear in body, got %q", body) + } +} + +// TestRequest_StripUnboundPhantoms_UrlEncoded asserts that an unbound +// phantom in url-encoded form does not pass through to the upstream. +func TestRequest_StripUnboundPhantoms_UrlEncoded(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{ + "api_key": "real-secret", + "other_key": "other-secret", + }, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + client := setupAddonConn(addon, "api.example.com:443") + + f := newTestFlow(client, "POST", "https://api.example.com/data") + f.Request.Header.Set("Content-Type", "application/x-www-form-urlencoded") + f.Request.Body = []byte("key=SLUICE_PHANTOM%3Aapi_key&unbound=SLUICE_PHANTOM%3Aother_key") + + addon.Requestheaders(f) + addon.Request(f) + + body := string(f.Request.Body) + if !strings.Contains(body, "real-secret") { + t.Fatalf("expected bound phantom to be replaced, got %q", body) + } + if strings.Contains(body, "SLUICE_PHANTOM%3A") { + t.Fatalf("expected unbound url-encoded phantom to be stripped, got %q", body) + } + if strings.Contains(body, "other-secret") { + t.Fatalf("unbound phantom should not be replaced with real credential, got %q", body) + } +} + +// TestRequest_PhantomSwapInUrlEncodedQuery covers the URL query path: +// the phantom is percent-encoded in RawQuery and must be swapped with a +// percent-encoded real value so the resulting query string stays valid. +func TestRequest_PhantomSwapInUrlEncodedQuery(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{"api_key": "real-secret"}, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + client := setupAddonConn(addon, "api.example.com:443") + + f := newTestFlow(client, "GET", "https://api.example.com/data?token=SLUICE_PHANTOM%3Aapi_key") + + addon.Requestheaders(f) + addon.Request(f) + + if got := f.Request.URL.RawQuery; !strings.Contains(got, "real-secret") { + t.Fatalf("RawQuery = %q, want to contain real-secret", got) + } + if got := f.Request.URL.RawQuery; strings.Contains(got, "SLUICE_PHANTOM%3A") { + t.Fatalf("RawQuery still contains url-encoded phantom: %q", got) + } +} + +// TestSwapPhantomBytes_PathUsesPathEscape exercises the URL-path branch of +// the swap directly. A secret that contains a space must be encoded as +// %20 (PathEscape), not '+' (QueryEscape), so the path stays semantically +// correct. Net/url's parser normally decodes %3A in URL.Path before our +// scan ever runs, but the swap helper is shared with QUIC and streaming +// paths where the encoded phantom can reach it, and the encoding choice +// must still be path-correct. +func TestSwapPhantomBytes_PathUsesPathEscape(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{"api_key": "real secret"}, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + + phantom := []byte(PhantomToken("api_key")) + pairs := []phantomPair{{ + phantom: phantom, + encodedPhantom: encodePhantomForPair(phantom), + secret: vault.NewSecureBytes("real secret"), + }} + defer releasePhantomPairs(pairs) + + in := []byte("/v1/SLUICE_PHANTOM%3Aapi_key/resource") + out := addon.swapPhantomBytes(in, pairs, "api.example.com", 443, "URL path", true) + got := string(out) + + if !strings.Contains(got, "real%20secret") { + t.Fatalf("path swap = %q, want PathEscape (real%%20secret)", got) + } + if strings.Contains(got, "real+secret") { + t.Fatalf("path swap = %q, must not contain '+' for a space in a URL path", got) + } + if strings.Contains(got, "SLUICE_PHANTOM") { + t.Fatalf("path swap = %q, still contains phantom", got) + } +} + +// TestSwapPhantomBytes_QueryUsesQueryEscape covers the symmetric query case: +// the same swap helper must encode spaces as '+' for query strings and form +// bodies so the receiving application/x-www-form-urlencoded parser sees a +// space. +func TestSwapPhantomBytes_QueryUsesQueryEscape(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{"api_key": "real secret"}, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + + phantom := []byte(PhantomToken("api_key")) + pairs := []phantomPair{{ + phantom: phantom, + encodedPhantom: encodePhantomForPair(phantom), + secret: vault.NewSecureBytes("real secret"), + }} + defer releasePhantomPairs(pairs) + + in := []byte("token=SLUICE_PHANTOM%3Aapi_key") + out := addon.swapPhantomBytes(in, pairs, "api.example.com", 443, "URL query", false) + got := string(out) + + if !strings.Contains(got, "real+secret") { + t.Fatalf("query swap = %q, want QueryEscape ('+' for space)", got) + } + if strings.Contains(got, "SLUICE_PHANTOM") { + t.Fatalf("query swap = %q, still contains phantom", got) + } +} + +// TestRequest_PhantomSwapInFormUrlencodedBody_LowercaseHex covers the +// case-insensitivity of percent-encoded hex (RFC 3986 §2.1). A client may +// emit %3a instead of %3A. The scanner must still swap the phantom and +// the upstream must receive the real secret. +func TestRequest_PhantomSwapInFormUrlencodedBody_LowercaseHex(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{"api_key": "real-secret"}, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + client := setupAddonConn(addon, "api.example.com:443") + + f := newTestFlow(client, "POST", "https://api.example.com/oauth/token") + f.Request.Header.Set("Content-Type", "application/x-www-form-urlencoded") + f.Request.Body = []byte("grant_type=refresh_token&refresh_token=SLUICE_PHANTOM%3aapi_key") + + addon.Requestheaders(f) + addon.Request(f) + + body := string(f.Request.Body) + if !strings.Contains(body, "real-secret") { + t.Fatalf("body = %q, want real-secret", body) + } + if strings.Contains(body, "SLUICE_PHANTOM%3a") || strings.Contains(body, "SLUICE_PHANTOM%3A") { + t.Fatalf("body = %q, must not contain any encoded phantom variant", body) + } +} + +// TestRequest_StripUnboundPhantoms_LowercaseHex asserts the unbound-strip +// path also catches the lowercase percent-encoded variant. +func TestRequest_StripUnboundPhantoms_LowercaseHex(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{ + "api_key": "real-secret", + "other_key": "other-secret", + }, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + client := setupAddonConn(addon, "api.example.com:443") + + f := newTestFlow(client, "POST", "https://api.example.com/data") + f.Request.Body = []byte("k=SLUICE_PHANTOM%3aapi_key&u=SLUICE_PHANTOM%3aother_key") + + addon.Requestheaders(f) + addon.Request(f) + + body := string(f.Request.Body) + if !strings.Contains(body, "real-secret") { + t.Fatalf("bound phantom not swapped, body = %q", body) + } + if strings.Contains(body, "SLUICE_PHANTOM") { + t.Fatalf("unbound lowercase phantom not stripped, body = %q", body) + } + if strings.Contains(body, "other-secret") { + t.Fatalf("unbound phantom must not be replaced with real credential, body = %q", body) + } +} + func TestRequest_NoBindingNoChange(t *testing.T) { // No bindings configured. Body without phantom tokens should pass // through unchanged. @@ -872,6 +1116,49 @@ func TestStreamRequestModifier_LargeBodySpanningReads(t *testing.T) { } } +// TestStreamRequestModifier_UrlEncodedPhantomSpanningReads verifies that +// the streaming reader's holdback buffer is sized for the URL-encoded form +// of a phantom (SLUICE_PHANTOM%3Aapi_key), not just the literal one. The +// encoded form is two bytes longer because the colon expands to %3A, and a +// holdback sized to the literal length alone would let an encoded phantom +// straddling a read boundary slip through unswapped. +func TestStreamRequestModifier_UrlEncodedPhantomSpanningReads(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{"api_key": "REPLACED"}, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + client := setupAddonConn(addon, "api.example.com:443") + + prefix := bytes.Repeat([]byte("A"), 16*1024) + phantom := []byte("SLUICE_PHANTOM%3Aapi_key") + suffix := bytes.Repeat([]byte("B"), 16*1024) + body := make([]byte, 0, len(prefix)+len(phantom)+len(suffix)) + body = append(body, prefix...) + body = append(body, phantom...) + body = append(body, suffix...) + + f := newTestFlow(client, "POST", "https://api.example.com/oauth/token") + f.Request.Header.Set("Content-Type", "application/x-www-form-urlencoded") + reader := addon.StreamRequestModifier(f, bytes.NewReader(body)) + + out, err := io.ReadAll(reader) + if err != nil { + t.Fatalf("ReadAll failed: %v", err) + } + + if bytes.Contains(out, []byte("SLUICE_PHANTOM%3A")) || bytes.Contains(out, []byte("SLUICE_PHANTOM%3a")) { + t.Fatalf("url-encoded phantom must not survive streaming swap, got: %q", out[16*1024-8:16*1024+50]) + } + if !bytes.Contains(out, []byte("REPLACED")) { + t.Fatalf("expected real credential in streamed output") + } +} + func TestStreamRequestModifier_StripUnbound(t *testing.T) { addon := newTestAddonWithCreds( t, diff --git a/internal/proxy/phantom.go b/internal/proxy/phantom.go index eb12da4..d19f125 100644 --- a/internal/proxy/phantom.go +++ b/internal/proxy/phantom.go @@ -2,15 +2,39 @@ package proxy import "regexp" -// phantomPrefix is the byte prefix for all phantom tokens, used for quick -// detection before applying the more expensive regex strip. +// phantomPrefix is the byte prefix for all phantom tokens in their literal +// form, used for quick detection before applying the more expensive regex +// strip. Literal form appears in JSON bodies, raw header values, and +// anywhere the colon survives unencoded. var phantomPrefix = []byte("SLUICE_PHANTOM:") +// urlEncodedPhantomPrefix is the byte prefix for phantom tokens after URL +// percent-encoding (the colon becomes %3A). Appears in +// application/x-www-form-urlencoded request bodies (e.g. OAuth refresh +// POSTs) and in URL query strings. Without scanning for this form, a +// phantom embedded in form-urlencoded data would pass through unswapped +// and the upstream would receive the literal `SLUICE_PHANTOM%3A...` +// string. The two prefixes are kept side by side rather than computed at +// runtime so the byte scan stays a single allocation-free contains check. +// +// Percent-encoding hex digits are case-insensitive per RFC 3986 §2.1, so +// callers may emit either %3A or %3a. Go's url.QueryEscape always produces +// uppercase, but third-party clients can produce lowercase. The lowercase +// variant is stored alongside the uppercase one so the prefix scan catches +// both forms. +var ( + urlEncodedPhantomPrefix = []byte("SLUICE_PHANTOM%3A") + urlEncodedPhantomPrefixLower = []byte("SLUICE_PHANTOM%3a") +) + // phantomStripRe is a last-resort regex for stripping phantom tokens when -// provider.List() cannot enumerate all credential names. It matches word -// characters, dots, and hyphens. +// provider.List() cannot enumerate all credential names. It matches both +// literal (SLUICE_PHANTOM:...) and URL-encoded (SLUICE_PHANTOM%3A...) forms +// so unbound phantoms cannot leak via either encoding. The character class +// matches word characters, dots, and hyphens — the same set used by the +// credential-name and OAuth-suffix grammar. // The primary strip path uses exact matching via provider.List(). -var phantomStripRe = regexp.MustCompile(`SLUICE_PHANTOM:[\w.\-]+`) +var phantomStripRe = regexp.MustCompile(`SLUICE_PHANTOM(?::|%3[Aa])[\w.\-]+`) // PhantomToken returns the placeholder token for a credential name. // Agents use this token in requests. The MITM proxy replaces it with diff --git a/internal/proxy/phantom_pairs.go b/internal/proxy/phantom_pairs.go index ca6596a..3f27c37 100644 --- a/internal/proxy/phantom_pairs.go +++ b/internal/proxy/phantom_pairs.go @@ -6,6 +6,175 @@ import ( "github.com/nemirovsky/sluice/internal/vault" ) +// encodePhantomForPair returns the URL query-escaped form of a phantom +// token, or nil when QueryEscape would leave the bytes unchanged. The +// "nil when unchanged" convention lets the hot-path swap skip a redundant +// bytes.Contains scan for the literal form a second time. A pre-scan +// returns nil before any allocation when no byte in phantom would be +// escaped — also avoids the byte->string copy that url.QueryEscape would +// otherwise produce for the no-op case. +func encodePhantomForPair(phantom []byte) []byte { + if !phantomNeedsQueryEscape(phantom) { + return nil + } + return queryEscapeBytes(phantom) +} + +// phantomNeedsQueryEscape reports whether any byte in phantom would be +// rewritten by queryEscapeBytes. Returns true on a space or any byte +// outside the unreserved-for-query-component set. +func phantomNeedsQueryEscape(phantom []byte) bool { + for _, c := range phantom { + if c == ' ' || !shouldNotEscapeQueryComponent(c) { + return true + } + } + return false +} + +// encodePhantomLowerForPair returns the lowercase-hex variant of the +// uppercase-encoded phantom, or nil when the input is nil or the +// lowercased form is identical (which happens when the encoding has no +// hex digits A-F). RFC 3986 §2.1 makes percent-encoded hex case- +// insensitive, so a phantom that arrives encoded as %3a must still match +// the precomputed phantom whose canonical form is %3A. +// +// A pre-scan returns nil before any allocation when no percent-escape +// sequence contains an uppercase A-F hex digit. This fast path only +// fires for phantoms whose escaped form happens to use 0-9 hex digits +// exclusively. A phantom containing %3A (the encoded colon, which every +// SLUICE_PHANTOM: phantom has after url-encoding) still differs +// between %3A and %3a, so the allocation still occurs in the common +// case — the fast path is for shapes like %20%21%30 where every escape +// is already lowercase-equivalent. +func encodePhantomLowerForPair(encoded []byte) []byte { + if len(encoded) == 0 { + return nil + } + hasUpperHex := false + for i := 0; i < len(encoded); i++ { + if encoded[i] != '%' || i+2 >= len(encoded) { + continue + } + if isASCIIUpperHex(encoded[i+1]) || isASCIIUpperHex(encoded[i+2]) { + hasUpperHex = true + break + } + } + if !hasUpperHex { + return nil + } + lower := make([]byte, len(encoded)) + i := 0 + for i < len(encoded) { + if encoded[i] == '%' && i+2 < len(encoded) { + lower[i] = '%' + lower[i+1] = asciiLowerHex(encoded[i+1]) + lower[i+2] = asciiLowerHex(encoded[i+2]) + i += 3 + continue + } + lower[i] = encoded[i] + i++ + } + return lower +} + +func isASCIIUpperHex(b byte) bool { + return b >= 'A' && b <= 'F' +} + +func asciiLowerHex(b byte) byte { + if b >= 'A' && b <= 'F' { + return b + ('a' - 'A') + } + return b +} + +// queryEscapeBytes is a byte-in, byte-out form-component URL encoder that +// mirrors net/url.QueryEscape's output rules without ever materializing +// the input as a Go string. url.QueryEscape's signature is +// `func(string) string`, which forces callers to wrap a credential's +// SecureBytes via `url.QueryEscape(string(secret.Bytes()))` and leaves +// two immutable string copies of the secret on the heap that +// SecureBytes.Release() cannot zero. Operating on []byte throughout keeps +// the secret only in slices the caller can clear. +// +// The unreserved character set follows RFC 3986 §2.3 plus Go's +// net/url-compatible additions: spaces become '+' (form encoding), and +// everything outside the unreserved set is percent-encoded with +// uppercase hex (the canonical form Go and most clients emit). +func queryEscapeBytes(src []byte) []byte { + dst := make([]byte, 0, len(src)) + for _, c := range src { + switch { + case c == ' ': + dst = append(dst, '+') + case shouldNotEscapeQueryComponent(c): + dst = append(dst, c) + default: + dst = append(dst, '%', hexUpper(c>>4), hexUpper(c&0x0F)) + } + } + return dst +} + +// pathEscapeBytes is the byte-level analogue of net/url.PathEscape. It +// preserves URL-path semantics: spaces become %20 (not '+'), and a +// slightly larger unreserved set is honored (sub-delims that are legal +// in path segments are emitted verbatim). +func pathEscapeBytes(src []byte) []byte { + dst := make([]byte, 0, len(src)) + for _, c := range src { + if shouldNotEscapePathSegment(c) { + dst = append(dst, c) + continue + } + dst = append(dst, '%', hexUpper(c>>4), hexUpper(c&0x0F)) + } + return dst +} + +// shouldNotEscapeQueryComponent reports whether a byte is safe to emit +// literally inside an application/x-www-form-urlencoded value. Matches +// the predicate net/url applies for encodeQueryComponent: ALPHA / DIGIT +// / '-' / '_' / '.' / '~'. +func shouldNotEscapeQueryComponent(c byte) bool { + switch { + case c >= 'A' && c <= 'Z', + c >= 'a' && c <= 'z', + c >= '0' && c <= '9': + return true + case c == '-' || c == '_' || c == '.' || c == '~': + return true + } + return false +} + +// shouldNotEscapePathSegment reports whether a byte is safe to emit +// literally inside a URL path segment. Matches the predicate net/url +// applies for encodePathSegment: unreserved + sub-delims minus the +// segment separators '/' and '?'. Specifically: ALPHA / DIGIT / +// '-' '_' '.' '~' '$' '&' '+' ',' ';' '=' ':' '@'. +func shouldNotEscapePathSegment(c byte) bool { + if shouldNotEscapeQueryComponent(c) { + return true + } + switch c { + case '$', '&', '+', ',', ';', '=', ':', '@': + return true + } + return false +} + +// hexUpper returns the uppercase hex digit for a nibble in 0..15. +func hexUpper(nibble byte) byte { + if nibble < 10 { + return '0' + nibble + } + return 'A' + (nibble - 10) +} + // maxProxyBody limits the request/response body size that MITM proxies // (HTTPS and QUIC) read for phantom token replacement. 16 MiB is // sufficient for typical API traffic while preventing memory exhaustion @@ -24,15 +193,23 @@ func buildOAuthPhantomPairs(name string, secret vault.SecureBytes, logPrefix str return nil, err } accessSecret := vault.NewSecureBytes(cred.AccessToken) + accessPhantom := []byte(oauthPhantomAccess(name, cred.AccessToken)) + accessEncoded := encodePhantomForPair(accessPhantom) pairs := []phantomPair{{ - phantom: []byte(oauthPhantomAccess(name, cred.AccessToken)), - secret: accessSecret, + phantom: accessPhantom, + encodedPhantom: accessEncoded, + encodedPhantomLower: encodePhantomLowerForPair(accessEncoded), + secret: accessSecret, }} if cred.RefreshToken != "" { refreshSecret := vault.NewSecureBytes(cred.RefreshToken) + refreshPhantom := []byte(oauthPhantomRefresh(name, cred.RefreshToken)) + refreshEncoded := encodePhantomForPair(refreshPhantom) pairs = append(pairs, phantomPair{ - phantom: []byte(oauthPhantomRefresh(name, cred.RefreshToken)), - secret: refreshSecret, + phantom: refreshPhantom, + encodedPhantom: refreshEncoded, + encodedPhantomLower: encodePhantomLowerForPair(refreshEncoded), + secret: refreshSecret, }) } return pairs, nil diff --git a/internal/proxy/phantom_strip.go b/internal/proxy/phantom_strip.go index 420796f..ec48d68 100644 --- a/internal/proxy/phantom_strip.go +++ b/internal/proxy/phantom_strip.go @@ -47,6 +47,25 @@ func stripUnboundPhantomsFromProvider(data []byte, provider vault.Provider) []by []byte(PhantomToken(name)), ) } + // Mirror every phantom with its URL-encoded variants so phantoms carried + // in form-urlencoded bodies or URL components are stripped as cleanly as + // the literal form. Two variants are added: the canonical uppercase form + // emitted by Go's url.QueryEscape, and a lowercase-hex form that other + // clients may produce (RFC 3986 §2.1 makes percent-encoded hex case- + // insensitive). Variants are only added when they differ from forms we + // already cover, so the literal scan path is not duplicated. + encodedPhantoms := make([][]byte, 0, len(phantoms)*2) + for _, p := range phantoms { + encoded := encodePhantomForPair(p) + if encoded == nil { + continue + } + encodedPhantoms = append(encodedPhantoms, encoded) + if lower := encodePhantomLowerForPair(encoded); lower != nil { + encodedPhantoms = append(encodedPhantoms, lower) + } + } + phantoms = append(phantoms, encodedPhantoms...) // Sort by token length descending so longer phantom tokens are stripped // before shorter prefixes that could corrupt them via substring match. sort.Slice(phantoms, func(i, j int) bool { @@ -58,8 +77,12 @@ func stripUnboundPhantomsFromProvider(data []byte, provider vault.Provider) []by } } // Last-resort regex strip for phantom tokens from providers that - // don't support List() (e.g. env provider). - if bytes.Contains(data, phantomPrefix) { + // don't support List() (e.g. env provider). The regex handles literal + // (SLUICE_PHANTOM:...) and URL-encoded (SLUICE_PHANTOM%3A.../%3a...) + // forms so unbound phantoms in form-urlencoded bodies are caught too. + if bytes.Contains(data, phantomPrefix) || + bytes.Contains(data, urlEncodedPhantomPrefix) || + bytes.Contains(data, urlEncodedPhantomPrefixLower) { data = phantomStripRe.ReplaceAll(data, nil) } return data diff --git a/internal/proxy/quic.go b/internal/proxy/quic.go index 4c0bc50..6aa472d 100644 --- a/internal/proxy/quic.go +++ b/internal/proxy/quic.go @@ -572,9 +572,13 @@ func (q *QUICProxy) buildPhantomPairs(host string, port int) []phantomPair { pairs = append(pairs, oauthPairs...) continue } + phantom := []byte(PhantomToken(name)) + encoded := encodePhantomForPair(phantom) pairs = append(pairs, phantomPair{ - phantom: []byte(PhantomToken(name)), - secret: secret, + phantom: phantom, + encodedPhantom: encoded, + encodedPhantomLower: encodePhantomLowerForPair(encoded), + secret: secret, }) } } @@ -596,8 +600,24 @@ func (q *QUICProxy) replacePhantomInHeaders(r *http.Request, pairs []phantomPair vb = bytes.ReplaceAll(vb, p.phantom, p.secret.Bytes()) changed = true } + var encodedSecret []byte + ensureEncodedSecret := func() { + if encodedSecret == nil { + encodedSecret = queryEscapeBytes(p.secret.Bytes()) + } + } + if len(p.encodedPhantom) > 0 && bytes.Contains(vb, p.encodedPhantom) { + ensureEncodedSecret() + vb = bytes.ReplaceAll(vb, p.encodedPhantom, encodedSecret) + changed = true + } + if len(p.encodedPhantomLower) > 0 && bytes.Contains(vb, p.encodedPhantomLower) { + ensureEncodedSecret() + vb = bytes.ReplaceAll(vb, p.encodedPhantomLower, encodedSecret) + changed = true + } } - if bytes.Contains(vb, phantomPrefix) { + if bytesContainsAnyPhantomPrefix(vb) { vb = q.stripUnboundPhantoms(vb) changed = true log.Printf("[QUIC-MITM] stripped unbound phantom from header %q for %s:%d", key, host, port) @@ -610,14 +630,32 @@ func (q *QUICProxy) replacePhantomInHeaders(r *http.Request, pairs []phantomPair } // replacePhantomInBody replaces phantom tokens in a request body with real -// credential values and strips any unbound phantom tokens. +// credential values and strips any unbound phantom tokens. Matches both +// the literal SLUICE_PHANTOM: form and the URL-encoded +// SLUICE_PHANTOM%3A form (uppercase and lowercase hex) so that +// HTTP/3 form-urlencoded OAuth refreshes route through the same swap as +// HTTP/1.x and HTTP/2. func (q *QUICProxy) replacePhantomInBody(body []byte, pairs []phantomPair, host string, port int) []byte { for _, p := range pairs { if bytes.Contains(body, p.phantom) { body = bytes.ReplaceAll(body, p.phantom, p.secret.Bytes()) } + var encodedSecret []byte + ensureEncodedSecret := func() { + if encodedSecret == nil { + encodedSecret = queryEscapeBytes(p.secret.Bytes()) + } + } + if len(p.encodedPhantom) > 0 && bytes.Contains(body, p.encodedPhantom) { + ensureEncodedSecret() + body = bytes.ReplaceAll(body, p.encodedPhantom, encodedSecret) + } + if len(p.encodedPhantomLower) > 0 && bytes.Contains(body, p.encodedPhantomLower) { + ensureEncodedSecret() + body = bytes.ReplaceAll(body, p.encodedPhantomLower, encodedSecret) + } } - if bytes.Contains(body, phantomPrefix) { + if bytesContainsAnyPhantomPrefix(body) { body = q.stripUnboundPhantoms(body) log.Printf("[QUIC-MITM] stripped unbound phantom from body for %s:%d", host, port) } diff --git a/internal/proxy/ws.go b/internal/proxy/ws.go index 5d146f0..cfb10fe 100644 --- a/internal/proxy/ws.go +++ b/internal/proxy/ws.go @@ -371,9 +371,23 @@ func (wp *WSProxy) UpdateRules(blockConfigs []WSBlockRuleConfig, redactConfigs [ } // phantomPair holds a phantom token and its corresponding real credential. +// encodedPhantom is the URL query-escaped form of phantom (uppercase hex), +// and encodedPhantomLower is the same form with the percent-encoded colon +// in lowercase. Both are precomputed once per pair so the hot-path swap +// does not re-allocate on every request, stream chunk, or header. +// +// The lowercase variant exists because percent-encoded hex digits are +// case-insensitive per RFC 3986 §2.1: Go's url.QueryEscape always emits +// uppercase, but third-party clients can emit lowercase, and a phantom +// scan that only checked one casing would let `SLUICE_PHANTOM%3a...` +// through to the upstream. The fields are nil when no encoded form would +// differ from the literal phantom bytes, which keeps the no-encoded-form +// branch allocation-free. type phantomPair struct { - phantom []byte - secret vault.SecureBytes + phantom []byte + encodedPhantom []byte + encodedPhantomLower []byte + secret vault.SecureBytes } // Relay runs bidirectional WebSocket frame forwarding between agent and @@ -400,9 +414,13 @@ func (wp *WSProxy) Relay(agentConn, upstreamConn net.Conn, host string, port int pairs = append(pairs, oauthPairs...) continue } + phantom := []byte(PhantomToken(name)) + encoded := encodePhantomForPair(phantom) pairs = append(pairs, phantomPair{ - phantom: []byte(PhantomToken(name)), - secret: secret, + phantom: phantom, + encodedPhantom: encoded, + encodedPhantomLower: encodePhantomLowerForPair(encoded), + secret: secret, }) } } @@ -496,15 +514,33 @@ func (wp *WSProxy) relayFrames(src io.Reader, dst io.Writer, pairs []phantomPair } } - // Replace bound phantom tokens with real credentials. + // Replace bound phantom tokens with real credentials in both + // literal and URL-encoded forms (covers WS text frames that + // carry application/x-www-form-urlencoded payloads or + // percent-escaped query-like content). for _, p := range pairs { if bytes.Contains(payload, p.phantom) { payload = bytes.ReplaceAll(payload, p.phantom, p.secret.Bytes()) } + var encodedSecret []byte + ensureEncodedSecret := func() { + if encodedSecret == nil { + encodedSecret = queryEscapeBytes(p.secret.Bytes()) + } + } + if len(p.encodedPhantom) > 0 && bytes.Contains(payload, p.encodedPhantom) { + ensureEncodedSecret() + payload = bytes.ReplaceAll(payload, p.encodedPhantom, encodedSecret) + } + if len(p.encodedPhantomLower) > 0 && bytes.Contains(payload, p.encodedPhantomLower) { + ensureEncodedSecret() + payload = bytes.ReplaceAll(payload, p.encodedPhantomLower, encodedSecret) + } } - // Strip any remaining unbound phantom tokens. - if bytes.Contains(payload, phantomPrefix) { + // Strip any remaining unbound phantom tokens (literal or + // URL-encoded, either case). + if bytesContainsAnyPhantomPrefix(payload) { payload = wp.stripUnboundPhantoms(payload) log.Printf("[WS] stripped unbound phantom token from text frame") }