Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 38 additions & 14 deletions redacted-proxy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,19 +196,26 @@ def _check_rate_limit(token: str) -> bool:
"venice-uncensored": ("venice", "venice-uncensored"),
"nous-hermes-3-nitro": ("venice", "nous-hermes-3-nitro"),
"lfm-40b": ("venice", "lfm-40b"),
"qwen-2-5-vl": ("venice", "qwen-2-5-vl"),
"llama-3-3-70b": ("venice", "llama-3-3-70b"),
}


def _resolve_provider(model: str, explicit_provider: str = "") -> tuple[str, str]:
"""Return (provider, upstream_model) for a given model name."""
if explicit_provider:
return explicit_provider.lower(), model
# Explicit alias table wins first — prevents ambiguous prefix matches
explicit = explicit_provider.lower() if explicit_provider else ""

# Alias table always supplies the upstream model id (even when X-Provider is set).
if model in _MODEL_ALIASES:
return _MODEL_ALIASES[model]
alias_provider, upstream_model = _MODEL_ALIASES[model]
return explicit or alias_provider, upstream_model

# Venice exact-name set (checked before prefix rules)
if model in _VENICE_MODELS:
return "venice", model
return explicit or "venice", model

if explicit:
return explicit, model
# Prefix routing — Groq llama/gemma/mixtral/qwen only (not Venice variants)
if model.startswith("grok-"):
return "xai", model
Expand Down Expand Up @@ -389,15 +396,17 @@ async def _forward(provider: str, upstream_model: str, payload: dict,
body = _to_anthropic({**payload, "model": upstream_model})
async with session.post(url, json=body, headers=headers, timeout=aiohttp.ClientTimeout(total=90)) as resp:
result = await resp.json(content_type=None)
if "content" not in result:
raise ValueError(f"Anthropic error: {result.get('error', result)}")
if resp.status >= 400 or "content" not in result:
err = result.get("error", result) if isinstance(result, dict) else result
raise ValueError(f"Anthropic HTTP {resp.status}: {err}")
return _from_anthropic(result)
else:
body = {**payload, "model": upstream_model}
async with session.post(url, json=body, headers=headers, timeout=aiohttp.ClientTimeout(total=90)) as resp:
result = await resp.json(content_type=None)
if "choices" not in result:
raise ValueError(f"{provider} error: {result.get('error', result)}")
if resp.status >= 400 or "choices" not in result:
err = result.get("error", result) if isinstance(result, dict) else result
raise ValueError(f"{provider} HTTP {resp.status}: {err}")
return result


Expand Down Expand Up @@ -464,6 +473,14 @@ async def handle_config_post(request: web.Request) -> web.Response:
if "log_level" not in body:
_cfg["log_level"] = _default_log_level(val)
updated["privacy_mode"] = val
# Maximum mode: enable scrub + ephemeral unless overridden in same request
if val == "maximum":
if "privacy_scrub" not in body:
_cfg["privacy_scrub"] = True
updated["privacy_scrub"] = True
if "ephemeral_mode" not in body:
_cfg["ephemeral_mode"] = True
updated["ephemeral_mode"] = True

if "log_level" in body:
val = str(body["log_level"]).lower()
Expand All @@ -487,11 +504,18 @@ async def handle_config_post(request: web.Request) -> web.Response:


async def handle_models(request: web.Request) -> web.Response:
    """Return the model catalogue as an OpenAI-style ``list`` object.

    A model is listed only when ``_PROVIDER_KEYS`` holds a truthy entry for
    its provider. Aliases from ``_MODEL_ALIASES`` are emitted first, in table
    order; names from ``_VENICE_MODELS`` that were not already emitted follow
    in sorted order.

    Args:
        request: The incoming request; it is not inspected.

    Returns:
        A JSON response of the form ``{"object": "list", "data": [...]}``
        where each entry has ``id``, ``object`` and ``owned_by`` keys.
    """
    seen: set[str] = set()
    models: list[dict[str, str]] = []

    # Alias ids are dict keys and therefore already unique, so no duplicate
    # check is needed here; `seen` only guards the Venice pass below.
    for alias, (prov, _) in _MODEL_ALIASES.items():
        if not _PROVIDER_KEYS.get(prov):
            continue
        models.append({"id": alias, "object": "model", "owned_by": prov})
        seen.add(alias)

    # NOTE(review): an alias skipped above (its provider has no key) can still
    # be listed here as a Venice model if the same name is in _VENICE_MODELS;
    # confirm that matches how such a name is routed.
    if _PROVIDER_KEYS.get("venice"):
        for name in sorted(_VENICE_MODELS):
            if name in seen:
                continue
            models.append({"id": name, "object": "model", "owned_by": "venice"})
            seen.add(name)

    return web.json_response({"object": "list", "data": models})


Expand Down
33 changes: 33 additions & 0 deletions redacted-proxy/test_routing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Unit tests for redacted-proxy provider routing."""
import unittest

from main import _resolve_provider


class ResolveProviderTests(unittest.TestCase):
    """Checks the ``(provider, upstream_model)`` pair returned by ``_resolve_provider``."""

    def test_alias_without_explicit_provider(self):
        # With no provider hint, the alias supplies both provider and upstream id.
        resolved = _resolve_provider("llama-3.3-70b", "")
        self.assertEqual(resolved, ("groq", "llama-3.3-70b-versatile"))

    def test_alias_with_x_provider_still_maps_upstream(self):
        # CloudLLMClient always sends X-Provider; upstream id must not stay on alias name.
        resolved = _resolve_provider("llama-3.3-70b", "groq")
        self.assertEqual(resolved, ("groq", "llama-3.3-70b-versatile"))

    def test_venice_model(self):
        # Each case is (model, explicit provider); the model name passes through unchanged.
        cases = (
            ("gemma-4-uncensored", ""),
            ("qwen-2-5-vl", "venice"),
        )
        for model, explicit_provider in cases:
            self.assertEqual(
                _resolve_provider(model, explicit_provider),
                ("venice", model),
            )

    def test_venice_set_without_alias_entry(self):
        # Dash-separated name, distinct from the dotted "llama-3.3-70b" alias used above.
        resolved = _resolve_provider("llama-3-3-70b", "")
        self.assertEqual(resolved, ("venice", "llama-3-3-70b"))

    def test_grok_prefix(self):
        resolved = _resolve_provider("grok-4-1-fast", "xai")
        self.assertEqual(resolved, ("xai", "grok-4-1-fast"))

    def test_explicit_provider_unknown_model(self):
        # A model name with no routing entry goes to the caller-supplied provider as-is.
        resolved = _resolve_provider("custom-model", "openai")
        self.assertEqual(resolved, ("openai", "custom-model"))


# Run the test cases in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading