|
1 | 1 | from typing import List, Optional |
2 | 2 |
|
3 | 3 | import litellm |
| 4 | +from litellm import Router |
| 5 | + |
4 | 6 | import os |
5 | 7 | import boto3 |
6 | 8 | import threading |
|
40 | 42 | "gemma3:27b": "ollama_chat/gemma3:27b", |
41 | 43 | "nomic-embed-text": "ollama/nomic-embed-text", |
42 | 44 | }, |
| 45 | + "ollama_chat": { |
| 46 | + "deepseek-r1": "ollama_chat/deepseek-r1", |
| 47 | + "llama3.2": "ollama_chat/llama3.2", |
| 48 | + "llama3.3": "ollama_chat/llama3.3", |
| 49 | + "gemma3": "ollama_chat/gemma3", |
| 50 | + "gemma3:27b": "ollama_chat/gemma3:27b", |
| 51 | + "nomic-embed-text": "ollama/nomic-embed-text", |
| 52 | + }, |
43 | 53 | "bedrock": { |
44 | 54 | "claude-3-5-haiku-v1": "bedrock/anthropic.claude-3-5-haiku-20241022-v1:0", |
45 | 55 | "claude-3-5-sonnet-v1": "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", |
|
80 | 90 | init_lock = threading.Lock() |
81 | 91 |
|
82 | 92 |
|
| 93 | +# allow litellm to slightly modify request parameters to accommodate provider services
| 94 | +litellm.modify_params = True |
| 95 | + |
| 96 | +model_list = [] |
| 97 | +for model_name in ALL_PROVIDER_ALLOWED_FULL_NAMES: |
| 98 | + model_list.append({"model_name": model_name, "litellm_params": {"model": model_name}}) |
| 99 | + |
| 100 | +router = Router(model_list=model_list, default_max_parallel_requests=400) |
| 101 | + |
| 102 | + |
83 | 103 | def construct_bedrock_arn(model_identifier: str) -> Optional[str]: |
84 | 104 | global region, account_id, init_lock |
85 | 105 | with init_lock: |
@@ -192,17 +212,16 @@ async def complete_chat(self, messages: List[dict] | List[ConversationMessage], |
192 | 212 | if "tool_calls" in message: |
193 | 213 | del message["tool_calls"] |
194 | 214 |
|
195 | | - # slightly modify the parameters to accommodate services |
196 | | - litellm.modify_params = True |
197 | | - |
198 | 215 | # parameters provided in kwargs will override the default parameters |
199 | 216 | kwargs = {**self.kwargs, **kwargs} |
200 | 217 |
|
201 | 218 | # sometimes an empty tools list is interpreted as "please hallucinate tools", so drop the key entirely
202 | 219 | if "tools" in kwargs and len(kwargs["tools"]) == 0: |
203 | 220 | del kwargs["tools"] |
204 | 221 |
|
205 | | - return await litellm.acompletion(self.name, messages=messages, stream=stream, **kwargs) |
| 222 | + global router |
| 223 | + |
| 224 | + return await router.acompletion(self.name, messages=messages, stream=stream, **kwargs) |
206 | 225 |
|
207 | 226 | async def embeddings(self, text: List[str], **kwargs) -> List[List[float]]: |
208 | 227 | # parameters provided in kwargs will override the default parameters |
|
0 commit comments