Skip to content

Commit a6d3bd0

Browse files
authored
Merge branch 'main' into litellm_tag_routing_fixes
2 parents 05210fe + 00f1d7b commit a6d3bd0

File tree

9 files changed

+146
-13
lines changed

9 files changed

+146
-13
lines changed

deploy/charts/litellm-helm/templates/deployment.yaml

+7-2
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ spec:
104104
imagePullPolicy: {{ .Values.image.pullPolicy }}
105105
env:
106106
- name: HOST
107-
value: "::"
107+
value: "{{ .Values.listen | default "0.0.0.0" }}"
108108
- name: PORT
109109
value: {{ .Values.service.port | quote}}
110110
{{- if .Values.db.deployStandalone }}
@@ -138,8 +138,13 @@ spec:
138138
- name: DATABASE_NAME
139139
value: {{ .Values.db.database }}
140140
{{- end }}
141+
{{- if .Values.database.url }}
142+
- name: DATABASE_URL
143+
value: {{ .Values.database.url | quote }}
144+
{{- else }}
141145
- name: DATABASE_URL
142146
value: "postgresql://$(DATABASE_USERNAME):$(DATABASE_PASSWORD)@$(DATABASE_HOST)/$(DATABASE_NAME)"
147+
{{- end }}
143148
- name: PROXY_MASTER_KEY
144149
valueFrom:
145150
secretKeyRef:
@@ -231,4 +236,4 @@ spec:
231236
{{- with .Values.tolerations }}
232237
tolerations:
233238
{{- toYaml . | nindent 8 }}
234-
{{- end }}
239+
{{- end }}

docs/my-website/docs/proxy/team_logging.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -208,8 +208,8 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
208208
-d '{
209209
"metadata": {
210210
"logging": [{
211-
"callback_name": "langfuse", # 'otel', 'langfuse', 'lunary'
212-
"callback_type": "success" # set, if required by integration - future improvement, have logging tools work for success + failure by default
211+
"callback_name": "langfuse", # "otel", "langfuse", "lunary"
212+
"callback_type": "success", # "success", "failure", "success_and_failure"
213213
"callback_vars": {
214214
"langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY", # [RECOMMENDED] reference key in proxy environment
215215
"langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY", # [RECOMMENDED] reference key in proxy environment

litellm/integrations/opentelemetry.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -641,7 +641,9 @@ def _get_span_processor(self):
641641
return BatchSpanProcessor(
642642
OTLPSpanExporterHTTP(
643643
endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
644-
)
644+
),
645+
max_queue_size=100,
646+
max_export_batch_size=100,
645647
)
646648
elif self.OTEL_EXPORTER == "otlp_grpc":
647649
verbose_logger.debug(
@@ -651,7 +653,9 @@ def _get_span_processor(self):
651653
return BatchSpanProcessor(
652654
OTLPSpanExporterGRPC(
653655
endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
654-
)
656+
),
657+
max_queue_size=100,
658+
max_export_batch_size=100,
655659
)
656660
else:
657661
verbose_logger.debug(

litellm/proxy/common_utils/callback_utils.py

+10
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,16 @@ def initialize_callbacks_on_proxy(
228228
litellm.callbacks.extend(imported_list)
229229
else:
230230
litellm.callbacks = imported_list # type: ignore
231+
232+
if "prometheus" in value:
233+
from litellm.proxy.proxy_server import app
234+
235+
verbose_proxy_logger.debug("Starting Prometheus Metrics on /metrics")
236+
from prometheus_client import make_asgi_app
237+
238+
# Add prometheus asgi middleware to route /metrics requests
239+
metrics_app = make_asgi_app()
240+
app.mount("/metrics", metrics_app)
231241
else:
232242
litellm.callbacks = [
233243
get_instance_fn(

litellm/proxy/litellm_pre_call_utils.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,11 @@ def convert_key_logging_metadata_to_callback(
8686
team_callback_settings_obj.success_callback = []
8787
if team_callback_settings_obj.failure_callback is None:
8888
team_callback_settings_obj.failure_callback = []
89+
8990
if data.callback_name not in team_callback_settings_obj.success_callback:
9091
team_callback_settings_obj.success_callback.append(data.callback_name)
9192

92-
if data.callback_name in team_callback_settings_obj.failure_callback:
93+
if data.callback_name not in team_callback_settings_obj.failure_callback:
9394
team_callback_settings_obj.failure_callback.append(data.callback_name)
9495

9596
for var, value in data.callback_vars.items():

litellm/proxy/proxy_config.yaml

+8
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,17 @@ model_list:
1414
id: "test-openai"
1515

1616

17+
1718
router_settings:
1819
enable_tag_filtering: True # 👈 Key Change
1920

21+
22+
general_settings:
23+
master_key: sk-1234
24+
alerting: ["slack"]
25+
spend_report_frequency: "1d"
26+
27+
2028
litellm_settings:
2129
success_callback: ["prometheus"]
2230
failure_callback: ["prometheus"]

litellm/router.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3690,7 +3690,7 @@ def _set_cooldown_deployments(
36903690
exception=original_exception,
36913691
)
36923692

3693-
allowed_fails = _allowed_fails or self.allowed_fails
3693+
allowed_fails = _allowed_fails if _allowed_fails is not None else self.allowed_fails
36943694

36953695
dt = get_utc_datetime()
36963696
current_minute = dt.strftime("%H-%M")

litellm/tests/test_prometheus.py

+89-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import uuid
1010

1111
import pytest
12-
from prometheus_client import REGISTRY
12+
from prometheus_client import REGISTRY, CollectorRegistry
1313

1414
import litellm
1515
from litellm import completion
@@ -79,3 +79,91 @@ async def test_async_prometheus_success_logging():
7979
assert metrics["litellm_deployment_success_responses_total"] == 1.0
8080
assert metrics["litellm_deployment_total_requests_total"] == 1.0
8181
assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
82+
83+
84+
@pytest.mark.asyncio()
85+
async def test_async_prometheus_success_logging_with_callbacks():
86+
run_id = str(uuid.uuid4())
87+
litellm.set_verbose = True
88+
89+
litellm.success_callback = []
90+
litellm.failure_callback = []
91+
litellm.callbacks = ["prometheus"]
92+
93+
# Get initial metric values
94+
initial_metrics = {}
95+
for metric in REGISTRY.collect():
96+
for sample in metric.samples:
97+
initial_metrics[sample.name] = sample.value
98+
99+
response = await litellm.acompletion(
100+
model="claude-instant-1.2",
101+
messages=[{"role": "user", "content": "what llm are u"}],
102+
max_tokens=10,
103+
mock_response="hi",
104+
temperature=0.2,
105+
metadata={
106+
"id": run_id,
107+
"tags": ["tag1", "tag2"],
108+
"user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
109+
"user_api_key_alias": "ishaans-prometheus-key",
110+
"user_api_end_user_max_budget": None,
111+
"litellm_api_version": "1.40.19",
112+
"global_max_parallel_requests": None,
113+
"user_api_key_user_id": "admin",
114+
"user_api_key_org_id": None,
115+
"user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
116+
"user_api_key_team_alias": "testing-team",
117+
},
118+
)
119+
print(response)
120+
await asyncio.sleep(3)
121+
122+
# get prometheus logger
123+
from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers
124+
125+
for callback in _in_memory_loggers:
126+
if isinstance(callback, PrometheusLogger):
127+
test_prometheus_logger = callback
128+
129+
print("done with success request")
130+
131+
print(
132+
"vars of test_prometheus_logger",
133+
vars(test_prometheus_logger.litellm_requests_metric),
134+
)
135+
136+
# Get the updated metrics
137+
updated_metrics = {}
138+
for metric in REGISTRY.collect():
139+
for sample in metric.samples:
140+
updated_metrics[sample.name] = sample.value
141+
142+
print("metrics from prometheus", updated_metrics)
143+
144+
# Assert the delta for each metric
145+
assert (
146+
updated_metrics["litellm_requests_metric_total"]
147+
- initial_metrics.get("litellm_requests_metric_total", 0)
148+
== 1.0
149+
)
150+
assert (
151+
updated_metrics["litellm_total_tokens_total"]
152+
- initial_metrics.get("litellm_total_tokens_total", 0)
153+
== 30.0
154+
)
155+
assert (
156+
updated_metrics["litellm_deployment_success_responses_total"]
157+
- initial_metrics.get("litellm_deployment_success_responses_total", 0)
158+
== 1.0
159+
)
160+
assert (
161+
updated_metrics["litellm_deployment_total_requests_total"]
162+
- initial_metrics.get("litellm_deployment_total_requests_total", 0)
163+
== 1.0
164+
)
165+
assert (
166+
updated_metrics["litellm_deployment_latency_per_output_token_bucket"]
167+
- initial_metrics.get("litellm_deployment_latency_per_output_token_bucket", 0)
168+
== 1.0
169+
)

litellm/tests/test_proxy_server.py

+21-4
Original file line numberDiff line numberDiff line change
@@ -1255,7 +1255,17 @@ async def test_add_callback_via_key(prisma_client):
12551255

12561256

12571257
@pytest.mark.asyncio
1258-
async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
1258+
@pytest.mark.parametrize(
1259+
"callback_type, expected_success_callbacks, expected_failure_callbacks",
1260+
[
1261+
("success", ["langfuse"], []),
1262+
("failure", [], ["langfuse"]),
1263+
("success_and_failure", ["langfuse"], ["langfuse"]),
1264+
],
1265+
)
1266+
async def test_add_callback_via_key_litellm_pre_call_utils(
1267+
prisma_client, callback_type, expected_success_callbacks, expected_failure_callbacks
1268+
):
12591269
import json
12601270

12611271
from fastapi import HTTPException, Request, Response
@@ -1312,7 +1322,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
13121322
"logging": [
13131323
{
13141324
"callback_name": "langfuse",
1315-
"callback_type": "success",
1325+
"callback_type": callback_type,
13161326
"callback_vars": {
13171327
"langfuse_public_key": "my-mock-public-key",
13181328
"langfuse_secret_key": "my-mock-secret-key",
@@ -1359,14 +1369,21 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
13591369
}
13601370

13611371
new_data = await add_litellm_data_to_request(**data)
1372+
print("NEW DATA: {}".format(new_data))
13621373

1363-
assert "success_callback" in new_data
1364-
assert new_data["success_callback"] == ["langfuse"]
13651374
assert "langfuse_public_key" in new_data
13661375
assert new_data["langfuse_public_key"] == "my-mock-public-key"
13671376
assert "langfuse_secret_key" in new_data
13681377
assert new_data["langfuse_secret_key"] == "my-mock-secret-key"
13691378

1379+
if expected_success_callbacks:
1380+
assert "success_callback" in new_data
1381+
assert new_data["success_callback"] == expected_success_callbacks
1382+
1383+
if expected_failure_callbacks:
1384+
assert "failure_callback" in new_data
1385+
assert new_data["failure_callback"] == expected_failure_callbacks
1386+
13701387

13711388
@pytest.mark.asyncio
13721389
async def test_gemini_pass_through_endpoint():

0 commit comments

Comments
 (0)