Skip to content

Commit 3581961

Browse files
jace-ryan and claude committed
feat: adapter logging + fix Gemini system_instruction placement
- All adapters now log model name + latency on every LLM call
- Anthropic/OpenAI adapters also log token usage (in/out)
- Gemini adapter: moved system_instruction from GenerationConfig to GenerativeModel constructor (was in wrong location per current API)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5efcb04 commit 3581961

4 files changed

Lines changed: 54 additions & 3 deletions

File tree

server/adapters/anthropic_adapter.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,15 @@
22

33
from __future__ import annotations
44

5+
import logging
6+
import time
7+
58
from anthropic import Anthropic
69

710
from .base import LLMAdapter
811

12+
logger = logging.getLogger(__name__)
13+
914

1015
class AnthropicAdapter(LLMAdapter):
1116
def __init__(self, api_key: str, model: str) -> None:
@@ -18,12 +23,20 @@ def complete(
1823
messages: list[dict[str, str]],
1924
max_tokens: int = 4096,
2025
) -> str:
26+
start = time.monotonic()
2127
resp = self._client.messages.create(
2228
model=self._model,
2329
max_tokens=max_tokens,
2430
system=system,
2531
messages=messages,
2632
)
33+
elapsed = time.monotonic() - start
34+
tokens_in = resp.usage.input_tokens
35+
tokens_out = resp.usage.output_tokens
36+
logger.info(
37+
"anthropic model=%s in=%d out=%d %.1fs",
38+
self._model, tokens_in, tokens_out, elapsed,
39+
)
2740
return resp.content[0].text
2841

2942
@property

server/adapters/gemini_adapter.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,35 +2,52 @@
22

33
from __future__ import annotations
44

5+
import logging
6+
import time
7+
58
import google.generativeai as genai
69

710
from .base import LLMAdapter
811

12+
logger = logging.getLogger(__name__)
13+
914

1015
class GeminiAdapter(LLMAdapter):
1116
def __init__(self, api_key: str, model: str) -> None:
1217
genai.configure(api_key=api_key)
1318
self._model_name = model
14-
self._model = genai.GenerativeModel(model_name=model)
19+
self._model = genai.GenerativeModel(
20+
model_name=model,
21+
system_instruction=None, # set per-call
22+
)
1523

1624
def complete(
1725
self,
1826
system: str,
1927
messages: list[dict[str, str]],
2028
max_tokens: int = 4096,
2129
) -> str:
30+
# Gemini uses system_instruction on the model, not in GenerationConfig
31+
model = genai.GenerativeModel(
32+
model_name=self._model_name,
33+
system_instruction=system,
34+
)
35+
2236
history = [
2337
{"role": "user" if m["role"] == "user" else "model", "parts": [m["content"]]}
2438
for m in messages
2539
]
26-
chat = self._model.start_chat(history=history[:-1] if len(history) > 1 else [])
40+
chat = model.start_chat(history=history[:-1] if len(history) > 1 else [])
41+
42+
start = time.monotonic()
2743
resp = chat.send_message(
2844
history[-1]["parts"][0] if history else "",
2945
generation_config=genai.types.GenerationConfig(
3046
max_output_tokens=max_tokens,
31-
system_instruction=system,
3247
),
3348
)
49+
elapsed = time.monotonic() - start
50+
logger.info("gemini model=%s %.1fs", self._model_name, elapsed)
3451
return resp.text
3552

3653
@property

server/adapters/ollama_adapter.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,15 @@
22

33
from __future__ import annotations
44

5+
import logging
6+
import time
7+
58
import httpx
69

710
from .base import LLMAdapter
811

12+
logger = logging.getLogger(__name__)
13+
914

1015
class OllamaAdapter(LLMAdapter):
1116
def __init__(self, base_url: str, model: str) -> None:
@@ -24,12 +29,15 @@ def complete(
2429
"stream": False,
2530
"options": {"num_predict": max_tokens},
2631
}
32+
start = time.monotonic()
2733
resp = httpx.post(
2834
f"{self._base_url}/api/chat",
2935
json=payload,
3036
timeout=120.0,
3137
)
3238
resp.raise_for_status()
39+
elapsed = time.monotonic() - start
40+
logger.info("ollama model=%s %.1fs", self._model, elapsed)
3341
return resp.json()["message"]["content"]
3442

3543
@property

server/adapters/openai_adapter.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,15 @@
22

33
from __future__ import annotations
44

5+
import logging
6+
import time
7+
58
from openai import OpenAI
69

710
from .base import LLMAdapter
811

12+
logger = logging.getLogger(__name__)
13+
914

1015
class OpenAIAdapter(LLMAdapter):
1116
def __init__(self, api_key: str, model: str) -> None:
@@ -18,12 +23,20 @@ def complete(
1823
messages: list[dict[str, str]],
1924
max_tokens: int = 4096,
2025
) -> str:
26+
start = time.monotonic()
2127
full = [{"role": "system", "content": system}, *messages]
2228
resp = self._client.chat.completions.create(
2329
model=self._model,
2430
max_tokens=max_tokens,
2531
messages=full,
2632
)
33+
elapsed = time.monotonic() - start
34+
usage = resp.usage
35+
if usage:
36+
logger.info(
37+
"openai model=%s in=%d out=%d %.1fs",
38+
self._model, usage.prompt_tokens, usage.completion_tokens, elapsed,
39+
)
2740
return resp.choices[0].message.content or ""
2841

2942
@property

0 commit comments

Comments
 (0)