|
2 | 2 |
|
3 | 3 | from __future__ import annotations |
4 | 4 |
|
| 5 | +import logging |
| 6 | +import time |
| 7 | + |
5 | 8 | import google.generativeai as genai |
6 | 9 |
|
7 | 10 | from .base import LLMAdapter |
8 | 11 |
|
| 12 | +logger = logging.getLogger(__name__) |
| 13 | + |
9 | 14 |
|
10 | 15 | class GeminiAdapter(LLMAdapter): |
def __init__(self, api_key: str, model: str) -> None:
    """Configure the Gemini SDK and remember which model to use.

    Args:
        api_key: Key handed to ``genai.configure``.
        model: Gemini model name; stored and reused for every request.
    """
    genai.configure(api_key=api_key)
    self._model_name = model

    # This instance carries no system instruction: complete() builds its
    # own GenerativeModel per call so each request can supply its prompt.
    default_model_kwargs = {
        "model_name": model,
        "system_instruction": None,
    }
    self._model = genai.GenerativeModel(**default_model_kwargs)
15 | 23 |
|
def complete(
    self,
    system: str,
    messages: list[dict[str, str]],
    max_tokens: int = 4096,
) -> str:
    """Send a conversation to Gemini and return the reply text.

    Args:
        system: System prompt, attached to the model as
            ``system_instruction``. An empty string is treated as
            "no system prompt".
        messages: Conversation turns, oldest first. Each is a dict with
            ``"role"`` and ``"content"`` keys. The role ``"user"`` is kept;
            every other role is sent as ``"model"``. The last turn is sent
            as the new message and the earlier ones seed the chat history.
        max_tokens: Passed to Gemini as ``max_output_tokens``.

    Returns:
        The ``text`` attribute of the SDK response.
    """
    # Gemini uses system_instruction on the model, not in GenerationConfig.
    # Pass None instead of "" so an empty prompt means "no instruction"
    # rather than handing the SDK an empty content value.
    model = genai.GenerativeModel(
        model_name=self._model_name,
        system_instruction=system or None,
    )

    history = [
        {
            "role": "user" if m["role"] == "user" else "model",
            "parts": [m["content"]],
        }
        for m in messages
    ]
    # Slicing already yields [] for zero or one turn, so no length check.
    chat = model.start_chat(history=history[:-1])
    # With no turns at all an empty string is sent, as before.
    prompt = history[-1]["parts"][0] if history else ""

    start = time.monotonic()
    try:
        resp = chat.send_message(
            prompt,
            generation_config=genai.types.GenerationConfig(
                max_output_tokens=max_tokens,
            ),
        )
    finally:
        # Log latency even when the request raises, so slow failures and
        # timeouts show up alongside successful calls.
        elapsed = time.monotonic() - start
        logger.info("gemini model=%s %.1fs", self._model_name, elapsed)
    return resp.text
35 | 52 |
|
36 | 53 | @property |
|
0 commit comments