@@ -9,12 +9,15 @@
 import hashlib
 import json
 import os
+import uuid
 from collections.abc import Generator
 from contextlib import contextmanager
 from enum import StrEnum
 from pathlib import Path
 from typing import Any, Literal, cast
 
+from openai.types.chat import ChatCompletion, ChatCompletionChunk
+
 from llama_stack.log import get_logger
 
 logger = get_logger(__name__, category="testing")
@@ -207,6 +210,20 @@ async def _patched_inference_method(original_method, self, client_type, endpoint |
     recording = _current_storage.find_recording(request_hash)
     if recording:
         response_body = recording["response"]["body"]
+        if (
+            isinstance(response_body, list)
+            and len(response_body) > 0
+            and isinstance(response_body[0], ChatCompletionChunk)
+        ):
+            # Recordings are stored in a SQLite database with a unique
+            # constraint on the completion id, so a replay cannot reuse the
+            # recorded id. Mint a fresh id and apply it to every chunk.
+            new_id = "chatcmpl-" + uuid.uuid4().hex
+            for chunk in response_body:
+                chunk.id = new_id
+        elif isinstance(response_body, ChatCompletion):
+            # The same unique-id constraint applies to non-streaming completions.
+            response_body.id = "chatcmpl-" + uuid.uuid4().hex
 
         if recording["response"].get("is_streaming", False):
 
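For context on the comment in the diff: the recording store enforces uniqueness on the completion id, so replaying a recorded body verbatim would collide on insert. Below is a minimal sketch of that failure mode using a hypothetical chat_completions table; the actual llama-stack storage schema may differ.

import sqlite3
import uuid

# Hypothetical minimal schema mirroring the unique-id constraint the diff
# comment describes; the real llama-stack storage layout may differ.
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE chat_completions (id TEXT PRIMARY KEY, body TEXT)")

recorded_id = "chatcmpl-abc123"
conn.execute("INSERT INTO chat_completions VALUES (?, ?)", (recorded_id, "{}"))

# Replaying the recorded body with its original id violates the constraint.
try:
    conn.execute("INSERT INTO chat_completions VALUES (?, ?)", (recorded_id, "{}"))
except sqlite3.IntegrityError as exc:
    print(exc)  # UNIQUE constraint failed: chat_completions.id

# Minting a fresh id per replay, as the patch does, avoids the collision.
conn.execute(
    "INSERT INTO chat_completions VALUES (?, ?)",
    ("chatcmpl-" + uuid.uuid4().hex, "{}"),
)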