From a1a8ef48393d51485c6f39aaa264035bc8b66c1f Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Fri, 26 Sep 2025 18:15:38 +0000 Subject: [PATCH 01/19] fix: handle empty repsonses --- src/litai/llm.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/litai/llm.py b/src/litai/llm.py index 458673e..3759db3 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -334,7 +334,7 @@ def chat( # noqa: D417 for model in self.models: for attempt in range(self.max_retries): try: - return self._model_call( + response = self._model_call( model=model, prompt=prompt, system_prompt=system_prompt, @@ -350,6 +350,18 @@ def chat( # noqa: D417 **kwargs, ) + if not stream and response: + yield response + return + elif stream: + has_content = False + for chunk in response: + if chunk != "": + has_content = True + yield chunk + if has_content: + return + except Exception as e: handle_model_error(e, model, attempt, self.max_retries, self._verbose) From 579bcb6b5e0329c1867139b63640c38a0017adb9 Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Fri, 26 Sep 2025 18:47:40 +0000 Subject: [PATCH 02/19] fix: handle empty repsonses --- src/litai/llm.py | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/src/litai/llm.py b/src/litai/llm.py index 3759db3..54443d0 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -14,6 +14,7 @@ """LLM client class.""" import datetime +import itertools import json import logging import os @@ -311,7 +312,7 @@ def chat( # noqa: D417 name=model, teamspace=self._teamspace, enable_async=self._enable_async ) sdk_model = self._sdkllm_cache[model_key] - return self._model_call( + response = self._model_call( model=sdk_model, prompt=prompt, system_prompt=system_prompt, @@ -326,6 +327,24 @@ def chat( # noqa: D417 reasoning_effort=reasoning_effort, **kwargs, ) + if not stream and response: + return response + if stream: + peek_iter, return_iter = itertools.tee(response) + try: + peek_iter, return_iter = itertools.tee(response) + has_content = False + + for chunk in peek_iter: + if chunk != "": + has_content = True + break + + if has_content: + return return_iter + except StopIteration: + pass + except Exception as e: print(f"💥 Failed to override with model '{model}'") handle_model_error(e, sdk_model, 0, self.max_retries, self._verbose) @@ -351,16 +370,22 @@ def chat( # noqa: D417 ) if not stream and response: - yield response - return - elif stream: - has_content = False - for chunk in response: - if chunk != "": - has_content = True - yield chunk - if has_content: - return + return response + if stream: + peek_iter, return_iter = itertools.tee(response) + try: + peek_iter, return_iter = itertools.tee(response) + has_content = False + + for chunk in peek_iter: + if chunk != "": + has_content = True + break + + if has_content: + return return_iter + except StopIteration: + pass except Exception as e: handle_model_error(e, model, attempt, self.max_retries, self._verbose) From c3b6e5ec6c491b4ecfbdcc62d89aa6f54fc3460d Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Fri, 26 Sep 2025 18:55:32 +0000 Subject: [PATCH 03/19] fix: handle empty repsonses --- src/litai/llm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/litai/llm.py b/src/litai/llm.py index 54443d0..06609f2 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -330,7 +330,6 @@ def chat( # noqa: D417 if not stream and response: return response if stream: - peek_iter, return_iter = itertools.tee(response) try: peek_iter, 
return_iter = itertools.tee(response) has_content = False From e884236138f3991971aef2f6beb049243f3fc041 Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Fri, 26 Sep 2025 19:00:48 +0000 Subject: [PATCH 04/19] fix: handle empty repsonses --- src/litai/llm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/litai/llm.py b/src/litai/llm.py index 06609f2..efcec42 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -371,7 +371,6 @@ def chat( # noqa: D417 if not stream and response: return response if stream: - peek_iter, return_iter = itertools.tee(response) try: peek_iter, return_iter = itertools.tee(response) has_content = False From f52687eb263e3bf2450eefdcdd5a92e61e0297cc Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Fri, 26 Sep 2025 20:59:12 +0000 Subject: [PATCH 05/19] add test --- tests/test_llm.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tests/test_llm.py b/tests/test_llm.py index b93ac50..8ee4faa 100644 --- a/tests/test_llm.py +++ b/tests/test_llm.py @@ -247,6 +247,57 @@ def mock_llm_constructor(name, teamspace="default-teamspace", **kwargs): ) +def test_empty_response_retries(monkeypatch): + """Test fallback model logic when main model fails.""" + from litai.llm import LLM as LLMCLIENT + + LLMCLIENT._sdkllm_cache.clear() + mock_main_model = MagicMock() + mock_main_model.name = "main-model" + mock_fallback_model = MagicMock() + mock_fallback_model.name = "fallback-model" + + mock_main_model.chat.side_effect = "" + mock_fallback_model.chat.side_effect = [ + "", + "", + "Fallback response", + ] + + def mock_llm_constructor(name, teamspace="default-teamspace", **kwargs): + if name == "main-model": + return mock_main_model + if name == "fallback-model": + return mock_fallback_model + raise ValueError(f"Unknown model: {name}") + + monkeypatch.setattr("litai.llm.SDKLLM", mock_llm_constructor) + + llm = LLM( + model="main-model", + fallback_models=["fallback-model"], + ) + + response = llm.chat(prompt="Hello") + assert response == "Fallback response" + + assert mock_main_model.chat.call_count == 3 + assert mock_fallback_model.chat.call_count == 3 + + mock_fallback_model.chat.assert_called_with( + prompt="Hello", + system_prompt=None, + max_completion_tokens=None, + images=None, + conversation=None, + metadata=None, + stream=False, + full_response=False, + tools=None, + reasoning_effort=None, + ) + + @pytest.mark.asyncio async def test_llm_async_chat(monkeypatch): """Test async requests.""" From 0966fc1aa891e7b58b4af636e5036d01988d9a1e Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Fri, 26 Sep 2025 21:09:29 +0000 Subject: [PATCH 06/19] fix output --- src/litai/llm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/litai/llm.py b/src/litai/llm.py index efcec42..04a8642 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -20,7 +20,7 @@ import os import threading import warnings -from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Literal, Optional, Sequence, Union import requests from lightning_sdk.lightning_cloud.openapi import V1ConversationResponseChunk @@ -273,7 +273,7 @@ def chat( # noqa: D417 auto_call_tools: bool = False, reasoning_effort: Optional[Literal["none", "low", "medium", "high"]] = None, **kwargs: Any, - ) -> str: + ) -> Union[str, Iterator[str]]: """Sends a message to the LLM and retrieves a response. 
Args: From d4b6237282f8f1a4c6ea9777170fcdb03bbe0d8e Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Fri, 26 Sep 2025 21:14:32 +0000 Subject: [PATCH 07/19] fix output --- src/litai/llm.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/litai/llm.py b/src/litai/llm.py index 04a8642..5f18081 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -526,7 +526,9 @@ def if_(self, input: str, question: str) -> bool: Answer with only 'yes' or 'no'. """ - response = self.chat(prompt).strip().lower() + response = self.chat(prompt) + if isinstance(response, str): + response = response.strip().lower() return "yes" in response def classify(self, input: str, choices: List[str]) -> str: @@ -552,7 +554,9 @@ def classify(self, input: str, choices: List[str]) -> str: Answer with only one of the choices. """.strip() - response = self.chat(prompt).strip().lower() + response = self.chat(prompt) + if isinstance(response, str): + response = response.strip().lower() if response in normalized_choices: return response From 04e01a0be61c364bc00eafb9b57799df1fb83ccd Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Fri, 26 Sep 2025 21:19:57 +0000 Subject: [PATCH 08/19] fix output --- src/litai/llm.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/litai/llm.py b/src/litai/llm.py index 5f18081..c06f329 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -529,6 +529,8 @@ def if_(self, input: str, question: str) -> bool: response = self.chat(prompt) if isinstance(response, str): response = response.strip().lower() + elif isinstance(response, Iterator): + response = "".join(list(response)).strip().lower() return "yes" in response def classify(self, input: str, choices: List[str]) -> str: @@ -557,6 +559,8 @@ def classify(self, input: str, choices: List[str]) -> str: response = self.chat(prompt) if isinstance(response, str): response = response.strip().lower() + elif isinstance(response, Iterator): + response = "".join(list(response)).strip().lower() if response in normalized_choices: return response From 986ce563f7334dd4b17502ec4d2cc78aa605b509 Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Mon, 29 Sep 2025 22:03:57 +0000 Subject: [PATCH 09/19] async will work --- src/litai/llm.py | 263 ++++++++++++++++++++++++++++----------- src/litai/utils/utils.py | 10 ++ 2 files changed, 200 insertions(+), 73 deletions(-) diff --git a/src/litai/llm.py b/src/litai/llm.py index c06f329..aa8615a 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -13,6 +13,7 @@ # limitations under the License. 
"""LLM client class.""" +import asyncio import datetime import itertools import json @@ -20,15 +21,16 @@ import os import threading import warnings -from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Literal, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, AsyncIterator, Dict, Iterator, List, Literal, Optional, Sequence, Union +import nest_asyncio import requests from lightning_sdk.lightning_cloud.openapi import V1ConversationResponseChunk from lightning_sdk.llm import LLM as SDKLLM from litai.tools import LitTool from litai.utils.supported_public_models import ModelLiteral -from litai.utils.utils import handle_model_error +from litai.utils.utils import handle_empty_response, handle_model_error if TYPE_CHECKING: from langchain_core.tools import StructuredTool @@ -259,6 +261,151 @@ def context_length(self, model: Optional[str] = None) -> int: return self._llm.get_context_length(self._model) return self._llm.get_context_length(model) + async def _peek_and_rebuild_async( + self, + agen: AsyncIterator[str], + ) -> Optional[AsyncIterator[str]]: + """Peek into an async iterator to check for non-empty content and rebuild it if necessary.""" + peeked_items: List[str] = [] + has_content_found = False + + async for item in agen: + peeked_items.append(item) + if item != "": + has_content_found = True + break + + if has_content_found: + + async def rebuilt(): + for peeked_item in peeked_items: + yield peeked_item + + async for remaining_item in agen: + yield remaining_item + + return rebuilt() + + return None + + async def async_chat( + self, + models_to_try: List[SDKLLM], + prompt: str, + system_prompt: Optional[str], + max_tokens: Optional[int], + images: Optional[Union[List[str], str]], + conversation: Optional[str], + metadata: Optional[Dict[str, str]], + stream: bool, + full_response: Optional[bool] = None, + model: Optional[SDKLLM] = None, + tools: Optional[Sequence[Union[str, Dict[str, Any]]]] = None, + lit_tools: Optional[List[LitTool]] = None, + auto_call_tools: bool = False, + reasoning_effort: Optional[str] = None, + **kwargs: Any, + ) -> Union[str, AsyncIterator[str], None]: + """Sends a message to the LLM asynchronously with full retry/fallback logic.""" + for sdk_model in models_to_try: + for attempt in range(self.max_retries): + try: + response = await self._model_call( + model=sdk_model, + prompt=prompt, + system_prompt=system_prompt, + max_completion_tokens=max_tokens, + images=images, + conversation=conversation, + metadata=metadata, + stream=stream, + tools=tools, + lit_tools=lit_tools, + full_response=full_response, + auto_call_tools=auto_call_tools, + reasoning_effort=reasoning_effort, + **kwargs, + ) + + if not stream: + if response: + return response + handle_empty_response(sdk_model, attempt, self.max_retries) + else: + non_empty_stream = await self._peek_and_rebuild_async(response) + if non_empty_stream: + return non_empty_stream + handle_empty_response(sdk_model, attempt, self.max_retries) + if sdk_model == model: + print(f"💥 Failed to override with model '{model}'") + except Exception as e: + handle_model_error(e, sdk_model, attempt, self.max_retries, self._verbose) + raise RuntimeError(f"💥 [LLM call failed after {self.max_retries} attempts]") + + def sync_chat( + self, + models_to_try: List[SDKLLM], + prompt: str, + system_prompt: Optional[str], + max_tokens: Optional[int], + images: Optional[Union[List[str], str]], + conversation: Optional[str], + metadata: Optional[Dict[str, str]], + stream: bool, + model: Optional[SDKLLM] = None, + 
full_response: Optional[bool] = None, + tools: Optional[Sequence[Union[str, Dict[str, Any]]]] = None, + lit_tools: Optional[List[LitTool]] = None, + auto_call_tools: bool = False, + reasoning_effort: Optional[str] = None, + **kwargs: Any, + ) -> Union[str, AsyncIterator[str], None]: + """Sends a message to the LLM synchronously with full retry/fallback logic.""" + for sdk_model in models_to_try: + for attempt in range(self.max_retries): + try: + response = self._model_call( + model=sdk_model, + prompt=prompt, + system_prompt=system_prompt, + max_completion_tokens=max_tokens, + images=images, + conversation=conversation, + metadata=metadata, + stream=stream, + tools=tools, + lit_tools=lit_tools, + full_response=full_response, + auto_call_tools=auto_call_tools, + reasoning_effort=reasoning_effort, + **kwargs, + ) + + if not stream: + if response: + return response + handle_empty_response(sdk_model, attempt, self.max_retries) + if stream: + try: + peek_iter, return_iter = itertools.tee(response) + has_content = False + for chunk in peek_iter: + if chunk != "": + has_content = True + break + if has_content: + return return_iter + handle_empty_response(sdk_model, attempt, self.max_retries) + except StopIteration: + pass + + except Exception as e: + if sdk_model == model: + print(f"💥 Failed to override with model '{model}'") + handle_model_error(e, sdk_model, attempt, self.max_retries, self._verbose) + + raise RuntimeError(f"💥 [LLM call failed after {self.max_retries} attempts]") + def chat( # noqa: D417 self, prompt: str, @@ -273,7 +420,7 @@ def chat( # noqa: D417 auto_call_tools: bool = False, reasoning_effort: Optional[Literal["none", "low", "medium", "high"]] = None, **kwargs: Any, - ) -> Union[str, Iterator[str]]: + ) -> Union[str, Iterator[str], AsyncIterator[str]]: """Sends a message to the LLM and retrieves a response. 
Args: @@ -304,91 +451,61 @@ def chat( # noqa: D417 self._wait_for_model() lit_tools = LitTool.convert_tools(tools) processed_tools = [tool.as_tool() for tool in lit_tools] if lit_tools else None + + models_to_try = [] + sdk_model = None if model: - try: - model_key = f"{model}::{self._teamspace}::{self._enable_async}" - if model_key not in self._sdkllm_cache: - self._sdkllm_cache[model_key] = SDKLLM( - name=model, teamspace=self._teamspace, enable_async=self._enable_async - ) - sdk_model = self._sdkllm_cache[model_key] - response = self._model_call( + model_key = f"{model}::{self._teamspace}::{self._enable_async}" + if model_key not in self._sdkllm_cache: + self._sdkllm_cache[model_key] = SDKLLM( + name=model, teamspace=self._teamspace, enable_async=self._enable_async + ) + sdk_model = self._sdkllm_cache[model_key] + models_to_try.append(sdk_model) + models_to_try.extend(self.models) + + if self._enable_async: + nest_asyncio.apply() + nest_asyncio.apply() + + loop = asyncio.get_event_loop() + return loop.create_task( + self.async_chat( + models_to_try=models_to_try, model=sdk_model, prompt=prompt, system_prompt=system_prompt, - max_completion_tokens=max_tokens, + max_tokens=max_tokens, images=images, conversation=conversation, metadata=metadata, stream=stream, + full_response=self._full_response, tools=processed_tools, lit_tools=lit_tools, auto_call_tools=auto_call_tools, reasoning_effort=reasoning_effort, **kwargs, ) - if not stream and response: - return response - if stream: - try: - peek_iter, return_iter = itertools.tee(response) - has_content = False - - for chunk in peek_iter: - if chunk != "": - has_content = True - break - - if has_content: - return return_iter - except StopIteration: - pass - - except Exception as e: - print(f"💥 Failed to override with model '{model}'") - handle_model_error(e, sdk_model, 0, self.max_retries, self._verbose) - - # Retry with fallback models - for model in self.models: - for attempt in range(self.max_retries): - try: - response = self._model_call( - model=model, - prompt=prompt, - system_prompt=system_prompt, - max_completion_tokens=max_tokens, - images=images, - conversation=conversation, - metadata=metadata, - stream=stream, - tools=processed_tools, - lit_tools=lit_tools, - auto_call_tools=auto_call_tools, - reasoning_effort=reasoning_effort, - **kwargs, - ) - - if not stream and response: - return response - if stream: - try: - peek_iter, return_iter = itertools.tee(response) - has_content = False - - for chunk in peek_iter: - if chunk != "": - has_content = True - break + ) - if has_content: - return return_iter - except StopIteration: - pass - - except Exception as e: - handle_model_error(e, model, attempt, self.max_retries, self._verbose) - - raise RuntimeError(f"💥 [LLM call failed after {self.max_retries} attempts]") + return self.sync_chat( + models_to_try=models_to_try, + model=sdk_model, + prompt=prompt, + system_prompt=system_prompt, + max_tokens=max_tokens, + images=images, + conversation=conversation, + metadata=metadata, + stream=stream, + full_response=self._full_response, + tools=processed_tools, + lit_tools=lit_tools, + auto_call_tools=auto_call_tools, + reasoning_effort=reasoning_effort, + **kwargs, + ) @staticmethod def call_tool( diff --git a/src/litai/utils/utils.py b/src/litai/utils/utils.py index 06761e4..252c836 100644 --- a/src/litai/utils/utils.py +++ b/src/litai/utils/utils.py @@ -182,3 +182,13 @@ def handle_model_error(e: Exception, model: SDKLLM, attempt: int, max_retries: i print("-" * 50) print(f"❌ All {max_retries} 
attempts failed for model {model.name}") print("-" * 50) + + +def handle_empty_response(model: SDKLLM, attempt: int, max_retries: int) -> None: + """Handles empty responses from model calls.""" + if attempt < max_retries - 1: + print(f"🔁 Received empty response. Attempt {attempt + 1}/{max_retries} failed. Retrying...") + else: + print("-" * 50) + print(f"❌ All {max_retries} attempts received empty responses for model {model.name}.") + print("-" * 50) From 442b617bf81c858a261ccce18341b8ff4c0785ad Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Mon, 29 Sep 2025 22:06:03 +0000 Subject: [PATCH 10/19] async will work --- src/litai/llm.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/litai/llm.py b/src/litai/llm.py index aa8615a..d4adc74 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -21,6 +21,7 @@ import os import threading import warnings +from asyncio import Task from typing import TYPE_CHECKING, Any, AsyncIterator, Dict, Iterator, List, Literal, Optional, Sequence, Union import nest_asyncio @@ -359,7 +360,7 @@ def sync_chat( auto_call_tools: bool = False, reasoning_effort: Optional[str] = None, **kwargs: Any, - ) -> Union[str, AsyncIterator[str], None]: + ) -> Union[str, None]: """Sends a message to the LLM synchronously with full retry/fallback logic.""" for sdk_model in models_to_try: for attempt in range(self.max_retries): @@ -420,7 +421,7 @@ def chat( # noqa: D417 auto_call_tools: bool = False, reasoning_effort: Optional[Literal["none", "low", "medium", "high"]] = None, **kwargs: Any, - ) -> Union[str, Iterator[str], AsyncIterator[str]]: + ) -> Union[str, Task[str | AsyncIterator[str] | None] | None]: """Sends a message to the LLM and retrieves a response. Args: From 2f106a9bf44aa8eecb4ee28fae516457fb6e2e30 Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Mon, 29 Sep 2025 22:35:01 +0000 Subject: [PATCH 11/19] fix-some-typing --- src/litai/llm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/litai/llm.py b/src/litai/llm.py index d4adc74..5206a5c 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -210,7 +210,7 @@ def _format_tool_response( return LLM.call_tool(result, lit_tools) or "" return json.dumps(result) - def _model_call( + def _model_call( # noqa: D417 self, model: SDKLLM, prompt: str, @@ -289,7 +289,7 @@ async def rebuilt(): return None - async def async_chat( + async def async_chat( # noqa: D417 self, models_to_try: List[SDKLLM], prompt: str, From 138f78edc39227f6fd763e31bf4f0c8851654552 Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Mon, 29 Sep 2025 22:57:03 +0000 Subject: [PATCH 12/19] fix-more-typig --- src/litai/llm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/litai/llm.py b/src/litai/llm.py index 5206a5c..adb83fd 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -278,7 +278,7 @@ async def _peek_and_rebuild_async( if has_content_found: - async def rebuilt(): + async def rebuilt() -> AsyncIterator[str]: for peeked_item in peeked_items: yield peeked_item @@ -311,7 +311,7 @@ async def async_chat( # noqa: D417 for sdk_model in models_to_try: for attempt in range(self.max_retries): try: - response = await self._model_call( + response = await self._model_call( # type: ignore[misc] model=sdk_model, prompt=prompt, system_prompt=system_prompt, @@ -360,7 +360,7 @@ def sync_chat( auto_call_tools: bool = False, reasoning_effort: Optional[str] = None, **kwargs: Any, - ) -> Union[str, None]: + ) -> Union[str, Iterator[str], None]: """Sends a message to the LLM 
synchronously with full retry/fallback logic.""" for sdk_model in models_to_try: for attempt in range(self.max_retries): From 6863d5d05dc9e3c08230b61444526088e9b72b0c Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Mon, 29 Sep 2025 22:58:29 +0000 Subject: [PATCH 13/19] fix-more-typig --- src/litai/llm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/litai/llm.py b/src/litai/llm.py index adb83fd..faae8f9 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -289,7 +289,7 @@ async def rebuilt() -> AsyncIterator[str]: return None - async def async_chat( # noqa: D417 + async def async_chat( self, models_to_try: List[SDKLLM], prompt: str, @@ -421,7 +421,7 @@ def chat( # noqa: D417 auto_call_tools: bool = False, reasoning_effort: Optional[Literal["none", "low", "medium", "high"]] = None, **kwargs: Any, - ) -> Union[str, Task[str | AsyncIterator[str] | None] | None]: + ) -> Union[str, Task[str | AsyncIterator[str] | None] | Iterator[str], None]: """Sends a message to the LLM and retrieves a response. Args: From 87f7ec12ead04d6dacd16ea714931c2897d8c1ec Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Mon, 29 Sep 2025 23:02:02 +0000 Subject: [PATCH 14/19] fix-more-typig --- src/litai/llm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/litai/llm.py b/src/litai/llm.py index faae8f9..0c44ac5 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -647,8 +647,8 @@ def if_(self, input: str, question: str) -> bool: response = self.chat(prompt) if isinstance(response, str): response = response.strip().lower() - elif isinstance(response, Iterator): - response = "".join(list(response)).strip().lower() + else: + return False return "yes" in response def classify(self, input: str, choices: List[str]) -> str: @@ -677,8 +677,8 @@ def classify(self, input: str, choices: List[str]) -> str: response = self.chat(prompt) if isinstance(response, str): response = response.strip().lower() - elif isinstance(response, Iterator): - response = "".join(list(response)).strip().lower() + else: + return normalized_choices[0] if response in normalized_choices: return response From 0bcb8ba21958501d49dd91beffbbfda43338f6a1 Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Mon, 29 Sep 2025 23:02:23 +0000 Subject: [PATCH 15/19] fix-more-typig --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 7f9490a..e697c35 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ lightning_sdk >= 2025.09.16 +nest_asyncio From 1f16fa69f9e0795abb3768a39f9ba19fbb04d118 Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Mon, 29 Sep 2025 23:03:54 +0000 Subject: [PATCH 16/19] fix-req --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e697c35..21044e2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ lightning_sdk >= 2025.09.16 -nest_asyncio +nest-asyncio From f8bb56dfa45bd74ff1282e65a07710a19503cd33 Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Tue, 30 Sep 2025 00:40:11 +0000 Subject: [PATCH 17/19] remove-some-code --- src/litai/llm.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/src/litai/llm.py b/src/litai/llm.py index 0c44ac5..f07f844 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -328,15 +328,12 @@ async def async_chat( **kwargs, ) - if not stream: - if response: - return response - handle_empty_response(sdk_model, attempt, self.max_retries) - else: - 
non_empty_stream = await self._peek_and_rebuild_async(response) - if non_empty_stream: - return non_empty_stream - handle_empty_response(sdk_model, attempt, self.max_retries) + if not stream and response: + return response + non_empty_stream = await self._peek_and_rebuild_async(response) + if non_empty_stream: + return non_empty_stream + handle_empty_response(sdk_model, attempt, self.max_retries) if sdk_model == model: print(f"💥 Failed to override with model '{model}'") except Exception as e: @@ -382,10 +379,8 @@ def sync_chat( **kwargs, ) - if not stream: - if response: - return response - handle_empty_response(sdk_model, attempt, self.max_retries) + if not stream and response: + return response if stream: try: peek_iter, return_iter = itertools.tee(response) @@ -396,9 +391,9 @@ def sync_chat( break if has_content: return return_iter - handle_empty_response(sdk_model, attempt, self.max_retries) except StopIteration: pass + handle_empty_response(sdk_model, attempt, self.max_retries) except Exception as e: if sdk_model == model: From 9ee85bcec38f16968f49150e94f3a5d5a0399964 Mon Sep 17 00:00:00 2001 From: Daniela Dapena Date: Tue, 30 Sep 2025 15:46:59 +0000 Subject: [PATCH 18/19] add-tests --- src/litai/llm.py | 7 +-- tests/test_llm.py | 118 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 3 deletions(-) diff --git a/src/litai/llm.py b/src/litai/llm.py index f07f844..cd9e834 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -330,9 +330,10 @@ async def async_chat( if not stream and response: return response - non_empty_stream = await self._peek_and_rebuild_async(response) - if non_empty_stream: - return non_empty_stream + if stream and response: + non_empty_stream = await self._peek_and_rebuild_async(response) + if non_empty_stream: + return non_empty_stream handle_empty_response(sdk_model, attempt, self.max_retries) if sdk_model == model: print(f"💥 Failed to override with model '{model}'") diff --git a/tests/test_llm.py b/tests/test_llm.py index 8ee4faa..233aa89 100644 --- a/tests/test_llm.py +++ b/tests/test_llm.py @@ -298,6 +298,124 @@ def mock_llm_constructor(name, teamspace="default-teamspace", **kwargs): ) +def test_empty_response_retries_sync_stream(monkeypatch): + """Test that retries work correctly for sync streaming when empty responses are returned.""" + from litai.llm import LLM as LLMCLIENT + + LLMCLIENT._sdkllm_cache.clear() + + class MockSyncIterator: + def __init__(self, items): + self.items = items + self.index = 0 + + def __iter__(self): + return self + + def __next__(self): + if self.index < len(self.items): + item = self.items[self.index] + self.index += 1 + return item + raise StopIteration + + mock_responses = [ + MockSyncIterator([]), + MockSyncIterator([]), + MockSyncIterator(["hello", " world"]), + ] + + mock_main_model = MagicMock() + + def mock_llm_constructor(name, teamspace="default-teamspace", **kwargs): + if name == "main-model": + mock_main_model.chat.side_effect = mock_responses + mock_main_model.name = "main-model" + return mock_main_model + raise ValueError(f"Unknown model: {name}") + + monkeypatch.setattr("litai.llm.SDKLLM", mock_llm_constructor) + + llm = LLM( + model="main-model", + ) + + response = llm.chat("test prompt", stream=True) + + assert mock_main_model.chat.call_count == 3 + + result = "" + for chunk in response: + result += chunk + assert result == "hello world" + + +@pytest.mark.asyncio +async def test_empty_response_retries_async(monkeypatch): + """Test that retries work correctly for async and non 
streaming when empty responses are returned.""" + from litai.llm import LLM as LLMCLIENT + + LLMCLIENT._sdkllm_cache.clear() + mock_sdkllm = MagicMock() + mock_sdkllm.name = "mock-model" + + mock_sdkllm.chat = AsyncMock(side_effect=["", "", "Main response"]) + + monkeypatch.setattr("litai.llm.SDKLLM", lambda *args, **kwargs: mock_sdkllm) + + llm = LLM( + model="main-model", + enable_async=True, + ) + response = await llm.chat(prompt="Hello", stream=False) + + assert response == "Main response" + assert mock_sdkllm.chat.call_count == 3 + + +@pytest.mark.asyncio +async def test_empty_response_retries_async_stream(monkeypatch): + """Test that retries work correctly for async streaming when empty responses are returned.""" + from litai.llm import LLM as LLMCLIENT + + LLMCLIENT._sdkllm_cache.clear() + mock_sdkllm = MagicMock() + mock_sdkllm.name = "mock-model" + + class MockAsyncIterator: + def __init__(self, items): + self.items = items + self.index = 0 + + def __aiter__(self): + return self + + async def __anext__(self): + if self.index < len(self.items): + item = self.items[self.index] + self.index += 1 + return item + raise StopAsyncIteration + + mock_sdkllm.chat = AsyncMock( + side_effect=[MockAsyncIterator([]), MockAsyncIterator([]), MockAsyncIterator(["Main", " response"])] + ) + + monkeypatch.setattr("litai.llm.SDKLLM", lambda *args, **kwargs: mock_sdkllm) + + llm = LLM( + model="main-model", + enable_async=True, + ) + + response = await llm.chat(prompt="Hello", stream=True) + result = "" + async for chunk in response: + result += chunk + assert result == "Main response" + assert mock_sdkllm.chat.call_count == 3 + + @pytest.mark.asyncio async def test_llm_async_chat(monkeypatch): """Test async requests.""" From c84aabfe9a85eda45e2178c9181afce54788d28a Mon Sep 17 00:00:00 2001 From: Kaeun Kim Date: Mon, 6 Oct 2025 15:00:59 +0100 Subject: [PATCH 19/19] nit: return type --- src/litai/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/litai/llm.py b/src/litai/llm.py index cd9e834..0784dcd 100644 --- a/src/litai/llm.py +++ b/src/litai/llm.py @@ -417,7 +417,7 @@ def chat( # noqa: D417 auto_call_tools: bool = False, reasoning_effort: Optional[Literal["none", "low", "medium", "high"]] = None, **kwargs: Any, - ) -> Union[str, Task[str | AsyncIterator[str] | None] | Iterator[str], None]: + ) -> Union[str, Task[Union[str, AsyncIterator[str], None]], Iterator[str], None]: """Sends a message to the LLM and retrieves a response. Args:
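
Taken together, the series replaces the single return from _model_call with a retry loop that treats an empty response the same way as a raised exception: report it via handle_empty_response and move on to the next attempt or the next fallback model. The following is a minimal standalone sketch of that control flow under stated assumptions: call_with_fallback and call_model are illustrative stand-ins, not litai APIs, and the real code reports through handle_empty_response / handle_model_error rather than print.

from typing import Callable, List, Optional


def call_with_fallback(
    models: List[str],
    call_model: Callable[[str], Optional[str]],
    max_retries: int = 3,
) -> str:
    """Try each model up to max_retries times; empty or None responses count as failures."""
    for model in models:
        for attempt in range(max_retries):
            try:
                response = call_model(model)
                if response:  # non-empty string -> success
                    return response
                print(f"empty response from {model}, attempt {attempt + 1}/{max_retries}")
            except Exception as exc:  # provider/transport errors are retried the same way
                print(f"{model} failed on attempt {attempt + 1}/{max_retries}: {exc}")
    raise RuntimeError(f"LLM call failed after {max_retries} attempts")


if __name__ == "__main__":
    # Mirrors the unit test's shape: three empty replies from the main model, two from
    # the fallback, then a real answer on the fallback's final attempt.
    replies = iter(["", "", "", "", "", "Fallback response"])
    print(call_with_fallback(["main-model", "fallback-model"], lambda _model: next(replies)))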
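
For streaming calls the response is a generator, so emptiness cannot be checked with a plain truth test without consuming it. The sync path (sync_chat) peeks with itertools.tee: one copy is advanced until a non-empty chunk appears, and if one is found the untouched copy is handed back to the caller. A minimal sketch of that idea, assuming nothing beyond an iterator of string chunks (the peek_nonempty name is illustrative only):

import itertools
from typing import Iterator, Optional


def peek_nonempty(stream: Iterator[str]) -> Optional[Iterator[str]]:
    """Return an equivalent stream if it contains any non-empty chunk, else None (retry)."""
    peek_iter, return_iter = itertools.tee(stream)
    for chunk in peek_iter:        # advancing peek_iter buffers chunks for return_iter
        if chunk != "":
            return return_iter     # caller still receives every chunk, including leading ""
    return None                    # exhausted without content -> treat as an empty response


if __name__ == "__main__":
    assert peek_nonempty(iter([])) is None
    assert peek_nonempty(iter(["", ""])) is None
    kept = peek_nonempty(iter(["", "hello", " world"]))
    assert kept is not None and "".join(kept) == "hello world"

Because the for loop already absorbs StopIteration from an exhausted iterator, the explicit try/except StopIteration around the tee in the patch is defensive rather than strictly required.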
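
The async path cannot use itertools.tee, so _peek_and_rebuild_async instead buffers the chunks consumed while peeking and replays them in front of the remainder of the stream. A standalone sketch of the same replay trick under the same assumptions (peek_and_rebuild is an illustrative name, not litai's):

import asyncio
from typing import AsyncIterator, List, Optional


async def peek_and_rebuild(agen: AsyncIterator[str]) -> Optional[AsyncIterator[str]]:
    """Async analogue of the tee-based peek: buffer until a non-empty chunk is seen."""
    peeked: List[str] = []
    async for chunk in agen:
        peeked.append(chunk)
        if chunk != "":
            async def replay() -> AsyncIterator[str]:
                for buffered in peeked:        # re-emit what peeking consumed
                    yield buffered
                async for remaining in agen:   # then pass the rest through untouched
                    yield remaining

            return replay()
    return None  # stream ended with only empty chunks -> signal the caller to retry


async def _demo() -> None:
    async def chunks() -> AsyncIterator[str]:
        for piece in ("", "Main", " response"):
            yield piece

    stream = await peek_and_rebuild(chunks())
    assert stream is not None
    print("".join([c async for c in stream]))  # prints "Main response"


if __name__ == "__main__":
    asyncio.run(_demo())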