
Commit d2d41f4

tgasser-nv and Pouyanpi authored
chore(types): Type-clean server/ (20 errors) (#1397)
* Initial checkin

* Add nemoguardrails/server to pyright type-checking

* chore(types): Type-clean embeddings/ (25 errors) (#1383)

* test: restore test that was skipped due to Colang 2.0 serialization issue (#1449)

* fix(llm): add fallback extraction for reasoning traces from <think> tags (#1474)

  Adds a compatibility layer for LLM providers that don't properly populate reasoning_content in additional_kwargs. When reasoning_content is missing, the system now falls back to extracting reasoning traces from <think>...</think> tags in the response content and removes the tags from the final output.

  This fixes compatibility with certain NVIDIA models (e.g., nvidia/llama-3.3-nemotron-super-49b-v1.5) in langchain-nvidia-ai-endpoints that include reasoning traces in <think> tags but fail to populate the reasoning_content field. All reasoning models using ChatNVIDIA should expose reasoning content consistently through the same interface.

* Clean up the config_id logic based on Traian and Greptile feedback

---------

Co-authored-by: Pouyan <[email protected]>
1 parent 735ddac commit d2d41f4
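
For context on the <think>-tag fallback described in #1474 above, the idea is: prefer the provider-supplied reasoning_content, and only when it is missing, pull the trace out of <think>...</think> tags and strip them from the visible reply. The helper below is an illustrative sketch of that behavior, not the code merged in this commit; the function name and regex are assumptions.

import re
from typing import Optional, Tuple

# Illustrative only: matches a single <think>...</think> block, including newlines.
THINK_PATTERN = re.compile(r"<think>(.*?)</think>", re.DOTALL)


def split_reasoning(content: str, reasoning_content: Optional[str]) -> Tuple[Optional[str], str]:
    """Return (reasoning_trace, visible_content), falling back to <think> tags."""
    if reasoning_content:
        # Provider populated the reasoning field correctly; nothing to strip.
        return reasoning_content, content
    match = THINK_PATTERN.search(content)
    if match:
        trace = match.group(1).strip()
        cleaned = THINK_PATTERN.sub("", content).strip()
        return trace, cleaned
    return None, content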

File tree

3 files changed: +85 -43 lines

  nemoguardrails/server/api.py
  nemoguardrails/server/datastore/redis_store.py
  pyproject.toml

nemoguardrails/server/api.py

Lines changed: 75 additions & 42 deletions
@@ -22,7 +22,7 @@
 import time
 import warnings
 from contextlib import asynccontextmanager
-from typing import Any, List, Optional
+from typing import Any, Callable, List, Optional

 from fastapi import FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
@@ -42,14 +42,32 @@
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger(__name__)

+
+class GuardrailsApp(FastAPI):
+    """Custom FastAPI subclass with additional attributes for Guardrails server."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Initialize custom attributes
+        self.default_config_id: Optional[str] = None
+        self.rails_config_path: str = ""
+        self.disable_chat_ui: bool = False
+        self.auto_reload: bool = False
+        self.stop_signal: bool = False
+        self.single_config_mode: bool = False
+        self.single_config_id: Optional[str] = None
+        self.loop: Optional[asyncio.AbstractEventLoop] = None
+        self.task: Optional[asyncio.Future] = None
+
+
 # The list of registered loggers. Can be used to send logs to various
 # backends and storage engines.
-registered_loggers = []
+registered_loggers: List[Callable] = []

 api_description = """Guardrails Sever API."""

 # The headers for each request
-api_request_headers = contextvars.ContextVar("headers")
+api_request_headers: contextvars.ContextVar = contextvars.ContextVar("headers")

 # The datastore that the Server should use.
 # This is currently used only for storing threads.
@@ -59,7 +77,7 @@


 @asynccontextmanager
-async def lifespan(app: FastAPI):
+async def lifespan(app: GuardrailsApp):
     # Startup logic here
     """Register any additional challenges, if available at startup."""
     challenges_files = os.path.join(app.rails_config_path, "challenges.json")
@@ -82,8 +100,11 @@ async def lifespan(app: FastAPI):
     if os.path.exists(filepath):
         filename = os.path.basename(filepath)
         spec = importlib.util.spec_from_file_location(filename, filepath)
-        config_module = importlib.util.module_from_spec(spec)
-        spec.loader.exec_module(config_module)
+        if spec is not None and spec.loader is not None:
+            config_module = importlib.util.module_from_spec(spec)
+            spec.loader.exec_module(config_module)
+        else:
+            config_module = None

         # If there is an `init` function, we call it with the reference to the app.
         if config_module is not None and hasattr(config_module, "init"):
@@ -110,21 +131,22 @@ async def root_handler():

     if app.auto_reload:
         app.loop = asyncio.get_running_loop()
+        # Store the future directly as task
         app.task = app.loop.run_in_executor(None, start_auto_reload_monitoring)

     yield

     # Shutdown logic here
     if app.auto_reload:
         app.stop_signal = True
-        if hasattr(app, "task"):
+        if hasattr(app, "task") and app.task is not None:
             app.task.cancel()
         log.info("Shutting down file observer")
     else:
         pass


-app = FastAPI(
+app = GuardrailsApp(
     title="Guardrails Server API",
     description=api_description,
     version="0.1.0",
@@ -186,7 +208,7 @@ class RequestBody(BaseModel):
         max_length=255,
         description="The id of an existing thread to which the messages should be added.",
     )
-    messages: List[dict] = Field(
+    messages: Optional[List[dict]] = Field(
         default=None, description="The list of messages in the current conversation."
     )
     context: Optional[dict] = Field(
@@ -232,7 +254,7 @@ def ensure_config_ids(cls, v, values):


 class ResponseBody(BaseModel):
-    messages: List[dict] = Field(
+    messages: Optional[List[dict]] = Field(
         default=None, description="The new messages in the conversation"
     )
     llm_output: Optional[dict] = Field(
@@ -282,8 +304,8 @@ async def get_rails_configs():


 # One instance of LLMRails per config id
-llm_rails_instances = {}
-llm_rails_events_history_cache = {}
+llm_rails_instances: dict[str, LLMRails] = {}
+llm_rails_events_history_cache: dict[str, dict] = {}


 def _generate_cache_key(config_ids: List[str]) -> str:
@@ -310,7 +332,7 @@ def _get_rails(config_ids: List[str]) -> LLMRails:
         # get the same thing.
         config_ids = [""]

-    full_llm_rails_config = None
+    full_llm_rails_config: Optional[RailsConfig] = None

     for config_id in config_ids:
         base_path = os.path.abspath(app.rails_config_path)
@@ -330,6 +352,9 @@ def _get_rails(config_ids: List[str]) -> LLMRails:
         else:
             full_llm_rails_config += rails_config

+    if full_llm_rails_config is None:
+        raise ValueError("No valid rails configuration found.")
+
     llm_rails = LLMRails(config=full_llm_rails_config, verbose=True)
     llm_rails_instances[configs_cache_key] = llm_rails

@@ -360,30 +385,33 @@ async def chat_completion(body: RequestBody, request: Request):
     # Save the request headers in a context variable.
     api_request_headers.set(request.headers)

+    # Use Request config_ids if set, otherwise use the FastAPI default config.
+    # If neither is available we can't generate any completions as we have no config_id
     config_ids = body.config_ids
-    if not config_ids and app.default_config_id:
-        config_ids = [app.default_config_id]
-    elif not config_ids and not app.default_config_id:
-        raise GuardrailsConfigurationError(
-            "No 'config_id' provided and no default configuration is set for the server. "
-            "You must set a 'config_id' in your request or set use --default-config-id when . "
-        )
+    if not config_ids:
+        if app.default_config_id:
+            config_ids = [app.default_config_id]
+        else:
+            raise GuardrailsConfigurationError(
+                "No request config_ids provided and server has no default configuration"
+            )
+
     try:
         llm_rails = _get_rails(config_ids)
     except ValueError as ex:
         log.exception(ex)
-        return {
-            "messages": [
+        return ResponseBody(
+            messages=[
                 {
                     "role": "assistant",
                     "content": f"Could not load the {config_ids} guardrails configuration. "
                     f"An internal error has occurred.",
                 }
             ]
-        }
+        )

     try:
-        messages = body.messages
+        messages = body.messages or []
         if body.context:
             messages.insert(0, {"role": "context", "content": body.context})

@@ -396,14 +424,14 @@ async def chat_completion(body: RequestBody, request: Request):

             # We make sure the `thread_id` meets the minimum complexity requirement.
             if len(body.thread_id) < 16:
-                return {
-                    "messages": [
+                return ResponseBody(
+                    messages=[
                         {
                             "role": "assistant",
                             "content": "The `thread_id` must have a minimum length of 16 characters.",
                         }
                     ]
-                }
+                )

             # Fetch the existing thread messages. For easier management, we prepend
             # the string `thread-` to all thread keys.
@@ -440,32 +468,37 @@ async def chat_completion(body: RequestBody, request: Request):
         )

         if isinstance(res, GenerationResponse):
-            bot_message = res.response[0]
+            bot_message_content = res.response[0]
+            # Ensure bot_message is always a dict
+            if isinstance(bot_message_content, str):
+                bot_message = {"role": "assistant", "content": bot_message_content}
+            else:
+                bot_message = bot_message_content
         else:
             assert isinstance(res, dict)
             bot_message = res

         # If we're using threads, we also need to update the data before returning
         # the message.
-        if body.thread_id:
+        if body.thread_id and datastore is not None and datastore_key is not None:
             await datastore.set(datastore_key, json.dumps(messages + [bot_message]))

-        result = {"messages": [bot_message]}
+        result = ResponseBody(messages=[bot_message])

         # If we have additional GenerationResponse fields, we return as well
         if isinstance(res, GenerationResponse):
-            result["llm_output"] = res.llm_output
-            result["output_data"] = res.output_data
-            result["log"] = res.log
-            result["state"] = res.state
+            result.llm_output = res.llm_output
+            result.output_data = res.output_data
+            result.log = res.log
+            result.state = res.state

         return result

     except Exception as ex:
         log.exception(ex)
-        return {
-            "messages": [{"role": "assistant", "content": "Internal server error."}]
-        }
+        return ResponseBody(
+            messages=[{"role": "assistant", "content": "Internal server error."}]
+        )


 # By default, there are no challenges
@@ -498,7 +531,7 @@ def register_datastore(datastore_instance: DataStore):
     datastore = datastore_instance


-def register_logger(logger: callable):
+def register_logger(logger: Callable):
     """Register an additional logger"""
     registered_loggers.append(logger)

@@ -510,8 +543,7 @@ def start_auto_reload_monitoring():
     from watchdog.observers import Observer

     class Handler(FileSystemEventHandler):
-        @staticmethod
-        def on_any_event(event):
+        def on_any_event(self, event):
             if event.is_directory:
                 return None

@@ -521,7 +553,8 @@ def on_any_event(event):
             )

             # Compute the relative path
-            rel_path = os.path.relpath(event.src_path, app.rails_config_path)
+            src_path_str = str(event.src_path)
+            rel_path = os.path.relpath(src_path_str, app.rails_config_path)

             # The config_id is the first component
             parts = rel_path.split(os.path.sep)
@@ -530,7 +563,7 @@ def on_any_event(event):
             if (
                 not parts[-1].startswith(".")
                 and ".ipynb_checkpoints" not in parts
-                and os.path.isfile(event.src_path)
+                and os.path.isfile(src_path_str)
             ):
                 # We just remove the config from the cache so that a new one is used next time
                 if config_id in llm_rails_instances:
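
Beyond the mechanical fixes, the main pattern in the api.py changes above is the GuardrailsApp subclass: server-level state is declared as annotated attributes in __init__ instead of being attached to a plain FastAPI instance at runtime, which is what lets pyright see attributes such as app.rails_config_path. A minimal sketch of the same pattern (class and attribute names here are illustrative, not from the commit):

from typing import Optional

from fastapi import FastAPI


class TypedApp(FastAPI):
    """FastAPI subclass whose extra attributes are declared up front for type checkers."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.config_path: str = ""  # pyright now knows this is a str
        self.default_config_id: Optional[str] = None


app = TypedApp(title="Example")
app.config_path = "./config"  # OK; assigning a non-str here would be flagged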

nemoguardrails/server/datastore/redis_store.py

Lines changed: 9 additions & 1 deletion
@@ -16,7 +16,10 @@
 import asyncio
 from typing import Optional

-import aioredis
+try:
+    import aioredis  # type: ignore[import]
+except ImportError:
+    aioredis = None  # type: ignore[assignment]

 from nemoguardrails.server.datastore.datastore import DataStore

@@ -35,6 +38,11 @@ def __init__(
             username: [Optional] The username to use for authentication.
             password: [Optional] The password to use for authentication
         """
+        if aioredis is None:
+            raise ImportError(
+                "aioredis is required for RedisStore. Install it with: pip install aioredis"
+            )
+
         self.url = url
         self.username = username
         self.password = password
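
The redis_store.py change uses the usual optional-dependency guard: the module still imports when aioredis is not installed, and the missing package only surfaces, with an actionable message, when a RedisStore is actually constructed. A generic sketch of the same pattern, with a hypothetical package name:

# Optional-dependency guard (illustrative; "some_backend" is a placeholder package).
try:
    import some_backend  # type: ignore[import]
except ImportError:
    some_backend = None  # type: ignore[assignment]


class BackendStore:
    def __init__(self, url: str):
        if some_backend is None:
            # Fail at construction time, not at module import time.
            raise ImportError("some_backend is required: pip install some-backend")
        self.url = url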

pyproject.toml

Lines changed: 1 addition & 0 deletions
@@ -162,6 +162,7 @@ include = [
     "nemoguardrails/kb/**",
     "nemoguardrails/logging/**",
     "nemoguardrails/tracing/**",
+    "nemoguardrails/server/**",
     "tests/test_callbacks.py",
 ]

