Make compatible with Python 3.10 on Lightning AI workspace

tomasruizt · tomasruizt · commit 312eeb4d1bb4 · 2025-03-19T02:05:28.000Z
diff --git a/llmlib/llmlib/base_llm.py b/llmlib/llmlib/base_llm.py
@@ -1,6 +1,7 @@
 from io import BytesIO
 from pathlib import Path
-from typing import Literal, Self
+from typing import Literal
+from typing_extensions import Self
 from PIL import Image
 
 
diff --git a/llmlib/llmlib/gemini/gemini_code.py b/llmlib/llmlib/gemini/gemini_code.py
@@ -3,6 +3,7 @@
 """
 
 from dataclasses import dataclass
+from enum import Enum
 from functools import singledispatchmethod
 from io import BytesIO
 import json
@@ -23,7 +24,6 @@
 )
 import cv2
 from google import genai
-from enum import StrEnum
 from ..base_llm import LLM, Message, validate_only_first_message_has_files
 from ..error_handling import notify_bugsnag
 
@@ -43,7 +43,7 @@ def storage_uri(bucket: str, blob_name: str) -> str:
     return "gs://%s/%s" % (bucket, blob_name)
 
 
-class GeminiModels(StrEnum):
+class GeminiModels(str, Enum):
     """
     The 3 trailing digits indicate the stable version
     https://cloud.google.com/vertex-ai/generative-ai/docs/learn/model-versions#stable-version
diff --git a/llmlib/llmlib/huggingface_inference.py b/llmlib/llmlib/huggingface_inference.py
@@ -5,8 +5,7 @@
 from pathlib import Path
 from dataclasses import dataclass
 import PIL
-from enum import StrEnum
-
+from enum import Enum
 import openai
 from .base_llm import LLM, Message, validate_only_first_message_has_files
 import cv2
@@ -21,8 +20,12 @@ def get_image_as_base64(image_bytes: bytes):
     return base64.b64encode(image_bytes).decode("utf-8")
 
 
-def convert_message_to_hf_format(message: Message, max_n_frames_per_video: int) -> dict:
-    """Convert a Message to HuggingFace chat format."""
+def convert_message_to_openai_format(message: Message, max_n_frames_per_video: int) -> dict:
+    """
+    Convert a Message to OpenAI chat format.
+    Images become base64 encoded strings.
+    Videos are processed like a list of images, each of which becomes a base64 encoded string.
+    """
     content = []
 
     # Add text content if present
@@ -54,8 +57,9 @@ def convert_message_to_hf_format(message: Message, max_n_frames_per_video: int)
 
 
 def video_to_imgs(video_path: Path, max_n_frames: int) -> list[PIL.Image.Image]:
-    assert isinstance(video_path, Path), video_path
     """From https://github.com/agustoslu/simple-inference-benchmark/blob/5cec55787d34af65f0d11efc429c3d4de92f051a/utils.py#L79"""
+    assert isinstance(video_path, Path), video_path
+    assert video_path.exists(), video_path
     cap = cv2.VideoCapture(str(video_path))
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     fps = int(cap.get(cv2.CAP_PROP_FPS))
@@ -129,7 +133,7 @@ def extract_bytes(img: PIL.Image.Image | str | Path) -> bytes:
         raise ValueError(f"Unsupported image type: {type(img)}")
 
 
-class HuggingFaceVLMs(StrEnum):
+class HuggingFaceVLMs(str, Enum):
     gemma_3_27b_it = "google/gemma-3-27b-it"
 
 
@@ -171,7 +175,7 @@ def complete_msgs(self, msgs: list[Message]) -> str:
         """Complete a conversation with the model."""
         validate_only_first_message_has_files(msgs)
         hf_messages = [
-            convert_message_to_hf_format(
+            convert_message_to_openai_format(
                 msg, max_n_frames_per_video=self.max_n_frames_per_video
             )
             for msg in msgs
diff --git a/llmlib/llmlib/openai/openai_completion.py b/llmlib/llmlib/openai/openai_completion.py
@@ -6,7 +6,9 @@
 
 _default_model = "gpt-4o-mini"
 
-client = OpenAI()  # must be outside of the class to avoid pickling issues
+
+def create_client() -> OpenAI:
+    return OpenAI()  # must be outside of the class to avoid pickling issues
 
 
 class OpenAIModel(LLM):
@@ -43,6 +45,7 @@ def complete(model: str, prompt: str) -> str:
 
 
 def complete_msgs(model: str, messages: list[dict]) -> str:
+    client = create_client()
     completion: ChatCompletion = client.chat.completions.create(
         model=model, temperature=0.0, messages=messages
     )
diff --git a/llmlib/pyproject.toml b/llmlib/pyproject.toml
@@ -6,7 +6,7 @@ authors = [
     {name = "Tomas Ruiz", email = "tomas.ruiz.te@gmail.com"}
 ]
 readme = "README.md"
-requires-python = ">=3.11"
+requires-python = ">=3.10"
 dependencies = [
     "bugsnag>=4.7.1",
     "decorator>=5.1.1",
diff --git a/tests/test_huggingface_vlm.py b/tests/test_huggingface_vlm.py
@@ -1,5 +1,5 @@
 import cv2
-from llmlib.huggingface_inference import convert_message_to_hf_format
+from llmlib.huggingface_inference import convert_message_to_openai_format
 import pytest
 from llmlib.huggingface_inference import HuggingFaceVLM, HuggingFaceVLMs
 from .helpers import (
@@ -66,14 +66,14 @@ def test_huggingface_vlm_multi_turn_with_6min_video(gemma3):
 
 
 @pytest.mark.skipif(condition=is_ci(), reason="Files are not available on CI")
-def test_convert_to_huggingface_format():
+def test_convert_to_openai_format():
     img_msg1 = pyramid_message(load_img=True)
     img_msg2 = pyramid_message(load_img=False)
     max_n_frames_per_video = 200
-    b64_enc1 = convert_message_to_hf_format(img_msg1, max_n_frames_per_video)[
+    b64_enc1 = convert_message_to_openai_format(img_msg1, max_n_frames_per_video)[
         "content"
     ][1]["image_url"]["url"]
-    b64_enc2 = convert_message_to_hf_format(img_msg2, max_n_frames_per_video)[
+    b64_enc2 = convert_message_to_openai_format(img_msg2, max_n_frames_per_video)[
         "content"
     ][1]["image_url"]["url"]
     # assert b64_enc1 == b64_enc2
@@ -84,5 +84,5 @@ def test_convert_to_huggingface_format():
     cv2.imwrite(file_for_test("generated_pyramid_2.jpeg"), array2)
 
     msg = video_message()
-    hf_msg = convert_message_to_hf_format(msg, max_n_frames_per_video)
+    hf_msg = convert_message_to_openai_format(msg, max_n_frames_per_video)
     assert len(hf_msg["content"]) > 10
diff --git a/tests/test_model_registry.py b/tests/test_model_registry.py
@@ -4,13 +4,13 @@
 
 
 @dataclass
-class TestLLM(LLM):
+class LLMForTest(LLM):
     model_id: str
     model_ids = ["id1", "id2"]
 
 
 def test_model_entries_from_mult_ids():
-    e1, e2 = model_entries_from_mult_ids(TestLLM)
+    e1, e2 = model_entries_from_mult_ids(LLMForTest)
     assert e1.model_id == "id1"
     assert e2.model_id == "id2"
 

Original file line number	Diff line number	Diff line change
`@@ -6,7 +6,7 @@ authors = [`
`6`	`6`	`{name = "Tomas Ruiz", email = "tomas.ruiz.te@gmail.com"}`
`7`	`7`	`]`
`8`	`8`	`readme = "README.md"`
`9`		`-requires-python = ">=3.11"`
	`9`	`+requires-python = ">=3.10"`
`10`	`10`	`dependencies = [`
`11`	`11`	`"bugsnag>=4.7.1",`
`12`	`12`	`"decorator>=5.1.1",`