From af910adc0eec0823ffea338f330a19f732a0f568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20C=C3=A9sar=20Maymone=20Galv=C3=A3o?= <139981685+bcmaymonegalvao@users.noreply.github.com> Date: Sun, 12 Oct 2025 01:08:38 -0300 Subject: [PATCH 01/10] fix(google-gemini): handle unrecognized FinishReason enum gracefully --- .../chat_models/google_gemini.py | 549 ++++++++++++++++++ .../fix-handle-unknown-finish-reason.rst | 27 + 2 files changed, 576 insertions(+) create mode 100644 libs/community/langchain_community/chat_models/google_gemini.py create mode 100644 libs/community/langchain_community/fix-handle-unknown-finish-reason.rst diff --git a/libs/community/langchain_community/chat_models/google_gemini.py b/libs/community/langchain_community/chat_models/google_gemini.py new file mode 100644 index 0000000000000..5f357d4fb7090 --- /dev/null +++ b/libs/community/langchain_community/chat_models/google_gemini.py @@ -0,0 +1,549 @@ +from __future__ import annotations + +import asyncio +import base64 +import logging +import os +from io import BytesIO +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Callable, + Dict, + Iterator, + List, + Mapping, + Optional, + Sequence, + Type, + Union, +) +from urllib.parse import urlparse + +import requests +from langchain.callbacks.manager import ( + AsyncCallbackManagerForLLMRun, + CallbackManagerForLLMRun, +) +from langchain.pydantic_v1 import Field, root_validator +from langchain.utils import get_from_dict_or_env +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.messages import ( + AIMessage, + AIMessageChunk, + BaseMessage, + ChatMessage, + ChatMessageChunk, + HumanMessage, + HumanMessageChunk, +) +from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult +from tenacity import ( + before_sleep_log, + retry, + retry_if_exception_type, + stop_after_attempt, + wait_exponential, +) + +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + import google.generativeai as genai + +try: + import PIL.Image + from PIL import Image +except ImportError: + PIL = None + Image = None + +try: + import IPython.display +except ImportError: + IPython = None + + +class ChatGoogleGeminiError(Exception): + """ + Custom exception class for errors associated with the `Google Gemini` API. + + This exception is raised when there are specific issues related to the + Google Gemini API usage in the ChatGoogleGemini class, such as unsupported + message types or roles. + """ + + pass + + +def _create_retry_decorator() -> Callable[[Any], Any]: + """ + Creates and returns a preconfigured tenacity retry decorator. + + The retry decorator is configured to handle specific Google API exceptions + such as ResourceExhausted and ServiceUnavailable. It uses an exponential + backoff strategy for retries. + + Returns: + Callable[[Any], Any]: A retry decorator configured for handling specific + Google API exceptions. 
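+
+    Example (illustrative sketch; ``model`` and ``contents`` stand for a
+    configured ``genai.GenerativeModel`` and its request payload):
+
+    .. code-block:: python
+
+        retry_decorator = _create_retry_decorator()
+
+        @retry_decorator
+        def _generate_with_retry(**kwargs):
+            return model.generate_content(**kwargs)
+
+        response = _generate_with_retry(contents=contents)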
+ """ + import google.api_core.exceptions + + multiplier = 2 + min_seconds = 1 + max_seconds = 60 + max_retries = 10 + + return retry( + reraise=True, + stop=stop_after_attempt(max_retries), + wait=wait_exponential(multiplier=multiplier, min=min_seconds, max=max_seconds), + retry=( + retry_if_exception_type(google.api_core.exceptions.ResourceExhausted) + | retry_if_exception_type(google.api_core.exceptions.ServiceUnavailable) + | retry_if_exception_type(google.api_core.exceptions.GoogleAPIError) + ), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + + +def chat_with_retry(*, generation_method: Callable, **kwargs: Any) -> Any: + """ + Executes a chat generation method with retry logic using tenacity. + + This function is a wrapper that applies a retry mechanism to a provided + chat generation function. It is useful for handling intermittent issues + like network errors or temporary service unavailability. + + Args: + generation_method (Callable): The chat generation method to be executed. + **kwargs (Any): Additional keyword arguments to pass to the generation method. + + Returns: + Any: The result from the chat generation method. + """ + retry_decorator = _create_retry_decorator() + + @retry_decorator + def _chat_with_retry(**kwargs: Any) -> Any: + return generation_method(**kwargs) + + return _chat_with_retry(**kwargs) + + +async def achat_with_retry(*, generation_method: Awaitable, **kwargs: Any) -> Any: + """ + Asynchronously executes a chat generation method with retry logic. + + Similar to `chat_with_retry`, this function applies a retry decorator for + asynchronous chat generation methods. It handles retries for tasks like + generating responses from a language model. + + Args: + generation_method (Awaitable): The async chat generation method to be executed. + **kwargs (Any): Additional keyword arguments to pass to the generation method. + + Returns: + Any: The result from the async chat generation method. + """ + retry_decorator = _create_retry_decorator() + + @retry_decorator + async def _achat_with_retry(**kwargs: Any) -> Any: + return await generation_method(**kwargs) + + return await _achat_with_retry(**kwargs) + + +def _get_role(message: BaseMessage) -> str: + if isinstance(message, ChatMessage): + if message.role not in ("user", "model"): + raise ChatGoogleGeminiError( + "Gemini only supports user and model roles when" + " providing it with Chat messages." + ) + return message.role + elif isinstance(message, HumanMessage): + return "user" + elif isinstance(message, AIMessage): + return "model" + else: + # TODO: Gemini doesn't seem to have a concept of system messages yet. + raise ChatGoogleGeminiError( + f"Message of '{message.type}' type not supported by Gemini." + " Please only provide it with Human or AI (user/assistant) messages." + ) + + +def _is_openai_parts_format(part: dict) -> bool: + return "type" in part + + +def _is_url(s: str) -> bool: + try: + result = urlparse(s) + return all([result.scheme, result.netloc]) + except Exception as e: + logger.debug(f"Unable to parse URL: {e}") + return False + + +def _is_b64(s: str) -> bool: + return s.startswith("data:image") + + +def _url_to_pil(image_source: str) -> Image: + if PIL is None: + raise ImportError( + "PIL is required to load images. 
Please install it " + "with `pip install pillow`" + ) + try: + if isinstance(image_source, (Image.Image, IPython.display.Image)): + return image_source + elif _is_url(image_source): + response = requests.get(image_source) + response.raise_for_status() + return Image.open(BytesIO(response.content)) + elif _is_b64(image_source): + _, encoded = image_source.split(",", 1) + data = base64.b64decode(encoded) + return Image.open(BytesIO(data)) + elif os.path.exists(image_source): + return Image.open(image_source) + else: + raise ValueError( + "The provided string is not a valid URL, base64, or file path." + ) + except Exception as e: + raise ValueError(f"Unable to process the provided image source: {e}") + + +def _convert_to_parts( + content: Sequence[Union[str, dict]] +) -> List[genai.types.PartType]: + """Converts a list of LangChain messages into a google parts.""" + import google.generativeai as genai + + parts = [] + for part in content: + if isinstance(part, str): + parts.append(genai.types.PartDict(text=part, inline_data=None)) + elif isinstance(part, Mapping): + # OpenAI Format + if _is_openai_parts_format(part): + if part["type"] == "text": + parts.append({"text": part["text"]}) + elif part["type"] == "image_url": + img_url = part["image_url"] + if isinstance(img_url, dict): + if "url" not in img_url: + raise ValueError( + f"Unrecognized message image format: {img_url}" + ) + img_url = img_url["url"] + + parts.append({"inline_data": _url_to_pil(img_url)}) + else: + raise ValueError(f"Unrecognized message part type: {part['type']}") + else: + # Yolo + logger.warning( + "Unrecognized message part format. Assuming it's a text part." + ) + parts.append(part) + else: + # TODO: Maybe some of Google's native stuff + # would hit this branch. + raise ChatGoogleGeminiError( + "Gemini only supports text and inline_data parts." + ) + return parts + + +def _messages_to_genai_contents( + input_messages: List[BaseMessage], +) -> List[genai.types.ContentDict]: + """Converts a list of messages into a Gemini API google content dicts.""" + + messages: List[genai.types.MessageDict] = [] + + for i, message in enumerate(input_messages): + role = _get_role(message) + if isinstance(message.content, str): + parts = [message.content] + else: + parts = _convert_to_parts(message.content) + messages.append({"role": role, "parts": parts}) + if i > 0: + # Cannot have multiple messages from the same role in a row. + if role == messages[-2]["role"]: + raise ChatGoogleGeminiError( + "Cannot have multiple messages from the same role in a row." + " Consider merging them into a single message with multiple" + f" parts.\nReceived: {messages}" + ) + return messages + + +def _parts_to_content(parts: List[genai.types.PartType]) -> Union[List[dict], str]: + """Converts a list of Gemini API Part objects into a list of LangChain messages.""" + if len(parts) == 1 and parts[0].text is not None and not parts[0].inline_data: + # Simple text response. The typical response + return parts[0].text + elif not parts: + logger.warning("Gemini produced an empty response.") + return "" + messages = [] + for part in parts: + if part.text is not None: + messages.append( + { + "type": "text", + "text": part.text, + } + ) + else: + # TODO: Handle inline_data if that's a thing? + raise ChatGoogleGeminiError(f"Unexpected part type. 
{part}") + return messages + + +def _response_to_result( + response: genai.types.GenerateContentResponse, + ai_msg_t: Type[BaseMessage] = AIMessage, + human_msg_t: Type[BaseMessage] = HumanMessage, + chat_msg_t: Type[BaseMessage] = ChatMessage, + generation_t: Type[ChatGeneration] = ChatGeneration, +) -> ChatResult: + """Converts a PaLM API response into a LangChain ChatResult.""" + llm_output = {} + if response.prompt_feedback: + try: + prompt_feedback = type(response.prompt_feedback).to_dict( + response.prompt_feedback, + use_integers_for_enums=False + ) + llm_output["prompt_feedback"] = prompt_feedback + except Exception as e: + logger.debug(f"Unable to convert prompt_feedback to dict: {e}") + + generations: List[ChatGeneration] = [] + + role_map = { + "model": ai_msg_t, + "user": human_msg_t, + } + for candidate in response.candidates: + content = candidate.content + parts_content = _parts_to_content(content.parts) + if content.role not in role_map: + logger.warning( + f"Unrecognized role: {content.role}. Treating as a ChatMessage." + ) + msg = chat_msg_t(content=parts_content, role=content.role) + else: + msg = role_map[content.role](content=parts_content) + generation_info = {} + if candidate.finish_reason: + finish_reason = candidate.finish_reason + # Handle both Enum and int types safely + if hasattr(finish_reason, "name"): + generation_info["finish_reason"] = finish_reason.name + else: + # Unrecognized enum value (e.g., FinishReason=12) + generation_info["finish_reason"] = f"UNKNOWN({finish_reason})" + if candidate.safety_ratings: + generation_info["safety_ratings"] = [ + type(rating).to_dict(rating) for rating in candidate.safety_ratings + ] + generations.append(generation_t(message=msg, generation_info=generation_info)) + if not response.candidates: + # Likely a "prompt feedback" violation (e.g., toxic input) + # Raising an error would be different than how OpenAI handles it, + # so we'll just log a warning and continue with an empty message. + logger.warning( + "Gemini produced an empty response. Continuing with empty message\n" + f"Feedback: {response.prompt_feedback}" + ) + generations = [generation_t(message=ai_msg_t(content=""), generation_info={})] + return ChatResult(generations=generations, llm_output=llm_output) + + +class ChatGoogleGemini(BaseChatModel): + """`Google Gemini` Chat models API. + + To use you must have the google.generativeai Python package installed and + either: + + 1. The ``GOOGLE_API_KEY``` environment variable set with your API key, or + 2. Pass your API key using the google_api_key kwarg to the ChatGoogle + constructor. + + Example: + .. code-block:: python + + from langchain.chat_models.google_gemini import ChatGoogleGemini + chat = ChatGoogleGemini(model_name="gemini-pro") + chat.invoke("Write me a ballad about LangChain") + + """ + + model_name: str = Field( + ..., + description="""The name of the model to use. +Supported examples: + - gemini-pro""", + ) + max_output_tokens: int = Field(default=None, description="Max output tokens") + + client: Any #: :meta private: + google_api_key: Optional[str] = None + temperature: Optional[float] = None + """Run inference with this temperature. Must by in the closed + interval [0.0, 1.0].""" + top_k: Optional[int] = None + """Decode using top-k sampling: consider the set of top_k most probable tokens. + Must be positive.""" + n: int = 1 + """Number of chat completions to generate for each prompt. 
Note that the API may
+    not return the full n completions if duplicates are generated."""
+
+    @property
+    def lc_secrets(self) -> Dict[str, str]:
+        return {"google_api_key": "GOOGLE_API_KEY"}
+
+    @classmethod
+    def is_lc_serializable(cls) -> bool:
+        return True
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        google_api_key = get_from_dict_or_env(
+            values, "google_api_key", "GOOGLE_API_KEY"
+        )
+        try:
+            import google.generativeai as genai
+
+            genai.configure(api_key=google_api_key)
+        except ImportError:
+            raise ChatGoogleGeminiError(
+                "Could not import google.generativeai python package. "
+                "Please install it with `pip install google-generativeai`"
+            )
+
+        values["client"] = genai
+
+        if (
+            values.get("temperature") is not None
+            and not 0 <= values["temperature"] <= 1
+        ):
+            raise ValueError("temperature must be in the range [0.0, 1.0]")
+
+        if values.get("top_p") is not None and not 0 <= values["top_p"] <= 1:
+            raise ValueError("top_p must be in the range [0.0, 1.0]")
+
+        if values.get("top_k") is not None and values["top_k"] <= 0:
+            raise ValueError("top_k must be positive")
+        model_name = values["model_name"]
+        values["_generative_model"] = genai.GenerativeModel(model_name=model_name)
+        return values
+
+    @property
+    def _identifying_params(self) -> Dict[str, Any]:
+        """Get the identifying parameters."""
+        return {
+            "model_name": self.model_name,
+            "temperature": self.temperature,
+            "top_k": self.top_k,
+            "n": self.n,
+        }
+
+    @property
+    def _generation_method(self) -> Callable:
+        return self._generative_model.generate_content
+
+    @property
+    def _async_generation_method(self) -> Awaitable:
+        # TODO: THIS IS BROKEN still...
+        return self._generative_model.generate_content
+
+    @property
+    def _llm_type(self) -> str:
+        return "google-gemini-chat"
+
+    def _prepare_params(
+        self, messages: Sequence[BaseMessage], stop: Optional[List[str]]
+    ) -> Dict[str, Any]:
+        contents = _messages_to_genai_contents(messages)
+        gen_config = {
+            k: v
+            for k, v in {
+                "candidate_count": self.n,
+                "temperature": self.temperature,
+                "stop_sequences": stop,
+                "max_output_tokens": self.max_output_tokens,
+            }.items()
+            if v is not None
+        }
+        params = {
+            "generation_config": gen_config,
+            "contents": contents,
+        }
+        return params
+
+    def _generate(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        params = self._prepare_params(messages, stop)
+        response: genai.types.GenerateContentResponse = chat_with_retry(
+            **params,
+            generation_method=self._generation_method,
+            **kwargs,
+        )
+        return _response_to_result(response)
+
+    async def _agenerate(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        # run_in_executor does not forward keyword arguments to the callable,
+        # so wrap the synchronous call in a closure instead of passing **kwargs.
+        return await asyncio.get_running_loop().run_in_executor(
+            None, lambda: self._generate(messages, stop, run_manager, **kwargs)
+        )
+
+    def _stream(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> Iterator[ChatGenerationChunk]:
+        params = self._prepare_params(messages, stop)
+        response: genai.types.GenerateContentResponse = chat_with_retry(
+            **params,
+            generation_method=self._generation_method,
+            **kwargs,
+            stream=True,
+        )
+        for chunk in response:
+            _chat_result = _response_to_result(
+                chunk,
+                
ai_msg_t=AIMessageChunk, + human_msg_t=HumanMessageChunk, + chat_msg_t=ChatMessageChunk, + generation_t=ChatGenerationChunk, + ) + gen = _chat_result.generations[0] + yield gen + if run_manager: + run_manager.on_llm_new_token(gen.text) diff --git a/libs/community/langchain_community/fix-handle-unknown-finish-reason.rst b/libs/community/langchain_community/fix-handle-unknown-finish-reason.rst new file mode 100644 index 0000000000000..97e49a6465539 --- /dev/null +++ b/libs/community/langchain_community/fix-handle-unknown-finish-reason.rst @@ -0,0 +1,27 @@ +.. _fix-handle-unknown-finish-reason: + +fix: handle unrecognized FinishReason enum value from Gemini API +--------------------------------------------------------------- + +**Problem** +When using `ChatGoogleGenerativeAI.with_structured_output()` with the Gemini API, +the model may return a `FinishReason` integer value (e.g., `12`) that is not defined +in the expected Enum class. This caused LangChain to crash with: + + AttributeError: 'int' object has no attribute 'name' + +**Fix** +Added a safe fallback that checks whether the `finish_reason` object has a `.name` +attribute before accessing it. If not, it is now represented as: + + "UNKNOWN()" + +This ensures compatibility with newer or unrecognized Gemini finish reasons. + +**Impact** +- Prevents crashes for unexpected `FinishReason` values. +- Ensures graceful degradation with clear traceability. +- Keeps full backward compatibility for all existing Gemini integrations. + +**Related issue:** `#33444 `_ +**Pull request:** :pr:`` From c1175df3c743008f3985e9ce5801b1fbc59dfcbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20C=C3=A9sar=20Maymone=20Galv=C3=A3o?= <139981685+bcmaymonegalvao@users.noreply.github.com> Date: Sun, 12 Oct 2025 01:29:32 -0300 Subject: [PATCH 02/10] Update changelog entry with PR reference #33448. --- .../fix-handle-unknown-finish-reason.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libs/community/langchain_community/fix-handle-unknown-finish-reason.rst b/libs/community/langchain_community/fix-handle-unknown-finish-reason.rst index 97e49a6465539..446b83340d7d0 100644 --- a/libs/community/langchain_community/fix-handle-unknown-finish-reason.rst +++ b/libs/community/langchain_community/fix-handle-unknown-finish-reason.rst @@ -4,14 +4,14 @@ fix: handle unrecognized FinishReason enum value from Gemini API --------------------------------------------------------------- **Problem** -When using `ChatGoogleGenerativeAI.with_structured_output()` with the Gemini API, -the model may return a `FinishReason` integer value (e.g., `12`) that is not defined +When using `ChatGoogleGenerativeAI.with_structured_output()` with the Gemini API, +the model may return a `FinishReason` integer value (e.g., `12`) that is not defined in the expected Enum class. This caused LangChain to crash with: AttributeError: 'int' object has no attribute 'name' **Fix** -Added a safe fallback that checks whether the `finish_reason` object has a `.name` +Added a safe fallback that checks whether the `finish_reason` object has a `.name` attribute before accessing it. If not, it is now represented as: "UNKNOWN()" @@ -24,4 +24,4 @@ This ensures compatibility with newer or unrecognized Gemini finish reasons. - Keeps full backward compatibility for all existing Gemini integrations. 
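+
+**Example**
+A minimal sketch of the guard (``candidate`` stands for any Gemini response
+candidate object):
+
+.. code-block:: python
+
+    finish_reason = candidate.finish_reason
+    if hasattr(finish_reason, "name"):
+        # Known FinishReason enum member
+        reason = finish_reason.name
+    else:
+        # Unrecognized raw integer value returned by the API
+        reason = f"UNKNOWN({finish_reason})"
+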
**Related issue:** `#33444 `_ -**Pull request:** :pr:`` +**Pull request:** :pr:`<#33448>` From 933e89818378119d68340a4b636657453543348f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20C=C3=A9sar=20Maymone=20Galv=C3=A3o?= <139981685+bcmaymonegalvao@users.noreply.github.com> Date: Sun, 12 Oct 2025 02:31:02 -0300 Subject: [PATCH 03/10] test(google-gemini): add regression test for unknown FinishReason (#33448) --- .../test_google_gemini_finish_reason.py | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 libs/community/langchain_community/chat_models/libs/community/tests/unit_tests/chat_models/test_google_gemini_finish_reason.py diff --git a/libs/community/langchain_community/chat_models/libs/community/tests/unit_tests/chat_models/test_google_gemini_finish_reason.py b/libs/community/langchain_community/chat_models/libs/community/tests/unit_tests/chat_models/test_google_gemini_finish_reason.py new file mode 100644 index 0000000000000..677d508efe814 --- /dev/null +++ b/libs/community/langchain_community/chat_models/libs/community/tests/unit_tests/chat_models/test_google_gemini_finish_reason.py @@ -0,0 +1,42 @@ +import pytest +from langchain_community.chat_models.google_gemini import ChatGoogleGemini + +def test_handle_unknown_finish_reason(): + """Ensure ChatGoogleGemini gracefully handles unrecognized FinishReason enum values.""" + + # Mock Gemini API part object + class MockPart: + text = "Mock message" # The main message text + inline_data = None # Simulate no inline data + + # Mock Gemini API content object: must contain .parts and .role + class MockContent: + parts = [MockPart()] # List of message parts + role = "model" # Emulates model output (could also be "user") + + # Mock Gemini API candidate object: represents one possible answer + class MockCandidate: + finish_reason = 12 # Simulate an unknown finish reason (int, not Enum) + content = MockContent() # Candidate content + safety_ratings = None # Needed by LangChain parsing logic + citation_metadata = None # Sometimes parsed by LangChain, safer to include + + # Mock Gemini API response object: contains a list of candidates and prompt_feedback + class MockResponse: + candidates = [MockCandidate()] # Single candidate with unknown finish_reason + prompt_feedback = None # Not relevant for this test + + # Mock Gemini API client: overrides .generate_content to return our mock response + class MockClient: + def generate_content(self, *_, **__): + return MockResponse() + + mock_client = MockClient() + llm = ChatGoogleGemini(client=mock_client, model="gemini-pro") + llm._generative_model = mock_client # Force internal use of mock client for test + + # The test: Try a normal invocation, which should NOT crash even with unknown finish_reason + result = llm.invoke("Hello!") + assert result is not None + # Ensure the generated info string contains the expected unknown code + assert "UNKNOWN(12)" in str(result), "Model did not handle unknown FinishReason as expected" From 8e7455cff6f98edc3209d0bde16c72938a560a5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20C=C3=A9sar=20Maymone=20Galv=C3=A3o?= <139981685+bcmaymonegalvao@users.noreply.github.com> Date: Sun, 12 Oct 2025 02:46:08 -0300 Subject: [PATCH 04/10] Add google_gemini module to CI validation script for library recognition --- .github/scripts/check_diff.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/scripts/check_diff.py b/.github/scripts/check_diff.py index e57b715166cdd..1332c29b1da60 100644 --- a/.github/scripts/check_diff.py +++ 
b/.github/scripts/check_diff.py @@ -293,6 +293,7 @@ def _get_configs_for_multi_dirs( dirs_to_run["test"].add("libs/partners/anthropic") dirs_to_run["test"].add("libs/partners/fireworks") dirs_to_run["test"].add("libs/partners/groq") + dirs_to_run["test"].add("libs/community/langchain_community/chat_models/google_gemini.py") elif file.startswith("libs/cli"): dirs_to_run["lint"].add("libs/cli") From fe0ec1269c0c4b3b1d1e379b7f06bf29e2cbb62f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20C=C3=A9sar=20Maymone=20Galv=C3=A3o?= <139981685+bcmaymonegalvao@users.noreply.github.com> Date: Sun, 12 Oct 2025 03:14:18 -0300 Subject: [PATCH 05/10] Refactoring CI Validation script --- .github/scripts/check_diff.py | 53 ++++++----------------------------- 1 file changed, 9 insertions(+), 44 deletions(-) diff --git a/.github/scripts/check_diff.py b/.github/scripts/check_diff.py index 1332c29b1da60..58b248d09fd52 100644 --- a/.github/scripts/check_diff.py +++ b/.github/scripts/check_diff.py @@ -33,24 +33,14 @@ ] # When set to True, we are ignoring core dependents -# in order to be able to get CI to pass for each individual -# package that depends on core -# e.g. if you touch core, we don't then add textsplitters/etc to CI IGNORE_CORE_DEPENDENTS = False -# ignored partners are removed from dependents -# but still run if directly edited +# ignored partners are removed from dependents, but still run if directly edited IGNORED_PARTNERS = [ - # remove huggingface from dependents because of CI instability - # specifically in huggingface jobs - # https://github.com/langchain-ai/langchain/issues/25558 "huggingface", - # prompty exhibiting issues with numpy for Python 3.13 - # https://github.com/langchain-ai/langchain/actions/runs/12651104685/job/35251034969?pr=29065 "prompty", ] - def all_package_dirs() -> Set[str]: return { "/".join(path.split("/")[:-1]).lstrip("./") @@ -58,10 +48,8 @@ def all_package_dirs() -> Set[str]: if "libs/cli" not in path and "libs/standard-tests" not in path } - def dependents_graph() -> dict: """Construct a mapping of package -> dependents - Done such that we can run tests on all dependents of a package when a change is made. 
""" dependents = defaultdict(set) @@ -93,7 +81,6 @@ def dependents_graph() -> dict: extended_deps = f.read().splitlines() for depline in extended_deps: if depline.startswith("-e "): - # editable dependency assert depline.startswith("-e ../partners/"), ( "Extended test deps should only editable install partner packages" ) @@ -111,7 +98,6 @@ def dependents_graph() -> dict: dependents[k].remove(f"libs/partners/{partner}") return dependents - def add_dependents(dirs_to_eval: Set[str], dependents: dict) -> List[str]: updated = set() for dir_ in dirs_to_eval: @@ -124,33 +110,27 @@ def add_dependents(dirs_to_eval: Set[str], dependents: dict) -> List[str]: updated.add(dir_) return list(updated) - def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]: if job == "test-pydantic": return _get_pydantic_test_configs(dir_) if job == "codspeed": - py_versions = ["3.12"] # 3.13 is not yet supported + py_versions = ["3.12"] elif dir_ == "libs/core": py_versions = ["3.10", "3.11", "3.12", "3.13"] - # custom logic for specific directories - elif dir_ == "libs/langchain" and job == "extended-tests": py_versions = ["3.10", "3.13"] elif dir_ == "libs/langchain_v1": py_versions = ["3.10", "3.13"] elif dir_ in {"libs/cli"}: py_versions = ["3.10", "3.13"] - elif dir_ == ".": - # unable to install with 3.13 because tokenizers doesn't support 3.13 yet py_versions = ["3.10", "3.12"] else: py_versions = ["3.10", "3.13"] return [{"working-directory": dir_, "python-version": py_v} for py_v in py_versions] - def _get_pydantic_test_configs( dir_: str, *, python_version: str = "3.11" ) -> List[Dict[str, str]]: @@ -205,7 +185,6 @@ def _get_pydantic_test_configs( ] return configs - def _get_configs_for_multi_dirs( job: str, dirs_to_run: Dict[str, Set[str]], dependents: dict ) -> List[Dict[str, str]]: @@ -229,7 +208,6 @@ def _get_configs_for_multi_dirs( config for dir_ in dirs for config in _get_configs_for_single_dir(job, dir_) ] - if __name__ == "__main__": files = sys.argv[1:] @@ -242,7 +220,6 @@ def _get_configs_for_multi_dirs( docs_edited = False if len(files) >= 300: - # max diff length is 300 files - there are likely files missing dirs_to_run["lint"] = all_package_dirs() dirs_to_run["test"] = all_package_dirs() dirs_to_run["extended-test"] = set(LANGCHAIN_DIRS) @@ -257,23 +234,11 @@ def _get_configs_for_multi_dirs( ".github/scripts/check_diff.py", ) ): - # Infrastructure changes (workflows, actions, CI scripts) trigger tests on - # all core packages as a safety measure. This ensures that changes to CI/CD - # infrastructure don't inadvertently break package testing, even if the change - # appears unrelated (e.g., documentation build workflows). This is intentionally - # conservative to catch unexpected side effects from workflow modifications. - # - # Example: A PR modifying .github/workflows/api_doc_build.yml will trigger - # lint/test jobs for libs/core, libs/text-splitters, libs/langchain, and - # libs/langchain_v1, even though the workflow may only affect documentation. 
dirs_to_run["extended-test"].update(LANGCHAIN_DIRS) if file.startswith("libs/core"): dirs_to_run["codspeed"].add("libs/core") if any(file.startswith(dir_) for dir_ in LANGCHAIN_DIRS): - # add that dir and all dirs after in LANGCHAIN_DIRS - # for extended testing - found = False for dir_ in LANGCHAIN_DIRS: if dir_ == "libs/core" and IGNORE_CORE_DEPENDENTS: @@ -284,8 +249,6 @@ def _get_configs_for_multi_dirs( if found: dirs_to_run["extended-test"].add(dir_) elif file.startswith("libs/standard-tests"): - # TODO: update to include all packages that rely on standard-tests (all partner packages) - # Note: won't run on external repo partners dirs_to_run["lint"].add("libs/standard-tests") dirs_to_run["test"].add("libs/standard-tests") dirs_to_run["test"].add("libs/partners/mistralai") @@ -309,11 +272,15 @@ def _get_configs_for_multi_dirs( dirs_to_run["test"].add(f"libs/partners/{partner_dir}") dirs_to_run["codspeed"].add(f"libs/partners/{partner_dir}") # Skip if the directory was deleted or is just a tombstone readme + + elif file.startswith("libs/community/langchain_community/chat_models/"): + # Recognize new chat_models (including google_gemini) as part of langchain_community for tests + dirs_to_run["test"].add("libs/community/langchain_community") + continue + elif file.startswith("libs/"): - # Check if this is a root-level file in libs/ (e.g., libs/README.md) file_parts = file.split("/") if len(file_parts) == 2: - # Root-level file in libs/, skip it (no tests needed) continue raise ValueError( f"Unknown lib: {file}. check_diff.py likely needs " @@ -322,13 +289,11 @@ def _get_configs_for_multi_dirs( elif file in [ "pyproject.toml", "uv.lock", - ]: # root uv files + ]: docs_edited = True dependents = dependents_graph() - # we now have dirs_by_job - # todo: clean this up map_job_to_configs = { job: _get_configs_for_multi_dirs(job, dirs_to_run, dependents) for job in [ From 9c38d676298d758d85c367be6efc05fa0ab1c823 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20C=C3=A9sar=20Maymone=20Galv=C3=A3o?= <139981685+bcmaymonegalvao@users.noreply.github.com> Date: Sun, 12 Oct 2025 03:30:55 -0300 Subject: [PATCH 06/10] ci(check_diff): ignore .rst changelog files in community libs --- .github/scripts/check_diff.py | 53 +++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 5 deletions(-) diff --git a/.github/scripts/check_diff.py b/.github/scripts/check_diff.py index 58b248d09fd52..fee739623be17 100644 --- a/.github/scripts/check_diff.py +++ b/.github/scripts/check_diff.py @@ -33,14 +33,24 @@ ] # When set to True, we are ignoring core dependents +# in order to be able to get CI to pass for each individual +# package that depends on core +# e.g. 
if you touch core, we don't then add textsplitters/etc to CI IGNORE_CORE_DEPENDENTS = False -# ignored partners are removed from dependents, but still run if directly edited +# ignored partners are removed from dependents +# but still run if directly edited IGNORED_PARTNERS = [ + # remove huggingface from dependents because of CI instability + # specifically in huggingface jobs + # https://github.com/langchain-ai/langchain/issues/25558 "huggingface", + # prompty exhibiting issues with numpy for Python 3.13 + # https://github.com/langchain-ai/langchain/actions/runs/12651104685/job/35251034969?pr=29065 "prompty", ] + def all_package_dirs() -> Set[str]: return { "/".join(path.split("/")[:-1]).lstrip("./") @@ -48,8 +58,10 @@ def all_package_dirs() -> Set[str]: if "libs/cli" not in path and "libs/standard-tests" not in path } + def dependents_graph() -> dict: """Construct a mapping of package -> dependents + Done such that we can run tests on all dependents of a package when a change is made. """ dependents = defaultdict(set) @@ -81,6 +93,7 @@ def dependents_graph() -> dict: extended_deps = f.read().splitlines() for depline in extended_deps: if depline.startswith("-e "): + # editable dependency assert depline.startswith("-e ../partners/"), ( "Extended test deps should only editable install partner packages" ) @@ -98,6 +111,7 @@ def dependents_graph() -> dict: dependents[k].remove(f"libs/partners/{partner}") return dependents + def add_dependents(dirs_to_eval: Set[str], dependents: dict) -> List[str]: updated = set() for dir_ in dirs_to_eval: @@ -110,27 +124,33 @@ def add_dependents(dirs_to_eval: Set[str], dependents: dict) -> List[str]: updated.add(dir_) return list(updated) + def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]: if job == "test-pydantic": return _get_pydantic_test_configs(dir_) if job == "codspeed": - py_versions = ["3.12"] + py_versions = ["3.12"] # 3.13 is not yet supported elif dir_ == "libs/core": py_versions = ["3.10", "3.11", "3.12", "3.13"] + # custom logic for specific directories + elif dir_ == "libs/langchain" and job == "extended-tests": py_versions = ["3.10", "3.13"] elif dir_ == "libs/langchain_v1": py_versions = ["3.10", "3.13"] elif dir_ in {"libs/cli"}: py_versions = ["3.10", "3.13"] + elif dir_ == ".": + # unable to install with 3.13 because tokenizers doesn't support 3.13 yet py_versions = ["3.10", "3.12"] else: py_versions = ["3.10", "3.13"] return [{"working-directory": dir_, "python-version": py_v} for py_v in py_versions] + def _get_pydantic_test_configs( dir_: str, *, python_version: str = "3.11" ) -> List[Dict[str, str]]: @@ -185,6 +205,7 @@ def _get_pydantic_test_configs( ] return configs + def _get_configs_for_multi_dirs( job: str, dirs_to_run: Dict[str, Set[str]], dependents: dict ) -> List[Dict[str, str]]: @@ -208,6 +229,7 @@ def _get_configs_for_multi_dirs( config for dir_ in dirs for config in _get_configs_for_single_dir(job, dir_) ] + if __name__ == "__main__": files = sys.argv[1:] @@ -220,6 +242,7 @@ def _get_configs_for_multi_dirs( docs_edited = False if len(files) >= 300: + # max diff length is 300 files - there are likely files missing dirs_to_run["lint"] = all_package_dirs() dirs_to_run["test"] = all_package_dirs() dirs_to_run["extended-test"] = set(LANGCHAIN_DIRS) @@ -234,11 +257,23 @@ def _get_configs_for_multi_dirs( ".github/scripts/check_diff.py", ) ): + # Infrastructure changes (workflows, actions, CI scripts) trigger tests on + # all core packages as a safety measure. 
This ensures that changes to CI/CD + # infrastructure don't inadvertently break package testing, even if the change + # appears unrelated (e.g., documentation build workflows). This is intentionally + # conservative to catch unexpected side effects from workflow modifications. + # + # Example: A PR modifying .github/workflows/api_doc_build.yml will trigger + # lint/test jobs for libs/core, libs/text-splitters, libs/langchain, and + # libs/langchain_v1, even though the workflow may only affect documentation. dirs_to_run["extended-test"].update(LANGCHAIN_DIRS) if file.startswith("libs/core"): dirs_to_run["codspeed"].add("libs/core") if any(file.startswith(dir_) for dir_ in LANGCHAIN_DIRS): + # add that dir and all dirs after in LANGCHAIN_DIRS + # for extended testing + found = False for dir_ in LANGCHAIN_DIRS: if dir_ == "libs/core" and IGNORE_CORE_DEPENDENTS: @@ -249,6 +284,8 @@ def _get_configs_for_multi_dirs( if found: dirs_to_run["extended-test"].add(dir_) elif file.startswith("libs/standard-tests"): + # TODO: update to include all packages that rely on standard-tests (all partner packages) + # Note: won't run on external repo partners dirs_to_run["lint"].add("libs/standard-tests") dirs_to_run["test"].add("libs/standard-tests") dirs_to_run["test"].add("libs/partners/mistralai") @@ -272,15 +309,19 @@ def _get_configs_for_multi_dirs( dirs_to_run["test"].add(f"libs/partners/{partner_dir}") dirs_to_run["codspeed"].add(f"libs/partners/{partner_dir}") # Skip if the directory was deleted or is just a tombstone readme - elif file.startswith("libs/community/langchain_community/chat_models/"): - # Recognize new chat_models (including google_gemini) as part of langchain_community for tests + # Recognize new chat_models (including google_gemini) as part of langchain_community for tests dirs_to_run["test"].add("libs/community/langchain_community") + + elif file.startswith("libs/community/langchain_community/") and file.endswith(".rst"): + # Ignore documentation or changelog files in community libs continue elif file.startswith("libs/"): + # Check if this is a root-level file in libs/ (e.g., libs/README.md) file_parts = file.split("/") if len(file_parts) == 2: + # Root-level file in libs/, skip it (no tests needed) continue raise ValueError( f"Unknown lib: {file}. 
check_diff.py likely needs " @@ -289,11 +330,13 @@ def _get_configs_for_multi_dirs( elif file in [ "pyproject.toml", "uv.lock", - ]: + ]: # root uv files docs_edited = True dependents = dependents_graph() + # we now have dirs_by_job + # todo: clean this up map_job_to_configs = { job: _get_configs_for_multi_dirs(job, dirs_to_run, dependents) for job in [ From 1950ae5d32cc896de52454160e00ec26710cab56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20C=C3=A9sar=20Maymone=20Galv=C3=A3o?= <139981685+bcmaymonegalvao@users.noreply.github.com> Date: Sun, 12 Oct 2025 03:35:16 -0300 Subject: [PATCH 07/10] ci(check_diff): skip pydantic tests for packages without uv.lock (#33448) --- .github/scripts/check_diff.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/scripts/check_diff.py b/.github/scripts/check_diff.py index fee739623be17..91285ebdbc79b 100644 --- a/.github/scripts/check_diff.py +++ b/.github/scripts/check_diff.py @@ -154,6 +154,10 @@ def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]: def _get_pydantic_test_configs( dir_: str, *, python_version: str = "3.11" ) -> List[Dict[str, str]]: + # Skip directories without uv.lock (e.g., community or doc-only packages) + if not os.path.exists(f"./{dir_}/uv.lock"): + return [] + with open("./libs/core/uv.lock", "rb") as f: core_uv_lock_data = tomllib.load(f) for package in core_uv_lock_data["package"]: From 18753de5d4b61a1938a66a1c156a42b3cfcd2628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20C=C3=A9sar=20Maymone=20Galv=C3=A3o?= <139981685+bcmaymonegalvao@users.noreply.github.com> Date: Sun, 12 Oct 2025 03:37:35 -0300 Subject: [PATCH 08/10] ci(check_diff): fix _get_pydantic_test_configs function block (#33448) --- .github/scripts/check_diff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/check_diff.py b/.github/scripts/check_diff.py index 91285ebdbc79b..413880a3c3f20 100644 --- a/.github/scripts/check_diff.py +++ b/.github/scripts/check_diff.py @@ -156,7 +156,7 @@ def _get_pydantic_test_configs( ) -> List[Dict[str, str]]: # Skip directories without uv.lock (e.g., community or doc-only packages) if not os.path.exists(f"./{dir_}/uv.lock"): - return [] + return [] with open("./libs/core/uv.lock", "rb") as f: core_uv_lock_data = tomllib.load(f) From aa2b0aabaadcf68f5f6e79f0c80bcf58892f6096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20C=C3=A9sar=20Maymone=20Galv=C3=A3o?= <139981685+bcmaymonegalvao@users.noreply.github.com> Date: Sun, 12 Oct 2025 03:39:52 -0300 Subject: [PATCH 09/10] ci(check_diff): fix _get_pydantic_test_configs function identation (#33448) --- .github/scripts/check_diff.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/scripts/check_diff.py b/.github/scripts/check_diff.py index 413880a3c3f20..8553e995206be 100644 --- a/.github/scripts/check_diff.py +++ b/.github/scripts/check_diff.py @@ -155,8 +155,8 @@ def _get_pydantic_test_configs( dir_: str, *, python_version: str = "3.11" ) -> List[Dict[str, str]]: # Skip directories without uv.lock (e.g., community or doc-only packages) - if not os.path.exists(f"./{dir_}/uv.lock"): - return [] + if not os.path.exists(f"./{dir_}/uv.lock"): + return [] with open("./libs/core/uv.lock", "rb") as f: core_uv_lock_data = tomllib.load(f) From efd8debeb8b33e83c11baa42aed51d79e32b2fe2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20C=C3=A9sar=20Maymone=20Galv=C3=A3o?= <139981685+bcmaymonegalvao@users.noreply.github.com> Date: Sun, 12 Oct 2025 03:54:01 -0300 Subject: [PATCH 10/10] ci(check_diff): 
skip langchain_community package without pyproject.toml (#33448) --- .github/scripts/check_diff.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/scripts/check_diff.py b/.github/scripts/check_diff.py index 8553e995206be..c5f43e0e283fa 100644 --- a/.github/scripts/check_diff.py +++ b/.github/scripts/check_diff.py @@ -339,8 +339,13 @@ def _get_configs_for_multi_dirs( dependents = dependents_graph() - # we now have dirs_by_job - # todo: clean this up + # 🧩 Skip langchain_community because it is not a standalone package (no pyproject.toml) + for job in dirs_to_run: + dirs_to_run[job] = { + d for d in dirs_to_run[job] + if d != "libs/community/langchain_community" + } + map_job_to_configs = { job: _get_configs_for_multi_dirs(job, dirs_to_run, dependents) for job in [
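
A minimal usage sketch, assuming a configured ``ChatGoogleGemini`` instance named
``chat``: with the fallback in place, the reported finish reason can be read from
``generation_info`` without special-casing unknown enum values.

    from langchain_core.messages import HumanMessage

    result = chat.generate([[HumanMessage(content="Hello!")]])
    info = result.generations[0][0].generation_info or {}
    # An unrecognized enum value such as 12 is reported as "UNKNOWN(12)"
    print(info.get("finish_reason"))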