diff --git a/pyproject.toml b/pyproject.toml
index f354425..7b496d0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -142,15 +142,22 @@ target-version = "py310"
 
 [tool.ruff.format]
 docstring-code-format = true
+skip-magic-trailing-comma = true
 
 [tool.ruff.lint]
-select = ["A", "ASYNC", "B", "BLE", "C4", "C90", "D", "DTZ", "E", "EM", "ERA", "F", "FBT", "FLY", "FURB", "G", "I", "ICN", "INP", "INT", "ISC", "LOG", "N", "NPY", "PERF", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "Q", "RET", "RSE", "RUF", "S", "SIM", "SLF", "SLOT", "T10", "T20", "TCH", "TID", "TRY", "UP", "W", "YTT"]
-ignore = ["D203", "D213", "E501", "RET504", "RUF002", "RUF022", "S101", "S307", "TC004"]
+select = ["ALL"]
+ignore = ["CPY", "FIX", "ARG001", "COM812", "D203", "D213", "E501", "PD008", "PD009", "RET504", "S101", "TD003"]
 unfixable = ["ERA001", "F401", "F841", "T201", "T203"]
 
+[tool.ruff.lint.flake8-annotations]
+allow-star-arg-any = true
+
 [tool.ruff.lint.flake8-tidy-imports]
 ban-relative-imports = "all"
 
+[tool.ruff.lint.isort]
+split-on-trailing-comma = false
+
 [tool.ruff.lint.pycodestyle]
 max-doc-length = 100
 
diff --git a/src/raglite/__init__.py b/src/raglite/__init__.py
index df2e30d..a0b9995 100644
--- a/src/raglite/__init__.py
+++ b/src/raglite/__init__.py
@@ -17,7 +17,7 @@
     vector_search,
 )
 
-__all__ = [
+__all__ = [  # noqa: RUF022
     # Config
     "RAGLiteConfig",
     # Insert
diff --git a/src/raglite/_bench.py b/src/raglite/_bench.py
index 3356c3c..6a40637 100644
--- a/src/raglite/_bench.py
+++ b/src/raglite/_bench.py
@@ -94,7 +94,7 @@ def __init__(
         insert_variant: str | None = None,
         search_variant: str | None = None,
         config: RAGLiteConfig | None = None,
-    ):
+    ) -> None:
         super().__init__(
             dataset,
             num_results=num_results,
@@ -145,7 +145,7 @@ def __init__(
         num_results: int = 10,
         insert_variant: str | None = None,
         search_variant: str | None = None,
-    ):
+    ) -> None:
         super().__init__(
             dataset,
             num_results=num_results,
@@ -156,7 +156,7 @@ def __init__(
         self.embedder_dim = 3072
         self.persist_path = self.cwd / self.insert_id
 
-    def insert_documents(self, max_workers: int | None = None) -> None:
+    def insert_documents(self, max_workers: int | None = None) -> None:  # noqa: ARG002
         # Adapted from https://docs.llamaindex.ai/en/stable/examples/vector_stores/FaissIndexDemo/.
         import faiss
         from llama_index.core import Document, StorageContext, VectorStoreIndex
@@ -178,7 +178,7 @@ def insert_documents(self, max_workers: int | None = None) -> None:
         index.storage_context.persist(persist_dir=self.persist_path)
 
     @cached_property
-    def index(self) -> Any:
+    def index(self) -> Any:  # noqa: ANN401
         from llama_index.core import StorageContext, load_index_from_storage
         from llama_index.embeddings.openai import OpenAIEmbedding
         from llama_index.vector_stores.faiss import FaissVectorStore
@@ -215,7 +215,7 @@ def __init__(
         num_results: int = 10,
         insert_variant: str | None = None,
         search_variant: str | None = None,
-    ):
+    ) -> None:
         super().__init__(
             dataset,
             num_results=num_results,
@@ -227,7 +227,7 @@ def __init__(
         )
 
     @cached_property
-    def client(self) -> Any:
+    def client(self) -> Any:  # noqa: ANN401
         import openai
 
         return openai.OpenAI()
diff --git a/src/raglite/_chatml_function_calling.py b/src/raglite/_chatml_function_calling.py
index dcbaa4d..54c0da1 100644
--- a/src/raglite/_chatml_function_calling.py
+++ b/src/raglite/_chatml_function_calling.py
@@ -25,24 +25,12 @@
 
 import json
 import warnings
-from typing import (  # noqa: UP035
-    Any,
-    Iterator,
-    List,
-    Optional,
-    Union,
-    cast,
-)
+from typing import Any, Iterator, List, Optional, Union, cast  # noqa: UP035
 
 import jinja2
 from jinja2.sandbox import ImmutableSandboxedEnvironment
 
-from raglite._lazy_llama import (
-    llama,
-    llama_chat_format,
-    llama_grammar,
-    llama_types,
-)
+from raglite._lazy_llama import llama, llama_chat_format, llama_grammar, llama_types
 
 
 def _accumulate_chunks(
@@ -98,7 +86,7 @@ def _convert_chunks_to_completion(
             {
                 "text": text,
                 "index": 0,
-                "logprobs": logprobs,  # TODO: Improve accumulation of logprobs
+                "logprobs": logprobs,  # TODO(lsorber): Improve accumulation of logprobs
                 "finish_reason": finish_reason,  # type: ignore[typeddict-item]
             }
         ],
@@ -143,12 +131,7 @@ def _stream_tool_calls(
         llama_grammar.JSON_GBNF, verbose=llama.verbose
     )
     completion_or_chunks = llama.create_completion(
-        prompt=prompt,
-        **{
-            **completion_kwargs,
-            "max_tokens": None,
-            "grammar": grammar,
-        },
+        prompt=prompt, **{**completion_kwargs, "max_tokens": None, "grammar": grammar}
     )
     chunks: List[llama_types.CreateCompletionResponse] = []
     chat_chunks = llama_chat_format._convert_completion_to_chat_function(  # noqa: SLF001
@@ -206,11 +189,7 @@ def _convert_text_completion_logprobs_to_chat(
                 "bytes": None,
                 "logprob": logprob,  # type: ignore[typeddict-item]
                 "top_logprobs": [
-                    {
-                        "token": top_token,
-                        "logprob": top_logprob,
-                        "bytes": None,
-                    }
+                    {"token": top_token, "logprob": top_logprob, "bytes": None}
                     for top_token, top_logprob in (top_logprobs or {}).items()
                 ],
             }
@@ -318,9 +297,9 @@ def chatml_function_calling_with_streaming(
         "{% endfor %}"
         "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
     )
-    template_renderer = ImmutableSandboxedEnvironment(
-        undefined=jinja2.StrictUndefined,
-    ).from_string(function_calling_template)
+    template_renderer = ImmutableSandboxedEnvironment(undefined=jinja2.StrictUndefined).from_string(
+        function_calling_template
+    )
 
     # Convert legacy functions to tools
     if functions is not None:
diff --git a/src/raglite/_cli.py b/src/raglite/_cli.py
index 34fcd24..e6342f2 100644
--- a/src/raglite/_cli.py
+++ b/src/raglite/_cli.py
@@ -88,7 +88,7 @@ def install_mcp_server(
                 "--python",
                 "3.11",
                 "--with",
-                "numpy<2.0.0",  # TODO: Remove this constraint when uv no longer needs it to solve the environment.
+                "numpy<2.0.0",  # TODO(lsorber): Remove this constraint when uv no longer needs it to solve the environment.
                 "raglite",
                 "mcp",
                 "run",
diff --git a/src/raglite/_config.py b/src/raglite/_config.py
index 3988060..af13126 100644
--- a/src/raglite/_config.py
+++ b/src/raglite/_config.py
@@ -24,7 +24,7 @@
 
 
 # Lazily load the default search method to avoid circular imports.
-# TODO: Replace with search_and_rerank_chunk_spans after benchmarking.
+# TODO(lsorber): Replace with search_and_rerank_chunk_spans after benchmarking.
 def _vector_search(
     query: str, *, num_results: int = 8, config: "RAGLiteConfig | None" = None
 ) -> tuple[list[ChunkId], list[float]]:
diff --git a/src/raglite/_embed.py b/src/raglite/_embed.py
index 0e523f5..fc50d08 100644
--- a/src/raglite/_embed.py
+++ b/src/raglite/_embed.py
@@ -78,7 +78,7 @@ def _create_segment(
     # Compute the number of tokens per sentence. We use a method based on a sentinel token to
     # minimise the number of calls to embedder.tokenize, which incurs a significant overhead
     # (presumably to load the tokenizer) [1].
-    # TODO: Make token counting faster and more robust once [1] is fixed.
+    # TODO(lsorber): Make token counting faster and more robust once [1] is fixed.
     # [1] https://github.com/abetlen/llama-cpp-python/issues/1763
     num_tokens_list: list[int] = []
     sentence_batch, sentence_batch_len = [], 0
@@ -94,7 +94,7 @@ def _create_segment(
     # Compute the maximum number of tokens for each segment's preamble and content.
     # Unfortunately, llama-cpp-python truncates the input to n_batch tokens and crashes if you try
     # to increase it [1]. Until this is fixed, we have to limit max_tokens to n_batch.
-    # TODO: Improve the context window size once [1] is fixed.
+    # TODO(lsorber): Improve the context window size once [1] is fixed.
     # [1] https://github.com/abetlen/llama-cpp-python/issues/1762
     max_tokens = min(n_ctx, n_batch) - 16
     max_tokens_preamble = round(0.382 * max_tokens)  # Golden ratio.
diff --git a/src/raglite/_eval.py b/src/raglite/_eval.py
index ab2296c..cf57377 100644
--- a/src/raglite/_eval.py
+++ b/src/raglite/_eval.py
@@ -112,11 +112,7 @@ class ContextEvalResponse(BaseModel):
 
     relevant_chunks = []
     for candidate_chunk in tqdm(
-        candidate_chunks,
-        desc="Evaluating chunks",
-        unit="chunk",
-        dynamic_ncols=True,
-        leave=False,
+        candidate_chunks, desc="Evaluating chunks", unit="chunk", dynamic_ncols=True, leave=False
     ):
         try:
             context_eval_response = extract_with_llm(
@@ -139,8 +135,7 @@ class AnswerResponse(BaseModel):
             extra="forbid"  # Forbid extra attributes as required by OpenAI's strict mode.
         )
         answer: str = Field(
-            ...,
-            description="A complete answer to the given question using the provided context.",
+            ..., description="A complete answer to the given question using the provided context."
        )
        system_prompt: ClassVar[str] = f"""
        You are given a set of contexts extracted from a document.
@@ -191,11 +186,7 @@ def insert_evals(
             session.execute(text("CHECKPOINT;"))
 
 
-def answer_evals(
-    num_evals: int = 100,
-    *,
-    config: RAGLiteConfig | None = None,
-) -> "pd.DataFrame":
+def answer_evals(num_evals: int = 100, *, config: RAGLiteConfig | None = None) -> "pd.DataFrame":
     """Read evals from the database and answer them with RAG."""
     try:
         import pandas as pd
@@ -251,7 +242,7 @@ def evaluate(
     class RAGLiteRagasEmbeddings(BaseRagasEmbeddings):
         """A RAGLite embedder for Ragas."""
 
-        def __init__(self, config: RAGLiteConfig | None = None):
+        def __init__(self, config: RAGLiteConfig | None = None) -> None:
             self.config = config or RAGLiteConfig()
 
         def embed_query(self, text: str) -> list[float]:
diff --git a/src/raglite/_extract.py b/src/raglite/_extract.py
index f904747..328f0ac 100644
--- a/src/raglite/_extract.py
+++ b/src/raglite/_extract.py
@@ -45,7 +45,7 @@ class MyNameResponse(BaseModel):
     # is disabled by default because it only supports a subset of JSON schema features [2].
     # [1] https://docs.litellm.ai/docs/completion/json_mode
     # [2] https://platform.openai.com/docs/guides/structured-outputs#some-type-specific-keywords-are-not-yet-supported
-    # TODO: Fall back to {"type": "json_object"} if JSON schema is not supported by the LLM.
+    # TODO(lsorber): Fall back to {"type": "json_object"} if JSON schema isn't supported by the LLM.
     response_format: dict[str, Any] | None = (
         {
             "type": "json_schema",
diff --git a/src/raglite/_insert.py b/src/raglite/_insert.py
index bd63c29..2deb171 100644
--- a/src/raglite/_insert.py
+++ b/src/raglite/_insert.py
@@ -29,9 +29,7 @@ def _create_chunk_records(
     chunklets = split_chunklets(sentences, max_size=config.chunk_max_size)
     chunklet_embeddings = embed_strings(chunklets, config=config)
     chunks, chunk_embeddings = split_chunks(
-        chunklets=chunklets,
-        chunklet_embeddings=chunklet_embeddings,
-        max_size=config.chunk_max_size,
+        chunklets=chunklets, chunklet_embeddings=chunklet_embeddings, max_size=config.chunk_max_size
     )
     # Create the chunk records.
     chunk_records, headings = [], ""
@@ -79,12 +77,7 @@ def _create_chunk_records(
             )
         else:
             chunk_embedding_records_list.append(
-                [
-                    ChunkEmbedding(
-                        chunk_id=chunk_record.id,
-                        embedding=full_chunk_embedding,
-                    )
-                ]
+                [ChunkEmbedding(chunk_id=chunk_record.id, embedding=full_chunk_embedding)]
             )
 
     return document, chunk_records, chunk_embedding_records_list
diff --git a/src/raglite/_lazy_llama.py b/src/raglite/_lazy_llama.py
index 8c10a51..3e6e238 100644
--- a/src/raglite/_lazy_llama.py
+++ b/src/raglite/_lazy_llama.py
@@ -36,17 +36,17 @@ def __getattr__(name: str) -> object:
     class LazyAttributeError:
         error_message = "To use llama.cpp models, please install `llama-cpp-python`."
 
-        def __init__(self, error: ModuleNotFoundError | None = None):
+        def __init__(self, error: ModuleNotFoundError | None = None) -> None:
             self.error = error
 
         def __getattr__(self, name: str) -> NoReturn:
             raise ModuleNotFoundError(self.error_message) from self.error
 
-        def __call__(self, *args: Any, **kwargs: Any) -> NoReturn:
+        def __call__(self, *args: Any, **kwargs: Any) -> NoReturn:  # noqa: ARG002
             raise ModuleNotFoundError(self.error_message) from self.error
 
     class LazySubmoduleError:
-        def __init__(self, error: ModuleNotFoundError):
+        def __init__(self, error: ModuleNotFoundError) -> None:
             self.error = error
 
         def __getattr__(self, name: str) -> LazyAttributeError | type[LazyAttributeError]:
diff --git a/src/raglite/_litellm.py b/src/raglite/_litellm.py
index 6525920..77c99ab 100644
--- a/src/raglite/_litellm.py
+++ b/src/raglite/_litellm.py
@@ -1,5 +1,7 @@
 """Add support for llama-cpp-python models to LiteLLM."""
 
+# ruff: noqa: ANN401, ARG002
+
 import asyncio
 import contextlib
 import logging
@@ -26,12 +28,7 @@
 
 from raglite._chatml_function_calling import chatml_function_calling_with_streaming
 from raglite._config import RAGLiteConfig
-from raglite._lazy_llama import (
-    Llama,
-    LlamaRAMCache,
-    llama_supports_gpu_offload,
-    llama_types,
-)
+from raglite._lazy_llama import Llama, LlamaRAMCache, llama_supports_gpu_offload, llama_types
 
 # Reduce the logging level for LiteLLM, flashrank, and httpx.
 litellm.suppress_debug_info = True
diff --git a/src/raglite/_query_adapter.py b/src/raglite/_query_adapter.py
index 47957cc..cbd4e0f 100644
--- a/src/raglite/_query_adapter.py
+++ b/src/raglite/_query_adapter.py
@@ -1,6 +1,6 @@
 """Compute and update an optimal query adapter."""
 
-# ruff: noqa: N806
+# ruff: noqa: N806, RUF002
 
 from dataclasses import replace
 
diff --git a/src/raglite/_rag.py b/src/raglite/_rag.py
index 07f676a..8393e31 100644
--- a/src/raglite/_rag.py
+++ b/src/raglite/_rag.py
@@ -119,7 +119,7 @@ def _get_tools(
                             "The `query` string MUST be a precise single-faceted question in the user's language.\n"
                             "The `query` string MUST resolve all pronouns to explicit nouns."
                         ),
-                    },
+                    }
                 },
                 "required": ["query"],
                 "additionalProperties": False,
@@ -237,7 +237,7 @@ async def async_rag(
         # Add the tool call requests to the message array.
         messages.append(response.choices[0].message.to_dict())  # type: ignore[arg-type,union-attr]
         # Run the tool calls to retrieve the RAG context and append the output to the message array.
-        # TODO: Make this async.
+        # TODO(lsorber): Make this async.
         messages.extend(_run_tools(tool_calls, on_retrieval, config))
         # Asynchronously stream the assistant response.
         chunks = []
diff --git a/src/raglite/_split_chunks.py b/src/raglite/_split_chunks.py
index d85b3e8..f3355a7 100644
--- a/src/raglite/_split_chunks.py
+++ b/src/raglite/_split_chunks.py
@@ -10,9 +10,7 @@
 
 
 def split_chunks(  # noqa: C901, PLR0915
-    chunklets: list[str],
-    chunklet_embeddings: FloatMatrix,
-    max_size: int = 2048,
+    chunklets: list[str], chunklet_embeddings: FloatMatrix, max_size: int = 2048
 ) -> tuple[list[str], list[FloatMatrix]]:
     """Split chunklets into optimal semantic chunks with corresponding chunklet embeddings.
 
@@ -103,11 +101,7 @@ def split_chunks(  # noqa: C901, PLR0915
     )
     b_ub = np.ones(A.shape[0], dtype=np.float32)
     res = linprog(
-        partition_similarity,
-        A_ub=-A,
-        b_ub=-b_ub,
-        bounds=(0, 1),
-        integrality=[1] * A.shape[1],
+        partition_similarity, A_ub=-A, b_ub=-b_ub, bounds=(0, 1), integrality=[1] * A.shape[1]
     )
     if not res.success:
         error_message = "Optimization of chunk partitions failed."
diff --git a/src/raglite/_typing.py b/src/raglite/_typing.py
index 5a0cb73..94c4661 100644
--- a/src/raglite/_typing.py
+++ b/src/raglite/_typing.py
@@ -1,5 +1,7 @@
 """RAGLite typing."""
 
+# ruff: noqa: ANN401, ARG002
+
 import io
 import pickle
 from collections.abc import Callable
@@ -96,12 +98,7 @@ def __init__(self, left: Any, right: Any, metric: DistanceMetric) -> None:
 
 @compiles(EmbeddingDistance, "postgresql")
 def _embedding_distance_postgresql(element: EmbeddingDistance, compiler: Any, **kwargs: Any) -> str:
-    op_map: dict[DistanceMetric, str] = {
-        "cosine": "<=>",
-        "dot": "<#>",
-        "l1": "<+>",
-        "l2": "<->",
-    }
+    op_map: dict[DistanceMetric, str] = {"cosine": "<=>", "dot": "<#>", "l1": "<+>", "l2": "<->"}
     left, right = list(element.clauses)
     operator = op_map[element.metric]
     return f"({compiler.process(left)} {operator} {compiler.process(right)})"
@@ -202,7 +199,7 @@ class Embedding(TypeDecorator[FloatVector]):
     impl = NumpyArray
     comparator_factory: type[EmbeddingComparator] = EmbeddingComparator
 
-    def __init__(self, dim: int = -1):
+    def __init__(self, dim: int = -1) -> None:
         super().__init__()
         self.dim = dim
diff --git a/tests/test_chatml_function_calling.py b/tests/test_chatml_function_calling.py
index fd0c91a..a2b612f 100644
--- a/tests/test_chatml_function_calling.py
+++ b/tests/test_chatml_function_calling.py
@@ -10,11 +10,7 @@
 from typeguard import ForwardRefPolicy, check_type
 
 from raglite._chatml_function_calling import chatml_function_calling_with_streaming
-from raglite._lazy_llama import (
-    Llama,
-    llama_supports_gpu_offload,
-    llama_types,
-)
+from raglite._lazy_llama import Llama, llama_supports_gpu_offload, llama_types
 
 
 def is_accelerator_available() -> bool:
@@ -27,11 +23,7 @@ def is_accelerator_available() -> bool:
 
 
 @pytest.mark.parametrize(
-    "stream",
-    [
-        pytest.param(True, id="stream=True"),
-        pytest.param(False, id="stream=False"),
-    ],
+    "stream", [pytest.param(True, id="stream=True"), pytest.param(False, id="stream=False")]
 )
 @pytest.mark.parametrize(
     "tool_choice",
@@ -46,14 +38,8 @@ def is_accelerator_available() -> bool:
 @pytest.mark.parametrize(
     "user_prompt_expected_tool_calls",
     [
-        pytest.param(
-            ("Is 7 a prime number?", 0),
-            id="expected_tool_calls=0",
-        ),
-        pytest.param(
-            ("What's the weather like in Paris today?", 1),
-            id="expected_tool_calls=1",
-        ),
+        pytest.param(("Is 7 a prime number?", 0), id="expected_tool_calls=0"),
+        pytest.param(("What's the weather like in Paris today?", 1), id="expected_tool_calls=1"),
         pytest.param(
             ("What's the weather like in Paris today? What about New York?", 2),
             id="expected_tool_calls=2",
diff --git a/tests/test_lazy_llama.py b/tests/test_lazy_llama.py
index 42363c4..c68067d 100644
--- a/tests/test_lazy_llama.py
+++ b/tests/test_lazy_llama.py
@@ -19,7 +19,7 @@ def test_raglite_import_without_llama_cpp(monkeypatch: pytest.MonkeyPatch) -> No
     original_import = builtins.__import__
 
     # Define a fake import function that raises ModuleNotFoundError when trying to import llama_cpp.
-    def fake_import(name: str, *args: Any) -> Any:
+    def fake_import(name: str, *args: Any) -> Any:  # noqa: ANN401
         if name.startswith("llama_cpp"):
             import_error = f"No module named '{name}'"
             raise ModuleNotFoundError(import_error)
diff --git a/tests/test_rag.py b/tests/test_rag.py
index 151cd96..bb3ca1e 100644
--- a/tests/test_rag.py
+++ b/tests/test_rag.py
@@ -2,11 +2,7 @@
 
 import json
 
-from raglite import (
-    RAGLiteConfig,
-    add_context,
-    retrieve_context,
-)
+from raglite import RAGLiteConfig, add_context, retrieve_context
 from raglite._database import ChunkSpan
 from raglite._rag import rag
 
diff --git a/tests/test_rerank.py b/tests/test_rerank.py
index 25ddf7b..51da375 100644
--- a/tests/test_rerank.py
+++ b/tests/test_rerank.py
@@ -31,19 +31,16 @@ def kendall_tau(a: list[T], b: list[T]) -> float:
             },
             id="flashrank_multilingual",
         ),
-    ],
+    ]
 )
-def reranker(
-    request: pytest.FixtureRequest,
-) -> BaseRanker | dict[str, BaseRanker] | None:
+def reranker(request: pytest.FixtureRequest) -> BaseRanker | dict[str, BaseRanker] | None:
     """Get a reranker to test RAGLite with."""
     reranker: BaseRanker | dict[str, BaseRanker] | None = request.param
     return reranker
 
 
 def test_reranker(
-    raglite_test_config: RAGLiteConfig,
-    reranker: BaseRanker | dict[str, BaseRanker] | None,
+    raglite_test_config: RAGLiteConfig, reranker: BaseRanker | dict[str, BaseRanker] | None
 ) -> None:
     """Test inserting a document, updating the indexes, and searching for a query."""
     # Update the config with the reranker.
diff --git a/tests/test_search.py b/tests/test_search.py
index 8151018..6c116ee 100644
--- a/tests/test_search.py
+++ b/tests/test_search.py
@@ -19,11 +19,9 @@
         pytest.param(keyword_search, id="keyword_search"),
         pytest.param(vector_search, id="vector_search"),
         pytest.param(hybrid_search, id="hybrid_search"),
-    ],
+    ]
 )
-def search_method(
-    request: pytest.FixtureRequest,
-) -> BasicSearchMethod:
+def search_method(request: pytest.FixtureRequest) -> BasicSearchMethod:
     """Get a search method to test RAGLite with."""
     search_method: BasicSearchMethod = request.param
     return search_method
diff --git a/tests/test_split_sentences.py b/tests/test_split_sentences.py
index b8de530..5122297 100644
--- a/tests/test_split_sentences.py
+++ b/tests/test_split_sentences.py
@@ -61,8 +61,7 @@ def test_split_sentences() -> None:
             id="huge-2a",
         ),
         pytest.param(
-            ("X" * 768 + " " + "X" * 768, ["X" * 768 + " ", "X" * 768], (4, 1024)),
-            id="huge-2b",
+            ("X" * 768 + " " + "X" * 768, ["X" * 768 + " ", "X" * 768], (4, 1024)), id="huge-2b"
         ),
     ],
 )