
[R-306] Frequent openai.APIConnectionError when running testset generator #1512

@alexander-zuev

Description


[X] I have checked the documentation and related resources and couldn't resolve my bug. Yes, there are no recommendations on how to fix the API connection errors, especially since you are using wrappers (LangChain / LlamaIndex).

Describe the bug
In short: there are frequent, intermittent APIConnectionErrors when running the testset generator. The error trace doesn't give specific enough reasons or ways to troubleshoot.

In such cases I would expect the library to retry the connection rather than fail the whole generation, wasting all of the tokens/embeddings already spent.

Ragas version: 0.2.0
Python version: 3.12.6
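
As a stopgap, I'm experimenting with raising ragas' own retry budget. A minimal sketch based on my reading of the 0.2 source (RunConfig and the run_config parameter of generate_with_langchain_docs; treat the exact field names as assumptions, this is not a documented fix):

# Hedged workaround sketch: raise the retry budget of ragas' internal
# tenacity wrapper so transient connection errors are retried instead of
# aborting the whole run. Field names are my reading of ragas 0.2.
from ragas.run_config import RunConfig

run_config = RunConfig(
    timeout=120,     # per-call timeout, in seconds
    max_retries=15,  # retry attempts per LLM call
    max_wait=90,     # cap on backoff between attempts
    max_workers=4,   # fewer concurrent calls, fewer dropped connections
)

test_dataset = generator.generate_with_langchain_docs(
    documents=documents,
    testset_size=testset_size,
    run_config=run_config,
)

Lowering max_workers in particular seems to reduce how often connections get dropped, though I haven't verified this systematically.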

Code to Reproduce
Sharing the evaluation module code

import asyncio
import json
import os.path
from collections.abc import Sequence
from pprint import pprint
from typing import Dict, List, Optional

import weave
from datasets import Dataset
from langchain_community.document_loaders import JSONLoader
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from ragas import EvaluationDataset, SingleTurnSample, evaluate
from ragas.cost import get_token_usage_for_openai
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import (
    AnswerCorrectness,
    AnswerRelevancy,
    ContextPrecision,
    ContextRecall,
    Faithfulness,
    answer_correctness,
    answer_relevancy,
    context_precision,
    context_recall,
    faithfulness,
)
from ragas.testset import Testset, TestsetGenerator, TestsetSample
from ragas.testset.synthesizers import default_query_distribution
from weave import Dataset as WeaveDataset
from weave.trace import weave_client

from src.generation.claude_assistant import ClaudeAssistant
from src.utils.config import (
    ANTHROPIC_API_KEY,
    EMBEDDING_MODEL,
    EVAL_DIR,
    EVALUATOR_MODEL_NAME,
    MAIN_MODEL,
    RAW_DATA_DIR,
    WEAVE_PROJECT_NAME,
)
from src.utils.decorators import anthropic_error_handler, base_error_handler
from src.utils.logger import configure_logging, get_logger
from src.vector_storage.vector_db import Reranker, ResultRetriever, VectorDB

logger = get_logger()


class DataLoader:
    """
    Represents a data loader responsible for loading and saving datasets.

    class DataLoader:
        def __init__(self) -> None:
            Initializes the DataLoader with default dataset path and filename.

        @base_error_handler
        def get_documents(self, filename: str) -> list[Document]:
            Loads documents from a JSON file.

            :param filename: Name of the JSON file to load documents from.
            :return: List of Document objects loaded from the file.

        def metadata_func(self, record: dict, metadata: dict) -> dict:
            Extracts basic metadata from a record and updates a metadata dictionary.

            :param record: Dictionary containing the record with metadata.
            :param metadata: Dictionary to update with extracted metadata.
            :return: Updated metadata dictionary.

        @base_error_handler
        def save_dataset(self, dataset: Dataset, filename: Optional[str] = None) -> str:
            Saves the given dataset to a JSON file.

            :param dataset: Dataset object to save.
            :param filename: Optional; Name of the file to save the dataset to. If not provided, uses the default dataset filename.
            :return: Path to the saved dataset file.

        @base_error_handler
        def load_json(self, filename: str = None) -> Dataset | None:
            Loads the dataset from a JSON file.

            :param filename: Optional; Name of the file to load the dataset from. If not provided, uses the default dataset filename.
            :return: Loaded Dataset object if successful, otherwise None.
    """

    def __init__(self) -> None:
        self.dataset_path = EVAL_DIR
        self.dataset_filename: str = "eval_dataset.json"

    @base_error_handler
    def get_documents(self, filename: str) -> list[Document]:
        filepath = os.path.join(RAW_DATA_DIR, filename)
        loader = JSONLoader(
            file_path=filepath,
            jq_schema=".data[]",
            content_key="markdown",
            metadata_func=self.metadata_func,
        )

        documents = loader.load()
        logger.info(f"Successfully loaded documents from {filename}")
        return documents

    def metadata_func(self, record: dict, metadata: dict) -> dict:
        metadata["title"] = record.get("metadata", {}).get("title", "")
        metadata["url"] = record.get("metadata", {}).get("sourceURL", "")
        metadata["description"] = record.get("metadata", {}).get("description", "")

        return metadata

    @base_error_handler
    def save_dataset(self, dataset: Dataset, filename: str | None = None) -> str:
        """Saves the given dataset to a JSON file"""
        if filename and filename != self.dataset_filename:
            self.dataset_filename = filename
            logger.debug(f"Updated filename to: {self.dataset_filename}")

        filepath = os.path.join(self.dataset_path, self.dataset_filename)
        try:
            data_dict = dataset.to_dict()
            with open(filepath, "w") as f:
                json.dump(data_dict, f, indent=2)
            logger.info(f"Dataset saved to: {filepath}")
            return filepath
        except Exception as e:
            logger.error(f"Error saving dataset to JSON: {str(e)}")
            raise

    @base_error_handler
    def load_json(self, filename: str | None = None) -> Dataset | None:
        """Loads the dataset from json file"""
        if filename:
            self.dataset_filename = filename

        filepath = os.path.join(self.dataset_path, self.dataset_filename)
        try:
            with open(filepath) as f:
                data = json.load(f)
                num_samples = len(data["question"])  # or any other key that represents the number of samples

                dataset = Dataset.from_dict(data)
            logger.info(f"Loaded dataset with {num_samples} samples")
            return dataset
        except FileNotFoundError:
            logger.error(f"File not found: {filepath}")
            return None
        except json.JSONDecodeError:
            logger.error(f"Error decoding JSON file {filepath}, invalid format")
            raise
        except Exception as e:
            logger.error(f"Error loading dataset from JSON: {str(e)}")
            raise


class DatasetGenerator:
    """
    Class responsible for generating datasets using various language models and embeddings.

    :param model_name: Name of the language model used for generating datasets.
    :type model_name: str, optional
    :param dataset_path: Path where the dataset will be stored.
    :type dataset_path: str, optional
    :param claude_assistant: Instance of ClaudeAssistant for additional functionalities.
    :type claude_assistant: ClaudeAssistant, optional
    :param loader: Instance of DataLoader for loading data.
    :type loader: DataLoader, optional
    """

    def __init__(
        self,
        model_name: str = "gpt-4o",
        dataset_path: str = EVAL_DIR,
        claude_assistant: ClaudeAssistant | None = None,
        loader: DataLoader | None = None,
    ) -> None:
        self.generator_llm = LangchainLLMWrapper(ChatOpenAI(model=model_name))
        self.generator = TestsetGenerator(llm=self.generator_llm)
        self.dataset_path = dataset_path
        self.claude_assistant = claude_assistant
        self.dataset: Testset | None = None
        self.dataset_filename: str = "eval_dataset.json"
        self.loader = loader or DataLoader()
        self.weave_dataset_name: str = "anthropic_dataset"

    @base_error_handler
    def generate_dataset(self, documents: Sequence[Document], testset_size: int, **kwargs) -> EvaluationDataset:
        """
        :param documents: A sequence of Document objects to generate the dataset from.
        :type documents: Sequence[Document]
        :param testset_size: The size of the test dataset to be generated.
        :type testset_size: int
        :param kwargs: Additional keyword arguments to pass to the generator.
        :type kwargs: dict
        :return: The generated evaluation dataset.
        :rtype: EvaluationDataset
        """
        query_distribution = default_query_distribution(self.generator.llm)

        test_dataset = self.generator.generate_with_langchain_docs(
            documents=documents,
            testset_size=testset_size,
            query_distribution=query_distribution,
            **kwargs,
        )

        # dataset = self.convert_to_dataset(test_dataset)
        # self.dataset = dataset
        evaluation_dataset = test_dataset.to_evaluation_dataset()
        return evaluation_dataset

    # TODO: Deprecate and remove this method
    @anthropic_error_handler
    def convert_to_dataset(self, test_dataset: Testset) -> EvaluationDataset:
        """Takes synthetic dataset from Ragas and converts to HF Dataset"""
        if not test_dataset:
            raise ValueError("Dataset not generated, generate the dataset first!")

        samples = []
        for sample in test_dataset.samples:
            samples.append(
                SingleTurnSample(
                    user_input=sample.question,
                    response="",  # This will be filled during evaluation
                    retrieved_contexts=sample.contexts,
                    reference=sample.ground_truth,
                )
            )
        return EvaluationDataset(samples=samples)

        #
        # data = {
        #     "user_input": [],
        #     "contexts": [],
        #     "ground_truth": []
        # }
        #
        # for row in test_dataset.samples:
        #     data["user_input"].append(row.user_input)
        #     data["contexts"].append(row.reference_contexts)
        #     data["ground_truth"].append(row.reference)
        #
        # dataset = Dataset.from_dict(data)
        # self.dataset = dataset
        # return self.dataset


class WeaveManager:
    """
    WeaveManager class provides an interface for interacting with Weave for dataset operations.

    Methods
    -------
    __init__(project_name: str = WEAVE_PROJECT_NAME)
        Initializes the WeaveManager with the given project name.

    upload_dataset(dataset: Dataset, name: str) -> str
        Uploads the given HuggingFace dataset to Weave and returns the name of the uploaded dataset.

    retrieve_dataset(name: str) -> weave_client.ObjectRef
        Retrieves a dataset from Weave using the given dataset name and returns a reference to the dataset.
    """

    def __init__(
        self,
        project_name: str = WEAVE_PROJECT_NAME,
    ):
        self.project_name = project_name
        weave.init(project_name)

    async def upload_dataset(self, dataset: Dataset, name: str) -> str:
        """Uploads dataset to Weave"""
        # Convert HuggingFace Dataset to a list of dictionaries
        data_list = dataset.to_list()

        # Create a Weave Dataset
        weave_dataset = WeaveDataset(name=name, rows=data_list)

        try:
            # Publish the dataset
            weave.publish(weave_dataset)

            logger.info(f"Dataset '{name!r}' uploaded to Weave")
            return name
        except Exception as e:
            logger.error(f"An error occurred while uploading dataset to Weave: {str(e)}")
            raise

    async def retrieve_dataset(self, name: str) -> weave_client.ObjectRef:
        if name is None:
            raise ValueError("Dataset name is required!")
        dataset_name = name

        dataset_ref = weave.ref(dataset_name).get()
        print(dataset_ref)
        return dataset_ref


class Evaluator:
    """
    Evaluator class for evaluating model-generated outputs against ground truth answers.

    This class provides methods for evaluating model outputs in terms of several metrics such as faithfulness, answer relevancy, answer correctness, context recall, and context precision. It integrates with various models and datasets to perform comprehensive evaluations.
    """

    def __init__(
        self,
        model: str = "gpt-4o",
        embedding_model: str = EMBEDDING_MODEL,
        claude_assistant: ClaudeAssistant = None,
        evaluator_model_name: str = EVALUATOR_MODEL_NAME,
        anthropic_api_key: str = ANTHROPIC_API_KEY,
        claude_assistant_model: str = MAIN_MODEL,
    ):
        self.model_name = model
        self.embedding_model_name = embedding_model
        self.llm = OpenAI(model=model)
        self.embeddings = OpenAIEmbedding(model=embedding_model)
        self.claude_assistant = claude_assistant
        self.evaluator_model_name = evaluator_model_name
        self.evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model=evaluator_model_name))
        self.main_model = claude_assistant_model
        self.anthropic_api_key = anthropic_api_key
        self.metrics = self._initialize_metrics()

    def _initialize_metrics(self):
        return {
            "faithfulness": Faithfulness(llm=self.evaluator_llm),
            "answer_relevancy": AnswerRelevancy(llm=self.evaluator_llm),
            "answer_correctness": AnswerCorrectness(llm=self.evaluator_llm),
            "context_recall": ContextRecall(llm=self.evaluator_llm),
            "context_precision": ContextRecall(llm=self.evaluator_llm),
        }

    @weave.op()
    async def evaluate_row(self, question: str, ground_truth: str, model_output: dict) -> dict[str, list[float]]:
        """
        Evaluate a model's output for a given question and ground truth by computing various metrics.

        This method utilizes a dataset containing the question, ground truth, the answer from the model's output,
        and the retrieved contexts to evaluate the performance of the model. It computes several metrics including
        faithfulness, answer relevancy, answer correctness, context recall, and context precision using
        specified evaluation tools.

        :param question: The question posed to the model.
        :type question: str
        :param ground_truth: The expected ground truth answer for the question.
        :type ground_truth: str
        :param model_output: The model's output containing the answer and retrieved contexts.
        :type model_output: dict
        :return: A dictionary with evaluation metric scores.
        :rtype: dict[str, list[float]]
        :raises ValueError: If the model output is None or does not contain necessary keys.
        :raises RuntimeError: If there is an issue with initializing the metrics or models.
        """
        if model_output is None:
            logger.warning(f"Model output is None for the question: {question[:50]}...")
            raise ValueError("Model output is None; cannot evaluate this row.")

        sample = SingleTurnSample(
            user_input=question,
            reference=ground_truth,
            response=model_output.get("answer", ""),
            retrieved_contexts=model_output.get("contexts", []),
        )

        # Wrap the single sample so it can be scored by ragas' evaluate()
        dataset = EvaluationDataset(samples=[sample])

        # initialize metrics

        metrics = [faithfulness, answer_relevancy, answer_correctness, context_recall, context_precision]

        judge_model = ChatOpenAI(model=self.evaluator_model_name)
        embeddings_model = OpenAIEmbeddings(model=self.embedding_model_name)

        result = evaluate(
            dataset=dataset,
            metrics=metrics,
            llm=judge_model,
            embeddings=embeddings_model,
            token_usage_parser=get_token_usage_for_openai,
        )

        return {
            "faithfulness": result["faithfulness"],
            "answer_relevancy": result["answer_relevancy"],
            "answer_correctness": result["answer_correctness"],
            "context_recall": result["context_recall"],
            "context_precision": result["context_precision"],
        }

    async def run_weave_evaluation(self, eval_dataset: Dataset | weave_client.ObjectRef) -> dict:
        logger.info("Running evaluation...")

        if isinstance(eval_dataset, Dataset):
            eval_dataset = eval_dataset.to_list()

        evaluation = weave.Evaluation(
            dataset=eval_dataset,
            scorers=[self.evaluate_row],
        )
        results = await evaluation.evaluate(model=self.claude_assistant)
        return results


class EvalManager:
    """
    class EvalManager:
        Initializes an instance of the EvalManager with optional components.

        Parameters:
            loader (DataLoader): An instance of DataLoader, defaults to a new DataLoader instance if not provided.
            weave_manager (WeaveManager): An instance of WeaveManager, defaults to a new WeaveManager instance if not provided.
            vector_db (VectorDB): An instance of VectorDB, defaults to a new VectorDB instance if not provided.
            claude_assistant (ClaudeAssistant): An instance of ClaudeAssistant, defaults to a new ClaudeAssistant instance if not provided.
            retriever (ResultRetriever): An instance of ResultRetriever, defaults to a new ResultRetriever instance if not provided.
            reranker (Reranker): An instance of Reranker, defaults to a new Reranker instance if not provided.
            generator (DatasetGenerator): An instance of DatasetGenerator, defaults to a new DatasetGenerator instance if not provided.
            evaluator (Evaluator): An instance of Evaluator, defaults to a new Evaluator instance if not provided.

    @base_error_handler
    async def run_pipeline:
        Runs evaluation pipeline with pre-determined parameters.

        Parameters:
            generate_new_dataset (bool): Indicates whether to generate a new dataset. Defaults to False.
            testsize (int): Number of questions (test set size) in the generated dataset. Defaults to 5.
            input_filename (str): Name of the input file containing documents. Defaults to "docs_anthropic_com_en_20240928_135426.json".
            weave_dataset_name (str): Name of the dataset in Weave to be used. Defaults to "anthropic_dataset".

        Returns:
            results: The results of the evaluation after running the pipeline.
    """

    def __init__(
        self,
        loader: DataLoader = None,
        weave_manager: WeaveManager = None,
        vector_db: VectorDB = None,
        claude_assistant: ClaudeAssistant = None,
        retriever: ResultRetriever = None,
        reranker: Reranker = None,
        generator: DatasetGenerator = None,
        evaluator: Evaluator = None,
    ):
        self.loader = loader or DataLoader()
        self.weave_manager = weave_manager or WeaveManager()
        self.vector_db = vector_db or VectorDB()
        self.claude_assistant = claude_assistant or ClaudeAssistant(vector_db=self.vector_db)
        self.reranker = reranker or Reranker()
        self.retriever = retriever or ResultRetriever(self.vector_db, self.reranker)
        self.claude_assistant.retriever = self.retriever
        self.generator = generator or DatasetGenerator(claude_assistant=self.claude_assistant, loader=self.loader)
        self.evaluator = evaluator or Evaluator(claude_assistant=self.claude_assistant)

    @base_error_handler
    async def run_pipeline(
        self,
        generate_new_dataset: bool = False,
        testsize: int = 5,
        input_filename: str = "docs_anthropic_com_en_20240928_135426.json",
        weave_dataset_name: str = "anthropic_dataset",
    ):
        """Runs evaluation pipeline with pre-determined parameters"""

        if generate_new_dataset:
            logger.info("Generating new dataset...")
            # Generate and upload new dataset
            docs = self.loader.get_documents(filename=input_filename)
            dataset = self.generator.generate_dataset(documents=docs, testset_size=testsize)

            for row in dataset.samples:
                logger.debug(f"Question: {row.user_input}")

            # Upload dataset
            await self.weave_manager.upload_dataset(dataset, name="anthropic_dataset")

        # Retrieve dataset from Weave
        logger.info(f"Getting a dataset from Weave with name: {weave_dataset_name}")
        dataset = await self.weave_manager.retrieve_dataset(name=weave_dataset_name)
        logger.debug(f"First row of the dataset {dataset.rows[0]}")

        # Run evaluation
        results = await self.evaluator.run_weave_evaluation(eval_dataset=dataset)
        return results


async def main():
    configure_logging(debug=False)  # Debug mode is OFF

    filename = "docs_anthropic_com_en_20240928_135426.json"

    # Initialize dataset generator and evaluation manager

    # Generate a new dataset or not

    # Run evaluation pipeline on a given dataset

    pipeline_manager = EvalManager()

    # Run the pipeline
    results = await pipeline_manager.run_pipeline(
        generate_new_dataset=True,
        input_filename=filename,
        testsize=1,
        weave_dataset_name="anthropic_dataset",
    )
    await asyncio.sleep(0)  # Give a chance for any lingering tasks to complete

    print("Evaluation Results:")
    pprint(results)


# TODO: refactor this method to be able to generate a new dataset
def new_ragas():
    configure_logging(debug=True)  # Debug mode is ON
    dataset_generator = DatasetGenerator()
    weave_manager = WeaveManager()

    filename = "docs_anthropic_com_en_20240928_135426.json"
    testsize = 5

    logger.info("Generating new dataset...")
    # Generate and upload new dataset
    docs = dataset_generator.loader.get_documents(filename=filename)
    dataset = dataset_generator.generate_dataset(documents=docs, testset_size=testsize)
    print(dataset)

    # Upload the dataset (upload_dataset is a coroutine, so it must be run in an event loop)
    asyncio.run(weave_manager.upload_dataset(dataset, name="anthropic_dataset_v0.2"))


if __name__ == "__main__":
    # Generate a new dataset
    new_ragas()

    # Run an evaluation run on a dataset
    # asyncio.run(main())

Error trace

C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Scripts\python.exe C:\Users\azuev\PycharmProjects\omni-claude\src\evaluation\evaluation.py
C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\ragas\prompt\base.py:9: LangChainDeprecationWarning: As of langchain-core 0.3.0, LangChain uses pydantic v2 internally. The langchain_core.pydantic_v1 module was a compatibility shim for pydantic v1, and should no longer be used. Please update the code to import from Pydantic directly.

For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from ragas.llms.prompt import PromptValue
There are different credentials in the netrc file and the environment. Using the environment value.
Logged in as Weights & Biases user: azuev.
View Weave data at https://wandb.ai/azuev-az-company/OmniClaude/weave
ℹ️ INFO - 2024-10-15 21:01:06,167 - omni-claude.__main__:547 - INFO - Generating new dataset...
ℹ️ INFO - 2024-10-15 21:01:06,195 - omni-claude.__main__:109 - INFO - Successfully loaded documents from docs_anthropic_com_en_20240928_135426.json
Generating Scenarios:   0%|          | 0/3 [00:00<?, ?it/s]
Generating common themes:   0%|          | 0/1 [00:00<?, ?it/s]

Generating Scenarios:  33%|β–ˆβ–ˆβ–ˆβ–Ž      | 1/3 [00:04<00:09,  4.51s/it]
Generating common themes: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1/1 [00:02<00:00,  2.32s/it]
                                                                       

Generating common_concepts: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1/1 [03:21<00:00, 201.05s/it]

Generating Samples:   0%|          | 0/7 [00:03<?, ?it/s]
Traceback (most recent call last):
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\openai\_base_client.py", line 1564, in _request
    response = await self._client.send(
               ^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\httpx\_client.py", line 1674, in send
    response = await self._send_handling_auth(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\httpx\_client.py", line 1702, in _send_handling_auth
    response = await self._send_handling_redirects(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\httpx\_client.py", line 1739, in _send_handling_redirects
    response = await self._send_single_request(request)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\httpx\_client.py", line 1776, in _send_single_request
    response = await transport.handle_async_request(request)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\httpx\_transports\default.py", line 377, in handle_async_request
    resp = await self._pool.handle_async_request(req)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\httpcore\_async\connection_pool.py", line 216, in handle_async_request
    raise exc from None
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\httpcore\_async\connection_pool.py", line 189, in handle_async_request
    await self._close_connections(closing)
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\httpcore\_async\connection_pool.py", line 305, in _close_connections
    await connection.aclose()
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\httpcore\_async\connection.py", line 171, in aclose
    await self._connection.aclose()
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\httpcore\_async\http11.py", line 265, in aclose
    await self._network_stream.aclose()
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\httpcore\_backends\anyio.py", line 55, in aclose
    await self._stream.aclose()
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\anyio\streams\tls.py", line 202, in aclose
    await self.transport_stream.aclose()
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\anyio\_backends\_asyncio.py", line 1258, in aclose
    self._transport.close()
  File "C:\Users\azuev\AppData\Local\Programs\Python\Python312\Lib\asyncio\proactor_events.py", line 109, in close
    self._loop.call_soon(self._call_connection_lost, None)
  File "C:\Users\azuev\AppData\Local\Programs\Python\Python312\Lib\asyncio\base_events.py", line 795, in call_soon
    self._check_closed()
  File "C:\Users\azuev\AppData\Local\Programs\Python\Python312\Lib\asyncio\base_events.py", line 541, in _check_closed
    raise RuntimeError('Event loop is closed')
RuntimeError: Event loop is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\azuev\PycharmProjects\omni-claude\src\evaluation\evaluation.py", line 560, in <module>
    new_ragas()
  File "C:\Users\azuev\PycharmProjects\omni-claude\src\evaluation\evaluation.py", line 550, in new_ragas
    dataset = dataset_generator.generate_dataset(documents=docs, testset_size=testsize)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\PycharmProjects\omni-claude\src\utils\decorators.py", line 30, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\PycharmProjects\omni-claude\src\evaluation\evaluation.py", line 212, in generate_dataset
    test_dataset = self.generator.generate_with_langchain_docs(
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\ragas\testset\synthesizers\generate.py", line 92, in generate_with_langchain_docs
    return self.generate(
           ^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\ragas\testset\synthesizers\generate.py", line 214, in generate
    eval_samples = exec.results()
                   ^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\ragas\executor.py", line 146, in results
    results = asyncio.run(_aresults())
              ^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\nest_asyncio.py", line 98, in run_until_complete
    return f.result()
           ^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\Programs\Python\Python312\Lib\asyncio\futures.py", line 203, in result
    raise self._exception.with_traceback(self._exception_tb)
  File "C:\Users\azuev\AppData\Local\Programs\Python\Python312\Lib\asyncio\tasks.py", line 314, in __step_run_and_handle_result
    result = coro.send(None)
             ^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\ragas\executor.py", line 141, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\Programs\Python\Python312\Lib\asyncio\tasks.py", line 631, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\Programs\Python\Python312\Lib\asyncio\futures.py", line 203, in result
    raise self._exception.with_traceback(self._exception_tb)
  File "C:\Users\azuev\AppData\Local\Programs\Python\Python312\Lib\asyncio\tasks.py", line 314, in __step_run_and_handle_result
    result = coro.send(None)
             ^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\ragas\executor.py", line 36, in sema_coro
    return await coro
           ^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\ragas\executor.py", line 81, in wrapped_callable_async
    raise e
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\ragas\executor.py", line 78, in wrapped_callable_async
    result = await callable(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\ragas\testset\synthesizers\base.py", line 112, in generate_sample
    sample = await self._generate_sample(scenario, sample_generation_grp)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\ragas\testset\synthesizers\abstract_query.py", line 156, in _generate_sample
    user_input = await self.generate_query(scenario, callbacks)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\ragas\testset\synthesizers\abstract_query.py", line 176, in generate_query
    query = await self.generate_user_input_prompt.generate(
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\ragas\prompt\pydantic_prompt.py", line 126, in generate
    output_single = await self.generate_multiple(
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\ragas\prompt\pydantic_prompt.py", line 181, in generate_multiple
    resp = await llm.generate(
           ^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\ragas\llms\base.py", line 97, in generate
    return await agenerate_text_with_retry(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\tenacity\asyncio\__init__.py", line 189, in async_wrapped
    return await copy(fn, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\tenacity\asyncio\__init__.py", line 111, in __call__
    do = await self.iter(retry_state=retry_state)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\tenacity\asyncio\__init__.py", line 153, in iter
    result = await action(retry_state)
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\tenacity\_utils.py", line 99, in inner
    return call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\tenacity\__init__.py", line 398, in <lambda>
    self._add_action_func(lambda rs: rs.outcome.result())
                                     ^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\Programs\Python\Python312\Lib\concurrent\futures\_base.py", line 449, in result
    return self.__get_result()
           ^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\Programs\Python\Python312\Lib\concurrent\futures\_base.py", line 401, in __get_result
    raise self._exception
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\tenacity\asyncio\__init__.py", line 114, in __call__
    result = await fn(*args, **kwargs)
❌ ERROR - 2024-10-15 21:08:32,628 - omni-claude.src.utils.decorators:32 - ERROR - Error in generate_dataset: Connection error.
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\ragas\llms\base.py", line 179, in agenerate_text
    return await self.langchain_llm.agenerate_prompt(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\langchain_core\language_models\chat_models.py", line 796, in agenerate_prompt
    return await self.agenerate(
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\langchain_core\language_models\chat_models.py", line 756, in agenerate
    raise exceptions[0]
  File "C:\Users\azuev\AppData\Local\Programs\Python\Python312\Lib\asyncio\tasks.py", line 314, in __step_run_and_handle_result
    result = coro.send(None)
             ^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\langchain_core\language_models\chat_models.py", line 924, in _agenerate_with_cache
    result = await self._agenerate(
             ^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\langchain_openai\chat_models\base.py", line 836, in _agenerate
    response = await self.async_client.create(**payload)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\weave\trace\op.py", line 464, in wrapper
    res, _ = await _execute_call(wrapper, call, *args, **kwargs)  # type: ignore
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\weave\trace\op.py", line 276, in _call_async
    return handle_exception(e)
           ^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\weave\trace\op.py", line 274, in _call_async
    res = await func(*args, **kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\weave\integrations\openai\openai_sdk.py", line 330, in _wrapper
    return await fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\openai\resources\chat\completions.py", line 1490, in create
    return await self._post(
           ^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\openai\_base_client.py", line 1831, in post
    return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\openai\_base_client.py", line 1525, in request
    return await self._request(
           ^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\azuev\AppData\Local\pypoetry\Cache\virtualenvs\omni-claude-85PpB9Q--py3.12\Lib\site-packages\openai\_base_client.py", line 1598, in _request
    raise APIConnectionError(request=request) from err
openai.APIConnectionError: Connection error.

Process finished with exit code 1
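
For what it's worth, the inner traceback ends in RuntimeError: Event loop is closed on Windows' proactor event loop before openai raises the APIConnectionError, so part of this may be an asyncio-on-Windows teardown issue rather than a pure network failure. A commonly suggested mitigation I plan to try (an assumption on my part, not verified against ragas) is forcing the selector event loop policy before generating:

import asyncio
import sys

# The default ProactorEventLoop on Windows is known to produce
# "RuntimeError: Event loop is closed" when transports are torn down after
# the loop has shut down; the selector policy usually avoids this.
if sys.platform == "win32":
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())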

Labels

bug (Something isn't working) · linear (Created by Linear-GitHub Sync) · module-testsetgen (Module testset generation)
