From c424cd4d91eda4f1718692308bfde7616020210e Mon Sep 17 00:00:00 2001
From: Josh Reini <60949774+joshreini1@users.noreply.github.com>
Date: Wed, 15 May 2024 15:31:16 -0400
Subject: [PATCH] Fix a few old groundedness references (#1139)

* Update selecting_components.md
* Update MultiQueryRetrievalLangchain.ipynb
* Update random_evaluation.ipynb
* Update canopy_quickstart.ipynb
---
 .../selecting_components.md                   | 47 ++++++++++---------
 .../MultiQueryRetrievalLangchain.ipynb        |  6 +--
 .../experimental/random_evaluation.ipynb      |  6 +--
 .../frameworks/canopy/canopy_quickstart.ipynb |  4 +-
 4 files changed, 27 insertions(+), 36 deletions(-)

diff --git a/docs/trulens_eval/evaluation/feedback_selectors/selecting_components.md b/docs/trulens_eval/evaluation/feedback_selectors/selecting_components.md
index f94a36d18..fce3803d3 100644
--- a/docs/trulens_eval/evaluation/feedback_selectors/selecting_components.md
+++ b/docs/trulens_eval/evaluation/feedback_selectors/selecting_components.md
@@ -9,21 +9,25 @@ to refer to parts of an LLM stack trace and use those when
 defining evaluations. For example, the following lens refers to the input to
 the retrieve step of the app called query.
 
-```python
-Select.RecordCalls.retrieve.args.query
-```
+!!! example
+
+    ```python
+    Select.RecordCalls.retrieve.args.query
+    ```
 
 Such lenses can then be used to define evaluations as so:
 
-```python
-# Context relevance between question and each context chunk.
-f_context_relevance = (
-    Feedback(provider.context_relevance_with_cot_reasons, name = "Context Relevance")
-    .on(Select.RecordCalls.retrieve.args.query)
-    .on(Select.RecordCalls.retrieve.rets)
-    .aggregate(np.mean)
-)
-```
+!!! example
+
+    ```python
+    # Context relevance between question and each context chunk.
+    f_context_relevance = (
+        Feedback(provider.context_relevance_with_cot_reasons, name = "Context Relevance")
+        .on(Select.RecordCalls.retrieve.args.query)
+        .on(Select.RecordCalls.retrieve.rets)
+        .aggregate(np.mean)
+    )
+    ```
 
 In most cases, the Select object produces only a single item but can also
 address multiple items.
@@ -35,18 +39,15 @@ the documents returned by the `retrieve` method. These items can be evaluated se
 as shown above, or can be collected into an array for evaluation with
 `.collect()`. This is most commonly used for groundedness evaluations.
 
-Example:
+!!! example
 
-```python
-grounded = Groundedness(groundedness_provider=provider)
-
-f_groundedness = (
-    Feedback(grounded.groundedness_measure_with_cot_reasons, name = "Groundedness")
-    .on(Select.RecordCalls.retrieve.rets.collect())
-    .on_output()
-    .aggregate(grounded.grounded_statements_aggregator)
-)
-```
+    ```python
+    f_groundedness = (
+        Feedback(provider.groundedness_measure_with_cot_reasons, name = "Groundedness")
+        .on(Select.RecordCalls.retrieve.rets.collect())
+        .on_output()
+    )
+    ```
 
 Selectors can also access multiple calls to the same component. In agentic applications, this is an increasingly common practice.
 For example, an agent could complete multiple
diff --git a/trulens_eval/examples/experimental/MultiQueryRetrievalLangchain.ipynb b/trulens_eval/examples/experimental/MultiQueryRetrievalLangchain.ipynb
index 2f60a2a1c..54ef26c55 100644
--- a/trulens_eval/examples/experimental/MultiQueryRetrievalLangchain.ipynb
+++ b/trulens_eval/examples/experimental/MultiQueryRetrievalLangchain.ipynb
@@ -69,7 +69,6 @@
     "from trulens_eval.feedback.provider import OpenAI\n",
     "import logging\n",
     "from trulens_eval.app import App\n",
-    "from trulens_eval.feedback import Groundedness\n",
     "from langchain_core.output_parsers import StrOutputParser\n",
     "from langchain_core.runnables import RunnablePassthrough\n",
     "from langchain import hub"
@@ -184,14 +183,11 @@
     "\n",
     "context = App.select_context(rag_chain)\n",
     "\n",
-    "\n",
-    "grounded = Groundedness(groundedness_provider=OpenAI())\n",
     "# Define a groundedness feedback function\n",
     "f_groundedness = (\n",
-    "    Feedback(grounded.groundedness_measure_with_cot_reasons)\n",
+    "    Feedback(provider.groundedness_measure_with_cot_reasons)\n",
     "    .on(context.collect()) # collect context chunks into a list\n",
     "    .on_output()\n",
-    "    .aggregate(grounded.grounded_statements_aggregator)\n",
     ")\n",
     "\n",
     "# Question/answer relevance between overall question and answer.\n",
diff --git a/trulens_eval/examples/experimental/random_evaluation.ipynb b/trulens_eval/examples/experimental/random_evaluation.ipynb
index 080a922c9..289d2202d 100644
--- a/trulens_eval/examples/experimental/random_evaluation.ipynb
+++ b/trulens_eval/examples/experimental/random_evaluation.ipynb
@@ -203,7 +203,6 @@
    "outputs": [],
    "source": [
     "from trulens_eval import Feedback, Select\n",
-    "from trulens_eval.feedback import Groundedness\n",
     "from trulens_eval.feedback.provider.openai import OpenAI as fOpenAI\n",
     "\n",
     "import numpy as np\n",
@@ -211,14 +210,11 @@
     "# Initialize provider class\n",
     "fopenai = fOpenAI()\n",
     "\n",
-    "grounded = Groundedness(groundedness_provider=fopenai)\n",
-    "\n",
     "# Define a groundedness feedback function\n",
     "f_groundedness = (\n",
-    "    Feedback(grounded.groundedness_measure_with_cot_reasons, name = \"Groundedness\")\n",
+    "    Feedback(provider.groundedness_measure_with_cot_reasons, name = \"Groundedness\")\n",
     "    .on(Select.RecordCalls.retrieve.rets.collect())\n",
     "    .on_output()\n",
-    "    .aggregate(grounded.grounded_statements_aggregator)\n",
     ")\n",
     "\n",
     "# Question/answer relevance between overall question and answer.\n",
diff --git a/trulens_eval/examples/expositional/frameworks/canopy/canopy_quickstart.ipynb b/trulens_eval/examples/expositional/frameworks/canopy/canopy_quickstart.ipynb
index cec2dbbe3..e5d6b2d6e 100644
--- a/trulens_eval/examples/expositional/frameworks/canopy/canopy_quickstart.ipynb
+++ b/trulens_eval/examples/expositional/frameworks/canopy/canopy_quickstart.ipynb
@@ -440,7 +440,6 @@
    ],
    "source": [
     "from trulens_eval import Feedback, Select\n",
-    "from trulens_eval.feedback import Groundedness\n",
     "from trulens_eval.feedback.provider.openai import OpenAI as fOpenAI\n",
     "import numpy as np\n",
@@ -455,10 +454,9 @@
     "\n",
     "# Define a groundedness feedback function\n",
     "f_groundedness = (\n",
-    "    Feedback(grounded.groundedness_measure_with_cot_reasons, name = \"Groundedness\", higher_is_better=True)\n",
+    "    Feedback(provider.groundedness_measure_with_cot_reasons, name = \"Groundedness\", higher_is_better=True)\n",
     "    .on(context.collect())\n",
     "    .on(output)\n",
-    "    .aggregate(grounded.grounded_statements_aggregator)\n",
     ")\n",
     "\n",
     "# Question/answer relevance between overall question and answer.\n",
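
For reviewers who want to sanity-check the migrated API shape end to end, below is a minimal sketch of the pattern this patch moves to. It is illustrative only: it assumes a `trulens_eval` version that includes this change, an `OPENAI_API_KEY` in the environment, and a hypothetical instrumented app whose `retrieve` method takes a `query` argument and returns context chunks, so the `Select.RecordCalls.retrieve` paths would need to match your own app. The `provider` variable name follows the docs snippet above; the notebooks in this patch use names like `fopenai`.

```python
# Sketch of the post-patch groundedness pattern (not a drop-in script):
# groundedness now lives on the provider itself, so the old Groundedness
# class and grounded_statements_aggregator are no longer needed.
import numpy as np

from trulens_eval import Feedback, Select
from trulens_eval.feedback.provider.openai import OpenAI as fOpenAI

provider = fOpenAI()  # provider exposing the new groundedness method

# Groundedness: collect all retrieved chunks into a single list, then
# check the app's output against that grounding corpus.
f_groundedness = (
    Feedback(provider.groundedness_measure_with_cot_reasons, name="Groundedness")
    .on(Select.RecordCalls.retrieve.rets.collect())  # chunks as one list
    .on_output()
)

# Context relevance: score the question against each chunk separately,
# then average the per-chunk scores.
f_context_relevance = (
    Feedback(provider.context_relevance_with_cot_reasons, name="Context Relevance")
    .on(Select.RecordCalls.retrieve.args.query)
    .on(Select.RecordCalls.retrieve.rets)
    .aggregate(np.mean)
)
```

Note the asymmetry the docs hunk above relies on: groundedness uses `.collect()` so the chunks are judged together and no `.aggregate()` call is needed, while context relevance evaluates chunks one at a time and aggregates with `np.mean`.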