From ef77c7597cc0d0b505b61913397bc77308150f1d Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 1 Nov 2023 20:45:53 -0400
Subject: [PATCH] Automated File Generation from Docs Notebook Changes (#540)

Co-authored-by: joshreini1
---
 README.md                                     | 21 +++-----
 trulens_eval/README.md                        | 20 +++----
 .../colab/langchain_quickstart_colab.ipynb    | 54 +++++++++----------
 .../colab/llama_index_quickstart_colab.ipynb  | 46 ++++++++--------
 .../colab/text2text_quickstart_colab.ipynb    | 50 ++++++++---------
 .../py_script_quickstarts/all_tools.py        |  2 +-
 .../langchain_quickstart.py                   |  2 +-
 .../llama_index_quickstart.py                 |  2 +-
 .../text2text_quickstart.py                   |  2 +-
 trulens_eval/generated_files/all_tools.ipynb  |  4 +-
 trulens_eval/tests/unit/test_providers.py     |  8 +--
 trulens_eval/trulens_eval/Leaderboard.py      | 43 +++++++++------
 .../trulens_eval/feedback/groundedness.py     | 15 +++---
 .../trulens_eval/feedback/groundtruth.py      |  4 +-
 .../trulens_eval/feedback/provider/base.py    | 11 ++--
 .../trulens_eval/feedback/provider/litellm.py | 13 +++--
 .../trulens_eval/pages/Evaluations.py         | 12 ++---
 trulens_eval/trulens_eval/schema.py           |  5 +-
 trulens_eval/trulens_eval/ux/styles.py        |  9 ++--
 19 files changed, 162 insertions(+), 161 deletions(-)

diff --git a/README.md b/README.md
index 133342415..41377ef2b 100644
--- a/README.md
+++ b/README.md
@@ -43,25 +43,18 @@ TruLens supports the evaluation of tracking for any LLM app framework. Choose a
 
 **Langchain**
 
-[langchain_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/langchain_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb)
-
-[langchain_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/py_script_quickstarts/langchain_quickstart.py).
+[langchain_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.17.0a/trulens_eval/examples/quickstart/langchain_quickstart.ipynb).
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.17.0a/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb)
 
 **Llama-Index**
 
-[llama_index_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/llama_index_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb)
-
-[llama_index_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/py_script_quickstarts/llama_index_quickstart.py)
-
-**No Framework**
-
-[text2text_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/text2text_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb)
+[llama_index_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.17.0a/trulens_eval/examples/quickstart/llama_index_quickstart.ipynb).
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.17.0a/trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb)
 
-[text2text_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/py_script_quickstarts/text2text_quickstart.py)
+**Custom Text to Text Apps**
 
+[text2text_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.17.0a/trulens_eval/examples/quickstart/text2text_quickstart.ipynb).
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.17.0a/trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb)
 
 
 ## TruLens-Explain
diff --git a/trulens_eval/README.md b/trulens_eval/README.md
index d325cf350..409ad8af4 100644
--- a/trulens_eval/README.md
+++ b/trulens_eval/README.md
@@ -46,24 +46,18 @@ TruLens supports the evaluation of tracking for any LLM app framework. Choose a
 
 **Langchain**
 
-[langchain_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/langchain_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb)
-
-[langchain_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/py_script_quickstarts/langchain_quickstart.py).
+[langchain_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.17.0a/trulens_eval/examples/quickstart/langchain_quickstart.ipynb).
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.17.0a/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb)
 
 **Llama-Index**
 
-[llama_index_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/llama_index_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb)
-
-[llama_index_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/py_script_quickstarts/llama_index_quickstart.py)
-
-**No Framework**
+[llama_index_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.17.0a/trulens_eval/examples/quickstart/llama_index_quickstart.ipynb).
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.17.0a/trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb)
 
-[text2text_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/text2text_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb)
+**Custom Text to Text Apps**
 
-[text2text_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.16.0/trulens_eval/examples/quickstart/py_script_quickstarts/text2text_quickstart.py)
+[text2text_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.17.0a/trulens_eval/examples/quickstart/text2text_quickstart.ipynb).
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.17.0a/trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb)
 
 
 ### 💡 Contributing
diff --git a/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb b/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb
index 95565970c..7d0266da9 100644
--- a/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb
+++ b/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb
@@ -17,7 +17,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "9e4e988a",
+   "id": "4d8bd621",
    "metadata": {},
    "source": [
     "# Langchain Quickstart\n",
@@ -30,17 +30,17 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ae2a62a7",
+   "id": "e32c8ad8",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ! pip install trulens_eval==0.16.0 langchain>=0.0.263"
+    "# ! pip install trulens_eval==0.17.0a langchain>=0.0.263"
    ]
   },
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "c3779d2d",
+   "id": "1b5822db",
    "metadata": {},
    "source": [
     "## Setup\n",
@@ -51,7 +51,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "b587f1ee",
+   "id": "6ff3805d",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -63,7 +63,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "fc907ab5",
+   "id": "d4703d64",
    "metadata": {},
    "source": [
     "### Import from LangChain and TruLens"
@@ -72,7 +72,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "8bce252d",
+   "id": "3accbb8e",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -95,7 +95,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "ff36b994",
+   "id": "9f2043ff",
    "metadata": {},
    "source": [
     "### Create Simple LLM Application\n",
@@ -106,7 +106,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3b5178cd",
+   "id": "67703ade",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -128,7 +128,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "f494403b",
+   "id": "8e0a7c70",
    "metadata": {},
    "source": [
     "### Send your first request"
@@ -137,7 +137,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "60d2e86f",
+   "id": "005409f4",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -147,7 +147,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "4c155484",
+   "id": "93aafd5a",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -159,7 +159,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "9fe8dd3d",
+   "id": "6b87ddde",
    "metadata": {},
    "source": [
     "## Initialize Feedback Function(s)"
@@ -168,7 +168,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ab4f3123",
+   "id": "11d1f315",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -184,7 +184,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "7db53395",
+   "id": "2002422b",
    "metadata": {},
    "source": [
     "## Instrument chain for logging with TruLens"
@@ -193,7 +193,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "bd89b425",
+   "id": "8b6dda74",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -205,7 +205,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3a8ef1e0",
+   "id": "faf179c3",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -217,7 +217,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "cc35e768",
+   "id": "a1e95335",
    "metadata": {},
    "source": [
     "## Retrieve records and feedback"
@@ -226,7 +226,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "91566071",
+   "id": "37b0e597",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -241,7 +241,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "eaee8f99",
+   "id": "5c1f1720",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -263,7 +263,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "b0cda143",
+   "id": "8e9293ed",
    "metadata": {},
    "source": [
     "## Explore in a Dashboard"
@@ -272,7 +272,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "2ff2f753",
+   "id": "95e4dabd",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -284,7 +284,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "16744b64",
+   "id": "85a83b30",
    "metadata": {},
    "source": [
     "Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard."
@@ -293,7 +293,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "3b624b81",
+   "id": "867fbc1c",
    "metadata": {},
    "source": [
     "### Chain Leaderboard\n",
@@ -326,7 +326,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "99247aa6",
+   "id": "5d4329eb",
    "metadata": {},
    "source": [
     "Note: Feedback functions evaluated in the deferred manner can be seen in the \"Progress\" page of the TruLens dashboard."
@@ -335,7 +335,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "539e4367",
+   "id": "06bc5f81",
    "metadata": {},
    "source": [
     "## Or view results directly in your notebook"
@@ -344,7 +344,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "4437d3e8",
+   "id": "feb52898",
    "metadata": {},
    "outputs": [],
    "source": [
diff --git a/trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb b/trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb
index d0afd548d..c46ac0330 100644
--- a/trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb
+++ b/trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb
@@ -17,7 +17,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "92e32b5d",
+   "id": "8d9feb48",
    "metadata": {},
    "source": [
     "# Llama-Index Quickstart\n",
@@ -32,7 +32,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "fb40dbb5",
+   "id": "7592f07c",
    "metadata": {},
    "source": [
     "## Setup\n",
@@ -44,17 +44,17 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "cf92085b",
+   "id": "ab35c4a0",
    "metadata": {},
    "outputs": [],
    "source": [
-    "#! pip install trulens-eval==0.16.0 llama_index>=0.8.29post1 html2text>=2020.1.16"
+    "# pip install trulens-eval==0.17.0a llama_index>=0.8.29post1 html2text>=2020.1.16"
    ]
   },
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "cfc4661c",
+   "id": "9e61fca6",
    "metadata": {},
    "source": [
     "### Add API keys\n",
@@ -64,7 +64,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "b5648da1",
+   "id": "ccadc9ac",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -75,7 +75,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "1a7b3220",
+   "id": "2475410a",
    "metadata": {},
    "source": [
     "### Import from LlamaIndex and TruLens"
@@ -84,7 +84,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "f69106ed",
+   "id": "3aa6d6af",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -98,7 +98,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "4c4bb053",
+   "id": "03d62c4e",
    "metadata": {},
    "source": [
     "### Create Simple LLM Application\n",
@@ -109,7 +109,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c519f949",
+   "id": "654c2513",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -126,7 +126,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "c8ac9fe5",
+   "id": "64e61c9a",
    "metadata": {},
    "source": [
     "### Send your first request"
@@ -135,7 +135,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "bb07a234",
+   "id": "ce91006b",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -146,7 +146,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "2c3fef2e",
+   "id": "d280ff6d",
    "metadata": {},
    "source": [
     "## Initialize Feedback Function(s)"
@@ -155,7 +155,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "33e3017b",
+   "id": "7562f17a",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -184,7 +184,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "c785829f",
+   "id": "60814999",
    "metadata": {},
    "source": [
     "## Instrument app for logging with TruLens"
@@ -193,7 +193,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "68e435dd",
+   "id": "834a4ed4",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -205,7 +205,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "48736fc7",
+   "id": "2e9e73c4",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -217,7 +217,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "afb53178",
+   "id": "76beb4b4",
    "metadata": {},
    "source": [
     "## Explore in a Dashboard"
@@ -226,7 +226,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "08b96e3e",
+   "id": "6fb3d7f4",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -238,7 +238,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "a18b5175",
+   "id": "a305729f",
    "metadata": {},
    "source": [
     "Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard."
@@ -247,7 +247,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "b7cbed8c",
+   "id": "0e6066be",
    "metadata": {},
    "source": [
     "Note: Feedback functions evaluated in the deferred manner can be seen in the \"Progress\" page of the TruLens dashboard."
@@ -256,7 +256,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "bd2ce7ac",
+   "id": "9bb9aa57",
    "metadata": {},
    "source": [
     "## Or view results directly in your notebook"
@@ -265,7 +265,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "9700ebfe",
+   "id": "5f9fa905",
    "metadata": {},
    "outputs": [],
    "source": [
diff --git a/trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb b/trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb
index b585ba65b..3f144a8ec 100644
--- a/trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb
+++ b/trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb
@@ -17,7 +17,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "51ead443",
+   "id": "0ecbdfa3",
    "metadata": {},
    "source": [
     "# Text to Text Quickstart\n",
@@ -30,17 +30,17 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c2bb21f9",
+   "id": "876032a8",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ! pip install trulens_eval==0.16.0"
+    "# ! pip install trulens_eval==0.17.0a"
    ]
   },
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "8734d273",
+   "id": "bab73ba7",
    "metadata": {},
    "source": [
     "## Setup\n",
@@ -51,7 +51,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "817aadb5",
+   "id": "e05e8c4f",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -63,7 +63,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "1118b485",
+   "id": "db07936d",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -74,7 +74,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "7743f8f3",
+   "id": "7b67eb2e",
    "metadata": {},
    "source": [
     "### Import from TruLens"
@@ -83,7 +83,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "5fe41297",
+   "id": "3564582a",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -97,7 +97,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "9b12d001",
+   "id": "541d99ec",
    "metadata": {},
    "source": [
     "### Create Simple Text to Text Application\n",
@@ -108,7 +108,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "cbbfb92f",
+   "id": "d8ebea10",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -125,7 +125,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ebb05179",
+   "id": "b523e5be",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -138,7 +138,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "e38b66b0",
+   "id": "fca07271",
    "metadata": {},
    "source": [
     "### Send your first request"
@@ -147,7 +147,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3be2bea5",
+   "id": "e233960e",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -159,7 +159,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "76025c42",
+   "id": "541d9269",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -169,7 +169,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "247af499",
+   "id": "f767bfc2",
    "metadata": {},
    "source": [
     "## Initialize Feedback Function(s)"
@@ -178,7 +178,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "962c8653",
+   "id": "84545974",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -192,7 +192,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "2c4ec643",
+   "id": "866790f4",
    "metadata": {},
    "source": [
     "## Instrument the callable for logging with TruLens"
@@ -201,7 +201,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "87fc0c18",
+   "id": "9982869c",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -213,7 +213,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "5d46ce71",
+   "id": "e6552e92",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -224,7 +224,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "07c60b73",
+   "id": "9b455c6a",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -235,7 +235,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "a0e40771",
+   "id": "256dc9e5",
    "metadata": {},
    "source": [
     "## Explore in a Dashboard"
@@ -244,7 +244,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "aa218643",
+   "id": "e67635bd",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -256,7 +256,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "7994941a",
+   "id": "bc8ab2b1",
    "metadata": {},
    "source": [
     "Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard."
@@ -265,7 +265,7 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "5e9419ac",
+   "id": "2e583555",
    "metadata": {},
    "source": [
     "## Or view results directly in your notebook"
@@ -274,7 +274,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "f5ae27f2",
+   "id": "6b33bd3a",
    "metadata": {},
    "outputs": [],
    "source": [
diff --git a/trulens_eval/examples/quickstart/py_script_quickstarts/all_tools.py b/trulens_eval/examples/quickstart/py_script_quickstarts/all_tools.py
index b41daa1ea..6cff6225e 100644
--- a/trulens_eval/examples/quickstart/py_script_quickstarts/all_tools.py
+++ b/trulens_eval/examples/quickstart/py_script_quickstarts/all_tools.py
@@ -9,7 +9,7 @@
 
 # In[ ]:
 
-# ! pip install trulens_eval==0.16.0 langchain>=0.0.263
+# ! pip install trulens_eval==0.17.0a langchain>=0.0.263
 
 # ## Setup
 # ### Add API keys
diff --git a/trulens_eval/examples/quickstart/py_script_quickstarts/langchain_quickstart.py b/trulens_eval/examples/quickstart/py_script_quickstarts/langchain_quickstart.py
index 6965b4140..25ba7d2c9 100644
--- a/trulens_eval/examples/quickstart/py_script_quickstarts/langchain_quickstart.py
+++ b/trulens_eval/examples/quickstart/py_script_quickstarts/langchain_quickstart.py
@@ -9,7 +9,7 @@
 
 # In[ ]:
 
-# ! pip install trulens_eval==0.16.0 langchain>=0.0.263
+# ! pip install trulens_eval==0.17.0a langchain>=0.0.263
 
 # ## Setup
 # ### Add API keys
diff --git a/trulens_eval/examples/quickstart/py_script_quickstarts/llama_index_quickstart.py b/trulens_eval/examples/quickstart/py_script_quickstarts/llama_index_quickstart.py
index b4c76e88e..43724660e 100644
--- a/trulens_eval/examples/quickstart/py_script_quickstarts/llama_index_quickstart.py
+++ b/trulens_eval/examples/quickstart/py_script_quickstarts/llama_index_quickstart.py
@@ -16,7 +16,7 @@
 
 # In[ ]:
 
-#! pip install trulens-eval==0.16.0 llama_index>=0.8.29post1 html2text>=2020.1.16
+# pip install trulens-eval==0.17.0a llama_index>=0.8.29post1 html2text>=2020.1.16
 
 # ### Add API keys
 # For this quickstart, you will need Open AI and Huggingface keys. The OpenAI key is used for embeddings and GPT, and the Huggingface key is used for evaluation.
diff --git a/trulens_eval/examples/quickstart/py_script_quickstarts/text2text_quickstart.py b/trulens_eval/examples/quickstart/py_script_quickstarts/text2text_quickstart.py
index 32131cc34..66c8552c9 100644
--- a/trulens_eval/examples/quickstart/py_script_quickstarts/text2text_quickstart.py
+++ b/trulens_eval/examples/quickstart/py_script_quickstarts/text2text_quickstart.py
@@ -9,7 +9,7 @@
 
 # In[ ]:
 
-# ! pip install trulens_eval==0.16.0
+# ! pip install trulens_eval==0.17.0a
 
 # ## Setup
 # ### Add API keys
diff --git a/trulens_eval/generated_files/all_tools.ipynb b/trulens_eval/generated_files/all_tools.ipynb
index 1826ab33a..0fcf669d2 100644
--- a/trulens_eval/generated_files/all_tools.ipynb
+++ b/trulens_eval/generated_files/all_tools.ipynb
@@ -18,7 +18,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ! pip install trulens_eval==0.16.0 langchain>=0.0.263"
+    "# ! pip install trulens_eval==0.17.0a langchain>=0.0.263"
    ]
   },
   {
@@ -711,7 +711,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.16"
+   "version": "3.11.5"
   },
   "vscode": {
    "interpreter": {
diff --git a/trulens_eval/tests/unit/test_providers.py b/trulens_eval/tests/unit/test_providers.py
index 98ab9bcdc..553a99b12 100644
--- a/trulens_eval/tests/unit/test_providers.py
+++ b/trulens_eval/tests/unit/test_providers.py
@@ -2,12 +2,14 @@
 Tests for Feedback providers.
""" -import unittest from pprint import PrettyPrinter from typing import Callable, Dict, List, Tuple -from unittest import TestCase, main +import unittest +from unittest import main +from unittest import TestCase -from trulens_eval.feedback.provider.base import LLMProvider, Provider +from trulens_eval.feedback.provider.base import LLMProvider +from trulens_eval.feedback.provider.base import Provider from trulens_eval.feedback.provider.openai import OpenAI from trulens_eval.keys import check_keys diff --git a/trulens_eval/trulens_eval/Leaderboard.py b/trulens_eval/trulens_eval/Leaderboard.py index ba9cfc10a..2f99c3607 100644 --- a/trulens_eval/trulens_eval/Leaderboard.py +++ b/trulens_eval/trulens_eval/Leaderboard.py @@ -36,13 +36,15 @@ def streamlit_app(): # Set the title and subtitle of the app st.title("App Leaderboard") - st.write("Average feedback values displayed in the range from 0 (worst) to 1 (best).") + st.write( + "Average feedback values displayed in the range from 0 (worst) to 1 (best)." + ) df, feedback_col_names = lms.get_records_and_feedback([]) feedback_directions = { ( - row.feedback_json.get("supplied_name", "") or row.feedback_json["implementation"]["name"] - ): - row.feedback_json.get("higher_is_better", True) + row.feedback_json.get("supplied_name", "") or + row.feedback_json["implementation"]["name"] + ): row.feedback_json.get("higher_is_better", True) for _, row in lms.get_feedback_defs().iterrows() } @@ -68,11 +70,15 @@ def streamlit_app(): # st.text('Metadata' + str(metadata)) st.header(app, help=draw_metadata(metadata)) app_feedback_col_names = [ - col_name for col_name in feedback_col_names if not app_df[col_name].isna().all() + col_name for col_name in feedback_col_names + if not app_df[col_name].isna().all() ] - col1, col2, col3, col4, *feedback_cols, col99 = st.columns(5 + len(app_feedback_col_names)) + col1, col2, col3, col4, *feedback_cols, col99 = st.columns( + 5 + len(app_feedback_col_names) + ) latency_mean = ( - app_df["latency"].apply(lambda td: td if td != MIGRATION_UNKNOWN_STR else None).mean() + app_df["latency"]. 
+            apply(lambda td: td if td != MIGRATION_UNKNOWN_STR else None).mean()
         )
 
         # app_df_feedback = df.loc[df.app_id == app]
@@ -82,8 +88,7 @@ def streamlit_app():
             "Average Latency (Seconds)",
             (
                 f"{millify(round(latency_mean, 5), precision=2)}"
-                if not math.isnan(latency_mean)
-                else "nan"
+                if not math.isnan(latency_mean) else "nan"
             ),
         )
         col3.metric(
@@ -93,7 +98,11 @@ def streamlit_app():
         col4.metric(
             "Total Tokens",
             millify(
-                sum(tokens for tokens in app_df.total_tokens if tokens is not None), precision=2
+                sum(
+                    tokens for tokens in app_df.total_tokens
+                    if tokens is not None
+                ),
+                precision=2
             ),
         )
 
@@ -107,18 +116,22 @@ def streamlit_app():
 
             if "distance" in col_name:
                 feedback_cols[i].metric(
-                    label=col_name, value=f"{round(mean, 2)}", delta_color="normal"
+                    label=col_name,
+                    value=f"{round(mean, 2)}",
+                    delta_color="normal"
                 )
             else:
-                cat = CATEGORY.of_score(mean, higher_is_better=feedback_directions[col_name])
+                cat = CATEGORY.of_score(
+                    mean, higher_is_better=feedback_directions[col_name]
+                )
                 feedback_cols[i].metric(
                     label=col_name,
                     value=f"{round(mean, 2)}",
                     delta=f"{cat.icon} {cat.adjective}",
                     delta_color=(
-                        "normal"
-                        if cat.compare(mean, CATEGORY.PASS[cat.direction].threshold)
-                        else "inverse"
+                        "normal" if cat.compare(
+                            mean, CATEGORY.PASS[cat.direction].threshold
+                        ) else "inverse"
                     ),
                 )
diff --git a/trulens_eval/trulens_eval/feedback/groundedness.py b/trulens_eval/trulens_eval/feedback/groundedness.py
index f3ce867bb..8585546ec 100644
--- a/trulens_eval/trulens_eval/feedback/groundedness.py
+++ b/trulens_eval/trulens_eval/feedback/groundedness.py
@@ -6,11 +6,11 @@
 
 from trulens_eval.feedback import prompts
 from trulens_eval.feedback.provider import Provider
+from trulens_eval.feedback.provider.bedrock import Bedrock
 from trulens_eval.feedback.provider.hugs import Huggingface
+from trulens_eval.feedback.provider.litellm import LiteLLM
 from trulens_eval.feedback.provider.openai import AzureOpenAI
 from trulens_eval.feedback.provider.openai import OpenAI
-from trulens_eval.feedback.provider.litellm import LiteLLM
-from trulens_eval.feedback.provider.bedrock import Bedrock
 from trulens_eval.utils.generated import re_0_10_rating
 from trulens_eval.utils.pyschema import WithClassInfo
 from trulens_eval.utils.serial import SerialModel
@@ -23,10 +23,7 @@ class Groundedness(SerialModel, WithClassInfo):
     """
     groundedness_provider: Provider
 
-    def __init__(
-        self,
-        groundedness_provider: Provider = None
-    ):
+    def __init__(self, groundedness_provider: Provider = None):
         """Instantiates the groundedness providers. Currently the groundedness functions work well with a summarizer.
         This class will use an LLM to find the relevant strings in a text. The groundedness_provider can either be an
         LLM provider (such as OpenAI) or NLI with huggingface.
@@ -90,7 +87,8 @@ def groundedness_measure(self, source: str, statement: str) -> float:
         )
 
         groundedness_scores = {}
-        if isinstance(self.groundedness_provider, (AzureOpenAI, OpenAI, LiteLLM, Bedrock)):
+        if isinstance(self.groundedness_provider,
+                      (AzureOpenAI, OpenAI, LiteLLM, Bedrock)):
             groundedness_scores[f"full_doc_score"] = re_0_10_rating(
                 self.groundedness_provider.
                 _groundedness_doc_in_out(source, statement)
@@ -146,7 +144,8 @@ def groundedness_measure_with_cot_reasons(
             float: A measure between 0 and 1, where 1 means each sentence is grounded in the source.
""" groundedness_scores = {} - if isinstance(self.groundedness_provider, (AzureOpenAI, OpenAI, LiteLLM, Bedrock)): + if isinstance(self.groundedness_provider, + (AzureOpenAI, OpenAI, LiteLLM, Bedrock)): plausible_junk_char_min = 4 # very likely "sentences" under 4 characters are punctuation, spaces, etc if len(statement) > plausible_junk_char_min: reason = self.groundedness_provider._groundedness_doc_in_out( diff --git a/trulens_eval/trulens_eval/feedback/groundtruth.py b/trulens_eval/trulens_eval/feedback/groundtruth.py index f77492154..3912395c2 100644 --- a/trulens_eval/trulens_eval/feedback/groundtruth.py +++ b/trulens_eval/trulens_eval/feedback/groundtruth.py @@ -175,9 +175,7 @@ def agreement_measure( return ret - def mae( - self, prompt: str, response: str, score: float - ) -> float: + def mae(self, prompt: str, response: str, score: float) -> float: """ Method to look up the numeric expected score from a golden set and take the differnce. diff --git a/trulens_eval/trulens_eval/feedback/provider/base.py b/trulens_eval/trulens_eval/feedback/provider/base.py index d3f59a162..bc5961b1f 100644 --- a/trulens_eval/trulens_eval/feedback/provider/base.py +++ b/trulens_eval/trulens_eval/feedback/provider/base.py @@ -156,11 +156,12 @@ def _extract_score_and_reasons_from_response( parts = line.split(":") if len(parts) > 1: supporting_evidence = ":".join(parts[1:]).strip() - reasons = {'reason': - ( - f"{'Criteria: ' + str(criteria) + ' ' if criteria else ''}\n" - f"{'Supporting Evidence: ' + str(supporting_evidence) if supporting_evidence else ''}" - ) + reasons = { + 'reason': + ( + f"{'Criteria: ' + str(criteria) + ' ' if criteria else ''}\n" + f"{'Supporting Evidence: ' + str(supporting_evidence) if supporting_evidence else ''}" + ) } return score, reasons else: diff --git a/trulens_eval/trulens_eval/feedback/provider/litellm.py b/trulens_eval/trulens_eval/feedback/provider/litellm.py index ddcbd2597..38643d0c6 100644 --- a/trulens_eval/trulens_eval/feedback/provider/litellm.py +++ b/trulens_eval/trulens_eval/feedback/provider/litellm.py @@ -1,5 +1,4 @@ import logging - from typing import Dict, Optional, Sequence from trulens_eval.feedback.provider.base import LLMProvider @@ -51,17 +50,21 @@ def _create_chat_completion( messages: Optional[Sequence[Dict]] = None, **kwargs ) -> str: - + from litellm import completion if prompt is not None: - comp = completion(model = self.model_engine, + comp = completion( + model=self.model_engine, messages=[{ "role": "system", "content": prompt - }], **kwargs + }], + **kwargs ) elif messages is not None: - comp = completion(model = self.model_engine, messages=messages, **kwargs) + comp = completion( + model=self.model_engine, messages=messages, **kwargs + ) else: raise ValueError("`prompt` or `messages` must be specified.") diff --git a/trulens_eval/trulens_eval/pages/Evaluations.py b/trulens_eval/trulens_eval/pages/Evaluations.py index 61f401f87..475e58b80 100644 --- a/trulens_eval/trulens_eval/pages/Evaluations.py +++ b/trulens_eval/trulens_eval/pages/Evaluations.py @@ -55,12 +55,12 @@ # TODO: remove code redundancy / redundant database calls feedback_directions = { ( - row.feedback_json.get("supplied_name", "") or row.feedback_json["implementation"]["name"] - ): - ( - "HIGHER_IS_BETTER" - if row.feedback_json.get("higher_is_better", True) else "LOWER_IS_BETTER" - ) for _, row in lms.get_feedback_defs().iterrows() + row.feedback_json.get("supplied_name", "") or + row.feedback_json["implementation"]["name"] + ): ( + "HIGHER_IS_BETTER" if 
row.feedback_json.get("higher_is_better", True) + else "LOWER_IS_BETTER" + ) for _, row in lms.get_feedback_defs().iterrows() } default_direction = "HIGHER_IS_BETTER" diff --git a/trulens_eval/trulens_eval/schema.py b/trulens_eval/trulens_eval/schema.py index 86bbd8595..89c85e5c2 100644 --- a/trulens_eval/trulens_eval/schema.py +++ b/trulens_eval/trulens_eval/schema.py @@ -379,11 +379,10 @@ class FeedbackResult(SerialModel): calls: Sequence[FeedbackCall] = [] # Final result, potentially aggregating multiple calls. - result: Optional[ - float] = None + result: Optional[float] = None # Error information if there was an error. - error: Optional[str] = None + error: Optional[str] = None # TODO: doc multi_result: Optional[str] = None diff --git a/trulens_eval/trulens_eval/ux/styles.py b/trulens_eval/trulens_eval/ux/styles.py index 5f8cebb0f..212e4ad47 100644 --- a/trulens_eval/trulens_eval/ux/styles.py +++ b/trulens_eval/trulens_eval/ux/styles.py @@ -1,6 +1,6 @@ -import operator from collections import defaultdict from enum import Enum +import operator from typing import Callable, List, NamedTuple, Optional import numpy as np @@ -115,11 +115,10 @@ def of_score(score: float, higher_is_better: bool = True) -> Category: valid_directions = ["HIGHER_IS_BETTER", "LOWER_IS_BETTER"] cellstyle_jscode = { - k: - f"""function(params) {{ + k: f"""function(params) {{ let v = parseFloat(params.value); """ + "\n".join( - f""" + f""" if (v {'>=' if k == "HIGHER_IS_BETTER" else '<='} {cat.threshold}) {{ return {{ 'color': 'black', @@ -127,7 +126,7 @@ def of_score(score: float, higher_is_better: bool = True) -> Category: }}; }} """ for cat in map(operator.itemgetter(k), CATEGORY.ALL) - ) + f""" + ) + f""" // i.e. not a number return {{ 'color': 'black',