From 95d8d0b21fe6d4f914cb4aaffcd8bf245752ef08 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 15 May 2024 12:30:51 -0400
Subject: [PATCH] Automated File Generation from Docs Notebook Changes (#1137)

Co-authored-by: joshreini1
Co-authored-by: Josh Reini <60949774+joshreini1@users.noreply.github.com>
---
 README.md                                  |  4 +-
 docs/trulens_eval/all_tools.ipynb          | 49 +++++++---------
 trulens_eval/generated_files/all_tools.py  | 58 +++++++++----------
 trulens_eval/trulens_eval/app.py           | 15 ++---
 .../trulens_eval/feedback/feedback.py      | 13 +++--
 .../trulens_eval/feedback/provider/base.py | 31 +++++-----
 .../trulens_eval/feedback/provider/hugs.py | 16 ++---
 trulens_eval/trulens_eval/instruments.py   |  8 ++-
 trulens_eval/trulens_eval/schema/types.py  |  2 +
 trulens_eval/trulens_eval/tru_rails.py     |  4 +-
 trulens_eval/trulens_eval/utils/serial.py  |  5 +-
 11 files changed, 99 insertions(+), 106 deletions(-)

diff --git a/README.md b/README.md
index c8701cbc4..1abcfef2a 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ community](https://communityinviter.com/apps/aiqualityforum/josh)!
 
 **Don't just vibe-check your llm app!** Systematically evaluate and track your
 LLM experiments with TruLens. As you develop your app including prompts, models,
-retreivers, knowledge sources and more, *TruLens-Eval* is the tool you need to
+retrievers, knowledge sources and more, *TruLens-Eval* is the tool you need to
 understand its performance.
 
 Fine-grained, stack-agnostic instrumentation and comprehensive evaluations help
@@ -38,7 +38,7 @@ you to identify failure modes & systematically iterate to improve your
 application.
 
 Read more about the core concepts behind TruLens including [Feedback
-Functions](https://www.trulens.org/trulens_eval/getting_started/core_concepts/
+Functions](https://www.trulens.org/trulens_eval/getting_started/core_concepts/feedback_functions/),
 [The RAG Triad](https://www.trulens.org/trulens_eval/getting_started/core_concepts/rag_triad/),
 and [Honest, Harmless and Helpful Evals](https://www.trulens.org/trulens_eval/getting_started/core_concepts/honest_harmless_helpful_evals/).
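The notebook and generated-file hunks below replace the deprecated `Groundedness` helper class with groundedness methods exposed directly on the feedback provider. A minimal sketch of the new-style definition, assuming `OPENAI_API_KEY` is set and that `rag_chain` is the LangChain RAG app built earlier in the notebook:

```python
from trulens_eval import Feedback
from trulens_eval.app import App
from trulens_eval.feedback.provider.openai import OpenAI

provider = OpenAI()

# `rag_chain` is assumed to be the LangChain RAG app constructed earlier in the notebook.
context = App.select_context(rag_chain)

# Groundedness is now a provider method; no separate Groundedness class or
# explicit grounded_statements_aggregator is required.
f_groundedness = (
    Feedback(provider.groundedness_measure_with_cot_reasons, name="Groundedness")
    .on(context.collect())  # collect retrieved context chunks into a single list
    .on_output()
)
```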
diff --git a/docs/trulens_eval/all_tools.ipynb b/docs/trulens_eval/all_tools.ipynb index 2f076badf..6fa74a30b 100644 --- a/docs/trulens_eval/all_tools.ipynb +++ b/docs/trulens_eval/all_tools.ipynb @@ -194,14 +194,11 @@ "from trulens_eval.app import App\n", "context = App.select_context(rag_chain)\n", "\n", - "from trulens_eval.feedback import Groundedness\n", - "grounded = Groundedness(groundedness_provider=OpenAI())\n", "# Define a groundedness feedback function\n", "f_groundedness = (\n", - " Feedback(grounded.groundedness_measure_with_cot_reasons)\n", + " Feedback(provider.groundedness_measure_with_cot_reasons)\n", " .on(context.collect()) # collect context chunks into a list\n", " .on_output()\n", - " .aggregate(grounded.grounded_statements_aggregator)\n", ")\n", "\n", "# Question/answer relevance between overall question and answer.\n", @@ -582,14 +579,12 @@ "from trulens_eval.app import App\n", "context = App.select_context(query_engine)\n", "\n", - "from trulens_eval.feedback import Groundedness\n", - "grounded = Groundedness(groundedness_provider=OpenAI())\n", "# Define a groundedness feedback function\n", "f_groundedness = (\n", - " Feedback(grounded.groundedness_measure_with_cot_reasons)\n", + " Feedback(provider.groundedness_measure_with_cot_reasons)\n", " .on(context.collect()) # collect context chunks into a list\n", " .on_output()\n", - " .aggregate(grounded.grounded_statements_aggregator)\n", + " .aggregate(provider.grounded_statements_aggregator)\n", ")\n", "\n", "# Question/answer relevance between overall question and answer.\n", @@ -762,7 +757,8 @@ "outputs": [], "source": [ "import os\n", - "os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"" + "os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n", + "os.environ[\"HUGGINGFACE_API_KEY\"] = \"hf_...\"" ] }, { @@ -862,6 +858,9 @@ "metadata": {}, "outputs": [], "source": [ + "from openai import OpenAI\n", + "oai_client = OpenAI()\n", + "\n", "class RAG_from_scratch:\n", " @instrument\n", " def retrieve(self, query: str) -> list:\n", @@ -872,7 +871,7 @@ " query_texts=query,\n", " n_results=2\n", " )\n", - " return results['documents'][0]\n", + " return results['documents']\n", "\n", " @instrument\n", " def generate_completion(self, query: str, context_str: list) -> str:\n", @@ -921,23 +920,18 @@ "outputs": [], "source": [ "from trulens_eval import Feedback, Select\n", - "from trulens_eval.feedback import Groundedness\n", "from trulens_eval.feedback.provider.openai import OpenAI\n", "\n", "import numpy as np\n", "\n", "provider = OpenAI()\n", "\n", - "grounded = Groundedness(groundedness_provider=provider)\n", - "\n", "# Define a groundedness feedback function\n", "f_groundedness = (\n", - " Feedback(grounded.groundedness_measure_with_cot_reasons, name = \"Groundedness\")\n", + " Feedback(provider.groundedness_measure_with_cot_reasons, name = \"Groundedness\")\n", " .on(Select.RecordCalls.retrieve.rets.collect())\n", " .on_output()\n", - " .aggregate(grounded.grounded_statements_aggregator)\n", ")\n", - "\n", "# Question/answer relevance between overall question and answer.\n", "f_answer_relevance = (\n", " Feedback(provider.relevance_with_cot_reasons, name = \"Answer Relevance\")\n", @@ -945,12 +939,12 @@ " .on_output()\n", ")\n", "\n", - "# Question/statement relevance between question and each context chunk.\n", + "# Context relevance between question and each context chunk.\n", "f_context_relevance = (\n", " Feedback(provider.context_relevance_with_cot_reasons, name = \"Context Relevance\")\n", " 
.on(Select.RecordCalls.retrieve.args.query)\n", - " .on(Select.RecordCalls.retrieve.rets.collect())\n", - " .aggregate(np.mean)\n", + " .on(Select.RecordCalls.retrieve.rets)\n", + " .aggregate(np.mean) # choose a different aggregation method if you wish\n", ")" ] }, @@ -2016,32 +2010,31 @@ "from trulens_eval.feedback import prompts\n", "\n", "class Custom_AzureOpenAI(AzureOpenAI):\n", - " def qs_relevance_with_cot_reasons_extreme(self, question: str, statement: str) -> Tuple[float, Dict]:\n", + " def context_relevance_with_cot_reasons_extreme(self, question: str, context: str) -> Tuple[float, Dict]:\n", " \"\"\"\n", - " Tweaked version of question statement relevance, extending AzureOpenAI provider.\n", + " Tweaked version of context relevance, extending AzureOpenAI provider.\n", " A function that completes a template to check the relevance of the statement to the question.\n", " Scoring guidelines for scores 5-8 are removed to push the LLM to more extreme scores.\n", " Also uses chain of thought methodology and emits the reasons.\n", "\n", " Args:\n", " question (str): A question being asked. \n", - " statement (str): A statement to the question.\n", + " context (str): A statement to the question.\n", "\n", " Returns:\n", " float: A value between 0 and 1. 0 being \"not relevant\" and 1 being \"relevant\".\n", " \"\"\"\n", "\n", - " system_prompt = str.format(prompts.QS_RELEVANCE, question = question, statement = statement)\n", - "\n", " # remove scoring guidelines around middle scores\n", - " system_prompt = system_prompt.replace(\n", + " system_prompt = prompts.CONTEXT_RELEVANCE_SYSTEM.replace(\n", " \"- STATEMENT that is RELEVANT to most of the QUESTION should get a score of 5, 6, 7 or 8. Higher score indicates more RELEVANCE.\\n\\n\", \"\")\n", " \n", - " system_prompt = system_prompt.replace(\n", + " user_prompt = str.format(prompts.CONTEXT_RELEVANCE_USER, question = question, context = context)\n", + " user_prompt = user_prompt.replace(\n", " \"RELEVANCE:\", prompts.COT_REASONS_TEMPLATE\n", " )\n", "\n", - " return self.generate_score_and_reasons(system_prompt)" + " return self.generate_score_and_reasons(system_prompt, user_prompt)" ] }, { @@ -2125,7 +2118,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.10.14" }, "vscode": { "interpreter": { diff --git a/trulens_eval/generated_files/all_tools.py b/trulens_eval/generated_files/all_tools.py index 7b95d5ea9..d6061e751 100644 --- a/trulens_eval/generated_files/all_tools.py +++ b/trulens_eval/generated_files/all_tools.py @@ -115,14 +115,11 @@ def format_docs(docs): context = App.select_context(rag_chain) -from trulens_eval.feedback import Groundedness - -grounded = Groundedness(groundedness_provider=OpenAI()) # Define a groundedness feedback function f_groundedness = ( - Feedback(grounded.groundedness_measure_with_cot_reasons + Feedback(provider.groundedness_measure_with_cot_reasons ).on(context.collect()) # collect context chunks into a list - .on_output().aggregate(grounded.grounded_statements_aggregator) + .on_output() ) # Question/answer relevance between overall question and answer. 
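In the from-scratch RAG example above, context relevance is now evaluated against the instrumented `retrieve` method's raw return value rather than a collected list, so each retrieved chunk is scored separately and the per-chunk scores are aggregated. A short sketch of that wiring, assuming a `RAG_from_scratch` instance whose `retrieve` method is decorated with `@instrument` as in the notebook:

```python
import numpy as np

from trulens_eval import Feedback, Select
from trulens_eval.feedback.provider.openai import OpenAI

provider = OpenAI()

# Context relevance between the input query and each retrieved chunk.
# Selecting `retrieve.rets` (instead of `retrieve.rets.collect()`) scores every
# returned chunk separately before aggregation.
f_context_relevance = (
    Feedback(provider.context_relevance_with_cot_reasons, name="Context Relevance")
    .on(Select.RecordCalls.retrieve.args.query)
    .on(Select.RecordCalls.retrieve.rets)
    .aggregate(np.mean)  # any other aggregation over per-chunk scores also works
)
```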
@@ -345,14 +342,11 @@ def display_call_stack(data): context = App.select_context(query_engine) -from trulens_eval.feedback import Groundedness - -grounded = Groundedness(groundedness_provider=OpenAI()) # Define a groundedness feedback function f_groundedness = ( - Feedback(grounded.groundedness_measure_with_cot_reasons + Feedback(provider.groundedness_measure_with_cot_reasons ).on(context.collect()) # collect context chunks into a list - .on_output().aggregate(grounded.grounded_statements_aggregator) + .on_output().aggregate(provider.grounded_statements_aggregator) ) # Question/answer relevance between overall question and answer. @@ -447,6 +441,7 @@ def display_call_stack(data): import os os.environ["OPENAI_API_KEY"] = "sk-..." +os.environ["HUGGINGFACE_API_KEY"] = "hf_..." # ## Get Data # @@ -500,6 +495,10 @@ def display_call_stack(data): # In[ ]: +from openai import OpenAI + +oai_client = OpenAI() + class RAG_from_scratch: @@ -509,7 +508,7 @@ def retrieve(self, query: str) -> list: Retrieve relevant text from vector store. """ results = vector_store.query(query_texts=query, n_results=2) - return results['documents'][0] + return results['documents'] @instrument def generate_completion(self, query: str, context_str: list) -> str: @@ -552,21 +551,16 @@ def query(self, query: str) -> str: from trulens_eval import Feedback from trulens_eval import Select -from trulens_eval.feedback import Groundedness from trulens_eval.feedback.provider.openai import OpenAI provider = OpenAI() -grounded = Groundedness(groundedness_provider=provider) - # Define a groundedness feedback function f_groundedness = ( Feedback( - grounded.groundedness_measure_with_cot_reasons, name="Groundedness" - ).on(Select.RecordCalls.retrieve.rets.collect() - ).on_output().aggregate(grounded.grounded_statements_aggregator) + provider.groundedness_measure_with_cot_reasons, name="Groundedness" + ).on(Select.RecordCalls.retrieve.rets.collect()).on_output() ) - # Question/answer relevance between overall question and answer. f_answer_relevance = ( Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance").on( @@ -574,12 +568,13 @@ def query(self, query: str) -> str: ).on_output() ) -# Question/statement relevance between question and each context chunk. +# Context relevance between question and each context chunk. f_context_relevance = ( Feedback( provider.context_relevance_with_cot_reasons, name="Context Relevance" - ).on(Select.RecordCalls.retrieve.args.query - ).on(Select.RecordCalls.retrieve.rets.collect()).aggregate(np.mean) + ).on(Select.RecordCalls.retrieve.args.query).on( + Select.RecordCalls.retrieve.rets + ).aggregate(np.mean) # choose a different aggregation method if you wish ) # ## Construct the app @@ -1193,38 +1188,37 @@ def style_check_professional(self, response: str) -> float: class Custom_AzureOpenAI(AzureOpenAI): - def qs_relevance_with_cot_reasons_extreme( - self, question: str, statement: str + def context_relevance_with_cot_reasons_extreme( + self, question: str, context: str ) -> Tuple[float, Dict]: """ - Tweaked version of question statement relevance, extending AzureOpenAI provider. + Tweaked version of context relevance, extending AzureOpenAI provider. A function that completes a template to check the relevance of the statement to the question. Scoring guidelines for scores 5-8 are removed to push the LLM to more extreme scores. Also uses chain of thought methodology and emits the reasons. Args: question (str): A question being asked. - statement (str): A statement to the question. 
+ context (str): A statement to the question. Returns: float: A value between 0 and 1. 0 being "not relevant" and 1 being "relevant". """ - system_prompt = str.format( - prompts.QS_RELEVANCE, question=question, statement=statement - ) - # remove scoring guidelines around middle scores - system_prompt = system_prompt.replace( + system_prompt = prompts.CONTEXT_RELEVANCE_SYSTEM.replace( "- STATEMENT that is RELEVANT to most of the QUESTION should get a score of 5, 6, 7 or 8. Higher score indicates more RELEVANCE.\n\n", "" ) - system_prompt = system_prompt.replace( + user_prompt = str.format( + prompts.CONTEXT_RELEVANCE_USER, question=question, context=context + ) + user_prompt = user_prompt.replace( "RELEVANCE:", prompts.COT_REASONS_TEMPLATE ) - return self.generate_score_and_reasons(system_prompt) + return self.generate_score_and_reasons(system_prompt, user_prompt) # ## Multi-Output Feedback functions diff --git a/trulens_eval/trulens_eval/app.py b/trulens_eval/trulens_eval/app.py index fc6c8d6bc..48bdd3666 100644 --- a/trulens_eval/trulens_eval/app.py +++ b/trulens_eval/trulens_eval/app.py @@ -349,7 +349,8 @@ class RecordingContext(): """ def __init__(self, app: mod_app.App, record_metadata: JSON = None): - self.calls: Dict[mod_types_schema.CallID, mod_record_schema.RecordAppCall] = {} + self.calls: Dict[mod_types_schema.CallID, + mod_record_schema.RecordAppCall] = {} """A record (in terms of its RecordAppCall) in process of being created. Storing as a map as we want to override calls with the same id which may @@ -418,11 +419,9 @@ def add_call(self, call: mod_record_schema.RecordAppCall): def finish_record( self, calls_to_record: Callable[[ - List[mod_record_schema.RecordAppCall], - mod_types_schema.Metadata, - Optional[mod_record_schema.Record] - ], mod_record_schema.Record - ], + List[mod_record_schema.RecordAppCall], mod_types_schema. 
+ Metadata, Optional[mod_record_schema.Record] + ], mod_record_schema.Record], existing_record: Optional[mod_record_schema.Record] = None ): """ @@ -432,9 +431,7 @@ def finish_record( with self.lock: record = calls_to_record( - list(self.calls.values()), - self.record_metadata, - existing_record + list(self.calls.values()), self.record_metadata, existing_record ) self.calls = {} diff --git a/trulens_eval/trulens_eval/feedback/feedback.py b/trulens_eval/trulens_eval/feedback/feedback.py index 639a86d49..916c64713 100644 --- a/trulens_eval/trulens_eval/feedback/feedback.py +++ b/trulens_eval/trulens_eval/feedback/feedback.py @@ -109,13 +109,14 @@ def rag_triad( ret = {} for f_imp, f_agg, arg1name, arg1lens, arg2name, arg2lens, f_name in [ - (provider.groundedness_measure_with_cot_reasons, np.mean, "source", context.collect(), - "statement", answer, "Groundedness"), - (provider.relevance_with_cot_reasons, np.mean, "prompt", question, "response", answer, "Answer Relevance"), - (provider.context_relevance_with_cot_reasons, np.mean, "question", question, "context", - context, "Context Relevance") + (provider.groundedness_measure_with_cot_reasons, np.mean, "source", + context.collect(), "statement", answer, "Groundedness"), + (provider.relevance_with_cot_reasons, np.mean, "prompt", question, + "response", answer, "Answer Relevance"), + (provider.context_relevance_with_cot_reasons, np.mean, "question", + question, "context", context, "Context Relevance") ]: - f = Feedback(f_imp, if_exists=context, name = f_name).aggregate(f_agg) + f = Feedback(f_imp, if_exists=context, name=f_name).aggregate(f_agg) if arg1lens is not None: f = f.on(**{arg1name: arg1lens}) else: diff --git a/trulens_eval/trulens_eval/feedback/provider/base.py b/trulens_eval/trulens_eval/feedback/provider/base.py index 610d3bbb6..51620289b 100644 --- a/trulens_eval/trulens_eval/feedback/provider/base.py +++ b/trulens_eval/trulens_eval/feedback/provider/base.py @@ -1,18 +1,18 @@ import logging -from typing import ClassVar, Dict, Optional, Sequence, Tuple, List +from typing import ClassVar, Dict, List, Optional, Sequence, Tuple import warnings +import nltk +from nltk.tokenize import sent_tokenize +import numpy as np +from tqdm.auto import tqdm + from trulens_eval.feedback import prompts from trulens_eval.feedback.provider.endpoint import base as mod_endpoint from trulens_eval.utils import generated as mod_generated_utils +from trulens_eval.utils.generated import re_0_10_rating from trulens_eval.utils.pyschema import WithClassInfo from trulens_eval.utils.serial import SerialModel -from trulens_eval.utils.generated import re_0_10_rating - -import nltk -from nltk.tokenize import sent_tokenize -import numpy as np -from tqdm.auto import tqdm logger = logging.getLogger(__name__) @@ -1119,7 +1119,7 @@ def stereotypes_with_cot_reasons(self, prompt: str, ) return self.generate_score_and_reasons(system_prompt, user_prompt) - + def groundedness_measure_with_cot_reasons( self, source: str, statement: str ) -> Tuple[float, dict]: @@ -1153,19 +1153,22 @@ def groundedness_measure_with_cot_reasons( nltk.download('punkt') groundedness_scores = {} reasons_str = "" - + hypotheses = sent_tokenize(statement) system_prompt = prompts.LLM_GROUNDEDNESS_SYSTEM for i, hypothesis in enumerate(tqdm( - hypotheses, desc="Groundedness per statement in source")): + hypotheses, desc="Groundedness per statement in source")): user_prompt = prompts.LLM_GROUNDEDNESS_USER.format( - premise=f"{source}", - hypothesis=f"{hypothesis}" + premise=f"{source}", 
hypothesis=f"{hypothesis}" + ) + score, reason = self.generate_score_and_reasons( + system_prompt, user_prompt ) - score, reason = self.generate_score_and_reasons(system_prompt, user_prompt) groundedness_scores[f"statement_{i}"] = score reasons_str += f"STATEMENT {i}:\n{reason['reason']}\n" # Calculate the average groundedness score from the scores dictionary - average_groundedness_score = float(np.mean(list(groundedness_scores.values()))) + average_groundedness_score = float( + np.mean(list(groundedness_scores.values())) + ) return average_groundedness_score, {"reasons": reasons_str} diff --git a/trulens_eval/trulens_eval/feedback/provider/hugs.py b/trulens_eval/trulens_eval/feedback/provider/hugs.py index 1af02e891..7c597b5bb 100644 --- a/trulens_eval/trulens_eval/feedback/provider/hugs.py +++ b/trulens_eval/trulens_eval/feedback/provider/hugs.py @@ -2,9 +2,13 @@ import logging from typing import Dict, get_args, get_origin, Optional, Tuple, Union +import nltk +from nltk.tokenize import sent_tokenize import numpy as np import requests +from tqdm.auto import tqdm +from trulens_eval.feedback import prompts from trulens_eval.feedback.provider.base import Provider from trulens_eval.feedback.provider.endpoint import HuggingfaceEndpoint from trulens_eval.feedback.provider.endpoint.base import DummyEndpoint @@ -12,12 +16,6 @@ from trulens_eval.utils.python import Future from trulens_eval.utils.python import locals_except from trulens_eval.utils.threading import ThreadPoolExecutor -from trulens_eval.feedback import prompts - -import nltk -from nltk.tokenize import sent_tokenize -import numpy as np -from tqdm.auto import tqdm logger = logging.getLogger(__name__) @@ -193,7 +191,7 @@ def get_scores(text): l1: float = float(1.0 - (np.linalg.norm(diff, ord=1)) / 2.0) return l1, dict(text1_scores=scores1, text2_scores=scores2) - + def groundedness_measure_with_nli(self, source: str, statement: str) -> Tuple[float, dict]: """ @@ -242,7 +240,9 @@ def groundedness_measure_with_nli(self, source: str, score=score * 10, ) groundedness_scores[f"statement_{i}"] = score - average_groundedness_score = float(np.mean(list(groundedness_scores.values()))) + average_groundedness_score = float( + np.mean(list(groundedness_scores.values())) + ) return average_groundedness_score, {"reasons": reasons_str} @_tci diff --git a/trulens_eval/trulens_eval/instruments.py b/trulens_eval/trulens_eval/instruments.py index e23a4357b..ed278c6ad 100644 --- a/trulens_eval/trulens_eval/instruments.py +++ b/trulens_eval/trulens_eval/instruments.py @@ -588,9 +588,11 @@ def handle_done(rets): if isinstance(rets, Awaitable): # If method produced an awaitable - logger.info(f"""This app produced an asynchronous response of type `{class_name(type(rets))}`. - This record will be updated once the response is available""") - + logger.info( + f"""This app produced an asynchronous response of type `{class_name(type(rets))}`. + This record will be updated once the response is available""" + ) + # TODO(piotrm): need to track costs of awaiting the ret in the # below. diff --git a/trulens_eval/trulens_eval/schema/types.py b/trulens_eval/trulens_eval/schema/types.py index 54c99bb3a..502e960a8 100644 --- a/trulens_eval/trulens_eval/schema/types.py +++ b/trulens_eval/trulens_eval/schema/types.py @@ -18,10 +18,12 @@ See [RecordAppCall.call_id][trulens_eval.schema.record.RecordAppCall.call_id]. 
""" + def new_call_id() -> CallID: """Generate a new call id.""" return str(uuid.uuid4()) + AppID: typing_extensions.TypeAlias = str """Unique identifier for an app. diff --git a/trulens_eval/trulens_eval/tru_rails.py b/trulens_eval/trulens_eval/tru_rails.py index 85c82767e..2ccbb89b7 100644 --- a/trulens_eval/trulens_eval/tru_rails.py +++ b/trulens_eval/trulens_eval/tru_rails.py @@ -442,7 +442,9 @@ def __getattr__(self, name): if name == "__name__": return self.__class__.__name__ # Return the class name of TruRails elif safe_hasattr(self.app, name): - return getattr(self.app, name) # Delegate to the wrapped app if it has the attribute + return getattr( + self.app, name + ) # Delegate to the wrapped app if it has the attribute else: raise AttributeError(f"TruRails has no attribute named {name}") diff --git a/trulens_eval/trulens_eval/utils/serial.py b/trulens_eval/trulens_eval/utils/serial.py index 935fa8e6b..39a2d1925 100644 --- a/trulens_eval/trulens_eval/utils/serial.py +++ b/trulens_eval/trulens_eval/utils/serial.py @@ -428,9 +428,8 @@ def get(self, obj: Dict[str, T]) -> Iterable[T]: logger.warning( "Object (of type %s is a sequence containing more than one dictionary. " "Lookup by item or attribute `%s` is ambiguous. " - "Use a lookup by index(es) or slice first to disambiguate.", - type(obj).__name__, - self.item_or_attribute + "Use a lookup by index(es) or slice first to disambiguate.", + type(obj).__name__, self.item_or_attribute ) for sub_obj in obj: try: