Expand langchain instrumentation for RAGs (#691)
* add langchain rag instrumentation

* remove unneeded
joshreini1 authored Dec 18, 2023
1 parent 05e1a46 commit dc71d58
Showing 2 changed files with 103 additions and 41 deletions.
138 changes: 99 additions & 39 deletions trulens_eval/examples/quickstart/langchain_quickstart.ipynb
@@ -28,7 +28,7 @@
"metadata": {},
"outputs": [],
"source": [
"# ! pip install trulens_eval==0.19.1 openai==1.3.7"
"# ! pip install trulens_eval==0.19.1 openai==1.3.7 langchain chromadb langchainhub bs4"
]
},
{
@@ -38,8 +38,7 @@
"outputs": [],
"source": [
"import os\n",
"os.environ[\"OPENAI_API_KEY\"] = \"...\"\n",
"os.environ[\"HUGGINGFACE_API_KEY\"] = \"...\""
"os.environ[\"OPENAI_API_KEY\"] = \"...\""
]
},
{
@@ -62,24 +61,26 @@
"from trulens_eval import TruChain, Feedback, Huggingface, Tru\n",
"from trulens_eval.schema import FeedbackResult\n",
"tru = Tru()\n",
"tru.reset_database()\n",
"\n",
"# Imports from langchain to build app. You may need to install langchain first\n",
"# with the following:\n",
"# ! pip install langchain>=0.0.170\n",
"from langchain.chains import LLMChain\n",
"from langchain.llms import OpenAI\n",
"from langchain.prompts import ChatPromptTemplate, PromptTemplate\n",
"from langchain.prompts import HumanMessagePromptTemplate"
"# Imports from langchain to build app\n",
"import bs4\n",
"from langchain import hub\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.document_loaders import WebBaseLoader\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.schema import StrOutputParser\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain.vectorstores import Chroma\n",
"from langchain_core.runnables import RunnablePassthrough"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create Simple LLM Application\n",
"\n",
"This example uses a LangChain framework and OpenAI LLM"
"### Load documents"
]
},
{
@@ -88,27 +89,22 @@
"metadata": {},
"outputs": [],
"source": [
"full_prompt = HumanMessagePromptTemplate(\n",
" prompt=PromptTemplate(\n",
" template=\n",
" \"Provide a helpful response with relevant background information for the following: {prompt}\",\n",
" input_variables=[\"prompt\"],\n",
" )\n",
"loader = WebBaseLoader(\n",
" web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n",
" bs_kwargs=dict(\n",
" parse_only=bs4.SoupStrainer(\n",
" class_=(\"post-content\", \"post-title\", \"post-header\")\n",
" )\n",
" ),\n",
")\n",
"\n",
"chat_prompt_template = ChatPromptTemplate.from_messages([full_prompt])\n",
"\n",
"llm = OpenAI(temperature=0.9, max_tokens=128)\n",
"\n",
"chain = LLMChain(llm=llm, prompt=chat_prompt_template, verbose=True)"
"docs = loader.load()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Send your first request"
"### Create Vector Store"
]
},
{
@@ -117,7 +113,17 @@
"metadata": {},
"outputs": [],
"source": [
"prompt_input = '¿que hora es?'"
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
"splits = text_splitter.split_documents(docs)\n",
"\n",
"vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create RAG"
]
},
{
@@ -126,9 +132,37 @@
"metadata": {},
"outputs": [],
"source": [
"llm_response = chain(prompt_input)\n",
"retriever = vectorstore.as_retriever()\n",
"\n",
"display(llm_response)"
"prompt = hub.pull(\"rlm/rag-prompt\")\n",
"llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
"\n",
"def format_docs(docs):\n",
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
"\n",
"rag_chain = (\n",
" {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
" | prompt\n",
" | llm\n",
" | StrOutputParser()\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Send your first request"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rag_chain.invoke(\"What is Task Decomposition?\")"
]
},
{
@@ -145,13 +179,30 @@
"metadata": {},
"outputs": [],
"source": [
"# Initialize Huggingface-based feedback function collection class:\n",
"hugs = Huggingface()\n",
"from trulens_eval.feedback.provider import OpenAI\n",
"from trulens_eval import Select\n",
"import numpy as np\n",
"# Initialize provider class\n",
"openai = OpenAI()\n",
"from trulens_eval.feedback import Groundedness\n",
"grounded = Groundedness(groundedness_provider=OpenAI())\n",
"# Define a groundedness feedback function\n",
"f_groundedness = (\n",
" Feedback(grounded.groundedness_measure_with_cot_reasons)\n",
" .on(Select.RecordCalls.first.invoke.rets.context)\n",
" .on_output()\n",
" .aggregate(grounded.grounded_statements_aggregator)\n",
")\n",
"\n",
"# Define a language match feedback function using HuggingFace.\n",
"f_lang_match = Feedback(hugs.language_match).on_input_output()\n",
"# By default this will check language match on the main app input and main app\n",
"# output."
"# Question/answer relevance between overall question and answer.\n",
"f_qa_relevance = Feedback(openai.relevance).on_input_output()\n",
"# Question/statement relevance between question and each context chunk.\n",
"f_context_relevance = (\n",
" Feedback(openai.qs_relevance)\n",
" .on(Select.RecordCalls.first.invoke.args.input)\n",
" .on(Select.RecordCalls.first.invoke.rets.context)\n",
" .aggregate(np.mean)\n",
")"
]
},
{
@@ -168,9 +219,9 @@
"metadata": {},
"outputs": [],
"source": [
"tru_recorder = TruChain(chain,\n",
"tru_recorder = TruChain(rag_chain,\n",
" app_id='Chain1_ChatApplication',\n",
" feedbacks=[f_lang_match])"
" feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness])"
]
},
{
Expand All @@ -180,11 +231,20 @@
"outputs": [],
"source": [
"with tru_recorder as recording:\n",
" llm_response = chain(prompt_input)\n",
" llm_response = rag_chain.invoke(\"What is Task Decomposition?\")\n",
"\n",
"display(llm_response)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tru.run_dashboard()"
]
},
{
"cell_type": "markdown",
"metadata": {},
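For context, a minimal sketch of how the recorded results can also be inspected programmatically (an illustration, not part of this diff, assuming the trulens_eval 0.19.x API):

# Illustrative sketch, not part of this commit; assumes trulens_eval 0.19.x.
# Pull all records for the app, with feedback scores, as a DataFrame.
records_df, feedback_columns = tru.get_records_and_feedback(
    app_ids=["Chain1_ChatApplication"]
)

# Each feedback function defined above contributes one score column.
print(records_df[["input", "output"] + feedback_columns].head())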
6 changes: 4 additions & 2 deletions trulens_eval/trulens_eval/tru_chain.py
@@ -51,7 +51,7 @@
class LangChainInstrument(Instrument):

class Default:
MODULES = {"langchain."}
MODULES = {"langchain"}

# Thunk because langchain is optional. TODO: Not anymore.
CLASSES = lambda: {
@@ -94,7 +94,9 @@ class Default:
"acall":
lambda o: isinstance(o, Chain),
"_get_relevant_documents":
lambda o: True, # VectorStoreRetriever, langchain >= 0.230
lambda o: isinstance(o, (RunnableSerializable)),
"_aget_relevant_documents":
lambda o: isinstance(o, (RunnableSerializable)),
# "format_prompt": lambda o: isinstance(o, langchain.prompts.base.BasePromptTemplate),
# "format": lambda o: isinstance(o, langchain.prompts.base.BasePromptTemplate),
# the prompt calls might be too small to be interesting
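The dropped trailing dot in MODULES is what lets instrumentation reach the split-out langchain_core and langchain_community packages that LCEL chains import from, assuming module names are matched by prefix; a quick illustration:

# Illustrative sketch (assumes Instrument matches module names by prefix).
for mod in ("langchain.chains", "langchain_core.runnables", "langchain_community.chat_models"):
    print(mod, mod.startswith("langchain."), mod.startswith("langchain"))
# "langchain." matches only the first; "langchain" matches all three, so
# Runnables such as the retriever step are now instrumented too.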
