|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "id": "3bc2bc66-9510-4b18-945d-973378ea287f", |
| 6 | + "metadata": {}, |
| 7 | + "source": [ |
| 8 | + "## Load documents" |
| 9 | + ] |
| 10 | + }, |
| 11 | + { |
| 12 | + "cell_type": "code", |
| 13 | + "execution_count": 1, |
| 14 | + "id": "7a8a4a57-f95d-4072-9fd6-d989928298c1", |
| 15 | + "metadata": {}, |
| 16 | + "outputs": [ |
| 17 | + { |
| 18 | + "name": "stdout", |
| 19 | + "output_type": "stream", |
| 20 | + "text": [ |
| 21 | + "he 2022 FIFA World Cup was the 22nd FIFA World Cup, the world championship for national football teams organized by FIFA. It took place in Qatar from 20 November to 18 December 2022, after the country was awarded the hosting rights in 2010.\n", |
| 22 | + "\n", |
| 23 | + "Argentina were crowned the champions after winning the final against the title holder France 4–2 on penalties following a 3–3 draw after extra time.\n" |
| 24 | + ] |
| 25 | + } |
| 26 | + ], |
| 27 | + "source": [ |
| 28 | + "from langchain_community.document_loaders import UnstructuredMarkdownLoader\n", |
| 29 | + "loader = UnstructuredMarkdownLoader('fifa/2022.md')\n", |
| 30 | + "docs = loader.load()\n", |
| 31 | + "!cat fifa/2022.md" |
| 32 | + ] |
| 33 | + }, |
| 34 | + { |
| 35 | + "cell_type": "markdown", |
| 36 | + "id": "953fc9d4-b7fc-4cf2-9661-df3b63557e5d", |
| 37 | + "metadata": {}, |
| 38 | + "source": [ |
| 39 | + "## Split documents" |
| 40 | + ] |
| 41 | + }, |
| 42 | + { |
| 43 | + "cell_type": "code", |
| 44 | + "execution_count": 2, |
| 45 | + "id": "6dc83f65-5b73-4136-8cae-b6c58cb88aed", |
| 46 | + "metadata": {}, |
| 47 | + "outputs": [], |
| 48 | + "source": [ |
| 49 | + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", |
| 50 | + "\n", |
| 51 | + "\n", |
| 52 | + "text_splitter = RecursiveCharacterTextSplitter()\n", |
| 53 | + "documents = text_splitter.split_documents(docs)" |
| 54 | + ] |
| 55 | + }, |
| 56 | + { |
| 57 | + "cell_type": "markdown", |
| 58 | + "id": "79b83071-9c85-46ce-a8c7-a20bf2252fe9", |
| 59 | + "metadata": {}, |
| 60 | + "source": [ |
| 61 | + "## Index documents" |
| 62 | + ] |
| 63 | + }, |
| 64 | + { |
| 65 | + "cell_type": "code", |
| 66 | + "execution_count": 3, |
| 67 | + "id": "f57c1d99-5ab8-46fb-8c60-1fcc0bf90355", |
| 68 | + "metadata": {}, |
| 69 | + "outputs": [], |
| 70 | + "source": [ |
| 71 | + "from langchain_openai import OpenAIEmbeddings\n", |
| 72 | + "from langchain_community.vectorstores import FAISS\n", |
| 73 | + "\n", |
| 74 | + "embeddings = OpenAIEmbeddings()\n", |
| 75 | + "vector = FAISS.from_documents(documents, embeddings)" |
| 76 | + ] |
| 77 | + }, |
| 78 | + { |
| 79 | + "cell_type": "markdown", |
| 80 | + "id": "d3bd3443-1e28-4561-9b57-36d978ce1547", |
| 81 | + "metadata": {}, |
| 82 | + "source": [ |
| 83 | + "## RAG prompt" |
| 84 | + ] |
| 85 | + }, |
| 86 | + { |
| 87 | + "cell_type": "code", |
| 88 | + "execution_count": 4, |
| 89 | + "id": "8e4e785d-186f-46b8-8f25-faa6efb5e2f4", |
| 90 | + "metadata": {}, |
| 91 | + "outputs": [], |
| 92 | + "source": [ |
| 93 | + "from langchain_openai import ChatOpenAI\n", |
| 94 | + "from langchain_core.prompts import ChatPromptTemplate\n", |
| 95 | + "\n", |
| 96 | + "prompt = ChatPromptTemplate.from_template(\"\"\"Answer the following question based only on the provided context:\n", |
| 97 | + "\n", |
| 98 | + "<context>\n", |
| 99 | + "{context}\n", |
| 100 | + "</context>\n", |
| 101 | + "\n", |
| 102 | + "Question: {input}\"\"\")\n", |
| 103 | + "llm = ChatOpenAI()" |
| 104 | + ] |
| 105 | + }, |
| 106 | + { |
| 107 | + "cell_type": "markdown", |
| 108 | + "id": "4a563e6e-8190-471a-9f58-e58af09928b1", |
| 109 | + "metadata": {}, |
| 110 | + "source": [ |
| 111 | + "## RAG chain" |
| 112 | + ] |
| 113 | + }, |
| 114 | + { |
| 115 | + "cell_type": "code", |
| 116 | + "execution_count": 5, |
| 117 | + "id": "08312246-1332-4e31-9c8f-2aac74064f7c", |
| 118 | + "metadata": {}, |
| 119 | + "outputs": [], |
| 120 | + "source": [ |
| 121 | + "from langchain_core.runnables import RunnableParallel, RunnablePassthrough\n", |
| 122 | + "from langchain_core.output_parsers import StrOutputParser\n", |
| 123 | + "# Join the retrieved chunks into plain text; piping the retriever straight into the prompt would fill {context} with the repr of a list of Document objects.\n", |
| 124 | + "retriever = vector.as_retriever()\n", |
| 125 | + "chain = (\n", |
| 126 | + " RunnableParallel({\"context\": retriever | (lambda hits: \"\\n\\n\".join(hit.page_content for hit in hits)), \"input\": RunnablePassthrough()})\n", |
| 127 | + " | prompt\n", |
| 128 | + " | llm\n", |
| 129 | + " | StrOutputParser()\n", |
| 130 | + ")" |
| 131 | + ] |
| 132 | + }, |
| 133 | + { |
| 134 | + "cell_type": "code", |
| 135 | + "execution_count": 6, |
| 136 | + "id": "9a9de0d5-e33a-4427-a846-1b42ca5a7aa9", |
| 137 | + "metadata": {}, |
| 138 | + "outputs": [ |
| 139 | + { |
| 140 | + "name": "stdout", |
| 141 | + "output_type": "stream", |
| 142 | + "text": [ |
| 143 | + "Argentina won the championship of the 22nd FIFA World Cup.\n" |
| 144 | + ] |
| 145 | + } |
| 146 | + ], |
| 147 | + "source": [ |
| 148 | + "answer = chain.invoke(\"Which country won the championship of the 22nd FIFA World Cup?\")\n", |
| 149 | + "print(answer)" |
| 150 | + ] |
| 151 | + }, |
| 152 | + { |
| 153 | + "cell_type": "code", |
| 154 | + "execution_count": 7, |
| 155 | + "id": "11095dc3-f815-40be-8fe3-c50fd72fcd36", |
| 156 | + "metadata": {}, |
| 157 | + "outputs": [ |
| 158 | + { |
| 159 | + "name": "stdout", |
| 160 | + "output_type": "stream", |
| 161 | + "text": [ |
| 162 | + "Based on the provided context, there is no information about the winner of the 23rd FIFA World Cup.\n" |
| 163 | + ] |
| 164 | + } |
| 165 | + ], |
| 166 | + "source": [ |
| 167 | + "answer = chain.invoke(\"Which country won the championship of the 23rd FIFA World Cup?\")\n", |
| 168 | + "print(answer)" |
| 169 | + ] |
| 170 | + }, |
| 171 | + { |
| 172 | + "cell_type": "code", |
| 173 | + "execution_count": 8, |
| 174 | + "id": "14a07101-42ce-4fa6-85ba-4b632f7c8a01", |
| 175 | + "metadata": {}, |
| 176 | + "outputs": [ |
| 177 | + { |
| 178 | + "name": "stdout", |
| 179 | + "output_type": "stream", |
| 180 | + "text": [ |
| 181 | + "In the 23rd FIFA World Cup held in 2026, the final match featured Argentina and France, In the end, France won 2:1 to clinch the championship.\n" |
| 182 | + ] |
| 183 | + } |
| 184 | + ], |
| 185 | + "source": [ |
| 186 | + "from langchain_community.document_loaders import TextLoader\n", |
| 187 | + "# Use dedicated names so `loader` and `docs` from the 2022 cell are not overwritten (keeps the split cell re-runnable).\n", |
| 188 | + "loader_2026 = TextLoader(\"fifa/2026.txt\")\n", |
| 189 | + "docs_2026 = loader_2026.load()\n", |
| 190 | + "documents2 = text_splitter.split_documents(docs_2026)\n", |
| 191 | + "!cat fifa/2026.txt" |
| 192 | + ] |
| 193 | + }, |
| 194 | + { |
| 195 | + "cell_type": "code", |
| 196 | + "execution_count": 9, |
| 197 | + "id": "c23f4e02-cb43-4c01-8756-b1a92ad95ee7", |
| 198 | + "metadata": {}, |
| 199 | + "outputs": [], |
| 200 | + "source": [ |
| 201 | + "vector2 = FAISS.from_documents(documents2 + documents, embeddings)\n", |
| 202 | + "retriever2 = vector2.as_retriever()\n", |
| 203 | + "chain2 = (  # context is the joined page text of the retrieved chunks, not the repr of Document objects\n", |
| 204 | + " RunnableParallel({\"context\": retriever2 | (lambda hits: \"\\n\\n\".join(hit.page_content for hit in hits)), \"input\": RunnablePassthrough()})\n", |
| 205 | + " | prompt\n", |
| 206 | + " | llm\n", |
| 207 | + " | StrOutputParser()\n", |
| 208 | + ")" |
| 209 | + ] |
| 210 | + }, |
| 211 | + { |
| 212 | + "cell_type": "code", |
| 213 | + "execution_count": 10, |
| 214 | + "id": "78cbcef5-43e7-4e8d-89c6-01030009120b", |
| 215 | + "metadata": {}, |
| 216 | + "outputs": [ |
| 217 | + { |
| 218 | + "name": "stdout", |
| 219 | + "output_type": "stream", |
| 220 | + "text": [ |
| 221 | + "Argentina\n" |
| 222 | + ] |
| 223 | + } |
| 224 | + ], |
| 225 | + "source": [ |
| 226 | + "answer = chain2.invoke(\"Which country won the championship of the 22nd FIFA World Cup?\")\n", |
| 227 | + "print(answer)" |
| 228 | + ] |
| 229 | + }, |
| 230 | + { |
| 231 | + "cell_type": "code", |
| 232 | + "execution_count": 11, |
| 233 | + "id": "145a3834-84b5-4a63-9c21-dcfb6903e1e3", |
| 234 | + "metadata": {}, |
| 235 | + "outputs": [ |
| 236 | + { |
| 237 | + "name": "stdout", |
| 238 | + "output_type": "stream", |
| 239 | + "text": [ |
| 240 | + "France\n" |
| 241 | + ] |
| 242 | + } |
| 243 | + ], |
| 244 | + "source": [ |
| 245 | + "answer = chain2.invoke(\"Which country won the championship of the 23rd FIFA World Cup?\")\n", |
| 246 | + "print(answer)" |
| 247 | + ] |
| 248 | + } |
| 249 | + ], |
| 250 | + "metadata": { |
| 251 | + "kernelspec": { |
| 252 | + "display_name": "Python 3 (ipykernel)", |
| 253 | + "language": "python", |
| 254 | + "name": "python3" |
| 255 | + }, |
| 256 | + "language_info": { |
| 257 | + "codemirror_mode": { |
| 258 | + "name": "ipython", |
| 259 | + "version": 3 |
| 260 | + }, |
| 261 | + "file_extension": ".py", |
| 262 | + "mimetype": "text/x-python", |
| 263 | + "name": "python", |
| 264 | + "nbconvert_exporter": "python", |
| 265 | + "pygments_lexer": "ipython3", |
| 266 | + "version": "3.11.6" |
| 267 | + } |
| 268 | + }, |
| 269 | + "nbformat": 4, |
| 270 | + "nbformat_minor": 5 |
| 271 | +} |
0 commit comments