From cc6338fb4f65a80c3b2d3be15d845c1dcc33c4ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Davi=20Arag=C3=A3o?= Date: Tue, 11 Nov 2025 12:03:28 -0300 Subject: [PATCH 1/2] feat: update text summarization output --- .../notebooks/register-model.ipynb | 51 +++++++----- .../notebooks/run-workflow.ipynb | 82 ++++++++++--------- 2 files changed, 74 insertions(+), 59 deletions(-) diff --git a/generative-ai/text-summarization-with-langchain/notebooks/register-model.ipynb b/generative-ai/text-summarization-with-langchain/notebooks/register-model.ipynb index 2b3bc021..12ccbc4c 100644 --- a/generative-ai/text-summarization-with-langchain/notebooks/register-model.ipynb +++ b/generative-ai/text-summarization-with-langchain/notebooks/register-model.ipynb @@ -72,7 +72,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 01:04:39 - INFO - Notebook execution started.\n" + "2025-11-10 19:53:44 - INFO - Notebook execution started.\n" ] } ], @@ -101,8 +101,8 @@ "output_type": "stream", "text": [ "Note: you may need to restart the kernel to use updated packages.\n", - "CPU times: user 27.6 ms, sys: 7.98 ms, total: 35.6 ms\n", - "Wall time: 1.45 s\n" + "CPU times: user 183 ms, sys: 111 ms, total: 294 ms\n", + "Wall time: 7.47 s\n" ] } ], @@ -275,21 +275,14 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "1b67f548-6765-4d42-8730-fab796acd64c", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2025/11/05 01:04:48 INFO mlflow.tracking.fluent: Experiment with name 'Summarization_Service' does not exist. Creating a new experiment.\n" - ] - }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4659dbb75ff54aac8d65d97926ac70f5", + "model_id": "d117eca843be4d39bb13e1885ad154c0", "version_major": 2, "version_minor": 0 }, @@ -303,7 +296,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "01efb80d6a6648889e546cfa5b0450f0", + "model_id": "ac8591cff46d4012b48a0bbc52ab0cba", "version_major": 2, "version_minor": 0 }, @@ -317,7 +310,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "996080307bfa491bbb13fe1907e4af7c", + "model_id": "606bd7e34bb3473d83eea0732fbf658b", "version_major": 2, "version_minor": 0 }, @@ -332,7 +325,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 01:08:27,267 - INFO - Model and artifacts successfully registered in MLflow.\n", + "2025-11-10 20:18:50,698 - INFO - Model and artifacts successfully registered in MLflow.\n", "Successfully registered model 'Text_Summarization_Service'.\n" ] }, @@ -340,9 +333,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Model registered successfully with run ID: 51182439e0ee4c56ab46a49b0e868f5c\n", - "CPU times: user 770 ms, sys: 19.4 s, total: 20.2 s\n", - "Wall time: 3min 39s\n" + "Model registered successfully with run ID: d89e0153703f4066991946b05f3d68a9\n", + "CPU times: user 8.74 s, sys: 3min, total: 3min 9s\n", + "Wall time: 16min 50s\n" ] }, { @@ -391,7 +384,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "8649162e-d673-4370-9a6f-9b37a8a1f34d", "metadata": {}, "outputs": [ @@ -399,8 +392,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 01:08:27 - INFO - ⏱️ Total execution time: 3m 48.55s\n", - "2025-11-05 01:08:27 - INFO - ✅ Notebook execution completed successfully.\n" + "2025-11-10 20:18:52 - INFO - ⏱️ Total execution time: 25m 7.85s\n", + "2025-11-10 20:18:52 - INFO - ✅ Notebook execution completed successfully.\n" ] } ], @@ -421,6 +414,22 @@ "source": [ "Built with ❤️ using [**HP AI Studio**](https://hp.com/ai-studio)." ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec79f1cb-dc2d-478a-8e4b-2dc5c0a704cf", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b6b9a6a-6a67-46de-8fbf-19c28726ef89", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/generative-ai/text-summarization-with-langchain/notebooks/run-workflow.ipynb b/generative-ai/text-summarization-with-langchain/notebooks/run-workflow.ipynb index 7f16d832..cfdbbb8f 100644 --- a/generative-ai/text-summarization-with-langchain/notebooks/run-workflow.ipynb +++ b/generative-ai/text-summarization-with-langchain/notebooks/run-workflow.ipynb @@ -82,7 +82,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 00:26:42 - INFO - Notebook execution started.\n" + "2025-11-10 19:42:14 - INFO - Notebook execution started.\n" ] } ], @@ -118,9 +118,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "Note: you may need to restart the kernel to use updated packages.\n", - "CPU times: user 44.6 ms, sys: 30.4 ms, total: 75 ms\n", - "Wall time: 4.25 s\n" + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "langchain-classic 1.0.0a1 requires langchain-core<2.0.0,>=1.0.0a7, but you have langchain-core 0.3.79 which is incompatible.\n", + "langchain-classic 1.0.0a1 requires langchain-text-splitters<2.0.0,>=1.0.0a1, but you have langchain-text-splitters 0.3.11 which is incompatible.\n", + "grpcio-status 1.76.0 requires protobuf<7.0.0,>=6.31.1, but you have protobuf 5.29.5 which is incompatible.\n", + "langchain-aws 1.0.0a1 requires langchain-core<2.0.0,>=1.0.0a4, but you have langchain-core 0.3.79 which is incompatible.\n", + "langchain-openai 1.0.0a4 requires langchain-core<2.0.0,>=1.0.0a7, but you have langchain-core 0.3.79 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n", + "CPU times: user 1.51 s, sys: 625 ms, total: 2.13 s\n", + "Wall time: 1min 1s\n" ] } ], @@ -355,7 +361,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 00:26:55 - INFO - Local Llama model is properly configured. \n" + "2025-11-10 19:43:52 - INFO - Local Llama model is properly configured. \n" ] } ], @@ -672,7 +678,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "86c96659a6c643398256c7e9077fe2d6", + "model_id": "a75117116fd843179eec7546b9246ba4", "version_major": 2, "version_minor": 0 }, @@ -686,7 +692,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "56edf0dd1f654198ba10da4f6f5dd224", + "model_id": "c81ba1d2f9eb4f40a62b30e5b2a03938", "version_major": 2, "version_minor": 0 }, @@ -700,7 +706,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "856c034c5cfa4577bd434977813bc066", + "model_id": "989ac43d10be4cb88566577c82ec598d", "version_major": 2, "version_minor": 0 }, @@ -714,7 +720,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ca5180955987458989daa9b935e93f29", + "model_id": "27ee963748c24cc494a6737d4a68403c", "version_major": 2, "version_minor": 0 }, @@ -728,7 +734,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "225d40ea79f2464fa9f62499c6596f08", + "model_id": "cb155f0e35b84987ad8f189bf8c5c231", "version_major": 2, "version_minor": 0 }, @@ -749,7 +755,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bf1d17f1d9ae4001a453d133259151ed", + "model_id": "9935a5bcb8e8464aa5b7d0ae316bba4b", "version_major": 2, "version_minor": 0 }, @@ -763,7 +769,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c2348e77519f4e9a84bda11b17dee0c5", + "model_id": "3876f0051d98481daac4db54fa9d143d", "version_major": 2, "version_minor": 0 }, @@ -777,7 +783,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "de8f29d1da914548bbc13f4dcb7ac22d", + "model_id": "fd5daa81421842bda5101bbbe45b2e99", "version_major": 2, "version_minor": 0 }, @@ -791,7 +797,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "16df29f056ea46129b9610e3d362d276", + "model_id": "efd1610a56b147d6975ab8cb4720c929", "version_major": 2, "version_minor": 0 }, @@ -805,7 +811,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "62c7795d9d414a558ead29264f096eba", + "model_id": "384d711ec8354ec9b587bf75fecac976", "version_major": 2, "version_minor": 0 }, @@ -819,7 +825,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2814d964cc2144df957c6e718226bf25", + "model_id": "3c0f2983de394e14aba671a00aa9a26c", "version_major": 2, "version_minor": 0 }, @@ -1172,15 +1178,15 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 00:27:01 - INFO - Encoding 250 content items with embedding model\n", - "2025-11-05 00:27:01 - INFO - Generated embeddings with shape: (250, 384)\n", - "2025-11-05 00:27:01 - INFO - Performing KMeans clustering with 6 clusters\n", - "2025-11-05 00:27:02 - INFO - Clustering complete, found 6 clusters\n", - "2025-11-05 00:27:02 - INFO - Found 248 raw transitions between clusters\n", - "2025-11-05 00:27:02 - INFO - After filtering: using 6 breaks between clusters\n", - "2025-11-05 00:27:02 - INFO - Created 6 breaks using clustering method\n", - "2025-11-05 00:27:02 - INFO - Created 6 breaks using method 'clustering'\n", - "2025-11-05 00:27:02 - INFO - Generated 7 chunks from content\n" + "2025-11-10 19:44:14 - INFO - Encoding 250 content items with embedding model\n", + "2025-11-10 19:44:14 - INFO - Generated embeddings with shape: (250, 384)\n", + "2025-11-10 19:44:15 - INFO - Performing KMeans clustering with 6 clusters\n", + "2025-11-10 19:44:15 - INFO - Clustering complete, found 6 clusters\n", + "2025-11-10 19:44:15 - INFO - Found 248 raw transitions between clusters\n", + "2025-11-10 19:44:15 - INFO - After filtering: using 6 breaks between clusters\n", + "2025-11-10 19:44:15 - INFO - Created 6 breaks using clustering method\n", + "2025-11-10 19:44:15 - INFO - Created 6 breaks using method 'clustering'\n", + "2025-11-10 19:44:15 - INFO - Generated 7 chunks from content\n" ] } ], @@ -1241,8 +1247,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.91 s, sys: 3.44 s, total: 5.35 s\n", - "Wall time: 1min 8s\n" + "CPU times: user 4.86 s, sys: 8.75 s, total: 13.6 s\n", + "Wall time: 3min 9s\n" ] } ], @@ -1366,10 +1372,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 00:28:11 - INFO - Starting text summarization and evaluation...\n", - "2025-11-05 00:28:11 - INFO - Processing 7 chunks\n", - "2025-11-05 00:28:21 - INFO - ✅ Summarization completed in 10.03 seconds\n", - "2025-11-05 00:28:21 - ERROR - Error calculating ROUGE metrics: name 'evaluate' is not defined\n" + "2025-11-10 19:47:25 - INFO - Starting text summarization and evaluation...\n", + "2025-11-10 19:47:25 - INFO - Processing 7 chunks\n", + "2025-11-10 19:53:18 - INFO - ✅ Summarization completed in 353.17 seconds\n", + "2025-11-10 19:53:18 - ERROR - Error calculating ROUGE metrics: name 'evaluate' is not defined\n" ] }, { @@ -1381,9 +1387,9 @@ "SUMMARIZATION RESULTS\n", "==================================================\n", "Original text length: 6560 characters\n", - "Summary length: 1626 characters\n", - "Compression ratio: 24.79%\n", - "Processing time: 10.03 seconds\n", + "Summary length: 1667 characters\n", + "Compression ratio: 25.41%\n", + "Processing time: 353.17 seconds\n", "\n", "ROUGE Scores:\n", " ROUGE1: 0.0000\n", @@ -1397,9 +1403,9 @@ "\n", "The excerpt describes a state of intense heat and injustice, specifically referencing the state of Mississippi.\n", "\n", - "The excerpt is a passage from Martin Luther King Jr.'s famous \"I Have a Dream\" speech. In the passage, King expresses his vision of a future where people are judged not by the color of their skin but by the content of their character. He envisions a world where children of all colors can join hands and sing together in harmony.\n", + "The excerpt appears to be a passage from Martin Luther King Jr.'s famous \"I Have a Dream\" speech. In the passage, King expresses his vision of a future where people are judged not by the color of their skin but by the content of their character. He envisions a world where people can live together in harmony and equality.\n", "\n", - "The excerpt expresses a desire for freedom to ring and for America to become a great nation.\n", + "The excerpt expresses a desire for freedom to ring in America, and suggests that this is necessary for the country to become a great nation.\n", "\n", "The excerpt is a poetic passage that calls for freedom to ring from various locations. Specifically, it mentions the \"prestigious hilltops of New Hampshire\" and the \"mighty mountains of New York\".\n", "\n", @@ -1484,8 +1490,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 00:28:21 - INFO - ⏱️ Total execution time: 0m 10.04s\n", - "2025-11-05 00:28:21 - INFO - ✅ Notebook execution completed successfully.\n" + "2025-11-10 19:53:18 - INFO - ⏱️ Total execution time: 5m 53.37s\n", + "2025-11-10 19:53:18 - INFO - ✅ Notebook execution completed successfully.\n" ] } ], From a9e518b74f15791567cdcfaa542db8ff9e2643cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Davi=20Arag=C3=A3o?= Date: Tue, 11 Nov 2025 17:28:24 -0300 Subject: [PATCH 2/2] fix: update requirements to fix install error --- .../notebooks/register-model.ipynb | 45 +++-- .../notebooks/run-workflow.ipynb | 156 ++++++++++-------- .../requirements.txt | 4 +- 3 files changed, 106 insertions(+), 99 deletions(-) diff --git a/generative-ai/text-summarization-with-langchain/notebooks/register-model.ipynb b/generative-ai/text-summarization-with-langchain/notebooks/register-model.ipynb index 12ccbc4c..d3bba5ea 100644 --- a/generative-ai/text-summarization-with-langchain/notebooks/register-model.ipynb +++ b/generative-ai/text-summarization-with-langchain/notebooks/register-model.ipynb @@ -72,7 +72,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-10 19:53:44 - INFO - Notebook execution started.\n" + "2025-11-11 19:28:04 - INFO - Notebook execution started.\n" ] } ], @@ -101,8 +101,8 @@ "output_type": "stream", "text": [ "Note: you may need to restart the kernel to use updated packages.\n", - "CPU times: user 183 ms, sys: 111 ms, total: 294 ms\n", - "Wall time: 7.47 s\n" + "CPU times: user 173 ms, sys: 31.4 ms, total: 205 ms\n", + "Wall time: 4.75 s\n" ] } ], @@ -275,14 +275,21 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "1b67f548-6765-4d42-8730-fab796acd64c", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/11/11 19:28:34 INFO mlflow.tracking.fluent: Experiment with name 'Summarization_Service' does not exist. Creating a new experiment.\n" + ] + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d117eca843be4d39bb13e1885ad154c0", + "model_id": "9a4485f97f0b46a980bc4bd57eccc4ed", "version_major": 2, "version_minor": 0 }, @@ -296,7 +303,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ac8591cff46d4012b48a0bbc52ab0cba", + "model_id": "8adddd13d60d45c7b26c0c7055ed89c0", "version_major": 2, "version_minor": 0 }, @@ -310,7 +317,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "606bd7e34bb3473d83eea0732fbf658b", + "model_id": "bafa74954b9b4294922934ac4fb03a72", "version_major": 2, "version_minor": 0 }, @@ -325,7 +332,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-10 20:18:50,698 - INFO - Model and artifacts successfully registered in MLflow.\n", + "2025-11-11 19:36:53,094 - INFO - Model and artifacts successfully registered in MLflow.\n", "Successfully registered model 'Text_Summarization_Service'.\n" ] }, @@ -333,9 +340,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Model registered successfully with run ID: d89e0153703f4066991946b05f3d68a9\n", - "CPU times: user 8.74 s, sys: 3min, total: 3min 9s\n", - "Wall time: 16min 50s\n" + "Model registered successfully with run ID: e2c7fcda9a6742df98167adebd8a402e\n", + "CPU times: user 4.24 s, sys: 1min 10s, total: 1min 14s\n", + "Wall time: 8min 19s\n" ] }, { @@ -384,7 +391,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "8649162e-d673-4370-9a6f-9b37a8a1f34d", "metadata": {}, "outputs": [ @@ -392,8 +399,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-10 20:18:52 - INFO - ⏱️ Total execution time: 25m 7.85s\n", - "2025-11-10 20:18:52 - INFO - ✅ Notebook execution completed successfully.\n" + "2025-11-11 19:36:54 - INFO - ⏱️ Total execution time: 8m 50.12s\n", + "2025-11-11 19:36:54 - INFO - ✅ Notebook execution completed successfully.\n" ] } ], @@ -418,15 +425,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ec79f1cb-dc2d-478a-8e4b-2dc5c0a704cf", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7b6b9a6a-6a67-46de-8fbf-19c28726ef89", + "id": "9b1d7d38-7654-49fe-97d0-a416b12e57c7", "metadata": {}, "outputs": [], "source": [] diff --git a/generative-ai/text-summarization-with-langchain/notebooks/run-workflow.ipynb b/generative-ai/text-summarization-with-langchain/notebooks/run-workflow.ipynb index cfdbbb8f..92e8b0e6 100644 --- a/generative-ai/text-summarization-with-langchain/notebooks/run-workflow.ipynb +++ b/generative-ai/text-summarization-with-langchain/notebooks/run-workflow.ipynb @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "id": "83818c3c-0efd-49af-a2fc-d4579e4daf7d", "metadata": {}, "outputs": [], @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 7, "id": "207d0596-9e36-4956-96eb-79311c1fa63d", "metadata": {}, "outputs": [ @@ -82,7 +82,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-10 19:42:14 - INFO - Notebook execution started.\n" + "2025-11-11 18:53:38 - INFO - Notebook execution started.\n", + "2025-11-11 18:53:38 - INFO - Notebook execution started.\n" ] } ], @@ -110,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "id": "023fbdfa", "metadata": {}, "outputs": [ @@ -118,15 +119,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "langchain-classic 1.0.0a1 requires langchain-core<2.0.0,>=1.0.0a7, but you have langchain-core 0.3.79 which is incompatible.\n", - "langchain-classic 1.0.0a1 requires langchain-text-splitters<2.0.0,>=1.0.0a1, but you have langchain-text-splitters 0.3.11 which is incompatible.\n", - "grpcio-status 1.76.0 requires protobuf<7.0.0,>=6.31.1, but you have protobuf 5.29.5 which is incompatible.\n", - "langchain-aws 1.0.0a1 requires langchain-core<2.0.0,>=1.0.0a4, but you have langchain-core 0.3.79 which is incompatible.\n", - "langchain-openai 1.0.0a4 requires langchain-core<2.0.0,>=1.0.0a7, but you have langchain-core 0.3.79 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n", - "CPU times: user 1.51 s, sys: 625 ms, total: 2.13 s\n", - "Wall time: 1min 1s\n" + "Note: you may need to restart the kernel to use updated packages.\n", + "CPU times: user 88.6 ms, sys: 61.5 ms, total: 150 ms\n", + "Wall time: 2.85 s\n" ] } ], @@ -138,19 +133,18 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, + "id": "96465a7c-75c9-41fc-98d2-3f7d58fc5333", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, "id": "902fbba0", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.12/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n", - " from tqdm.autonotebook import tqdm, trange\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "import sys\n", @@ -207,7 +201,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "id": "426355b0", "metadata": {}, "outputs": [], @@ -217,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "id": "7b055f19", "metadata": {}, "outputs": [], @@ -261,7 +255,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "id": "3d9e832d", "metadata": {}, "outputs": [], @@ -289,7 +283,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "id": "713a24f4-01f4-4a33-8124-7d7601ced6ef", "metadata": {}, "outputs": [ @@ -335,7 +329,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "id": "4ab31867-255e-489c-810d-42786bde5a18", "metadata": {}, "outputs": [], @@ -353,7 +347,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 15, "id": "54b28d4a", "metadata": {}, "outputs": [ @@ -361,7 +355,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-10 19:43:52 - INFO - Local Llama model is properly configured. \n" + "2025-11-11 18:53:41 - INFO - Local Llama model is properly configured. \n", + "2025-11-11 18:53:41 - INFO - Local Llama model is properly configured. \n" ] } ], @@ -405,7 +400,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 16, "id": "dc16a213-9f92-4c75-93ff-66adc3133cce", "metadata": {}, "outputs": [ @@ -498,7 +493,7 @@ "4 00:00:28.740 " ] }, - "execution_count": 11, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -542,7 +537,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "id": "d1b33cbb-1c2b-404e-ad65-b243c6702308", "metadata": { "scrolled": true @@ -630,7 +625,7 @@ "4 2 " ] }, - "execution_count": 12, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -671,14 +666,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "id": "62c67feb-11f7-47ad-bdec-3ec252e51797", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a75117116fd843179eec7546b9246ba4", + "model_id": "034820ce93634a78956cf438c0e3fa3b", "version_major": 2, "version_minor": 0 }, @@ -692,7 +687,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c81ba1d2f9eb4f40a62b30e5b2a03938", + "model_id": "189eca4ef77f420eb813954f39a0e514", "version_major": 2, "version_minor": 0 }, @@ -706,7 +701,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "989ac43d10be4cb88566577c82ec598d", + "model_id": "4720d46ffc364841819dd29a0f1353df", "version_major": 2, "version_minor": 0 }, @@ -720,7 +715,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "27ee963748c24cc494a6737d4a68403c", + "model_id": "5bc74dae28564395a59d82c15dcf8fa3", "version_major": 2, "version_minor": 0 }, @@ -734,7 +729,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "cb155f0e35b84987ad8f189bf8c5c231", + "model_id": "d6895b6fe4c54ea8812bf42f24171cf2", "version_major": 2, "version_minor": 0 }, @@ -755,7 +750,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9935a5bcb8e8464aa5b7d0ae316bba4b", + "model_id": "52c7ad25a6a1451c835dc52f33b84118", "version_major": 2, "version_minor": 0 }, @@ -769,7 +764,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3876f0051d98481daac4db54fa9d143d", + "model_id": "44e105d369524e9c87249c78c21a6afd", "version_major": 2, "version_minor": 0 }, @@ -783,7 +778,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "fd5daa81421842bda5101bbbe45b2e99", + "model_id": "c3bae07b63a94683a1a355da17fd9a5d", "version_major": 2, "version_minor": 0 }, @@ -797,7 +792,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "efd1610a56b147d6975ab8cb4720c929", + "model_id": "b1f187777c9c421ebb370c3c6f339659", "version_major": 2, "version_minor": 0 }, @@ -811,7 +806,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "384d711ec8354ec9b587bf75fecac976", + "model_id": "181f7d0a6927451ca26130648b83af91", "version_major": 2, "version_minor": 0 }, @@ -825,7 +820,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3c0f2983de394e14aba671a00aa9a26c", + "model_id": "a879bae564e447b89c7922a63dddae94", "version_major": 2, "version_minor": 0 }, @@ -844,7 +839,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 19, "id": "c5538aee-0233-4b58-a574-879dfa64a792", "metadata": {}, "outputs": [], @@ -1170,7 +1165,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 20, "id": "a25ffc2d", "metadata": {}, "outputs": [ @@ -1178,15 +1173,24 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-10 19:44:14 - INFO - Encoding 250 content items with embedding model\n", - "2025-11-10 19:44:14 - INFO - Generated embeddings with shape: (250, 384)\n", - "2025-11-10 19:44:15 - INFO - Performing KMeans clustering with 6 clusters\n", - "2025-11-10 19:44:15 - INFO - Clustering complete, found 6 clusters\n", - "2025-11-10 19:44:15 - INFO - Found 248 raw transitions between clusters\n", - "2025-11-10 19:44:15 - INFO - After filtering: using 6 breaks between clusters\n", - "2025-11-10 19:44:15 - INFO - Created 6 breaks using clustering method\n", - "2025-11-10 19:44:15 - INFO - Created 6 breaks using method 'clustering'\n", - "2025-11-10 19:44:15 - INFO - Generated 7 chunks from content\n" + "2025-11-11 18:54:00 - INFO - Encoding 250 content items with embedding model\n", + "2025-11-11 18:54:00 - INFO - Encoding 250 content items with embedding model\n", + "2025-11-11 18:54:00 - INFO - Generated embeddings with shape: (250, 384)\n", + "2025-11-11 18:54:00 - INFO - Generated embeddings with shape: (250, 384)\n", + "2025-11-11 18:54:00 - INFO - Performing KMeans clustering with 6 clusters\n", + "2025-11-11 18:54:00 - INFO - Performing KMeans clustering with 6 clusters\n", + "2025-11-11 18:54:01 - INFO - Clustering complete, found 6 clusters\n", + "2025-11-11 18:54:01 - INFO - Clustering complete, found 6 clusters\n", + "2025-11-11 18:54:01 - INFO - Found 248 raw transitions between clusters\n", + "2025-11-11 18:54:01 - INFO - Found 248 raw transitions between clusters\n", + "2025-11-11 18:54:01 - INFO - After filtering: using 6 breaks between clusters\n", + "2025-11-11 18:54:01 - INFO - After filtering: using 6 breaks between clusters\n", + "2025-11-11 18:54:01 - INFO - Created 6 breaks using clustering method\n", + "2025-11-11 18:54:01 - INFO - Created 6 breaks using clustering method\n", + "2025-11-11 18:54:01 - INFO - Created 6 breaks using method 'clustering'\n", + "2025-11-11 18:54:01 - INFO - Created 6 breaks using method 'clustering'\n", + "2025-11-11 18:54:01 - INFO - Generated 7 chunks from content\n", + "2025-11-11 18:54:01 - INFO - Generated 7 chunks from content\n" ] } ], @@ -1229,7 +1233,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 21, "id": "3137ca2a", "metadata": {}, "outputs": [], @@ -1239,7 +1243,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 22, "id": "539fba44-6a64-40a1-88e6-d5cf1f5cc4b0", "metadata": {}, "outputs": [ @@ -1247,8 +1251,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 4.86 s, sys: 8.75 s, total: 13.6 s\n", - "Wall time: 3min 9s\n" + "CPU times: user 2min 51s, sys: 5.15 s, total: 2min 56s\n", + "Wall time: 5min 10s\n" ] } ], @@ -1260,7 +1264,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 23, "id": "043cdb8f-a70a-499a-a2d6-56c14d965169", "metadata": {}, "outputs": [], @@ -1288,7 +1292,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 24, "id": "40e5cde3-b064-4280-8ada-8df68820a2f0", "metadata": {}, "outputs": [], @@ -1362,7 +1366,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 25, "id": "c27bb40e-7823-490a-af94-0d8aae5e5886", "metadata": { "scrolled": true @@ -1372,10 +1376,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-10 19:47:25 - INFO - Starting text summarization and evaluation...\n", - "2025-11-10 19:47:25 - INFO - Processing 7 chunks\n", - "2025-11-10 19:53:18 - INFO - ✅ Summarization completed in 353.17 seconds\n", - "2025-11-10 19:53:18 - ERROR - Error calculating ROUGE metrics: name 'evaluate' is not defined\n" + "2025-11-11 18:59:11 - INFO - Starting text summarization and evaluation...\n", + "2025-11-11 18:59:11 - INFO - Starting text summarization and evaluation...\n", + "2025-11-11 18:59:11 - INFO - Processing 7 chunks\n", + "2025-11-11 18:59:11 - INFO - Processing 7 chunks\n", + "2025-11-11 19:05:08 - INFO - ✅ Summarization completed in 356.80 seconds\n", + "2025-11-11 19:05:08 - INFO - ✅ Summarization completed in 356.80 seconds\n", + "2025-11-11 19:05:08 - ERROR - Error calculating ROUGE metrics: name 'evaluate' is not defined\n", + "2025-11-11 19:05:08 - ERROR - Error calculating ROUGE metrics: name 'evaluate' is not defined\n" ] }, { @@ -1389,7 +1397,7 @@ "Original text length: 6560 characters\n", "Summary length: 1667 characters\n", "Compression ratio: 25.41%\n", - "Processing time: 353.17 seconds\n", + "Processing time: 356.80 seconds\n", "\n", "ROUGE Scores:\n", " ROUGE1: 0.0000\n", @@ -1482,7 +1490,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 26, "id": "60bd4708-d7b4-4928-a9be-48679ef8748d", "metadata": {}, "outputs": [ @@ -1490,8 +1498,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-10 19:53:18 - INFO - ⏱️ Total execution time: 5m 53.37s\n", - "2025-11-10 19:53:18 - INFO - ✅ Notebook execution completed successfully.\n" + "2025-11-11 19:05:08 - INFO - ⏱️ Total execution time: 5m 56.83s\n", + "2025-11-11 19:05:08 - INFO - ⏱️ Total execution time: 5m 56.83s\n", + "2025-11-11 19:05:08 - INFO - ✅ Notebook execution completed successfully.\n", + "2025-11-11 19:05:08 - INFO - ✅ Notebook execution completed successfully.\n" ] } ], diff --git a/generative-ai/text-summarization-with-langchain/requirements.txt b/generative-ai/text-summarization-with-langchain/requirements.txt index b5747171..8b6a0200 100644 --- a/generative-ai/text-summarization-with-langchain/requirements.txt +++ b/generative-ai/text-summarization-with-langchain/requirements.txt @@ -6,10 +6,8 @@ langchain-huggingface==0.2.0 pyyaml pandas sentence-transformers -langchain_core -langchain_huggingface tokenizers>=0.13.0 httpx>=0.24.0 mlflow==2.21.2 langchain==0.3.27 -langchain-community==0.3.27 +langchain-community