diff --git a/generative-ai/text-summarization-with-langchain/notebooks/register-model.ipynb b/generative-ai/text-summarization-with-langchain/notebooks/register-model.ipynb index 2b3bc021..d3bba5ea 100644 --- a/generative-ai/text-summarization-with-langchain/notebooks/register-model.ipynb +++ b/generative-ai/text-summarization-with-langchain/notebooks/register-model.ipynb @@ -72,7 +72,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 01:04:39 - INFO - Notebook execution started.\n" + "2025-11-11 19:28:04 - INFO - Notebook execution started.\n" ] } ], @@ -101,8 +101,8 @@ "output_type": "stream", "text": [ "Note: you may need to restart the kernel to use updated packages.\n", - "CPU times: user 27.6 ms, sys: 7.98 ms, total: 35.6 ms\n", - "Wall time: 1.45 s\n" + "CPU times: user 173 ms, sys: 31.4 ms, total: 205 ms\n", + "Wall time: 4.75 s\n" ] } ], @@ -283,13 +283,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025/11/05 01:04:48 INFO mlflow.tracking.fluent: Experiment with name 'Summarization_Service' does not exist. Creating a new experiment.\n" + "2025/11/11 19:28:34 INFO mlflow.tracking.fluent: Experiment with name 'Summarization_Service' does not exist. Creating a new experiment.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4659dbb75ff54aac8d65d97926ac70f5", + "model_id": "9a4485f97f0b46a980bc4bd57eccc4ed", "version_major": 2, "version_minor": 0 }, @@ -303,7 +303,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "01efb80d6a6648889e546cfa5b0450f0", + "model_id": "8adddd13d60d45c7b26c0c7055ed89c0", "version_major": 2, "version_minor": 0 }, @@ -317,7 +317,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "996080307bfa491bbb13fe1907e4af7c", + "model_id": "bafa74954b9b4294922934ac4fb03a72", "version_major": 2, "version_minor": 0 }, @@ -332,7 +332,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 01:08:27,267 - INFO - Model and artifacts successfully registered in MLflow.\n", + "2025-11-11 19:36:53,094 - INFO - Model and artifacts successfully registered in MLflow.\n", "Successfully registered model 'Text_Summarization_Service'.\n" ] }, @@ -340,9 +340,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Model registered successfully with run ID: 51182439e0ee4c56ab46a49b0e868f5c\n", - "CPU times: user 770 ms, sys: 19.4 s, total: 20.2 s\n", - "Wall time: 3min 39s\n" + "Model registered successfully with run ID: e2c7fcda9a6742df98167adebd8a402e\n", + "CPU times: user 4.24 s, sys: 1min 10s, total: 1min 14s\n", + "Wall time: 8min 19s\n" ] }, { @@ -399,8 +399,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 01:08:27 - INFO - ⏱️ Total execution time: 3m 48.55s\n", - "2025-11-05 01:08:27 - INFO - ✅ Notebook execution completed successfully.\n" + "2025-11-11 19:36:54 - INFO - ⏱️ Total execution time: 8m 50.12s\n", + "2025-11-11 19:36:54 - INFO - ✅ Notebook execution completed successfully.\n" ] } ], @@ -421,6 +421,14 @@ "source": [ "Built with ❤️ using [**HP AI Studio**](https://hp.com/ai-studio)." ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9b1d7d38-7654-49fe-97d0-a416b12e57c7", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/generative-ai/text-summarization-with-langchain/notebooks/run-workflow.ipynb b/generative-ai/text-summarization-with-langchain/notebooks/run-workflow.ipynb index 7f16d832..92e8b0e6 100644 --- a/generative-ai/text-summarization-with-langchain/notebooks/run-workflow.ipynb +++ b/generative-ai/text-summarization-with-langchain/notebooks/run-workflow.ipynb @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "id": "83818c3c-0efd-49af-a2fc-d4579e4daf7d", "metadata": {}, "outputs": [], @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 7, "id": "207d0596-9e36-4956-96eb-79311c1fa63d", "metadata": {}, "outputs": [ @@ -82,7 +82,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 00:26:42 - INFO - Notebook execution started.\n" + "2025-11-11 18:53:38 - INFO - Notebook execution started.\n", + "2025-11-11 18:53:38 - INFO - Notebook execution started.\n" ] } ], @@ -110,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "id": "023fbdfa", "metadata": {}, "outputs": [ @@ -119,8 +120,8 @@ "output_type": "stream", "text": [ "Note: you may need to restart the kernel to use updated packages.\n", - "CPU times: user 44.6 ms, sys: 30.4 ms, total: 75 ms\n", - "Wall time: 4.25 s\n" + "CPU times: user 88.6 ms, sys: 61.5 ms, total: 150 ms\n", + "Wall time: 2.85 s\n" ] } ], @@ -132,19 +133,18 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, + "id": "96465a7c-75c9-41fc-98d2-3f7d58fc5333", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, "id": "902fbba0", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.12/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n", - " from tqdm.autonotebook import tqdm, trange\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "import sys\n", @@ -201,7 +201,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "id": "426355b0", "metadata": {}, "outputs": [], @@ -211,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "id": "7b055f19", "metadata": {}, "outputs": [], @@ -255,7 +255,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "id": "3d9e832d", "metadata": {}, "outputs": [], @@ -283,7 +283,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "id": "713a24f4-01f4-4a33-8124-7d7601ced6ef", "metadata": {}, "outputs": [ @@ -329,7 +329,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "id": "4ab31867-255e-489c-810d-42786bde5a18", "metadata": {}, "outputs": [], @@ -347,7 +347,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 15, "id": "54b28d4a", "metadata": {}, "outputs": [ @@ -355,7 +355,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 00:26:55 - INFO - Local Llama model is properly configured. \n" + "2025-11-11 18:53:41 - INFO - Local Llama model is properly configured. \n", + "2025-11-11 18:53:41 - INFO - Local Llama model is properly configured. \n" ] } ], @@ -399,7 +400,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 16, "id": "dc16a213-9f92-4c75-93ff-66adc3133cce", "metadata": {}, "outputs": [ @@ -492,7 +493,7 @@ "4 00:00:28.740 " ] }, - "execution_count": 11, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -536,7 +537,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "id": "d1b33cbb-1c2b-404e-ad65-b243c6702308", "metadata": { "scrolled": true @@ -624,7 +625,7 @@ "4 2 " ] }, - "execution_count": 12, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -665,14 +666,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "id": "62c67feb-11f7-47ad-bdec-3ec252e51797", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "86c96659a6c643398256c7e9077fe2d6", + "model_id": "034820ce93634a78956cf438c0e3fa3b", "version_major": 2, "version_minor": 0 }, @@ -686,7 +687,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "56edf0dd1f654198ba10da4f6f5dd224", + "model_id": "189eca4ef77f420eb813954f39a0e514", "version_major": 2, "version_minor": 0 }, @@ -700,7 +701,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "856c034c5cfa4577bd434977813bc066", + "model_id": "4720d46ffc364841819dd29a0f1353df", "version_major": 2, "version_minor": 0 }, @@ -714,7 +715,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ca5180955987458989daa9b935e93f29", + "model_id": "5bc74dae28564395a59d82c15dcf8fa3", "version_major": 2, "version_minor": 0 }, @@ -728,7 +729,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "225d40ea79f2464fa9f62499c6596f08", + "model_id": "d6895b6fe4c54ea8812bf42f24171cf2", "version_major": 2, "version_minor": 0 }, @@ -749,7 +750,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bf1d17f1d9ae4001a453d133259151ed", + "model_id": "52c7ad25a6a1451c835dc52f33b84118", "version_major": 2, "version_minor": 0 }, @@ -763,7 +764,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c2348e77519f4e9a84bda11b17dee0c5", + "model_id": "44e105d369524e9c87249c78c21a6afd", "version_major": 2, "version_minor": 0 }, @@ -777,7 +778,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "de8f29d1da914548bbc13f4dcb7ac22d", + "model_id": "c3bae07b63a94683a1a355da17fd9a5d", "version_major": 2, "version_minor": 0 }, @@ -791,7 +792,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "16df29f056ea46129b9610e3d362d276", + "model_id": "b1f187777c9c421ebb370c3c6f339659", "version_major": 2, "version_minor": 0 }, @@ -805,7 +806,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "62c7795d9d414a558ead29264f096eba", + "model_id": "181f7d0a6927451ca26130648b83af91", "version_major": 2, "version_minor": 0 }, @@ -819,7 +820,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2814d964cc2144df957c6e718226bf25", + "model_id": "a879bae564e447b89c7922a63dddae94", "version_major": 2, "version_minor": 0 }, @@ -838,7 +839,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 19, "id": "c5538aee-0233-4b58-a574-879dfa64a792", "metadata": {}, "outputs": [], @@ -1164,7 +1165,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 20, "id": "a25ffc2d", "metadata": {}, "outputs": [ @@ -1172,15 +1173,24 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 00:27:01 - INFO - Encoding 250 content items with embedding model\n", - "2025-11-05 00:27:01 - INFO - Generated embeddings with shape: (250, 384)\n", - "2025-11-05 00:27:01 - INFO - Performing KMeans clustering with 6 clusters\n", - "2025-11-05 00:27:02 - INFO - Clustering complete, found 6 clusters\n", - "2025-11-05 00:27:02 - INFO - Found 248 raw transitions between clusters\n", - "2025-11-05 00:27:02 - INFO - After filtering: using 6 breaks between clusters\n", - "2025-11-05 00:27:02 - INFO - Created 6 breaks using clustering method\n", - "2025-11-05 00:27:02 - INFO - Created 6 breaks using method 'clustering'\n", - "2025-11-05 00:27:02 - INFO - Generated 7 chunks from content\n" + "2025-11-11 18:54:00 - INFO - Encoding 250 content items with embedding model\n", + "2025-11-11 18:54:00 - INFO - Encoding 250 content items with embedding model\n", + "2025-11-11 18:54:00 - INFO - Generated embeddings with shape: (250, 384)\n", + "2025-11-11 18:54:00 - INFO - Generated embeddings with shape: (250, 384)\n", + "2025-11-11 18:54:00 - INFO - Performing KMeans clustering with 6 clusters\n", + "2025-11-11 18:54:00 - INFO - Performing KMeans clustering with 6 clusters\n", + "2025-11-11 18:54:01 - INFO - Clustering complete, found 6 clusters\n", + "2025-11-11 18:54:01 - INFO - Clustering complete, found 6 clusters\n", + "2025-11-11 18:54:01 - INFO - Found 248 raw transitions between clusters\n", + "2025-11-11 18:54:01 - INFO - Found 248 raw transitions between clusters\n", + "2025-11-11 18:54:01 - INFO - After filtering: using 6 breaks between clusters\n", + "2025-11-11 18:54:01 - INFO - After filtering: using 6 breaks between clusters\n", + "2025-11-11 18:54:01 - INFO - Created 6 breaks using clustering method\n", + "2025-11-11 18:54:01 - INFO - Created 6 breaks using clustering method\n", + "2025-11-11 18:54:01 - INFO - Created 6 breaks using method 'clustering'\n", + "2025-11-11 18:54:01 - INFO - Created 6 breaks using method 'clustering'\n", + "2025-11-11 18:54:01 - INFO - Generated 7 chunks from content\n", + "2025-11-11 18:54:01 - INFO - Generated 7 chunks from content\n" ] } ], @@ -1223,7 +1233,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 21, "id": "3137ca2a", "metadata": {}, "outputs": [], @@ -1233,7 +1243,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 22, "id": "539fba44-6a64-40a1-88e6-d5cf1f5cc4b0", "metadata": {}, "outputs": [ @@ -1241,8 +1251,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.91 s, sys: 3.44 s, total: 5.35 s\n", - "Wall time: 1min 8s\n" + "CPU times: user 2min 51s, sys: 5.15 s, total: 2min 56s\n", + "Wall time: 5min 10s\n" ] } ], @@ -1254,7 +1264,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 23, "id": "043cdb8f-a70a-499a-a2d6-56c14d965169", "metadata": {}, "outputs": [], @@ -1282,7 +1292,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 24, "id": "40e5cde3-b064-4280-8ada-8df68820a2f0", "metadata": {}, "outputs": [], @@ -1356,7 +1366,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 25, "id": "c27bb40e-7823-490a-af94-0d8aae5e5886", "metadata": { "scrolled": true @@ -1366,10 +1376,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 00:28:11 - INFO - Starting text summarization and evaluation...\n", - "2025-11-05 00:28:11 - INFO - Processing 7 chunks\n", - "2025-11-05 00:28:21 - INFO - ✅ Summarization completed in 10.03 seconds\n", - "2025-11-05 00:28:21 - ERROR - Error calculating ROUGE metrics: name 'evaluate' is not defined\n" + "2025-11-11 18:59:11 - INFO - Starting text summarization and evaluation...\n", + "2025-11-11 18:59:11 - INFO - Starting text summarization and evaluation...\n", + "2025-11-11 18:59:11 - INFO - Processing 7 chunks\n", + "2025-11-11 18:59:11 - INFO - Processing 7 chunks\n", + "2025-11-11 19:05:08 - INFO - ✅ Summarization completed in 356.80 seconds\n", + "2025-11-11 19:05:08 - INFO - ✅ Summarization completed in 356.80 seconds\n", + "2025-11-11 19:05:08 - ERROR - Error calculating ROUGE metrics: name 'evaluate' is not defined\n", + "2025-11-11 19:05:08 - ERROR - Error calculating ROUGE metrics: name 'evaluate' is not defined\n" ] }, { @@ -1381,9 +1395,9 @@ "SUMMARIZATION RESULTS\n", "==================================================\n", "Original text length: 6560 characters\n", - "Summary length: 1626 characters\n", - "Compression ratio: 24.79%\n", - "Processing time: 10.03 seconds\n", + "Summary length: 1667 characters\n", + "Compression ratio: 25.41%\n", + "Processing time: 356.80 seconds\n", "\n", "ROUGE Scores:\n", " ROUGE1: 0.0000\n", @@ -1397,9 +1411,9 @@ "\n", "The excerpt describes a state of intense heat and injustice, specifically referencing the state of Mississippi.\n", "\n", - "The excerpt is a passage from Martin Luther King Jr.'s famous \"I Have a Dream\" speech. In the passage, King expresses his vision of a future where people are judged not by the color of their skin but by the content of their character. He envisions a world where children of all colors can join hands and sing together in harmony.\n", + "The excerpt appears to be a passage from Martin Luther King Jr.'s famous \"I Have a Dream\" speech. In the passage, King expresses his vision of a future where people are judged not by the color of their skin but by the content of their character. He envisions a world where people can live together in harmony and equality.\n", "\n", - "The excerpt expresses a desire for freedom to ring and for America to become a great nation.\n", + "The excerpt expresses a desire for freedom to ring in America, and suggests that this is necessary for the country to become a great nation.\n", "\n", "The excerpt is a poetic passage that calls for freedom to ring from various locations. Specifically, it mentions the \"prestigious hilltops of New Hampshire\" and the \"mighty mountains of New York\".\n", "\n", @@ -1476,7 +1490,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 26, "id": "60bd4708-d7b4-4928-a9be-48679ef8748d", "metadata": {}, "outputs": [ @@ -1484,8 +1498,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-11-05 00:28:21 - INFO - ⏱️ Total execution time: 0m 10.04s\n", - "2025-11-05 00:28:21 - INFO - ✅ Notebook execution completed successfully.\n" + "2025-11-11 19:05:08 - INFO - ⏱️ Total execution time: 5m 56.83s\n", + "2025-11-11 19:05:08 - INFO - ⏱️ Total execution time: 5m 56.83s\n", + "2025-11-11 19:05:08 - INFO - ✅ Notebook execution completed successfully.\n", + "2025-11-11 19:05:08 - INFO - ✅ Notebook execution completed successfully.\n" ] } ], diff --git a/generative-ai/text-summarization-with-langchain/requirements.txt b/generative-ai/text-summarization-with-langchain/requirements.txt index b5747171..8b6a0200 100644 --- a/generative-ai/text-summarization-with-langchain/requirements.txt +++ b/generative-ai/text-summarization-with-langchain/requirements.txt @@ -6,10 +6,8 @@ langchain-huggingface==0.2.0 pyyaml pandas sentence-transformers -langchain_core -langchain_huggingface tokenizers>=0.13.0 httpx>=0.24.0 mlflow==2.21.2 langchain==0.3.27 -langchain-community==0.3.27 +langchain-community