Skip to content
590 changes: 590 additions & 0 deletions Lab.Week1.ipynb

Large diffs are not rendered by default.

346 changes: 346 additions & 0 deletions Lab1_Week3.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,346 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"id": "4e7dda1b-c83b-4286-a2fc-81057cb0890a",
"metadata": {},
"outputs": [],
"source": [
"from functools import reduce\n",
"import numpy\n",
"import pandas"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "848e37d7-459c-484e-92b6-4c11b0e8d417",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Número de palabras: 13694\n"
]
}
],
"source": [
"import urllib.request\n",
"\n",
"url = \"https://www.gutenberg.org/cache/epub/58585/pg58585.txt\"\n",
"response = urllib.request.urlopen(url)\n",
"prophet = response.read().decode(\"utf8\").split(\" \")\n",
"\n",
"print(\"Número de palabras:\", len(prophet))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c55c6688-4e11-442f-a4dc-818c831f7573",
"metadata": {},
"outputs": [],
"source": [
"prophet = prophet[568:]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "3c815a30-0959-4d2f-97bc-0bd9b8f67ff5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['was',\n",
" 'to\\r\\nreturn',\n",
" 'and',\n",
" 'bear',\n",
" 'him',\n",
" 'back',\n",
" 'to',\n",
" 'the',\n",
" 'isle',\n",
" 'of\\r\\nhis']"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prophet[:10]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "66466b48-d2de-44fe-9bc2-6d64e4375f7b",
"metadata": {},
"outputs": [],
"source": [
"def reference(x):\n",
" '''\n",
" Input: A string\n",
" Output: The string with references removed\n",
" \n",
" Example:\n",
" Input: 'the{7}'\n",
" Output: 'the'\n",
" '''\n",
" parts = x.split('{')\n",
" return parts[0]\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "dc2b5a05-960b-417d-b0b0-191ae434a210",
"metadata": {},
"outputs": [],
"source": [
"prophet_reference = list(map(reference,prophet))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "5c1f09b5-e94e-486a-baf8-9519ecf5eee0",
"metadata": {},
"outputs": [],
"source": [
"def line_break(x):\n",
" '''\n",
" Input: A string\n",
" Output: A list of strings split on the line break (\\n) character\n",
" \n",
" Example:\n",
" Input: 'the\\nbeloved'\n",
" Output: ['the', 'beloved']\n",
" '''\n",
" return x.split('\\n')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "09542ab4-5faa-4890-87b8-f5445cc6aecc",
"metadata": {},
"outputs": [],
"source": [
"prophet_line = list(map(line_break, prophet_reference))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "17233c76-fd30-42ba-a857-737775ac030c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['was',\n",
" 'to\\r',\n",
" 'return',\n",
" 'and',\n",
" 'bear',\n",
" 'him',\n",
" 'back',\n",
" 'to',\n",
" 'the',\n",
" 'isle',\n",
" 'of\\r',\n",
" 'his',\n",
" 'birth.\\r',\n",
" '\\r',\n",
" 'And',\n",
" 'in',\n",
" 'the',\n",
" 'twelfth',\n",
" 'year,',\n",
" 'on']"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prophet_flat = [i for sub in prophet_line for i in sub]\n",
"prophet_flat[:20]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "e650e1dc-e47d-495d-9d34-a0bbc4110a09",
"metadata": {},
"outputs": [],
"source": [
"# Exercise 2 \n",
"def word_filter(x):\n",
" '''\n",
" Input: A string\n",
" Output: True if the word is not in the specified list \n",
" and False if the word is in the list.\n",
" '''\n",
" \n",
" word_list = ['and', 'the', 'a', 'an']\n",
" \n",
" if x in word_list:\n",
" return False\n",
" else:\n",
" return True\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "72c4649f-be1a-4791-8af0-4805031f76b2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['was',\n",
" 'to\\r',\n",
" 'return',\n",
" 'bear',\n",
" 'him',\n",
" 'back',\n",
" 'to',\n",
" 'isle',\n",
" 'of\\r',\n",
" 'his',\n",
" 'birth.\\r',\n",
" '\\r',\n",
" 'And',\n",
" 'in',\n",
" 'twelfth',\n",
" 'year,',\n",
" 'on',\n",
" 'seventh\\r',\n",
" 'day',\n",
" 'of']"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prophet_filter = list(filter(word_filter, prophet_flat))\n",
"\n",
"prophet_filter[:20]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "693b6490-51a7-4d9d-b7d3-340d875bf861",
"metadata": {},
"outputs": [],
"source": [
"def word_filter_case(x):\n",
" '''\n",
" Same as word_filter, but ignores case.\n",
" '''\n",
" \n",
" word_list = ['and', 'the', 'a', 'an']\n",
" \n",
" if x.lower() in word_list:\n",
" return False\n",
" else:\n",
" return True"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "173ccad8-3ad3-4b91-9c5e-f34a50e06d88",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['was',\n",
" 'to\\r',\n",
" 'return',\n",
" 'bear',\n",
" 'him',\n",
" 'back',\n",
" 'to',\n",
" 'isle',\n",
" 'of\\r',\n",
" 'his',\n",
" 'birth.\\r',\n",
" '\\r',\n",
" 'in',\n",
" 'twelfth',\n",
" 'year,',\n",
" 'on',\n",
" 'seventh\\r',\n",
" 'day',\n",
" 'of',\n",
" 'Ielool,']"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prophet_filter_case = list(filter(word_filter_case, prophet_flat))\n",
"prophet_filter_case[:20]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4620572b-6a9d-469d-b160-2c9eb2d93884",
"metadata": {},
"outputs": [],
"source": [
"def concat_space(a, b):\n",
" '''\n",
" Input: Two strings\n",
" Output: A single string separated by a space\n",
" \n",
" Example:\n",
" Input: 'John', 'Smith'\n",
" Output: 'John Smith'\n",
" '''\n",
" return a + ' ' + b"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading