Skip to content

Commit

Permalink
Created using Colaboratory
Browse files Browse the repository at this point in the history
  • Loading branch information
rmaacario committed Dec 27, 2022
1 parent 9d6a532 commit ea19b90
Showing 1 changed file with 100 additions and 33 deletions.
133 changes: 100 additions & 33 deletions A_Naive_Bayes_Classifier_of_Spatial_Prepositions.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"colab": {
"private_outputs": true,
"provenance": [],
"authorship_tag": "ABX9TyO/HriD8rlWF7QYjff37k8Q",
"authorship_tag": "ABX9TyP2KqHzT9giuOQl/CAysGTo",
"include_colab_link": true
},
"kernelspec": {
Expand Down Expand Up @@ -42,7 +42,7 @@
" (\"I'm at a crossroads\", '0'), ('Our office is near the lab, but not next to it.', '1'), ('The new year is near.', '1'), ('The train is right on time.', '0'), ('The restaurant is on strike.', '0'), ('The bridge is just beyond that tollbooth.', '1'), ('I left the house at 6:00.', '0'), ('From Paris he flew to Berlin.', '1'), ('She abstained from casting a vote of no confidence.', '0'), (\" got beaten up by thugs on my way home.\", '0'),\n",
" ('The restaurant is at the end of the street.', '0'), ('They were left out of the group.', '0'), ('They went out of the room.', '1'), (\"Jesus walked on water.\", '1'), ('She had peeped into the book.', '0'), ('Our favorite restaurant is just off the road.', '1'),('The troops marched toward the village.', '1'), ('They prevented her from leaving the meeting.', '0'), ('The girl broke into tears', '0'), (\"I'll go ahead and you can catch me up later.\", '0'),\n",
" ('She fearlessly walks into the room.', '1'), ('I am going to the lab.', '1'), ('We gave the results to our advisor', '0'), ('We had to get through the literature review before we began our methodology.', '0'), ('This painting will look great over the fireplace.', '1'), ('The puppy crept under the chair.', '1'), ('He dissuaded her from reporting him to the director.', '0'), ('The boy is in tears', '0'), (\"I'm calling off the event\", '0'),\n",
" ('We had to start from these findings in order to get our best results', '0'), ('He walked toward the door', '1'), ('We are working toward a better research method but have not achieved it yet.', '0'), ('My keys were under the dresser.', '1'), (' I met her in the United States', '1'), (' I met her in Chicago at the Lyric Opera.', '1'), ('I can see you before 3:00.', '0'), ('He cried in happiness', '0'),\n",
" ('We had to start from these findings in order to get our best results', '0'), ('He walked toward the door', '1'), ('We are working toward a better research method but have not achieved it yet.', '0'), ('My keys were under the dresser.', '1'), (' I met her in the United States', '1'), (' I met her in Chicago at the Lyric Opera.', '1'), ('I can see you before 3:00.', '0'), ('He cried in happiness', '0'), (\"She's far from home.\", '1'), (\"This is far from good.\", '0'),\n",
" ('The dog sat on the pillow', '1'), ('We live between two neighbors.', '1'), (\"Let's keep it between you and me\", '0'), ('He sat on the chair', '1'), ('There is some milk in the fridge.', '1'), ('She was hiding under the table.', '1'), ('The stampeding cattle ran right off the cliff.', '1'), ('The ball fell off the table, onto the floor, and rolled under the bed.', '1'), (\"The roof gave in under the weight of the snow.\", '0'), (\"They gave up the search when it got dark.\", '0'),\n",
" ('The cat jumped off the counter.', '1'), ('Barry drove over the bridge.', '1'), ('Matilde lost her ring at the beach.', '1'), ('The book belongs to Anthony.', '0'), ('They were sitting by the tree.', '0'), ('I walked out of the house.', '1'), ('The portrait of their mother hangs over the fireplace.', '1'), ('I heard it from her.', '0'), ('We learned a lot from Professor Kearns.', '0'), ('He checked into the hotel.', '0'),\n",
" ('Once upon a time, there was a beautiful princess.', '0'), ('The baby climbed onto the table.', '1'), ('It is up to us to find the answer.', '0'), ('The loud noise came from within the stadium.', '1'), ('She never leaves without her phone.', '0'), ('The house lies just over that hill', '1'), ('When he awoke they were driving through the forest.', '1'), ('Toward morning, he fell asleep.', '0'), (\"Don't bring up politics if you want to have a quiet conversation with that guy.\", '0'),\n",
Expand All @@ -55,18 +55,11 @@
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Naive Bayes Model from scratch"
],
"metadata": {
"id": "aXtnBBLAaK9F"
}
},
{
"cell_type": "code",
"source": [
"#### Importing the libraries:\n",
"\n",
"import itertools\n",
"from collections import Counter\n",
"import nltk\n",
Expand All @@ -77,31 +70,46 @@
"nltk.download('punkt')\n",
"from nltk.tokenize import word_tokenize\n",
"from nltk.stem.snowball import SnowballStemmer\n",
"stemmer = SnowballStemmer('english')\n",
"\n",
"####\n",
"stemmer = SnowballStemmer('english')\n"
],
"metadata": {
"id": "xFMwCWDjx_Fz"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Naive Bayes Model from scratch"
],
"metadata": {
"id": "aXtnBBLAaK9F"
}
},
{
"cell_type": "code",
"source": [
"#### List of stops\n",
"\n",
"stops = [\"no\", \"not\", \"and\", \"i\", \"me\", \"my\", \"myself\", \"we\", \"our\", \"ours\", \"ourselves\", \"you\", \"your\", \"yours\", \"yourself\", \"yourselves\", \"he\", \"him\", \"his\", \"himself\", \"she\", \"her\", \"hers\", \"herself\", \"it\", \"its\", \"itself\", \"they\", \"them\", \"their\", \"theirs\", \"themselves\", \"what\", \"which\", \"who\", \"whom\", \"this\", \"that\", \"these\", \"those\", \"am\", \"is\", \"are\", \"was\", \"were\", \"be\", \"been\", \"being\", \"have\", \"has\", \"had\", \"having\", \"do\", \"does\", \"did\", \"doing\", \"but\", \"if\", \"or\", \"because\", \"as\", \"until\", \"while\", \"with\", \"again\", \"further\", \"then\", \"once\", \"here\", \"there\", \"when\", \"where\", \"why\", \"how\", \"all\", \"any\", \"both\", \"each\", \"few\", \"more\", \"most\", \"other\", \"some\", \"such\", \"no\", \"nor\", \"not\", \"only\", \"own\", \"same\", \"so\", \"than\", \"too\", \"very\", \"s\", \"t\", \"can\", \"will\", \"just\", \"don\", \"should\", \"now\", \"page\"]\n",
"\n",
"####\n",
"\n",
"def tokenizar(str_texto):\n",
"def tokenize(str_texto):\n",
" return word_tokenize(str_texto)\n",
"\n",
"def limpar(lista):\n",
"def clean(lista):\n",
" return [i.lower() for i in lista if i.isalpha()]\n",
"\n",
"def sem_stops(lista):\n",
"def remove_stops(lista):\n",
" return [i for i in lista if i not in stops]\n",
"\n",
"def stemizar(lista):\n",
" return [stemmer.stem(i) for i in lista]\n",
"\n",
"def achatar(lista):\n",
" return list(itertools.chain(*lista))\n",
"\n",
"def pre_processar(str_texto):\n",
" return sem_stops(limpar(tokenizar(str_texto)))\n"
"def preprocess(str_texto):\n",
" return remove_stops(clean(tokenize(str_texto)))\n"
],
"metadata": {
"id": "clcPkQpDMQOQ"
Expand All @@ -123,7 +131,7 @@
{
"cell_type": "code",
"source": [
"corpus = [(pre_processar(i[0]), i[1]) for i in corpus]\n",
"corpus = [(preprocess(i[0]), i[1]) for i in corpus]\n",
"corpus[:10]"
],
"metadata": {
Expand All @@ -135,7 +143,7 @@
{
"cell_type": "code",
"source": [
"pre_processar(\"He’s standing against the wall\")"
"preprocess(\"He’s standing against the wall\")"
],
"metadata": {
"id": "fdS9YpFrfKm1"
Expand Down Expand Up @@ -194,12 +202,12 @@
"\n",
"#### Condition: ####\n",
"if sent_input2.lower() == \"y\":\n",
" tokens_test = pre_processar(sent_input)\n",
" tokens_test = preprocess(sent_input)\n",
" tokens_test = [i for i in tokens_test if i in vocab]\n",
"\n",
"if sent_input2.lower() == \"n\": \n",
" sent_input = input('Reenter your sentence: ')\n",
" tokens_test = pre_processar(sent_input)\n",
" tokens_test = preprocess(sent_input)\n",
" tokens_test = [i for i in tokens_test if i in vocab]\n",
"\n",
"# Calculating probabilities with Laplace smoothing:\n",
Expand Down Expand Up @@ -274,8 +282,8 @@
{
"cell_type": "code",
"source": [
"vetor = CountVectorizer() \n",
"x = vetor.fit_transform(sents).toarray() \n",
"vector = CountVectorizer() \n",
"x = vector.fit_transform(sents).toarray() \n",
"x "
],
"metadata": {
Expand Down Expand Up @@ -343,15 +351,15 @@
"\n",
"#### Condition with output: ####\n",
"if sent_input4.lower() == \"y\":\n",
" prediction = str(model.predict(vetor.transform([sent_input3])))\n",
" prediction = str(model.predict(vector.transform([sent_input3])))\n",
" print(' ')\n",
" print('-------')\n",
" print(sent_input3)\n",
" print('The preposition you entered is:', prediction.replace(\"['1']\", 'SPATIAL.').replace(\"['0']\", 'NON-SPATIAL.'))\n",
"\n",
"if sent_input4.lower() == \"n\": \n",
" sent_input = input('Reenter your sentence: ')\n",
" prediction = str(model.predict(vetor.transform([sent_input3])))\n",
" prediction = str(model.predict(vector.transform([sent_input3])))\n",
" print(' ')\n",
" print('-------')\n",
" print(sent_input3)\n",
Expand Down Expand Up @@ -511,7 +519,7 @@
{
"cell_type": "code",
"source": [
"# Probabilitues (prior) of classes\n",
"# Probabilities (prior) of classes\n",
"# Shows bias\n",
"\n",
"model.class_log_prior_"
Expand Down Expand Up @@ -565,7 +573,7 @@
"source": [
"# Alphabetically indexed attribute names\n",
"\n",
"vetor.get_feature_names_out()"
"vector.get_feature_names_out()"
],
"metadata": {
"id": "yj_PFsEYRtce"
Expand All @@ -577,7 +585,7 @@
"cell_type": "code",
"source": [
"dic_atributes = dict()\n",
"for e, i in enumerate(vetor.get_feature_names_out()):\n",
"for e, i in enumerate(vector.get_feature_names_out()):\n",
" dic_atributes[i] = (model.feature_log_prob_[0][e], model.feature_log_prob_[1][e])"
],
"metadata": {
Expand Down Expand Up @@ -611,6 +619,65 @@
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df['SPATIAL'].nlargest(n=10)\n"
],
"metadata": {
"id": "G00Au2wPpBwg"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df['NON-SPATIAL'].nlargest(n=10)\n"
],
"metadata": {
"id": "KWxKTVpupIrN"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df['Ratio SPATIAL/NON-SPATIAL'].nlargest(n=10)"
],
"metadata": {
"id": "eMna5L5J-_jr"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"df.plot()\n",
"\n",
"plt.show()"
],
"metadata": {
"id": "sWT48Ax-75GC"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df['Ratio SPATIAL/NON-SPATIAL'].nlargest(n=10).plot(kind = 'hist')"
],
"metadata": {
"id": "fIxC3aY-83eT"
},
"execution_count": null,
"outputs": []
}
]
}

0 comments on commit ea19b90

Please sign in to comment.