Skip to content

Commit e26b033

Browse files
Add files via upload
1 parent 09df85a commit e26b033

File tree

1 file changed

+284
-0
lines changed

1 file changed

+284
-0
lines changed

Spam Detector.ipynb

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import pandas as pd\n",
10+
"import numpy as np\n"
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": null,
16+
"metadata": {},
17+
"outputs": [],
18+
"source": [
19+
"data=pd.read_csv(\"spam.csv\", encoding=\"latin-1\")"
20+
]
21+
},
22+
{
23+
"cell_type": "code",
24+
"execution_count": null,
25+
"metadata": {},
26+
"outputs": [],
27+
"source": [
28+
"data.head()"
29+
]
30+
},
31+
{
32+
"cell_type": "code",
33+
"execution_count": null,
34+
"metadata": {},
35+
"outputs": [],
36+
"source": [
37+
"data.columns"
38+
]
39+
},
40+
{
41+
"cell_type": "code",
42+
"execution_count": null,
43+
"metadata": {},
44+
"outputs": [],
45+
"source": [
46+
"data.drop(['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], axis=1, inplace=True)"
47+
]
48+
},
49+
{
50+
"cell_type": "code",
51+
"execution_count": null,
52+
"metadata": {},
53+
"outputs": [],
54+
"source": [
55+
"data.head()"
56+
]
57+
},
58+
{
59+
"cell_type": "code",
60+
"execution_count": null,
61+
"metadata": {},
62+
"outputs": [],
63+
"source": [
64+
"data['class']=data['class'].map({'ham':0, 'spam':1})"
65+
]
66+
},
67+
{
68+
"cell_type": "code",
69+
"execution_count": null,
70+
"metadata": {},
71+
"outputs": [],
72+
"source": [
73+
"data.head()"
74+
]
75+
},
76+
{
77+
"cell_type": "code",
78+
"execution_count": null,
79+
"metadata": {},
80+
"outputs": [],
81+
"source": [
82+
"from sklearn.feature_extraction.text import CountVectorizer"
83+
]
84+
},
85+
{
86+
"cell_type": "code",
87+
"execution_count": null,
88+
"metadata": {},
89+
"outputs": [],
90+
"source": [
91+
"from sklearn.model_selection import train_test_split"
92+
]
93+
},
94+
{
95+
"cell_type": "code",
96+
"execution_count": null,
97+
"metadata": {},
98+
"outputs": [],
99+
"source": [
100+
"X=data['message']\n",
101+
"y=data['class']"
102+
]
103+
},
104+
{
105+
"cell_type": "code",
106+
"execution_count": null,
107+
"metadata": {},
108+
"outputs": [],
109+
"source": [
110+
"X.shape"
111+
]
112+
},
113+
{
114+
"cell_type": "code",
115+
"execution_count": null,
116+
"metadata": {},
117+
"outputs": [],
118+
"source": [
119+
"y.shape"
120+
]
121+
},
122+
{
123+
"cell_type": "code",
124+
"execution_count": null,
125+
"metadata": {},
126+
"outputs": [],
127+
"source": [
128+
"data.isnull().sum()"
129+
]
130+
},
131+
{
132+
"cell_type": "code",
133+
"execution_count": null,
134+
"metadata": {},
135+
"outputs": [],
136+
"source": [
137+
"cv=CountVectorizer()"
138+
]
139+
},
140+
{
141+
"cell_type": "code",
142+
"execution_count": null,
143+
"metadata": {},
144+
"outputs": [],
145+
"source": [
146+
"X=cv.fit_transform(X)"
147+
]
148+
},
149+
{
150+
"cell_type": "code",
151+
"execution_count": null,
152+
"metadata": {},
153+
"outputs": [],
154+
"source": [
155+
"x_train, x_test,y_train, y_test=train_test_split(X,y, test_size=0.2, random_state=42)"
156+
]
157+
},
158+
{
159+
"cell_type": "code",
160+
"execution_count": null,
161+
"metadata": {},
162+
"outputs": [],
163+
"source": [
164+
"x_train.shape"
165+
]
166+
},
167+
{
168+
"cell_type": "code",
169+
"execution_count": null,
170+
"metadata": {},
171+
"outputs": [],
172+
"source": [
173+
"x_test.shape"
174+
]
175+
},
176+
{
177+
"cell_type": "code",
178+
"execution_count": null,
179+
"metadata": {},
180+
"outputs": [],
181+
"source": [
182+
"from sklearn.naive_bayes import MultinomialNB"
183+
]
184+
},
185+
{
186+
"cell_type": "code",
187+
"execution_count": null,
188+
"metadata": {},
189+
"outputs": [],
190+
"source": [
191+
"model=MultinomialNB()"
192+
]
193+
},
194+
{
195+
"cell_type": "code",
196+
"execution_count": null,
197+
"metadata": {},
198+
"outputs": [],
199+
"source": [
200+
"model.fit(x_train, y_train)"
201+
]
202+
},
203+
{
204+
"cell_type": "code",
205+
"execution_count": null,
206+
"metadata": {},
207+
"outputs": [],
208+
"source": [
209+
"model.score(x_test, y_test)"
210+
]
211+
},
212+
{
213+
"cell_type": "code",
214+
"execution_count": null,
215+
"metadata": {},
216+
"outputs": [],
217+
"source": [
218+
"msg=\"You Won 1000$\"\n",
219+
"data = [msg]\n",
220+
"vect = cv.transform(data).toarray()\n",
221+
"my_prediction = model.predict(vect)"
222+
]
223+
},
224+
{
225+
"cell_type": "code",
226+
"execution_count": null,
227+
"metadata": {},
228+
"outputs": [],
229+
"source": [
230+
"vect"
231+
]
232+
},
233+
{
234+
"cell_type": "code",
235+
"execution_count": null,
236+
"metadata": {},
237+
"outputs": [],
238+
"source": [
239+
"import pickle\n",
240+
"pickle.dump(model, open('spam.pkl','wb'))\n",
241+
"model1 = pickle.load(open('spam.pkl','rb'))\n"
242+
]
243+
},
244+
{
245+
"cell_type": "code",
246+
"execution_count": null,
247+
"metadata": {},
248+
"outputs": [],
249+
"source": [
250+
"import pickle\n",
251+
"pickle.dump(cv, open('vec.pkl','wb'))\n",
252+
"#model1 = pickle.load(open('spam.pkl','rb'))"
253+
]
254+
},
255+
{
256+
"cell_type": "markdown",
257+
"metadata": {},
258+
"source": [
259+
"## Jayesh Baranwal"
260+
]
261+
}
262+
],
263+
"metadata": {
264+
"kernelspec": {
265+
"display_name": "ml",
266+
"language": "python",
267+
"name": "python3"
268+
},
269+
"language_info": {
270+
"codemirror_mode": {
271+
"name": "ipython",
272+
"version": 3
273+
},
274+
"file_extension": ".py",
275+
"mimetype": "text/x-python",
276+
"name": "python",
277+
"nbconvert_exporter": "python",
278+
"pygments_lexer": "ipython3",
279+
"version": "3.8.19"
280+
}
281+
},
282+
"nbformat": 4,
283+
"nbformat_minor": 4
284+
}

0 commit comments

Comments
 (0)