@@ -45,6 +45,7 @@ def normalize(self, data_vector):
45
45
normalised_data = data_vector / 255
46
46
return normalised_data
47
47
48
+
48
49
def prep_data ():
49
50
"""
50
51
This function preps the data set for further application
@@ -63,7 +64,7 @@ def prep_data():
63
64
return x_train_norm , x_test_norm , y_train , y_test
64
65
65
66
66
- def run_PCA (train_data , test_data ):
67
+ def run_pca (train_data , test_data ):
67
68
"""
68
69
This function performs PCA on data set and reduces its dimensionality
69
70
:param train_data: train data for PCA dimensionality reduction
@@ -82,7 +83,7 @@ def run_PCA(train_data, test_data):
82
83
return x_train_pca , x_test_pca
83
84
84
85
85
- def run_incremental_PCA (train_data , test_data , n_batches = 50 ):
86
+ def run_incremental_pca (train_data , test_data , n_batches = 50 ):
86
87
"""
87
88
:param train_data: train data for incremental PCA dimensionality reduction
88
89
:param test_data: test data for incremental PCA dimensionality reduction
@@ -101,7 +102,15 @@ def run_incremental_PCA(train_data, test_data, n_batches=50):
101
102
102
103
return x_train_pca_inc , x_test_pca_inc
103
104
105
+
104
106
def plot_digits (instances , images_per_row = 10 , ** options ):
107
+ """
108
+ This function plots the images
109
+ :param instances:
110
+ :param images_per_row: images per row
111
+ :param options:
112
+ :return: plots the image
113
+ """
105
114
size = 28
106
115
images_per_row = min (len (instances ), images_per_row )
107
116
images = [instance .reshape (size ,size ) for instance in instances ]
@@ -116,99 +125,109 @@ def plot_digits(instances, images_per_row=10, **options):
116
125
plt .imshow (image , cmap = mpl .cm .binary , ** options )
117
126
plt .axis ("off" )
118
127
128
+
119
129
def cf_matrix(model, X, Y, cv=3):
    """
    This function calculates, prints and plots the confusion matrix of a
    model, using cross-validated predictions on the given data
    :param model: estimator handed to cross_val_predict, e.g. knn
    :param X: data the cross-validated predictions are made on
    :param Y: labels belonging to X
    :param cv: cross-validation setting forwarded to cross_val_predict
               (defaults to 3, the value that used to be hard-coded)
    :return: confusion matrix and the cross-validated predictions for X
    """
    y_train_pred = cross_val_predict(model, X, Y, cv=cv)
    conf_mx = confusion_matrix(Y, y_train_pred)
    print(conf_mx)

    # show the matrix as a gray-scale image
    plt.matshow(conf_mx, cmap=plt.cm.gray)
    plt.show()
    return conf_mx, y_train_pred
127
143
144
+
128
145
def cf_matrix_norm(cfm):
    """
    This function normalizes the confusion matrix row by row and zeroes
    its diagonal, so that only the error rates remain
    :param cfm: confusion matrix (square, array-like)
    :return: new float array where every entry is divided by its row sum
             and the diagonal is set to 0; the input is not modified
    """
    cfm = np.asarray(cfm, dtype=float)
    row_sums = cfm.sum(axis=1, keepdims=True)

    # a row that sums to zero would give 0/0 = NaN; leave such rows at 0
    # instead
    norm_conf_mx = np.divide(
        cfm,
        row_sums,
        out=np.zeros_like(cfm),
        where=row_sums != 0,
    )

    # drop the correct classifications so only the errors stand out
    np.fill_diagonal(norm_conf_mx, 0)
    return norm_conf_mx
133
-
155
+
156
+
134
157
def plot_images(a, b, x_train, y_train, y_pred):
    """
    This function shows a 2x2 figure of sample images for two classes,
    one panel per combination of true label and predicted label
    :param a: first class label
    :param b: second class label
    :param x_train: samples, selected with boolean masks built from
                    y_train and y_pred
    :param y_train: true labels
    :param y_pred: predicted labels that are compared with y_train
    :return: None
    """
    # (subplot position, true label, predicted label) for each panel
    panels = (
        (221, a, a),
        (222, a, b),
        (223, b, a),
        (224, b, b),
    )

    # select the samples of every panel before the figure is created
    selections = [
        x_train[(y_train == true_label) & (y_pred == pred_label)]
        for _, true_label, pred_label in panels
    ]

    plt.figure(figsize=(8, 8))
    for (position, _, _), samples in zip(panels, selections):
        plt.subplot(position)
        # at most 25 images per panel, laid out 5 per row
        plot_digits(samples[:25], images_per_row=5)
    plt.show()
146
183
147
184
148
185
def main():
    """
    This function runs the whole experiment: it prepares the data, reduces
    it with incremental PCA, fits a KNN classifier, reports timings and
    accuracy metrics and plots the confusion matrices
    :return: None
    """
    # preparing the data set
    x_train, x_test, y_train_data, y_test_data = prep_data()

    # running incremental pca on the data set
    x_train_pca, x_test_pca = run_incremental_pca(x_train, x_test)

    # Create KNN Classifier and fit it on the training set
    train_start = time.perf_counter()
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(x_train_pca, y_train_data)
    print("Training time:", (time.perf_counter() - train_start))

    # Predict the response for test data set; the clock is restarted so
    # the reported testing time does not include the fitting time
    test_start = time.perf_counter()
    y_pred = knn.predict(x_test_pca)
    print("Testing time:", (time.perf_counter() - test_start))

    # calculating accuracy of the classifier
    accuracy = metrics.accuracy_score(y_test_data, y_pred)
    print("accuracy of the classifier is: ", accuracy)

    # classification report for the test predictions
    print("classification report: \n ")
    print(metrics.classification_report(y_test_data, y_pred))

    # average accuracy as a percentage
    average_accuracy = np.mean(y_test_data == y_pred) * 100
    print("The average_accuracy is {0:.1f}%".format(average_accuracy))

    # calculating the confusion matrix
    cf, y_train_pred = cf_matrix(knn, x_train_pca, y_train_data)

    # normalizing the confusion matrix and plotting it
    norm_cf = cf_matrix_norm(cf)
    plt.matshow(norm_cf, cmap=plt.cm.gray)
    plt.show()

    # plot sample images for the class pair 6 / 2; the un-reduced x_train
    # is passed here, not the PCA output
    cl_a, cl_b = 6, 2
    plot_images(cl_a, cl_b, x_train, y_train_data, y_train_pred)


if __name__ == "__main__":
    main()
190
230
191
-
192
- # =============================================================================
193
- # plt.figure(figsize=(9,9))
194
- # example_images = np.r_[x_train[:12000:600], x_train[13000:30600:600], x_train[30600:60000:590]]
195
- # plot_digits(example_images, images_per_row=10)
196
- # plt.show()
197
- # =============================================================================
198
-
199
- # =============================================================================
200
- # X_aa = x_train[(y_train_data == cl_a) & (y_train_pred == cl_a)]
201
- # X_ab = x_train[(y_train_data == cl_a) & (y_train_pred == cl_b)]
202
- # X_ba = x_train[(y_train_data == cl_b) & (y_train_pred == cl_a)]
203
- # X_bb = x_train[(y_train_data == cl_b) & (y_train_pred == cl_b)]
204
- # plt.figure(figsize=(8,8))
205
- # plt.subplot(221); plot_digits(X_aa[:25], images_per_row=5)
206
- # plt.subplot(222); plot_digits(X_ab[:25], images_per_row=5)
207
- # plt.subplot(223); plot_digits(X_ba[:25], images_per_row=5)
208
- # plt.subplot(224); plot_digits(X_bb[:25], images_per_row=5)
209
- # plt.show()
210
- #
211
- # =============================================================================
212
231
# =============================================================================
213
232
# start = time.time()
214
233
# model.fit(x_train_pca, y_train_data)
0 commit comments