8
8
from sklearn .decomposition import IncrementalPCA
9
9
import numpy as np
10
10
import time
11
+ from sklearn import metrics
11
12
12
13
13
14
class Dataset (object ):
@@ -129,13 +130,16 @@ def prep_data():
129
130
x_train , y_train = data_set .load ("data/fashion" , "train" )
130
131
x_test , y_test = data_set .load ("data/fashion" , "t10k" )
131
132
133
+ shuffle_index = np .random .permutation (60000 )
134
+ x_train , y_train = x_train [shuffle_index ], y_train [shuffle_index ]
135
+
132
136
x_train_norm = data_set .normalize (x_train )
133
137
x_test_norm = data_set .normalize (x_test )
134
138
135
139
return x_train_norm , x_test_norm , y_train , y_test
136
140
137
141
138
- def run_PCA (train_data , test_data ):
142
+ def run_pca (train_data , test_data ):
139
143
"""
140
144
This function performs PCA on data set and reduces its dimensionality
141
145
:param train_data: train data for PCA dimensionality reduction
@@ -154,7 +158,7 @@ def run_PCA(train_data, test_data):
154
158
return x_train_pca , x_test_pca
155
159
156
160
157
- def run_incremental_PCA (train_data , test_data , n_batches = 50 ):
161
+ def run_incremental_pca (train_data , test_data , n_batches = 50 ):
158
162
"""
159
163
:param train_data: train_data: train data for incremental PCA dimensionality reduction
160
164
:param test_data: test data for incremental PCA dimensionality reduction
@@ -176,11 +180,17 @@ def run_incremental_PCA(train_data, test_data, n_batches=50):
176
180
177
181
if __name__ == "__main__" :
178
182
183
+ # preparing the data set
179
184
x_train , x_test , y_train_data , y_test_data = prep_data ()
180
- x_train_pca , x_test_pca = run_incremental_PCA (x_train , x_test )
181
185
186
+ # running incremental pca on the data set
187
+ x_train_pca , x_test_pca = run_incremental_pca (x_train , x_test )
188
+
189
+ # create the Bayes classifier
182
190
model = Bayes ()
183
191
start = time .time ()
192
+
193
+ # Run the model using the training sets
184
194
model .fit (x_train_pca , y_train_data )
185
195
print ("Training time:" , (time .time () - start ))
186
196
@@ -202,27 +212,21 @@ def run_incremental_PCA(train_data, test_data, n_batches=50):
202
212
len (y_test_data ),
203
213
)
204
214
215
+ # Predict the response for test data set
216
+ y_pred = model .predict (x_test_pca )
217
+ print ("Testing time:" , (time .time () - start ))
218
+
219
+ # calculate the accuracy of the classifier on the test set
220
+ accuracy = metrics .accuracy_score (y_test_data , y_pred )
221
+ print ("accuracy of the classifier is: " , accuracy )
222
+
223
+ # classification report includes precision, recall, F1-score
224
+ print ("classification report: \n " )
225
+ print (metrics .classification_report (y_test_data , y_pred ))
226
+
227
+ # average accuracy
228
+ average_accuracy = np .mean (y_test_data == y_pred ) * 100
229
+ print ("The average_accuracy is {0:.1f}%" .format (average_accuracy ))
230
+
205
231
206
- #
207
- # X_train, y_train = mnist_reader.load_mnist('data/fashion', kind='train')
208
- # X_test, y_test = mnist_reader.load_mnist('data/fashion', kind='t10k')
209
- #
210
- # X_train = X_train.astype('float32') #images loaded in as int64, 0 to 255 integers
211
- # X_test = X_test.astype('float32')
212
- # # Normalization
213
- # X_train /= 255
214
- # X_test /= 255
215
-
216
- # plt.figure(figsize=(12,10))# Showing the Input Data after Normalizing
217
- # x, y = 4, 4
218
- # for i in range(15):
219
- # plt.subplot(y, x, i+1)
220
- # plt.imshow(X_train[i].reshape((28,28)),interpolation='nearest')
221
- # plt.show()
222
-
223
- # some_item = X_train[9000]
224
- # # some_item_image = some_item.reshape(28, 28)
225
- # # plt.imshow(some_item_image, cmap = matplotlib.cm.binary,interpolation="nearest")
226
- # # plt.axis("off")
227
- # # plt.show()
228
232
0 commit comments