Skip to content

Commit ba59b9c

Browse files
refactored code
1 parent 58b8fb9 commit ba59b9c

File tree

1 file changed

+72
-53
lines changed

1 file changed

+72
-53
lines changed

CMSC_828C_Project1/KNN_PCA.py

Lines changed: 72 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def normalize(self, data_vector):
4545
normalised_data = data_vector / 255
4646
return normalised_data
4747

48+
4849
def prep_data():
4950
"""
5051
This function preps the data set for further application
@@ -63,7 +64,7 @@ def prep_data():
6364
return x_train_norm, x_test_norm, y_train, y_test
6465

6566

66-
def run_PCA(train_data, test_data):
67+
def run_pca(train_data, test_data):
6768
"""
6869
This function performs PCA on data set and reduces its dimensionality
6970
:param train_data: train data for PCA dimensionality reduction
@@ -82,7 +83,7 @@ def run_PCA(train_data, test_data):
8283
return x_train_pca, x_test_pca
8384

8485

85-
def run_incremental_PCA(train_data, test_data, n_batches=50):
86+
def run_incremental_pca(train_data, test_data, n_batches=50):
8687
"""
8788
:param train_data: train data for incremental PCA dimensionality reduction
8889
:param test_data: test data for incremental PCA dimensionality reduction
@@ -101,7 +102,15 @@ def run_incremental_PCA(train_data, test_data, n_batches=50):
101102

102103
return x_train_pca_inc, x_test_pca_inc
103104

105+
104106
def plot_digits(instances, images_per_row=10, **options):
107+
"""
108+
This function plots the images
109+
:param instances: sequence of flattened images; each one is reshaped to 28x28 before plotting
110+
:param images_per_row: images per row
111+
:param options: extra keyword arguments forwarded to plt.imshow
112+
:return: plots the image
113+
"""
105114
size = 28
106115
images_per_row = min(len(instances), images_per_row)
107116
images = [instance.reshape(size,size) for instance in instances]
@@ -116,99 +125,109 @@ def plot_digits(instances, images_per_row=10, **options):
116125
plt.imshow(image, cmap = mpl.cm.binary, **options)
117126
plt.axis("off")
118127

128+
119129
def cf_matrix(model, X, Y, cv=3):
    """
    This function computes, prints and displays the confusion matrix of a
    model, based on cross-validated predictions.

    :param model: estimator to evaluate, e.g. knn
    :param X: data the model is cross-validated on
    :param Y: labels for X
    :param cv: cross-validation setting passed to cross_val_predict (default 3)
    :return: confusion matrix and the cross-validated predictions for X
    """
    # The predictions are for X itself (not a separate test set); they are
    # compared against Y to build the confusion matrix.
    y_train_pred = cross_val_predict(model, X, Y, cv=cv)
    conf_mx = confusion_matrix(Y, y_train_pred)
    print(conf_mx)
    plt.matshow(conf_mx, cmap=plt.cm.gray)
    plt.show()
    return conf_mx, y_train_pred
127143

144+
128145
def cf_matrix_norm(cfm):
    """
    This function normalizes the confusion matrix row by row and blanks
    out the diagonal so that only the errors remain.

    Each entry is divided by the sum of its row. The diagonal is then set
    to 0. The input matrix is not modified.

    :param cfm: confusion matrix (square 2-D array-like of counts)
    :return: new float array with row-normalized values and a zero diagonal
    """
    cfm = np.asarray(cfm, dtype=float)
    row_sums = cfm.sum(axis=1, keepdims=True)
    # A row that sums to zero would divide by zero and fill the row with NaN;
    # dividing by 1 instead keeps that row at all zeros.
    safe_row_sums = np.where(row_sums == 0, 1.0, row_sums)
    norm_conf_mx = cfm / safe_row_sums
    np.fill_diagonal(norm_conf_mx, 0)
    return norm_conf_mx
133-
155+
156+
134157
def plot_images(a, b, x_train, y_train, y_pred):
    """
    This function plots sample images for the four true/predicted
    combinations of two classes in a 2x2 grid of subplots.

    Each panel shows at most 25 images, 5 per row:
    top-left true a / predicted a, top-right true a / predicted b,
    bottom-left true b / predicted a, bottom-right true b / predicted b.

    :param a: first class label
    :param b: second class label
    :param x_train: images, indexed with boolean masks built from y_train and y_pred
    :param y_train: true labels for x_train
    :param y_pred: predicted labels, compared element-wise with y_train
    :return: None
    """
    # (subplot position, true label, predicted label) for each panel
    panels = (
        (221, a, a),
        (222, a, b),
        (223, b, a),
        (224, b, b),
    )
    plt.figure(figsize=(8, 8))
    for position, true_label, predicted_label in panels:
        selected = x_train[(y_train == true_label) & (y_pred == predicted_label)]
        plt.subplot(position)
        if len(selected) == 0:
            # plot_digits clamps images_per_row to len(instances), which is 0
            # for an empty selection, so leave this panel blank instead.
            plt.axis("off")
            continue
        plot_digits(selected[:25], images_per_row=5)
    plt.show()
146183

147184

148185
def main():
    """
    Runs the KNN pipeline: prepares the data, reduces it with incremental
    PCA, fits a 5-nearest-neighbours classifier, reports timing and accuracy
    metrics, and plots the confusion matrices and sample images for
    classes 6 and 2.

    :return: None
    """
    # preparing the data set
    x_train, x_test, y_train_data, y_test_data = prep_data()

    # running incremental pca on the data set
    x_train_pca, x_test_pca = run_incremental_pca(x_train, x_test)

    start = time.time()

    # Create KNN Classifier
    knn = KNeighborsClassifier(n_neighbors=5)
    # Fit the model using the training set
    knn.fit(x_train_pca, y_train_data)
    print("Training time:", (time.time() - start))

    # Restart the timer so the testing time does not include the training time
    start = time.time()

    # Predict the response for test data set
    y_pred = knn.predict(x_test_pca)
    print("Testing time:", (time.time() - start))

    # calculating accuracy of the classifier
    accuracy = metrics.accuracy_score(y_test_data, y_pred)
    print("accuracy of the classifier is: ", accuracy)

    # classification report includes precision, recall, F1-score
    print("classification report: \n")
    print(metrics.classification_report(y_test_data, y_pred))

    # average accuracy, as a percentage
    average_accuracy = np.mean(y_test_data == y_pred) * 100
    print("The average_accuracy is {0:.1f}%".format(average_accuracy))

    # calculating the confusion matrix from cross-validated predictions
    cf, y_train_pred = cf_matrix(knn, x_train_pca, y_train_data)

    # normalizing the confusion matrix and plotting it
    norm_cf = cf_matrix_norm(cf)
    plt.matshow(norm_cf, cmap=plt.cm.gray)
    plt.show()

    # show example images for the class pair 6 / 2
    cl_a, cl_b = 6, 2
    plot_images(cl_a, cl_b, x_train, y_train_data, y_train_pred)


if __name__ == "__main__":
    main()
190230

191-
192-
# =============================================================================
193-
# plt.figure(figsize=(9,9))
194-
# example_images = np.r_[x_train[:12000:600], x_train[13000:30600:600], x_train[30600:60000:590]]
195-
# plot_digits(example_images, images_per_row=10)
196-
# plt.show()
197-
# =============================================================================
198-
199-
# =============================================================================
200-
# X_aa = x_train[(y_train_data == cl_a) & (y_train_pred == cl_a)]
201-
# X_ab = x_train[(y_train_data == cl_a) & (y_train_pred == cl_b)]
202-
# X_ba = x_train[(y_train_data == cl_b) & (y_train_pred == cl_a)]
203-
# X_bb = x_train[(y_train_data == cl_b) & (y_train_pred == cl_b)]
204-
# plt.figure(figsize=(8,8))
205-
# plt.subplot(221); plot_digits(X_aa[:25], images_per_row=5)
206-
# plt.subplot(222); plot_digits(X_ab[:25], images_per_row=5)
207-
# plt.subplot(223); plot_digits(X_ba[:25], images_per_row=5)
208-
# plt.subplot(224); plot_digits(X_bb[:25], images_per_row=5)
209-
# plt.show()
210-
#
211-
# =============================================================================
212231
# =============================================================================
213232
# start = time.time()
214233
# model.fit(x_train_pca, y_train_data)

0 commit comments

Comments
 (0)