Skip to content

Commit af05e89

Browse files
added evaluation reports to all files
1 parent ba59b9c commit af05e89

File tree

6 files changed

+365
-237
lines changed

6 files changed

+365
-237
lines changed

CMSC_828C_Project1/Bayes_LDA.py

+22-18
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from scipy.stats import multivariate_normal as mvn
99
from sklearn.preprocessing import StandardScaler
1010
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
11+
from sklearn import metrics
1112

1213

1314
class Dataset(object):
@@ -104,7 +105,7 @@ def predict(self, data):
104105
for category, g in iteritems(self.gaussian):
105106
mean, covariance = g["mean"], g["cov"]
106107
p[:, category] = mvn.logpdf(data, mean=mean, cov=covariance)
107-
+np.log(self.priors[category])
108+
+ np.log(self.priors[category])
108109

109110
return np.argmax(p, axis=1)
110111

@@ -128,6 +129,9 @@ def prep_data():
128129
x_train, y_train = data_set.load("data/fashion", "train")
129130
x_test, y_test = data_set.load("data/fashion", "t10k")
130131

132+
shuffle_index = np.random.permutation(60000)
133+
x_train, y_train = x_train[shuffle_index], y_train[shuffle_index]
134+
131135
x_train_norm = data_set.normalize(x_train)
132136
x_test_norm = data_set.normalize(x_test)
133137

@@ -180,24 +184,24 @@ def main():
180184
"Testing dataset size:",
181185
len(y_test_data),
182186
)
183-
187+
188+
# Predict the response for test dataset
189+
y_pred = model.predict(x_LDA_test)
190+
print("Testing time:", (time.time() - start))
191+
192+
# calculating accuracy of the classifier
193+
accuracy = metrics.accuracy_score(y_test_data, y_pred)
194+
print("accuracy of the classifier is: ", accuracy)
195+
196+
# classification report includes precision, recall, F1-score
197+
print("classification report: \n")
198+
print(metrics.classification_report(y_test_data, y_pred))
199+
200+
# overall accuracy as a percentage (fraction of test labels predicted correctly, times 100)
201+
average_accuracy = np.mean(y_test_data == y_pred) * 100
202+
print("The average_accuracy is {0:.1f}%".format(average_accuracy))
203+
184204

185205
if __name__ == "__main__":
186206
main()
187207

188-
189-
# =============================================================================
190-
# if __name__ == '__main__':
191-
# model = Bayes()
192-
# t0 = datetime.now()
193-
# model.fit(x_train_LDA, y_train)
194-
# print("Training time:", (datetime.now() - t0))
195-
#
196-
# t0 = datetime.now()
197-
# print("Train accuracy:", model.accuracy(x_train_LDA, y_train))
198-
# print("Time to compute train accuracy:", (datetime.now() - t0), "Train size:", len(y_train))
199-
#
200-
# t0 = datetime.now()
201-
# print("Test accuracy:", model.accuracy(x_test_LDA, y_test))
202-
# print("Time to compute test accuracy:", (datetime.now() - t0), "Test size:", len(y_test))
203-
# =============================================================================

CMSC_828C_Project1/Bayes_PCA.py

+29-25
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from sklearn.decomposition import IncrementalPCA
99
import numpy as np
1010
import time
11+
from sklearn import metrics
1112

1213

1314
class Dataset(object):
@@ -129,13 +130,16 @@ def prep_data():
129130
x_train, y_train = data_set.load("data/fashion", "train")
130131
x_test, y_test = data_set.load("data/fashion", "t10k")
131132

133+
shuffle_index = np.random.permutation(60000)
134+
x_train, y_train = x_train[shuffle_index], y_train[shuffle_index]
135+
132136
x_train_norm = data_set.normalize(x_train)
133137
x_test_norm = data_set.normalize(x_test)
134138

135139
return x_train_norm, x_test_norm, y_train, y_test
136140

137141

138-
def run_PCA(train_data, test_data):
142+
def run_pca(train_data, test_data):
139143
"""
140144
This function performs PCA on the data set and reduces its dimensionality
141145
:param train_data: train data for PCA dimensionality reduction
@@ -154,7 +158,7 @@ def run_PCA(train_data, test_data):
154158
return x_train_pca, x_test_pca
155159

156160

157-
def run_incremental_PCA(train_data, test_data, n_batches=50):
161+
def run_incremental_pca(train_data, test_data, n_batches=50):
158162
"""
159163
:param train_data: train data for incremental PCA dimensionality reduction
160164
:param test_data: test data for incremental PCA dimensionality reduction
@@ -176,11 +180,17 @@ def run_incremental_PCA(train_data, test_data, n_batches=50):
176180

177181
if __name__ == "__main__":
178182

183+
# preparing the data set
179184
x_train, x_test, y_train_data, y_test_data = prep_data()
180-
x_train_pca, x_test_pca = run_incremental_PCA(x_train, x_test)
181185

186+
# running incremental pca on the data set
187+
x_train_pca, x_test_pca = run_incremental_pca(x_train, x_test)
188+
189+
# create the Bayes classifier
182190
model = Bayes()
183191
start = time.time()
192+
193+
# Run the model using the training sets
184194
model.fit(x_train_pca, y_train_data)
185195
print("Training time:", (time.time() - start))
186196

@@ -202,27 +212,21 @@ def run_incremental_PCA(train_data, test_data, n_batches=50):
202212
len(y_test_data),
203213
)
204214

215+
# Predict the response for test data set
216+
y_pred = model.predict(x_test_pca)
217+
print("Testing time:", (time.time() - start))
218+
219+
# calculating accuracy of the classifier
220+
accuracy = metrics.accuracy_score(y_test_data, y_pred)
221+
print("accuracy of the classifier is: ", accuracy)
222+
223+
# classification report includes precision, recall, F1-score
224+
print("classification report: \n")
225+
print(metrics.classification_report(y_test_data, y_pred))
226+
227+
# overall accuracy as a percentage (fraction of test labels predicted correctly, times 100)
228+
average_accuracy = np.mean(y_test_data == y_pred) * 100
229+
print("The average_accuracy is {0:.1f}%".format(average_accuracy))
230+
205231

206-
#
207-
# X_train, y_train = mnist_reader.load_mnist('data/fashion', kind='train')
208-
# X_test, y_test = mnist_reader.load_mnist('data/fashion', kind='t10k')
209-
#
210-
# X_train = X_train.astype('float32') #images loaded in as int64, 0 to 255 integers
211-
# X_test = X_test.astype('float32')
212-
# # Normalization
213-
# X_train /= 255
214-
# X_test /= 255
215-
216-
# plt.figure(figsize=(12,10))# Showing the Input Data after Normalizing
217-
# x, y = 4, 4
218-
# for i in range(15):
219-
# plt.subplot(y, x, i+1)
220-
# plt.imshow(X_train[i].reshape((28,28)),interpolation='nearest')
221-
# plt.show()
222-
223-
# some_item = X_train[9000]
224-
# # some_item_image = some_item.reshape(28, 28)
225-
# # plt.imshow(some_item_image, cmap = matplotlib.cm.binary,interpolation="nearest")
226-
# # plt.axis("off")
227-
# # plt.show()
228232

0 commit comments

Comments
 (0)