
Commit c428c46

added requirement.txt file
1 parent b9c1584 commit c428c46

File tree

8 files changed (+718, -1233 lines changed)

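Note: the requirement.txt file named in the commit message is among the 8 changed files but is not shown in this excerpt, and its actual contents are unknown. Judging only from the imports in the two scripts below, a hypothetical dependency list would need to cover at least the following packages (version pins deliberately omitted, since none are given in the commit):

# hypothetical requirement.txt sketch -- not the actual file contents
numpy
scipy
scikit-learn
matplotlib
future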

CMSC_828C_Project1/Bayes_LDA.py

+153
@@ -0,0 +1,153 @@
#!/usr/bin/env python3

# importing required libraries
from matplotlib import pyplot as plt
from utils import mnist_reader
from future.utils import iteritems
from datetime import datetime
from scipy.stats import norm
from scipy.stats import multivariate_normal as mvn
import numpy as np
import matplotlib
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA


class Dataset(object):

    def __init__(self):
        pass

    def load(self, folder_path, data_type):
        """
        This function loads the data-set
        :param folder_path: path to data-set folder
        :param data_type: train or test data ('train' or 't10k')
        :return: data and labels
        """
        data, labels = mnist_reader.load_mnist(folder_path, kind=data_type)
        return data, labels

    def normalize(self, data_vector):
        """
        This function normalizes the data to the [0, 1] range
        :param data_vector: data to be normalised
        :return: normalised data
        """
        data_vector = data_vector.astype('float32')  # astype returns a copy; reassign before dividing
        normalised_data = data_vector / 255
        return normalised_data


# load and normalise the Fashion-MNIST data
data_set = Dataset()
x_train, y_train = data_set.load('data/fashion', 'train')
x_test, y_test = data_set.load('data/fashion', 't10k')

x_train_norm = data_set.normalize(x_train)
x_test_norm = data_set.normalize(x_test)

# standardise features, fitting the scaler on the training set only
sc = StandardScaler()
x_train_scaled = sc.fit_transform(x_train_norm)
x_test_scaled = sc.transform(x_test_norm)

# project onto a single linear discriminant direction
lda = LDA(n_components=1)
x_train_LDA = lda.fit_transform(x_train_scaled, y_train)
x_test_LDA = lda.transform(x_test_scaled)


class Bayes(object):

    def __init__(self):
        self.priors = dict()
        self.gaussian = dict()

    @staticmethod
    def mean(x):
        """
        returns mean of the data
        :param x: data vector
        :return: mean
        """
        mean_x = np.mean(x, axis=0)
        return mean_x

    @staticmethod
    def covariance(x):
        """
        returns covariance of the data
        :param x: data vector
        :return: covariance of the data
        """
        cov_x = np.cov(x.T)
        return cov_x

    def prior(self, labels):
        """
        this function calculates the prior for each category
        :param labels: category labels
        :return: None
        """
        for category in set(labels):
            self.priors[category] = len(labels[labels == category]) / len(labels)

    def fit(self, data, y):
        """
        calculates the mean and covariance of each class in the data-set
        :param data: data
        :param y: data labels
        :return: None
        """
        smoothing_factor = 1e-2
        samples, feature_length = data.shape

        labels = set(y)
        for category in labels:
            current_data = data[y == category]
            self.gaussian[category] = {
                'mean': current_data.mean(axis=0),
                # regularise the covariance so it stays invertible
                'cov': np.cov(current_data.T) + np.eye(feature_length) * smoothing_factor
            }
            self.priors[category] = float(len(y[y == category])) / len(y)

    def predict(self, data):
        """
        this function predicts the class of an unknown feature vector
        :param data: feature vectors whose class has to be determined
        :return: predicted class of each feature vector
        """
        samples, feature_length = data.shape
        k = len(self.gaussian)
        p = np.zeros((samples, k))

        # log-posterior (up to a constant) for each class
        for category, g in iteritems(self.gaussian):
            mean, covariance = g['mean'], g['cov']
            p[:, category] = mvn.logpdf(data, mean=mean, cov=covariance) + np.log(self.priors[category])

        return np.argmax(p, axis=1)

    def accuracy(self, data, labels):
        """
        returns the accuracy/score of the prediction
        :param data: data
        :param labels: labels for each feature vector
        :return: score of the prediction
        """
        prediction = self.predict(data)
        return np.mean(prediction == labels)


if __name__ == '__main__':
    model = Bayes()
    t0 = datetime.now()
    model.fit(x_train_LDA, y_train)
    print("Training time:", (datetime.now() - t0))

    t0 = datetime.now()
    print("Train accuracy:", model.accuracy(x_train_LDA, y_train))
    print("Time to compute train accuracy:", (datetime.now() - t0), "Train size:", len(y_train))

    t0 = datetime.now()
    print("Test accuracy:", model.accuracy(x_test_LDA, y_test))
    print("Time to compute test accuracy:", (datetime.now() - t0), "Test size:", len(y_test))

CMSC_828C_Project1/Bayes_PCA.py

+187
@@ -0,0 +1,187 @@
#!/usr/bin/env python3

# importing required libraries
from matplotlib import pyplot as plt
from utils import mnist_reader
from future.utils import iteritems
from datetime import datetime
from scipy.stats import norm
from scipy.stats import multivariate_normal as mvn
import numpy as np
import matplotlib
from sklearn.decomposition import PCA
from sklearn.decomposition import IncrementalPCA


class Dataset(object):

    def __init__(self):
        pass

    def load(self, folder_path, data_type):
        """
        This function loads the data-set
        :param folder_path: path to data-set folder
        :param data_type: train or test data ('train' or 't10k')
        :return: data and labels
        """
        data, labels = mnist_reader.load_mnist(folder_path, kind=data_type)
        return data, labels

    def normalize(self, data_vector):
        """
        This function normalizes the data to the [0, 1] range
        :param data_vector: data to be normalised
        :return: normalised data
        """
        data_vector = data_vector.astype('float32')  # astype returns a copy; reassign before dividing
        normalised_data = data_vector / 255
        return normalised_data


# load and normalise the Fashion-MNIST data
data_set = Dataset()
x_train, y_train = data_set.load('data/fashion', 'train')
x_test, y_test = data_set.load('data/fashion', 't10k')

x_train_norm = data_set.normalize(x_train)
x_test_norm = data_set.normalize(x_test)

# Exploration used to pick the number of components (~95% variance retained):
# pca = PCA()
# pca.fit(x_train)
# cumsum = np.cumsum(pca.explained_variance_ratio_)
# d = np.argmax(cumsum >= 0.95) + 1
#
# pca = PCA(n_components=187)
# x_train_pca = pca.fit_transform(x_train)
# x_test_pca = pca.transform(x_test)

# fit PCA incrementally on mini-batches of the training data
n_batches = 50

inc_pca = IncrementalPCA(n_components=187)
for X_batch in np.array_split(x_train_norm, n_batches):
    inc_pca.partial_fit(X_batch)
x_train_pca_inc = inc_pca.transform(x_train_norm)

# project the test set with the PCA fitted on the training data only
# (refitting on test batches would leak test information into the projection)
x_test_pca_inc = inc_pca.transform(x_test_norm)

# X_train, y_train = mnist_reader.load_mnist('data/fashion', kind='train')
# X_test, y_test = mnist_reader.load_mnist('data/fashion', kind='t10k')
#
# X_train = X_train.astype('float32')  # images loaded in as int64, 0 to 255 integers
# X_test = X_test.astype('float32')
# # Normalization
# X_train /= 255
# X_test /= 255

# Showing the input data after normalizing:
# plt.figure(figsize=(12, 10))
# x, y = 4, 4
# for i in range(15):
#     plt.subplot(y, x, i + 1)
#     plt.imshow(X_train[i].reshape((28, 28)), interpolation='nearest')
# plt.show()

# some_item = X_train[9000]
# some_item_image = some_item.reshape(28, 28)
# plt.imshow(some_item_image, cmap=matplotlib.cm.binary, interpolation="nearest")
# plt.axis("off")
# plt.show()


class Bayes(object):

    def __init__(self):
        self.priors = dict()
        self.gaussian = dict()

    @staticmethod
    def mean(x):
        """
        returns mean of the data
        :param x: data vector
        :return: mean
        """
        mean_x = np.mean(x, axis=0)
        return mean_x

    @staticmethod
    def covariance(x):
        """
        returns covariance of the data
        :param x: data vector
        :return: covariance of the data
        """
        cov_x = np.cov(x.T)
        return cov_x

    def prior(self, labels):
        """
        this function calculates the prior for each category
        :param labels: category labels
        :return: None
        """
        for category in set(labels):
            self.priors[category] = len(labels[labels == category]) / len(labels)

    def fit(self, data, y):
        """
        calculates the mean and covariance of each class in the data-set
        :param data: data
        :param y: data labels
        :return: None
        """
        smoothing_factor = 1e-2
        samples, feature_length = data.shape

        labels = set(y)
        for category in labels:
            current_data = data[y == category]
            self.gaussian[category] = {
                'mean': current_data.mean(axis=0),
                # regularise the covariance so it stays invertible
                'cov': np.cov(current_data.T) + np.eye(feature_length) * smoothing_factor
            }
            self.priors[category] = float(len(y[y == category])) / len(y)

    def predict(self, data):
        """
        this function predicts the class of an unknown feature vector
        :param data: feature vectors whose class has to be determined
        :return: predicted class of each feature vector
        """
        samples, feature_length = data.shape
        k = len(self.gaussian)
        p = np.zeros((samples, k))

        # log-posterior (up to a constant) for each class
        for category, g in iteritems(self.gaussian):
            mean, covariance = g['mean'], g['cov']
            p[:, category] = mvn.logpdf(data, mean=mean, cov=covariance) + np.log(self.priors[category])

        return np.argmax(p, axis=1)

    def accuracy(self, data, labels):
        """
        returns the accuracy/score of the prediction
        :param data: data
        :param labels: labels for each feature vector
        :return: score of the prediction
        """
        prediction = self.predict(data)
        return np.mean(prediction == labels)


if __name__ == '__main__':
    model = Bayes()
    t0 = datetime.now()
    model.fit(x_train_pca_inc, y_train)
    print("Training time:", (datetime.now() - t0))

    t0 = datetime.now()
    print("Train accuracy:", model.accuracy(x_train_pca_inc, y_train))
    print("Time to compute train accuracy:", (datetime.now() - t0), "Train size:", len(y_train))

    t0 = datetime.now()
    print("Test accuracy:", model.accuracy(x_test_pca_inc, y_test))
    print("Time to compute test accuracy:", (datetime.now() - t0), "Test size:", len(y_test))
