import numpy as np
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.model_selection import cross_val_predict
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestCentroid
from sklearn.ensemble import RandomForestClassifier
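
# ml.py.orig: loads pre-computed Fourier feature matrices for a training set and a
# test set, fits a collection of scikit-learn classifiers, and prints each model's
# test accuracy and confusion matrix. The commented-out blocks below preserve an
# earlier pipeline that built the matrices directly from raw per-subject CSV files
# via getArray().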
def getArray(label, csvName):
    """Load sensor columns from one CSV, drop low-quality rows, and append a label column."""
    data = np.loadtxt(csvName, delimiter=',', usecols=range(7, 19), skiprows=1)
    # Drop a row if any of its quality indexes is below 10.
    quality_indexes = [1, 3, 5, 7, 9, 11]  # range(1, len(data[0]), 2)
    for quality_index in quality_indexes:
        data = data[np.logical_not(data[:, quality_index] < 10)]
    # Keep only the sensor value columns (O1, ...), discarding the quality columns.
    data = data[:, [0, 2, 4, 6, 8, 10]]
    num_rows, num_cols = data.shape
    # Append the class label as an extra column so features and target stay together.
    label_column = np.full((num_rows, 1), label)
    data = np.hstack((data, label_column))
    return data
def main():
    '''
    blue_csv = ['raw_hansika_blue.csv', 'raw_heshan_blue.csv', 'raw_dinuka_blue.csv', 'raw_nadun_blue.csv', 'raw_ravindu_blue.csv']
    green_csv = ['raw_hansika_green.csv', 'raw_heshan_green.csv', 'raw_dinuka_green.csv', 'raw_nadun_green.csv', 'raw_ravindu_green.csv']
    red_csv = ['raw_hansika_red.csv', 'raw_heshan_red.csv', 'raw_dinuka_red.csv', 'raw_nadun_red.csv', 'raw_ravindu_red.csv']
    train_data = np.concatenate((getArray(1, red_csv[0]), getArray(1, red_csv[1]), getArray(1, red_csv[2]),
                                 getArray(2, green_csv[0]), getArray(2, green_csv[1]), getArray(2, green_csv[2]),
                                 #getArray(3, blue_csv[0]), getArray(3, blue_csv[1]), getArray(3, blue_csv[2])
                                 ), axis=0)
    test_data = np.concatenate((getArray(1, red_csv[3]), getArray(1, red_csv[4]),
                                getArray(2, green_csv[3]), getArray(2, green_csv[4]),
                                #getArray(3, blue_csv[3]), getArray(3, blue_csv[4])
                                ), axis=0)
    '''
    # Load pre-computed Fourier features; the last column of each row is the class label.
    train_data = np.loadtxt('train_fourier2.csv', delimiter=',')
    test_data = np.loadtxt('test_fourier2.csv', delimiter=',')

    print("TRAIN DATA")
    print(train_data.shape)
    print("TEST DATA")
    print(test_data.shape)
    #np.savetxt("train.csv", train_data, delimiter=",")
    #np.savetxt("test.csv", test_data, delimiter=",")

    # Split features (all but the last column) from labels (last column).
    train_X = train_data[:, :-1]
    train_Y = train_data[:, -1]
    test_X = test_data[:, :-1]
    test_Y = test_data[:, -1]

    # Hold-out split of the training data, kept for cross-validation experiments.
    X_train, X_test, Y_train, Y_test = train_test_split(train_X, train_Y, test_size=0.4, random_state=0)
    #predicted = cross_val_predict(clf, train_X, train_Y, cv=10)
    #print("Cross-validation accuracy: ", metrics.accuracy_score(train_Y, predicted))

    '''
    X_features = RBFSampler(gamma=1, random_state=1).fit_transform(train_X)
    X_testFeatures = RBFSampler(gamma=1, random_state=1).fit_transform(test_X)
    clf = SGDClassifier()
    clf.fit(X_features, train_Y)
    print(clf.score(X_testFeatures, test_Y))
    print(confusion_matrix(test_Y, clf.predict(X_testFeatures)))
    '''

    # Candidate classifiers, each trained and evaluated on the same data.
    classifiers = [
        SVC(decision_function_shape='ovo'),
        #SVC(kernel='linear', C=1),
        DecisionTreeClassifier(),
        KNeighborsClassifier(n_neighbors=9),
        GaussianNB(),
        MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=3),
        NearestCentroid(),
        RandomForestClassifier()
    ]

    for clf in classifiers:
        print("")
        print(clf)
        clf.fit(train_X, train_Y)
        #clf.fit(X_train, Y_train)
        print(clf.classes_)
        print(clf.score(test_X, test_Y) * 100)  # test-set accuracy, as a percentage
        #print(clf.score(X_test, Y_test))
        print(confusion_matrix(test_Y, clf.predict(test_X)))
        #print(confusion_matrix(Y_test, clf.predict(X_test)))
    # x = raw_input("Wait for input: ")
if __name__ == "__main__":
    main()