-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsvmExperiment.py
More file actions
84 lines (67 loc) · 3.28 KB
/
Copy pathsvmExperiment.py
File metadata and controls
84 lines (67 loc) · 3.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import timeit
# Start the wall-clock timer before the remaining imports so that the
# "Execution Time" printed at the end of the script also covers their load time.
start = timeit.default_timer()
import itertools
import numpy as np
from scipy import stats
from scipy import linalg
import pylab as pl
from sklearn import svm, linear_model
from sklearn.model_selection import train_test_split
from random import random
from dataread import *
# Load the feature vectors and shape labels supplied by the dataread helpers
# into NumPy arrays, then split them into training and test sets.
n_samples = 14970  # number of images in the dataset
n_features = 34  # number of features per feature vector

# Only rows [0, n_filled) are populated below.
# NOTE(review): the fill loops have always stopped one short of n_samples.
# Confirm whether trainData()/trainLabels() really hold n_samples rows; if
# they do, this bound should become n_samples so the last image is used.
n_filled = n_samples - 1

# Class index assigned to each shape label.
label_to_class = {'triangle': 0, 'circle': 1, 'square': 2, 'star': 3}

# dtype=int replaces the np.int alias, which was removed in NumPy 1.24.
# np.zeros (instead of np.empty) keeps any unfilled entry deterministic.
data = np.zeros((n_samples, n_features), dtype=int)  # feature matrix, n_samples x n_features
target = np.zeros((n_samples), dtype=int)  # class index per sample
qid = np.zeros((n_samples), dtype=int)  # allocated here but not filled in this section

trainDat = trainData()
trainLabel = trainLabels()
# print(trainDat)

# trainDat is indexed [feature][sample] while data is [sample][feature],
# so the copy transposes the two axes.
for y in range(n_filled):
    for x in range(n_features):
        data[y][x] = trainDat[x][y]

for i in range(n_filled):
    label = trainLabel[0][i]
    if label not in label_to_class:
        # An unknown label used to leave uninitialised memory in target;
        # fail loudly instead of training on garbage.
        raise ValueError(
            "Unknown label {!r} for sample {}".format(label, i))
    target[i] = label_to_class[label]

# Split only the populated rows so no unfilled sample reaches the classifier.
X_train, X_test, y_train, y_test = train_test_split(
    data[:n_filled], target[:n_filled], train_size=0.8)
# split into train and test set
# cv = cross_validate.StratifiedShuffleSplit(target, test_size=.8) #TODO
# train, test = iter(cv).next() #creates a test mask array and a train mask array. these arrays are the indexes of x and y that are assigned to each set
# X_train, y_train = data[train], target[train] #using the train index mask create an copy array of both x and y respectivly that is only filled with the training items
# X_test, y_test, qid_test = data[test], target[test], qid[test] #using the test index mask create an copy array of both x and y respectivly that is only filled with the test items
# Sweep the SVC parameter C over n_svms evenly spaced values, train one
# polynomial-kernel SVM per value, and report the mean test-set accuracy.
n_svms = 10000  # number of C values (and therefore SVMs) to try
svms = []  # every fitted model is kept, indexed by sweep step
# NOTE(review): retaining all fitted models can consume a lot of memory;
# drop the list if nothing inspects the models after the run.
avgscore = 0  # running sum of scores; divided into the mean after the loop
avgscores = 0  # number of scores accumulated
print("Initializing SVMs")
for i in range(n_svms):  # make that many svms and test them
    print("Initializing SVM: "+str(i))
    # C is swept deterministically from 0.0001 upward in steps of 1/n_svms;
    # the offset keeps C strictly positive, as SVC requires.
    thiscval = (i/n_svms)+0.0001
    # A larger cache_size (in MB) speeds up kernel evaluation during fit.
    svms.append(svm.SVC(kernel='poly', C=thiscval, cache_size=5000))
    print("C for SVM "+str(i)+" is "+str(thiscval))
    print("SVM "+str(i)+" Initialized training")
    svms[i].fit(X_train, y_train)  # train the selected svm
    clf_predict = svms[i].predict(X_test)  # predict values
    # Mean accuracy, computed from the predictions already made. This is
    # what SVC.score() returns, without running predict() a second time.
    score = float(np.mean(clf_predict == y_test))
    print("SVM "+str(i)+" X_test prediction: "+str(clf_predict))
    print("SVM "+str(i)+" score: "+str(score))
    avgscore += score
    avgscores += 1
avgscore = avgscore/avgscores
print("The average score for the SVMs")
print(avgscore)
# Report the wall-clock time elapsed since `start` was captured.
stop = timeit.default_timer()
elapsed = stop - start
print("Execution Time: ")
print(elapsed)
#for i in range(len(y_test)):
#clf_predict = clf.predict(X_test)
#print(str(clf_predict[i]) + ' ' + str(y_test[i]))