-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.cu
100 lines (85 loc) · 2.65 KB
/
model.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#include <iostream>
#include <chrono>
#include <assert.h>
#include <sys/stat.h>
#include "model.h"
// Forces lazy CUDA context creation up front (via the no-op cudaFree(0))
// and returns how long that initialization took, in seconds.
float cudaInit() {
    const auto t0 = chrono::steady_clock::now();
    cudaFree(0);  // first runtime call triggers context creation
    const chrono::duration<double> elapsed = chrono::steady_clock::now() - t0;
    return elapsed.count();
}
// Builds a two-layer MLP for MNIST-style input: 784 -> 256 (ReLU) -> 10.
// `out` is the argmax prediction head; `loss` is cross-entropy against `labels`.
// Parameters are restored from `checkpointDir` when it names an existing
// directory, otherwise freshly initialized.
Model::Model(char* checkpointDir, uint32_t batchSize): checkpointDir(checkpointDir) {
    this->data = make_shared<Placeholder>(batchSize, 784);
    this->labels = make_shared<Placeholder>(1, batchSize);

    auto w1 = make_shared<Parameter>(256, 784, "w1");
    auto b1 = make_shared<Parameter>(1, 256, "b1");
    auto w2 = make_shared<Parameter>(10, 256, "w2");
    auto b2 = make_shared<Parameter>(1, 10, "b2");

    // Layer 1: relu(w1 @ data + b1)
    auto preact1 = make_shared<Matmul>(w1, this->data);
    auto biased1 = make_shared<Add>(b1, preact1);
    auto hidden = make_shared<Relu>(biased1);

    // Layer 2: w2 @ hidden + b2 (logits)
    auto preact2 = make_shared<Matmul>(w2, hidden);
    auto logits = make_shared<Add>(b2, preact2);

    this->out = make_shared<Argmax>(logits);
    this->loss = make_shared<CrossEntropy>(logits, this->labels);

    this->parameters.push_back(w1);
    this->parameters.push_back(b1);
    this->parameters.push_back(w2);
    this->parameters.push_back(b2);

    // Resume from checkpoint only if the directory already exists;
    // otherwise start from randomly initialized parameters.
    struct stat sb;
    if (checkpointDir && stat(checkpointDir, &sb) == 0 && S_ISDIR(sb.st_mode)) {
        for (const auto& p : parameters) {
            p->load(checkpointDir);
        }
    } else {
        for (const auto& p : parameters) {
            p->init();
        }
    }
}
// Runs a forward pass through the prediction head and returns accuracy
// (fraction of predictions matching labels) over the current batch.
// When running on GPU the output buffer is staged to the CPU for the
// comparison and moved back afterwards.
// NOTE(review): labels are read host-side without a toCpu() — assumes label
// data always lives on the host; confirm against Placeholder usage.
float Model::evaluate(bool useGpu) {
    this->out->forward(useGpu);
    if (useGpu) {
        this->out->out.toCpu();
    }
    float* predictions = this->out->out.get();
    float* truth = this->labels->out.get();
    int correct = 0;
    for (uint32_t i = 0; i < this->out->out.width; ++i) {
        if (predictions[i] == truth[i]) {
            ++correct;
        }
    }
    if (useGpu) {
        this->out->out.toGpu();
    }
    return static_cast<float>(correct) / this->labels->width;
}
// Performs one optimization step: forward + backward through the loss,
// then an SGD update on every parameter. Returns the scalar loss value.
float Model::trainStep(float lr, bool useGpu) {
    this->loss->forward(useGpu);
    this->loss->backward();

    // Gradients are summed across the batch, so scale the learning rate
    // down by the batch size to get the mean update.
    const float scaledLr = lr / this->labels->width;
    for (const auto& p : parameters) {
        p->step(scaledLr);
    }

    if (!useGpu) {
        return *this->loss->out.get();
    }
    // Stage the loss scalar to the host to read it, then restore it to the GPU.
    this->loss->out.toCpu();
    const float lossValue = *this->loss->out.get();
    this->loss->out.toGpu();
    return lossValue;
}
// Persists every parameter to checkpointDir, creating the directory first
// when it does not exist. No-op if the model was constructed without a
// checkpoint directory.
void Model::save() {
    if (!this->checkpointDir) {
        return;
    }
    struct stat sb;
    if (stat(this->checkpointDir, &sb) != 0) {
        // BUG FIX: the original wrapped mkdir() inside assert(), so with
        // NDEBUG defined the assert — and the mkdir side effect with it —
        // was compiled out, and release builds never created the directory.
        // The side effect must happen unconditionally; only the check is
        // debug-only.
        int rc = mkdir(this->checkpointDir, 0777);
        assert(rc == 0);
        (void)rc;  // silence unused-variable warning when assert is compiled out
    }
    for (const auto& p : parameters) {
        p->save(this->checkpointDir);
    }
}