-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathhpccg.ci
More file actions
157 lines (143 loc) · 5.36 KB
/
hpccg.ci
File metadata and controls
157 lines (143 loc) · 5.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
mainmodule charmHpccg {
extern module completion;
readonly int numChares;
readonly CProxy_charmMain mainProxy;
readonly CProxy_CompletionDetector detector;
mainchare charmMain {
entry charmMain(CkArgMsg*);
entry [reductiontarget] void matrixReady();
entry [reductiontarget] void foundExternals();
entry [reductiontarget] void done();
entry void start() {
when matrixReady() atomic {
detector
.start_detection(numChares,
CkCallback(CkIndex_charmHpccg::findExternals(), array),
CkCallback(CkCallback::ignore),
CkCallback(CkReductionTarget(charmMain, foundExternals), thisProxy),
0);
}
when foundExternals() atomic {
array.beginComputation(150, 0);
}
when done() atomic {
CkExit();
}
};
};
include "generate_matrix.hpp";
include "read_HPC_row.hpp";
array [1D] charmHpccg {
entry charmHpccg();
/// Matrix initialization routines - random generation or file reading
entry void generateMatrix(int nx, int ny, int nz) { atomic {
generate_matrix(nx, ny, nz, &A, &x, &b, &xexact, numChares, thisIndex);
contribute(CkCallback(CkReductionTarget(charmMain, matrixReady), mainProxy));
}};
entry void readMatrix(std::string fileName) { atomic {
read_HPC_row(fileName.c_str(), &A, &x, &b, &xexact, numChares, thisIndex);
contribute(CkCallback(CkReductionTarget(charmMain, matrixReady), mainProxy));
}};
/// Communication setup routines
entry void findExternals();
entry void needXElements(int requester, std::vector<int> rows);
/// Parallel sparse matrix-vector multiplication
entry void charmSpMV() {
atomic {
// Send the elements of the x vector to the other chares that need it
for(std::map<int, RemoteX>::iterator iter = xToSend.begin();
iter != xToSend.end(); ++iter) {
int count = iter->second.values.size();
double* buf = new double[count];
for (int i = 0; i < count; ++i)
buf[i] = p[iter->second.values[i]];
thisProxy[iter->first].transferX(thisIndex, count, buf);
}
}
// Gather up elements of the x vector that this chare needs from others
for (xMessagesReceived = 0; xMessagesReceived < xToReceive.size(); xMessagesReceived++) {
when transferX(int src, int size, double elms[size]) atomic {
CkAssert(xToReceive.find(src) != xToReceive.end());
CkAssert(size == xToReceive[src].values.size());
CkAssert(size + xToReceive[src].offset <= A->local_ncol);
memcpy(p + xToReceive[src].offset, elms, size * sizeof(double));
}
}
atomic {
// Run the local sparse matrix-vector computation
HPC_sparsemv(A, p, Ap); // 2*nnz ops
// Signal that the local chunk of the multiplication is done
// and the CG iteration can continue
spmvDone();
}
};
entry void transferX(int src, int size, double elms[size]);
entry void spmvDone();
/// Parallel dot-product
entry [local] void charmDDot(double* a, double* b) {
atomic {
double t4;
double result;
ddot(A->local_nrow, a, b, &result, t4); // 2*nrow ops
contribute(sizeof(double), &result, CkReduction::sum_double,
CkCallback(CkReductionTarget(charmHpccg, reduceResidual), thisProxy));
}
};
entry [reductiontarget] void reduceResidual(double res);
/// Main CG computation routine
entry void beginComputation(const int max_iter, const double tolerance) {
atomic "initialize" {
r = new double[A->local_nrow];
p = new double[A->local_ncol];
Ap = new double[A->local_nrow];
normr = 0.0;
rtrans = 0.0;
}
atomic {
waxpby(A->local_nrow, 1.0, x, 0.0, x, p);
thisProxy[thisIndex].charmSpMV();
}
when spmvDone() atomic {
waxpby(A->local_nrow, 1.0, b, -1.0, Ap, r);
charmDDot(r, r);
}
when reduceResidual(double res) atomic {
rtrans = res;
normr = sqrt(rtrans);
if (thisIndex == 0) ckout << "Initial Residual = "<< normr << endl;
}
for (iteration = 1; iteration < max_iter && normr > tolerance; iteration++) {
if (iteration == 1) atomic {
waxpby(A->local_nrow, 1.0, r, 0.0, r, p);
} else {
atomic {
charmDDot(r, r);
}
when reduceResidual(double newrtrans) atomic {
double beta = newrtrans / rtrans;
rtrans = newrtrans;
waxpby(A->local_nrow, 1.0, r, beta, p, p); // 2*nrow ops
}
}
atomic {
normr = sqrt(rtrans);
if (thisIndex == 0 && iteration % 15 == 0)
CkPrintf("Iteration = %d,\tResidual = %g\n", iteration, normr);
thisProxy[thisIndex].charmSpMV();
}
when spmvDone() atomic {
charmDDot(p, Ap);
}
when reduceResidual(double res) atomic {
double alpha = rtrans / res;
waxpby(A->local_nrow, 1.0, x, alpha, p, x);// 2*nrow ops
waxpby(A->local_nrow, 1.0, r, -alpha, Ap, r);// 2*nrow ops
//niters = k;
}
}
atomic {
contribute(CkCallback(CkReductionTarget(charmMain, done), mainProxy));
}
};
};
};