Skip to content

Commit 8140609

Browse files
author
Kent Knox
committed
Merge pull request #168 from jlgreathouse/explicit_zeroes
Read explicit zeroes from Matrix Market files. :+1: Thank you, Joseph, both for listening to our feedback and for going back to refactor your last pull request. Your fixes are much appreciated.
2 parents 5c1ade3 + e597c2a commit 8140609

31 files changed

+256
-145
lines changed

samples/sample-cg.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,8 @@ int main (int argc, char* argv[])
195195
A.rowBlockSize * sizeof( cl_ulong ), NULL, &cl_status );
196196

197197

198-
fileError = clsparseSCsrMatrixfromFile( &A, matrix_path.c_str( ), control );
198+
// Read matrix market file with explicit zero values included.
199+
fileError = clsparseSCsrMatrixfromFile( &A, matrix_path.c_str( ), control, true );
199200

200201
// This function allocates memory for rowBlocks structure. If not called
201202
// the structure will not be calculated and clSPARSE will run the vectorized

samples/sample-spmv.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,8 @@ int main (int argc, char* argv[])
227227
A.rowBlockSize * sizeof( cl_ulong ), NULL, &cl_status );
228228

229229

230-
fileError = clsparseSCsrMatrixfromFile( &A, matrix_path.c_str( ), control );
230+
// Read matrix market file with explicit zero values included.
231+
fileError = clsparseSCsrMatrixfromFile( &A, matrix_path.c_str( ), control, true );
231232

232233
// This function allocates memory for rowBlocks structure. If not called
233234
// the structure will not be calculated and clSPARSE will run the vectorized

src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ template <typename T>
2525
class xSpMdM: public clsparseFunc
2626
{
2727
public:
28-
xSpMdM( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType, size_t columns ): clsparseFunc( devType, CL_QUEUE_PROFILING_ENABLE ), gpuTimer( nullptr ), cpuTimer( nullptr ), num_columns( columns )
28+
xSpMdM( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType, size_t columns, cl_bool keep_explicit_zeroes = true ): clsparseFunc( devType, CL_QUEUE_PROFILING_ENABLE ), gpuTimer( nullptr ), cpuTimer( nullptr ), num_columns( columns )
2929
{
3030
// Create and initialize our timer class, if the external timer shared library loaded
3131
if( sparseGetTimer )
@@ -44,6 +44,7 @@ class xSpMdM: public clsparseFunc
4444

4545

4646
clsparseEnableAsync( control, false );
47+
explicit_zeroes = keep_explicit_zeroes;
4748
}
4849

4950
~xSpMdM( )
@@ -124,7 +125,7 @@ class xSpMdM: public clsparseFunc
124125
csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, ( csrMtx.num_rows + 1 ) * sizeof( cl_int ), NULL, &status );
125126
CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" );
126127

127-
fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control );
128+
fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control, explicit_zeroes );
128129
if( fileError != clsparseSuccess )
129130
throw std::runtime_error( "Could not read matrix market data from disk: " + sparseFile);
130131

@@ -247,6 +248,7 @@ class xSpMdM: public clsparseFunc
247248
T alpha;
248249
T beta;
249250
size_t num_columns;
251+
cl_bool explicit_zeroes;
250252

251253
// OpenCL state
252254
cl_command_queue_properties cqProp;

src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ template <typename T>
2828
class xBiCGStab : public clsparseFunc
2929
{
3030
public:
31-
xBiCGStab( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType ):
31+
xBiCGStab( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType, cl_bool keep_explicit_zeroes = true ):
3232
clsparseFunc( devType, CL_QUEUE_PROFILING_ENABLE ),/* gpuTimer( nullptr ),*/ cpuTimer( nullptr )
3333
{
3434
// Create and initialize our timer class, if the external timer shared library loaded
@@ -48,6 +48,7 @@ class xBiCGStab : public clsparseFunc
4848

4949

5050
clsparseEnableAsync( control, false );
51+
explicit_zeroes = keep_explicit_zeroes;
5152

5253
solverControl = clsparseCreateSolverControl(DIAGONAL, 1000, 1e-6, 0);
5354
clsparseSolverPrintMode(solverControl, VERBOSE);
@@ -130,9 +131,9 @@ class xBiCGStab : public clsparseFunc
130131
CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" );
131132

132133
if(typeid(T) == typeid(float))
133-
fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control );
134+
fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control, explicit_zeroes );
134135
else if (typeid(T) == typeid(double))
135-
fileError = clsparseDCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control );
136+
fileError = clsparseDCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control, explicit_zeroes );
136137
else
137138
fileError = clsparseInvalidType;
138139

@@ -241,6 +242,8 @@ class xBiCGStab : public clsparseFunc
241242
cldenseVector x;
242243
cldenseVector y;
243244

245+
// host values
246+
cl_bool explicit_zeroes;
244247

245248
// OpenCL state
246249
cl_command_queue_properties cqProp;

src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ template <typename T>
2626
class xCG : public clsparseFunc
2727
{
2828
public:
29-
xCG( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType ):
29+
xCG( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType, cl_bool keep_explicit_zeroes = true ):
3030
clsparseFunc( devType, CL_QUEUE_PROFILING_ENABLE ),/* gpuTimer( nullptr ),*/ cpuTimer( nullptr )
3131
{
3232
// Create and initialize our timer class, if the external timer shared library loaded
@@ -46,6 +46,7 @@ class xCG : public clsparseFunc
4646

4747

4848
clsparseEnableAsync( control, false );
49+
explicit_zeroes = keep_explicit_zeroes;
4950

5051
solverControl = clsparseCreateSolverControl(NOPRECOND, 10000, 1e-4, 1e-8);
5152
clsparseSolverPrintMode(solverControl, NORMAL);
@@ -129,9 +130,9 @@ class xCG : public clsparseFunc
129130
CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" );
130131

131132
if(typeid(T) == typeid(float))
132-
fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control );
133+
fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control, explicit_zeroes );
133134
else if (typeid(T) == typeid(double))
134-
fileError = clsparseDCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control );
135+
fileError = clsparseDCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control, explicit_zeroes );
135136
else
136137
fileError = clsparseInvalidType;
137138

@@ -240,6 +241,8 @@ class xCG : public clsparseFunc
240241
cldenseVector x;
241242
cldenseVector y;
242243

244+
// host values
245+
cl_bool explicit_zeroes;
243246

244247
// OpenCL state
245248
cl_command_queue_properties cqProp;

src/benchmarks/clsparse-bench/functions/clfunc_xCoo2Csr.hpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ template <typename T>
2525
class xCoo2Csr: public clsparseFunc
2626
{
2727
public:
28-
xCoo2Csr( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType ): clsparseFunc( devType, CL_QUEUE_PROFILING_ENABLE ), gpuTimer( nullptr ), cpuTimer( nullptr )
28+
xCoo2Csr( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType, cl_bool keep_explicit_zeroes = true ): clsparseFunc( devType, CL_QUEUE_PROFILING_ENABLE ), gpuTimer( nullptr ), cpuTimer( nullptr )
2929
{
3030
// Create and initialize our timer class, if the external timer shared library loaded
3131
if( sparseGetTimer )
@@ -44,6 +44,7 @@ class xCoo2Csr: public clsparseFunc
4444

4545

4646
clsparseEnableAsync( control, false );
47+
explicit_zeroes = keep_explicit_zeroes;
4748
}
4849

4950
~xCoo2Csr( )
@@ -113,9 +114,9 @@ class xCoo2Csr: public clsparseFunc
113114
cooMatx.num_nonzeros * sizeof( cl_int ), NULL, &status );
114115

115116
if (typeid(T) == typeid(float))
116-
fileError = clsparseSCooMatrixfromFile(&cooMatx, path.c_str(), control);
117+
fileError = clsparseSCooMatrixfromFile( &cooMatx, path.c_str(), control, explicit_zeroes );
117118
else if (typeid(T) == typeid(double))
118-
fileError = clsparseDCooMatrixfromFile(&cooMatx, path.c_str(), control);
119+
fileError = clsparseDCooMatrixfromFile( &cooMatx, path.c_str(), control, explicit_zeroes );
119120
else
120121
fileError = clsparseInvalidType;
121122

@@ -202,6 +203,9 @@ class xCoo2Csr: public clsparseFunc
202203
clsparseCsrMatrix csrMtx;
203204
clsparseCooMatrix cooMatx;
204205

206+
// host values
207+
cl_bool explicit_zeroes;
208+
205209
//matrix dimension
206210
int n_rows;
207211
int n_cols;

src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ template <typename T>
2626
class xCsr2Coo : public clsparseFunc
2727
{
2828
public:
29-
xCsr2Coo(PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type dev_type) : clsparseFunc(dev_type, CL_QUEUE_PROFILING_ENABLE)
29+
xCsr2Coo(PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type dev_type, cl_bool keep_explicit_zeroes = true) : clsparseFunc(dev_type, CL_QUEUE_PROFILING_ENABLE)
3030
{
3131
gpuTimer = nullptr;
3232
cpuTimer = nullptr;
@@ -47,6 +47,7 @@ class xCsr2Coo : public clsparseFunc
4747
}
4848

4949
clsparseEnableAsync(control, false);
50+
explicit_zeroes = keep_explicit_zeroes;
5051
}// End of constructor
5152

5253
~xCsr2Coo()
@@ -131,9 +132,9 @@ class xCsr2Coo : public clsparseFunc
131132
CLSPARSE_V(status, "::clCreateBuffer csrMtx.rowOffsets");
132133

133134
if (typeid(T) == typeid(float))
134-
fileError = clsparseSCsrMatrixfromFile(&csrMtx, sparseFile.c_str(), control);
135+
fileError = clsparseSCsrMatrixfromFile(&csrMtx, sparseFile.c_str(), control, explicit_zeroes);
135136
else if (typeid(T) == typeid(double))
136-
fileError = clsparseDCsrMatrixfromFile(&csrMtx, sparseFile.c_str(), control);
137+
fileError = clsparseDCsrMatrixfromFile(&csrMtx, sparseFile.c_str(), control, explicit_zeroes);
137138
else
138139
fileError = clsparseInvalidType;
139140

@@ -262,6 +263,8 @@ class xCsr2Coo : public clsparseFunc
262263
clsparseCsrMatrix csrMtx;
263264
clsparseCooMatrix cooMtx;
264265

266+
// host values
267+
cl_bool explicit_zeroes;
265268

266269
//OpenCL state
267270
cl_command_queue_properties cqProp;

src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ template <typename T>
2626
class xCsr2Dense : public clsparseFunc
2727
{
2828
public:
29-
xCsr2Dense(PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type dev_type) : clsparseFunc(dev_type, CL_QUEUE_PROFILING_ENABLE)
29+
xCsr2Dense(PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type dev_type, cl_bool keep_explicit_zeroes = true) : clsparseFunc(dev_type, CL_QUEUE_PROFILING_ENABLE)
3030
{
3131
gpuTimer = nullptr;
3232
cpuTimer = nullptr;
@@ -47,6 +47,7 @@ class xCsr2Dense : public clsparseFunc
4747
}
4848

4949
clsparseEnableAsync(control, false);
50+
explicit_zeroes = keep_explicit_zeroes;
5051
}// End of constructor
5152

5253
~xCsr2Dense()
@@ -131,9 +132,9 @@ class xCsr2Dense : public clsparseFunc
131132
CLSPARSE_V(status, "::clCreateBuffer csrMtx.rowOffsets");
132133

133134
if (typeid(T) == typeid(float))
134-
fileError = clsparseSCsrMatrixfromFile(&csrMtx, sparseFile.c_str(), control);
135+
fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str(), control, explicit_zeroes );
135136
else if (typeid(T) == typeid(double))
136-
fileError = clsparseDCsrMatrixfromFile(&csrMtx, sparseFile.c_str(), control);
137+
fileError = clsparseDCsrMatrixfromFile( &csrMtx, sparseFile.c_str(), control, explicit_zeroes );
137138
else
138139
fileError = clsparseInvalidType;
139140

@@ -234,7 +235,7 @@ class xCsr2Dense : public clsparseFunc
234235
cldenseMatrix denseMtx;
235236

236237
//host values
237-
238+
cl_bool explicit_zeroes;
238239

239240
//OpenCL state
240241
cl_command_queue_properties cqProp;

src/benchmarks/clsparse-bench/functions/clfunc_xDense2Csr.hpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ template <typename T>
2525
class xDense2Csr: public clsparseFunc
2626
{
2727
public:
28-
xDense2Csr( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType ): clsparseFunc( devType, CL_QUEUE_PROFILING_ENABLE ), gpuTimer( nullptr ), cpuTimer( nullptr )
28+
xDense2Csr( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType, cl_bool keep_explicit_zeroes = true ): clsparseFunc( devType, CL_QUEUE_PROFILING_ENABLE ), gpuTimer( nullptr ), cpuTimer( nullptr )
2929
{
3030
gpuTimer = nullptr;
3131
cpuTimer = nullptr;
@@ -45,6 +45,7 @@ class xDense2Csr: public clsparseFunc
4545
cpuTimerID = cpuTimer->getUniqueID( "CPU xDense2Csr", 0 );
4646
}
4747
clsparseEnableAsync( control, false );
48+
explicit_zeroes = keep_explicit_zeroes;
4849
}// End of constructor
4950

5051
~xDense2Csr( )
@@ -135,9 +136,9 @@ class xDense2Csr: public clsparseFunc
135136
CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" );
136137

137138
if(typeid(T) == typeid(float))
138-
fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control );
139+
fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control, explicit_zeroes );
139140
else if (typeid(T) == typeid(double))
140-
fileError = clsparseDCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control );
141+
fileError = clsparseDCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control, explicit_zeroes );
141142
else
142143
fileError = clsparseInvalidType;
143144

@@ -261,9 +262,12 @@ class xDense2Csr: public clsparseFunc
261262

262263
//device values
263264
clsparseCsrMatrix csrMtx;
264-
clsparseCsrMatrix csrMatx;
265+
clsparseCsrMatrix csrMatx;
265266
cldenseMatrix A;
266267

268+
// host values
269+
cl_bool explicit_zeroes;
270+
267271
// OpenCL state
268272
cl_command_queue_properties cqProp;
269273

src/benchmarks/clsparse-bench/functions/clfunc_xSpMSpM.hpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ clsparseStatus clsparseDcsrSpGemm(const clsparseCsrMatrix* sparseMatA, const cls
4141
template <typename T>
4242
class xSpMSpM : public clsparseFunc {
4343
public:
44-
xSpMSpM(PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType) : clsparseFunc(devType, CL_QUEUE_PROFILING_ENABLE), gpuTimer(nullptr), cpuTimer(nullptr)
44+
xSpMSpM(PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType, cl_bool keep_explicit_zeroes = true) : clsparseFunc(devType, CL_QUEUE_PROFILING_ENABLE), gpuTimer(nullptr), cpuTimer(nullptr)
4545
{
4646
// Create and initialize our timer class, if the external timer shared library loaded
4747
if (sparseGetTimer)
@@ -58,6 +58,7 @@ class xSpMSpM : public clsparseFunc {
5858
cpuTimerID = cpuTimer->getUniqueID("CPU xSpMSpM", 0);
5959
}
6060
clsparseEnableAsync(control, false);
61+
explicit_zeroes = keep_explicit_zeroes;
6162
}
6263

6364
~xSpMSpM() {}
@@ -145,9 +146,9 @@ class xSpMSpM : public clsparseFunc {
145146
#endif
146147

147148
if (typeid(T) == typeid(float))
148-
fileError = clsparseSCsrMatrixfromFile(&csrMtx, sparseFile.c_str(), control);
149+
fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str(), control, explicit_zeroes );
149150
else if (typeid(T) == typeid(double))
150-
fileError = clsparseDCsrMatrixfromFile(&csrMtx, sparseFile.c_str(), control);
151+
fileError = clsparseDCsrMatrixfromFile( &csrMtx, sparseFile.c_str(), control, explicit_zeroes );
151152
else
152153
fileError = clsparseInvalidType;
153154

@@ -299,6 +300,7 @@ class xSpMSpM : public clsparseFunc {
299300
T alpha;
300301
T beta;
301302
size_t flopCnt; // Indicates total number of floating point operations
303+
cl_bool explicit_zeroes;
302304
// OpenCL state
303305
//cl_command_queue_properties cqProp;
304306
}; // class xSpMSpM
@@ -322,4 +324,4 @@ xSpMSpM<double>::xSpMSpM_Function(bool flush)
322324
clFinish(queue);
323325
}
324326

325-
#endif // CLSPARSE_BENCHMARK_SpM_SpM_HXX
327+
#endif // CLSPARSE_BENCHMARK_SpM_SpM_HXX

0 commit comments

Comments
 (0)