12 #include "timer.gettimeofday.c"
16 #define INDEXTYPE uint32_t
17 #ifdef SINGLEPRECISION
18 #define VALUETYPE float
20 #define VALUETYPE double
39 template <
typename NT,
typename ALLOC,
int DIM>
40 void fillzero (vector< array<NT,DIM>, ALLOC > & vecofarr)
42 for(
auto itr = vecofarr.begin(); itr != vecofarr.end(); ++itr)
44 itr->fill(static_cast<NT> (0));
48 template <
typename NT,
typename ALLOC,
int DIM>
49 void fillrandom (vector< array<NT,DIM>, ALLOC > & vecofarr)
51 for(
auto itr = vecofarr.begin(); itr != vecofarr.end(); ++itr)
53 #if (__GNUC__ == 4 && (__GNUC_MINOR__ < 7) )
55 for(
auto refarr = itr->begin(); refarr != itr->end(); ++refarr)
60 std::uniform_real_distribution<NT> distribution(0.0f, 1.0f);
62 auto generator = std::bind(distribution, engine);
63 std::generate_n(itr->begin(), DIM, generator);
68 template <
typename NT,
typename ALLOC,
int DIM>
69 void VerifyMM (vector< array<NT,DIM>, ALLOC > & control, vector< array<NT,DIM>, ALLOC > & test)
71 vector< array<NT,DIM> > error;
72 pair<size_t, size_t> maxerrloc;
75 for(
auto itr1 = control.begin(), itr2 = test.begin(); itr1 != control.end(); ++itr1, ++itr2)
78 transform(itr1->begin(), itr1->end(), itr2->begin(), entry.begin(),
absdiff<NT>());
79 auto maxelement = max_element(entry.begin(), entry.end());
80 size_t maxcol = maxelement-entry.begin();
81 if(*maxelement > prevmax)
83 maxerrloc = make_pair(itr1- control.begin(), maxcol);
84 prevmax = *maxelement;
86 error.emplace_back(entry);
89 cout <<
"Max error is: " << prevmax <<
" on y[" << maxerrloc.first <<
"][" << maxerrloc.second <<
"]=";
90 cout << test[maxerrloc.first][maxerrloc.second] << endl;
92 NT machEps = machineEpsilon<NT>();
93 cout <<
"Absolute machine epsilon is: " << machEps <<
" and y[" << maxerrloc.first <<
"][" << maxerrloc.second;
94 cout <<
"]*EPSILON becomes " << machEps * test[maxerrloc.first][maxerrloc.second] << endl;
96 NT sqrtm = sqrt(static_cast<NT>(control.size()));
97 cout <<
"sqrt(n) * relative error is: " << abs(machEps * test[maxerrloc.first][maxerrloc.second]) * sqrtm << endl;
98 if ( (abs(machEps * test[maxerrloc.first][maxerrloc.second]) * sqrtm) < abs(prevmax))
100 cout <<
"*** ATTENTION ***: error is more than sqrt(n) times the relative machine epsilon" << endl;
106 int main(
int argc,
char* argv[])
109 int gl_nworkers = __cilkrts_get_nworkers();
113 bool syminput =
false;
116 INDEXTYPE m = 0, n = 0, nnz = 0, forcelogbeta = 0;
120 cout <<
"Normal usage: ./a.out inputmatrix.mtx sym/nosym binary/text triples/csc" << endl;
121 cout <<
"Assuming matrix.txt is the input, matrix is unsymmetric, and stored in text(ascii) file" << endl;
122 inputname =
"matrix.txt";
126 cout <<
"Normal usage: ./a.out inputmatrix.mtx sym/nosym binary/text triples/csc" << endl;
127 cout <<
"Assuming that the matrix is unsymmetric, and stored in text(ascii) file" << endl;
132 cout <<
"Normal usage: ./a.out inputmatrix.mtx sym/nosym binary/text triples/csc" << endl;
133 cout <<
"Assuming matrix is stored in text(ascii) file" << endl;
135 string issym(argv[2]);
138 else if(issym ==
"nosym")
141 cout <<
"unrecognized option, assuming nosym" << endl;
146 string issym(argv[2]);
149 else if(issym ==
"nosym")
152 cout <<
"unrecognized option, assuming unsymmetric" << endl;
154 string isbinary(argv[3]);
155 if(isbinary ==
"text")
157 else if(isbinary ==
"binary")
160 cout <<
"unrecognized option, assuming text file" << endl;
164 string type(argv[4]);
168 cout <<
"Processing CSC binary" << endl;
173 forcelogbeta = atoi(argv[5]);
179 FILE * f = fopen(inputname.c_str(),
"r");
182 cerr <<
"Problem reading binary input file\n";
197 if (m <= 0 || n <= 0 || nnz <= 0)
199 cerr <<
"Problem with matrix size in binary input file\n";
203 cout <<
"Reading matrix with dimensions: "<< m <<
"-by-" << n <<
" having "<< nnz <<
" nonzeros" << endl;
211 size_t cols = fread(colpointers,
sizeof(
INDEXTYPE), n+1, f);
214 cerr <<
"Problem with FREAD, aborting... " << endl;
221 size_t cols = fread(colindices,
sizeof(
INDEXTYPE), nnz, f);
224 cerr <<
"Problem with FREAD, aborting... " << endl;
228 size_t rows = fread(rowindices,
sizeof(
INDEXTYPE), nnz, f);
229 size_t nums = fread(vals,
sizeof(
VALUETYPE), nnz, f);
231 if(rows != nnz || nums != nnz)
233 cerr <<
"Problem with FREAD, aborting... " << endl;
237 cout<<
"Reading matrix in binary took " << ((
VALUETYPE) (tend-tstart)) /1000 <<
" seconds" <<endl;
242 csc->
SetPointers(colpointers, rowindices, vals , nnz, m, n,
true);
248 delete [] colindices;
249 delete [] rowindices;
255 cout <<
"reading input matrix in text(ascii)... " << endl;
256 ifstream infile(inputname.c_str());
258 char c = infile.get();
261 infile.getline(line,256);
265 infile >> m >> n >> nnz;
270 if (infile.is_open())
273 while (! infile.eof() && cnz < nnz)
275 infile >> triples[cnz].
row >> triples[cnz].
col >> triples[cnz].
val;
283 cout<<
"Reading matrix in ascii took " << ((double) (tend-tstart)) /1000 <<
" seconds" <<endl;
285 cout <<
"converting to csc ... " << endl;
290 cout <<
"# workers: "<< gl_nworkers << endl;
293 double mflops = (2.0 *
static_cast<double>(nnz) *
RHSDIM) / 1000000.0;
294 cout <<
"generating " << RHSDIM <<
" multi vectors... " << endl;
295 typedef array<VALUETYPE, RHSDIM> PACKED;
296 vector< PACKED, aligned_allocator<PACKED, ALIGN> > x(n);
297 vector< PACKED, aligned_allocator<PACKED, ALIGN> > y_bicsb(m);
298 vector< PACKED, aligned_allocator<PACKED, ALIGN> > y_csc(m);
300 fillzero<VALUETYPE, aligned_allocator<PACKED, ALIGN>, RHSDIM>(y_csc);
301 fillzero<VALUETYPE, aligned_allocator<PACKED, ALIGN>, RHSDIM>(y_bicsb);
302 fillrandom<VALUETYPE, aligned_allocator<PACKED, ALIGN>, RHSDIM>(x);
305 cout <<
"starting SpMV ... " << endl;
306 cout <<
"Row imbalance is: " <<
RowImbalance(bicsb) << endl;
307 cout <<
"Col imbalance is: " <<
ColImbalance(bicsb) << endl;
311 bicsb_gespmv<PTARR>(bicsb, &(x[0]), &(y_bicsb[0]));
312 double t0 = timer_seconds_since_init();
314 for(
int i=0; i <
REPEAT; ++i)
316 bicsb_gespmv<PTARR>(bicsb, &(x[0]), &(y_bicsb[0]));
318 double t1 = timer_seconds_since_init();
320 double time = (t1-t0)/REPEAT;
321 cout<<
"BiCSB" <<
" time: " << time <<
" seconds" <<endl;
322 cout<<
"BiCSB" <<
" mflop/sec: " << mflops / time <<endl;
325 cout <<
"starting SpMV_T" << endl;
326 vector< PACKED, aligned_allocator<PACKED, ALIGN> > xt(m);
327 vector< PACKED, aligned_allocator<PACKED, ALIGN> > yt_bicsb(n);
328 vector< PACKED, aligned_allocator<PACKED, ALIGN> > yt_csc(n);
330 fillzero<VALUETYPE, aligned_allocator<PACKED, ALIGN>, RHSDIM>(yt_csc);
331 fillzero<VALUETYPE, aligned_allocator<PACKED, ALIGN>, RHSDIM>(yt_bicsb);
332 fillrandom<VALUETYPE, aligned_allocator<PACKED, ALIGN>, RHSDIM>(xt);
334 bicsb_gespmvt<PTARR>(bicsb, &(xt[0]), &(yt_bicsb[0]));
335 t0 = timer_seconds_since_init();
336 for(
int i=0; i <
REPEAT; ++i)
338 bicsb_gespmvt<PTARR>(bicsb, &(xt[0]), &(yt_bicsb[0]));
340 t1 = timer_seconds_since_init();
342 double totaltime = time + (t1-t0)/REPEAT;
343 time = (t1-t0)/REPEAT;
344 cout<<
"BiCSB Trans" <<
" time: " << time <<
" seconds" <<endl;
345 cout<<
"BiCSB Trans" <<
" mflop/sec: " << mflops / time <<endl;
348 cout<<
"BiCSB Total" <<
" time: " << totaltime <<
" seconds" <<endl;
349 cout<<
"BiCSB Total" <<
" mflop/sec: " << 2*mflops / totaltime <<endl;
352 csc_gaxpy_mm<RHSDIM>(*csc, &(x[0]), &(y_csc[0]));
353 t0 = timer_seconds_since_init();
354 for(
int i=0; i <
REPEAT; ++i)
356 csc_gaxpy_mm<RHSDIM>(*csc, &(x[0]), &(y_csc[0]));
358 t1 = timer_seconds_since_init();
359 double csctime = (t1-t0)/REPEAT;
360 cout<<
"CSC" <<
" time: " << csctime <<
" seconds" <<endl;
361 cout<<
"CSC" <<
" mflop/sec: " << mflops / csctime <<endl;
363 VerifyMM<VALUETYPE, aligned_allocator<PACKED, ALIGN>, RHSDIM>(y_csc, y_bicsb);
366 csc_gaxpy_mm_trans<RHSDIM> ( *csc, &(xt[0]), &(yt_csc[0]));
367 t0 = timer_seconds_since_init();
368 for(
int i=0; i <
REPEAT; ++i)
370 csc_gaxpy_mm_trans<RHSDIM> ( *csc, &(xt[0]), &(yt_csc[0]));
372 t1 = timer_seconds_since_init();
373 time = (t1-t0)/REPEAT;
374 cout <<
"Transposed CSC time: " << time <<
" seconds" << endl;
375 cout <<
"Transposed CSC mflop/sec: " << mflops/ time << endl;
377 VerifyMM<VALUETYPE, aligned_allocator<PACKED, ALIGN>, RHSDIM>(yt_csc, yt_bicsb);
int main(int argc, char *argv[])
void fillzero(vector< array< NT, DIM >, ALLOC > &vecofarr)
float ColImbalance(const BiCsb< NT, IT > &A)
void fillrandom(vector< array< NT, DIM >, ALLOC > &vecofarr)
void VerifyMM(vector< array< NT, DIM >, ALLOC > &control, vector< array< NT, DIM >, ALLOC > &test)
float RowImbalance(const CSB &A)
void SetPointers(ITYPE *colpointers, ITYPE *rowindices, T *vals, ITYPE size, ITYPE rows, ITYPE cols, bool fortran)