// Make the input matrix structurally symmetric before running BFS.
template <typename PARMAT>
void Symmetricize(PARMAT & A)
{
    // ...
}

// Semiring used by the SpMV (SpMSpV) calls below: add() takes the minimum of
// two candidate values, so when several frontier vertices reach the same
// vertex, the smallest value wins; id() supplies the "not reached" value.
struct SelectMinSR
{
    // ...
    static T_promote id() { return -1; }

    static T_promote add(const T_promote & arg1, const T_promote & arg2)
    {
        return std::min(arg1, arg2);
    }

    static T_promote multiply(const bool & arg1, const T_promote & arg2)
    {
        // ...
    }

    static void axpy(bool a, const T_promote & x, T_promote & y)
    {
        // ...
    }
};
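// Note: the three measurement loops below are the bodies of the BFS driver
// routines (their signatures are not shown here). Each loop seeds a sparse
// frontier at the source vertex and repeatedly expands it with
// SpMV<SelectMinSR>. Because add() is std::min and the frontier values are set
// to vertex indices (fringe.setNumToInd()), a vertex reached in a step keeps
// the smallest index among the frontier vertices that discovered it, roughly:
//
//     for every frontier vertex j with value x[j]:
//         for every neighbor i of j:
//             y[i] = (y[i] unset) ? x[j] : std::min(y[i], x[j])
//
// This is only a sketch of the semiring's intent, not the distributed SpMSpV
// implementation. The first loop runs the SpMSpV on the CSC-based boolean
// matrix ABoolCSC with a preallocated sparse accumulator (SPA).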
    nthreads = omp_get_num_threads();
    // ...
    double tspmvall = 0, tall = 0;
    // ...
    for (int i = 0; i < ITERS; ++i)
    {
        // ...
        MPI_Barrier(MPI_COMM_WORLD);
        double t1 = MPI_Wtime();
        // ...
        fringe.SetElement(source, source);      // seed the frontier with the source vertex
        // ...
        while (fringe.getnnz() > 0)             // run until the frontier is empty
        {
            int64_t xnnz = fringe.getnnz();     // nonzeros in the input frontier
            fringe.setNumToInd();               // set frontier values to vertex indices
            double tstart = MPI_Wtime();
            SpMV<SelectMinSR>(ABoolCSC, fringe, fringe, false, SPA);    // SpMSpV with preallocated SPA
            double tspmv = MPI_Wtime() - tstart;
            // ...
            int64_t ynnz = fringe.getnnz();     // nonzeros in the output frontier
            // ...
            outs1 << "iteration: " << iterations << " xnnz: " << xnnz
                  << " ynnz: " << ynnz << " SpMSpV time: " << tspmv << endl;
            // ...
        }
        // ...
        MPI_Barrier(MPI_COMM_WORLD);
        double t2 = MPI_Wtime();
        // ...
        iterall += iterations;
        // ...
    }
    // ...
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    // ...
    cout << "\nOverall stats:" << endl;
    cout << " starting vertex: " << source << endl;
    cout << " Avg number iterations: " << iterall/ITERS << endl;
    cout << " Avg number of vertices found: " << visitedV/ITERS << endl;
    cout << " Avg Number of edges traversed: " << visitedE/ITERS << endl;
    cout << " Avg SpMSpV time: " << tspmvall/ITERS << endl;
    cout << " Avg Total time: " << tall/ITERS << endl;
    nthreads = omp_get_num_threads();
    // ...
    double tspmvall = 0, tall = 0;
    // ...
    for (int i = 0; i < ITERS; ++i)
    {
        // ...
        MPI_Barrier(MPI_COMM_WORLD);
        double t1 = MPI_Wtime();
        // ...
        fringe.SetElement(source, source);
        // ...
        while (fringe.getnnz() > 0)
        {
            int64_t xnnz = fringe.getnnz();
            fringe.setNumToInd();
            double tstart = MPI_Wtime();
            SpMV<SelectMinSR>(Aeff, fringe, fringe, false);
            double tspmv = MPI_Wtime() - tstart;
            // ...
            int64_t ynnz = fringe.getnnz();
            // ...
            outs1 << "iteration: " << iterations << " xnnz: " << xnnz
                  << " ynnz: " << ynnz << " SpMSpV time: " << tspmv << endl;
            // ...
        }
        // ...
        MPI_Barrier(MPI_COMM_WORLD);
        double t2 = MPI_Wtime();
        // ...
        iterall += iterations;
        // ...
    }
    // ...
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    // ...
    cout << "\nOverall stats:" << endl;
    cout << " starting vertex: " << source << endl;
    cout << " Avg number iterations: " << iterall/ITERS << endl;
    cout << " Avg number of vertices found: " << visitedV/ITERS << endl;
    cout << " Avg Number of edges traversed: " << visitedE/ITERS << endl;
    cout << " Avg SpMSpV time: " << tspmvall/ITERS << endl;
    cout << " Avg Total time: " << tall/ITERS << endl;
    nthreads = omp_get_num_threads();
    // ...
    double tspmvall = 0, tall = 0;
    // ...
    for (int i = 0; i < ITERS; ++i)
    {
        // ...
        MPI_Barrier(MPI_COMM_WORLD);
        double t1 = MPI_Wtime();
        // ...
        fringe.SetElement(source, source);
        // ...
        while (fringe.getnnz() > 0)
        {
            int64_t xnnz = fringe.getnnz();
            fringe.setNumToInd();
            double tstart = MPI_Wtime();
            SpMV<SelectMinSR>(ABoolCSC, fringe, fringe, false);
            double tspmv = MPI_Wtime() - tstart;
            // ...
            int64_t ynnz = fringe.getnnz();
            // ...
            outs1 << "iteration: " << iterations << " xnnz: " << xnnz
                  << " ynnz: " << ynnz << " SpMSpV time: " << tspmv << endl;
            // ...
        }
        // ...
        MPI_Barrier(MPI_COMM_WORLD);
        double t2 = MPI_Wtime();
        // ...
        iterall += iterations;
        // ...
    }
    // ...
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    // ...
    cout << "\nOverall stats:" << endl;
    cout << " starting vertex: " << source << endl;
    cout << " Avg number iterations: " << iterall/ITERS << endl;
    cout << " Avg number of vertices found: " << visitedV/ITERS << endl;
    cout << " Avg Number of edges traversed: " << visitedE/ITERS << endl;
    cout << " Avg SpMSpV time: " << tspmvall/ITERS << endl;
    cout << " Avg Total time: " << tall/ITERS << endl;
int main(int argc, char* argv[])
{
    // ...
    MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
    if (provided < MPI_THREAD_SERIALIZED)
    {
        printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    // ...
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    // ...
    cout << "Usage: ./SpMSpVBench <-input|-rmat|-er> <scale|filename> " << endl;
    cout << " optional parameters:" << endl;
    cout << " -source \"source of BFS\" (default: 0) " << endl;
    cout << " -iter \"number of BFS iterations\" (default: 1)" << endl;
    cout << "Example with a user supplied matrix:" << endl;
    cout << " mpirun -np 4 ./SpMSpVBench -input a.mtx -source 2" << endl;
    cout << "Example with a user supplied matrix (pre-permute the input matrix for load balance):" << endl;
    cout << " mpirun -np 4 ./SpMSpVBench -input a.mtx -permute" << endl;
    cout << "Example with an RMAT matrix: mpirun -np 4 ./SpMSpVBench -rmat 18" << endl;
    cout << "Example with an Erdos-Renyi matrix: mpirun -np 4 ./SpMSpVBench -er 18" << endl;
    // ...
    shared_ptr<CommGrid> fullWorld;
    fullWorld.reset(new CommGrid(MPI_COMM_WORLD, 0, 0));
    // ...
    nthreads = omp_get_num_threads();
    // ...
    bool scramble = false;
    // ...
    bool randpermute = false;
    // ...
    int maxthreads = nthreads;
    int minthreads = nthreads;
    string filename(argv[2]);
    for (int i = 1; i < argc; i++)
    {
        if (strcmp(argv[i], "-permute") == 0)
        {
            if (myrank == 0) cout << "Randomly permute the matrix " << endl;
            // ...
        }
        if (strcmp(argv[i], "-source") == 0)
        {
            source = atoi(argv[i + 1]);
            if (myrank == 0) cout << "Source vertex: " << source << endl;
        }
        if (strcmp(argv[i], "-iter") == 0)
        {
            ITERS = atoi(argv[i + 1]);
            if (myrank == 0) cout << "Number of iterations: " << ITERS << endl;
        }
        // ...
    }
    // ...
    if (string(argv[1]) == string("-input"))
    {
        // ...
        G->Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0));      // row sums give per-vertex degrees
        // ...
        A.Reduce(*ColSums, Column, plus<int64_t>(), static_cast<int64_t>(0));
        nonisov = ColSums->FindInds(bind2nd(greater<int64_t>(), 0));            // indices of non-isolated vertices
        // ...
        A(nonisov, nonisov, true);                                              // in-place permutation that drops isolated vertices
        degrees = degrees(nonisov);
        // ...
        degrees = degrees(nonisov);
        // ...
    }
    else if (string(argv[1]) == string("-rmat"))
    {
        // ...
        scale = static_cast<unsigned>(atoi(argv[2]));
        double initiator[4] = {.57, .19, .19, .05};
        // ...
        MPI_Barrier(MPI_COMM_WORLD);
        // ...
        Aeff.Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0));
        // ...
    }
    else if (string(argv[1]) == string("-er"))
    {
        // ...
        scale = static_cast<unsigned>(atoi(argv[2]));
        double initiator[4] = {.25, .25, .25, .25};
        // ...
        MPI_Barrier(MPI_COMM_WORLD);
        // ...
        Aeff.Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0));
        // ...
    }
    // ...
    outs << "Load balance: " << balance << endl;
    // ...
    MPI_Barrier(MPI_COMM_WORLD);
    // ...
    BFS_CSC(Aeff, source, degrees);
    // ...