59 const uint64_t b[] = {0x2ULL, 0xCULL, 0xF0ULL, 0xFF00ULL, 0xFFFF0000ULL, 0xFFFFFFFF00000000ULL};
60 const unsigned int S[] = {1, 2, 4, 8, 16, 32};
64 for (i = 5; i >= 0; i--)
76 bool from_string(T & t,
const string& s, std::ios_base& (*f)(std::ios_base&))
79 return !(iss >> f >> t).fail();
83 template <
typename PARMAT>
97 struct prunediscovered:
public std::binary_function<int64_t, int64_t, int64_t >
101 return ( y == -1 ) ? x: -1;
105 int main(
int argc,
char* argv[])
110 int provided, flag, claimed;
111 MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided );
112 MPI_Is_thread_main( &flag );
114 SpParHelper::Print(
"This thread called init_thread but Is_thread_main gave false\n");
115 MPI_Query_thread( &claimed );
116 if (claimed != provided)
117 SpParHelper::Print(
"Query thread gave different thread level than requested\n");
119 MPI_Init(&argc, &argv);
120 int cblas_splits = 1;
123 MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
124 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
129 cout <<
"Usage: ./tdbfs <Force,Input> <Scale Forced | Input Name> {FastGen}" << endl;
130 cout <<
"Example: ./tdbfs Force 25 FastGen" << endl;
141 shared_ptr<CommGrid> fullWorld;
142 fullWorld.reset(
new CommGrid(MPI_COMM_WORLD, 0, 0) );
145 PSpMat_Bool
A(fullWorld);
146 PSpMat_s32p64 Aeff(fullWorld);
151 bool scramble =
false;
153 if(
string(argv[1]) ==
string(
"Input"))
156 SpParHelper::Print(
"Read input\n");
159 G->
Reduce(degrees,
Row, plus<int64_t>(), static_cast<int64_t>(0));
164 A.
Reduce(*ColSums,
Column, plus<int64_t>(), static_cast<int64_t>(0));
165 nonisov = ColSums->
FindInds(bind2nd(greater<int64_t>(), 0));
167 A =
A(nonisov, nonisov);
170 SpParHelper::Print(
"Symmetricized and pruned\n");
175 tinfo <<
"Threading activated with " << cblas_splits <<
" threads" << endl;
176 SpParHelper::Print(tinfo.str());
180 else if(
string(argv[1]) ==
string(
"Binary"))
187 outs <<
"Reading " << argv[2] <<
" with " << n <<
" vertices and " << m <<
" edges" << endl;
188 SpParHelper::Print(outs.str());
190 SpParHelper::Print(
"Read binary input to distributed edge list\n");
193 SpParHelper::Print(
"Permuted Edges\n");
197 SpParHelper::Print(
"Renamed Vertices\n");
202 SpParHelper::Print(
"Created Int64 Sparse Matrix\n");
204 G->
Reduce(degrees,
Row, plus<int64_t>(), static_cast<int64_t>(0));
210 ostringstream loopinfo;
211 loopinfo <<
"Converted to Boolean and removed " << removed <<
" loops" << endl;
212 SpParHelper::Print(loopinfo.str());
217 A.
Reduce(*ColSums,
Column, plus<int64_t>(), static_cast<int64_t>(0));
218 A.
Reduce(*RowSums,
Row, plus<int64_t>(), static_cast<int64_t>(0));
219 ColSums->
EWiseApply(*RowSums, plus<int64_t>());
222 nonisov = ColSums->
FindInds(bind2nd(greater<int64_t>(), 0));
225 SpParHelper::Print(
"Found (and permuted) non-isolated vertices\n");
229 A(nonisov, nonisov,
true);
230 SpParHelper::Print(
"Dropped isolated vertices from input\n");
237 SpParHelper::Print(
"Symmetricized\n");
243 tinfo <<
"Threading activated with " << cblas_splits <<
" threads" << endl;
244 SpParHelper::Print(tinfo.str());
250 if(
string(argv[1]) ==
string(
"Force"))
252 scale =
static_cast<unsigned>(atoi(argv[2]));
254 outs <<
"Forcing scale to : " << scale << endl;
255 SpParHelper::Print(outs.str());
257 if(argc > 3 &&
string(argv[3]) == string(
"FastGen"))
259 SpParHelper::Print(
"Using fast vertex permutations; skipping edge permutations (like v2.1)\n");
265 SpParHelper::Print(
"Unknown option\n");
270 double initiator[4] = {.57, .19, .19, .05};
272 double t01 = MPI_Wtime();
278 SpParHelper::Print(
"Generated edge lists\n");
281 tinfo <<
"Generation took " << t02-t01 <<
" seconds" << endl;
282 SpParHelper::Print(tinfo.str());
285 SpParHelper::Print(
"Permuted Edges\n");
290 SpParHelper::Print(
"Renamed Vertices\n");
295 SpParHelper::Print(
"Generated renamed edge lists\n");
298 tinfo <<
"Generation took " << t02-t01 <<
" seconds" << endl;
299 SpParHelper::Print(tinfo.str());
303 MPI_Barrier(MPI_COMM_WORLD);
304 double t1 = MPI_Wtime();
309 SpParHelper::Print(
"Created Sparse Matrix (with int32 local indices and values)\n");
311 MPI_Barrier(MPI_COMM_WORLD);
312 double redts = MPI_Wtime();
313 G->
Reduce(degrees,
Row, plus<int64_t>(), static_cast<int64_t>(0));
314 MPI_Barrier(MPI_COMM_WORLD);
315 double redtf = MPI_Wtime();
317 ostringstream redtimeinfo;
318 redtimeinfo <<
"Calculated degrees in " << redtf-redts <<
" seconds" << endl;
319 SpParHelper::Print(redtimeinfo.str());
324 ostringstream loopinfo;
325 loopinfo <<
"Converted to Boolean and removed " << removed <<
" loops" << endl;
326 SpParHelper::Print(loopinfo.str());
331 A.
Reduce(*ColSums,
Column, plus<int64_t>(), static_cast<int64_t>(0));
332 A.Reduce(*RowSums,
Row, plus<int64_t>(), static_cast<int64_t>(0));
333 SpParHelper::Print(
"Reductions done\n");
334 ColSums->
EWiseApply(*RowSums, plus<int64_t>());
336 SpParHelper::Print(
"Intersection of colsums and rowsums found\n");
338 nonisov = ColSums->
FindInds(bind2nd(greater<int64_t>(), 0));
341 SpParHelper::Print(
"Found (and permuted) non-isolated vertices\n");
345 A(nonisov, nonisov,
true);
346 SpParHelper::Print(
"Dropped isolated vertices from input\n");
353 SpParHelper::Print(
"Symmetricized\n");
358 tinfo <<
"Threading activated with " << cblas_splits <<
" threads" << endl;
359 SpParHelper::Print(tinfo.str());
364 MPI_Barrier(MPI_COMM_WORLD);
365 double t2=MPI_Wtime();
367 ostringstream k1timeinfo;
368 k1timeinfo << (t2-t1) - (redtf-redts) <<
" seconds elapsed for Kernel #1" << endl;
369 SpParHelper::Print(k1timeinfo.str());
374 outs <<
"Load balance: " << balance << endl;
375 SpParHelper::Print(outs.str());
377 MPI_Barrier(MPI_COMM_WORLD);
378 double t1 = MPI_Wtime();
382 degrees = degrees(nonisov);
387 double nver = (double) degrees.TotalLength();
394 vector<double> loccands(
ITERS);
395 vector<int64_t> loccandints(
ITERS);
398 for(
int i=0; i<
ITERS; ++i)
399 loccands[i] = M.
rand();
400 copy(loccands.begin(), loccands.end(), ostream_iterator<double>(cout,
" ")); cout << endl;
401 transform(loccands.begin(), loccands.end(), loccands.begin(), bind2nd( multiplies<double>(), nver ));
403 for(
int i=0; i<
ITERS; ++i)
404 loccandints[i] = static_cast<int64_t>(loccands[i]);
405 copy(loccandints.begin(), loccandints.end(), ostream_iterator<double>(cout,
" ")); cout << endl;
408 for(
int i=0; i<
ITERS; ++i)
414 for(
int trials =0; trials <
MAXTRIALS; trials++)
421 MPI_Pcontrol(1,
"BFS");
424 for(
int i=0; i<
ITERS; ++i)
432 MPI_Barrier(MPI_COMM_WORLD);
433 double t1 = MPI_Wtime();
435 fringe.SetElement(Cands[i], Cands[i]);
437 while(fringe.getnnz() > 0)
439 fringe.setNumToInd();
440 fringe =
SpMV(Aeff, fringe,optbuf);
445 MPI_Barrier(MPI_COMM_WORLD);
446 double t2 = MPI_Wtime();
454 ostringstream outnew;
455 outnew << i <<
"th starting vertex was " << Cands[i] << endl;
456 outnew <<
"Number iterations: " << iterations << endl;
457 outnew <<
"Number of vertices found: " << parentsp.
Reduce(plus<int64_t>(), (
int64_t) 0) << endl;
458 outnew <<
"Number of edges traversed: " << nedges << endl;
459 outnew <<
"BFS time: " << t2-t1 <<
" seconds" << endl;
460 outnew <<
"MTEPS: " <<
static_cast<double>(nedges) / (t2-t1) / 1000000.0 << endl;
464 MTEPS[i] =
static_cast<double>(nedges) / (t2-t1) / 1000000.0;
465 SpParHelper::Print(outnew.str());
467 SpParHelper::Print(
"Finished\n");
469 MPI_Pcontrol(-1,
"BFS");
472 os <<
"Per iteration communication times: " << endl;
477 os <<
"Per iteration computation times: " << endl;
481 sort(EDGES, EDGES+ITERS);
482 os <<
"--------------------------" << endl;
483 os <<
"Min nedges: " << EDGES[0] << endl;
484 os <<
"First Quartile nedges: " << (EDGES[(ITERS/4)-1] + EDGES[ITERS/4])/2 << endl;
485 os <<
"Median nedges: " << (EDGES[(ITERS/2)-1] + EDGES[ITERS/2])/2 << endl;
486 os <<
"Third Quartile nedges: " << (EDGES[(3*ITERS/4) -1 ] + EDGES[3*ITERS/4])/2 << endl;
487 os <<
"Max nedges: " << EDGES[ITERS-1] << endl;
488 double mean = accumulate( EDGES, EDGES+ITERS, 0.0 )/
ITERS;
489 vector<double> zero_mean(ITERS);
490 transform(EDGES, EDGES+ITERS, zero_mean.begin(), bind2nd( minus<double>(), mean ));
492 double deviation = inner_product( zero_mean.begin(),zero_mean.end(), zero_mean.begin(), 0.0 );
493 deviation = sqrt( deviation / (ITERS-1) );
494 os <<
"Mean nedges: " << mean << endl;
495 os <<
"STDDEV nedges: " << deviation << endl;
496 os <<
"--------------------------" << endl;
498 sort(TIMES,TIMES+ITERS);
499 os <<
"Min time: " << TIMES[0] <<
" seconds" << endl;
500 os <<
"First Quartile time: " << (TIMES[(ITERS/4)-1] + TIMES[ITERS/4])/2 <<
" seconds" << endl;
501 os <<
"Median time: " << (TIMES[(ITERS/2)-1] + TIMES[ITERS/2])/2 <<
" seconds" << endl;
502 os <<
"Third Quartile time: " << (TIMES[(3*ITERS/4)-1] + TIMES[3*ITERS/4])/2 <<
" seconds" << endl;
503 os <<
"Max time: " << TIMES[ITERS-1] <<
" seconds" << endl;
504 mean = accumulate( TIMES, TIMES+ITERS, 0.0 )/
ITERS;
505 transform(TIMES, TIMES+ITERS, zero_mean.begin(), bind2nd( minus<double>(), mean ));
506 deviation = inner_product( zero_mean.begin(),zero_mean.end(), zero_mean.begin(), 0.0 );
507 deviation = sqrt( deviation / (ITERS-1) );
508 os <<
"Mean time: " << mean <<
" seconds" << endl;
509 os <<
"STDDEV time: " << deviation <<
" seconds" << endl;
510 os <<
"--------------------------" << endl;
512 sort(MTEPS, MTEPS+ITERS);
513 os <<
"Min MTEPS: " << MTEPS[0] << endl;
514 os <<
"First Quartile MTEPS: " << (MTEPS[(ITERS/4)-1] + MTEPS[ITERS/4])/2 << endl;
515 os <<
"Median MTEPS: " << (MTEPS[(ITERS/2)-1] + MTEPS[ITERS/2])/2 << endl;
516 os <<
"Third Quartile MTEPS: " << (MTEPS[(3*ITERS/4)-1] + MTEPS[3*ITERS/4])/2 << endl;
517 os <<
"Max MTEPS: " << MTEPS[ITERS-1] << endl;
519 double hteps =
static_cast<double>(
ITERS) / accumulate(INVMTEPS, INVMTEPS+ITERS, 0.0);
520 os <<
"Harmonic mean of MTEPS: " << hteps << endl;
521 transform(INVMTEPS, INVMTEPS+ITERS, zero_mean.begin(), bind2nd(minus<double>(), 1/hteps));
522 deviation = inner_product( zero_mean.begin(),zero_mean.end(), zero_mean.begin(), 0.0 );
523 deviation = sqrt( deviation / (ITERS-1) ) * (hteps*hteps);
524 os <<
"Harmonic standard deviation of MTEPS: " << deviation << endl;
525 SpParHelper::Print(os.str());
FullyDistVec< IT, NT > Reduce(Dim dim, _BinaryOperation __binary_op, NT id, _UnaryOperation __unary_op) const
std::shared_ptr< CommGrid > getcommgrid() const
void ActivateThreading(int numsplits)
void SetElement(IT indx, NT numx)
MPI_Datatype MPIType< int64_t >(void)
void Set(const FullyDistSpVec< IT, NT > &rhs)
void GenGraph500Data(double initiator[4], int log_numverts, int edgefactor, bool scramble=false, bool packed=false)
FullyDistVec< IT, IT > FindInds(_Predicate pred) const
Return the indices where pred is true.
SpParMat< int64_t, int, SpDCCols< int32_t, int > > PSpMat_s32p64_Int
Dcsc< IU, typename promote_trait< NU1, NU2 >::T_promote > EWiseMult(const Dcsc< IU, NU1 > &A, const Dcsc< IU, NU2 > *B, bool exclude)
void Symmetricize(PARMAT &A)
FullyDistSpVec< IT, NT > Find(_Predicate pred) const
Return the elements for which pred is true.
double cblas_alltoalltime
void PermEdges(DistEdgeList< IT > &DEL)
SelectMaxSRing< bool, int32_t > SR
float LoadImbalance() const
void RenameVertices(DistEdgeList< IU > &DEL)
void EWiseApply(const FullyDistVec< IT, NT2 > &other, _BinaryOperation __binary_op, _BinaryPredicate _do_op, const bool useExtendedBinOp)
void OptimizeForGraph500(OptBuf< LIT, OT > &optbuf)
bool from_string(T &t, const string &s, std::ios_base &(*f)(std::ios_base &))
void ReadDistribute(const std::string &filename, int master, bool nonum, HANDLER handler, bool transpose=false, bool pario=false)
NT Reduce(_BinaryOperation __binary_op, NT init) const
double cblas_transvectime
int64_t operator()(int64_t x, const int64_t &y) const
double cblas_mergeconttime
double cblas_localspmvtime
int main(int argc, char *argv[])
void PrintInfo(std::string vectorname) const
SpParMat< int64_t, bool, SpDCCols< int64_t, bool > > PSpMat_Bool
double cblas_allgathertime
SpParMat< int64_t, int64_t, SpDCCols< int64_t, int64_t > > PSpMat_Int64
NT Reduce(_BinaryOperation __binary_op, NT identity) const
unsigned int highestbitset(uint64_t v)
void Apply(_UnaryOperation __unary_op)
SpParMat< int64_t, bool, SpDCCols< int32_t, bool > > PSpMat_s32p64