Combinatorial BLAS 1.6
TopDownBFS.cpp
/****************************************************************/
/* Parallel Combinatorial BLAS Library (for Graph Computations) */
/* version 1.6 -------------------------------------------------*/
/* date: 6/15/2017 ---------------------------------------------*/
/* authors: Ariful Azad, Aydin Buluc --------------------------*/
/****************************************************************/
/*
 Copyright (c) 2010-2017, The Regents of the University of California

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */

#define DETERMINISTIC
#include "CombBLAS/CombBLAS.h"
#include <mpi.h>
#include <sys/time.h>
#include <iostream>
#include <functional>
#include <algorithm>
#include <vector>
#include <string>
#include <sstream>

int cblas_splits;

// timing accumulators used by the CombBLAS kernels, reported after the BFS runs
double cblas_alltoalltime;
double cblas_allgathertime;
double cblas_mergeconttime;
double cblas_transvectime;
double cblas_localspmvtime;

#define ITERS 16
#define EDGEFACTOR 16
using namespace std;
using namespace combblas;

// 64-bit floor(log2(x)) function
// note: least significant bit is the "zeroth" bit
// pre: v > 0
unsigned int highestbitset(uint64_t v)
{
    // b in binary is {10, 1100, 11110000, 1111111100000000, ...}
    const uint64_t b[] = {0x2ULL, 0xCULL, 0xF0ULL, 0xFF00ULL, 0xFFFF0000ULL, 0xFFFFFFFF00000000ULL};
    const unsigned int S[] = {1, 2, 4, 8, 16, 32};
    int i;

    unsigned int r = 0; // result of log2(v) will go here
    for (i = 5; i >= 0; i--)
    {
        if (v & b[i]) // the highest set bit is in the upper half, i.e. v >= 2^S[i]
        {
            v >>= S[i];
            r |= S[i];
        }
    }
    return r;
}
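// worked example: v = 12 (binary 1100)
//   i = 5..2: none of the masked upper halves are set, so v and r are unchanged
//   i = 1: v & 0xC != 0  ->  v >>= 2 (v = 3), r |= 2 (r = 2)
//   i = 0: v & 0x2 != 0  ->  v >>= 1 (v = 1), r |= 1 (r = 3)
// hence highestbitset(12) == 3 == floor(log2(12))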

template <class T>
bool from_string(T & t, const string& s, std::ios_base& (*f)(std::ios_base&))
{
    istringstream iss(s);
    return !(iss >> f >> t).fail();
}
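// example: given uint64_t n, from_string(n, string("12345"), std::dec)
// leaves n == 12345 and returns true; it returns false on a parse failure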


template <typename PARMAT>
void Symmetricize(PARMAT & A)
{
    // boolean addition is effectively a logical "or",
    // so symmetricizing does not destroy any existing edges
    PARMAT AT = A;
    AT.Transpose();
    A += AT;
}
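// e.g. a directed edge (i,j) with no reverse edge gains its (j,i) counterpart
// in A + A', while an edge present in both directions simply stays true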

struct prunediscovered: public std::binary_function<int64_t, int64_t, int64_t >
{
    int64_t operator()(int64_t x, const int64_t & y) const
    {
        return ( y == -1 ) ? x : -1;
    }
};
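// intended use: x is a proposed parent for a vertex and y is that vertex's
// current entry in the parents vector; x survives only while the vertex is
// still undiscovered (y == -1), otherwise the result collapses to -1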

int main(int argc, char* argv[])
{
    int nprocs, myrank;
#ifdef _OPENMP
    int cblas_splits = omp_get_max_threads();
    int provided, flag, claimed;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided );
    MPI_Is_thread_main( &flag );
    if (!flag)
        SpParHelper::Print("This thread called init_thread but Is_thread_main gave false\n");
    MPI_Query_thread( &claimed );
    if (claimed != provided)
        SpParHelper::Print("Query thread gave different thread level than requested\n");
#else
    MPI_Init(&argc, &argv);
    int cblas_splits = 1;
#endif

    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    if(argc < 3)
    {
        if(myrank == 0)
        {
            cout << "Usage: ./tdbfs <Force|Input> <Scale Forced | Input Name> {FastGen}" << endl;
            cout << "Example: ./tdbfs Force 25 FastGen" << endl;
        }
        MPI_Finalize();
        return -1;
    }
    {
        typedef SpParMat < int64_t, bool, SpDCCols<int64_t,bool> > PSpMat_Bool;
        typedef SpParMat < int64_t, int64_t, SpDCCols<int64_t,int64_t> > PSpMat_Int64;
        typedef SpParMat < int64_t, bool, SpDCCols<int32_t,bool> > PSpMat_s32p64; // sequentially use 32-bits for local matrices, but parallel semantics are 64-bits
        typedef SpParMat < int64_t, int, SpDCCols<int32_t,int> > PSpMat_s32p64_Int; // similarly mixed, but holds integers as opposed to booleans

        shared_ptr<CommGrid> fullWorld;
        fullWorld.reset( new CommGrid(MPI_COMM_WORLD, 0, 0) );

        // Declare objects
        PSpMat_Bool A(fullWorld);
        PSpMat_s32p64 Aeff(fullWorld);
        FullyDistVec<int64_t, int64_t> degrees(fullWorld); // degrees of vertices (including multi-edges and self-loops)
        FullyDistVec<int64_t, int64_t> nonisov(fullWorld); // ids of non-isolated (connected) vertices
        unsigned scale;
        OptBuf<int32_t, int64_t> optbuf; // let indices be 32-bits
        bool scramble = false;

        if(string(argv[1]) == string("Input")) // input option
        {
            A.ReadDistribute(string(argv[2]), 0); // read it from file
            SpParHelper::Print("Read input\n");

            PSpMat_Int64 * G = new PSpMat_Int64(A);
            G->Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0)); // identity is 0
            delete G;

            Symmetricize(A); // A += A';
            FullyDistVec<int64_t, int64_t> * ColSums = new FullyDistVec<int64_t, int64_t>(A.getcommgrid());
            A.Reduce(*ColSums, Column, plus<int64_t>(), static_cast<int64_t>(0)); // plus<int64_t> matches the type of the output vector
            nonisov = ColSums->FindInds(bind2nd(greater<int64_t>(), 0)); // only the indices of non-isolated vertices
            delete ColSums;
            A = A(nonisov, nonisov);
            Aeff = PSpMat_s32p64(A);
            A.FreeMemory();
            SpParHelper::Print("Symmetricized and pruned\n");

            Aeff.OptimizeForGraph500(optbuf); // Should be called before threading is activated
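            // optbuf (declared above with 32-bit indices) holds preallocated
            // buffers that the SpMV calls in the BFS loop below reuse on every
            // iteration; calling OptimizeForGraph500 after ActivateThreading
            // would be too late, presumably because splitting the matrix across
            // threads changes the local storage the buffers are sized for
            // (an assumption about the library internals, inferred from the
            // "before threading is activated" comment above)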
    #ifdef THREADED
            ostringstream tinfo;
            tinfo << "Threading activated with " << cblas_splits << " threads" << endl;
            SpParHelper::Print(tinfo.str());
            Aeff.ActivateThreading(cblas_splits);
    #endif
        }
        else if(string(argv[1]) == string("Binary"))
        {
            uint64_t n, m;
            from_string(n, string(argv[3]), std::dec);
            from_string(m, string(argv[4]), std::dec);

            ostringstream outs;
            outs << "Reading " << argv[2] << " with " << n << " vertices and " << m << " edges" << endl;
            SpParHelper::Print(outs.str());
            DistEdgeList<int64_t> * DEL = new DistEdgeList<int64_t>(argv[2], n, m);
            SpParHelper::Print("Read binary input to distributed edge list\n");

            PermEdges(*DEL);
            SpParHelper::Print("Permuted Edges\n");

            RenameVertices(*DEL);
            //DEL->Dump32bit("graph_permuted");
            SpParHelper::Print("Renamed Vertices\n");

            // conversion from distributed edge list; keeps self-loops, sums duplicates
            PSpMat_Int64 * G = new PSpMat_Int64(*DEL, false);
            delete DEL; // free memory before symmetricizing
            SpParHelper::Print("Created Int64 Sparse Matrix\n");

            G->Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0)); // identity is 0

            A = PSpMat_Bool(*G); // convert to Boolean
            delete G;
            int64_t removed = A.RemoveLoops();

            ostringstream loopinfo;
            loopinfo << "Converted to Boolean and removed " << removed << " loops" << endl;
            SpParHelper::Print(loopinfo.str());
            A.PrintInfo();

            FullyDistVec<int64_t, int64_t> * ColSums = new FullyDistVec<int64_t, int64_t>(A.getcommgrid());
            FullyDistVec<int64_t, int64_t> * RowSums = new FullyDistVec<int64_t, int64_t>(A.getcommgrid());
            A.Reduce(*ColSums, Column, plus<int64_t>(), static_cast<int64_t>(0));
            A.Reduce(*RowSums, Row, plus<int64_t>(), static_cast<int64_t>(0));
            ColSums->EWiseApply(*RowSums, plus<int64_t>());
            delete RowSums;

            nonisov = ColSums->FindInds(bind2nd(greater<int64_t>(), 0)); // only the indices of non-isolated vertices
            delete ColSums;

            SpParHelper::Print("Found (and permuted) non-isolated vertices\n");
            nonisov.RandPerm(); // so that A(v,v) is load-balanced (both memory- and time-wise)
            A.PrintInfo();
    #ifndef NOPERMUTE
            A(nonisov, nonisov, true); // in-place permute to save memory
            SpParHelper::Print("Dropped isolated vertices from input\n");
            A.PrintInfo();
    #endif
            Aeff = PSpMat_s32p64(A); // convert to 32-bit local integers
            A.FreeMemory();

            Symmetricize(Aeff); // A += A';
            SpParHelper::Print("Symmetricized\n");
            //A.Dump("graph_symmetric");

            Aeff.OptimizeForGraph500(optbuf); // should be called before threading is activated
    #ifdef THREADED
            ostringstream tinfo;
            tinfo << "Threading activated with " << cblas_splits << " threads" << endl;
            SpParHelper::Print(tinfo.str());
            Aeff.ActivateThreading(cblas_splits);
    #endif
        }
        else
        {
            if(string(argv[1]) == string("Force"))
            {
                scale = static_cast<unsigned>(atoi(argv[2]));
                ostringstream outs;
                outs << "Forcing scale to: " << scale << endl;
                SpParHelper::Print(outs.str());

                if(argc > 3 && string(argv[3]) == string("FastGen"))
                {
                    SpParHelper::Print("Using fast vertex permutations; skipping edge permutations (like v2.1)\n");
                    scramble = true;
                }
            }
            else
            {
                SpParHelper::Print("Unknown option\n");
                MPI_Finalize();
                return -1;
            }
            // this is an undirected graph, so A*x does indeed perform a BFS
            double initiator[4] = {.57, .19, .19, .05};
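            // these are the Graph500 RMAT initiator probabilities (a,b,c,d) =
            // (0.57, 0.19, 0.19, 0.05): each generated edge recursively picks a
            // quadrant of the adjacency matrix with these probabilities, which
            // yields the skewed degree distribution the benchmark specifies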

            double t01 = MPI_Wtime();
            double t02;
            DistEdgeList<int64_t> * DEL = new DistEdgeList<int64_t>();
            if(!scramble)
            {
                DEL->GenGraph500Data(initiator, scale, EDGEFACTOR);
                SpParHelper::Print("Generated edge lists\n");
                t02 = MPI_Wtime();
                ostringstream tinfo;
                tinfo << "Generation took " << t02-t01 << " seconds" << endl;
                SpParHelper::Print(tinfo.str());

                PermEdges(*DEL);
                SpParHelper::Print("Permuted Edges\n");
                //DEL->Dump64bit("edges_permuted");
                //SpParHelper::Print("Dumped\n");

                RenameVertices(*DEL); // intermediate: generates RandPerm vector, using MemoryEfficientPSort
                SpParHelper::Print("Renamed Vertices\n");
            }
            else // fast generation
            {
                DEL->GenGraph500Data(initiator, scale, EDGEFACTOR, true, true); // generate packed edges
                SpParHelper::Print("Generated renamed edge lists\n");
                t02 = MPI_Wtime();
                ostringstream tinfo;
                tinfo << "Generation took " << t02-t01 << " seconds" << endl;
                SpParHelper::Print(tinfo.str());
            }

            // Start Kernel #1 (graph construction)
            MPI_Barrier(MPI_COMM_WORLD);
            double t1 = MPI_Wtime();

            // conversion from distributed edge list; keeps self-loops, sums duplicates
            PSpMat_s32p64_Int * G = new PSpMat_s32p64_Int(*DEL, false);
            delete DEL; // free memory before symmetricizing
            SpParHelper::Print("Created Sparse Matrix (with int32 local indices and values)\n");

            MPI_Barrier(MPI_COMM_WORLD);
            double redts = MPI_Wtime();
            G->Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0)); // identity is 0
            MPI_Barrier(MPI_COMM_WORLD);
            double redtf = MPI_Wtime();

            ostringstream redtimeinfo;
            redtimeinfo << "Calculated degrees in " << redtf-redts << " seconds" << endl;
            SpParHelper::Print(redtimeinfo.str());
            A = PSpMat_Bool(*G); // convert to Boolean
            delete G;
            int64_t removed = A.RemoveLoops();

            ostringstream loopinfo;
            loopinfo << "Converted to Boolean and removed " << removed << " loops" << endl;
            SpParHelper::Print(loopinfo.str());
            A.PrintInfo();

            FullyDistVec<int64_t, int64_t> * ColSums = new FullyDistVec<int64_t, int64_t>(A.getcommgrid());
            FullyDistVec<int64_t, int64_t> * RowSums = new FullyDistVec<int64_t, int64_t>(A.getcommgrid());
            A.Reduce(*ColSums, Column, plus<int64_t>(), static_cast<int64_t>(0));
            A.Reduce(*RowSums, Row, plus<int64_t>(), static_cast<int64_t>(0));
            SpParHelper::Print("Reductions done\n");
            ColSums->EWiseApply(*RowSums, plus<int64_t>());
            delete RowSums;
            SpParHelper::Print("Intersection of colsums and rowsums found\n");

            nonisov = ColSums->FindInds(bind2nd(greater<int64_t>(), 0)); // only the indices of non-isolated vertices
            delete ColSums;

            SpParHelper::Print("Found (and permuted) non-isolated vertices\n");
            nonisov.RandPerm(); // so that A(v,v) is load-balanced (both memory- and time-wise)
            A.PrintInfo();
    #ifndef NOPERMUTE
            A(nonisov, nonisov, true); // in-place permute to save memory
            SpParHelper::Print("Dropped isolated vertices from input\n");
            A.PrintInfo();
    #endif

            Aeff = PSpMat_s32p64(A); // convert to 32-bit local integers
            A.FreeMemory();
            Symmetricize(Aeff); // A += A';
            SpParHelper::Print("Symmetricized\n");

            Aeff.OptimizeForGraph500(optbuf); // should be called before threading is activated
    #ifdef THREADED
            ostringstream tinfo;
            tinfo << "Threading activated with " << cblas_splits << " threads" << endl;
            SpParHelper::Print(tinfo.str());
            Aeff.ActivateThreading(cblas_splits);
    #endif
            Aeff.PrintInfo();

            MPI_Barrier(MPI_COMM_WORLD);
            double t2 = MPI_Wtime();

            ostringstream k1timeinfo;
            k1timeinfo << (t2-t1) - (redtf-redts) << " seconds elapsed for Kernel #1" << endl;
            SpParHelper::Print(k1timeinfo.str());
        }
        Aeff.PrintInfo();
        float balance = Aeff.LoadImbalance();
        ostringstream outs;
        outs << "Load balance: " << balance << endl;
        SpParHelper::Print(outs.str());

        MPI_Barrier(MPI_COMM_WORLD);
        double t1 = MPI_Wtime();

        // Now that every remaining vertex is non-isolated, randomly pick ITERS of them as starting vertices
        #ifndef NOPERMUTE
        degrees = degrees(nonisov); // fix the degrees array too
        degrees.PrintInfo("Degrees array");
        #endif
        // degrees.DebugPrint();
        FullyDistVec<int64_t, int64_t> Cands(fullWorld, ITERS, 0); // one starting-vertex candidate per BFS run
        double nver = (double) degrees.TotalLength();

#ifdef DETERMINISTIC
        MTRand M(1);
#else
        MTRand M; // generate random numbers with Mersenne Twister
#endif
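        // starting-vertex selection: rank 0 draws ITERS uniform doubles in [0,1),
        // scales them by the vertex count and truncates to integer vertex ids,
        // then broadcasts them so every process agrees on the same candidates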
        vector<double> loccands(ITERS);
        vector<int64_t> loccandints(ITERS);
        if(myrank == 0)
        {
            for(int i=0; i<ITERS; ++i)
                loccands[i] = M.rand();
            copy(loccands.begin(), loccands.end(), ostream_iterator<double>(cout," ")); cout << endl;
            transform(loccands.begin(), loccands.end(), loccands.begin(), bind2nd( multiplies<double>(), nver ));

            for(int i=0; i<ITERS; ++i)
                loccandints[i] = static_cast<int64_t>(loccands[i]);
            copy(loccandints.begin(), loccandints.end(), ostream_iterator<int64_t>(cout," ")); cout << endl;
        }
        MPI_Bcast(&(loccandints[0]), ITERS, MPIType<int64_t>(), 0, MPI_COMM_WORLD);
        for(int i=0; i<ITERS; ++i)
        {
            Cands.SetElement(i, loccandints[i]);
        }

        #define MAXTRIALS 1
        for(int trials = 0; trials < MAXTRIALS; trials++) // try different algorithms for BFS
        {
            cblas_allgathertime = 0;
            cblas_alltoalltime = 0;
            cblas_mergeconttime = 0;
            cblas_transvectime = 0;
            cblas_localspmvtime = 0;
            MPI_Pcontrol(1,"BFS");

            double MTEPS[ITERS]; double INVMTEPS[ITERS]; double TIMES[ITERS]; double EDGES[ITERS];
            for(int i=0; i<ITERS; ++i)
            {
                // FullyDistVec ( shared_ptr<CommGrid> grid, IT globallen, NT initval);
                FullyDistVec<int64_t, int64_t> parents ( Aeff.getcommgrid(), Aeff.getncol(), (int64_t) -1); // identity is -1

                // FullyDistSpVec ( shared_ptr<CommGrid> grid, IT glen);
                FullyDistSpVec<int64_t, int64_t> fringe(Aeff.getcommgrid(), Aeff.getncol()); // numerical values are stored 0-based

                MPI_Barrier(MPI_COMM_WORLD);
                double t1 = MPI_Wtime();

                fringe.SetElement(Cands[i], Cands[i]);
                int iterations = 0;
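                // the top-down BFS, expressed algebraically: every level (1) turns
                // the fringe's values into its indices, (2) multiplies the boolean
                // matrix with the sparse fringe so each frontier vertex proposes
                // itself as parent to its neighbors, (3) prunes proposals to
                // already-discovered vertices (EWiseMult with exclude=true against
                // parents), and (4) commits the survivors into parents; the loop
                // ends when no new vertex is discovered and the fringe comes back empty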
                while(fringe.getnnz() > 0)
                {
                    fringe.setNumToInd();
                    fringe = SpMV(Aeff, fringe, optbuf); // SpMV with sparse vector (with indexisvalue flag preset), optimization enabled
                    fringe = EWiseMult(fringe, parents, true, (int64_t) -1); // clean up vertices that already have parents
                    parents.Set(fringe);
                    iterations++;
                }
                MPI_Barrier(MPI_COMM_WORLD);
                double t2 = MPI_Wtime();

                FullyDistSpVec<int64_t, int64_t> parentsp = parents.Find(bind2nd(greater<int64_t>(), -1));
                parentsp.Apply(myset<int64_t>(1));

                // we use degrees on the directed graph so that reverse edges are not counted in the TEPS score
                int64_t nedges = EWiseMult(parentsp, degrees, false, (int64_t) 0).Reduce(plus<int64_t>(), (int64_t) 0);

                ostringstream outnew;
                outnew << i << "th starting vertex was " << Cands[i] << endl;
                outnew << "Number of iterations: " << iterations << endl;
                outnew << "Number of vertices found: " << parentsp.Reduce(plus<int64_t>(), (int64_t) 0) << endl;
                outnew << "Number of edges traversed: " << nedges << endl;
                outnew << "BFS time: " << t2-t1 << " seconds" << endl;
                outnew << "MTEPS: " << static_cast<double>(nedges) / (t2-t1) / 1000000.0 << endl;
                outnew << "Total communication (average so far): " << (cblas_allgathertime + cblas_alltoalltime) / (i+1) << endl;
                TIMES[i] = t2-t1;
                EDGES[i] = nedges;
                MTEPS[i] = static_cast<double>(nedges) / (t2-t1) / 1000000.0;
                SpParHelper::Print(outnew.str());
            }
            SpParHelper::Print("Finished\n");
            ostringstream os;
            MPI_Pcontrol(-1,"BFS");

            os << "Per iteration communication times: " << endl;
            os << "AllGatherv: " << cblas_allgathertime / ITERS << endl;
            os << "AlltoAllv: " << cblas_alltoalltime / ITERS << endl;
            os << "Transvec: " << cblas_transvectime / ITERS << endl;

            os << "Per iteration computation times: " << endl;
            os << "MergeCont: " << cblas_mergeconttime / ITERS << endl;
            os << "LocalSpmv: " << cblas_localspmvtime / ITERS << endl;

            // the quartile formulas below assume ITERS is a multiple of 4 (ITERS == 16 here)
            sort(EDGES, EDGES+ITERS);
            os << "--------------------------" << endl;
            os << "Min nedges: " << EDGES[0] << endl;
            os << "First Quartile nedges: " << (EDGES[(ITERS/4)-1] + EDGES[ITERS/4])/2 << endl;
            os << "Median nedges: " << (EDGES[(ITERS/2)-1] + EDGES[ITERS/2])/2 << endl;
            os << "Third Quartile nedges: " << (EDGES[(3*ITERS/4)-1] + EDGES[3*ITERS/4])/2 << endl;
            os << "Max nedges: " << EDGES[ITERS-1] << endl;
            double mean = accumulate( EDGES, EDGES+ITERS, 0.0 ) / ITERS;
            vector<double> zero_mean(ITERS); // distances to the mean
            transform(EDGES, EDGES+ITERS, zero_mean.begin(), bind2nd( minus<double>(), mean ));
            // self inner-product is the sum of squares
            double deviation = inner_product( zero_mean.begin(), zero_mean.end(), zero_mean.begin(), 0.0 );
            deviation = sqrt( deviation / (ITERS-1) );
            os << "Mean nedges: " << mean << endl;
            os << "STDDEV nedges: " << deviation << endl;
            os << "--------------------------" << endl;

            sort(TIMES, TIMES+ITERS);
            os << "Min time: " << TIMES[0] << " seconds" << endl;
            os << "First Quartile time: " << (TIMES[(ITERS/4)-1] + TIMES[ITERS/4])/2 << " seconds" << endl;
            os << "Median time: " << (TIMES[(ITERS/2)-1] + TIMES[ITERS/2])/2 << " seconds" << endl;
            os << "Third Quartile time: " << (TIMES[(3*ITERS/4)-1] + TIMES[3*ITERS/4])/2 << " seconds" << endl;
            os << "Max time: " << TIMES[ITERS-1] << " seconds" << endl;
            mean = accumulate( TIMES, TIMES+ITERS, 0.0 ) / ITERS;
            transform(TIMES, TIMES+ITERS, zero_mean.begin(), bind2nd( minus<double>(), mean ));
            deviation = inner_product( zero_mean.begin(), zero_mean.end(), zero_mean.begin(), 0.0 );
            deviation = sqrt( deviation / (ITERS-1) );
            os << "Mean time: " << mean << " seconds" << endl;
            os << "STDDEV time: " << deviation << " seconds" << endl;
            os << "--------------------------" << endl;

            sort(MTEPS, MTEPS+ITERS);
            os << "Min MTEPS: " << MTEPS[0] << endl;
            os << "First Quartile MTEPS: " << (MTEPS[(ITERS/4)-1] + MTEPS[ITERS/4])/2 << endl;
            os << "Median MTEPS: " << (MTEPS[(ITERS/2)-1] + MTEPS[ITERS/2])/2 << endl;
            os << "Third Quartile MTEPS: " << (MTEPS[(3*ITERS/4)-1] + MTEPS[3*ITERS/4])/2 << endl;
            os << "Max MTEPS: " << MTEPS[ITERS-1] << endl;
            transform(MTEPS, MTEPS+ITERS, INVMTEPS, safemultinv<double>()); // returns inf for zero teps
            double hteps = static_cast<double>(ITERS) / accumulate(INVMTEPS, INVMTEPS+ITERS, 0.0);
            os << "Harmonic mean of MTEPS: " << hteps << endl;
            transform(INVMTEPS, INVMTEPS+ITERS, zero_mean.begin(), bind2nd(minus<double>(), 1/hteps));
            deviation = inner_product( zero_mean.begin(), zero_mean.end(), zero_mean.begin(), 0.0 );
            deviation = sqrt( deviation / (ITERS-1) ) * (hteps*hteps); // harmonic standard deviation
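            // rationale: if h = n / sum(1/x_i) is the harmonic mean, then the
            // sample standard deviation of the reciprocals 1/x_i, scaled by h^2
            // (a first-order delta-method estimate), approximates the spread of h itself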
            os << "Harmonic standard deviation of MTEPS: " << deviation << endl;
            SpParHelper::Print(os.str());
        }
    }
    MPI_Finalize();
    return 0;
}
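
// example invocations (hypothetical launcher flags; adjust to the local MPI setup):
//   mpirun -np 16 ./tdbfs Force 22 FastGen    (generate a scale-22 RMAT graph, then run 16 BFSs)
//   mpirun -np 16 ./tdbfs Input mygraph.mtx   (read the graph from a file instead)
// CombBLAS distributes matrices on a square process grid, so launch with a
// perfect-square number of processes (e.g. 4, 16, 64)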