COMBINATORIAL_BLAS  1.6
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
main.cpp
Go to the documentation of this file.
1 #include <mpi.h>
2 
3 #include <string>
4 #include <iomanip>
5 #include <cstdio>
6 #include <cstdlib>
7 #include <iostream>
8 #include <vector>
9 #include <omp.h>
10 #include <sstream>
11 
12 
13 #ifdef _PROFILE_SORT
14 #include "sort_profiler.h"
15 #endif
16 
17 #include <binUtils.h>
18 #include <ompUtils.h>
19 #include <parUtils.h>
20 
21 #ifdef KWICK
22  #if KWAY > 2
23  #define SORT_FUNCTION par::HyperQuickSort_kway
24  #else
25  #ifdef SWAPRANKS
26  #define SORT_FUNCTION par::RankSwapSort
27  #else
28  #define SORT_FUNCTION par::HyperQuickSort
29  #endif
30  #endif
31 #else
32  #define SORT_FUNCTION par::sampleSort
33 #endif
34 
35 // #define __VERIFY__
36 
37 
41 };
42 
43 void printResults(int num_threads, MPI_Comm comm);
44 
45 void getStats(double val, double *meanV, double *minV, double *maxV, MPI_Comm comm)
46 {
47  int p;
48  double d, din;
49  din = val;
50  MPI_Comm_size(comm, &p);
51  MPI_Reduce(&din, &d, 1, MPI_DOUBLE, MPI_SUM, 0, comm); *meanV = d/p;
52  MPI_Reduce(&din, &d, 1, MPI_DOUBLE, MPI_MIN, 0, comm); *minV = d;
53  MPI_Reduce(&din, &d, 1, MPI_DOUBLE, MPI_MAX, 0, comm); *maxV = d;
54 }
55 
56 DistribType getDistType(char* code) {
57  if(!strcmp(code,"GAUSS\0")){
58  return GAUSS_DISTRIB;
59  }else{
60  return UNIF_DISTRIB;
61  }
62 }
63 
64 long getNumElements(char* code) {
65  unsigned int slen = strlen(code);
66  char dtype = code[0];
67  char tmp[128];
68  strncpy(tmp, code+1, slen-3); tmp[slen-3] = '\0';
69 
70  long numBytes = atol(tmp);
71  switch(code[slen-2]) {
72  case 'g':
73  case 'G':
74  numBytes *= 1024*1024*1024;
75  break;
76  case 'k':
77  case 'K':
78  numBytes *= 1024;
79  break;
80  case 'm':
81  case 'M':
82  numBytes *= 1024*1024;
83  break;
84  default:
85  std::cout << "unknown code " << code[slen-2] << std::endl;
86  return 0;
87  };
88 
89  switch (dtype) {
90  case 'd': // double array
91  return numBytes/sizeof(double);
92  break;
93  case 'f': // float array
94  return numBytes/sizeof(float);
95  break;
96  case 'i': // int array
97  return numBytes/sizeof(int);
98  break;
99  case 'l': // long array
100  return numBytes/sizeof(long);
101  break;
102  default:
103  std::cout << "unknown dtype: " << dtype << std::endl;
104  return 0;
105  };
106 
107 }
108 
109 template <class T>
110 bool verify (std::vector<T>& in_, std::vector<T> &out_, MPI_Comm comm){
111 
112  // Find out my identity in the default communicator
113  int myrank, p;
114  MPI_Comm_rank(comm, &myrank);
115  MPI_Comm_size(comm,&p);
116 
117  if (!myrank) std::cout << "Verifying sort" << std::endl;
118 
119  std::vector<T> in;
120  {
121  int N_local=in_.size()*sizeof(T);
122  std::vector<int> r_size(p, 0);
123  std::vector<int> r_disp(p, 0);
124  MPI_Gather(&N_local , 1, MPI_INT,
125  &r_size[0], 1, MPI_INT, 0, comm);
126  omp_par::scan(&r_size[0], &r_disp[0], p);
127 
128  if(!myrank) in.resize((r_size[p-1]+r_disp[p-1])/sizeof(T));
129  MPI_Gatherv((char*)&in_[0], N_local, MPI_BYTE,
130  (char*)&in [0], &r_size[0], &r_disp[0], MPI_BYTE, 0, comm);
131  }
132 
133  std::vector<T> out;
134  {
135  int N_local=out_.size()*sizeof(T);
136  std::vector<int> r_size(p, 0);
137  std::vector<int> r_disp(p, 0);
138  MPI_Gather(&N_local , 1, MPI_INT,
139  &r_size[0], 1, MPI_INT, 0, comm);
140  omp_par::scan(&r_size[0], &r_disp[0], p);
141 
142  if(!myrank) out.resize((r_size[p-1]+r_disp[p-1])/sizeof(T));
143  MPI_Gatherv((char*)&out_[0], N_local, MPI_BYTE,
144  (char*)&out [0], &r_size[0], &r_disp[0], MPI_BYTE, 0, comm);
145  }
146 
147  if(in.size()!=out.size()){
148  std::cout<<"Wrong size: in="<<in.size()<<" out="<<out.size()<<'\n';
149  return false;
150  }
151  std::sort(&in[0], &in[in.size()]);
152 
153  for(long j=0;j<in.size();j++)
154  if(in[j]!=out[j]){
155  std::cout<<"Failed at:"<<j<<'\n';
156 // std::cout<<"Failed at:"<<j<<"; in="<<in[j]<<" out="<<out[j]<<'\n';
157  return false;
158  }
159 
160  return true;
161 }
162 
163 template <class T>
164 double time_sort(size_t N, MPI_Comm comm, DistribType dist_type){
165  int myrank, p;
166 
167  MPI_Comm_rank(comm, &myrank);
168  MPI_Comm_size(comm,&p);
169  int omp_p=omp_get_max_threads();
170 
171  std::vector<IndexHolder<T>> in1(N);
172 
173  // Generate random data
174  std::vector<T> in(N);
175  if(dist_type==UNIF_DISTRIB){
176  #pragma omp parallel for
177  for(int j=0;j<omp_p;j++){
178  unsigned int seed=j*p+myrank;
179  size_t start=(j*N)/omp_p;
180  size_t end=((j+1)*N)/omp_p;
181  for(unsigned int i=start;i<end;i++){
182  in[i]=rand_r(&seed);
183  }
184  }
185  } else if(dist_type==GAUSS_DISTRIB){
186  double e=2.7182818284590452;
187  double log_e=log(e);
188 
189  unsigned int seed1=p+myrank;
190  long mn = rand_r(&seed1);
191 
192  #pragma omp parallel for
193  for(int j=0;j<omp_p;j++){
194  unsigned int seed=j*p+myrank;
195  size_t start=(j*N)/omp_p;
196  size_t end=((j+1)*N)/omp_p;
197  for(unsigned int i=start;i<end;i++){
198  in[i]= mn + sqrt(-2*log(rand_r(&seed)*1.0/RAND_MAX)/log_e)
199  * cos(rand_r(&seed)*2*M_PI/RAND_MAX)*RAND_MAX*0.1;
200  }
201  }
202  }
203 
204  std::vector<T> in_cpy=in;
205  std::vector<T> out;
206 
207  // in=in_cpy;
208  SORT_FUNCTION<T>(in_cpy, comm);
209 #ifdef __VERIFY__
210  verify(in,in_cpy,comm);
211 #endif
212 
213 #ifdef _PROFILE_SORT
214  total_sort.clear();
215 
216  seq_sort.clear();
218 
223 
226  hyper_merge.clear();
227 #endif
228 
229 
230  //Sort
231  MPI_Barrier(comm);
232  double wtime=-omp_get_wtime();
233  // SORT_FUNCTION<T>(in, out, comm);
234  SORT_FUNCTION<T>(in, comm);
235  MPI_Barrier(comm);
236  wtime+=omp_get_wtime();
237 
238  return wtime;
239 }
240 
241 
242 /*
243 template <class T>
244 double time_sort(size_t N, MPI_Comm comm, DistribType dist_type){
245  int myrank, p;
246 
247  MPI_Comm_rank(comm, &myrank);
248  MPI_Comm_size(comm,&p);
249  int omp_p=omp_get_max_threads();
250 
251  // Generate random data
252  std::vector<T> in(N);
253  if(dist_type==UNIF_DISTRIB){
254 #pragma omp parallel for
255  for(int j=0;j<omp_p;j++){
256  unsigned int seed=j*p+myrank;
257  size_t start=(j*N)/omp_p;
258  size_t end=((j+1)*N)/omp_p;
259  for(unsigned int i=start;i<end;i++){
260  in[i]=rand_r(&seed);
261  }
262  }
263  } else if(dist_type==GAUSS_DISTRIB){
264  double e=2.7182818284590452;
265  double log_e=log(e);
266 
267  unsigned int seed1=p+myrank;
268  long mn = rand_r(&seed1);
269 
270 #pragma omp parallel for
271  for(int j=0;j<omp_p;j++){
272  unsigned int seed=j*p+myrank;
273  size_t start=(j*N)/omp_p;
274  size_t end=((j+1)*N)/omp_p;
275  for(unsigned int i=start;i<end;i++){
276  in[i]= mn + sqrt(-2*log(rand_r(&seed)*1.0/RAND_MAX)/log_e)
277  * cos(rand_r(&seed)*2*M_PI/RAND_MAX)*RAND_MAX*0.1;
278  }
279  }
280  }
281 
282  std::vector<T> in_cpy=in;
283  std::vector<T> out;
284 
285  // in=in_cpy;
286  SORT_FUNCTION<T>(in_cpy, comm);
287 #ifdef __VERIFY__
288  verify(in,in_cpy,comm);
289 #endif
290 
291 #ifdef _PROFILE_SORT
292  total_sort.clear();
293 
294  seq_sort.clear();
295  sort_partitionw.clear();
296 
297  sample_get_splitters.clear();
298  sample_sort_splitters.clear();
299  sample_prepare_scatter.clear();
300  sample_do_all2all.clear();
301 
302  hyper_compute_splitters.clear();
303  hyper_communicate.clear();
304  hyper_merge.clear();
305 #endif
306 
307 
308  //Sort
309  MPI_Barrier(comm);
310  double wtime=-omp_get_wtime();
311  // SORT_FUNCTION<T>(in, out, comm);
312  SORT_FUNCTION<T>(in, comm);
313  MPI_Barrier(comm);
314  wtime+=omp_get_wtime();
315 
316  return wtime;
317 }*/
318 
319 int main(int argc, char **argv){
320 
321  if (argc < 4) {
322  std::cerr << "Usage: " << argv[0] << " numThreads typeSize typeDistrib" << std::endl;
323  std::cerr << "\t\t typeSize is a character for type of data follwed by data size per node." << std::endl;
324  std::cerr << "\t\t typeSize can be d-double, f-float, i-int, l-long." << std::endl;
325  std::cerr << "\t\t Examples:" << std::endl;
326  std::cerr << "\t\t i1GB : integer array of size 1GB" << std::endl;
327  std::cerr << "\t\t l1GB : long array of size 1GB" << std::endl;
328  std::cerr << "\t\t typeDistrib can be UNIF, GAUSS" << std::endl;
329  return 1;
330  }
331 
332  std::cout<<setiosflags(std::ios::fixed)<<std::setprecision(4)<<std::setiosflags(std::ios::right);
333 
334  //Set number of OpenMP threads to use.
335  int num_threads = atoi(argv[1]);
336  omp_set_num_threads(num_threads);
337 
338  // Initialize MPI
339  MPI_Init(&argc, &argv);
340 
341  // Find out my identity in the default communicator
342  int myrank;
343  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
344 
345  // Find out number of processes
346  int p;
347  MPI_Comm_size(MPI_COMM_WORLD, &p);
348 
349 
351  /*
352  // 1. read in file corresponding to the node % 100;
353  // 2. select splitters ... 5 - 20 - print ...
354  // 3. bucket and write ... with num_spliters = 20;
355 
356 
357 
358  if (!myrank) std::cout << myrank << ": reading file" << std::endl;
359  char fname[256];
360  // sprintf(fname, "/scratch/01903/hsundar/from_karl/input/part%d", myrank%100);
361  sprintf(fname, "part%d", myrank);
362  FILE* fp = fopen(fname, "rb");
363 
364  long lSize;
365  size_t result;
366  fseek (fp , 0 , SEEK_END);
367  lSize = ftell (fp);
368  rewind (fp);
369 
370  // test buckets ...
371  int nl = lSize/100;
372  std::vector<sortRecord> qq(nl), out;
373 
374  // printf("%d: read size is %ld\n", myrank, lSize);
375  / *
376  srand(myrank*1713);
377  for (int i=0; i<nl; i++) {
378  qq[i] = rand();
379  }
380  * /
381  result = fread ( &(*qq.begin()), 1, lSize, fp);
382  if (result != lSize) { std::cerr << myrank << ": Reading error" << std::endl; exit (3);}
383  fclose (fp);
384  if (!myrank) std::cout << myrank << ": finished reading file" << std::endl;
385 
386  omp_par::merge_sort(&qq[0], &qq[qq.size()]);
387 
388  MPI_Barrier(MPI_COMM_WORLD);
389  if (!myrank) std::cout << myrank << ": starting approx Select" << std::endl;
390 
391  std::vector<std::pair<sortRecord, DendroIntL> > sortBins = par::Sorted_approx_Select_skewed(qq, 9, MPI_COMM_WORLD);
392 
393  if (!myrank) std::cout << myrank << ": finished approx Select" << std::endl;
394  / *
395  if (!myrank) {
396  std::cout << "splitters = [" << std::endl;
397  for (int i=0; i<9; ++i) std::cout << sortBins[i] << " " << std::endl;;
398  std::cout << "]" << std::endl;
399  }
400  * /
401  / *
402  double t0 = omp_get_wtime();
403  par::bucketDataAndWriteSkewed(qq, sortBins, "/tmp/foo", MPI_COMM_WORLD);
404  double t1 = omp_get_wtime();
405 
406  * /
407  double t0 = omp_get_wtime();
408  // par::HyperQuickSort(qq, out, MPI_COMM_WORLD);
409  par::sampleSort(qq, MPI_COMM_WORLD);
410  double t1 = omp_get_wtime();
411 
412  std::cout << myrank << ": all done in " << t1-t0 << std::endl;
413 
414 
415  MPI_Finalize();
416  return 0;
418  */
419  int proc_group=0;
420  int min_np=1;
421  MPI_Comm comm;
422 
423  std::vector<double> tt(10000,0);
424 
425  int k = 0; // in case size based runs are needed
426  char dtype = argv[2][0];
427  long N = getNumElements(argv[2]);
428  DistribType dist_type=getDistType(argv[3]);
429  if (!N) {
430  std::cerr << "illegal typeSize code provided: " << argv[2] << std::endl;
431  return 2;
432  }
433 
434  std::string num;
435  std::stringstream mystream;
436  mystream << N*p;
437  num = mystream.str();
438  int insertPosition = num.length() - 3;
439  while (insertPosition > 0) {
440  num.insert(insertPosition, ",");
441  insertPosition-=3;
442  }
443  if (!myrank)
444  std::cout << "sorting array of size " << num << " keys of type " << dtype << std::endl;
445 
446  // check if arguments are ok ...
447 
448  { // -- full size run
449  double ttt;
450 
451  switch(dtype) {
452  case 'd':
453  ttt = time_sort<double>(N, MPI_COMM_WORLD,dist_type);
454  break;
455  case 'f':
456  ttt = time_sort<float>(N, MPI_COMM_WORLD,dist_type);
457  break;
458  case 'i':
459  ttt = time_sort<int>(N, MPI_COMM_WORLD,dist_type);
460  break;
461  case 'l':
462  ttt = time_sort<long>(N, MPI_COMM_WORLD,dist_type);
463  break;
464  };
465 #ifdef _PROFILE_SORT
466  if (!myrank) {
467  std::cout << "---------------------------------------------------------------------------" << std::endl;
468  #ifndef KWICK
469  std::cout << "\tSample Sort with " << KWAY << "-way all2all" << "\t\tMean\tMin\tMax" << std::endl;
470  #else
471  #ifdef SWAPRANKS
472  std::cout << "\t" << KWAY << "-way SwapRankSort " << "\t\tMean\tMin\tMax" << std::endl;
473  #else
474  std::cout << "\t" << KWAY << "-way HyperQuickSort" << "\t\tMean\tMin\tMax" << std::endl;
475  #endif
476  #endif
477  std::cout << "---------------------------------------------------------------------------" << std::endl;
478  }
479  printResults(num_threads, MPI_COMM_WORLD);
480 #endif
481 
482  if(!myrank){
483  tt[100*k+0]=ttt;
484  }
485  }
486 
487  // MPI_Finalize();
488  // return 0;
489 
490  for(int i=p; myrank<i && i>=min_np; i=i>>1) proc_group++;
491  MPI_Comm_split(MPI_COMM_WORLD, proc_group, myrank, &comm);
492 
493  { // smaller /2^k runs
494  int myrank_;
495  MPI_Comm_rank(comm, &myrank_);
496  double ttt;
497 
498  switch(dtype) {
499  case 'd':
500  ttt = time_sort<double>(N, comm,dist_type);
501  break;
502  case 'f':
503  ttt = time_sort<float>(N, comm,dist_type);
504  break;
505  case 'i':
506  ttt = time_sort<int>(N, comm,dist_type);
507  break;
508  case 'l':
509  ttt = time_sort<long>(N, comm,dist_type);
510  break;
511  };
512 
513  if(!myrank_){
514  tt[100*k+proc_group]=ttt;
515  }
516  }
517 
518  int num_groups=0;
519  if (!myrank) num_groups = proc_group;
520  MPI_Bcast(&num_groups, 1, MPI_INT, 0, MPI_COMM_WORLD);
521 
522  for(size_t i = 0; i < num_groups; ++i)
523  {
524  MPI_Barrier(MPI_COMM_WORLD);
525  if (proc_group == i) {
526  MPI_Barrier(comm);
527  printResults(num_threads, comm);
528  MPI_Barrier(comm);
529  }
530  }
531 
532  MPI_Barrier(MPI_COMM_WORLD);
533 
534  std::vector<double> tt_glb(10000);
535  MPI_Reduce(&tt[0], &tt_glb[0], 10000, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
536  if(!myrank){
537  std::cout<<"\nSort - Weak Scaling:\n";
538  for(int i=0;i<proc_group;i++){
539  int np=p;
540  if(i>0) np=(p>>(i-1))-(p>>i);
541  std::cout<<"\t\t\tP = "<<np<<"\t\t";
542  // for(int k=0;k<=log_N;k++)
543  std::cout<<tt_glb[100*k+i]<<' ';
544  std::cout<<'\n';
545  }
546  }
547 
548  // Shut down MPI
549  MPI_Finalize();
550  return 0;
551  }
552 
553 void printResults(int num_threads, MPI_Comm comm) {
554  int myrank, p, simd_width=0;
555  MPI_Comm_size(comm, &p);
556  MPI_Comm_rank(comm, &myrank);
557 #ifdef SIMD_MERGE
558  simd_width=SIMD_MERGE;
559 #endif
560  // reduce results
561  double t, meanV, minV, maxV;
562 
563 #ifdef _PROFILE_SORT
564  if (!myrank) {
565  // std::cout << std::endl;
566  // std::cout << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << std::endl;
567  std::cout << p << " tasks : " << num_threads << " threads " << simd_width << " SIMD_WIDTH " << std::endl;
568  // std::cout << "===========================================================================" << std::endl;
569  }
570  t = total_sort.seconds; getStats(t, &meanV, &minV, &maxV, comm);
571  if (!myrank) {
572  std::cout << "Total sort time \t\t\t" << meanV << "\t" << minV << "\t" << maxV << std::endl;
573  // std::cout << "----------------------------------------------------------------------" << std::endl;
574  }
575  t = seq_sort.seconds; getStats(t, &meanV, &minV, &maxV, comm);
576  if (!myrank) {
577  std::cout << "Sequential Sort \t\t\t" << meanV << "\t" << minV << "\t" << maxV << std::endl;
578  }
579  t = sort_partitionw.seconds; getStats(t, &meanV, &minV, &maxV, comm);
580  if (!myrank) {
581  std::cout << "partitionW \t\t\t" << meanV << "\t" << minV << "\t" << maxV << std::endl;
582  // std::cout << "----------------------------------------------------------------------" << std::endl;
583  }
584 #ifndef KWICK
585  t = sample_sort_splitters.seconds; getStats(t, &meanV, &minV, &maxV, comm);
586  if (!myrank) {
587  std::cout << "sort splitters \t\t\t" << meanV << "\t" << minV << "\t" << maxV << std::endl;
588  }
589  t = sample_prepare_scatter.seconds; getStats(t, &meanV, &minV, &maxV, comm);
590  if (!myrank) {
591  std::cout << "prepare scatter \t\t\t" << meanV << "\t" << minV << "\t" << maxV << std::endl;
592  }
593  t = sample_do_all2all.seconds; getStats(t, &meanV, &minV, &maxV, comm);
594  if (!myrank) {
595  std::cout << "all2all \t\t\t" << meanV << "\t" << minV << "\t" << maxV << std::endl;
596  }
597 #else
598  t = hyper_compute_splitters.seconds; getStats(t, &meanV, &minV, &maxV, comm);
599  if (!myrank) {
600  std::cout << "compute splitters \t\t\t" << meanV << "\t" << minV << "\t" << maxV << std::endl;
601  }
602  t = hyper_communicate.seconds; getStats(t, &meanV, &minV, &maxV, comm);
603  if (!myrank) {
604  std::cout << "exchange data \t\t\t" << meanV << "\t" << minV << "\t" << maxV << std::endl;
605  }
606  t = hyper_merge.seconds; getStats(t, &meanV, &minV, &maxV, comm);
607  if (!myrank) {
608  std::cout << "merge arrays \t\t\t" << meanV << "\t" << minV << "\t" << maxV << std::endl;
609  }
610  t = hyper_comm_split.seconds; getStats(t, &meanV, &minV, &maxV, comm);
611  if (!myrank) {
612  std::cout << "comm split \t\t\t" << meanV << "\t" << minV << "\t" << maxV << std::endl;
613  }
614  if (!myrank) {
615  std::string num;
616  std::stringstream mystream;
617  mystream << total_bytes;
618  num = mystream.str();
619  int insertPosition = num.length() - 3;
620  while (insertPosition > 0) {
621  num.insert(insertPosition, ",");
622  insertPosition-=3;
623  }
624  std::cout << "total comm \t\t\t" << num << " bytes" << std::endl;
625  }
626 
627 #endif
628 #endif
629  if (!myrank) {
630  // std::cout << "---------------------------------------------------------------------------" << std::endl;
631  std::cout << "" << std::endl;
632  }
633 }
634 
635 #define FALSE 0 // Boolean false
636 #define TRUE 1 // Boolean true
637 //===========================================================================
638 //= Function to generate Zipf (power law) distributed random variables =
639 //= - Input: alpha and N =
640 //= - Output: Returns with Zipf distributed random variable =
641 //===========================================================================
642 int zipf(double alpha, int n, unsigned int *seedp)
643 {
644  static int first = TRUE; // Static first time flag
645  static double c = 0; // Normalization constant
646  double z; // Uniform random number (0 < z < 1)
647  double sum_prob; // Sum of probabilities
648  double zipf_value; // Computed exponential value to be returned
649  int i; // Loop counter
650 
651  // Compute normalization constant on first call only
652  if (first == TRUE)
653  {
654  for (i=1; i<=n; i++)
655  c = c + (1.0 / pow((double) i, alpha));
656  c = 1.0 / c;
657  first = FALSE;
658  }
659 
660  // Pull a uniform random number (0 < z < 1)
661  do
662  {
663  z = rand_r(seedp)*(1.0/RAND_MAX);
664  }
665  while ((z == 0) || (z == 1));
666 
667  static std::vector<double> oopia;
668  #pragma omp critical
669  if(oopia.size()!=n){
670  oopia.resize(n);
671  for(int i=0;i<n;i++)
672  oopia[i]=1.0/pow((double) i, alpha);
673  }
674 
675  // Map z to the value
676  sum_prob = 0;
677  for (i=1; i<=n; i++)
678  {
679  sum_prob = sum_prob + c*oopia[i-1];
680  if (sum_prob >= z)
681  {
682  zipf_value = i;
683  break;
684  }
685  }
686 
687  // Assert that zipf_value is between 1 and N
688  assert((zipf_value >=1) && (zipf_value <= n));
689 
690  return(zipf_value);
691 }
sort_profiler_t total_sort
A set of parallel utilities.
sort_profiler_t sort_partitionw
void printResults(int num_threads, MPI_Comm comm)
Definition: main.cpp:553
int main(int argc, char **argv)
Definition: main.cpp:319
bool verify(std::vector< T > &in_, std::vector< T > &out_, MPI_Comm comm)
Definition: main.cpp:110
sort_profiler_t sample_prepare_scatter
sort_profiler_t hyper_communicate
void getStats(double val, double *meanV, double *minV, double *maxV, MPI_Comm comm)
Definition: main.cpp:45
DistribType getDistType(char *code)
Definition: main.cpp:56
long total_bytes
DistribType
Definition: main.cpp:38
double time_sort(size_t N, MPI_Comm comm, DistribType dist_type)
Definition: main.cpp:164
sort_profiler_t seq_sort
sort_profiler_t sample_get_splitters
sort_profiler_t sample_do_all2all
#define TRUE
Definition: main.cpp:636
sort_profiler_t hyper_merge
long getNumElements(char *code)
Definition: main.cpp:64
sort_profiler_t sample_sort_splitters
A set of efficient functions that use binary operations to perform some small computations.
sort_profiler_t hyper_comm_split
int zipf(double alpha, int n, unsigned int *seedp)
Definition: main.cpp:642
void scan(T *A, T *B, I cnt)
#define FALSE
Definition: main.cpp:635
sort_profiler_t hyper_compute_splitters