23 #define SORT_FUNCTION par::HyperQuickSort_kway 26 #define SORT_FUNCTION par::RankSwapSort 28 #define SORT_FUNCTION par::HyperQuickSort 32 #define SORT_FUNCTION par::sampleSort 45 void getStats(
double val,
double *meanV,
double *minV,
double *maxV, MPI_Comm comm)
50 MPI_Comm_size(comm, &p);
51 MPI_Reduce(&din, &d, 1, MPI_DOUBLE, MPI_SUM, 0, comm); *meanV = d/p;
52 MPI_Reduce(&din, &d, 1, MPI_DOUBLE, MPI_MIN, 0, comm); *minV = d;
53 MPI_Reduce(&din, &d, 1, MPI_DOUBLE, MPI_MAX, 0, comm); *maxV = d;
57 if(!strcmp(code,
"GAUSS\0")){
65 unsigned int slen = strlen(code);
68 strncpy(tmp, code+1, slen-3); tmp[slen-3] =
'\0';
70 long numBytes = atol(tmp);
71 switch(code[slen-2]) {
74 numBytes *= 1024*1024*1024;
82 numBytes *= 1024*1024;
85 std::cout <<
"unknown code " << code[slen-2] << std::endl;
91 return numBytes/
sizeof(double);
94 return numBytes/
sizeof(float);
97 return numBytes/
sizeof(int);
100 return numBytes/
sizeof(long);
103 std::cout <<
"unknown dtype: " << dtype << std::endl;
110 bool verify (std::vector<T>& in_, std::vector<T> &out_, MPI_Comm comm){
114 MPI_Comm_rank(comm, &myrank);
115 MPI_Comm_size(comm,&p);
117 if (!myrank) std::cout <<
"Verifying sort" << std::endl;
121 int N_local=in_.size()*
sizeof(T);
122 std::vector<int> r_size(p, 0);
123 std::vector<int> r_disp(p, 0);
124 MPI_Gather(&N_local , 1, MPI_INT,
125 &r_size[0], 1, MPI_INT, 0, comm);
128 if(!myrank) in.resize((r_size[p-1]+r_disp[p-1])/
sizeof(T));
129 MPI_Gatherv((
char*)&in_[0], N_local, MPI_BYTE,
130 (
char*)&in [0], &r_size[0], &r_disp[0], MPI_BYTE, 0, comm);
135 int N_local=out_.size()*
sizeof(T);
136 std::vector<int> r_size(p, 0);
137 std::vector<int> r_disp(p, 0);
138 MPI_Gather(&N_local , 1, MPI_INT,
139 &r_size[0], 1, MPI_INT, 0, comm);
142 if(!myrank) out.resize((r_size[p-1]+r_disp[p-1])/
sizeof(T));
143 MPI_Gatherv((
char*)&out_[0], N_local, MPI_BYTE,
144 (
char*)&out [0], &r_size[0], &r_disp[0], MPI_BYTE, 0, comm);
147 if(in.size()!=out.size()){
148 std::cout<<
"Wrong size: in="<<in.size()<<
" out="<<out.size()<<
'\n';
151 std::sort(&in[0], &in[in.size()]);
153 for(
long j=0;j<in.size();j++)
155 std::cout<<
"Failed at:"<<j<<
'\n';
167 MPI_Comm_rank(comm, &myrank);
168 MPI_Comm_size(comm,&p);
169 int omp_p=omp_get_max_threads();
171 std::vector<IndexHolder<T>> in1(N);
174 std::vector<T> in(N);
176 #pragma omp parallel for 177 for(
int j=0;j<omp_p;j++){
178 unsigned int seed=j*p+myrank;
179 size_t start=(j*N)/omp_p;
180 size_t end=((j+1)*N)/omp_p;
181 for(
unsigned int i=start;i<end;i++){
186 double e=2.7182818284590452;
189 unsigned int seed1=p+myrank;
190 long mn = rand_r(&seed1);
192 #pragma omp parallel for 193 for(
int j=0;j<omp_p;j++){
194 unsigned int seed=j*p+myrank;
195 size_t start=(j*N)/omp_p;
196 size_t end=((j+1)*N)/omp_p;
197 for(
unsigned int i=start;i<end;i++){
198 in[i]= mn + sqrt(-2*log(rand_r(&seed)*1.0/RAND_MAX)/log_e)
199 * cos(rand_r(&seed)*2*M_PI/RAND_MAX)*RAND_MAX*0.1;
204 std::vector<T> in_cpy=in;
208 SORT_FUNCTION<T>(in_cpy, comm);
232 double wtime=-omp_get_wtime();
234 SORT_FUNCTION<T>(in, comm);
236 wtime+=omp_get_wtime();
319 int main(
int argc,
char **argv){
322 std::cerr <<
"Usage: " << argv[0] <<
" numThreads typeSize typeDistrib" << std::endl;
323 std::cerr <<
"\t\t typeSize is a character for type of data follwed by data size per node." << std::endl;
324 std::cerr <<
"\t\t typeSize can be d-double, f-float, i-int, l-long." << std::endl;
325 std::cerr <<
"\t\t Examples:" << std::endl;
326 std::cerr <<
"\t\t i1GB : integer array of size 1GB" << std::endl;
327 std::cerr <<
"\t\t l1GB : long array of size 1GB" << std::endl;
328 std::cerr <<
"\t\t typeDistrib can be UNIF, GAUSS" << std::endl;
332 std::cout<<setiosflags(std::ios::fixed)<<std::setprecision(4)<<std::setiosflags(std::ios::right);
335 int num_threads = atoi(argv[1]);
336 omp_set_num_threads(num_threads);
339 MPI_Init(&argc, &argv);
343 MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
347 MPI_Comm_size(MPI_COMM_WORLD, &p);
423 std::vector<double> tt(10000,0);
426 char dtype = argv[2][0];
430 std::cerr <<
"illegal typeSize code provided: " << argv[2] << std::endl;
435 std::stringstream mystream;
437 num = mystream.str();
438 int insertPosition = num.length() - 3;
439 while (insertPosition > 0) {
440 num.insert(insertPosition,
",");
444 std::cout <<
"sorting array of size " << num <<
" keys of type " << dtype << std::endl;
453 ttt = time_sort<double>(N, MPI_COMM_WORLD,dist_type);
456 ttt = time_sort<float>(N, MPI_COMM_WORLD,dist_type);
459 ttt = time_sort<int>(N, MPI_COMM_WORLD,dist_type);
462 ttt = time_sort<long>(N, MPI_COMM_WORLD,dist_type);
467 std::cout <<
"---------------------------------------------------------------------------" << std::endl;
469 std::cout <<
"\tSample Sort with " << KWAY <<
"-way all2all" <<
"\t\tMean\tMin\tMax" << std::endl;
472 std::cout <<
"\t" << KWAY <<
"-way SwapRankSort " <<
"\t\tMean\tMin\tMax" << std::endl;
474 std::cout <<
"\t" << KWAY <<
"-way HyperQuickSort" <<
"\t\tMean\tMin\tMax" << std::endl;
477 std::cout <<
"---------------------------------------------------------------------------" << std::endl;
490 for(
int i=p; myrank<i && i>=min_np; i=i>>1) proc_group++;
491 MPI_Comm_split(MPI_COMM_WORLD, proc_group, myrank, &comm);
495 MPI_Comm_rank(comm, &myrank_);
500 ttt = time_sort<double>(N, comm,dist_type);
503 ttt = time_sort<float>(N, comm,dist_type);
506 ttt = time_sort<int>(N, comm,dist_type);
509 ttt = time_sort<long>(N, comm,dist_type);
514 tt[100*k+proc_group]=ttt;
519 if (!myrank) num_groups = proc_group;
520 MPI_Bcast(&num_groups, 1, MPI_INT, 0, MPI_COMM_WORLD);
522 for(
size_t i = 0; i < num_groups; ++i)
524 MPI_Barrier(MPI_COMM_WORLD);
525 if (proc_group == i) {
532 MPI_Barrier(MPI_COMM_WORLD);
534 std::vector<double> tt_glb(10000);
535 MPI_Reduce(&tt[0], &tt_glb[0], 10000, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
537 std::cout<<
"\nSort - Weak Scaling:\n";
538 for(
int i=0;i<proc_group;i++){
540 if(i>0) np=(p>>(i-1))-(p>>i);
541 std::cout<<
"\t\t\tP = "<<np<<
"\t\t";
543 std::cout<<tt_glb[100*k+i]<<
' ';
554 int myrank, p, simd_width=0;
555 MPI_Comm_size(comm, &p);
556 MPI_Comm_rank(comm, &myrank);
558 simd_width=SIMD_MERGE;
561 double t, meanV, minV, maxV;
567 std::cout << p <<
" tasks : " << num_threads <<
" threads " << simd_width <<
" SIMD_WIDTH " << std::endl;
572 std::cout <<
"Total sort time \t\t\t" << meanV <<
"\t" << minV <<
"\t" << maxV << std::endl;
577 std::cout <<
"Sequential Sort \t\t\t" << meanV <<
"\t" << minV <<
"\t" << maxV << std::endl;
581 std::cout <<
"partitionW \t\t\t" << meanV <<
"\t" << minV <<
"\t" << maxV << std::endl;
587 std::cout <<
"sort splitters \t\t\t" << meanV <<
"\t" << minV <<
"\t" << maxV << std::endl;
591 std::cout <<
"prepare scatter \t\t\t" << meanV <<
"\t" << minV <<
"\t" << maxV << std::endl;
595 std::cout <<
"all2all \t\t\t" << meanV <<
"\t" << minV <<
"\t" << maxV << std::endl;
600 std::cout <<
"compute splitters \t\t\t" << meanV <<
"\t" << minV <<
"\t" << maxV << std::endl;
604 std::cout <<
"exchange data \t\t\t" << meanV <<
"\t" << minV <<
"\t" << maxV << std::endl;
608 std::cout <<
"merge arrays \t\t\t" << meanV <<
"\t" << minV <<
"\t" << maxV << std::endl;
612 std::cout <<
"comm split \t\t\t" << meanV <<
"\t" << minV <<
"\t" << maxV << std::endl;
616 std::stringstream mystream;
618 num = mystream.str();
619 int insertPosition = num.length() - 3;
620 while (insertPosition > 0) {
621 num.insert(insertPosition,
",");
624 std::cout <<
"total comm \t\t\t" << num <<
" bytes" << std::endl;
631 std::cout <<
"" << std::endl;
635 #define FALSE 0 // Boolean false 636 #define TRUE 1 // Boolean true 642 int zipf(
double alpha,
int n,
unsigned int *seedp)
644 static int first =
TRUE;
655 c = c + (1.0 / pow((
double) i, alpha));
663 z = rand_r(seedp)*(1.0/RAND_MAX);
665 while ((z == 0) || (z == 1));
667 static std::vector<double> oopia;
672 oopia[i]=1.0/pow((
double) i, alpha);
679 sum_prob = sum_prob + c*oopia[i-1];
688 assert((zipf_value >=1) && (zipf_value <= n));
sort_profiler_t total_sort
A set of parallel utilities.
sort_profiler_t sort_partitionw
void printResults(int num_threads, MPI_Comm comm)
int main(int argc, char **argv)
bool verify(std::vector< T > &in_, std::vector< T > &out_, MPI_Comm comm)
sort_profiler_t sample_prepare_scatter
sort_profiler_t hyper_communicate
void getStats(double val, double *meanV, double *minV, double *maxV, MPI_Comm comm)
DistribType getDistType(char *code)
double time_sort(size_t N, MPI_Comm comm, DistribType dist_type)
sort_profiler_t sample_get_splitters
sort_profiler_t sample_do_all2all
sort_profiler_t hyper_merge
long getNumElements(char *code)
sort_profiler_t sample_sort_splitters
A set of efficient functions that use binary operations to perform some small computations.
sort_profiler_t hyper_comm_split
int zipf(double alpha, int n, unsigned int *seedp)
void scan(T *A, T *B, I cnt)
sort_profiler_t hyper_compute_splitters