COMBINATORIAL_BLAS  1.6
Multiplier.h
Go to the documentation of this file.
1 #ifndef _MULTIPLIER_H_
2 #define _MULTIPLIER_H_
3 
4 #include "CombBLAS/CombBLAS.h"
5 #include "CCGrid.h"
6 #include "SUMMALayer.h"
7 
8 namespace combblas {
9 
10 template <typename IT, typename NT>
11 SpDCCols<IT, NT>* multiply(SpDCCols<IT, NT> & splitA, SpDCCols<IT, NT> & splitB, CCGrid & CMG, bool isBT, bool threaded)
12 {
13 
15  int myrank;
16  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
17  std::vector< SpTuples<IT,NT>* > unreducedC;
18 
19  MPI_Barrier(MPI_COMM_WORLD);
20  double time_beg = MPI_Wtime();
21 
22  SUMMALayer(splitA, splitB, unreducedC, CMG, isBT, threaded);
23 
24  MPI_Barrier(MPI_COMM_WORLD);
25  double time_mid = MPI_Wtime();
26 
27  SpDCCols<IT,NT> * mergedC;
28  mergedC = ReduceAll_threaded(unreducedC, CMG);
29  MPI_Barrier(MPI_COMM_WORLD);
30  double time_end = MPI_Wtime();
31  double time_total = time_end-time_beg;
32 
33  /*
34  int64_t local_nnz = mergedC->getnnz();
35  int64_t global_nnz = 0;
36 
37  MPI_Reduce(&local_nnz, &global_nnz, 1, MPIType<int64_t>(), MPI_SUM, 0, MPI_COMM_WORLD);
38  if(myrank == 0)
39  {
40  cout << "Global nonzeros in C is " << global_nnz << endl;
41  }
42  */
43 
44  int nthreads;
45 #pragma omp parallel
46  {
47  nthreads = omp_get_num_threads();
48  }
49  if(CMG.myrank == 0)
50  {
51  double time_other = time_total - (comm_bcast + comm_reduce + comp_summa + comp_reduce + comp_reduce_layer + comp_result);
52  //printf(" ----------------------------------------------------------------------------------------------\n");
53  //printf(" comm_bcast comm_scatter comp_summa comp_merge comp_scatter comp_result other total\n");
54  //printf(" ----------------------------------------------------------------------------------------------\n");
55 
56  //printf("%10lf %12lf %12lf %10lf %12lf %12lf %12lf %10lf\n\n", comm_bcast, comm_reduce, comp_summa, comp_reduce, comp_reduce_layer, comp_result, time_other, time_total);
57  printf("%4d %4d %5d %6d %10lf %12lf %12lf %10lf %12lf %12lf %12lf %10lf\n", CMG.GridRows, CMG.GridCols, CMG.GridLayers, nthreads, comm_bcast, comm_reduce, comp_summa, comp_reduce, comp_reduce_layer, comp_result, time_other, time_total);
58  }
59 
60  return mergedC;
61 }
62 
63 }
64 
65 #endif
double comp_reduce
Definition: mpipspgemm.cpp:25
SpDCCols< IT, NT > * ReduceAll_threaded(std::vector< SpTuples< IT, NT > * > &unreducedC, CCGrid &CMG)
Definition: Reductions.h:134
void SUMMALayer(SpDCCols< IT, NT > &SplitA, SpDCCols< IT, NT > &SplitB, std::vector< SpTuples< IT, NT > * > &C, CCGrid &CMG, bool isBT, bool threaded)
Definition: SUMMALayer.h:25
double comp_result
Definition: mpipspgemm.cpp:26
double comm_reduce
Definition: mpipspgemm.cpp:23
Definition: CCGrid.h:4
double comp_reduce_layer
Definition: mpipspgemm.cpp:27
SpDCCols< IT, NT > * multiply(SpDCCols< IT, NT > &splitA, SpDCCols< IT, NT > &splitB, CCGrid &CMG, bool isBT, bool threaded)
Definition: Multiplier.h:11
double comp_summa
Definition: mpipspgemm.cpp:24
int GridLayers
Definition: CCGrid.h:36
double comm_bcast
Definition: mpipspgemm.cpp:22