COMBINATORIAL_BLAS  1.6
test_mpipspgemm.cpp
#include <mpi.h>
#include <sys/time.h>
#include <iostream>
#include <functional>
#include <algorithm>
#include <vector>
#include <string>
#include <sstream>
#include <stdint.h>
#include <cmath>
#include "CombBLAS/CombBLAS.h"
#include "Glue.h"
#include "CCGrid.h"
#include "Reductions.h"
#include "Multiplier.h"
#include "SplitMatDist.h"

using namespace std;
using namespace combblas;

// Timing globals filled in by the multiplication and reduction kernels
double comm_bcast;
double comm_reduce;
double comp_summa;
double comp_reduce;
double comp_result;
double comp_reduce_layer;
double comp_split;
double comp_trans;
double comm_split;

#define ITERS 1

int main(int argc, char *argv[])
{
    int provided;
    //MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided);
    MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
    if (provided < MPI_THREAD_SERIALIZED)
    {
        printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    int nprocs, myrank;
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    if(argc != 8)
    {
        cout << argc << endl;
        if(myrank == 0)
        {
            printf("Usage (input): ./mpipspgemm <GridRows> <GridCols> <Layers> <matA> <matB> <matC> <algo>\n");
            printf("Example: ./mpipspgemm 4 4 2 matA.mtx matB.mtx matB.mtx threaded\n");
            printf("algo: outer | column | threaded \n");
        }
        return -1;
    }

    unsigned GRROWS = (unsigned) atoi(argv[1]);
    unsigned GRCOLS = (unsigned) atoi(argv[2]);
    unsigned C_FACTOR = (unsigned) atoi(argv[3]);
    CCGrid CMG(C_FACTOR, GRCOLS);

    if(GRROWS != GRCOLS)
    {
        SpParHelper::Print("This version of the Combinatorial BLAS only works on a square logical processor grid\n");
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    int layer_length = GRROWS*GRCOLS;
    if(layer_length * C_FACTOR != nprocs)
    {
        SpParHelper::Print("The product of <GridRows> <GridCols> <Layers> does not match the number of processes\n");
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    SpDCCols<int32_t, double> splitA, splitB, controlC;
    SpDCCols<int32_t, double> *splitC;
    string type;

    string fileA(argv[4]);
    string fileB(argv[5]);
    string fileC(argv[6]);

    {
        shared_ptr<CommGrid> layerGrid;
        layerGrid.reset( new CommGrid(CMG.layerWorld, 0, 0) );
        FullyDistVec<int32_t, int32_t> p(layerGrid); // permutation vector defined on layers

        double t01 = MPI_Wtime();

        SpDCCols<int32_t, double> *A = ReadMat<double>(fileA, CMG, true, p);
        SpDCCols<int32_t, double> *B = ReadMat<double>(fileB, CMG, true, p);
        SpDCCols<int32_t, double> *C = ReadMat<double>(fileC, CMG, true, p);

        SplitMat(CMG, A, splitA, false);
        SplitMat(CMG, B, splitB, true);    // B is row-split (rowsplit = true); A and the control matrix stay column-split
        SplitMat(CMG, C, controlC, false);

        if(myrank == 0) cout << "Matrices read and replicated along layers : time " << MPI_Wtime() - t01 << endl;

        type = string(argv[7]);

        if(type == string("outer"))
        {
            for(int k=0; k<ITERS; k++)
            {
                splitB.Transpose(); // locally "transpose" [ABAB: check correctness]
                splitC = multiply(splitA, splitB, CMG, true, false); // outer product
                if (controlC == *splitC)
                    SpParHelper::Print("Outer product multiplication working correctly\n");
                else
                    SpParHelper::Print("ERROR in Outer product multiplication, go fix it!\n");
                delete splitC;
            }
        }
        else if(type == string("column"))
        {
            for(int k=0; k<ITERS; k++)
            {
                splitC = multiply(splitA, splitB, CMG, false, false);
                if (controlC == *splitC)
                    SpParHelper::Print("Col-heap multiplication working correctly\n");
                else
                    SpParHelper::Print("ERROR in Col-heap multiplication, go fix it!\n");
                delete splitC;
            }
        }
        else // default: threaded column algorithm
        {
            for(int k=0; k<ITERS; k++)
            {
                splitC = multiply(splitA, splitB, CMG, false, true);
                if (controlC == *splitC)
                    SpParHelper::Print("Col-heap-threaded multiplication working correctly\n");
                else
                    SpParHelper::Print("ERROR in Col-heap-threaded multiplication, go fix it!\n");
                delete splitC;
            }
        }
    }

    MPI_Finalize();
    return 0;
}
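
The product GridRows x GridCols x Layers must equal the MPI process count, so the 4 x 4 x 2 grid from the usage message above runs on 32 ranks. A minimal launch, assuming a standard mpirun-style launcher (adjust for your MPI installation), would be:

    mpirun -np 32 ./mpipspgemm 4 4 2 matA.mtx matB.mtx matB.mtx threaded

Here the sixth argument (<matC>) supplies the control matrix that each computed product is compared against when the test reports success or failure.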