COMBINATORIAL_BLAS  1.6
mpipspgemm.cpp
1 #include <mpi.h>
2 #include <sys/time.h>
3 #include <iostream>
4 #include <functional>
5 #include <algorithm>
6 #include <vector>
7 #include <string>
8 #include <sstream>
9 #include <stdint.h>
10 #include <cmath>
11 #include "CombBLAS/CombBLAS.h"
12 #include "Glue.h"
13 #include "CCGrid.h"
14 #include "Reductions.h"
15 #include "Multiplier.h"
16 #include "SplitMatDist.h"
17 
18 
19 using namespace std;
20 using namespace combblas;
21 
22 double comm_bcast;
23 double comm_reduce;
24 double comp_summa;
25 double comp_reduce;
26 double comp_result;
27 double comp_reduce_layer;
28 double comp_split;
29 double comp_trans;
30 double comm_split;
31 
32 #define ITERS 5
33 
34 int main(int argc, char *argv[])
35 {
36  int provided;
37 
38  MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
39  if (provided < MPI_THREAD_SERIALIZED)
40  {
41  printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
42  MPI_Abort(MPI_COMM_WORLD, 1);
43  }
44 
45 
46 
47  int nprocs, myrank;
48  MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
49  MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
50 
51  if(argc < 8)
52  {
53  if(myrank == 0)
54  {
55  printf("Usage (random): ./mpipspgemm <GridRows> <GridCols> <Layers> <Type> <Scale> <EDGEFACTOR> <algo>\n");
56  printf("Usage (input): ./mpipspgemm <GridRows> <GridCols> <Layers> <Type=input> <matA> <matB> <algo>\n");
57  printf("Example: ./mpipspgemm 4 4 2 ER 19 16 outer\n");
58  printf("Example: ./mpipspgemm 4 4 2 Input matA.mtx matB.mtx column\n");
59  printf("Type ER: Erdos-Renyi\n");
60  printf("Type SSCA: R-MAT with SSCA benchmark parameters\n");
61  printf("Type G500: R-MAT with Graph500 benchmark parameters\n");
62  printf("algo: outer | column \n");
63  }
64  return -1;
65  }
66 
67 
68  unsigned GRROWS = (unsigned) atoi(argv[1]);
69  unsigned GRCOLS = (unsigned) atoi(argv[2]);
70  unsigned C_FACTOR = (unsigned) atoi(argv[3]);
71  CCGrid CMG(C_FACTOR, GRCOLS);
72  int nthreads;
73 #pragma omp parallel
74  {
75  nthreads = omp_get_num_threads();
76  }
77 
78 
79  if(GRROWS != GRCOLS)
80  {
81  SpParHelper::Print("This version of the Combinatorial BLAS only works on a square logical processor grid\n");
82  MPI_Barrier(MPI_COMM_WORLD);
83  MPI_Abort(MPI_COMM_WORLD, 1);
84  }
85 
86  int layer_length = GRROWS*GRCOLS;
87  if(layer_length * C_FACTOR != nprocs)
88  {
89  SpParHelper::Print("The product of <GridRows> <GridCols> <Layers> does not match the number of processes\n");
90  MPI_Barrier(MPI_COMM_WORLD);
91  MPI_Abort(MPI_COMM_WORLD, 1);
92  }
93 
94 
95  {
96  SpDCCols<int64_t, double> splitA, splitB;
97  SpDCCols<int64_t, double> *splitC;
98  string type;
99  shared_ptr<CommGrid> layerGrid;
100  layerGrid.reset( new CommGrid(CMG.layerWorld, 0, 0) );
101  FullyDistVec<int64_t, int64_t> p(layerGrid); // permutation vector defined on layers
102 
103  if(string(argv[4]) == string("input")) // input option
104  {
105  string fileA(argv[5]);
106  string fileB(argv[6]);
107 
108  double t01 = MPI_Wtime();
109  SpDCCols<int64_t, double> *A = ReadMat<double>(fileA, CMG, true, p);
110  SpDCCols<int64_t, double> *B = ReadMat<double>(fileB, CMG, true, p);
111  SplitMat(CMG, A, splitA, false);
112  SplitMat(CMG, B, splitB, true); //row-split
113  if(myrank == 0) cout << "Matrices read and replicated along layers : time " << MPI_Wtime() - t01 << endl;
114  }
115  else
116  {
117  unsigned scale = (unsigned) atoi(argv[5]);
118  unsigned EDGEFACTOR = (unsigned) atoi(argv[6]);
119  double initiator[4];
120  if(string(argv[4]) == string("ER"))
121  {
122  initiator[0] = .25;
123  initiator[1] = .25;
124  initiator[2] = .25;
125  initiator[3] = .25;
126  }
127  else if(string(argv[4]) == string("G500"))
128  {
129  initiator[0] = .57;
130  initiator[1] = .19;
131  initiator[2] = .19;
132  initiator[3] = .05;
133  EDGEFACTOR = 16;
134  }
135  else if(string(argv[4]) == string("SSCA"))
136  {
137  initiator[0] = .6;
138  initiator[1] = .4/3;
139  initiator[2] = .4/3;
140  initiator[3] = .4/3;
141  EDGEFACTOR = 8;
142  }
143  else {
144  if(myrank == 0)
145  printf("The initiator parameter - %s - is not recognized.\n", argv[4]);
146  MPI_Abort(MPI_COMM_WORLD, 1);
147  }
148 
149 
150  double t01 = MPI_Wtime();
151  SpDCCols<int64_t, double> *A = GenMat<int64_t,double>(CMG, scale, EDGEFACTOR, initiator, true);
152  SpDCCols<int64_t, double> *B = GenMat<int64_t,double>(CMG, scale, EDGEFACTOR, initiator, true);
153 
154  SplitMat(CMG, A, splitA, false);
155  SplitMat(CMG, B, splitB, true); //row-split
156  if(myrank == 0) cout << "RMATs Generated and replicated along layers : time " << MPI_Wtime() - t01 << endl;
157 
158  }
159 
160  int64_t globalnnzA=0, globalnnzB=0;
161  int64_t localnnzA = splitA.getnnz();
162  int64_t localnnzB = splitB.getnnz();
163  MPI_Allreduce( &localnnzA, &globalnnzA, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
164  MPI_Allreduce( &localnnzB, &globalnnzB, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
165  if(myrank == 0) cout << "After split: nnzA= " << globalnnzA << " & nnzB= " << globalnnzB;
166 
167 
168 
169  type = string(argv[7]);
170  if(myrank == 0)
171  {
172  printf("\n Processor Grid (row x col x layers x threads): %dx%dx%dx%d \n", CMG.GridRows, CMG.GridCols, CMG.GridLayers, nthreads);
173  printf(" prow pcol layer thread comm_bcast comm_scatter comp_summa comp_merge comp_scatter comp_result other total\n");
174  }
175  if(type == string("outer"))
176  {
177  splitB.Transpose(); //locally transpose for outer product
178  for(int k=0; k<ITERS; k++)
179  {
180  splitC = multiply(splitA, splitB, CMG, true, false); // outer product
181  delete splitC;
182  }
183  }
184 
185  else // default column-threaded
186  {
187  for(int k=0; ITERS>0 && k<ITERS-1; k++)
188  {
189  splitC = multiply(splitA, splitB, CMG, false, true);
190  delete splitC;
191  }
192  splitC = multiply(splitA, splitB, CMG, false, true);
193  int64_t nnzC=0;
194  int64_t localnnzC = splitC->getnnz();
195  MPI_Allreduce( &localnnzC, &nnzC, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
196  if(myrank == 0) cout << "\n After multiplication: nnzC= " << nnzC << endl << endl;
197  delete splitC;
198 
199  }
200  }
201 
202  MPI_Finalize();
203  return 0;
204 }
205 
206 
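The per-phase timing globals declared at the top of the file (comm_bcast, comm_reduce, comp_summa, comp_reduce, and so on) are filled in elsewhere, presumably by the multiplication and reduction routines in Multiplier.h and Reductions.h. The stand-alone sketch below shows one way such timers could be aggregated across ranks and reported on rank 0; the helper ReportMaxTime, the placeholder timer values, and the choice of MPI_MAX are illustrative assumptions, not part of mpipspgemm.cpp.

// Illustrative sketch (not from mpipspgemm.cpp): reduce per-phase timers
// across all ranks with MPI_MAX and print them on rank 0.
#include <mpi.h>
#include <cstdio>

static void ReportMaxTime(const char * label, double localtime, int myrank)
{
    double maxtime = 0.0;
    MPI_Reduce(&localtime, &maxtime, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    if(myrank == 0) std::printf("%-12s %9.6f s (max over ranks)\n", label, maxtime);
}

int main(int argc, char * argv[])
{
    MPI_Init(&argc, &argv);
    int myrank;
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    double comm_bcast = 0.0, comp_summa = 0.0;   // stand-ins for the driver's globals

    double t0 = MPI_Wtime();
    // ... a timed phase would run here ...
    comp_summa += MPI_Wtime() - t0;

    ReportMaxTime("comm_bcast", comm_bcast, myrank);
    ReportMaxTime("comp_summa", comp_summa, myrank);

    MPI_Finalize();
    return 0;
}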