COMBINATORIAL_BLAS  1.6
RestrictionOp.cpp
#include <mpi.h>
#include <sys/time.h>
#include <iostream>
#include <functional>
#include <algorithm>
#include <vector>
#include <string>
#include <sstream>
#include <stdint.h>
#include <cmath>
#include <omp.h>    // for omp_get_num_threads()
#include "CombBLAS/CombBLAS.h"
#include "Glue.h"
#include "CCGrid.h"
#include "Reductions.h"
#include "Multiplier.h"
#include "SplitMatDist.h"
#include "RestrictionOp.h"

using namespace std;
using namespace combblas;

// Global timing counters
double comm_bcast;
double comm_reduce;
double comp_summa;
double comp_reduce;
double comp_result;
double comp_reduce_layer;
double comp_split;
double comp_trans;
double comm_split;

#define ITERS 5

int main(int argc, char *argv[])
{
    int provided;
    //MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided);

    MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
    if (provided < MPI_THREAD_SERIALIZED)
    {
        printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    int nprocs, myrank;
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    if(argc < 6)
    {
        if(myrank == 0)
        {
            printf("Usage (random): ./RestrictionOp <GridRows> <GridCols> <Layers> <Type> <Scale> <EDGEFACTOR>\n");
            printf("Usage (input): ./RestrictionOp <GridRows> <GridCols> <Layers> <Type=input> <matA>\n"); // TODO: <Scale> is not meaningful here and should be removed; kept so existing scripts still run.
            printf("Example: ./RestrictionOp 4 4 2 ER 19 16\n");
            printf("Example: ./RestrictionOp 4 4 2 input matA.mtx\n");
            printf("Type ER: Erdos-Renyi\n");
            printf("Type SSCA: R-MAT with SSCA benchmark parameters\n");
            printf("Type G500: R-MAT with Graph500 benchmark parameters\n");
        }
        return -1;
    }
68 
69 
70  unsigned GRROWS = (unsigned) atoi(argv[1]);
71  unsigned GRCOLS = (unsigned) atoi(argv[2]);
72  unsigned C_FACTOR = (unsigned) atoi(argv[3]);
73  CCGrid CMG(C_FACTOR, GRCOLS);
74  int nthreads;
75 #pragma omp parallel
76  {
77  nthreads = omp_get_num_threads();
78  }
79 
80 
81  if(GRROWS != GRCOLS)
82  {
83  SpParHelper::Print("This version of the Combinatorial BLAS only works on a square logical processor grid\n");
84  MPI_Barrier(MPI_COMM_WORLD);
85  MPI_Abort(MPI_COMM_WORLD, 1);
86  }
87 
88  int layer_length = GRROWS*GRCOLS;
89  if(layer_length * C_FACTOR != nprocs)
90  {
91  SpParHelper::Print("The product of <GridRows> <GridCols> <Replicas> does not match the number of processes\n");
92  MPI_Barrier(MPI_COMM_WORLD);
93  MPI_Abort(MPI_COMM_WORLD, 1);
94  }
    {
        SpDCCols<int64_t, double> *A;   // matrix A: read from file or generated below

        string type;
        shared_ptr<CommGrid> layerGrid;
        layerGrid.reset( new CommGrid(CMG.layerWorld, 0, 0) );
        FullyDistVec<int64_t, int64_t> p(layerGrid); // permutation vector defined on layers

        if(string(argv[4]) == string("input")) // input option
        {
            string fileA(argv[5]);

            double t01 = MPI_Wtime();
            A = ReadMat<double>(fileA, CMG, true, p);

            if(myrank == 0) cout << "Input matrix read : time " << MPI_Wtime() - t01 << endl;
        }
        else
        {
            unsigned scale = (unsigned) atoi(argv[5]);
            unsigned EDGEFACTOR = (unsigned) atoi(argv[6]);
            double initiator[4];
            if(string(argv[4]) == string("ER"))
            {
                initiator[0] = .25;
                initiator[1] = .25;
                initiator[2] = .25;
                initiator[3] = .25;
            }
            else if(string(argv[4]) == string("G500"))
            {
                initiator[0] = .57;
                initiator[1] = .19;
                initiator[2] = .19;
                initiator[3] = .05;
                EDGEFACTOR = 16;
            }
            else if(string(argv[4]) == string("SSCA"))
            {
                initiator[0] = .6;
                initiator[1] = .4/3;
                initiator[2] = .4/3;
                initiator[3] = .4/3;
                EDGEFACTOR = 8;
            }
            else
            {
                if(myrank == 0)
                    printf("The initiator parameter - %s - is not recognized.\n", argv[4]);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }

            double t01 = MPI_Wtime();
            A = GenMat<int64_t,double>(CMG, scale, EDGEFACTOR, initiator, true);

            if(myrank == 0) cout << "RMATs Generated : time " << MPI_Wtime() - t01 << endl;
        }

        SpDCCols<int64_t, double> *R;    // restriction operator
        SpDCCols<int64_t, double> *RT;   // its transpose

        if(myrank == 0) cout << "Computing restriction matrix \n";
        double t01 = MPI_Wtime();
        RestrictionOp( CMG, A, R, RT);

        if(myrank == 0) cout << "Restriction Op computed : time " << MPI_Wtime() - t01 << endl;

        SpDCCols<int64_t, double> *B = new SpDCCols<int64_t, double>(*A); // just a deep copy of A
        SpDCCols<int64_t, double> splitA, splitB, splitR, splitRT;
        SpDCCols<int64_t, double> *splitRTA;
        SpDCCols<int64_t, double> *splitRTAR;
        SpDCCols<int64_t, double> *splitC;

        SplitMat(CMG, A, splitA, true);
        SplitMat(CMG, B, splitB, false);
        SplitMat(CMG, R, splitR, true);
        SplitMat(CMG, RT, splitRT, false);

        if(myrank == 0)
        {
            printf("\n Processor Grid (row x col x layers x threads): %dx%dx%dx%d \n", CMG.GridRows, CMG.GridCols, CMG.GridLayers, nthreads);
            printf(" prow pcol layer thread comm_bcast comm_scatter comp_summa comp_merge comp_scatter comp_result other total\n");
        }
        SpParHelper::Print("Computing A square\n");

        // Each product is computed twice; the first result is discarded.
        splitC = multiply(splitB, splitA, CMG, false, true); // A^2
        delete splitC;
        splitC = multiply(splitB, splitA, CMG, false, true); // A^2

        SpParHelper::Print("Computing RTA\n");
        splitRTA = multiply(splitRT, splitA, CMG, false, true);
        delete splitRTA;
        splitRTA = multiply(splitRT, splitA, CMG, false, true);

        SpParHelper::Print("Computing RTAR\n");
        splitRTAR = multiply(*splitRTA, splitR, CMG, false, true);
        delete splitRTAR;
        splitRTAR = multiply(*splitRTA, splitR, CMG, false, true);  // triple product R^T * A * R

        // count nnz
        int64_t nnzA = 0, nnzR = 0, nnzC = 0, nnzRTA = 0, nnzRTAR = 0;
        int64_t localnnzA = splitA.getnnz();
        int64_t localnnzR = splitR.getnnz();
        int64_t localnnzC = splitC->getnnz();
        int64_t localnnzRTA = splitRTA->getnnz();
        int64_t localnnzRTAR = splitRTAR->getnnz();
        MPI_Allreduce( &localnnzA, &nnzA, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
        MPI_Allreduce( &localnnzR, &nnzR, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
        MPI_Allreduce( &localnnzC, &nnzC, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
        MPI_Allreduce( &localnnzRTA, &nnzRTA, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
        MPI_Allreduce( &localnnzRTAR, &nnzRTAR, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
        if(myrank == 0)
        {
            cout << "----------------------------\n";
            cout << " nnz(A)= " << nnzA << endl;
            cout << " nnz(R)= " << nnzR << endl;
            cout << " nnz(A^2)= " << nnzC << endl;
            cout << " nnz(RTA)= " << nnzRTA << endl;
            cout << " nnz(RTAR)= " << nnzRTAR << endl;
            cout << "----------------------------\n";
        }

        delete splitC;
        delete splitRTA;
        delete splitRTAR;
    }

    MPI_Finalize();
    return 0;
}
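
Usage note: the grid check above requires the MPI process count to equal <GridRows> x <GridCols> x <Layers>, so the Erdos-Renyi example from the usage message (a 4 x 4 grid with 2 layers) needs 32 ranks. Assuming an mpirun-style launcher (mpiexec, srun, etc. work analogously, depending on the MPI installation):

    mpirun -np 32 ./RestrictionOp 4 4 2 ER 19 16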