int main(int argc, char *argv[])
{
    int provided;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
    if (provided < MPI_THREAD_SERIALIZED)
    {
        printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
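    // MPI_THREAD_SERIALIZED allows several threads to make MPI calls as long as
    // the application never issues them concurrently; the OpenMP-threaded
    // multiplications below rely on at least this level of support.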
    int nprocs, myrank;
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    if(argc < 6)
    {
        if(myrank == 0)
        {
            printf("Usage (random): ./RestrictionOp <GridRows> <GridCols> <Layers> <Type> <Scale> <EDGEFACTOR>\n");
            printf("Usage (input): ./RestrictionOp <GridRows> <GridCols> <Layers> <Type=input> <matA>\n");
            printf("Example: ./RestrictionOp 4 4 2 ER 19 16\n");
            printf("Example: ./RestrictionOp 4 4 2 input matA.mtx\n");
            printf("Type ER: Erdos-Renyi\n");
            printf("Type SSCA: R-MAT with SSCA benchmark parameters\n");
            printf("Type G500: R-MAT with Graph500 benchmark parameters\n");
        }
        MPI_Finalize();
        return -1;
    }
    unsigned GRROWS = (unsigned) atoi(argv[1]);
    unsigned GRCOLS = (unsigned) atoi(argv[2]);
    unsigned C_FACTOR = (unsigned) atoi(argv[3]);
    CCGrid CMG(C_FACTOR, GRCOLS);
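    // CMG describes the 3D process organization: a GRROWS x GRCOLS logical grid
    // replicated across C_FACTOR layers (the third dimension of the 3D SpGEMM).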
    int nthreads = 1;
    #pragma omp parallel
    {
        #pragma omp single
        nthreads = omp_get_num_threads();
    }
    if(GRROWS != GRCOLS)
    {
        SpParHelper::Print("This version of the Combinatorial BLAS only works on a square logical processor grid\n");
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
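    // A square layer grid is required because each layer runs a 2D SUMMA-style
    // multiplication that assumes an equal number of process rows and columns.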
    int layer_length = GRROWS * GRCOLS;
    if(layer_length * C_FACTOR != nprocs)
    {
        SpParHelper::Print("The product of <GridRows> <GridCols> <Layers> does not match the number of processes\n");
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
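    // e.g. the sample invocation "4 4 2 ..." above requires 4 * 4 * 2 = 32 MPI processes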
    SpDCCols<int64_t, double> *A;
    shared_ptr<CommGrid> layerGrid;

    if(string(argv[4]) == string("input"))
    {
        // Read a Matrix Market file (e.g. matA.mtx) and distribute it over the grid
        string fileA(argv[5]);

        double t01 = MPI_Wtime();
        A = ReadMat<double>(fileA, CMG, true, p);   // p: permutation vector used by ReadMat (declaration not shown here)
        if(myrank == 0) cout << "Input matrix read : time " << MPI_Wtime() - t01 << endl;
    }
    // Otherwise argv[4] selects a synthetic generator: ER, G500, or SSCA.
    else
    {
        unsigned scale = (unsigned) atoi(argv[5]);
        unsigned EDGEFACTOR = (unsigned) atoi(argv[6]);
        // scale and EDGEFACTOR follow the Graph500 convention:
        // 2^scale vertices and roughly EDGEFACTOR * 2^scale generated edges
        double initiator[4];

        if(string(argv[4]) == string("ER"))
        {
            // Erdos-Renyi: all four R-MAT initiator probabilities are equal
            initiator[0] = initiator[1] = initiator[2] = initiator[3] = 0.25;
        }
        else if(string(argv[4]) == string("G500"))
        {
            // Graph500 R-MAT initiator probabilities
            initiator[0] = 0.57; initiator[1] = 0.19; initiator[2] = 0.19; initiator[3] = 0.05;
        }
        else if(string(argv[4]) == string("SSCA"))
        {
            // R-MAT initiator with SSCA benchmark parameters (values elided in this listing)
        }
        else
        {
            if(myrank == 0)
                printf("The initiator parameter - %s - is not recognized.\n", argv[4]);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        double t01 = MPI_Wtime();
        A = GenMat<int64_t, double>(CMG, scale, EDGEFACTOR, initiator, true);
        if(myrank == 0) cout << "RMATs Generated : time " << MPI_Wtime() - t01 << endl;
    }
    // Build a restriction operator R (and its transpose RT); the triple product
    // R^T * A * R computed below is the Galerkin-style coarse-grid operator
    // familiar from algebraic multigrid.
    if(myrank == 0) cout << "Computing restriction matrix\n";
    double t01 = MPI_Wtime();
    SpDCCols<int64_t, double> *R, *RT;
    RestrictionOp(CMG, A, R, RT);
    if(myrank == 0) cout << "Restriction Op computed : time " << MPI_Wtime() - t01 << endl;
    // A, R, and RT are split across the layers of the 3D grid before the
    // multiplications (SplitMat calls not shown in this listing)
    SpDCCols<int64_t, double> splitA, splitB, splitR, splitRT;
    SpDCCols<int64_t, double> *splitC, *splitRTA, *splitRTAR;

    if(myrank == 0)
    {
        printf("\n Processor Grid (row x col x layers x threads): %dx%dx%dx%d \n",
               CMG.GridRows, CMG.GridCols, CMG.GridLayers, nthreads);
        // header for the per-process timing breakdown of the multiplications below
        printf(" prow pcol layer thread comm_bcast comm_scatter comp_summa comp_merge comp_scatter comp_result other total\n");
    }
    SpParHelper::Print("Computing A square\n");
    splitC = multiply(splitB, splitA, CMG, false, true);
    delete splitC;
    splitC = multiply(splitB, splitA, CMG, false, true);
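    // Each product is computed twice; the second, warmed-up run is the one whose
    // timings are representative (the same pattern is repeated for RTA and RTAR).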
    SpParHelper::Print("Computing RTA\n");
    splitRTA = multiply(splitRT, splitA, CMG, false, true);
    delete splitRTA;
    splitRTA = multiply(splitRT, splitA, CMG, false, true);
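    // splitRTA now holds R^T * A, which feeds the final triple-product step below.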
    SpParHelper::Print("Computing RTAR\n");
    splitRTAR = multiply(*splitRTA, splitR, CMG, false, true);
    delete splitRTAR;
    splitRTAR = multiply(*splitRTA, splitR, CMG, false, true);
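    // At this point splitC, splitRTA, and splitRTAR hold A^2, R^T*A, and R^T*A*R;
    // the reductions below report their global nonzero counts.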
    int64_t nnzA = 0, nnzR = 0, nnzC = 0, nnzRTA = 0, nnzRTAR = 0;
    // local nonzero counts of the split matrices on this process
    int64_t localnnzA = splitA.getnnz();
    int64_t localnnzR = splitR.getnnz();
    int64_t localnnzC = splitC->getnnz();
    int64_t localnnzRTA = splitRTA->getnnz();
    int64_t localnnzRTAR = splitRTAR->getnnz();
    MPI_Allreduce(&localnnzA, &nnzA, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
    MPI_Allreduce(&localnnzR, &nnzR, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
    MPI_Allreduce(&localnnzC, &nnzC, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
    MPI_Allreduce(&localnnzRTA, &nnzRTA, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
    MPI_Allreduce(&localnnzRTAR, &nnzRTAR, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
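    // MPIType<int64_t>() resolves the C++ element type to the matching MPI
    // datatype handle, so each per-process count is summed into a global total
    // with a single MPI_SUM reduction.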
    if(myrank == 0)
    {
        cout << "----------------------------\n";
        cout << " nnz(A)= " << nnzA << endl;
        cout << " nnz(R)= " << nnzR << endl;
        cout << " nnz(A^2)= " << nnzC << endl;
        cout << " nnz(RTA)= " << nnzRTA << endl;
        cout << " nnz(RTAR)= " << nnzRTAR << endl;
        cout << "----------------------------\n";
    }

    MPI_Finalize();
    return 0;
}
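// Example run on 32 processes (4 x 4 grid, 2 layers), assuming a standard MPI launcher:
//   mpirun -np 32 ./RestrictionOp 4 4 2 G500 19 16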