int main(int argc, char *argv[])
{
    int provided, nprocs, myrank, nthreads;   // declarations elided in the original listing

    MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
    if (provided < MPI_THREAD_SERIALIZED)
    {
        printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    // Usage information (printed when the expected command-line arguments are missing; the guard is elided in the listing)
    printf("Usage (random): ./mpipspgemm <GridRows> <GridCols> <Layers> <Type> <Scale> <EDGEFACTOR> <algo>\n");
    printf("Usage (input): ./mpipspgemm <GridRows> <GridCols> <Layers> <Type=input> <matA> <matB> <algo>\n");
    printf("Example: ./mpipspgemm 4 4 2 ER 19 16 outer\n");
    printf("Example: ./mpipspgemm 4 4 2 Input matA.mtx matB.mtx column\n");
    printf("Type ER: Erdos-Renyi\n");
    printf("Type SSCA: R-MAT with SSCA benchmark parameters\n");
    printf("Type G500: R-MAT with Graph500 benchmark parameters\n");
    printf("algo: outer | column \n");

    unsigned GRROWS = (unsigned) atoi(argv[1]);
    unsigned GRCOLS = (unsigned) atoi(argv[2]);
    unsigned C_FACTOR = (unsigned) atoi(argv[3]);
    CCGrid CMG(C_FACTOR, GRCOLS);

    nthreads = omp_get_num_threads();

    if (GRROWS != GRCOLS)   // guard elided in the listing; implied by the error message below
    {
        SpParHelper::Print("This version of the Combinatorial BLAS only works on a square logical processor grid\n");
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    int layer_length = GRROWS * GRCOLS;
    if (layer_length * C_FACTOR != nprocs)
    {
        SpParHelper::Print("The product of <GridRows> <GridCols> <Replicas> does not match the number of processes\n");
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    shared_ptr<CommGrid> layerGrid;
    // ... (additional declarations, e.g. the split matrices and nonzero counters used below, appear in elided lines)

    if (string(argv[4]) == string("input"))   // read A and B from Matrix Market files
    {
        string fileA(argv[5]);
        string fileB(argv[6]);

        double t01 = MPI_Wtime();
        // ... (read the matrices and split them across the layers)
        if (myrank == 0)
            cout << "Matrices read and replicated along layers : time " << MPI_Wtime() - t01 << endl;
    }
    else   // generate random input matrices
    {
        unsigned scale = (unsigned) atoi(argv[5]);
        unsigned EDGEFACTOR = (unsigned) atoi(argv[6]);

        if (string(argv[4]) == string("ER"))
        {
            // ... (Erdos-Renyi initiator parameters)
        }
        else if (string(argv[4]) == string("G500"))
        {
            // ... (Graph500 R-MAT initiator parameters)
        }
        else if (string(argv[4]) == string("SSCA"))
        {
            // ... (SSCA R-MAT initiator parameters)
        }
        else
        {
            printf("The initiator parameter - %s - is not recognized.\n", argv[4]);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        double t01 = MPI_Wtime();
        // ... (generate the R-MAT matrices and split them across the layers)
        if (myrank == 0)
            cout << "RMATs Generated and replicated along layers : time " << MPI_Wtime() - t01 << endl;
    }

    int64_t globalnnzA = 0, globalnnzB = 0;
    MPI_Allreduce(&localnnzA, &globalnnzA, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
    MPI_Allreduce(&localnnzB, &globalnnzB, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
    if (myrank == 0)
        cout << "After split: nnzA= " << globalnnzA << " & nnzB= " << globalnnzB;

    type = string(argv[7]);

    printf("\n Processor Grid (row x col x layers x threads): %dx%dx%dx%d \n",
           CMG.GridRows, CMG.GridCols, CMG.GridLayers, nthreads);
    printf(" prow pcol layer thread comm_bcast comm_scatter comp_summa comp_merge comp_scatter comp_result other total\n");

    if (type == string("outer"))
    {
        for (int k = 0; k < ITERS; k++)
        {
            splitC = multiply(splitA, splitB, CMG, true, false);   // outer algorithm: isBT = true, threaded = false
            // ...
        }
    }
    else   // column algorithm
    {
        // ... (elided control flow; both calls use isBT = false, threaded = true)
        splitC = multiply(splitA, splitB, CMG, false, true);
        // ...
        splitC = multiply(splitA, splitB, CMG, false, true);
        // ...
    }

    MPI_Allreduce(&localnnzC, &nnzC, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
    if (myrank == 0)
        cout << "\n After multiplication: nnzC= " << nnzC << endl << endl;
    // ... (remainder of main elided in the listing)
}

Function signatures referenced by this file:
    MPI_Datatype MPIType<int64_t>(void)
    int main(int argc, char *argv[])
    void SplitMat(CCGrid &CMG, SpDCCols<IT, NT> *localmat, SpDCCols<IT, NT> &splitmat, bool rowsplit = false)
    SpDCCols<IT, NT> *multiply(SpDCCols<IT, NT> &splitA, SpDCCols<IT, NT> &splitB, CCGrid &CMG, bool isBT, bool threaded)
    void Transpose()  -- mutator version, replaces the calling object
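
The reductions in main() pass MPIType<int64_t>() as the MPI datatype argument, i.e. a small helper that maps a C++ type to the corresponding MPI_Datatype. As a rough, hypothetical sketch of that idea only (not the actual CombBLAS implementation, which lives in the library's own headers), such a mapping could be written as:

    #include <cstdint>
    #include <mpi.h>

    // Hypothetical traits-style helper: return the MPI datatype matching T.
    // The real CombBLAS MPIType may be organized differently.
    template <typename T> MPI_Datatype MPIType();   // primary template, intentionally left undefined
    template <> MPI_Datatype MPIType<int64_t>() { return MPI_INT64_T; }   // assumes an MPI-2.2+ library
    template <> MPI_Datatype MPIType<double>()  { return MPI_DOUBLE; }

    // Usage mirrors the reductions in main():
    //   MPI_Allreduce(&localnnzA, &globalnnzA, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);

Resolving the datatype through a template specialization keeps the reduction calls independent of the concrete index type, which is why the listing can sum int64_t nonzero counts without hard-coding an MPI datatype constant at each call site.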