10 template <
typename IT,
typename NT>
16 MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
17 std::vector< SpTuples<IT,NT>* > unreducedC;
19 MPI_Barrier(MPI_COMM_WORLD);
20 double time_beg = MPI_Wtime();
22 SUMMALayer(splitA, splitB, unreducedC, CMG, isBT, threaded);
24 MPI_Barrier(MPI_COMM_WORLD);
25 double time_mid = MPI_Wtime();
29 MPI_Barrier(MPI_COMM_WORLD);
30 double time_end = MPI_Wtime();
31 double time_total = time_end-time_beg;
47 nthreads = omp_get_num_threads();
57 printf(
"%4d %4d %5d %6d %10lf %12lf %12lf %10lf %12lf %12lf %12lf %10lf\n", CMG.
GridRows, CMG.
GridCols, CMG.
GridLayers, nthreads,
comm_bcast,
comm_reduce,
comp_summa,
comp_reduce,
comp_reduce_layer,
comp_result, time_other, time_total);
SpDCCols< IT, NT > * ReduceAll_threaded(std::vector< SpTuples< IT, NT > * > &unreducedC, CCGrid &CMG)
void SUMMALayer(SpDCCols< IT, NT > &SplitA, SpDCCols< IT, NT > &SplitB, std::vector< SpTuples< IT, NT > * > &C, CCGrid &CMG, bool isBT, bool threaded)
SpDCCols< IT, NT > * multiply(SpDCCols< IT, NT > &splitA, SpDCCols< IT, NT > &splitB, CCGrid &CMG, bool isBT, bool threaded)