// Fragment of a templated routine that redistributes the tuples of `localmerged`
// across the ranks of `fibWorld` with MPI all-to-all exchanges and then merges what
// each rank received. Judging by the identifiers used (fibWorld, localmerged) this is
// presumably the body of ParallelReduce_Alltoall_threaded — the signature, braces and
// several statements are not part of this excerpt, so confirm against the full file.
36 template <
typename SR,
typename IT,
typename NT>
// comp_time / comm_time accumulate MPI_Wtime() deltas for the compute and
// communication phases; comp_begin / comm_begin hold the start stamp of each phase.
39 double comp_begin, comm_begin, comp_time=0, comm_time=0;
// Size of, and this process's rank in, fibWorld (fprocs / fibrank are declared outside this excerpt).
41 MPI_Comm_size(fibWorld,&fprocs);
42 MPI_Comm_rank(fibWorld,&fibrank);
// Row / column dimensions reported by the local input.
43 IT mdim = localmerged->
getnrow();
44 IT ndim = localmerged->
getncol();
// ---- compute phase: work out how many tuples go to each rank ----
52 comp_begin = MPI_Wtime();
53 std::vector<int> send_sizes(fprocs);
54 std::vector<int> recv_sizes(fprocs);
// Value-initialised to 0, so recv_offsets[0] stays 0 for the prefix sum further down.
55 std::vector<int> recv_offsets(fprocs);
// send_offsets[i] is used as the start of rank i's slice of localmerged->tuples.
// The loop below reads send_offsets[fprocs], so findColSplitters presumably returns
// fprocs+1 entries — verify against its definition.
56 std::vector<int> send_offsets = findColSplitters<int>(localmerged, fprocs);
57 for(
int i=0; i<fprocs; i++)
59 send_sizes[i] = send_offsets[i+1] - send_offsets[i];
61 comp_time += (MPI_Wtime() - comp_begin);
// ---- communication phase: every rank tells every other rank how many tuples to expect ----
65 comm_begin = MPI_Wtime();
66 MPI_Alltoall( send_sizes.data(), 1, MPI_INT, recv_sizes.data(), 1, MPI_INT,fibWorld);
67 comm_time += (MPI_Wtime() - comm_begin);
// MPI datatype made of sizeof(tuple) MPI_CHARs: tuples are transferred as raw bytes.
// NOTE(review): no MPI_Type_free for MPI_triple is visible in this excerpt — confirm it is released.
68 MPI_Datatype MPI_triple;
69 MPI_Type_contiguous(
sizeof(std::tuple<IT,IT,NT>), MPI_CHAR, &MPI_triple);
70 MPI_Type_commit(&MPI_triple);
// ---- compute phase: size the receive buffer and its per-rank offsets ----
74 comp_begin = MPI_Wtime();
// Total number of incoming tuples (recv_count is declared outside this excerpt).
76 for(
int i = 0; i < fprocs; i++ )
78 recv_count += recv_sizes[i];
// Raw, uninitialised storage for recv_count tuples obtained from ::operator new; no
// tuple constructors run here. It is released with ::operator delete at the end.
// NOTE(review): sizeof(T[recv_count]) with a runtime recv_count names a variable-length
// array type, which is a compiler extension rather than standard C++ — confirm the
// supported toolchains accept it.
80 std::tuple<IT,IT,NT> * recvbuf =
static_cast<std::tuple<IT, IT, NT>*
> (::operator
new (
sizeof(std::tuple<IT, IT, NT>[recv_count])));
// Exclusive prefix sum of recv_sizes: where each sender's tuples start in recvbuf.
83 for(
int i = 1; i < fprocs; i++ )
85 recv_offsets[i] = recv_offsets[i-1]+recv_sizes[i-1];
87 comp_time += (MPI_Wtime() - comp_begin);
// ---- communication phase: exchange the tuples themselves ----
// Rank i receives the slice [send_offsets[i], send_offsets[i] + send_sizes[i]) of
// localmerged->tuples; data from rank j lands at recvbuf + recv_offsets[j].
91 comm_begin = MPI_Wtime();
92 MPI_Alltoallv( localmerged->tuples, send_sizes.data(), send_offsets.data(), MPI_triple, recvbuf, recv_sizes.data(), recv_offsets.data(), MPI_triple, fibWorld);
93 comm_time += (MPI_Wtime() - comm_begin);
// ---- compute phase: re-index the received tuples and merge them ----
98 comp_begin = MPI_Wtime();
// Width of this rank's share of the ndim columns: ndim/fprocs, except that the last
// rank takes everything left over.
99 IT ndimSplit = ndim/fprocs;
100 if(fibrank==(fprocs-1))
101 ndimSplit = ndim - ndimSplit * fibrank;
// NOTE(review): on the last rank ndimSplit has already been overwritten with the
// remainder width above, so coloffset becomes fibrank * (remainder width) instead of
// fibrank * (ndim/fprocs). The two differ whenever ndim is not a multiple of fprocs —
// confirm this is intended.
102 IT coloffset = fibrank * ndimSplit;
// Subtract coloffset from element 1 of every received tuple (the column index, going
// by the name coloffset); iterations are independent and run under OpenMP.
103 #pragma omp parallel for 104 for(
int k=0; k<recv_count; k++)
106 std::get<1>(recvbuf[k]) = std::get<1>(recvbuf[k]) - coloffset;
// One SpTuples pointer is appended per rank; the statement that creates `spTuples`
// is not part of this excerpt.
111 std::vector< SpTuples<IT,NT>* > lists;
112 for(
int i=0; i< fprocs; ++i)
115 lists.push_back(spTuples);
// Merge the per-rank lists into one mdim x ndimSplit result. The meaning of the
// trailing `false` argument is defined by MultiwayMerge, which is not shown here.
119 SpTuples<IT,NT> * globalmerged = MultiwayMerge<SR>(lists, mdim, ndimSplit,
false);
121 comp_time += (MPI_Wtime() - comp_begin);
// Release the raw receive buffer with the deallocation function matching ::operator new.
126 ::operator
delete(recvbuf);
// Fragment of a templated routine that merges the SpTuples in `unreducedC` locally.
// The use of `unreducedC` suggests this is the body of ReduceAll_threaded, but the
// signature, braces and most statements are not part of this excerpt — confirm
// against the full file.
133 template <
typename NT,
typename IT>
// Dimensions are taken from the first element, so unreducedC must not be empty here.
137 IT mdim = unreducedC[0]->getnrow();
138 IT ndim = unreducedC[0]->getncol();
// Start timestamp for the local merge; the code that consumes it is not visible here.
141 double loc_beg1 = MPI_Wtime();
// Merge all inputs into a single mdim x ndim SpTuples. PTDD and the meaning of the
// trailing `true` argument are defined outside this excerpt.
143 SpTuples<IT, NT>* localmerged = MultiwayMerge<PTDD>(unreducedC, mdim, ndim,
true);
// Timer restarted for a following phase whose statements are not shown.
149 loc_beg1 = MPI_Wtime();
// Frees mergedSpTuples, which is created by code outside this excerpt.
153 delete mergedSpTuples;
std::tuple< IT, IT, NT > * tuples
SpDCCols< IT, NT > * ReduceAll_threaded(std::vector< SpTuples< IT, NT > * > &unreducedC, CCGrid &CMG)
SpTuples< IT, NT > * ParallelReduce_Alltoall_threaded(MPI_Comm &fibWorld, SpTuples< IT, NT > *&localmerged)