MPI_Group orig_group, new_group;

MPI_Comm_size(orig_comm, &npes);
MPI_Comm_rank(orig_comm, &rank);

// splitterRank ranks go into the first group, the remaining
// (npes - splitterRank) ranks into the second group.
int *ranksAsc, *ranksDesc;
ranksAsc = new int[splitterRank];
ranksDesc = new int[(npes - splitterRank)];

int numDesc = (npes - splitterRank - 1);

for(int i = 0; i < npes; i++) {
  if(static_cast<unsigned int>(i) < splitterRank) {
    // ... fill ranksAsc in ascending order of rank
  } else {
    // fill ranksDesc in descending order of rank
    ranksDesc[numDesc] = i;
    numDesc--;
  }
}

MPI_Comm_group(orig_comm, &orig_group);

// Each rank builds only the group it belongs to.
if (static_cast<unsigned int>(rank) < splitterRank) {
  MPI_Group_incl(orig_group, splitterRank, ranksAsc, &new_group);
} else {
  MPI_Group_incl(orig_group, (npes - splitterRank), ranksDesc, &new_group);
}

// Collective over orig_comm; every rank receives the communicator of its own group.
MPI_Comm_create(orig_comm, new_group, new_comm);
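The excerpt above follows the standard MPI group-based splitting recipe: obtain the group of the parent communicator, include only the ranks that belong to your half, and call MPI_Comm_create collectively. Below is a minimal, self-contained sketch of that same recipe in plain MPI; the half/half cut point and all names are illustrative, not taken from the library (which presumably derives the cut from getPrevHighestPowerOfTwo).

// Minimal sketch of a group-based communicator split (illustrative names only).
#include <mpi.h>
#include <vector>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);

  int rank, npes;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &npes);

  int splitterRank = npes / 2;   // illustrative cut point

  // Each rank lists only the ranks of the group it belongs to.
  std::vector<int> myRanks;
  if (rank < splitterRank) {
    for (int i = 0; i < splitterRank; ++i) myRanks.push_back(i);
  } else {
    for (int i = splitterRank; i < npes; ++i) myRanks.push_back(i);
  }

  MPI_Group worldGroup, myGroup;
  MPI_Comm_group(MPI_COMM_WORLD, &worldGroup);
  MPI_Group_incl(worldGroup, static_cast<int>(myRanks.size()), myRanks.data(), &myGroup);

  // Collective over MPI_COMM_WORLD; disjoint groups yield disjoint communicators.
  MPI_Comm newComm;
  MPI_Comm_create(MPI_COMM_WORLD, myGroup, &newComm);

  MPI_Group_free(&myGroup);
  MPI_Group_free(&worldGroup);
  if (newComm != MPI_COMM_NULL) MPI_Comm_free(&newComm);
  MPI_Finalize();
  return 0;
}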
MPI_Group orig_group, new_group;

MPI_Comm_size(orig_comm, &npes);
MPI_Comm_rank(orig_comm, &rank);

int *ranksAsc, *ranksDesc;
ranksAsc = new int[splitterRank];
ranksDesc = new int[(npes - splitterRank)];

for(int i = 0; i < npes; i++) {
  if(static_cast<unsigned int>(i) < splitterRank) {
    // ... fill ranksAsc
  } else {
    ranksDesc[numDesc] = i;
    // ...
  }
}

MPI_Comm_group(orig_comm, &orig_group);

if (static_cast<unsigned int>(rank) < splitterRank) {
  MPI_Group_incl(orig_group, splitterRank, ranksAsc, &new_group);
} else {
  MPI_Group_incl(orig_group, (npes - splitterRank), ranksDesc, &new_group);
}

MPI_Comm_create(orig_comm, new_group, new_comm);
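From the caller's side the two binary-split helpers share the same interface (the signatures are listed in the index at the end of this page). A hypothetical usage sketch follows; the header name "parUtils.h", the par:: namespace, and the meaning of the return value are assumptions suggested by the par::Mpi_Allgather/Mpi_Scan calls seen elsewhere in this file.

// Hypothetical usage sketch; header name, namespace and return-value meaning assumed.
#include <mpi.h>
#include "parUtils.h"

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);

  MPI_Comm newComm;
  // First group presumably gets the largest power-of-two number of ranks;
  // the remaining ranks form the second group.
  unsigned int splitter = par::splitCommBinaryNoFlip(MPI_COMM_WORLD, &newComm);

  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  bool inFirstGroup = (static_cast<unsigned int>(rank) < splitter);  // assumed meaning of the return value
  (void) inFirstGroup;

  if (newComm != MPI_COMM_NULL) MPI_Comm_free(&newComm);
  MPI_Finalize();
  return 0;
}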
#ifdef __PROFILE_WITH_BARRIER__
  // ...
#endif

MPI_Group orig_group, new_group;

MPI_Comm_size(comm, &size);

// Gather every rank's empty/non-empty flag so all ranks agree on the partition.
bool* isEmptyList = new bool[size];
par::Mpi_Allgather<bool>(&iAmEmpty, isEmptyList, 1, comm);

int numActive = 0, numIdle = 0;
for(int i = 0; i < size; i++) {
  // ... count active (non-empty) and idle (empty) ranks
}

int* ranksActive = new int[numActive];
int* ranksIdle = new int[numIdle];

// ... (counters are reset before being reused as fill indices)

for(int i = 0; i < size; i++) {
  if(isEmptyList[i]) {
    ranksIdle[numIdle] = i;
    // ...
  } else {
    ranksActive[numActive] = i;
    // ...
  }
}

delete [] isEmptyList;

MPI_Comm_group(comm, &orig_group);

// Each rank joins either the active or the idle group.
if(!iAmEmpty) {
  MPI_Group_incl(orig_group, numActive, ranksActive, &new_group);
} else {
  MPI_Group_incl(orig_group, numIdle, ranksIdle, &new_group);
}

MPI_Comm_create(comm, new_group, new_comm);

delete [] ranksActive;
// ...
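Seen from the caller's side, each rank passes only its own flag; the allgather inside the function establishes the global partition. A hypothetical usage sketch follows, with the header name, namespace, and the localElements container all assumed for illustration.

// Hypothetical usage sketch; header name, namespace and data container assumed.
#include <mpi.h>
#include <vector>
#include "parUtils.h"   // assumed header name

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);

  std::vector<double> localElements;          // hypothetical per-rank data
  bool iAmEmpty = localElements.empty();

  // Non-empty ranks end up in one communicator, empty ranks in the other.
  MPI_Comm newComm;
  par::splitComm2way(iAmEmpty, &newComm, MPI_COMM_WORLD);

  if (newComm != MPI_COMM_NULL) MPI_Comm_free(&newComm);
  MPI_Finalize();
  return 0;
}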
#ifdef __PROFILE_WITH_BARRIER__
  // ...
#endif

MPI_Group orig_group, new_group;

MPI_Comm_rank(comm, &rank);
MPI_Comm_size(comm, &size);

int* ranksActive = new int[splittingRank];
int* ranksIdle = new int[size - splittingRank];

// Ranks [0, splittingRank) stay active; ranks [splittingRank, size) become idle.
for(int i = 0; i < splittingRank; i++) {
  ranksActive[i] = i;
}

for(int i = splittingRank; i < size; i++) {
  ranksIdle[i - splittingRank] = i;
}

MPI_Comm_group(comm, &orig_group);

if (rank < splittingRank) {
  MPI_Group_incl(orig_group, splittingRank, ranksActive, &new_group);
} else {
  MPI_Group_incl(orig_group, (size - splittingRank), ranksIdle, &new_group);
}

MPI_Comm_create(comm, new_group, new_comm);

delete [] ranksActive;
// ...
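This variant needs no allgather because the caller already knows the cut point. A hypothetical usage sketch (header name and namespace assumed) that keeps the first half of the ranks active:

// Hypothetical usage sketch; header name and namespace assumed.
#include <mpi.h>
#include "parUtils.h"   // assumed header name

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);

  int npes;
  MPI_Comm_size(MPI_COMM_WORLD, &npes);

  // Ranks [0, npes/2) stay active; the rest form the idle communicator.
  MPI_Comm newComm;
  par::splitCommUsingSplittingRank(npes / 2, &newComm, MPI_COMM_WORLD);

  if (newComm != MPI_COMM_NULL) MPI_Comm_free(&newComm);
  MPI_Finalize();
  return 0;
}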
int splitComm2way(const bool* isEmptyList, MPI_Comm* new_comm, MPI_Comm comm) {

  MPI_Group orig_group, new_group;

  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);

  int numActive = 0, numIdle = 0;
  for(int i = 0; i < size; i++) {
    // ... count active (non-empty) and idle (empty) ranks
  }

  int* ranksActive = new int[numActive];
  int* ranksIdle = new int[numIdle];

  // ... (counters are reset before being reused as fill indices)

  for(int i = 0; i < size; i++) {
    if(isEmptyList[i]) {
      ranksIdle[numIdle] = i;
      // ...
    } else {
      ranksActive[numActive] = i;
      // ...
    }
  }

  MPI_Comm_group(comm, &orig_group);

  // A rank whose entry in isEmptyList is false joins the active group.
  if (!isEmptyList[rank]) {
    MPI_Group_incl(orig_group, numActive, ranksActive, &new_group);
  } else {
    MPI_Group_incl(orig_group, numIdle, ranksIdle, &new_group);
  }

  MPI_Comm_create(comm, new_group, new_comm);

  delete [] ranksActive;
  // ...
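This overload takes the per-rank flags as an argument instead of gathering them, so it is useful when the same flag list is already available on every rank. A hypothetical usage sketch follows (header name, namespace, and the flag values are assumed).

// Hypothetical usage sketch; header name, namespace and flag values assumed.
#include <mpi.h>
#include <memory>
#include "parUtils.h"   // assumed header name

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);

  int npes, rank;
  MPI_Comm_size(MPI_COMM_WORLD, &npes);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // isEmptyList must hold one flag per rank and be identical on every rank.
  std::unique_ptr<bool[]> isEmptyList(new bool[npes]);
  for (int i = 0; i < npes; ++i) {
    isEmptyList[i] = (i % 2 == 1);   // hypothetical: odd ranks are empty
  }

  MPI_Comm newComm;
  par::splitComm2way(isEmptyList.get(), &newComm, MPI_COMM_WORLD);

  if (newComm != MPI_COMM_NULL) MPI_Comm_free(&newComm);
  MPI_Finalize();
  return 0;
}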
int AdjustCommunicationPattern(std::vector<int>& send_sizes, std::vector<int>& send_partners,
                               std::vector<int>& recv_sizes, std::vector<int>& recv_partners, MPI_Comm comm)
{
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &npes);

  // k independent segments, one per entry in send_sizes
  unsigned int k = send_sizes.size();

  // ... (lsz, gscan, gsz, segment_p0, rcv and win are declared in lines omitted here)
  for(size_t i = 0; i < send_sizes.size(); ++i) {
    lsz[i] = send_sizes[i];
  }

  // prefix sums of the per-segment sizes across all ranks
  par::Mpi_Scan<DendroIntL>(lsz, gscan, k, MPI_SUM, comm);

  // the last rank holds the global totals; broadcast them to every rank
  if (rank == npes-1) {
    for(size_t i = 0; i < k; ++i) {
      // ...
    }
  }
  par::Mpi_Bcast<DendroIntL>(gsz, k, npes-1, comm);

  // first rank of each segment when the npes ranks are divided into k blocks
  for(size_t i = 0; i < k; ++i) {
    segment_p0[i] = (i*npes)/k;
  }

  // ... (rank_mid is computed in the omitted lines above)

  // pick the destination rank within each segment so the load stays balanced:
  // the first `overhang` ranks of a segment take one extra element each
  for(size_t i = 0; i < k; ++i) {
    int seg_npes  = ((i == k-1) ? npes - segment_p0[i] : segment_p0[i+1] - segment_p0[i]);
    int overhang  = gsz[i] % seg_npes;
    if (rank_mid < overhang*(gsz[i]/seg_npes + 1)) {
      new_part = segment_p0[i] + rank_mid/(gsz[i]/seg_npes + 1);
    } else {
      new_part = segment_p0[i] + (rank_mid - overhang)/(gsz[i]/seg_npes);
    }
    send_partners[i] = new_part;
  }

  // merge consecutive sends that go to the same partner
  if (send_partners[0] == rank) {
    // ...
  }
  for(size_t i = 1; i < k; ++i) {
    if (send_partners[i] == rank) {
      // ...
    }
    if (send_partners[i] == send_partners[i-1]) {
      send_sizes[idx] += lsz[i];
      // ...
    }
  }

  // Expose a window with one int slot per rank; each rank writes its send size
  // directly into slot `rank` of its partner's window (one-sided exchange).
  MPI_Alloc_mem(sizeof(int)*npes, MPI_INFO_NULL, &rcv);
  for(size_t i = 0; i < npes; ++i) rcv[i] = 0;

  // window size is given in bytes, with a displacement unit of one int
  MPI_Win_create(rcv, npes*sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);

  MPI_Win_fence(MPI_MODE_NOPRECEDE, win);
  for (size_t i = 0; i < send_sizes.size(); i++) {
    // ...
    MPI_Put(&(send_sizes[i]), 1, MPI_INT, send_partners[i], rank, 1, MPI_INT, win);
  }
  MPI_Win_fence((MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED), win);

  // non-zero slots identify the receive partners and their sizes
  recv_sizes.clear(); recv_partners.clear();
  for(size_t i = 0; i < npes; ++i) {
    if (rcv[i]) {
      recv_partners.push_back(i);
      recv_sizes.push_back(rcv[i]);
    }
  }
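The interesting part of the routine is the last block: instead of an all-to-all, every rank writes its send size directly into a slot of its partner's RMA window, and the non-zero slots afterwards tell each rank who will send to it and how much. Below is a minimal, self-contained sketch of that one-sided pattern in plain MPI; the ring-neighbour partner choice and the payload size are purely illustrative.

// Minimal sketch of the one-sided size exchange (illustrative partner/payload).
#include <mpi.h>
#include <cstring>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);

  int rank, npes;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &npes);

  // One slot per possible sender; slot i receives the size rank i will send us.
  int* rcv = nullptr;
  MPI_Alloc_mem(npes * sizeof(int), MPI_INFO_NULL, &rcv);
  std::memset(rcv, 0, npes * sizeof(int));

  MPI_Win win;
  MPI_Win_create(rcv, npes * sizeof(int), sizeof(int), MPI_INFO_NULL,
                 MPI_COMM_WORLD, &win);

  int sendSize = 100 + rank;           // illustrative payload size
  int partner  = (rank + 1) % npes;    // illustrative partner choice

  MPI_Win_fence(MPI_MODE_NOPRECEDE, win);
  // Write our send size into slot `rank` of the partner's window.
  MPI_Put(&sendSize, 1, MPI_INT, partner, rank, 1, MPI_INT, win);
  MPI_Win_fence(MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED, win);

  // Non-zero slots now identify our receive partners and their sizes.
  for (int i = 0; i < npes; ++i) {
    if (rcv[i] != 0) {
      // rank i will send us rcv[i] items
    }
  }

  MPI_Win_free(&win);
  MPI_Free_mem(rcv);
  MPI_Finalize();
  return 0;
}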
A set of parallel utilities.

int getPrevHighestPowerOfTwo(unsigned int n)

Traits to determine MPI_DATATYPE from a C++ datatype.

int AdjustCommunicationPattern(std::vector< int > &send_sizes, std::vector< int > &send_partners, std::vector< int > &recv_sizes, std::vector< int > &recv_partners, MPI_Comm comm)

unsigned int splitCommBinary(MPI_Comm orig_comm, MPI_Comm *new_comm)
    Splits a communication group into two, the first having a power of 2 number of processors and the oth...

Collection of Generic Parallel Functions: Sorting, Partitioning, Searching,...

int splitCommUsingSplittingRank(int splittingRank, MPI_Comm *new_comm, MPI_Comm orig_comm)

unsigned int splitCommBinaryNoFlip(MPI_Comm orig_comm, MPI_Comm *new_comm)
    Splits a communication group into two, the first having a power of 2 number of processors and the oth...

A set of efficient functions that use binary operations to perform some small computations.

int splitComm2way(bool iAmEmpty, MPI_Comm *new_comm, MPI_Comm orig_comm)
    Splits a communication group into two, one containing processors that passed a value of 'false' for t...