A set of parallel utilities. More...
#include "mpi.h"
#include <vector>
#include "parUtils.tcc"
Go to the source code of this file.
Namespaces | |
par | |
Collection of Generic Parallel Functions: Sorting, Partitioning, Searching,... | |
Macros | |
#define | KEEP_HIGH 100 |
#define | KEEP_LOW 101 |
#define | DendroIntL int |
#define | DendroIntLSpecifier %d |
#define | DendroUIntLSpecifier %u |
Functions | |
template<typename T > | |
int | par::Mpi_Isend (T *buf, int count, int dest, int tag, MPI_Comm comm, MPI_Request *request) |
template<typename T > | |
int | par::Mpi_Issend (T *buf, int count, int dest, int tag, MPI_Comm comm, MPI_Request *request) |
template<typename T > | |
int | par::Mpi_Recv (T *buf, int count, int source, int tag, MPI_Comm comm, MPI_Status *status) |
template<typename T > | |
int | par::Mpi_Irecv (T *buf, int count, int source, int tag, MPI_Comm comm, MPI_Request *request) |
template<typename T > | |
int | par::Mpi_Gather (T *sendBuffer, T *recvBuffer, int count, int root, MPI_Comm comm) |
template<typename T , typename S > | |
int | par::Mpi_Sendrecv (T *sendBuf, int sendCount, int dest, int sendTag, S *recvBuf, int recvCount, int source, int recvTag, MPI_Comm comm, MPI_Status *status) |
template<typename T > | |
int | par::Mpi_Bcast (T *buffer, int count, int root, MPI_Comm comm) |
template<typename T > | |
int | par::Mpi_Scan (T *sendbuf, T *recvbuf, int count, MPI_Op op, MPI_Comm comm) |
template<typename T > | |
int | par::Mpi_Reduce (T *sendbuf, T *recvbuf, int count, MPI_Op op, int root, MPI_Comm comm) |
template<typename T > | |
int | par::Mpi_Allreduce (T *sendbuf, T *recvbuf, int count, MPI_Op op, MPI_Comm comm) |
template<typename T > | |
int | par::Mpi_Alltoall (T *sendbuf, T *recvbuf, int count, MPI_Comm comm) |
template<typename T > | |
int | par::Mpi_Allgatherv (T *sendbuf, int sendcount, T *recvbuf, int *recvcounts, int *displs, MPI_Comm comm) |
template<typename T > | |
int | par::Mpi_Allgather (T *sendbuf, T *recvbuf, int count, MPI_Comm comm) |
template<typename T > | |
int | par::Mpi_Alltoallv_sparse (T *sendbuf, int *sendcnts, int *sdispls, T *recvbuf, int *recvcnts, int *rdispls, MPI_Comm comm) |
template<typename T > | |
int | par::Mpi_Alltoallv_dense (T *sendbuf, int *sendcnts, int *sdispls, T *recvbuf, int *recvcnts, int *rdispls, MPI_Comm comm) |
template<typename T > | |
unsigned int | par::defaultWeight (const T *a) |
template<typename T > | |
int | par::partitionW (std::vector< T > &vec, unsigned int(*getWeight)(const T *), MPI_Comm comm) |
A parallel weighted partitioning function. In our implementation, we do not pose any restriction on the input or the number of processors. This function can be used with an odd number of processors as well. Some processors can pass an empty vector as input. The relative ordering of the elements is preserved. More... | |
template<typename T > | |
void | par::rankSamples (std::vector< T > &arr, std::vector< T > samples, MPI_Comm comm) |
template<typename T > | |
std::vector< T > | par::Sorted_Sample_Select (std::vector< T > &arr, unsigned int kway, std::vector< unsigned int > &min_idx, std::vector< unsigned int > &max_idx, std::vector< DendroIntL > &splitter_ranks, MPI_Comm comm) |
template<typename T > | |
std::vector< T > | par::Sorted_approx_Select (std::vector< T > &arr, unsigned int k, MPI_Comm comm) |
template<typename T > | |
std::vector< std::pair< T, DendroIntL > > | par::Sorted_approx_Select_skewed (std::vector< T > &arr, unsigned int k, MPI_Comm comm) |
A new variant of the approximate selection that handles skewed distributions. More... | |
template<typename T > | |
std::vector< T > | par::GetRangeMean (std::vector< T > &arr, std::vector< unsigned int > range_min, std::vector< unsigned int > range_max, MPI_Comm comm) |
template<typename T > | |
std::vector< T > | par::GuessRangeMedian (std::vector< T > &arr, std::vector< unsigned int > range_min, std::vector< unsigned int > range_max, MPI_Comm comm) |
template<typename T > | |
std::vector< T > | par::Sorted_k_Select (std::vector< T > &arr, std::vector< unsigned int > min_idx, std::vector< unsigned int > max_idx, std::vector< DendroIntL > K, std::vector< T > guess, MPI_Comm comm) |
A parallel k-selection algorithm. More... | |
template<typename T > | |
int | par::HyperQuickSort (std::vector< T > &in, std::vector< T > &out, MPI_Comm comm) |
A parallel hyper quick sort implementation. More... | |
template<typename T > | |
int | par::HyperQuickSort_kway (std::vector< T > &in, std::vector< T > &out, MPI_Comm comm) |
template<typename T > | |
int | par::HyperQuickSort_cav (std::vector< T > &in, std::vector< T > &out, MPI_Comm comm) |
template<typename T > | |
int | par::HyperQuickSort (std::vector< T > &arr, MPI_Comm comm) |
template<typename T > | |
int | par::HyperQuickSort_kway (std::vector< T > &in, MPI_Comm comm) |
template<typename T > | |
int | par::HyperQuickSort_cav (std::vector< T > &in, MPI_Comm comm) |
template<typename T > | |
int | par::sampleSort (std::vector< T > &in, std::vector< T > &out, MPI_Comm comm) |
A parallel sample sort implementation. In our implementation, we do not pose any restriction on the input or the number of processors. This function can be used with an odd number of processors as well. Some processors can pass an empty vector as input. If the total number of elements in the vector (globally) is fewer than 10*p^2, where p is the number of processors, then we will use bitonic sort instead of sample sort to sort the vector. We use a parallel bitonic sort to sort the samples in the sample sort algorithm. Hence, the complexity of the algorithm is O(n/p log n/p) + O(p log p). Here, n is the global length of the vector and p is the number of processors. More... | |
template<typename T > | |
int | par::sampleSort (std::vector< T > &in, MPI_Comm comm) |
int | par::splitComm2way (bool iAmEmpty, MPI_Comm *new_comm, MPI_Comm orig_comm) |
Splits a communication group into two, one containing processors that passed a value of 'false' for the parameter 'iAmEmpty' and the another containing processors that passed a value of 'true' for the parameter. Both the groups are sorted in the ascending order of their ranks in the old comm. More... | |
int | par::splitComm2way (const bool *isEmptyList, MPI_Comm *new_comm, MPI_Comm orig_comm) |
Splits a communication group into two depending on the values in isEmptyList. Both the groups are sorted in the ascending order of their ranks in the old comm. All processors must call this function with the same 'isEmptyList' array. More... | |
int | par::splitCommUsingSplittingRank (int splittingRank, MPI_Comm *new_comm, MPI_Comm orig_comm) |
unsigned int | par::splitCommBinary (MPI_Comm orig_comm, MPI_Comm *new_comm) |
Splits a communication group into two, the first having a power of 2 number of processors and the other having the remainder. The first group is sorted in the ascending order of their ranks in the old comm and the second group is sorted in the descending order of their ranks in the old comm. More... | |
unsigned int | par::splitCommBinaryNoFlip (MPI_Comm orig_comm, MPI_Comm *new_comm) |
Splits a communication group into two, the first having a power of 2 number of processors and the other having the remainder. Both the groups are sorted in the ascending order of their ranks in the old comm. More... | |
template<typename T > | |
void | par::MergeLists (std::vector< T > &listA, std::vector< T > &listB, int KEEP_WHAT) |
Merges lists A, and B, retaining either the low or the High in list A. More... | |
template<typename T > | |
void | par::MergeSplit (std::vector< T > &local_list, int which_keys, int partner, MPI_Comm comm) |
The main operation in the parallel bitonic sort algorithm. This implements the compare-split operation. More... | |
template<typename T > | |
void | par::Par_bitonic_sort_incr (std::vector< T > &local_list, int proc_set_size, MPI_Comm comm) |
template<typename T > | |
void | par::Par_bitonic_sort_decr (std::vector< T > &local_list, int proc_set_size, MPI_Comm comm) |
template<typename T > | |
void | par::Par_bitonic_merge_incr (std::vector< T > &local_list, int proc_set_size, MPI_Comm comm) |
template<typename T > | |
void | par::bitonicSort_binary (std::vector< T > &in, MPI_Comm comm) |
An implementation of parallel bitonic sort that expects the number of processors to be a power of 2. However, unlike most implementations, we do not expect the length of the vector (neither locally nor globally) to be a power of 2 or even. Moreover, each processor can call this with a different number of elements. However, we do expect that 'in' has at least 1 element on each processor. More... | |
template<typename T > | |
void | par::bitonicSort (std::vector< T > &in, MPI_Comm comm) |
An implementation of parallel bitonic sort that does not expect the number of processors to be a power of 2. In fact, the number of processors can even be odd. Moreover, we do not even expect the length of the vector (neither locally nor globally) to be a power of 2 or even. Each processor can call this with a different number of elements. However, we do expect that 'in' has at least 1 element on each processor. This recursively calls the function bitonicSort_binary, followed by a special parallel merge. More... | |
A set of parallel utilities.
Definition in file parUtils.h.
#define DendroIntL int |
Definition at line 28 of file parUtils.h.
#define DendroIntLSpecifier %d |
Definition at line 29 of file parUtils.h.
#define DendroUIntLSpecifier %u |
Definition at line 30 of file parUtils.h.
#define KEEP_HIGH 100 |
Definition at line 11 of file parUtils.h.
#define KEEP_LOW 101 |
Definition at line 12 of file parUtils.h.