31 #ifndef BITMAPFRINGE_H 32 #define BITMAPFRINGE_H 40 template <
class IT,
class VT>
51 long num_local_send = x.MyLocLength(), num_local_recv;
52 diagneigh = cg->GetComplementRank();
53 MPI_Sendrecv(&num_local_send, 1, MPI_LONG, diagneigh,
TROST,
54 &num_local_recv, 1, MPI_LONG, diagneigh,
TROST, World, &status);
57 MPI_Comm_size(ColWorld, &colneighs);
58 MPI_Comm_rank(ColWorld, &colrank);
60 int counts[colneighs];
61 counts[colrank] = num_local_recv;
62 MPI_Allgather(MPI_IN_PLACE, 1, MPI_INT, counts, 1, MPI_INT, ColWorld);
66 std::partial_sum(counts, counts+colneighs-1, dpls+1);
67 long total_size = dpls[colneighs-1] + counts[colneighs-1];
68 local_offset = x.LengthUntil();
72 word_dpls =
new int[colneighs];
73 byte_dpls =
new int[colneighs];
74 send_counts =
new int[colneighs];
75 for (
int c=0; c<colneighs; c++) {
76 word_dpls[c] = (dpls[c] + (64-(dpls[c]%64)))>>6;
77 byte_dpls[c] = word_dpls[c]<<3;
78 send_counts[c] = (counts[c] - (64-(dpls[c] % 64)) + 63)>>6;
82 trans_subword_disp = dpls[colrank] % 64;
86 trans_words_send = (num_local_send + local_subword_disp + 63)>>6;
87 trans_words_recv = (num_local_recv + trans_subword_disp + 63)>>6;
90 local_bm =
new BitMap(num_local_send + local_subword_disp);
91 next_bm =
new BitMap(num_local_send + local_subword_disp);
92 trans_bm =
new BitMap(num_local_recv + trans_subword_disp);
93 gather_bm =
new BitMap(total_size);
101 delete[] send_counts;
109 local_bm->
set_bit(spit.GetLocIndex() + local_subword_disp);
115 for (
long i=0; i<total_updates; i++)
116 local_bm->
set_bit(updates[i] - local_offset + local_subword_disp);
117 local_num_set = total_updates;
121 local_num_set = next_num_set;
130 next_num_set += num_updates;
136 MPI_Comm World = cg->GetWorld();
137 MPI_Comm ColWorld = cg->GetColWorld();
146 double t1 = MPI_Wtime();
150 double t2 = MPI_Wtime();
155 gather_bm->
data()[0] = 0;
156 uint64_t firsts[colneighs];
157 firsts[colrank] = trans_bm->
data()[0];
159 for (
int c=0; c<colneighs; c++)
160 gather_bm->
data()[word_dpls[c]-1] |= firsts[c];
169 IT *updates =
new IT[local_num_set];
170 IT bm_index=local_subword_disp, up_index=0;
172 if (local_bm->
get_bit(bm_index))
173 updates[up_index++] = bm_index - local_subword_disp;
176 while(bm_index != -1) {
177 updates[up_index++] = bm_index - local_subword_disp;
180 x.
BulkSet(updates, local_num_set);
186 IT global_num_set = 0;
187 MPI_Allreduce(&local_num_set, &global_num_set, 1, MPIType<IT>(), MPI_SUM, cg->GetWorld());
188 return global_num_set;
193 return local_subword_disp;
201 std::shared_ptr<CommGrid> cg;
215 int local_subword_disp;
216 int trans_subword_disp;
217 long trans_words_send;
218 long trans_words_recv;
223 #endif // BITMAPFRINGE_H
double bottomup_allgather
bool get_bit(uint64_t pos)
long get_next_bit(uint64_t pos)
BitMapFringe(std::shared_ptr< CommGrid > grid, FullyDistSpVec< IT, VT > &x)
void SetNext(IT local_index)
void UpdateSpVec(FullyDistSpVec< IT, VT > &x)
MPI_Datatype MPIType< uint64_t >(void)
std::shared_ptr< CommGrid > getcommgrid() const
void LoadFromUpdates(IT *updates, long total_updates)
MPI_Datatype MPIType< int32_t >(void)
void BulkSet(IT inds[], int count)
void LoadFromSpVec(FullyDistSpVec< IT, VT > &x)
void IncrementNumSet(int num_updates)
BitMap * TransposeGather()
void set_bit(uint64_t pos)