16 template <
typename RT,
typename IT,
typename NT>
19 std::vector<RT> splitters(nsplits+1);
20 splitters[0] =
static_cast<RT
>(0);
23 #pragma omp parallel for 25 for(
int i=1; i< nsplits; i++)
27 IT cur_col = i * (spTuples->
getncol()/nsplits);
28 std::tuple<IT,IT,NT> search_tuple(0, cur_col, 0);
29 std::tuple<IT,IT,NT>* it = std::lower_bound (spTuples->
tuples, spTuples->
tuples + spTuples->
getnnz(), search_tuple, comp);
30 splitters[i] = (RT) (it - spTuples->
tuples);
32 splitters[nsplits] = spTuples->
getnnz();
39 template<
class IT,
class NT>
42 int nlists = ArrSpTups.size();
44 std::vector<std::tuple<IT, IT, int>> heap(nlists);
45 std::vector<IT> curptr(nlists, static_cast<IT>(0));
47 for(
int i=0; i< nlists; ++i)
49 if(ArrSpTups[i]->getnnz()>0)
51 heap[hsize++] = std::make_tuple(std::get<0>(ArrSpTups[i]->tuples[0]), std::get<1>(ArrSpTups[i]->tuples[0]), i);
55 std::make_heap(heap.data(), heap.data()+hsize, std::not2(heapcomp));
57 std::tuple<IT, IT, NT> curTuple;
61 std::pop_heap(heap.data(), heap.data() + hsize, std::not2(heapcomp));
62 int source = std::get<2>(heap[hsize-1]);
63 if( (estnnz ==0) || (std::get<0>(curTuple) != std::get<0>(heap[hsize-1])) || (std::get<1>(curTuple) != std::get<1>(heap[hsize-1])))
65 curTuple = ArrSpTups[source]->tuples[curptr[source]];
69 if(curptr[source] != ArrSpTups[source]->getnnz())
71 heap[hsize-1] = std::make_tuple(std::get<0>(ArrSpTups[source]->tuples[curptr[source]]),
72 std::get<1>(ArrSpTups[source]->tuples[curptr[source]]), source);
73 std::push_heap(heap.data(), heap.data()+hsize, std::not2(heapcomp));
93 template<
class SR,
class IT,
class NT>
96 int nlists = ArrSpTups.size();
98 std::vector<std::tuple<IT, IT, int>> heap(nlists);
99 std::vector<IT> curptr(nlists, static_cast<IT>(0));
102 for(
int i=0; i< nlists; ++i)
104 if(ArrSpTups[i]->getnnz()>0)
106 estnnz += ArrSpTups[i]->getnnz();
107 heap[hsize++] = std::make_tuple(std::get<0>(ArrSpTups[i]->tuples[0]), std::get<1>(ArrSpTups[i]->tuples[0]), i);
111 std::make_heap(heap.data(), heap.data()+hsize, std::not2(heapcomp));
116 std::pop_heap(heap.data(), heap.data() + hsize, std::not2(heapcomp));
117 int source = std::get<2>(heap[hsize-1]);
120 ((std::get<0>(ntuples[cnz-1]) == std::get<0>(heap[hsize-1])) && (std::get<1>(ntuples[cnz-1]) == std::get<1>(heap[hsize-1]))) )
122 std::get<2>(ntuples[cnz-1]) = SR::add(std::get<2>(ntuples[cnz-1]), ArrSpTups[source]->numvalue(curptr[source]++));
126 ntuples[cnz++] = ArrSpTups[source]->tuples[curptr[source]++];
129 if(curptr[source] != ArrSpTups[source]->getnnz())
131 heap[hsize-1] = std::make_tuple(std::get<0>(ArrSpTups[source]->tuples[curptr[source]]),
132 std::get<1>(ArrSpTups[source]->tuples[curptr[source]]), source);
133 std::push_heap(heap.data(), heap.data()+hsize, std::not2(heapcomp));
146 template<
class SR,
class IT,
class NT>
150 int nlists = ArrSpTups.size();
163 std::tuple<IT, IT, NT>* mergeTups =
static_cast<std::tuple<IT, IT, NT>*
> 164 (::operator
new (
sizeof(std::tuple<IT, IT, NT>[ArrSpTups[0]->getnnz()])));
166 #pragma omp parallel for 168 for(
int i=0; i<ArrSpTups[0]->getnnz(); i++)
169 mergeTups[i] = ArrSpTups[0]->tuples[i];
171 return new SpTuples<IT,NT> (ArrSpTups[0]->getnnz(), mdim, ndim, mergeTups,
true);
176 for(
int i=0; i< nlists; ++i)
178 if((mdim != ArrSpTups[i]->getnrow()) || ndim != ArrSpTups[i]->getncol())
180 std::cerr <<
"Dimensions of SpTuples do not match on multiwayMerge()" << std::endl;
189 nthreads = omp_get_num_threads();
192 int nsplits = 4*nthreads;
193 nsplits = std::min(nsplits, (
int)ndim);
194 std::vector< std::vector<IT> > colPtrs;
195 for(
int i=0; i< nlists; i++)
197 colPtrs.push_back(findColSplitters<IT>(ArrSpTups[i], nsplits));
200 std::vector<IT> mergedNnzPerSplit(nsplits);
201 std::vector<IT> inputNnzPerSplit(nsplits);
204 #pragma omp parallel for schedule(dynamic) 206 for(
int i=0; i< nsplits; i++)
208 std::vector<SpTuples<IT,NT> *> listSplitTups(nlists);
209 IT t =
static_cast<IT
>(0);
210 for(
int j=0; j< nlists; ++j)
212 IT curnnz= colPtrs[j][i+1] - colPtrs[j][i];
213 listSplitTups[j] =
new SpTuples<IT, NT> (curnnz, mdim, ndim, ArrSpTups[j]->tuples + colPtrs[j][i],
true);
214 t += colPtrs[j][i+1] - colPtrs[j][i];
217 inputNnzPerSplit[i] = t;
220 std::vector<IT> mdisp(nsplits+1,0);
221 for(
int i=0; i<nsplits; ++i)
222 mdisp[i+1] = mdisp[i] + mergedNnzPerSplit[i];
223 IT mergedNnzAll = mdisp[nsplits];
226 #ifdef COMBBLAS_DEBUG 227 IT inputNnzAll = std::accumulate(inputNnzPerSplit.begin(), inputNnzPerSplit.end(),
static_cast<IT
>(0));
228 double ratio = inputNnzAll / (double) mergedNnzAll;
229 std::ostringstream outs;
230 outs <<
"Multiwaymerge: inputNnz/mergedNnz = " << ratio << std::endl;
236 std::tuple<IT, IT, NT> * mergeBuf =
static_cast<std::tuple<IT, IT, NT>*
> (::operator
new (
sizeof(std::tuple<IT, IT, NT>[mergedNnzAll])));
240 #pragma omp parallel for schedule(dynamic) 242 for(
int i=0; i< nsplits; i++)
244 std::vector<SpTuples<IT,NT> *> listSplitTups(nlists);
245 for(
int j=0; j< nlists; ++j)
247 IT curnnz= colPtrs[j][i+1] - colPtrs[j][i];
248 listSplitTups[j] =
new SpTuples<IT, NT> (curnnz, mdim, ndim, ArrSpTups[j]->tuples + colPtrs[j][i],
true);
250 SerialMerge<SR>(listSplitTups, mergeBuf + mdisp[i]);
253 for(
int i=0; i< nlists; i++)
258 return new SpTuples<IT, NT> (mergedNnzAll, mdim, ndim, mergeBuf,
true,
true);
std::tuple< IT, IT, NT > * tuples
IT SerialMergeNNZ(const std::vector< SpTuples< IT, NT > *> &ArrSpTups)
static void Print(const std::string &s)
std::vector< RT > findColSplitters(SpTuples< IT, NT > *&spTuples, int nsplits)
void SerialMerge(const std::vector< SpTuples< IT, NT > *> &ArrSpTups, std::tuple< IT, IT, NT > *ntuples)
SpTuples< IT, NT > * MultiwayMerge(std::vector< SpTuples< IT, NT > *> &ArrSpTups, IT mdim=0, IT ndim=0, bool delarrs=false)