49 template <
class IT,
class NT>
57 static std::vector<size_t>
find_order(
const std::vector<T> & values)
60 std::vector< std::pair<T, size_t> > tosort;
61 for(
auto & val: values)
63 tosort.push_back(std::make_pair(val,index++));
65 sort(tosort.begin(), tosort.end());
66 std::vector<size_t> permutation;
67 for(
auto & sorted: tosort)
69 permutation.push_back(sorted.second);
74 template <
typename IT1,
typename NT1,
typename IT2,
typename NT2>
75 static void push_to_vectors(std::vector<IT1> & rows, std::vector<IT1> & cols, std::vector<NT1> & vals, IT2 ii, IT2 jj, NT2 vv,
int symmetric,
bool onebased =
true)
85 if(symmetric && ii != jj)
93 static void ProcessLinesWithStringKeys(std::vector< std::map < std::string, uint64_t> > & allkeys, std::vector<std::string> & lines,
int nprocs)
95 std::string frstr, tostr;
96 uint64_t frhash, tohash;
98 for (
auto itr=lines.begin(); itr != lines.end(); ++itr)
102 sscanf(itr->c_str(),
"%s %s %lg", fr, to, &vv);
103 frstr = std::string(fr);
104 tostr = std::string(to);
108 double range_fr =
static_cast<double>(frhash) * static_cast<double>(nprocs);
109 double range_to =
static_cast<double>(tohash) * static_cast<double>(nprocs);
110 size_t owner_fr = range_fr /
static_cast<double>(std::numeric_limits<uint64_t>::max());
111 size_t owner_to = range_to /
static_cast<double>(std::numeric_limits<uint64_t>::max());
116 allkeys[owner_fr].insert(std::make_pair(frstr, frhash));
117 allkeys[owner_to].insert(std::make_pair(tostr, tohash));
122 template <
typename IT1,
typename NT1>
123 static void ProcessStrLinesNPermute(std::vector<IT1> & rows, std::vector<IT1> & cols, std::vector<NT1> & vals, std::vector<std::string> & lines, std::map<std::string, uint64_t> & ultperm)
127 std::string frstr, tostr;
129 for (
auto itr=lines.begin(); itr != lines.end(); ++itr)
131 sscanf(itr->c_str(),
"%s %s %lg", fr, to, &vv);
132 frstr = std::string(fr);
133 tostr = std::string(to);
135 rows.emplace_back((IT1) ultperm[frstr]);
136 cols.emplace_back((IT1) ultperm[tostr]);
137 vals.emplace_back((NT1) vv);
146 template <
typename IT1,
typename NT1>
147 static void ProcessLines(std::vector<IT1> & rows, std::vector<IT1> & cols, std::vector<NT1> & vals, std::vector<std::string> & lines,
int symmetric,
int type,
bool onebased =
true)
153 for (
auto itr=lines.begin(); itr != lines.end(); ++itr)
156 sscanf(itr->c_str(),
"%lld %lld %lg", &ii, &jj, &vv);
163 for (
auto itr=lines.begin(); itr != lines.end(); ++itr)
165 sscanf(itr->c_str(),
"%lld %lld %lld", &ii, &jj, &vv);
172 for (
auto itr=lines.begin(); itr != lines.end(); ++itr)
174 sscanf(itr->c_str(),
"%lld %lld", &ii, &jj);
180 std::cout <<
"COMBBLAS: Unrecognized matrix market scalar type" << std::endl;
186 template <
typename T>
187 static const T *
p2a (
const std::vector<T> & v)
189 if(v.empty())
return NULL;
193 template <
typename T>
194 static T *
p2a (std::vector<T> & v)
196 if(v.empty())
return NULL;
201 template<
typename _ForwardIterator>
202 static bool is_sorted(_ForwardIterator __first, _ForwardIterator __last)
204 if (__first == __last)
207 _ForwardIterator __next = __first;
208 for (++__next; __next != __last; __first = __next, ++__next)
209 if (*__next < *__first)
213 template<
typename _ForwardIterator,
typename _StrictWeakOrdering>
214 static bool is_sorted(_ForwardIterator __first, _ForwardIterator __last, _StrictWeakOrdering __comp)
216 if (__first == __last)
219 _ForwardIterator __next = __first;
220 for (++__next; __next != __last; __first = __next, ++__next)
221 if (__comp(*__next, *__first))
225 template<
typename _ForwardIter,
typename T>
226 static void iota(_ForwardIter __first, _ForwardIter __last, T __val)
228 while (__first != __last)
229 *__first++ = __val++;
231 template<
typename In,
typename Out,
typename UnPred>
232 static Out
copyIf(In first, In last, Out result, UnPred pred)
234 for ( ;first != last; ++first)
240 template<
typename T,
typename I1,
typename I2>
243 T ** array =
new T*[m];
244 for(I1 i = 0; i<m; ++i)
248 template<
typename T,
typename I>
251 for(I i = 0; i<m; ++i)
257 template <
typename SR,
typename NT1,
typename NT2,
typename IT,
typename OVT>
258 static IT
Popping(NT1 * numA, NT2 * numB,
StackEntry< OVT, std::pair<IT,IT> > * multstack,
259 IT & cnz, KNHeap< std::pair<IT,IT> , IT > & sHeap,
Isect<IT> * isect1,
Isect<IT> * isect2);
261 template <
typename IT,
typename NT1,
typename NT2>
265 template <
typename SR,
typename IT,
typename NT1,
typename NT2,
typename OVT>
269 template <
typename SR,
typename IT,
typename NT1,
typename NT2,
typename OVT>
271 StackEntry< OVT, std::pair<IT,IT> > * & multstack);
273 template <
typename NT,
typename IT>
276 NT * narray =
new NT[newsize];
277 memcpy(narray, array, newsize*
sizeof(NT));
283 template <
typename NT,
typename IT>
288 memcpy(multstack, tmpstack,
sizeof(
StackEntry<NT, std::pair<IT,IT> >) * cnzmax);
290 cnzmax = 2*cnzmax + add;
294 template <
typename IT>
296 {
return pair1.first < pair2.first; }
306 template <
typename SR,
typename NT1,
typename NT2,
typename IT,
typename OVT>
308 IT & cnz, KNHeap< std::pair<IT,IT>,IT > & sHeap,
Isect<IT> * isect1,
Isect<IT> * isect2)
310 std::pair<IT,IT> key;
312 sHeap.deleteMin(&key, &inc);
314 OVT value =
SR::multiply(numA[isect1[inc].current], numB[isect2[inc].current]);
315 if (!SR::returnedSAID())
319 if(multstack[cnz-1].key == key)
321 multstack[cnz-1].value = SR::add(multstack[cnz-1].value, value);
325 multstack[cnz].value = value;
326 multstack[cnz].key = key;
332 multstack[cnz].value = value;
333 multstack[cnz].key = key;
345 template <
typename IT,
typename NT1,
typename NT2>
352 for(IT i=0; i < Adcsc.
nzc; ++i)
355 cols[i].
size = Adcsc.
cp[i+1] - Adcsc.
cp[i];
359 for(IT i=0; i < Bdcsc.
nzc; ++i)
362 rows[i].
size = Bdcsc.
cp[i+1] - Bdcsc.
cp[i];
371 IT mink = std::min(Adcsc.
nzc, Bdcsc.
nzc);
374 itr1 = std::set_intersection(cols, cols + Adcsc.
nzc, rows, rows + Bdcsc.
nzc, isect1);
375 itr2 = std::set_intersection(rows, rows + Bdcsc.
nzc, cols, cols + Adcsc.
nzc, isect2);
385 template <
typename SR,
typename IT,
typename NT1,
typename NT2,
typename OVT>
389 std::pair<IT,IT> supremum(std::numeric_limits<IT>::max(), std::numeric_limits<IT>::max());
390 std::pair<IT,IT> infimum (std::numeric_limits<IT>::min(), std::numeric_limits<IT>::min());
392 KNHeap< std::pair<IT,IT> , IT > sHeapDcsc(supremum, infimum);
396 for(IT i=0; i< kisect; ++i)
399 sHeapDcsc.insert(key, i);
403 IT cnzmax = Adcsc.
nz + Bdcsc.
nz;
406 bool finished =
false;
410 if (cnz + kisect > cnzmax)
416 IT inc = Popping< SR >(Adcsc.
numx, Bdcsc.
numx, multstack, cnz, sHeapDcsc, isect1, isect2);
417 isect1[inc].current++;
419 if(isect1[inc].current < isect1[inc].
size + isect1[inc].start)
421 std::pair<IT,IT> key(Bdcsc.
ir[isect2[inc].
current], Adcsc.
ir[isect1[inc].current]);
422 sHeapDcsc.insert(key, inc);
426 else if(isect2[inc].current + 1 < isect2[inc].
size + isect2[inc].start)
428 isect1[inc].current = isect1[inc].start;
431 std::pair<IT,IT> key(Bdcsc.
ir[isect2[inc].
current], Adcsc.
ir[isect1[inc].current]);
432 sHeapDcsc.insert(key, inc);
448 template <
typename SR,
typename IT,
typename NT1,
typename NT2,
typename OVT>
450 StackEntry< OVT, std::pair<IT,IT> > * & multstack)
453 IT cnzmax = Adcsc.
nz + Bdcsc.
nz;
456 float cf =
static_cast<float>(nA+1) / static_cast<float>(Adcsc.
nzc);
457 IT csize =
static_cast<IT
>(ceil(cf));
462 for(IT i=0; i< Bdcsc.
nzc; ++i)
465 IT nnzcol = Bdcsc.
cp[i+1] - Bdcsc.
cp[i];
472 std::vector<IT> colnums(nnzcol);
476 std::vector< std::pair<IT,IT> > colinds(nnzcol);
477 std::copy(Bdcsc.
ir + Bdcsc.
cp[i], Bdcsc.
ir + Bdcsc.
cp[i+1], colnums.begin());
479 Adcsc.
FillColInds(&colnums[0], colnums.size(), colinds, aux, csize);
483 for(IT j = 0; (unsigned)j < colnums.size(); ++j)
485 if(colinds[j].first != colinds[j].second)
488 maxnnz += colinds[j].second - colinds[j].first;
491 std::make_heap(wset, wset+hsize);
493 if (cnz + maxnnz > cnzmax)
501 std::pop_heap(wset, wset + hsize);
502 IT locb = wset[hsize-1].
runr;
508 if (!SR::returnedSAID())
510 if(cnz != prevcnz && multstack[cnz-1].key.second == wset[hsize-1].
key)
512 multstack[cnz-1].value = SR::add(multstack[cnz-1].value, mrhs);
516 multstack[cnz].value = mrhs;
517 multstack[cnz++].key = std::make_pair(Bdcsc.
jc[i], wset[hsize-1].
key);
522 if( (++(colinds[locb].first)) != colinds[locb].second)
525 wset[hsize-1].
key = Adcsc.
ir[colinds[locb].first];
526 wset[hsize-1].
num = Adcsc.
numx[colinds[locb].first];
527 std::push_heap(wset, wset+hsize);
static void iota(_ForwardIter __first, _ForwardIter __last, T __val)
static void SpIntersect(const Dcsc< IT, NT1 > &Adcsc, const Dcsc< IT, NT2 > &Bdcsc, Isect< IT > *&cols, Isect< IT > *&rows, Isect< IT > *&isect1, Isect< IT > *&isect2, Isect< IT > *&itr1, Isect< IT > *&itr2)
static bool is_sorted(_ForwardIterator __first, _ForwardIterator __last)
static void DoubleStack(StackEntry< NT, std::pair< IT, IT > > *&multstack, IT &cnzmax, IT add)
static bool first_compare(std::pair< IT, IT > pair1, std::pair< IT, IT > pair2)
static const T * p2a(const std::vector< T > &v)
static void ProcessLinesWithStringKeys(std::vector< std::map< std::string, uint64_t > > &allkeys, std::vector< std::string > &lines, int nprocs)
static void ProcessStrLinesNPermute(std::vector< IT1 > &rows, std::vector< IT1 > &cols, std::vector< NT1 > &vals, std::vector< std::string > &lines, std::map< std::string, uint64_t > &ultperm)
static Out copyIf(In first, In last, Out result, UnPred pred)
IT * ir
row indices, size nz
IT * cp
The master array, size nzc+1 (keeps column pointers)
static void push_to_vectors(std::vector< IT1 > &rows, std::vector< IT1 > &cols, std::vector< NT1 > &vals, IT2 ii, IT2 jj, NT2 vv, int symmetric, bool onebased=true)
void FillColInds(const VT *colnums, IT nind, std::vector< std::pair< IT, IT > > &colinds, IT *aux, IT csize) const
static T ** allocate2D(I1 m, I2 n)
static std::vector< size_t > find_order(const std::vector< T > &values)
NT * numx
generic values, size nz
static T * p2a(std::vector< T > &v)
static IT SpColByCol(const Dcsc< IT, NT1 > &Adcsc, const Dcsc< IT, NT2 > &Bdcsc, IT nA, StackEntry< OVT, std::pair< IT, IT > > *&multstack)
IT nzc
number of columns with at least one non-zero in them
static void ProcessLines(std::vector< IT1 > &rows, std::vector< IT1 > &cols, std::vector< NT1 > &vals, std::vector< std::string > &lines, int symmetric, int type, bool onebased=true)
IT ConstructAux(IT ndim, IT *&aux) const
SpDCCols< IT, NT > * multiply(SpDCCols< IT, NT > &splitA, SpDCCols< IT, NT > &splitB, CCGrid &CMG, bool isBT, bool threaded)
IT * jc
col indices, size nzc
void MurmurHash3_x64_64(const void *key, int len, uint32_t seed, void *out)
static IT SpCartesian(const Dcsc< IT, NT1 > &Adcsc, const Dcsc< IT, NT2 > &Bdcsc, IT kisect, Isect< IT > *isect1, Isect< IT > *isect2, StackEntry< OVT, std::pair< IT, IT > > *&multstack)
static void deallocate2D(T **array, I m)
static bool is_sorted(_ForwardIterator __first, _ForwardIterator __last, _StrictWeakOrdering __comp)
static void ShrinkArray(NT *&array, IT newsize)
static IT Popping(NT1 *numA, NT2 *numB, StackEntry< OVT, std::pair< IT, IT > > *multstack, IT &cnz, KNHeap< std::pair< IT, IT >, IT > &sHeap, Isect< IT > *isect1, Isect< IT > *isect2)