47 template <
class IU,
class NU>
50 template <
class IU,
class NU>
53 template <
class IU,
class NU>
63 template <
typename SR,
typename IU,
typename NU,
typename RHS,
typename LHS>
68 for(IU j =0; j<A.
dcsc->nzc; ++j)
70 IU colid = A.
dcsc->jc[j];
71 for(IU i = A.
dcsc->cp[j]; i< A.dcsc->cp[j+1]; ++i)
73 IU rowid = A.
dcsc->ir[i];
74 SR::axpy(A.
dcsc->numx[i], x[colid], y[rowid]);
81 template <
typename SR,
typename IU,
typename NU,
typename RHS,
typename LHS>
90 nthreads = omp_get_num_threads();
95 LHS ** tomerge = SpHelper::allocate2D<LHS>(nthreads, nlocrows);
98 for(
int i=0; i<nthreads; ++i)
100 std::fill_n(tomerge[i], nlocrows,
id);
103 #pragma omp parallel for 104 for(IU j =0; j<A.
dcsc->nzc; ++j)
108 curthread = omp_get_thread_num();
111 LHS * loc2merge = tomerge[curthread];
113 IU colid = A.
dcsc->jc[j];
114 for(IU i = A.
dcsc->cp[j]; i< A.dcsc->cp[j+1]; ++i)
116 IU rowid = A.
dcsc->ir[i];
117 SR::axpy(A.
dcsc->numx[i], x[colid], loc2merge[rowid]);
121 #pragma omp parallel for 122 for(IU j=0; j < nlocrows; ++j)
124 for(
int i=0; i< nthreads; ++i)
126 y[j] = SR::add(y[j], tomerge[i][j]);
138 template <
typename SR,
typename IU,
typename NUM,
typename DER,
typename IVT,
typename OVT>
140 int32_t * & sendindbuf, OVT * & sendnumbuf,
int * & sdispls,
int p_c,
PreAllocatedSPA<OVT> & SPA)
146 sdispls =
new int[p_c]();
147 if(A.
getnnz() > 0 && nnzx > 0)
152 int32_t nlocrows =
static_cast<int32_t
>(A.
getnrow());
153 int32_t perpiece = nlocrows / splits;
154 std::vector< std::vector< int32_t > > indy(splits);
155 std::vector< std::vector< OVT > > numy(splits);
159 #pragma omp parallel for // num_threads(6) 161 for(
int i=0; i<splits; ++i)
166 SpMXSpV_ForThreading<SR>(*(A.
GetInternal(i)), perpiece, indx, numx, nnzx, indy[i], numy[i], i*perpiece, SPA.
V_localy[i], SPA.
V_isthere[i], SPA.
V_inds[i]);
168 SpMXSpV_ForThreading<SR>(*(A.
GetInternal(i)), nlocrows - perpiece*i, indx, numx, nnzx, indy[i], numy[i], i*perpiece, SPA.
V_localy[i], SPA.
V_isthere[i], SPA.
V_inds[i]);
173 SpMXSpV_ForThreading<SR>(*(A.
GetInternal(i)), perpiece, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
175 SpMXSpV_ForThreading<SR>(*(A.
GetInternal(i)), nlocrows - perpiece*i, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
179 std::vector<int> accum(splits+1, 0);
180 for(
int i=0; i<splits; ++i)
181 accum[i+1] = accum[i] + indy[i].
size();
183 sendindbuf =
new int32_t[accum[splits]];
184 sendnumbuf =
new OVT[accum[splits]];
185 int32_t perproc = nlocrows / p_c;
186 int32_t last_rec = p_c-1;
190 std::vector<int32_t> end_recs(splits);
191 for(
int i=0; i<splits; ++i)
196 end_recs[i] = std::min(indy[i].back() / perproc, last_rec);
199 #pragma omp parallel for // num_threads(6) 201 for(
int i=0; i<splits; ++i)
207 int32_t beg_rec = std::min( indy[i].front() / perproc, last_rec);
214 while (k >= 0 && end_recs[k] == -1) k--;
217 std::fill(sdispls+end_recs[k]+1, sdispls+beg_rec+1, accum[i]);
222 if(beg_rec == end_recs[i])
224 std::transform(indy[i].begin(), indy[i].end(), indy[i].begin(), std::bind2nd(std::minus<int32_t>(), perproc*beg_rec));
225 std::copy(indy[i].begin(), indy[i].end(), sendindbuf+accum[i]);
226 std::copy(numy[i].begin(), numy[i].end(), sendnumbuf+accum[i]);
232 int end = indy[i].size();
233 for(
int cur=0; cur< end; ++cur)
235 int32_t cur_rec = std::min( indy[i][cur] / perproc, last_rec);
236 while(beg_rec != cur_rec)
238 sdispls[++beg_rec] = accum[i] + cur;
240 sendindbuf[ accum[i] + cur ] = indy[i][cur] - perproc*beg_rec;
241 sendnumbuf[ accum[i] + cur ] = numy[i][cur];
244 std::vector<int32_t>().swap(indy[i]);
245 std::vector<OVT>().swap(numy[i]);
246 bool lastnonzero =
true;
247 for(
int k=i+1; k < splits; ++k)
249 if(end_recs[k] != -1)
253 std::fill(sdispls+end_recs[i]+1, sdispls+p_c, accum[i+1]);
256 return accum[splits];
260 std::cout <<
"Something is wrong, splits should be nonzero for multithreaded execution" << std::endl;
279 template <
typename SR,
typename IU,
typename NUM,
typename DER,
typename IVT,
typename OVT>
281 int32_t * sendindbuf, OVT * sendnumbuf,
int * cnts,
int * dspls,
int p_c)
283 if(A.
getnnz() > 0 && nnzx > 0)
288 std::vector< std::vector<int32_t> > indy(splits);
289 std::vector< std::vector< OVT > > numy(splits);
290 int32_t nlocrows =
static_cast<int32_t
>(A.
getnrow());
291 int32_t perpiece = nlocrows / splits;
294 #pragma omp parallel for 296 for(
int i=0; i<splits; ++i)
299 SpMXSpV_ForThreading<SR>(*(A.
GetInternal(i)), perpiece, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
301 SpMXSpV_ForThreading<SR>(*(A.
GetInternal(i)), nlocrows - perpiece*i, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
304 int32_t perproc = nlocrows / p_c;
305 int32_t last_rec = p_c-1;
309 std::vector<int32_t> end_recs(splits);
310 for(
int i=0; i<splits; ++i)
315 end_recs[i] = std::min(indy[i].back() / perproc, last_rec);
318 int ** loc_rec_cnts =
new int *[splits];
320 #pragma omp parallel for 322 for(
int i=0; i<splits; ++i)
324 loc_rec_cnts[i] =
new int[p_c]();
327 int32_t cur_rec = std::min( indy[i].front() / perproc, last_rec);
328 int32_t lastdata = (cur_rec+1) * perproc;
329 for(
typename std::vector<int32_t>::iterator it = indy[i].begin(); it != indy[i].end(); ++it)
332 if( ( (*it) >= lastdata ) && cur_rec != last_rec )
334 cur_rec = std::min( (*it) / perproc, last_rec);
335 lastdata = (cur_rec+1) * perproc;
337 ++loc_rec_cnts[i][cur_rec];
342 #pragma omp parallel for 344 for(
int i=0; i<splits; ++i)
350 int32_t beg_rec = std::min( indy[i].front() / perproc, last_rec);
351 int32_t alreadysent = 0;
352 for(
int before = i-1; before >= 0; before--)
353 alreadysent += loc_rec_cnts[before][beg_rec];
355 if(beg_rec == end_recs[i])
357 std::transform(indy[i].begin(), indy[i].end(), indy[i].begin(), std::bind2nd(std::minus<int32_t>(), perproc*beg_rec));
358 std::copy(indy[i].begin(), indy[i].end(), sendindbuf + dspls[beg_rec] + alreadysent);
359 std::copy(numy[i].begin(), numy[i].end(), sendnumbuf + dspls[beg_rec] + alreadysent);
363 int32_t cur_rec = beg_rec;
364 int32_t lastdata = (cur_rec+1) * perproc;
365 for(
typename std::vector<int32_t>::iterator it = indy[i].begin(); it != indy[i].end(); ++it)
367 if( ( (*it) >= lastdata ) && cur_rec != last_rec )
369 cur_rec = std::min( (*it) / perproc, last_rec);
370 lastdata = (cur_rec+1) * perproc;
376 sendindbuf[ dspls[cur_rec] + alreadysent ] = (*it) - perproc*cur_rec;
377 sendnumbuf[ dspls[cur_rec] + (alreadysent++) ] = *(numy[i].begin() + (it-indy[i].begin()));
383 for(
int i=0; i< splits; ++i)
385 for(
int j=0; j< p_c; ++j)
386 cnts[j] += loc_rec_cnts[i][j];
387 delete [] loc_rec_cnts[i];
389 delete [] loc_rec_cnts;
393 std::cout <<
"Something is wrong, splits should be nonzero for multithreaded execution" << std::endl;
401 template <
typename SR,
typename MIND,
typename VIND,
typename DER,
typename NUM,
typename IVT,
typename OVT>
404 if(A.
getnnz() > 0 && nnzx > 0)
408 std::cout <<
"Call dcsc_gespmv_threaded instead" << std::endl;
412 SpMXSpV<SR>(*(A.
GetInternal()), (VIND) A.
getnrow(), indx, numx, nnzx, indy, numy, SPA);
420 template <
typename SR,
typename IU,
typename DER,
typename NUM,
typename IVT,
typename OVT>
422 int32_t * indy, OVT * numy,
int * cnts,
int * dspls,
int p_c,
bool indexisvalue)
424 if(A.
getnnz() > 0 && nnzx > 0)
432 SpMXSpV<SR>(*(A.
GetInternal()), (int32_t) A.
getnrow(), indx, numx, nnzx, indy, numy, cnts, dspls, p_c);
438 template<
typename IU>
441 A.splits = numsplits;
442 IU perpiece = A.m / A.splits;
443 std::vector<IU> prevcolids(A.splits, -1);
444 std::vector<IU> nzcs(A.splits, 0);
445 std::vector<IU> nnzs(A.splits, 0);
446 std::vector < std::vector < std::pair<IU,IU> > > colrowpairs(A.splits);
447 if(A.nnz > 0 && A.
dcsc != NULL)
449 for(IU i=0; i< A.
dcsc->nzc; ++i)
451 for(IU j = A.
dcsc->cp[i]; j< A.dcsc->cp[i+1]; ++j)
453 IU colid = A.
dcsc->jc[i];
454 IU rowid = A.
dcsc->ir[j];
455 IU owner = std::min(rowid / perpiece, static_cast<IU>(A.splits-1));
456 colrowpairs[owner].push_back(std::make_pair(colid, rowid - owner*perpiece));
458 if(prevcolids[owner] != colid)
460 prevcolids[owner] = colid;
473 for(
int i=0; i< A.splits; ++i)
475 sort(colrowpairs[i].begin(), colrowpairs[i].end());
477 std::fill(A.
dcscarr[i]->numx, A.
dcscarr[i]->numx+nnzs[i], static_cast<bool>(1));
479 IU cindex = colrowpairs[i][0].first;
480 IU rindex = colrowpairs[i][0].second;
483 A.
dcscarr[i]->jc[curnzc] = cindex;
484 A.
dcscarr[i]->cp[curnzc++] = 0;
486 for(IU j=1; j<nnzs[i]; ++j)
488 cindex = colrowpairs[i][j].first;
489 rindex = colrowpairs[i][j].second;
492 if(cindex != A.
dcscarr[i]->jc[curnzc-1])
494 A.
dcscarr[i]->jc[curnzc] = cindex;
495 A.
dcscarr[i]->cp[curnzc++] = j;
498 A.
dcscarr[i]->cp[curnzc] = nnzs[i];
509 template<
class SR,
class NUO,
class IU,
class NU1,
class NU2>
513 bool clearA =
false,
bool clearB =
false)
524 Isect<IU> *isect1, *isect2, *itr1, *itr2, *cols, *rows;
527 IU kisect =
static_cast<IU
>(itr1-isect1);
538 IU cnz = SpHelper::SpCartesian< SR > (*(A.
dcsc), *(B.
dcsc), kisect, isect1, isect2, multstack);
552 template<
class SR,
class NUO,
class IU,
class NU1,
class NU2>
556 bool clearA =
false,
bool clearB =
false)
565 IU cnz = SpHelper::SpColByCol< SR > (*(A.
dcsc), *(B.
dcsc), A.n, multstack);
576 template<
class SR,
class NUO,
class IU,
class NU1,
class NU2>
580 bool clearA =
false,
bool clearB =
false)
584 std::cout <<
"Tuples_AtXBt function has not been implemented yet !" << std::endl;
589 template<
class SR,
class NUO,
class IU,
class NU1,
class NU2>
593 bool clearA =
false,
bool clearB =
false)
597 std::cout <<
"Tuples_AtXBn function has not been implemented yet !" << std::endl;
604 template<
class SR,
class IU,
class NU>
607 int hsize = ArrSpTups.size();
614 mstar = ArrSpTups[0]->m;
615 nstar = ArrSpTups[0]->n;
617 for(
int i=1; i< hsize; ++i)
619 if((mstar != ArrSpTups[i]->m) || nstar != ArrSpTups[i]->n)
621 std::cerr <<
"Dimensions do not match on MergeAll()" << std::endl;
628 std::tuple<IU, IU, int> * heap =
new std::tuple<IU, IU, int> [hsize];
629 IU * curptr =
new IU[hsize];
630 std::fill_n(curptr, hsize, static_cast<IU>(0));
633 for(
int i=0; i< hsize; ++i)
635 estnnz += ArrSpTups[i]->getnnz();
636 heap[i] = std::make_tuple(std::get<0>(ArrSpTups[i]->tuples[0]), std::get<1>(ArrSpTups[i]->tuples[0]), i);
638 std::make_heap(heap, heap+hsize, std::not2(heapcomp));
640 std::tuple<IU, IU, NU> * ntuples =
new std::tuple<IU,IU,NU>[estnnz];
645 std::pop_heap(heap, heap + hsize, std::not2(heapcomp));
646 int source = std::get<2>(heap[hsize-1]);
649 ((std::get<0>(ntuples[cnz-1]) == std::get<0>(heap[hsize-1])) && (std::get<1>(ntuples[cnz-1]) == std::get<1>(heap[hsize-1]))) )
651 std::get<2>(ntuples[cnz-1]) = SR::add(std::get<2>(ntuples[cnz-1]), ArrSpTups[source]->numvalue(curptr[source]++));
655 ntuples[cnz++] = ArrSpTups[source]->tuples[curptr[source]++];
658 if(curptr[source] != ArrSpTups[source]->getnnz())
660 heap[hsize-1] = std::make_tuple(std::get<0>(ArrSpTups[source]->tuples[curptr[source]]),
661 std::get<1>(ArrSpTups[source]->tuples[curptr[source]]), source);
662 std::push_heap(heap, heap+hsize, std::not2(heapcomp));
674 for(
size_t i=0; i<ArrSpTups.size(); ++i)
693 template <
typename IU,
typename NU1,
typename NU2>
705 estnzc = std::min(A.
nzc, B->
nzc);
706 estnz = std::min(A.
nz, B->
nz);
719 while(i< A.
nzc && B != NULL && j<B->nzc)
721 if(A.
jc[i] > B->
jc[j]) ++j;
722 else if(A.
jc[i] < B->
jc[j]) ++i;
728 while (ii < A.
cp[i+1] && jj < B->cp[j+1])
730 if (A.
ir[ii] < B->
ir[jj]) ++ii;
731 else if (A.
ir[ii] > B->
ir[jj]) ++jj;
734 temp.
ir[curnz] = A.
ir[ii];
740 temp.
jc[curnzc++] = A.
jc[i];
741 temp.
cp[curnzc] = temp.
cp[curnzc-1] + curnz-prevnz;
750 while(i< A.
nzc && B != NULL && j< B->nzc)
752 if(A.
jc[i] > B->
jc[j]) ++j;
753 else if(A.
jc[i] < B->
jc[j])
755 temp.
jc[curnzc++] = A.
jc[i++];
756 for(IU k = A.
cp[i-1]; k< A.
cp[i]; k++)
758 temp.
ir[curnz] = A.
ir[k];
761 temp.
cp[curnzc] = temp.
cp[curnzc-1] + (A.
cp[i] - A.
cp[i-1]);
768 while (ii < A.
cp[i+1] && jj < B->cp[j+1])
770 if (A.
ir[ii] > B->
ir[jj]) ++jj;
771 else if (A.
ir[ii] < B->
ir[jj])
773 temp.
ir[curnz] = A.
ir[ii];
782 while (ii < A.
cp[i+1])
784 temp.
ir[curnz] = A.
ir[ii];
790 temp.
jc[curnzc++] = A.
jc[i];
791 temp.
cp[curnzc] = temp.
cp[curnzc-1] + curnz-prevnz;
799 temp.
jc[curnzc++] = A.
jc[i++];
800 for(IU k = A.
cp[i-1]; k< A.
cp[i]; ++k)
802 temp.
ir[curnz] = A.
ir[k];
805 temp.
cp[curnzc] = temp.
cp[curnzc-1] + (A.
cp[i] - A.
cp[i-1]);
809 temp.
Resize(curnzc, curnz);
813 template <
typename N_promote,
typename IU,
typename NU1,
typename NU2,
typename _BinaryOperation>
825 estnzc = std::min(A.
nzc, B->
nzc);
826 estnz = std::min(A.
nz, B->
nz);
839 while(i< A.
nzc && B != NULL && j<B->nzc)
841 if(A.
jc[i] > B->
jc[j]) ++j;
842 else if(A.
jc[i] < B->
jc[j]) ++i;
848 while (ii < A.
cp[i+1] && jj < B->cp[j+1])
850 if (A.
ir[ii] < B->
ir[jj]) ++ii;
851 else if (A.
ir[ii] > B->
ir[jj]) ++jj;
854 temp.
ir[curnz] = A.
ir[ii];
855 temp.
numx[curnz++] = __binary_op(A.
numx[ii++], B->
numx[jj++]);
860 temp.
jc[curnzc++] = A.
jc[i];
861 temp.
cp[curnzc] = temp.
cp[curnzc-1] + curnz-prevnz;
870 while(i< A.
nzc && B != NULL && j< B->nzc)
872 if(A.
jc[i] > B->
jc[j]) ++j;
873 else if(A.
jc[i] < B->
jc[j])
875 temp.
jc[curnzc++] = A.
jc[i++];
876 for(IU k = A.
cp[i-1]; k< A.
cp[i]; k++)
878 temp.
ir[curnz] = A.
ir[k];
879 temp.
numx[curnz++] = __binary_op(A.
numx[k], defaultBVal);
881 temp.
cp[curnzc] = temp.
cp[curnzc-1] + (A.
cp[i] - A.
cp[i-1]);
888 while (ii < A.
cp[i+1] && jj < B->cp[j+1])
890 if (A.
ir[ii] > B->
ir[jj]) ++jj;
891 else if (A.
ir[ii] < B->
ir[jj])
893 temp.
ir[curnz] = A.
ir[ii];
894 temp.
numx[curnz++] = __binary_op(A.
numx[ii++], defaultBVal);
902 while (ii < A.
cp[i+1])
904 temp.
ir[curnz] = A.
ir[ii];
905 temp.
numx[curnz++] = __binary_op(A.
numx[ii++], defaultBVal);
910 temp.
jc[curnzc++] = A.
jc[i];
911 temp.
cp[curnzc] = temp.
cp[curnzc-1] + curnz-prevnz;
919 temp.
jc[curnzc++] = A.
jc[i++];
920 for(IU k = A.
cp[i-1]; k< A.
cp[i]; ++k)
922 temp.
ir[curnz] = A.
ir[k];
923 temp.
numx[curnz++] = __binary_op(A.
numx[k], defaultBVal);
925 temp.
cp[curnzc] = temp.
cp[curnzc-1] + (A.
cp[i] - A.
cp[i-1]);
929 temp.
Resize(curnzc, curnz);
934 template<
typename IU,
typename NU1,
typename NU2>
942 if(A.nnz > 0 && B.nnz > 0)
947 else if (A.nnz > 0 && exclude)
959 template<
typename N_promote,
typename IU,
typename NU1,
typename NU2,
typename _BinaryOperation>
967 if(A.nnz > 0 && B.nnz > 0)
972 else if (A.nnz > 0 && notB)
992 template <
typename RETT,
typename IU,
typename NU1,
typename NU2,
typename _BinaryOperation,
typename _BinaryPredicate>
993 Dcsc<IU, RETT> EWiseApply(
const Dcsc<IU,NU1> * Ap,
const Dcsc<IU,NU2> * Bp, _BinaryOperation __binary_op, _BinaryPredicate do_op,
bool allowANulls,
bool allowBNulls,
const NU1& ANullVal,
const NU2& BNullVal,
const bool allowIntersect)
995 if (Ap == NULL && Bp == NULL)
998 if (Ap == NULL && Bp != NULL)
1018 temp.
jc[curnzc++] = B.
jc[j-1];
1019 for(IU k = B.
cp[j-1]; k< B.
cp[j]; ++k)
1021 if (do_op(ANullVal, B.
numx[k],
true,
false))
1023 temp.
ir[curnz] = B.
ir[k];
1024 temp.
numx[curnz++] = __binary_op(ANullVal, B.
numx[k],
true,
false);
1028 temp.
cp[curnzc] = temp.
cp[curnzc-1] + curnz-prevnz;
1030 temp.
Resize(curnzc, curnz);
1034 if (Ap != NULL && Bp == NULL)
1053 temp.
jc[curnzc++] = A.
jc[i-1];
1054 for(IU k = A.
cp[i-1]; k< A.
cp[i]; k++)
1056 if (do_op(A.
numx[k], BNullVal,
false,
true))
1058 temp.
ir[curnz] = A.
ir[k];
1059 temp.
numx[curnz++] = __binary_op(A.
numx[k], BNullVal,
false,
true);
1063 temp.
cp[curnzc] = temp.
cp[curnzc-1] + curnz-prevnz;
1065 temp.
Resize(curnzc, curnz);
1073 IU estnzc = A.
nzc + B.
nzc;
1074 IU estnz = A.
nz + B.
nz;
1082 while(i< A.
nzc && j<B.
nzc)
1084 if(A.
jc[i] > B.
jc[j])
1090 temp.
jc[curnzc++] = B.
jc[j-1];
1091 for(IU k = B.
cp[j-1]; k< B.
cp[j]; ++k)
1093 if (do_op(ANullVal, B.
numx[k],
true,
false))
1095 temp.
ir[curnz] = B.
ir[k];
1096 temp.
numx[curnz++] = __binary_op(ANullVal, B.
numx[k],
true,
false);
1100 temp.
cp[curnzc] = temp.
cp[curnzc-1] + curnz-prevnz;
1103 else if(A.
jc[i] < B.
jc[j])
1109 temp.
jc[curnzc++] = A.
jc[i-1];
1110 for(IU k = A.
cp[i-1]; k< A.
cp[i]; k++)
1112 if (do_op(A.
numx[k], BNullVal,
false,
true))
1114 temp.
ir[curnz] = A.
ir[k];
1115 temp.
numx[curnz++] = __binary_op(A.
numx[k], BNullVal,
false,
true);
1119 temp.
cp[curnzc] = temp.
cp[curnzc-1] + curnz-prevnz;
1124 temp.
jc[curnzc++] = A.
jc[i];
1128 while (ii < A.
cp[i+1] && jj < B.
cp[j+1])
1130 if (A.
ir[ii] < B.
ir[jj])
1132 if (allowBNulls && do_op(A.
numx[ii], BNullVal,
false,
true))
1134 temp.
ir[curnz] = A.
ir[ii];
1135 temp.
numx[curnz++] = __binary_op(A.
numx[ii++], BNullVal,
false,
true);
1140 else if (A.
ir[ii] > B.
ir[jj])
1142 if (allowANulls && do_op(ANullVal, B.
numx[jj],
true,
false))
1144 temp.
ir[curnz] = B.
ir[jj];
1145 temp.
numx[curnz++] = __binary_op(ANullVal, B.
numx[jj++],
true,
false);
1152 if (allowIntersect && do_op(A.
numx[ii], B.
numx[jj],
false,
false))
1154 temp.
ir[curnz] = A.
ir[ii];
1155 temp.
numx[curnz++] = __binary_op(A.
numx[ii++], B.
numx[jj++],
false,
false);
1164 while (ii < A.
cp[i+1])
1166 if (allowBNulls && do_op(A.
numx[ii], BNullVal,
false,
true))
1168 temp.
ir[curnz] = A.
ir[ii];
1169 temp.
numx[curnz++] = __binary_op(A.
numx[ii++], BNullVal,
false,
true);
1174 while (jj < B.
cp[j+1])
1176 if (allowANulls && do_op(ANullVal, B.
numx[jj],
true,
false))
1178 temp.
ir[curnz] = B.
ir[jj];
1179 temp.
numx[curnz++] = __binary_op(ANullVal, B.
numx[jj++],
true,
false);
1184 temp.
cp[curnzc] = temp.
cp[curnzc-1] + curnz-prevnz;
1189 while(allowBNulls && i< A.
nzc)
1192 temp.
jc[curnzc++] = A.
jc[i++];
1193 for(IU k = A.
cp[i-1]; k< A.
cp[i]; ++k)
1195 if (do_op(A.
numx[k], BNullVal,
false,
true))
1197 temp.
ir[curnz] = A.
ir[k];
1198 temp.
numx[curnz++] = __binary_op(A.
numx[k], BNullVal,
false,
true);
1202 temp.
cp[curnzc] = temp.
cp[curnzc-1] + curnz-prevnz;
1204 while(allowANulls && j < B.
nzc)
1207 temp.
jc[curnzc++] = B.
jc[j++];
1208 for(IU k = B.
cp[j-1]; k< B.
cp[j]; ++k)
1210 if (do_op(ANullVal, B.
numx[k],
true,
false))
1212 temp.
ir[curnz] = B.
ir[k];
1213 temp.
numx[curnz++] = __binary_op(ANullVal, B.
numx[k],
true,
false);
1217 temp.
cp[curnzc] = temp.
cp[curnzc-1] + curnz-prevnz;
1219 temp.
Resize(curnzc, curnz);
1223 template <
typename RETT,
typename IU,
typename NU1,
typename NU2,
typename _BinaryOperation,
typename _BinaryPredicate>
1224 SpDCCols<IU,RETT> EWiseApply (
const SpDCCols<IU,NU1> & A,
const SpDCCols<IU,NU2> & B, _BinaryOperation __binary_op, _BinaryPredicate do_op,
bool allowANulls,
bool allowBNulls,
const NU1& ANullVal,
const NU2& BNullVal,
const bool allowIntersect)
1229 Dcsc<IU, RETT> * tdcsc =
new Dcsc<IU, RETT>(EWiseApply<RETT>(A.
dcsc, B.
dcsc, __binary_op, do_op, allowANulls, allowBNulls, ANullVal, BNullVal, allowIntersect));
Dcsc< IT, NT > ** dcscarr
void BooleanRowSplit(SpDCCols< IU, bool > &A, int numsplits)
Dcsc< IU, N_promote > EWiseApply(const Dcsc< IU, NU1 > &A, const Dcsc< IU, NU2 > *B, _BinaryOperation __binary_op, bool notB, const NU2 &defaultBVal)
int generic_gespmv_threaded(const SpMat< IU, NUM, DER > &A, const int32_t *indx, const IVT *numx, int32_t nnzx, int32_t *&sendindbuf, OVT *&sendnumbuf, int *&sdispls, int p_c, PreAllocatedSPA< OVT > &SPA)
static void SpIntersect(const Dcsc< IT, NT1 > &Adcsc, const Dcsc< IT, NT2 > &Bdcsc, Isect< IT > *&cols, Isect< IT > *&rows, Isect< IT > *&isect1, Isect< IT > *&isect2, Isect< IT > *&itr1, Isect< IT > *&itr2)
std::vector< BitMap > V_isthere
SpTuples< IU, NUO > * Tuples_AtXBn(const SpDCCols< IU, NU1 > &A, const SpDCCols< IU, NU2 > &B, bool clearA=false, bool clearB=false)
Dcsc< IU, typename promote_trait< NU1, NU2 >::T_promote > EWiseMult(const Dcsc< IU, NU1 > &A, const Dcsc< IU, NU2 > *B, bool exclude)
SpTuples< IU, NUO > * Tuples_AnXBt(const SpDCCols< IU, NU1 > &A, const SpDCCols< IU, NU2 > &B, bool clearA=false, bool clearB=false)
void Resize(IT nzcnew, IT nznew)
IT * ir
row indices, size nz
std::vector< std::vector< OVT > > V_localy
IT * cp
The master array, size nzc+1 (keeps column pointers)
static void Print(const std::string &s)
std::vector< std::vector< uint32_t > > V_inds
void dcsc_gespmv(const SpDCCols< IU, NU > &A, const RHS *x, LHS *y)
SpMV with dense vector.
NT * numx
generic values, size nz
SpTuples< IU, NUO > * Tuples_AtXBt(const SpDCCols< IU, NU1 > &A, const SpDCCols< IU, NU2 > &B, bool clearA=false, bool clearB=false)
IT nzc
number of columns with at least one non-zero in them
SpTuples< IU, NUO > * Tuples_AnXBn(const SpDCCols< IU, NU1 > &A, const SpDCCols< IU, NU2 > &B, bool clearA=false, bool clearB=false)
void generic_gespmv_threaded_setbuffers(const SpMat< IU, NUM, DER > &A, const int32_t *indx, const IVT *numx, int32_t nnzx, int32_t *sendindbuf, OVT *sendnumbuf, int *cnts, int *dspls, int p_c)
IT * jc
col indices, size nzc
SpTuples< IU, NU > MergeAll(const std::vector< SpTuples< IU, NU > *> &ArrSpTups, IU mstar=0, IU nstar=0, bool delarrs=false)
void dcsc_gespmv_threaded(const SpDCCols< IU, NU > &A, const RHS *x, LHS *y)
SpMV with dense vector (multithreaded version)
static void deallocate2D(T **array, I m)
static void ShrinkArray(NT *&array, IT newsize)
void generic_gespmv(const SpMat< MIND, NUM, DER > &A, const VIND *indx, const IVT *numx, VIND nnzx, std::vector< VIND > &indy, std::vector< OVT > &numy, PreAllocatedSPA< OVT > &SPA)