39 template <
class IT,
class NT>
40 Dcsc<IT,NT>::Dcsc ():cp(NULL), jc(NULL), ir(NULL), numx(NULL),nz(0), nzc(0), memowned(true){}
42 template <
class IT,
class NT>
54 template <
class IT,
class NT>
59 cindex = multstack[j].key.first;
60 rindex = multstack[j].key.second;
64 rindex = std::numeric_limits<IT>::max();
65 cindex = std::numeric_limits<IT>::max();
70 template <
class IT,
class NT>
73 if(nnz == 0)
return *
this;
75 IT estnzc =
nzc + nnz;
84 getindices(multstack, rindex, cindex,j,nnz);
87 while(i<
nzc && cindex < std::numeric_limits<IT>::max())
92 temp.
jc[curnzc++] = cindex;
95 temp.
ir[curnz] = rindex;
96 temp.
numx[curnz++] = multstack[j-1].value;
98 getindices(multstack, rindex, cindex,j,nnz);
101 while(temp.
jc[curnzc-1] == cindex);
103 temp.
cp[curnzc] = temp.
cp[curnzc-1] + columncount;
105 else if(
jc[i] < cindex)
107 temp.
jc[curnzc++] =
jc[i++];
108 for(IT k =
cp[i-1]; k<
cp[i]; ++k)
110 temp.
ir[curnz] =
ir[k];
113 temp.
cp[curnzc] = temp.
cp[curnzc-1] + (cp[i] - cp[i-1]);
117 temp.
jc[curnzc++] =
jc[i];
120 while (ii <
cp[i+1] && cindex ==
jc[i])
124 temp.
ir[curnz] =
ir[ii];
127 else if (
ir[ii] > rindex)
129 temp.
ir[curnz] = rindex;
130 temp.
numx[curnz++] = multstack[j-1].value;
132 getindices(multstack, rindex, cindex,j,nnz);
136 temp.
ir[curnz] =
ir[ii];
137 temp.
numx[curnz++] =
numx[ii++] + multstack[j-1].value;
139 getindices(multstack, rindex, cindex,j,nnz);
144 temp.
ir[curnz] =
ir[ii];
147 while (cindex ==
jc[i])
149 temp.
ir[curnz] = rindex;
150 temp.
numx[curnz++] = multstack[j-1].value;
152 getindices(multstack, rindex, cindex,j,nnz);
154 temp.
cp[curnzc] = temp.
cp[curnzc-1] + curnz-prevnz;
160 temp.
jc[curnzc++] =
jc[i++];
161 for(IT k =
cp[i-1]; k<
cp[i]; ++k)
163 temp.
ir[curnz] =
ir[k];
166 temp.
cp[curnzc] = temp.
cp[curnzc-1] + (cp[i] - cp[i-1]);
168 while(cindex < std::numeric_limits<IT>::max())
171 temp.
jc[curnzc++] = cindex;
174 temp.
ir[curnz] = rindex;
175 temp.
numx[curnz++] = multstack[j-1].value;
177 getindices(multstack, rindex, cindex,j,nnz);
180 while(temp.
jc[curnzc-1] == cindex);
182 temp.
cp[curnzc] = temp.
cp[curnzc-1] + columncount;
184 temp.
Resize(curnzc, curnz);
194 template <
class IT,
class NT>
197 nzc = std::min(ndim, nnz);
206 IT cindex = multstack[0].key.first;
207 IT rindex = multstack[0].key.second;
210 numx[0] = multstack[0].value;
215 for(IT i=1; i<
nz; ++i)
217 cindex = multstack[i].key.first;
218 rindex = multstack[i].key.second;
221 numx[i] = multstack[i].value;
222 if(cindex !=
jc[curnzc-1])
237 template <
class IT,
class NT>
240 assert((nnz != 0) && (indices.size() == nnz));
247 std::fill_n(
numx, nnz, static_cast<NT>(1));
252 std::copy (indices.begin(), indices.end(),
jc);
257 std::copy (indices.begin(), indices.end(),
ir);
262 template <
class IT,
class NT>
263 template <
typename NNT>
268 for(IT i=0; i<
nz; ++i)
270 convert.
numx[i] =
static_cast<NNT
>(
numx[i]);
272 std::copy(
ir,
ir+nz, convert.
ir);
278 template <
class IT,
class NT>
279 template <
typename NIT,
typename NNT>
284 for(IT i=0; i<
nz; ++i)
285 convert.
numx[i] = static_cast<NNT>(
numx[i]);
286 for(IT i=0; i<
nz; ++i)
287 convert.
ir[i] = static_cast<NIT>(
ir[i]);
288 for(IT i=0; i<
nzc; ++i)
289 convert.
jc[i] = static_cast<NIT>(
jc[i]);
290 for(IT i=0; i<=
nzc; ++i)
291 convert.
cp[i] = static_cast<NIT>(
cp[i]);
295 template <
class IT,
class NT>
327 template <
class IT,
class NT>
373 template <
class IT,
class NT>
376 IT estnzc =
nzc + rhs.
nzc;
377 IT estnz =
nz + rhs.
nz;
385 while(i<
nzc && j<rhs.
nzc)
387 if(
jc[i] > rhs.
jc[j])
389 temp.
jc[curnzc++] = rhs.
jc[j++];
390 for(IT k = rhs.
cp[j-1]; k< rhs.
cp[j]; ++k)
392 temp.
ir[curnz] = rhs.
ir[k];
395 temp.
cp[curnzc] = temp.
cp[curnzc-1] + (rhs.
cp[j] - rhs.
cp[j-1]);
397 else if(
jc[i] < rhs.
jc[j])
399 temp.
jc[curnzc++] =
jc[i++];
400 for(IT k =
cp[i-1]; k<
cp[i]; k++)
402 temp.
ir[curnz] =
ir[k];
405 temp.
cp[curnzc] = temp.
cp[curnzc-1] + (cp[i] - cp[i-1]);
409 temp.
jc[curnzc++] =
jc[i];
413 while (ii <
cp[i+1] && jj < rhs.
cp[j+1])
415 if (
ir[ii] < rhs.
ir[jj])
417 temp.
ir[curnz] =
ir[ii];
420 else if (
ir[ii] > rhs.
ir[jj])
422 temp.
ir[curnz] = rhs.
ir[jj];
423 temp.
numx[curnz++] = rhs.
numx[jj++];
427 temp.
ir[curnz] =
ir[ii];
433 temp.
ir[curnz] =
ir[ii];
436 while (jj < rhs.
cp[j+1])
438 temp.
ir[curnz] = rhs.
ir[jj];
439 temp.
numx[curnz++] = rhs.
numx[jj++];
441 temp.
cp[curnzc] = temp.
cp[curnzc-1] + curnz-prevnz;
448 temp.
jc[curnzc++] =
jc[i++];
449 for(IT k =
cp[i-1]; k<
cp[i]; ++k)
451 temp.
ir[curnz] =
ir[k];
454 temp.
cp[curnzc] = temp.
cp[curnzc-1] + (cp[i] - cp[i-1]);
458 temp.
jc[curnzc++] = rhs.
jc[j++];
459 for(IT k = rhs.
cp[j-1]; k< rhs.
cp[j]; ++k)
461 temp.
ir[curnz] = rhs.
ir[k];
464 temp.
cp[curnzc] = temp.
cp[curnzc-1] + (rhs.
cp[j] - rhs.
cp[j-1]);
466 temp.
Resize(curnzc, curnz);
471 template <
class IT,
class NT>
474 if(
nzc != rhs.
nzc)
return false;
475 bool same = std::equal(
cp,
cp+
nzc+1, rhs.
cp);
476 same = same && std::equal(
jc,
jc+
nzc, rhs.
jc);
477 same = same && std::equal(
ir,
ir+
nz, rhs.
ir);
480 std::vector<NT> error(
nz);
482 std::vector< std::pair<NT, NT> > error_original_pair(
nz);
483 for(IT i=0; i <
nz; ++i)
484 error_original_pair[i] = std::make_pair(error[i],
numx[i]);
485 if(error_original_pair.size() > 10)
487 partial_sort(error_original_pair.begin(), error_original_pair.begin()+10, error_original_pair.end(), std::greater< std::pair<NT,NT> >());
488 std::cout <<
"Highest 10 different entries are: " << std::endl;
489 for(IT i=0; i < 10; ++i)
490 std::cout <<
"Diff: " << error_original_pair[i].first <<
" on " << error_original_pair[i].second << std::endl;
494 sort(error_original_pair.begin(), error_original_pair.end(), std::greater< std::pair<NT,NT> >());
495 std::cout <<
"Highest different entries are: " << std::endl;
496 for(
typename std::vector< std::pair<NT, NT> >::iterator it=error_original_pair.begin(); it != error_original_pair.end(); ++it)
497 std::cout <<
"Diff: " << it->first <<
" on " << it->second << std::endl;
499 std::cout <<
"Same before num: " << same << std::endl;
503 same = same && std::equal(
numx,
numx+nz, rhs.
numx, epsilonequal );
512 template <
class IT,
class NT>
521 template <
class IT,
class NT>
522 template <
typename _UnaryOperation,
typename GlobalIT>
528 for(IT i=0; i<
nzc; ++i)
530 bool colexists =
false;
531 for(IT j=
cp[i]; j <
cp[i+1]; ++j)
533 if(!(__unary_op(make_tuple(rowOffset+
ir[j], colOffset+
jc[i],
numx[j]))))
539 if(colexists) ++prunednzc;
546 cp =
new IT[prunednzc+1];
547 jc =
new IT[prunednzc];
548 ir =
new IT[prunednnz];
549 numx =
new NT[prunednnz];
554 for(IT i=0; i<
nzc; ++i)
556 for(IT j = oldcp[i]; j < oldcp[i+1]; ++j)
558 if(!(__unary_op(make_tuple(rowOffset+oldir[j], colOffset+oldjc[i], oldnumx[j]))))
561 numx[cnnz++] = oldnumx[j];
571 assert(cnzc == prunednzc);
572 assert(cnnz == prunednnz);
602 template <
class IT,
class NT>
603 template <
typename _UnaryOperation>
609 for(IT i=0; i<
nzc; ++i)
611 bool colexists =
false;
612 for(IT j=
cp[i]; j <
cp[i+1]; ++j)
614 if(!(__unary_op(
numx[j])))
620 if(colexists) ++prunednzc;
627 cp =
new IT[prunednzc+1];
628 jc =
new IT[prunednzc];
629 ir =
new IT[prunednnz];
630 numx =
new NT[prunednnz];
635 for(IT i=0; i<
nzc; ++i)
637 for(IT j = oldcp[i]; j < oldcp[i+1]; ++j)
639 if(!(__unary_op(oldnumx[j])))
642 numx[cnnz++] = oldnumx[j];
652 assert(cnzc == prunednzc);
653 assert(cnnz == prunednnz);
684 template <
class IT,
class NT>
685 template <
typename _BinaryOperation>
691 for(IT i=0; i<
nzc; ++i)
693 bool colexists =
false;
694 for(IT j=
cp[i]; j <
cp[i+1]; ++j)
697 if(!(__binary_op(
numx[j], pvals[colid])))
703 if(colexists) ++prunednzc;
710 cp =
new IT[prunednzc+1];
711 jc =
new IT[prunednzc];
712 ir =
new IT[prunednnz];
713 numx =
new NT[prunednnz];
718 for(IT i=0; i<
nzc; ++i)
720 for(IT j = oldcp[i]; j < oldcp[i+1]; ++j)
723 if(!(__binary_op(oldnumx[j], pvals[colid])))
726 numx[cnnz++] = oldnumx[j];
736 assert(cnzc == prunednzc);
768 template <
class IT,
class NT>
769 template <
typename _BinaryOperation>
776 for(IT i=0; i<
nzc; ++i)
778 bool colexists =
false;
782 for(IT j=
cp[i]; j <
cp[i+1]; ++j)
784 if(!(__binary_op(
numx[j], pvals[k])))
795 prunednnz += (
cp[i+1] -
cp[i]);
797 if(colexists) ++prunednzc;
804 cp =
new IT[prunednzc+1];
805 jc =
new IT[prunednzc];
806 ir =
new IT[prunednnz];
807 numx =
new NT[prunednnz];
813 for(IT i=0; i<
nzc; ++i)
818 for(IT j = oldcp[i]; j < oldcp[i+1]; ++j)
820 if(!(__binary_op(oldnumx[j], pvals[k])))
823 numx[cnnz++] = oldnumx[j];
830 for(IT j = oldcp[i]; j < oldcp[i+1]; ++j)
833 numx[cnnz++] = oldnumx[j];
843 assert(cnzc == prunednzc);
873 template <
class IT,
class NT>
876 for(IT i=0; i<
nzc; ++i)
879 for(IT j=
cp[i]; j <
cp[i+1]; ++j)
882 numx[j] *= scaler[rowid][colid];
891 template <
class IT,
class NT>
892 template <
typename _BinaryOperation>
895 for(IT i=0; i<
nzc; ++i)
898 for(IT j=
cp[i]; j <
cp[i+1]; ++j)
901 array[rowid][colid] = __binary_op(array[rowid][colid],
numx[j]);
911 template <
class IT,
class NT>
914 float cf =
static_cast<float>(ndim+1) / static_cast<float>(
nzc);
915 IT colchunks =
static_cast<IT
> ( ceil( static_cast<float>(ndim+1) / ceil(cf)) );
917 aux =
new IT[colchunks+1];
919 IT chunksize =
static_cast<IT
>(ceil(cf));
923 for(IT i = 0; i<
nzc; ++i)
925 if(
jc[i] >= curchunk * chunksize)
927 while(
jc[i] >= curchunk * chunksize)
929 aux[curchunk++] = reg;
934 while(curchunk <= colchunks)
936 aux[curchunk++] = reg;
945 template <
class IT,
class NT>
960 if ( nzcnew == 0 && nznew == 0)
968 cp =
new IT[nzcnew+1];
972 std::copy(tmpcp, tmpcp+
nzc+1,
cp);
973 std::copy(tmpjc, tmpjc+
nzc,
jc);
977 std::copy(tmpcp, tmpcp+nzcnew+1,
cp);
978 std::copy(tmpjc, tmpjc+nzcnew,
jc);
988 numx =
new NT[nznew];
992 std::copy(tmpnumx, tmpnumx+
nz,
numx);
993 std::copy(tmpir, tmpir+
nz,
ir);
997 std::copy(tmpnumx, tmpnumx+nznew,
numx);
998 std::copy(tmpir, tmpir+nznew,
ir);
1012 template<
class IT,
class NT>
1015 IT base =
static_cast<IT
>(floor((
float) (colind/csize)));
1016 IT start = aux[base];
1017 IT end = aux[base+1];
1019 IT * itr = std::find(
jc + start,
jc + end, colind);
1021 found = (itr !=
jc + end);
1029 template<
class IT,
class NT>
1032 IT * itr = std::lower_bound(
jc,
jc+
nzc, cut);
1042 std::copy(jc, jc+pos, A->
jc);
1043 std::copy(cp, cp+pos+1, A->
cp);
1044 std::copy(
ir,
ir+cp[pos], A->
ir);
1054 std::copy(jc+pos, jc+
nzc, B->
jc);
1055 transform(B->
jc, B->
jc + (
nzc-pos), B->
jc, bind2nd(std::minus<IT>(), cut));
1056 std::copy(cp+pos, cp+
nzc+1, B->
cp);
1057 transform(B->
cp, B->
cp + (
nzc-pos+1), B->
cp, bind2nd(std::minus<IT>(), cp[pos]));
1058 std::copy(
ir+cp[pos],
ir+
nz, B->
ir);
1069 template<
class IT,
class NT>
1073 std::vector<IT> pos;
1074 for(
auto cutpoint = cuts.begin(); cutpoint != cuts.end(); ++cutpoint)
1076 IT * itr = std::lower_bound(jcbegin,
jc+
nzc, *cutpoint);
1077 pos.push_back(itr -
jc);
1088 std::copy(
jc,
jc+pos[0], parts[0]->
jc);
1089 std::copy(
cp,
cp+pos[0]+1, parts[0]->
cp);
1090 std::copy(
ir,
ir+
cp[pos[0]], parts[0]->
ir);
1093 int ncuts = cuts.size();
1094 for(
int i=1; i< ncuts; ++i)
1096 if(
cp[pos[i]] -
cp[pos[i-1]] == 0)
1102 parts[i] =
new Dcsc<IT,NT>(
cp[pos[i]] -
cp[pos[i-1]], pos[i] - pos[i-1]);
1103 std::copy(
jc+pos[i-1],
jc+pos[i], parts[i]->
jc);
1104 transform(parts[i]->
jc, parts[i]->
jc + (pos[i]-pos[i-1]), parts[i]->
jc, bind2nd(std::minus<IT>(), cuts[i-1]));
1106 std::copy(
cp+pos[i-1],
cp+pos[i]+1, parts[i]->
cp);
1107 transform(parts[i]->
cp, parts[i]->
cp + (pos[i]-pos[i-1]+1), parts[i]->
cp, bind2nd(std::minus<IT>(),
cp[pos[i-1]]));
1109 std::copy(
ir+
cp[pos[i-1]],
ir+
cp[pos[i]], parts[i]->
ir);
1113 if(
nz -
cp[pos[ncuts-1]] == 0)
1115 parts[ncuts] = NULL;
1120 std::copy(
jc+pos[ncuts-1],
jc+
nzc, parts[ncuts]->
jc);
1121 transform(parts[ncuts]->
jc, parts[ncuts]->
jc + (
nzc-pos[ncuts-1]), parts[ncuts]->
jc, bind2nd(std::minus<IT>(), cuts[ncuts-1]));
1123 std::copy(
cp+pos[ncuts-1],
cp+
nzc+1, parts[ncuts]->
cp);
1124 transform(parts[ncuts]->
cp, parts[ncuts]->
cp + (
nzc-pos[ncuts-1]+1), parts[ncuts]->
cp, bind2nd(std::minus<IT>(),
cp[pos[ncuts-1]]));
1125 std::copy(
ir+
cp[pos[ncuts-1]],
ir+
nz, parts[ncuts]->
ir);
1133 template<
class IT,
class NT>
1136 assert((A != NULL) && (B != NULL));
1137 IT cnz = A->
nz + B->
nz;
1138 IT cnzc = A->
nzc + B->
nzc;
1145 transform(
jc + A->
nzc,
jc + cnzc,
jc + A->
nzc, bind2nd(std::plus<IT>(), cut));
1149 transform(
cp + A->
nzc,
cp+cnzc+1,
cp + A->
nzc, bind2nd(std::plus<IT>(), A->
cp[A->
nzc]));
1151 std::copy(A->
ir, A->
ir + A->
nz,
ir);
1167 template<
class IT,
class NT>
1172 size_t nmembers = parts.size();
1173 for(
size_t i=0; i< nmembers; ++i)
1175 cnz += parts[i]->nz;
1176 cnzc += parts[i]->nzc;
1184 for(
size_t i=0; i< nmembers; ++i)
1186 std::copy(parts[i]->
jc, parts[i]->
jc + parts[i]->
nzc,
jc + run_nzc);
1187 transform(
jc + run_nzc,
jc + run_nzc + parts[i]->
nzc,
jc + run_nzc, bind2nd(std::plus<IT>(), offsets[i]));
1190 std::copy(parts[i]->
cp, parts[i]->
cp + parts[i]->
nzc,
cp + run_nzc);
1191 transform(
cp + run_nzc,
cp + run_nzc + parts[i]->
nzc,
cp + run_nzc, bind2nd(std::plus<IT>(),run_nz));
1193 std::copy(parts[i]->
ir, parts[i]->
ir + parts[i]->
nz,
ir + run_nz);
1194 std::copy(parts[i]->
numx, parts[i]->
numx + parts[i]->
nz,
numx + run_nz);
1196 run_nzc += parts[i]->nzc;
1197 run_nz += parts[i]->
nz;
1200 cp[run_nzc] = run_nz;
1209 template<
class IT,
class NT>
1215 IT mink = std::min(
nzc, nind);
1216 std::pair<IT,IT> * isect =
new std::pair<IT,IT>[mink];
1217 std::pair<IT,IT> * range1 =
new std::pair<IT,IT>[
nzc];
1218 std::pair<IT,IT> * range2 =
new std::pair<IT,IT>[nind];
1220 for(IT i=0; i <
nzc; ++i)
1222 range1[i] = std::make_pair(
jc[i], i);
1224 for(IT i=0; i < nind; ++i)
1226 range2[i] = std::make_pair(static_cast<IT>(colnums[i]), 0);
1229 std::pair<IT,IT> * itr = set_intersection(range1, range1 + nzc, range2, range2+nind, isect, SpHelper::first_compare<IT> );
1234 IT kisect =
static_cast<IT
>(itr-isect);
1235 for(IT j=0, i =0; j< nind; ++j)
1238 if( i == kisect || isect[i].first != static_cast<IT>(colnums[j]))
1241 colinds[j].first = 0;
1242 colinds[j].second = 0;
1246 IT p = isect[i++].second;
1247 colinds[j].first =
cp[p];
1248 colinds[j].second =
cp[p+1];
1256 for(IT j =0; j< nind; ++j)
1258 IT pos =
AuxIndex(static_cast<IT>(colnums[j]), found, aux, csize);
1261 colinds[j].first =
cp[pos];
1262 colinds[j].second =
cp[pos+1];
1266 colinds[j].first = 0;
1267 colinds[j].second = 0;
1274 template <
class IT,
class NT>
static void iota(_ForwardIter __first, _ForwardIter __last, T __val)
void EWiseScale(NT **scaler)
Dcsc< IT, NT > * PruneColumn(NT *pvals, _BinaryOperation __binary_op, bool inPlace)
Dcsc< IU, typename promote_trait< NU1, NU2 >::T_promote > EWiseMult(const Dcsc< IU, NU1 > &A, const Dcsc< IU, NU2 > *B, bool exclude)
void Resize(IT nzcnew, IT nznew)
IT * ir
row indices, size nz
void EWiseMult(const Dcsc< IT, NT > &rhs, bool exclude)
IT * cp
The master array, size nzc+1 (keeps column pointers)
void Merge(const Dcsc< IT, NT > *Adcsc, const Dcsc< IT, NT > *B, IT cut)
void FillColInds(const VT *colnums, IT nind, std::vector< std::pair< IT, IT > > &colinds, IT *aux, IT csize) const
Dcsc< IT, NT > * Prune(_UnaryOperation __unary_op, bool inPlace)
void ColSplit(std::vector< Dcsc< IT, NT > * > &parts, std::vector< IT > &cuts)
bool operator==(const Dcsc< IT, NT > &rhs)
IT AuxIndex(const IT colind, bool &found, IT *aux, IT csize) const
void ColConcatenate(std::vector< Dcsc< IT, NT > * > &parts, std::vector< IT > &offsets)
void UpdateDense(NT **array, _BinaryOperation __binary_op) const
Dcsc< IT, NT > & operator+=(const Dcsc< IT, NT > &rhs)
NT * numx
generic values, size nz
IT nzc
number of columns with at least one non-zero in them
IT ConstructAux(IT ndim, IT *&aux) const
Dcsc< IT, NT > * PruneI(_UnaryOperation __unary_op, bool inPlace, GlobalIT rowOffset, GlobalIT colOffset)
IT * jc
col indices, size nzc
Dcsc< IT, NT > & operator=(const Dcsc< IT, NT > &rhs)
Dcsc< IT, NT > & AddAndAssign(StackEntry< NT, std::pair< IT, IT > > *multstack, IT mdim, IT ndim, IT nnz)
void Split(Dcsc< IT, NT > *&A, Dcsc< IT, NT > *&B, IT cut)