COMBINATORIAL_BLAS  1.6
dcsc.cpp
Go to the documentation of this file.
1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 05/15/2016 --------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc, Adam Lugowski ------------*/
6 /****************************************************************/
7 /*
8  Copyright (c) 2010-2016, The Regents of the University of California
9 
10  Permission is hereby granted, free of charge, to any person obtaining a copy
11  of this software and associated documentation files (the "Software"), to deal
12  in the Software without restriction, including without limitation the rights
13  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  copies of the Software, and to permit persons to whom the Software is
15  furnished to do so, subject to the following conditions:
16 
17  The above copyright notice and this permission notice shall be included in
18  all copies or substantial portions of the Software.
19 
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  THE SOFTWARE.
27  */
28 
29 
30 #include "dcsc.h"
31 #include <algorithm>
32 #include <functional>
33 #include <iostream>
34 #include "Friends.h"
35 #include "SpHelper.h"
36 
37 namespace combblas {
38 
39 template <class IT, class NT>
40 Dcsc<IT,NT>::Dcsc ():cp(NULL), jc(NULL), ir(NULL), numx(NULL),nz(0), nzc(0), memowned(true){}
41 
42 template <class IT, class NT>
43 Dcsc<IT,NT>::Dcsc (IT nnz, IT nzcol): nz(nnz),nzc(nzcol),memowned(true)
44 {
45  assert (nz != 0);
46  assert (nzc != 0);
47  cp = new IT[nzc+1];
48  jc = new IT[nzc];
49  ir = new IT[nz];
50  numx = new NT[nz];
51 }
52 
54 template <class IT, class NT>
55 inline void Dcsc<IT,NT>::getindices (StackEntry<NT, std::pair<IT,IT> > * multstack, IT & rindex, IT & cindex, IT & j, IT nnz)
56 {
57  if(j<nnz)
58  {
59  cindex = multstack[j].key.first;
60  rindex = multstack[j].key.second;
61  }
62  else
63  {
64  rindex = std::numeric_limits<IT>::max();
65  cindex = std::numeric_limits<IT>::max();
66  }
67  ++j;
68 }
69 
70 template <class IT, class NT>
71 Dcsc<IT,NT> & Dcsc<IT,NT>::AddAndAssign (StackEntry<NT, std::pair<IT,IT> > * multstack, IT mdim, IT ndim, IT nnz)
72 {
73  if(nnz == 0) return *this;
74 
75  IT estnzc = nzc + nnz;
76  IT estnz = nz + nnz;
77  Dcsc<IT,NT> temp(estnz, estnzc);
78 
79  IT curnzc = 0; // number of nonzero columns constructed so far
80  IT curnz = 0;
81  IT i = 0;
82  IT j = 0;
83  IT rindex, cindex;
84  getindices(multstack, rindex, cindex,j,nnz);
85 
86  temp.cp[0] = 0;
87  while(i< nzc && cindex < std::numeric_limits<IT>::max()) // i runs over columns of "this", j runs over all the nonzeros of "multstack"
88  {
89  if(jc[i] > cindex)
90  {
91  IT columncount = 0;
92  temp.jc[curnzc++] = cindex;
93  do
94  {
95  temp.ir[curnz] = rindex;
96  temp.numx[curnz++] = multstack[j-1].value;
97 
98  getindices(multstack, rindex, cindex,j,nnz);
99  ++columncount;
100  }
101  while(temp.jc[curnzc-1] == cindex); // loop until cindex changes
102 
103  temp.cp[curnzc] = temp.cp[curnzc-1] + columncount;
104  }
105  else if(jc[i] < cindex)
106  {
107  temp.jc[curnzc++] = jc[i++];
108  for(IT k = cp[i-1]; k< cp[i]; ++k)
109  {
110  temp.ir[curnz] = ir[k];
111  temp.numx[curnz++] = numx[k];
112  }
113  temp.cp[curnzc] = temp.cp[curnzc-1] + (cp[i] - cp[i-1]);
114  }
115  else // they are equal, merge the column
116  {
117  temp.jc[curnzc++] = jc[i];
118  IT ii = cp[i];
119  IT prevnz = curnz;
120  while (ii < cp[i+1] && cindex == jc[i]) // cindex would be MAX if multstack is deplated
121  {
122  if (ir[ii] < rindex)
123  {
124  temp.ir[curnz] = ir[ii];
125  temp.numx[curnz++] = numx[ii++];
126  }
127  else if (ir[ii] > rindex)
128  {
129  temp.ir[curnz] = rindex;
130  temp.numx[curnz++] = multstack[j-1].value;
131 
132  getindices(multstack, rindex, cindex,j,nnz);
133  }
134  else
135  {
136  temp.ir[curnz] = ir[ii];
137  temp.numx[curnz++] = numx[ii++] + multstack[j-1].value;
138 
139  getindices(multstack, rindex, cindex,j,nnz);
140  }
141  }
142  while (ii < cp[i+1])
143  {
144  temp.ir[curnz] = ir[ii];
145  temp.numx[curnz++] = numx[ii++];
146  }
147  while (cindex == jc[i])
148  {
149  temp.ir[curnz] = rindex;
150  temp.numx[curnz++] = multstack[j-1].value;
151 
152  getindices(multstack, rindex, cindex,j,nnz);
153  }
154  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
155  ++i;
156  }
157  }
158  while(i< nzc)
159  {
160  temp.jc[curnzc++] = jc[i++];
161  for(IT k = cp[i-1]; k< cp[i]; ++k)
162  {
163  temp.ir[curnz] = ir[k];
164  temp.numx[curnz++] = numx[k];
165  }
166  temp.cp[curnzc] = temp.cp[curnzc-1] + (cp[i] - cp[i-1]);
167  }
168  while(cindex < std::numeric_limits<IT>::max())
169  {
170  IT columncount = 0;
171  temp.jc[curnzc++] = cindex;
172  do
173  {
174  temp.ir[curnz] = rindex;
175  temp.numx[curnz++] = multstack[j-1].value;
176 
177  getindices(multstack, rindex, cindex,j,nnz);
178  ++columncount;
179  }
180  while(temp.jc[curnzc-1] == cindex); // loop until cindex changes
181 
182  temp.cp[curnzc] = temp.cp[curnzc-1] + columncount;
183  }
184  temp.Resize(curnzc, curnz);
185  *this = temp;
186  return *this;
187 }
188 
189 
194 template <class IT, class NT>
195 Dcsc<IT,NT>::Dcsc (StackEntry<NT, std::pair<IT,IT> > * multstack, IT mdim, IT ndim, IT nnz): nz(nnz),memowned(true)
196 {
197  nzc = std::min(ndim, nnz); // nzc can't exceed any of those
198 
199  assert(nz != 0 );
200  cp = new IT[nzc+1]; // to be shrinked
201  jc = new IT[nzc]; // to be shrinked
202  ir = new IT[nz];
203  numx = new NT[nz];
204 
205  IT curnzc = 0; // number of nonzero columns constructed so far
206  IT cindex = multstack[0].key.first;
207  IT rindex = multstack[0].key.second;
208 
209  ir[0] = rindex;
210  numx[0] = multstack[0].value;
211  jc[curnzc] = cindex;
212  cp[curnzc] = 0;
213  ++curnzc;
214 
215  for(IT i=1; i<nz; ++i)
216  {
217  cindex = multstack[i].key.first;
218  rindex = multstack[i].key.second;
219 
220  ir[i] = rindex;
221  numx[i] = multstack[i].value;
222  if(cindex != jc[curnzc-1])
223  {
224  jc[curnzc] = cindex;
225  cp[curnzc++] = i;
226  }
227  }
228  cp[curnzc] = nz;
229  Resize(curnzc, nz); // only shrink cp & jc arrays
230 }
231 
237 template <class IT, class NT>
238 Dcsc<IT,NT>::Dcsc (IT nnz, const std::vector<IT> & indices, bool isRow): nz(nnz),nzc(nnz),memowned(true)
239 {
240  assert((nnz != 0) && (indices.size() == nnz));
241  cp = new IT[nnz+1];
242  jc = new IT[nnz];
243  ir = new IT[nnz];
244  numx = new NT[nnz];
245 
246  SpHelper::iota(cp, cp+nnz+1, 0); // insert sequential values {0,1,2,..}
247  std::fill_n(numx, nnz, static_cast<NT>(1));
248 
249  if(isRow)
250  {
251  SpHelper::iota(ir, ir+nnz, 0);
252  std::copy (indices.begin(), indices.end(), jc);
253  }
254  else
255  {
256  SpHelper::iota(jc, jc+nnz, 0);
257  std::copy (indices.begin(), indices.end(), ir);
258  }
259 }
260 
261 
262 template <class IT, class NT>
263 template <typename NNT>
265 {
266  Dcsc<IT,NNT> convert(nz, nzc);
267 
268  for(IT i=0; i< nz; ++i)
269  {
270  convert.numx[i] = static_cast<NNT>(numx[i]);
271  }
272  std::copy(ir, ir+nz, convert.ir); // copy(first, last, result)
273  std::copy(jc, jc+nzc, convert.jc);
274  std::copy(cp, cp+nzc+1, convert.cp);
275  return convert;
276 }
277 
278 template <class IT, class NT>
279 template <typename NIT, typename NNT>
281 {
282  Dcsc<NIT,NNT> convert(nz, nzc);
283 
284  for(IT i=0; i< nz; ++i)
285  convert.numx[i] = static_cast<NNT>(numx[i]);
286  for(IT i=0; i< nz; ++i)
287  convert.ir[i] = static_cast<NIT>(ir[i]);
288  for(IT i=0; i< nzc; ++i)
289  convert.jc[i] = static_cast<NIT>(jc[i]);
290  for(IT i=0; i<= nzc; ++i)
291  convert.cp[i] = static_cast<NIT>(cp[i]);
292  return convert;
293 }
294 
295 template <class IT, class NT>
296 Dcsc<IT,NT>::Dcsc (const Dcsc<IT,NT> & rhs): nz(rhs.nz), nzc(rhs.nzc),memowned(true)
297 {
298  if(nz > 0)
299  {
300  numx = new NT[nz];
301  ir = new IT[nz];
302  std::copy(rhs.numx, rhs.numx + nz, numx); // numx can be a non-POD type
303  std::copy(rhs.ir, rhs.ir + nz, ir);
304  }
305  else
306  {
307  numx = NULL;
308  ir = NULL;
309  }
310  if(nzc > 0)
311  {
312  jc = new IT[nzc];
313  cp = new IT[nzc+1];
314  std::copy(rhs.jc, rhs.jc + nzc, jc);
315  std::copy(rhs.cp, rhs.cp + nzc + 1, cp);
316  }
317  else
318  {
319  jc = NULL;
320  cp = NULL;
321  }
322 }
323 
327 template <class IT, class NT>
329 {
330  if(this != &rhs)
331  {
332  // make empty first !
333  if(nz > 0)
334  {
335  delete[] numx;
336  delete[] ir;
337  }
338  if(nzc > 0)
339  {
340  delete[] jc;
341  delete[] cp;
342  }
343  nz = rhs.nz;
344  nzc = rhs.nzc;
345  if(nz > 0)
346  {
347  numx = new NT[nz];
348  ir = new IT[nz];
349  std::copy(rhs.numx, rhs.numx + nz, numx); // numx can be a non-POD type
350  std::copy(rhs.ir, rhs.ir + nz, ir);
351  }
352  else
353  {
354  numx = NULL;
355  ir = NULL;
356  }
357  if(nzc > 0)
358  {
359  jc = new IT[nzc];
360  cp = new IT[nzc+1];
361  std::copy(rhs.jc, rhs.jc + nzc, jc);
362  std::copy(rhs.cp, rhs.cp + nzc + 1, cp);
363  }
364  else
365  {
366  jc = NULL;
367  cp = NULL;
368  }
369  }
370  return *this;
371 }
372 
373 template <class IT, class NT>
374 Dcsc<IT, NT> & Dcsc<IT,NT>::operator+=(const Dcsc<IT,NT> & rhs) // add and assign operator
375 {
376  IT estnzc = nzc + rhs.nzc;
377  IT estnz = nz + rhs.nz;
378  Dcsc<IT,NT> temp(estnz, estnzc);
379 
380  IT curnzc = 0;
381  IT curnz = 0;
382  IT i = 0;
383  IT j = 0;
384  temp.cp[0] = 0;
385  while(i< nzc && j<rhs.nzc)
386  {
387  if(jc[i] > rhs.jc[j])
388  {
389  temp.jc[curnzc++] = rhs.jc[j++];
390  for(IT k = rhs.cp[j-1]; k< rhs.cp[j]; ++k)
391  {
392  temp.ir[curnz] = rhs.ir[k];
393  temp.numx[curnz++] = rhs.numx[k];
394  }
395  temp.cp[curnzc] = temp.cp[curnzc-1] + (rhs.cp[j] - rhs.cp[j-1]);
396  }
397  else if(jc[i] < rhs.jc[j])
398  {
399  temp.jc[curnzc++] = jc[i++];
400  for(IT k = cp[i-1]; k< cp[i]; k++)
401  {
402  temp.ir[curnz] = ir[k];
403  temp.numx[curnz++] = numx[k];
404  }
405  temp.cp[curnzc] = temp.cp[curnzc-1] + (cp[i] - cp[i-1]);
406  }
407  else
408  {
409  temp.jc[curnzc++] = jc[i];
410  IT ii = cp[i];
411  IT jj = rhs.cp[j];
412  IT prevnz = curnz;
413  while (ii < cp[i+1] && jj < rhs.cp[j+1])
414  {
415  if (ir[ii] < rhs.ir[jj])
416  {
417  temp.ir[curnz] = ir[ii];
418  temp.numx[curnz++] = numx[ii++];
419  }
420  else if (ir[ii] > rhs.ir[jj])
421  {
422  temp.ir[curnz] = rhs.ir[jj];
423  temp.numx[curnz++] = rhs.numx[jj++];
424  }
425  else
426  {
427  temp.ir[curnz] = ir[ii];
428  temp.numx[curnz++] = numx[ii++] + rhs.numx[jj++]; // might include zeros
429  }
430  }
431  while (ii < cp[i+1])
432  {
433  temp.ir[curnz] = ir[ii];
434  temp.numx[curnz++] = numx[ii++];
435  }
436  while (jj < rhs.cp[j+1])
437  {
438  temp.ir[curnz] = rhs.ir[jj];
439  temp.numx[curnz++] = rhs.numx[jj++];
440  }
441  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
442  ++i;
443  ++j;
444  }
445  }
446  while(i< nzc)
447  {
448  temp.jc[curnzc++] = jc[i++];
449  for(IT k = cp[i-1]; k< cp[i]; ++k)
450  {
451  temp.ir[curnz] = ir[k];
452  temp.numx[curnz++] = numx[k];
453  }
454  temp.cp[curnzc] = temp.cp[curnzc-1] + (cp[i] - cp[i-1]);
455  }
456  while(j < rhs.nzc)
457  {
458  temp.jc[curnzc++] = rhs.jc[j++];
459  for(IT k = rhs.cp[j-1]; k< rhs.cp[j]; ++k)
460  {
461  temp.ir[curnz] = rhs.ir[k];
462  temp.numx[curnz++] = rhs.numx[k];
463  }
464  temp.cp[curnzc] = temp.cp[curnzc-1] + (rhs.cp[j] - rhs.cp[j-1]);
465  }
466  temp.Resize(curnzc, curnz);
467  *this = temp;
468  return *this;
469 }
470 
471 template <class IT, class NT>
473 {
474  if(nzc != rhs.nzc) return false;
475  bool same = std::equal(cp, cp+nzc+1, rhs.cp);
476  same = same && std::equal(jc, jc+nzc, rhs.jc);
477  same = same && std::equal(ir, ir+nz, rhs.ir);
478 
479 #ifdef DEBUG
480  std::vector<NT> error(nz);
481  std::transform(numx, numx+nz, rhs.numx, error.begin(), absdiff<NT>());
482  std::vector< std::pair<NT, NT> > error_original_pair(nz);
483  for(IT i=0; i < nz; ++i)
484  error_original_pair[i] = std::make_pair(error[i], numx[i]);
485  if(error_original_pair.size() > 10) // otherwise would crush for small data
486  {
487  partial_sort(error_original_pair.begin(), error_original_pair.begin()+10, error_original_pair.end(), std::greater< std::pair<NT,NT> >());
488  std::cout << "Highest 10 different entries are: " << std::endl;
489  for(IT i=0; i < 10; ++i)
490  std::cout << "Diff: " << error_original_pair[i].first << " on " << error_original_pair[i].second << std::endl;
491  }
492  else
493  {
494  sort(error_original_pair.begin(), error_original_pair.end(), std::greater< std::pair<NT,NT> >());
495  std::cout << "Highest different entries are: " << std::endl;
496  for(typename std::vector< std::pair<NT, NT> >::iterator it=error_original_pair.begin(); it != error_original_pair.end(); ++it)
497  std::cout << "Diff: " << it->first << " on " << it->second << std::endl;
498  }
499  std::cout << "Same before num: " << same << std::endl;
500 #endif
501 
502  ErrorTolerantEqual<NT> epsilonequal;
503  same = same && std::equal(numx, numx+nz, rhs.numx, epsilonequal );
504  return same;
505 }
506 
512 template <class IT, class NT>
513 void Dcsc<IT,NT>::EWiseMult(const Dcsc<IT,NT> & rhs, bool exclude)
514 {
515  // We have a class with a friend function and a member function with the same name. Calling the friend function from the member function
516  // might (if the signature is the same) give compilation errors if not preceded by :: that denotes the global scope.
517  *this = combblas::EWiseMult((*this), &rhs, exclude); // call the binary version
518 }
519 
520 
521 template <class IT, class NT>
522 template <typename _UnaryOperation, typename GlobalIT>
523 Dcsc<IT,NT>* Dcsc<IT,NT>::PruneI(_UnaryOperation __unary_op, bool inPlace, GlobalIT rowOffset, GlobalIT colOffset)
524 {
525  // Two-pass algorithm
526  IT prunednnz = 0;
527  IT prunednzc = 0;
528  for(IT i=0; i<nzc; ++i)
529  {
530  bool colexists = false;
531  for(IT j=cp[i]; j < cp[i+1]; ++j)
532  {
533  if(!(__unary_op(make_tuple(rowOffset+ir[j], colOffset+jc[i], numx[j])))) // keep this nonzero
534  {
535  ++prunednnz;
536  colexists = true;
537  }
538  }
539  if(colexists) ++prunednzc;
540  }
541  IT * oldcp = cp;
542  IT * oldjc = jc;
543  IT * oldir = ir;
544  NT * oldnumx = numx;
545 
546  cp = new IT[prunednzc+1];
547  jc = new IT[prunednzc];
548  ir = new IT[prunednnz];
549  numx = new NT[prunednnz];
550 
551  IT cnzc = 0;
552  IT cnnz = 0;
553  cp[cnzc] = 0;
554  for(IT i=0; i<nzc; ++i)
555  {
556  for(IT j = oldcp[i]; j < oldcp[i+1]; ++j)
557  {
558  if(!(__unary_op(make_tuple(rowOffset+oldir[j], colOffset+oldjc[i], oldnumx[j])))) // keep this nonzero
559  {
560  ir[cnnz] = oldir[j];
561  numx[cnnz++] = oldnumx[j];
562  }
563  }
564  if(cnnz > cp[cnzc])
565  {
566  jc[cnzc] = oldjc[i];
567  cp[cnzc+1] = cnnz;
568  ++cnzc;
569  }
570  }
571  assert(cnzc == prunednzc);
572  assert(cnnz == prunednnz);
573  if (inPlace)
574  {
575  // delete the memory pointed by previous pointers
576  DeleteAll(oldnumx, oldir, oldjc, oldcp);
577  nz = cnnz;
578  nzc = cnzc;
579  return NULL;
580  }
581  else
582  {
583  // create a new object to store the data
584  Dcsc<IT,NT>* ret = new Dcsc<IT,NT>();
585  ret->cp = cp;
586  ret->jc = jc;
587  ret->ir = ir;
588  ret->numx = numx;
589  ret->nz = cnnz;
590  ret->nzc = cnzc;
591 
592  // put the previous pointers back
593  cp = oldcp;
594  jc = oldjc;
595  ir = oldir;
596  numx = oldnumx;
597 
598  return ret;
599  }
600 }
601 
602 template <class IT, class NT>
603 template <typename _UnaryOperation>
604 Dcsc<IT,NT>* Dcsc<IT,NT>::Prune(_UnaryOperation __unary_op, bool inPlace)
605 {
606  // Two-pass algorithm
607  IT prunednnz = 0;
608  IT prunednzc = 0;
609  for(IT i=0; i<nzc; ++i)
610  {
611  bool colexists = false;
612  for(IT j=cp[i]; j < cp[i+1]; ++j)
613  {
614  if(!(__unary_op(numx[j]))) // keep this nonzero
615  {
616  ++prunednnz;
617  colexists = true;
618  }
619  }
620  if(colexists) ++prunednzc;
621  }
622  IT * oldcp = cp;
623  IT * oldjc = jc;
624  IT * oldir = ir;
625  NT * oldnumx = numx;
626 
627  cp = new IT[prunednzc+1];
628  jc = new IT[prunednzc];
629  ir = new IT[prunednnz];
630  numx = new NT[prunednnz];
631 
632  IT cnzc = 0;
633  IT cnnz = 0;
634  cp[cnzc] = 0;
635  for(IT i=0; i<nzc; ++i)
636  {
637  for(IT j = oldcp[i]; j < oldcp[i+1]; ++j)
638  {
639  if(!(__unary_op(oldnumx[j]))) // keep this nonzero
640  {
641  ir[cnnz] = oldir[j];
642  numx[cnnz++] = oldnumx[j];
643  }
644  }
645  if(cnnz > cp[cnzc])
646  {
647  jc[cnzc] = oldjc[i];
648  cp[cnzc+1] = cnnz;
649  ++cnzc;
650  }
651  }
652  assert(cnzc == prunednzc);
653  assert(cnnz == prunednnz);
654  if (inPlace)
655  {
656  // delete the memory pointed by previous pointers
657  DeleteAll(oldnumx, oldir, oldjc, oldcp);
658  nz = cnnz;
659  nzc = cnzc;
660  return NULL;
661  }
662  else
663  {
664  // create a new object to store the data
665  Dcsc<IT,NT>* ret = new Dcsc<IT,NT>();
666  ret->cp = cp;
667  ret->jc = jc;
668  ret->ir = ir;
669  ret->numx = numx;
670  ret->nz = cnnz;
671  ret->nzc = cnzc;
672 
673  // put the previous pointers back
674  cp = oldcp;
675  jc = oldjc;
676  ir = oldir;
677  numx = oldnumx;
678 
679  return ret;
680  }
681 }
682 
683 
684 template <class IT, class NT>
685 template <typename _BinaryOperation>
686 Dcsc<IT,NT>* Dcsc<IT,NT>::PruneColumn(NT* pvals, _BinaryOperation __binary_op, bool inPlace)
687 {
688  // Two-pass algorithm
689  IT prunednnz = 0;
690  IT prunednzc = 0;
691  for(IT i=0; i<nzc; ++i)
692  {
693  bool colexists = false;
694  for(IT j=cp[i]; j < cp[i+1]; ++j)
695  {
696  IT colid = jc[i];
697  if(!(__binary_op(numx[j], pvals[colid]))) // keep this nonzero
698  {
699  ++prunednnz;
700  colexists = true;
701  }
702  }
703  if(colexists) ++prunednzc;
704  }
705  IT * oldcp = cp;
706  IT * oldjc = jc;
707  IT * oldir = ir;
708  NT * oldnumx = numx;
709 
710  cp = new IT[prunednzc+1];
711  jc = new IT[prunednzc];
712  ir = new IT[prunednnz];
713  numx = new NT[prunednnz];
714 
715  IT cnzc = 0;
716  IT cnnz = 0;
717  cp[cnzc] = 0;
718  for(IT i=0; i<nzc; ++i)
719  {
720  for(IT j = oldcp[i]; j < oldcp[i+1]; ++j)
721  {
722  IT colid = oldjc[i];
723  if(!(__binary_op(oldnumx[j], pvals[colid])))
724  {
725  ir[cnnz] = oldir[j];
726  numx[cnnz++] = oldnumx[j];
727  }
728  }
729  if(cnnz > cp[cnzc])
730  {
731  jc[cnzc] = oldjc[i];
732  cp[cnzc+1] = cnnz;
733  ++cnzc;
734  }
735  }
736  assert(cnzc == prunednzc);
737  if (inPlace)
738  {
739  // delete the memory pointed by previous pointers
740  DeleteAll(oldnumx, oldir, oldjc, oldcp);
741  nz = cnnz;
742  nzc = cnzc;
743  return NULL;
744  }
745  else
746  {
747  // create a new object to store the data
748  Dcsc<IT,NT>* ret = new Dcsc<IT,NT>();
749  ret->cp = cp;
750  ret->jc = jc;
751  ret->ir = ir;
752  ret->numx = numx;
753  ret->nz = cnnz;
754  ret->nzc = cnzc;
755 
756  // put the previous pointers back
757  cp = oldcp;
758  jc = oldjc;
759  ir = oldir;
760  numx = oldnumx;
761 
762  return ret;
763  }
764 }
765 
766 
767 // prune selected columns indexed by pinds
768 template <class IT, class NT>
769 template <typename _BinaryOperation>
770 Dcsc<IT,NT>* Dcsc<IT,NT>::PruneColumn(IT* pinds, NT* pvals, _BinaryOperation __binary_op, bool inPlace)
771 {
772  // Two-pass algorithm
773  IT prunednnz = 0;
774  IT prunednzc = 0;
775  IT k = 0;
776  for(IT i=0; i<nzc; ++i)
777  {
778  bool colexists = false;
779  IT colid = jc[i];
780  if(colid==pinds[k]) // pinds is sorted
781  {
782  for(IT j=cp[i]; j < cp[i+1]; ++j)
783  {
784  if(!(__binary_op(numx[j], pvals[k]))) // keep this nonzero
785  {
786  ++prunednnz;
787  colexists = true;
788  }
789  }
790  k++;
791  }
792  else // untouched columns
793  {
794  colexists = true;
795  prunednnz += (cp[i+1] - cp[i]);
796  }
797  if(colexists) ++prunednzc;
798  }
799  IT * oldcp = cp;
800  IT * oldjc = jc;
801  IT * oldir = ir;
802  NT * oldnumx = numx;
803 
804  cp = new IT[prunednzc+1];
805  jc = new IT[prunednzc];
806  ir = new IT[prunednnz];
807  numx = new NT[prunednnz];
808 
809  IT cnzc = 0;
810  IT cnnz = 0;
811  cp[cnzc] = 0;
812  k = 0;
813  for(IT i=0; i<nzc; ++i)
814  {
815  IT colid = oldjc[i];
816  if(colid==pinds[k]) // prunned columns
817  {
818  for(IT j = oldcp[i]; j < oldcp[i+1]; ++j)
819  {
820  if(!(__binary_op(oldnumx[j], pvals[k])))
821  {
822  ir[cnnz] = oldir[j];
823  numx[cnnz++] = oldnumx[j];
824  }
825  }
826  k++;
827  }
828  else // copy other columns
829  {
830  for(IT j = oldcp[i]; j < oldcp[i+1]; ++j)
831  {
832  ir[cnnz] = oldir[j];
833  numx[cnnz++] = oldnumx[j];
834  }
835  }
836  if(cnnz > cp[cnzc])
837  {
838  jc[cnzc] = oldjc[i];
839  cp[cnzc+1] = cnnz;
840  ++cnzc;
841  }
842  }
843  assert(cnzc == prunednzc);
844  if (inPlace)
845  {
846  // delete the memory pointed by previous pointers
847  DeleteAll(oldnumx, oldir, oldjc, oldcp);
848  nz = cnnz;
849  nzc = cnzc;
850  return NULL;
851  }
852  else
853  {
854  // create a new object to store the data
855  Dcsc<IT,NT>* ret = new Dcsc<IT,NT>();
856  ret->cp = cp;
857  ret->jc = jc;
858  ret->ir = ir;
859  ret->numx = numx;
860  ret->nz = cnnz;
861  ret->nzc = cnzc;
862 
863  // put the previous pointers back
864  cp = oldcp;
865  jc = oldjc;
866  ir = oldir;
867  numx = oldnumx;
868 
869  return ret;
870  }
871 }
872 
873 template <class IT, class NT>
874 void Dcsc<IT,NT>::EWiseScale(NT ** scaler)
875 {
876  for(IT i=0; i<nzc; ++i)
877  {
878  IT colid = jc[i];
879  for(IT j=cp[i]; j < cp[i+1]; ++j)
880  {
881  IT rowid = ir[j];
882  numx[j] *= scaler[rowid][colid];
883  }
884  }
885 }
886 
891 template <class IT, class NT>
892 template <typename _BinaryOperation>
893 void Dcsc<IT,NT>::UpdateDense(NT ** array, _BinaryOperation __binary_op) const
894 {
895  for(IT i=0; i<nzc; ++i)
896  {
897  IT colid = jc[i];
898  for(IT j=cp[i]; j < cp[i+1]; ++j)
899  {
900  IT rowid = ir[j];
901  array[rowid][colid] = __binary_op(array[rowid][colid], numx[j]);
902  }
903  }
904 }
905 
911 template <class IT, class NT>
912 IT Dcsc<IT,NT>::ConstructAux(IT ndim, IT * & aux) const
913 {
914  float cf = static_cast<float>(ndim+1) / static_cast<float>(nzc);
915  IT colchunks = static_cast<IT> ( ceil( static_cast<float>(ndim+1) / ceil(cf)) );
916 
917  aux = new IT[colchunks+1];
918 
919  IT chunksize = static_cast<IT>(ceil(cf));
920  IT reg = 0;
921  IT curchunk = 0;
922  aux[curchunk++] = 0;
923  for(IT i = 0; i< nzc; ++i)
924  {
925  if(jc[i] >= curchunk * chunksize) // beginning of the next chunk
926  {
927  while(jc[i] >= curchunk * chunksize) // consider any empty chunks
928  {
929  aux[curchunk++] = reg;
930  }
931  }
932  reg = i+1;
933  }
934  while(curchunk <= colchunks)
935  {
936  aux[curchunk++] = reg;
937  }
938  return colchunks;
939 }
940 
945 template <class IT, class NT>
946 void Dcsc<IT,NT>::Resize(IT nzcnew, IT nznew)
947 {
948  if(nzcnew == 0)
949  {
950  delete[] jc;
951  delete[] cp;
952  nzc = 0;
953  }
954  if(nznew == 0)
955  {
956  delete[] ir;
957  delete[] numx;
958  nz = 0;
959  }
960  if ( nzcnew == 0 && nznew == 0)
961  {
962  return;
963  }
964  if (nzcnew != nzc)
965  {
966  IT * tmpcp = cp;
967  IT * tmpjc = jc;
968  cp = new IT[nzcnew+1];
969  jc = new IT[nzcnew];
970  if(nzcnew > nzc) // Grow it (copy all of the old elements)
971  {
972  std::copy(tmpcp, tmpcp+nzc+1, cp); // copy(first, end, result)
973  std::copy(tmpjc, tmpjc+nzc, jc);
974  }
975  else // Shrink it (copy only a portion of the old elements)
976  {
977  std::copy(tmpcp, tmpcp+nzcnew+1, cp);
978  std::copy(tmpjc, tmpjc+nzcnew, jc);
979  }
980  delete[] tmpcp; // delete the memory pointed by previous pointers
981  delete[] tmpjc;
982  nzc = nzcnew;
983  }
984  if (nznew != nz)
985  {
986  NT * tmpnumx = numx;
987  IT * tmpir = ir;
988  numx = new NT[nznew];
989  ir = new IT[nznew];
990  if(nznew > nz) // Grow it (copy all of the old elements)
991  {
992  std::copy(tmpnumx, tmpnumx+nz, numx); // numx can be non-POD
993  std::copy(tmpir, tmpir+nz, ir);
994  }
995  else // Shrink it (copy only a portion of the old elements)
996  {
997  std::copy(tmpnumx, tmpnumx+nznew, numx);
998  std::copy(tmpir, tmpir+nznew, ir);
999  }
1000  delete[] tmpnumx; // delete the memory pointed by previous pointers
1001  delete[] tmpir;
1002  nz = nznew;
1003  }
1004 }
1005 
1012 template<class IT, class NT>
1013 IT Dcsc<IT,NT>::AuxIndex(const IT colind, bool & found, IT * aux, IT csize) const
1014 {
1015  IT base = static_cast<IT>(floor((float) (colind/csize)));
1016  IT start = aux[base];
1017  IT end = aux[base+1];
1018 
1019  IT * itr = std::find(jc + start, jc + end, colind);
1020 
1021  found = (itr != jc + end);
1022  return (itr-jc);
1023 }
1024 
1029 template<class IT, class NT>
1031 {
1032  IT * itr = std::lower_bound(jc, jc+nzc, cut);
1033  IT pos = itr - jc;
1034 
1035  if(cp[pos] == 0)
1036  {
1037  A = NULL;
1038  }
1039  else
1040  {
1041  A = new Dcsc<IT,NT>(cp[pos], pos);
1042  std::copy(jc, jc+pos, A->jc);
1043  std::copy(cp, cp+pos+1, A->cp);
1044  std::copy(ir, ir+cp[pos], A->ir);
1045  std::copy(numx, numx + cp[pos], A->numx); // copy(first, last, result)
1046  }
1047  if(nz-cp[pos] == 0)
1048  {
1049  B = NULL;
1050  }
1051  else
1052  {
1053  B = new Dcsc<IT,NT>(nz-cp[pos], nzc-pos);
1054  std::copy(jc+pos, jc+ nzc, B->jc);
1055  transform(B->jc, B->jc + (nzc-pos), B->jc, bind2nd(std::minus<IT>(), cut));
1056  std::copy(cp+pos, cp+nzc+1, B->cp);
1057  transform(B->cp, B->cp + (nzc-pos+1), B->cp, bind2nd(std::minus<IT>(), cp[pos]));
1058  std::copy(ir+cp[pos], ir+nz, B->ir);
1059  std::copy(numx+cp[pos], numx+nz, B->numx); // copy(first, last, result)
1060  }
1061 }
1062 
1069 template<class IT, class NT>
1070 void Dcsc<IT,NT>::ColSplit(std::vector< Dcsc<IT,NT>* > & parts, std::vector<IT> & cuts)
1071 {
1072  IT * jcbegin = jc;
1073  std::vector<IT> pos; // pos has "parts-1" entries
1074  for(auto cutpoint = cuts.begin(); cutpoint != cuts.end(); ++cutpoint)
1075  {
1076  IT * itr = std::lower_bound(jcbegin, jc+nzc, *cutpoint);
1077  pos.push_back(itr - jc);
1078  jcbegin = itr; // so that lower_bound searches a smaller vector
1079  }
1080 
1081  if(cp[pos[0]] == 0) // first piece
1082  {
1083  parts[0] = NULL;
1084  }
1085  else
1086  {
1087  parts[0] = new Dcsc<IT,NT>(cp[pos[0]], pos[0]); // Dcsc(nnz, nzc)
1088  std::copy(jc, jc+pos[0], parts[0]->jc); // std::copy
1089  std::copy(cp, cp+pos[0]+1, parts[0]->cp);
1090  std::copy(ir, ir+cp[pos[0]], parts[0]->ir);
1091  std::copy(numx, numx + cp[pos[0]], parts[0]->numx); // copy(first, last, result)
1092  }
1093  int ncuts = cuts.size(); // all except last piece
1094  for(int i=1; i< ncuts; ++i) // treat the first piece differently
1095  {
1096  if(cp[pos[i]] - cp[pos[i-1]] == 0)
1097  {
1098  parts[i] = NULL;
1099  }
1100  else
1101  {
1102  parts[i] = new Dcsc<IT,NT>(cp[pos[i]] - cp[pos[i-1]], pos[i] - pos[i-1]); // Dcsc(nnz, nzc)
1103  std::copy(jc+pos[i-1], jc+pos[i], parts[i]->jc); // std::copy
1104  transform(parts[i]->jc, parts[i]->jc + (pos[i]-pos[i-1]), parts[i]->jc, bind2nd(std::minus<IT>(), cuts[i-1])); // cuts[i-1] is well defined as i>=1
1105 
1106  std::copy(cp+pos[i-1], cp+pos[i]+1, parts[i]->cp);
1107  transform(parts[i]->cp, parts[i]->cp + (pos[i]-pos[i-1]+1), parts[i]->cp, bind2nd(std::minus<IT>(), cp[pos[i-1]]));
1108 
1109  std::copy(ir+cp[pos[i-1]], ir+cp[pos[i]], parts[i]->ir);
1110  std::copy(numx+cp[pos[i-1]], numx + cp[pos[i]], parts[i]->numx); // copy(first, last, result)
1111  }
1112  }
1113  if(nz - cp[pos[ncuts-1]] == 0)
1114  {
1115  parts[ncuts] = NULL;
1116  }
1117  else
1118  {
1119  parts[ncuts] = new Dcsc<IT,NT>(nz-cp[pos[ncuts-1]], nzc-pos[ncuts-1]); // ncuts = npieces -1
1120  std::copy(jc+pos[ncuts-1], jc+ nzc, parts[ncuts]->jc);
1121  transform(parts[ncuts]->jc, parts[ncuts]->jc + (nzc-pos[ncuts-1]), parts[ncuts]->jc, bind2nd(std::minus<IT>(), cuts[ncuts-1]));
1122 
1123  std::copy(cp+pos[ncuts-1], cp+nzc+1, parts[ncuts]->cp);
1124  transform(parts[ncuts]->cp, parts[ncuts]->cp + (nzc-pos[ncuts-1]+1), parts[ncuts]->cp, bind2nd(std::minus<IT>(), cp[pos[ncuts-1]]));
1125  std::copy(ir+cp[pos[ncuts-1]], ir+nz, parts[ncuts]->ir);
1126  std::copy(numx+cp[pos[ncuts-1]], numx+nz, parts[ncuts]->numx);
1127  }
1128 }
1129 
1130 
1131 // Assumes A and B are not NULL
1132 // When any is NULL, this function is not called anyway
1133 template<class IT, class NT>
1134 void Dcsc<IT,NT>::Merge(const Dcsc<IT,NT> * A, const Dcsc<IT,NT> * B, IT cut)
1135 {
1136  assert((A != NULL) && (B != NULL)); // handled at higher level
1137  IT cnz = A->nz + B->nz;
1138  IT cnzc = A->nzc + B->nzc;
1139  if(cnz > 0)
1140  {
1141  *this = Dcsc<IT,NT>(cnz, cnzc); // safe, because "this" can not be NULL inside a member function
1142 
1143  std::copy(A->jc, A->jc + A->nzc, jc); // copy(first, last, result)
1144  std::copy(B->jc, B->jc + B->nzc, jc + A->nzc);
1145  transform(jc + A->nzc, jc + cnzc, jc + A->nzc, bind2nd(std::plus<IT>(), cut));
1146 
1147  std::copy(A->cp, A->cp + A->nzc, cp);
1148  std::copy(B->cp, B->cp + B->nzc +1, cp + A->nzc);
1149  transform(cp + A->nzc, cp+cnzc+1, cp + A->nzc, bind2nd(std::plus<IT>(), A->cp[A->nzc]));
1150 
1151  std::copy(A->ir, A->ir + A->nz, ir);
1152  std::copy(B->ir, B->ir + B->nz, ir + A->nz);
1153 
1154  // since numx is potentially non-POD, we use std::copy
1155  std::copy(A->numx, A->numx + A->nz, numx);
1156  std::copy(B->numx, B->numx + B->nz, numx + A->nz);
1157  }
1158 }
1159 
1160 
1167 template<class IT, class NT>
1168 void Dcsc<IT,NT>::ColConcatenate(std::vector< Dcsc<IT,NT>* > & parts, std::vector<IT> & offsets)
1169 {
1170  IT cnz = 0;
1171  IT cnzc = 0;
1172  size_t nmembers = parts.size();
1173  for(size_t i=0; i< nmembers; ++i)
1174  {
1175  cnz += parts[i]->nz;
1176  cnzc += parts[i]->nzc;
1177  }
1178  if(cnz > 0)
1179  {
1180  *this = Dcsc<IT,NT>(cnz, cnzc); // safe, because "this" can not be NULL inside a member function
1181 
1182  IT run_nz = 0;
1183  IT run_nzc = 0;
1184  for(size_t i=0; i< nmembers; ++i)
1185  {
1186  std::copy(parts[i]->jc, parts[i]->jc + parts[i]->nzc, jc + run_nzc);
1187  transform(jc + run_nzc, jc + run_nzc + parts[i]->nzc, jc + run_nzc, bind2nd(std::plus<IT>(), offsets[i]));
1188 
1189  // remember: cp[nzc] = nnz
1190  std::copy(parts[i]->cp, parts[i]->cp + parts[i]->nzc, cp + run_nzc);
1191  transform(cp + run_nzc, cp + run_nzc + parts[i]->nzc, cp + run_nzc, bind2nd(std::plus<IT>(),run_nz));
1192 
1193  std::copy(parts[i]->ir, parts[i]->ir + parts[i]->nz, ir + run_nz);
1194  std::copy(parts[i]->numx, parts[i]->numx + parts[i]->nz, numx + run_nz);
1195 
1196  run_nzc += parts[i]->nzc;
1197  run_nz += parts[i]->nz;
1198  }
1199  // adjust the last pointer
1200  cp[run_nzc] = run_nz;
1201  }
1202 }
1203 
1209 template<class IT, class NT>
1210 template<class VT>
1211 void Dcsc<IT,NT>::FillColInds(const VT * colnums, IT nind, std::vector< std::pair<IT,IT> > & colinds, IT * aux, IT csize) const
1212 {
1213  if ( aux == NULL || (nzc / nind) < THRESHOLD) // use scanning indexing
1214  {
1215  IT mink = std::min(nzc, nind);
1216  std::pair<IT,IT> * isect = new std::pair<IT,IT>[mink];
1217  std::pair<IT,IT> * range1 = new std::pair<IT,IT>[nzc];
1218  std::pair<IT,IT> * range2 = new std::pair<IT,IT>[nind];
1219 
1220  for(IT i=0; i < nzc; ++i)
1221  {
1222  range1[i] = std::make_pair(jc[i], i); // get the actual nonzero value and the index to the ith nonzero
1223  }
1224  for(IT i=0; i < nind; ++i)
1225  {
1226  range2[i] = std::make_pair(static_cast<IT>(colnums[i]), 0); // second is dummy as all the intersecting elements are copied from the first range
1227  }
1228 
1229  std::pair<IT,IT> * itr = set_intersection(range1, range1 + nzc, range2, range2+nind, isect, SpHelper::first_compare<IT> );
1230  // isect now can iterate on a subset of the elements of range1
1231  // meaning that the intersection can be accessed directly by isect[i] instead of range1[isect[i]]
1232  // this is because the intersecting elements are COPIED to the output range "isect"
1233 
1234  IT kisect = static_cast<IT>(itr-isect); // size of the intersection
1235  for(IT j=0, i =0; j< nind; ++j)
1236  {
1237  // the elements represented by jc[isect[i]] are a subset of the elements represented by colnums[j]
1238  if( i == kisect || isect[i].first != static_cast<IT>(colnums[j]))
1239  {
1240  // not found, signal by setting first = second
1241  colinds[j].first = 0;
1242  colinds[j].second = 0;
1243  }
1244  else // i < kisect && dcsc->jc[isect[i]] == colnums[j]
1245  {
1246  IT p = isect[i++].second;
1247  colinds[j].first = cp[p];
1248  colinds[j].second = cp[p+1];
1249  }
1250  }
1251  DeleteAll(isect, range1, range2);
1252  }
1253  else // use aux based indexing
1254  {
1255  bool found;
1256  for(IT j =0; j< nind; ++j)
1257  {
1258  IT pos = AuxIndex(static_cast<IT>(colnums[j]), found, aux, csize);
1259  if(found)
1260  {
1261  colinds[j].first = cp[pos];
1262  colinds[j].second = cp[pos+1];
1263  }
1264  else // not found, signal by setting first = second
1265  {
1266  colinds[j].first = 0;
1267  colinds[j].second = 0;
1268  }
1269  }
1270  }
1271 }
1272 
1273 
1274 template <class IT, class NT>
1276 {
1277  if(nz > 0) // dcsc may be empty
1278  {
1279  delete[] numx;
1280  delete[] ir;
1281  }
1282  if(nzc > 0)
1283  {
1284  delete[] jc;
1285  delete[] cp;
1286  }
1287 }
1288 
1289 }
double B
static void iota(_ForwardIter __first, _ForwardIter __last, T __val)
Definition: SpHelper.h:226
void EWiseScale(NT **scaler)
Definition: dcsc.cpp:874
bool memowned
Definition: dcsc.h:126
Dcsc< IT, NT > * PruneColumn(NT *pvals, _BinaryOperation __binary_op, bool inPlace)
Definition: dcsc.cpp:686
Dcsc< IU, typename promote_trait< NU1, NU2 >::T_promote > EWiseMult(const Dcsc< IU, NU1 > &A, const Dcsc< IU, NU2 > *B, bool exclude)
Definition: Friends.h:694
#define THRESHOLD
Definition: SpDefs.h:125
Definition: StackEntry.h:9
void DeleteAll(A arr1)
Definition: Deleter.h:48
void Resize(IT nzcnew, IT nznew)
Definition: dcsc.cpp:946
IT * ir
row indices, size nz
Definition: dcsc.h:121
void EWiseMult(const Dcsc< IT, NT > &rhs, bool exclude)
Definition: dcsc.cpp:513
IT * cp
The master array, size nzc+1 (keeps column pointers)
Definition: dcsc.h:117
void Merge(const Dcsc< IT, NT > *Adcsc, const Dcsc< IT, NT > *B, IT cut)
Definition: dcsc.cpp:1134
void FillColInds(const VT *colnums, IT nind, std::vector< std::pair< IT, IT > > &colinds, IT *aux, IT csize) const
Definition: dcsc.cpp:1211
double A
Dcsc< IT, NT > * Prune(_UnaryOperation __unary_op, bool inPlace)
Definition: dcsc.cpp:604
void ColSplit(std::vector< Dcsc< IT, NT > * > &parts, std::vector< IT > &cuts)
Definition: dcsc.cpp:1070
bool operator==(const Dcsc< IT, NT > &rhs)
Definition: dcsc.cpp:472
IT AuxIndex(const IT colind, bool &found, IT *aux, IT csize) const
Definition: dcsc.cpp:1013
void ColConcatenate(std::vector< Dcsc< IT, NT > * > &parts, std::vector< IT > &offsets)
Definition: dcsc.cpp:1168
void UpdateDense(NT **array, _BinaryOperation __binary_op) const
Definition: dcsc.cpp:893
Dcsc< IT, NT > & operator+=(const Dcsc< IT, NT > &rhs)
Definition: dcsc.cpp:374
NT * numx
generic values, size nz
Definition: dcsc.h:122
IT nzc
number of columns with at least one non-zero in them
Definition: dcsc.h:125
IT ConstructAux(IT ndim, IT *&aux) const
Definition: dcsc.cpp:912
Dcsc< IT, NT > * PruneI(_UnaryOperation __unary_op, bool inPlace, GlobalIT rowOffset, GlobalIT colOffset)
Definition: dcsc.cpp:523
Definition: CCGrid.h:4
IT * jc
col indices, size nzc
Definition: dcsc.h:120
Dcsc< IT, NT > & operator=(const Dcsc< IT, NT > &rhs)
Definition: dcsc.cpp:328
Dcsc< IT, NT > & AddAndAssign(StackEntry< NT, std::pair< IT, IT > > *multstack, IT mdim, IT ndim, IT nnz)
Definition: dcsc.cpp:71
void Split(Dcsc< IT, NT > *&A, Dcsc< IT, NT > *&B, IT cut)
Definition: dcsc.cpp:1030