COMBINATORIAL_BLAS  1.6
Friends.h
1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 6/15/2017 ---------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc --------------------------*/
6 /****************************************************************/
7 /*
8  Copyright (c) 2010-2017, The Regents of the University of California
9 
10  Permission is hereby granted, free of charge, to any person obtaining a copy
11  of this software and associated documentation files (the "Software"), to deal
12  in the Software without restriction, including without limitation the rights
13  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  copies of the Software, and to permit persons to whom the Software is
15  furnished to do so, subject to the following conditions:
16 
17  The above copyright notice and this permission notice shall be included in
18  all copies or substantial portions of the Software.
19 
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  THE SOFTWARE.
27  */
28 
29 
30 #ifndef _FRIENDS_H_
31 #define _FRIENDS_H_
32 
33 #include <iostream>
34 #include "SpMat.h" // Best to include the base class first
35 #include "SpHelper.h"
36 #include "StackEntry.h"
37 #include "Isect.h"
38 #include "Deleter.h"
39 #include "SpImpl.h"
40 #include "SpParHelper.h"
41 #include "Compare.h"
42 #include "CombBLAS.h"
43 #include "PreAllocatedSPA.h"
44 
45 namespace combblas {
46 
47 template <class IU, class NU>
48 class SpTuples;
49 
50 template <class IU, class NU>
51 class SpDCCols;
52 
53 template <class IU, class NU>
54 class Dcsc;
55 
56 /*************************************************************************************************/
57 /**************************** SHARED ADDRESS SPACE FRIEND FUNCTIONS ******************************/
58 /****************************** MULTITHREADED LOGIC ALSO GOES HERE *******************************/
59 /*************************************************************************************************/
60 
61 
63 template <typename SR, typename IU, typename NU, typename RHS, typename LHS>
64 void dcsc_gespmv (const SpDCCols<IU, NU> & A, const RHS * x, LHS * y)
65 {
66  if(A.nnz > 0)
67  {
68  for(IU j =0; j<A.dcsc->nzc; ++j) // for all nonzero columns
69  {
70  IU colid = A.dcsc->jc[j];
71  for(IU i = A.dcsc->cp[j]; i< A.dcsc->cp[j+1]; ++i)
72  {
73  IU rowid = A.dcsc->ir[i];
74  SR::axpy(A.dcsc->numx[i], x[colid], y[rowid]);
75  }
76  }
77  }
78 }
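// ---- Illustrative sketch, not part of Friends.h: a minimal plus-times semiring over double,
// shaped after the way the SR template parameter is used above (SR::id(), SR::add(), SR::axpy()).
// The name PlusTimesSRd is made up for this example; CombBLAS ships its own semiring classes.
struct PlusTimesSRd
{
    static double id() { return 0.0; }                                        // additive identity
    static double add(const double & a, const double & b) { return a + b; }
    static double multiply(const double & a, const double & b) { return a * b; }
    static void axpy(double a, const double & x, double & y) { y = add(y, multiply(a, x)); }
};
// With such a type, a call would look like dcsc_gespmv<PlusTimesSRd>(A, x, y).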
79 
81 template <typename SR, typename IU, typename NU, typename RHS, typename LHS>
82 void dcsc_gespmv_threaded (const SpDCCols<IU, NU> & A, const RHS * x, LHS * y)
83 {
84  if(A.nnz > 0)
85  {
86  int nthreads=1;
87  #ifdef _OPENMP
88  #pragma omp parallel
89  {
90  nthreads = omp_get_num_threads();
91  }
92  #endif
93 
94  IU nlocrows = A.getnrow();
95  LHS ** tomerge = SpHelper::allocate2D<LHS>(nthreads, nlocrows);
96  auto id = SR::id();
97 
98  for(int i=0; i<nthreads; ++i)
99  {
100  std::fill_n(tomerge[i], nlocrows, id);
101  }
102 
103  #pragma omp parallel for
104  for(IU j =0; j<A.dcsc->nzc; ++j) // for all nonzero columns
105  {
106  int curthread = 0; // without OpenMP there is a single thread, which must use buffer 0
107  #ifdef _OPENMP
108  curthread = omp_get_thread_num();
109  #endif
110 
111  LHS * loc2merge = tomerge[curthread];
112 
113  IU colid = A.dcsc->jc[j];
114  for(IU i = A.dcsc->cp[j]; i< A.dcsc->cp[j+1]; ++i)
115  {
116  IU rowid = A.dcsc->ir[i];
117  SR::axpy(A.dcsc->numx[i], x[colid], loc2merge[rowid]);
118  }
119  }
120 
121  #pragma omp parallel for
122  for(IU j=0; j < nlocrows; ++j)
123  {
124  for(int i=0; i< nthreads; ++i)
125  {
126  y[j] = SR::add(y[j], tomerge[i][j]);
127  }
128  }
129  SpHelper::deallocate2D(tomerge, nthreads);
130  }
131 }
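// ---- Illustrative sketch, not part of Friends.h: the thread-private accumulate-then-reduce
// pattern used by dcsc_gespmv_threaded, restated on plain std::vector buffers. Assumes OpenMP
// and <vector>/<utility>/<omp.h>; the function and variable names are made up for the example.
void accumulate_reduce(int nrows, const std::vector< std::pair<int,double> > & contribs, std::vector<double> & y)
{
    int nthreads = 1;
    #pragma omp parallel
    {
        nthreads = omp_get_num_threads();
    }
    std::vector< std::vector<double> > partial(nthreads, std::vector<double>(nrows, 0.0));

    #pragma omp parallel for
    for (long k = 0; k < (long) contribs.size(); ++k)
    {
        int t = omp_get_thread_num();
        partial[t][contribs[k].first] += contribs[k].second;  // race-free: each thread owns one buffer
    }
    #pragma omp parallel for
    for (int r = 0; r < nrows; ++r)
    {
        for (int t = 0; t < nthreads; ++t)
            y[r] += partial[t][r];                            // reduce the per-thread buffers into y
    }
}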
132 
133 
138 template <typename SR, typename IU, typename NUM, typename DER, typename IVT, typename OVT>
139 int generic_gespmv_threaded (const SpMat<IU,NUM,DER> & A, const int32_t * indx, const IVT * numx, int32_t nnzx,
140  int32_t * & sendindbuf, OVT * & sendnumbuf, int * & sdispls, int p_c, PreAllocatedSPA<OVT> & SPA)
141 {
142  // FACTS: Split boundaries (for multithreaded execution) are independent of recipient boundaries
143  // Two splits might create output to the same recipient (needs to be merged)
144  // However, each split's output is distinct (no duplicate elimination is needed after merge)
145 
146  sdispls = new int[p_c](); // initialize to zero (as all indy might be empty)
147  if(A.getnnz() > 0 && nnzx > 0)
148  {
149  int splits = A.getnsplit();
150  if(splits > 0)
151  {
152  int32_t nlocrows = static_cast<int32_t>(A.getnrow());
153  int32_t perpiece = nlocrows / splits;
154  std::vector< std::vector< int32_t > > indy(splits);
155  std::vector< std::vector< OVT > > numy(splits);
156 
157  // Parallelize with OpenMP
158  #ifdef _OPENMP
159  #pragma omp parallel for // num_threads(6)
160  #endif
161  for(int i=0; i<splits; ++i)
162  {
163  if(SPA.initialized)
164  {
165  if(i != splits-1)
166  SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), perpiece, indx, numx, nnzx, indy[i], numy[i], i*perpiece, SPA.V_localy[i], SPA.V_isthere[i], SPA.V_inds[i]);
167  else
168  SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), nlocrows - perpiece*i, indx, numx, nnzx, indy[i], numy[i], i*perpiece, SPA.V_localy[i], SPA.V_isthere[i], SPA.V_inds[i]);
169  }
170  else
171  {
172  if(i != splits-1)
173  SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), perpiece, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
174  else
175  SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), nlocrows - perpiece*i, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
176  }
177  }
178 
179  std::vector<int> accum(splits+1, 0);
180  for(int i=0; i<splits; ++i)
181  accum[i+1] = accum[i] + indy[i].size();
182 
183  sendindbuf = new int32_t[accum[splits]];
184  sendnumbuf = new OVT[accum[splits]];
185  int32_t perproc = nlocrows / p_c;
186  int32_t last_rec = p_c-1;
187 
188  // keep recipients of last entries in each split (-1 for an empty split)
189  // so that we can delete indy[] and numy[] contents as soon as they are processed
190  std::vector<int32_t> end_recs(splits);
191  for(int i=0; i<splits; ++i)
192  {
193  if(indy[i].empty())
194  end_recs[i] = -1;
195  else
196  end_recs[i] = std::min(indy[i].back() / perproc, last_rec);
197  }
198  #ifdef _OPENMP
199  #pragma omp parallel for // num_threads(6)
200  #endif
201  for(int i=0; i<splits; ++i)
202  {
203  if(!indy[i].empty()) // guarantee that .begin() and .end() are not null
204  {
205  // FACT: Data is sorted, so if the recipient of begin is the same as the recipient of end,
206  // then all of the data goes to the same processor
207  int32_t beg_rec = std::min( indy[i].front() / perproc, last_rec);
208 
209  // We have to test the previous "split", to see if we are marking a "recipient head"
210  // set displacement markers for the completed (previous) buffers only
211  if(i != 0)
212  {
213  int k = i-1;
214  while (k >= 0 && end_recs[k] == -1) k--; // loop backwards until seeing a non-empty split
215  if(k >= 0) // we found a non-empty split
216  {
217  std::fill(sdispls+end_recs[k]+1, sdispls+beg_rec+1, accum[i]); // last entry to be set is sdispls[beg_rec]
218  }
219  // else fill sdispls[1...beg_rec] with zero (already done)
220  }
221  // else set sdispls[0] to zero (already done)
222  if(beg_rec == end_recs[i]) // fast case
223  {
224  std::transform(indy[i].begin(), indy[i].end(), indy[i].begin(), [=](int32_t val) { return val - perproc*beg_rec; }); // convert to receiver's local indices
225  std::copy(indy[i].begin(), indy[i].end(), sendindbuf+accum[i]);
226  std::copy(numy[i].begin(), numy[i].end(), sendnumbuf+accum[i]);
227  }
228  else // slow case
229  {
230  // FACT: No matter how many splits or threads, there will be only one "recipient head"
231  // Therefore there are no race conditions for marking send displacements (sdispls)
232  int end = indy[i].size();
233  for(int cur=0; cur< end; ++cur)
234  {
235  int32_t cur_rec = std::min( indy[i][cur] / perproc, last_rec);
236  while(beg_rec != cur_rec)
237  {
238  sdispls[++beg_rec] = accum[i] + cur; // first entry to be set is sdispls[beg_rec+1]
239  }
240  sendindbuf[ accum[i] + cur ] = indy[i][cur] - perproc*beg_rec; // convert to receiver's local index
241  sendnumbuf[ accum[i] + cur ] = numy[i][cur];
242  }
243  }
244  std::vector<int32_t>().swap(indy[i]);
245  std::vector<OVT>().swap(numy[i]);
246  bool lastnonzero = true; // am I the last nonzero split?
247  for(int k=i+1; k < splits; ++k)
248  {
249  if(end_recs[k] != -1)
250  lastnonzero = false;
251  }
252  if(lastnonzero)
253  std::fill(sdispls+end_recs[i]+1, sdispls+p_c, accum[i+1]);
254  } // end_if(!indy[i].empty)
255  } // end parallel for
256  return accum[splits];
257  }
258  else
259  {
260  std::cout << "Something is wrong, splits should be nonzero for multithreaded execution" << std::endl;
261  return 0;
262  }
263  }
264  else
265  {
266  sendindbuf = NULL;
267  sendnumbuf = NULL;
268  return 0;
269  }
270 }
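// ---- Illustrative sketch, not part of Friends.h: the recipient arithmetic used above.
// Rows are block-distributed over p_c recipients in chunks of perproc = nlocrows / p_c,
// and the last recipient absorbs the remainder. Names below are made up for the example.
struct Destination { int32_t rec; int32_t locind; };

inline Destination MapRowToRecipient(int32_t rowind, int32_t nlocrows, int32_t p_c)
{
    int32_t perproc  = nlocrows / p_c;
    int32_t last_rec = p_c - 1;
    int32_t rec      = std::min(rowind / perproc, last_rec);  // same formula as in the loop above
    Destination d = { rec, rowind - rec * perproc };          // receiver's local index
    return d;
}
// e.g. nlocrows = 10, p_c = 3 gives perproc = 3; row 9 goes to recipient 2 with local index 3.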
271 
272 
279 template <typename SR, typename IU, typename NUM, typename DER, typename IVT, typename OVT>
280 void generic_gespmv_threaded_setbuffers (const SpMat<IU,NUM,DER> & A, const int32_t * indx, const IVT * numx, int32_t nnzx,
281  int32_t * sendindbuf, OVT * sendnumbuf, int * cnts, int * dspls, int p_c)
282 {
283  if(A.getnnz() > 0 && nnzx > 0)
284  {
285  int splits = A.getnsplit();
286  if(splits > 0)
287  {
288  std::vector< std::vector<int32_t> > indy(splits);
289  std::vector< std::vector< OVT > > numy(splits);
290  int32_t nlocrows = static_cast<int32_t>(A.getnrow());
291  int32_t perpiece = nlocrows / splits;
292 
293  #ifdef _OPENMP
294  #pragma omp parallel for
295  #endif
296  for(int i=0; i<splits; ++i)
297  {
298  if(i != splits-1)
299  SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), perpiece, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
300  else
301  SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), nlocrows - perpiece*i, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
302  }
303 
304  int32_t perproc = nlocrows / p_c;
305  int32_t last_rec = p_c-1;
306 
307  // keep recipients of last entries in each split (-1 for an empty split)
308  // so that we can delete indy[] and numy[] contents as soon as they are processed
309  std::vector<int32_t> end_recs(splits);
310  for(int i=0; i<splits; ++i)
311  {
312  if(indy[i].empty())
313  end_recs[i] = -1;
314  else
315  end_recs[i] = std::min(indy[i].back() / perproc, last_rec);
316  }
317 
318  int ** loc_rec_cnts = new int *[splits];
319  #ifdef _OPENMP
320  #pragma omp parallel for
321  #endif
322  for(int i=0; i<splits; ++i)
323  {
324  loc_rec_cnts[i] = new int[p_c](); // thread-local recipient data
325  if(!indy[i].empty()) // guarantee that .begin() and .end() are not null
326  {
327  int32_t cur_rec = std::min( indy[i].front() / perproc, last_rec);
328  int32_t lastdata = (cur_rec+1) * perproc; // one past last entry that goes to this current recipient
329  for(typename std::vector<int32_t>::iterator it = indy[i].begin(); it != indy[i].end(); ++it)
330  {
331 
332  if( ( (*it) >= lastdata ) && cur_rec != last_rec )
333  {
334  cur_rec = std::min( (*it) / perproc, last_rec);
335  lastdata = (cur_rec+1) * perproc;
336  }
337  ++loc_rec_cnts[i][cur_rec];
338  }
339  }
340  }
341  #ifdef _OPENMP
342  #pragma omp parallel for
343  #endif
344  for(int i=0; i<splits; ++i)
345  {
346  if(!indy[i].empty()) // guarantee that .begin() and .end() are not null
347  {
348  // FACT: Data is sorted, so if the recipient of begin is the same as the recipient of end,
349  // then all of the data goes to the same processor
350  int32_t beg_rec = std::min( indy[i].front() / perproc, last_rec);
351  int32_t alreadysent = 0; // already sent per recipient
352  for(int before = i-1; before >= 0; before--)
353  alreadysent += loc_rec_cnts[before][beg_rec];
354 
355  if(beg_rec == end_recs[i]) // fast case
356  {
357  std::transform(indy[i].begin(), indy[i].end(), indy[i].begin(), [=](int32_t val) { return val - perproc*beg_rec; }); // convert to receiver's local indices
358  std::copy(indy[i].begin(), indy[i].end(), sendindbuf + dspls[beg_rec] + alreadysent);
359  std::copy(numy[i].begin(), numy[i].end(), sendnumbuf + dspls[beg_rec] + alreadysent);
360  }
361  else // slow case
362  {
363  int32_t cur_rec = beg_rec;
364  int32_t lastdata = (cur_rec+1) * perproc; // one past last entry that goes to this current recipient
365  for(typename std::vector<int32_t>::iterator it = indy[i].begin(); it != indy[i].end(); ++it)
366  {
367  if( ( (*it) >= lastdata ) && cur_rec != last_rec )
368  {
369  cur_rec = std::min( (*it) / perproc, last_rec);
370  lastdata = (cur_rec+1) * perproc;
371 
372  // if this split switches to a new recipient after sending some data
373  // then it's sure that no data has been sent to that recipient yet
374  alreadysent = 0;
375  }
376  sendindbuf[ dspls[cur_rec] + alreadysent ] = (*it) - perproc*cur_rec; // convert to receiver's local index
377  sendnumbuf[ dspls[cur_rec] + (alreadysent++) ] = *(numy[i].begin() + (it-indy[i].begin()));
378  }
379  }
380  }
381  }
382  // Deallocate the per-thread recipient counts serially once all threads complete
383  for(int i=0; i< splits; ++i)
384  {
385  for(int j=0; j< p_c; ++j)
386  cnts[j] += loc_rec_cnts[i][j];
387  delete [] loc_rec_cnts[i];
388  }
389  delete [] loc_rec_cnts;
390  }
391  else
392  {
393  std::cout << "Something is wrong, splits should be nonzero for multithreaded execution" << std::endl;
394  }
395  }
396 }
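// ---- Illustrative sketch, not part of Friends.h: the count-then-place scheme used above.
// Each split first counts how many entries it sends to every recipient (loc_rec_cnts);
// a split then writes its data for recipient r at dspls[r] plus whatever the earlier
// splits already placed for that recipient. Names below are made up for the example.
inline int WriteOffset(const std::vector<int> & dspls,                 // recipient displacements
                       const std::vector< std::vector<int> > & cnt,    // cnt[split][recipient]
                       int split, int rec)
{
    int alreadysent = 0;
    for (int before = 0; before < split; ++before)
        alreadysent += cnt[before][rec];        // entries earlier splits send to the same recipient
    return dspls[rec] + alreadysent;            // first free slot for this split and recipient
}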
397 
401 template <typename SR, typename MIND, typename VIND, typename DER, typename NUM, typename IVT, typename OVT>
402 void generic_gespmv (const SpMat<MIND,NUM,DER> & A, const VIND * indx, const IVT * numx, VIND nnzx, std::vector<VIND> & indy, std::vector<OVT> & numy, PreAllocatedSPA<OVT> & SPA)
403 {
404  if(A.getnnz() > 0 && nnzx > 0)
405  {
406  if(A.getnsplit() > 0)
407  {
408  std::cout << "Call dcsc_gespmv_threaded instead" << std::endl;
409  }
410  else
411  {
412  SpMXSpV<SR>(*(A.GetInternal()), (VIND) A.getnrow(), indx, numx, nnzx, indy, numy, SPA);
413  }
414  }
415 }
416 
420 template <typename SR, typename IU, typename DER, typename NUM, typename IVT, typename OVT>
421 void generic_gespmv (const SpMat<IU,NUM,DER> & A, const int32_t * indx, const IVT * numx, int32_t nnzx,
422  int32_t * indy, OVT * numy, int * cnts, int * dspls, int p_c, bool indexisvalue)
423 {
424  if(A.getnnz() > 0 && nnzx > 0)
425  {
426  if(A.getnsplit() > 0)
427  {
428  SpParHelper::Print("Call dcsc_gespmv_threaded instead\n");
429  }
430  else
431  {
432  SpMXSpV<SR>(*(A.GetInternal()), (int32_t) A.getnrow(), indx, numx, nnzx, indy, numy, cnts, dspls, p_c);
433  }
434  }
435 }
436 
437 
438 template<typename IU>
439 void BooleanRowSplit(SpDCCols<IU, bool> & A, int numsplits)
440 {
441  A.splits = numsplits;
442  IU perpiece = A.m / A.splits;
443  std::vector<IU> prevcolids(A.splits, -1); // previous column id's are set to -1
444  std::vector<IU> nzcs(A.splits, 0);
445  std::vector<IU> nnzs(A.splits, 0);
446  std::vector < std::vector < std::pair<IU,IU> > > colrowpairs(A.splits);
447  if(A.nnz > 0 && A.dcsc != NULL)
448  {
449  for(IU i=0; i< A.dcsc->nzc; ++i)
450  {
451  for(IU j = A.dcsc->cp[i]; j< A.dcsc->cp[i+1]; ++j)
452  {
453  IU colid = A.dcsc->jc[i];
454  IU rowid = A.dcsc->ir[j];
455  IU owner = std::min(rowid / perpiece, static_cast<IU>(A.splits-1));
456  colrowpairs[owner].push_back(std::make_pair(colid, rowid - owner*perpiece));
457 
458  if(prevcolids[owner] != colid)
459  {
460  prevcolids[owner] = colid;
461  ++nzcs[owner];
462  }
463  ++nnzs[owner];
464  }
465  }
466  }
467  delete A.dcsc; // reclaim memory
468  //copy(nzcs.begin(), nzcs.end(), ostream_iterator<IU>(cout," " )); cout << endl;
469  //copy(nnzs.begin(), nnzs.end(), ostream_iterator<IU>(cout," " )); cout << endl;
470  A.dcscarr = new Dcsc<IU,bool>*[A.splits];
471 
472  // To be parallelized with OpenMP
473  for(int i=0; i< A.splits; ++i)
474  {
475  sort(colrowpairs[i].begin(), colrowpairs[i].end()); // sort w.r.t. columns
476  A.dcscarr[i] = new Dcsc<IU,bool>(nnzs[i],nzcs[i]);
477  std::fill(A.dcscarr[i]->numx, A.dcscarr[i]->numx+nnzs[i], static_cast<bool>(1));
478  IU curnzc = 0; // number of nonzero columns constructed so far
479  IU cindex = colrowpairs[i][0].first;
480  IU rindex = colrowpairs[i][0].second;
481 
482  A.dcscarr[i]->ir[0] = rindex;
483  A.dcscarr[i]->jc[curnzc] = cindex;
484  A.dcscarr[i]->cp[curnzc++] = 0;
485 
486  for(IU j=1; j<nnzs[i]; ++j)
487  {
488  cindex = colrowpairs[i][j].first;
489  rindex = colrowpairs[i][j].second;
490 
491  A.dcscarr[i]->ir[j] = rindex;
492  if(cindex != A.dcscarr[i]->jc[curnzc-1])
493  {
494  A.dcscarr[i]->jc[curnzc] = cindex;
495  A.dcscarr[i]->cp[curnzc++] = j;
496  }
497  }
498  A.dcscarr[i]->cp[curnzc] = nnzs[i];
499  }
500 }
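// ---- Illustrative sketch, not part of Friends.h: how the (jc, cp, ir) arrays are built from
// column-sorted (column, row) pairs, as the loop above does for each split. Uses std::vector
// instead of the preallocated Dcsc arrays; names are made up for the example.
void PairsToDcscArrays(const std::vector< std::pair<int,int> > & colrow,  // sorted by column
                       std::vector<int> & jc, std::vector<int> & cp, std::vector<int> & ir)
{
    for (size_t j = 0; j < colrow.size(); ++j)
    {
        ir.push_back(colrow[j].second);                       // row index of the j-th nonzero
        if (jc.empty() || colrow[j].first != jc.back())       // a new nonzero column starts here
        {
            jc.push_back(colrow[j].first);
            cp.push_back((int) j);
        }
    }
    cp.push_back((int) colrow.size());                        // sentinel: one past the last nonzero
}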
501 
502 
509 template<class SR, class NUO, class IU, class NU1, class NU2>
510 SpTuples<IU, NUO> * Tuples_AnXBt
511  (const SpDCCols<IU, NU1> & A,
512  const SpDCCols<IU, NU2> & B,
513  bool clearA = false, bool clearB = false)
514 {
515  IU mdim = A.m;
516  IU ndim = B.m; // B is already transposed
517 
518  if(A.isZero() || B.isZero())
519  {
520  if(clearA) delete const_cast<SpDCCols<IU, NU1> *>(&A);
521  if(clearB) delete const_cast<SpDCCols<IU, NU2> *>(&B);
522  return new SpTuples< IU, NUO >(0, mdim, ndim); // just return an empty matrix
523  }
524  Isect<IU> *isect1, *isect2, *itr1, *itr2, *cols, *rows;
525  SpHelper::SpIntersect(*(A.dcsc), *(B.dcsc), cols, rows, isect1, isect2, itr1, itr2);
526 
527  IU kisect = static_cast<IU>(itr1-isect1); // size of the intersection ((itr1-isect1) == (itr2-isect2))
528  if(kisect == 0)
529  {
530  if(clearA) delete const_cast<SpDCCols<IU, NU1> *>(&A);
531  if(clearB) delete const_cast<SpDCCols<IU, NU2> *>(&B);
532  DeleteAll(isect1, isect2, cols, rows);
533  return new SpTuples< IU, NUO >(0, mdim, ndim);
534  }
535 
536  StackEntry< NUO, std::pair<IU,IU> > * multstack;
537 
538  IU cnz = SpHelper::SpCartesian< SR > (*(A.dcsc), *(B.dcsc), kisect, isect1, isect2, multstack);
539  DeleteAll(isect1, isect2, cols, rows);
540 
541  if(clearA) delete const_cast<SpDCCols<IU, NU1> *>(&A);
542  if(clearB) delete const_cast<SpDCCols<IU, NU2> *>(&B);
543  return new SpTuples<IU, NUO> (cnz, mdim, ndim, multstack);
544 }
545 
552 template<class SR, class NUO, class IU, class NU1, class NU2>
553 SpTuples<IU, NUO> * Tuples_AnXBn
554  (const SpDCCols<IU, NU1> & A,
555  const SpDCCols<IU, NU2> & B,
556  bool clearA = false, bool clearB = false)
557 {
558  IU mdim = A.m;
559  IU ndim = B.n;
560  if(A.isZero() || B.isZero())
561  {
562  return new SpTuples<IU, NUO>(0, mdim, ndim);
563  }
564  StackEntry< NUO, std::pair<IU,IU> > * multstack;
565  IU cnz = SpHelper::SpColByCol< SR > (*(A.dcsc), *(B.dcsc), A.n, multstack);
566 
567  if(clearA)
568  delete const_cast<SpDCCols<IU, NU1> *>(&A);
569  if(clearB)
570  delete const_cast<SpDCCols<IU, NU2> *>(&B);
571 
572  return new SpTuples<IU, NUO> (cnz, mdim, ndim, multstack);
573 }
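// ---- Illustrative sketch, not part of Friends.h: the column-by-column formulation behind
// Tuples_AnXBn. Column j of C is a combination of the columns of A selected by the nonzeros
// of column j of B. A toy CSC struct and a std::map accumulator (needs <map>) stand in for the
// Dcsc and heap machinery of SpHelper::SpColByCol; all names below are made up for the example.
struct ToyCsc { std::vector<int> colptr, rowind; std::vector<double> val; };

std::vector< std::map<int,double> > SpGemmColByCol(const ToyCsc & A, const ToyCsc & B, int ncolsB)
{
    std::vector< std::map<int,double> > C(ncolsB);             // one accumulator per output column
    for (int j = 0; j < ncolsB; ++j)
    {
        for (int kb = B.colptr[j]; kb < B.colptr[j+1]; ++kb)   // nonzeros B(k,j) in column j
        {
            int k = B.rowind[kb];
            for (int ka = A.colptr[k]; ka < A.colptr[k+1]; ++ka)
                C[j][A.rowind[ka]] += A.val[ka] * B.val[kb];   // C(i,j) += A(i,k) * B(k,j)
        }
    }
    return C;
}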
574 
575 
576 template<class SR, class NUO, class IU, class NU1, class NU2>
577 SpTuples<IU, NUO> * Tuples_AtXBt
578  (const SpDCCols<IU, NU1> & A,
579  const SpDCCols<IU, NU2> & B,
580  bool clearA = false, bool clearB = false)
581 {
582  IU mdim = A.n;
583  IU ndim = B.m;
584  std::cout << "Tuples_AtXBt function has not been implemented yet!" << std::endl;
585 
586  return new SpTuples<IU, NUO> (0, mdim, ndim);
587 }
588 
589 template<class SR, class NUO, class IU, class NU1, class NU2>
590 SpTuples<IU, NUO> * Tuples_AtXBn
591  (const SpDCCols<IU, NU1> & A,
592  const SpDCCols<IU, NU2> & B,
593  bool clearA = false, bool clearB = false)
594 {
595  IU mdim = A.n;
596  IU ndim = B.n;
597  std::cout << "Tuples_AtXBn function has not been implemented yet!" << std::endl;
598 
599  return new SpTuples<IU, NUO> (0, mdim, ndim);
600 }
601 
602 // Performs a balanced merge of the array of SpTuples
603 // Assumes the input parameters are already column sorted
604 template<class SR, class IU, class NU>
605 SpTuples<IU,NU> MergeAll( const std::vector<SpTuples<IU,NU> *> & ArrSpTups, IU mstar = 0, IU nstar = 0, bool delarrs = false )
606 {
607  int hsize = ArrSpTups.size();
608  if(hsize == 0)
609  {
610  return SpTuples<IU,NU>(0, mstar,nstar);
611  }
612  else
613  {
614  mstar = ArrSpTups[0]->m;
615  nstar = ArrSpTups[0]->n;
616  }
617  for(int i=1; i< hsize; ++i)
618  {
619  if((mstar != ArrSpTups[i]->m) || nstar != ArrSpTups[i]->n)
620  {
621  std::cerr << "Dimensions do not match on MergeAll()" << std::endl;
622  return SpTuples<IU,NU>(0,0,0);
623  }
624  }
625  if(hsize > 1)
626  {
627  ColLexiCompare<IU,int> heapcomp;
628  std::tuple<IU, IU, int> * heap = new std::tuple<IU, IU, int> [hsize]; // (rowindex, colindex, source-id)
629  IU * curptr = new IU[hsize];
630  std::fill_n(curptr, hsize, static_cast<IU>(0));
631  IU estnnz = 0;
632 
633  for(int i=0; i< hsize; ++i)
634  {
635  estnnz += ArrSpTups[i]->getnnz();
636  heap[i] = std::make_tuple(std::get<0>(ArrSpTups[i]->tuples[0]), std::get<1>(ArrSpTups[i]->tuples[0]), i);
637  }
638  std::make_heap(heap, heap+hsize, std::not2(heapcomp));
639 
640  std::tuple<IU, IU, NU> * ntuples = new std::tuple<IU,IU,NU>[estnnz];
641  IU cnz = 0;
642 
643  while(hsize > 0)
644  {
645  std::pop_heap(heap, heap + hsize, std::not2(heapcomp)); // result is stored in heap[hsize-1]
646  int source = std::get<2>(heap[hsize-1]);
647 
648  if( (cnz != 0) &&
649  ((std::get<0>(ntuples[cnz-1]) == std::get<0>(heap[hsize-1])) && (std::get<1>(ntuples[cnz-1]) == std::get<1>(heap[hsize-1]))) )
650  {
651  std::get<2>(ntuples[cnz-1]) = SR::add(std::get<2>(ntuples[cnz-1]), ArrSpTups[source]->numvalue(curptr[source]++));
652  }
653  else
654  {
655  ntuples[cnz++] = ArrSpTups[source]->tuples[curptr[source]++];
656  }
657 
658  if(curptr[source] != ArrSpTups[source]->getnnz()) // That array has not been depleted
659  {
660  heap[hsize-1] = std::make_tuple(std::get<0>(ArrSpTups[source]->tuples[curptr[source]]),
661  std::get<1>(ArrSpTups[source]->tuples[curptr[source]]), source);
662  std::push_heap(heap, heap+hsize, std::not2(heapcomp));
663  }
664  else
665  {
666  --hsize;
667  }
668  }
669  SpHelper::ShrinkArray(ntuples, cnz);
670  DeleteAll(heap, curptr);
671 
672  if(delarrs)
673  {
674  for(size_t i=0; i<ArrSpTups.size(); ++i)
675  delete ArrSpTups[i];
676  }
677  return SpTuples<IU,NU> (cnz, mstar, nstar, ntuples);
678  }
679  else
680  {
681  SpTuples<IU,NU> ret = *ArrSpTups[0];
682  if(delarrs)
683  delete ArrSpTups[0];
684  return ret;
685  }
686 }
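// ---- Illustrative sketch, not part of Friends.h: the k-way merge with on-the-fly duplicate
// addition that MergeAll performs, restated over plain column-sorted (row, column, value)
// vectors with std::priority_queue (needs <queue>, <tuple>, <functional>). Plus stands in for
// SR::add; all names below are made up for the example.
std::vector< std::tuple<int,int,double> >
MergeSortedTriples(const std::vector< std::vector< std::tuple<int,int,double> > > & lists)
{
    typedef std::pair< std::pair<int,int>, int > HeapItem;     // ((column, row) key, list id)
    std::priority_queue< HeapItem, std::vector<HeapItem>, std::greater<HeapItem> > heap;
    std::vector<size_t> cur(lists.size(), 0);
    for (int i = 0; i < (int) lists.size(); ++i)
        if (!lists[i].empty())
            heap.push(HeapItem(std::make_pair(std::get<1>(lists[i][0]), std::get<0>(lists[i][0])), i));

    std::vector< std::tuple<int,int,double> > out;
    while (!heap.empty())
    {
        int src = heap.top().second;
        heap.pop();
        const std::tuple<int,int,double> & t = lists[src][cur[src]++];
        if (!out.empty() && std::get<0>(out.back()) == std::get<0>(t) && std::get<1>(out.back()) == std::get<1>(t))
            std::get<2>(out.back()) += std::get<2>(t);          // same (row, column): add the values
        else
            out.push_back(t);
        if (cur[src] < lists[src].size())                       // this list is not depleted yet
            heap.push(HeapItem(std::make_pair(std::get<1>(lists[src][cur[src]]), std::get<0>(lists[src][cur[src]])), src));
    }
    return out;
}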
687 
693 template <typename IU, typename NU1, typename NU2>
694 Dcsc<IU, typename promote_trait<NU1,NU2>::T_promote> EWiseMult (const Dcsc<IU,NU1> & A, const Dcsc<IU,NU2> * B, bool exclude)
695 {
696  typedef typename promote_trait<NU1,NU2>::T_promote N_promote;
697  IU estnzc, estnz;
698  if(exclude)
699  {
700  estnzc = A.nzc;
701  estnz = A.nz;
702  }
703  else
704  {
705  estnzc = std::min(A.nzc, B->nzc);
706  estnz = std::min(A.nz, B->nz);
707  }
708 
709  Dcsc<IU,N_promote> temp(estnz, estnzc);
710 
711  IU curnzc = 0;
712  IU curnz = 0;
713  IU i = 0;
714  IU j = 0;
715  temp.cp[0] = 0;
716 
717  if(!exclude) // A = A .* B
718  {
719  while(i< A.nzc && B != NULL && j<B->nzc)
720  {
721  if(A.jc[i] > B->jc[j]) ++j;
722  else if(A.jc[i] < B->jc[j]) ++i;
723  else
724  {
725  IU ii = A.cp[i];
726  IU jj = B->cp[j];
727  IU prevnz = curnz;
728  while (ii < A.cp[i+1] && jj < B->cp[j+1])
729  {
730  if (A.ir[ii] < B->ir[jj]) ++ii;
731  else if (A.ir[ii] > B->ir[jj]) ++jj;
732  else
733  {
734  temp.ir[curnz] = A.ir[ii];
735  temp.numx[curnz++] = A.numx[ii++] * B->numx[jj++];
736  }
737  }
738  if(prevnz < curnz) // at least one nonzero exists in this column
739  {
740  temp.jc[curnzc++] = A.jc[i];
741  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
742  }
743  ++i;
744  ++j;
745  }
746  }
747  }
748  else // A = A .* not(B)
749  {
750  while(i< A.nzc && B != NULL && j< B->nzc)
751  {
752  if(A.jc[i] > B->jc[j]) ++j;
753  else if(A.jc[i] < B->jc[j])
754  {
755  temp.jc[curnzc++] = A.jc[i++];
756  for(IU k = A.cp[i-1]; k< A.cp[i]; k++)
757  {
758  temp.ir[curnz] = A.ir[k];
759  temp.numx[curnz++] = A.numx[k];
760  }
761  temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
762  }
763  else
764  {
765  IU ii = A.cp[i];
766  IU jj = B->cp[j];
767  IU prevnz = curnz;
768  while (ii < A.cp[i+1] && jj < B->cp[j+1])
769  {
770  if (A.ir[ii] > B->ir[jj]) ++jj;
771  else if (A.ir[ii] < B->ir[jj])
772  {
773  temp.ir[curnz] = A.ir[ii];
774  temp.numx[curnz++] = A.numx[ii++];
775  }
776  else // nonzero present in both A and B: excluded from the result
777  {
778  ++ii;
779  ++jj;
780  }
781  }
782  while (ii < A.cp[i+1])
783  {
784  temp.ir[curnz] = A.ir[ii];
785  temp.numx[curnz++] = A.numx[ii++];
786  }
787 
788  if(prevnz < curnz) // at least one nonzero exists in this column
789  {
790  temp.jc[curnzc++] = A.jc[i];
791  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
792  }
793  ++i;
794  ++j;
795  }
796  }
797  while(i< A.nzc)
798  {
799  temp.jc[curnzc++] = A.jc[i++];
800  for(IU k = A.cp[i-1]; k< A.cp[i]; ++k)
801  {
802  temp.ir[curnz] = A.ir[k];
803  temp.numx[curnz++] = A.numx[k];
804  }
805  temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
806  }
807  }
808 
809  temp.Resize(curnzc, curnz);
810  return temp;
811 }
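// ---- Illustrative sketch, not part of Friends.h: the semantics of the exclude flag above,
// restated on std::map "sparse vectors" (needs <map>). With exclude == false the result keeps
// the positions present in both operands (A .* B); with exclude == true it keeps the entries
// of A whose positions are absent from B (A .* not(B)). Names are made up for the example.
std::map<int,double> EWiseMultMaps(const std::map<int,double> & A, const std::map<int,double> & B, bool exclude)
{
    std::map<int,double> C;
    for (std::map<int,double>::const_iterator it = A.begin(); it != A.end(); ++it)
    {
        std::map<int,double>::const_iterator bit = B.find(it->first);
        if (!exclude && bit != B.end())
            C[it->first] = it->second * bit->second;   // position nonzero in both: multiply
        else if (exclude && bit == B.end())
            C[it->first] = it->second;                 // position missing from B: keep A's value
    }
    return C;
}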
812 
813 template <typename N_promote, typename IU, typename NU1, typename NU2, typename _BinaryOperation>
814 Dcsc<IU, N_promote> EWiseApply(const Dcsc<IU,NU1> & A, const Dcsc<IU,NU2> * B, _BinaryOperation __binary_op, bool notB, const NU2& defaultBVal)
815 {
816  //typedef typename promote_trait<NU1,NU2>::T_promote N_promote;
817  IU estnzc, estnz;
818  if(notB)
819  {
820  estnzc = A.nzc;
821  estnz = A.nz;
822  }
823  else
824  {
825  estnzc = std::min(A.nzc, B->nzc);
826  estnz = std::min(A.nz, B->nz);
827  }
828 
829  Dcsc<IU,N_promote> temp(estnz, estnzc);
830 
831  IU curnzc = 0;
832  IU curnz = 0;
833  IU i = 0;
834  IU j = 0;
835  temp.cp[0] = 0;
836 
837  if(!notB) // A = A .* B
838  {
839  while(i< A.nzc && B != NULL && j<B->nzc)
840  {
841  if(A.jc[i] > B->jc[j]) ++j;
842  else if(A.jc[i] < B->jc[j]) ++i;
843  else
844  {
845  IU ii = A.cp[i];
846  IU jj = B->cp[j];
847  IU prevnz = curnz;
848  while (ii < A.cp[i+1] && jj < B->cp[j+1])
849  {
850  if (A.ir[ii] < B->ir[jj]) ++ii;
851  else if (A.ir[ii] > B->ir[jj]) ++jj;
852  else
853  {
854  temp.ir[curnz] = A.ir[ii];
855  temp.numx[curnz++] = __binary_op(A.numx[ii++], B->numx[jj++]);
856  }
857  }
858  if(prevnz < curnz) // at least one nonzero exists in this column
859  {
860  temp.jc[curnzc++] = A.jc[i];
861  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
862  }
863  ++i;
864  ++j;
865  }
866  }
867  }
868  else // A = A .* not(B)
869  {
870  while(i< A.nzc && B != NULL && j< B->nzc)
871  {
872  if(A.jc[i] > B->jc[j]) ++j;
873  else if(A.jc[i] < B->jc[j])
874  {
875  temp.jc[curnzc++] = A.jc[i++];
876  for(IU k = A.cp[i-1]; k< A.cp[i]; k++)
877  {
878  temp.ir[curnz] = A.ir[k];
879  temp.numx[curnz++] = __binary_op(A.numx[k], defaultBVal);
880  }
881  temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
882  }
883  else
884  {
885  IU ii = A.cp[i];
886  IU jj = B->cp[j];
887  IU prevnz = curnz;
888  while (ii < A.cp[i+1] && jj < B->cp[j+1])
889  {
890  if (A.ir[ii] > B->ir[jj]) ++jj;
891  else if (A.ir[ii] < B->ir[jj])
892  {
893  temp.ir[curnz] = A.ir[ii];
894  temp.numx[curnz++] = __binary_op(A.numx[ii++], defaultBVal);
895  }
896  else // nonzero present in both A and B: excluded from the result
897  {
898  ++ii;
899  ++jj;
900  }
901  }
902  while (ii < A.cp[i+1])
903  {
904  temp.ir[curnz] = A.ir[ii];
905  temp.numx[curnz++] = __binary_op(A.numx[ii++], defaultBVal);
906  }
907 
908  if(prevnz < curnz) // at least one nonzero exists in this column
909  {
910  temp.jc[curnzc++] = A.jc[i];
911  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
912  }
913  ++i;
914  ++j;
915  }
916  }
917  while(i< A.nzc)
918  {
919  temp.jc[curnzc++] = A.jc[i++];
920  for(IU k = A.cp[i-1]; k< A.cp[i]; ++k)
921  {
922  temp.ir[curnz] = A.ir[k];
923  temp.numx[curnz++] = __binary_op(A.numx[k], defaultBVal);
924  }
925  temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
926  }
927  }
928 
929  temp.Resize(curnzc, curnz);
930  return temp;
931 }
932 
933 
934 template<typename IU, typename NU1, typename NU2>
935 SpDCCols<IU, typename promote_trait<NU1,NU2>::T_promote> EWiseMult (const SpDCCols<IU,NU1> & A, const SpDCCols<IU,NU2> & B, bool exclude)
936 {
937  typedef typename promote_trait<NU1,NU2>::T_promote N_promote;
938  assert(A.m == B.m);
939  assert(A.n == B.n);
940 
941  Dcsc<IU, N_promote> * tdcsc = NULL;
942  if(A.nnz > 0 && B.nnz > 0)
943  {
944  tdcsc = new Dcsc<IU, N_promote>(EWiseMult(*(A.dcsc), B.dcsc, exclude));
945  return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
946  }
947  else if (A.nnz > 0 && exclude) // && B.nnz == 0
948  {
949  tdcsc = new Dcsc<IU, N_promote>(EWiseMult(*(A.dcsc), (const Dcsc<IU,NU2>*)NULL, exclude));
950  return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
951  }
952  else
953  {
954  return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
955  }
956 }
957 
958 
959 template<typename N_promote, typename IU, typename NU1, typename NU2, typename _BinaryOperation>
960 SpDCCols<IU, N_promote> EWiseApply (const SpDCCols<IU,NU1> & A, const SpDCCols<IU,NU2> & B, _BinaryOperation __binary_op, bool notB, const NU2& defaultBVal)
961 {
962  //typedef typename promote_trait<NU1,NU2>::T_promote N_promote;
963  assert(A.m == B.m);
964  assert(A.n == B.n);
965 
966  Dcsc<IU, N_promote> * tdcsc = NULL;
967  if(A.nnz > 0 && B.nnz > 0)
968  {
969  tdcsc = new Dcsc<IU, N_promote>(EWiseApply<N_promote>(*(A.dcsc), B.dcsc, __binary_op, notB, defaultBVal));
970  return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
971  }
972  else if (A.nnz > 0 && notB) // && B.nnz == 0
973  {
974  tdcsc = new Dcsc<IU, N_promote>(EWiseApply<N_promote>(*(A.dcsc), (const Dcsc<IU,NU2>*)NULL, __binary_op, notB, defaultBVal));
975  return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
976  }
977  else
978  {
979  return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
980  }
981 }
982 
992 template <typename RETT, typename IU, typename NU1, typename NU2, typename _BinaryOperation, typename _BinaryPredicate>
993 Dcsc<IU, RETT> EWiseApply(const Dcsc<IU,NU1> * Ap, const Dcsc<IU,NU2> * Bp, _BinaryOperation __binary_op, _BinaryPredicate do_op, bool allowANulls, bool allowBNulls, const NU1& ANullVal, const NU2& BNullVal, const bool allowIntersect)
994 {
995  if (Ap == NULL && Bp == NULL)
996  return Dcsc<IU,RETT>(0, 0);
997 
998  if (Ap == NULL && Bp != NULL)
999  {
1000  if (!allowANulls)
1001  return Dcsc<IU,RETT>(0, 0);
1002 
1003  const Dcsc<IU,NU2> & B = *Bp;
1004  IU estnzc = B.nzc;
1005  IU estnz = B.nz;
1006  Dcsc<IU,RETT> temp(estnz, estnzc);
1007 
1008  IU curnzc = 0;
1009  IU curnz = 0;
1010  //IU i = 0;
1011  IU j = 0;
1012  temp.cp[0] = 0;
1013  while(j<B.nzc)
1014  {
1015  // Mirrors the branch of the combined loop below that handles missing A entries.
1016  j++;
1017  IU prevnz = curnz;
1018  temp.jc[curnzc++] = B.jc[j-1];
1019  for(IU k = B.cp[j-1]; k< B.cp[j]; ++k)
1020  {
1021  if (do_op(ANullVal, B.numx[k], true, false))
1022  {
1023  temp.ir[curnz] = B.ir[k];
1024  temp.numx[curnz++] = __binary_op(ANullVal, B.numx[k], true, false);
1025  }
1026  }
1027  //temp.cp[curnzc] = temp.cp[curnzc-1] + (B.cp[j] - B.cp[j-1]);
1028  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1029  }
1030  temp.Resize(curnzc, curnz);
1031  return temp;
1032  }
1033 
1034  if (Ap != NULL && Bp == NULL)
1035  {
1036  if (!allowBNulls)
1037  return Dcsc<IU,RETT>(0, 0);
1038 
1039  const Dcsc<IU,NU1> & A = *Ap;
1040  IU estnzc = A.nzc;
1041  IU estnz = A.nz;
1042  Dcsc<IU,RETT> temp(estnz, estnzc);
1043 
1044  IU curnzc = 0;
1045  IU curnz = 0;
1046  IU i = 0;
1047  //IU j = 0;
1048  temp.cp[0] = 0;
1049  while(i< A.nzc)
1050  {
1051  i++;
1052  IU prevnz = curnz;
1053  temp.jc[curnzc++] = A.jc[i-1];
1054  for(IU k = A.cp[i-1]; k< A.cp[i]; k++)
1055  {
1056  if (do_op(A.numx[k], BNullVal, false, true))
1057  {
1058  temp.ir[curnz] = A.ir[k];
1059  temp.numx[curnz++] = __binary_op(A.numx[k], BNullVal, false, true);
1060  }
1061  }
1062  //temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
1063  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1064  }
1065  temp.Resize(curnzc, curnz);
1066  return temp;
1067  }
1068 
1069  // both A and B are non-NULL at this point
1070  const Dcsc<IU,NU1> & A = *Ap;
1071  const Dcsc<IU,NU2> & B = *Bp;
1072 
1073  IU estnzc = A.nzc + B.nzc;
1074  IU estnz = A.nz + B.nz;
1075  Dcsc<IU,RETT> temp(estnz, estnzc);
1076 
1077  IU curnzc = 0;
1078  IU curnz = 0;
1079  IU i = 0;
1080  IU j = 0;
1081  temp.cp[0] = 0;
1082  while(i< A.nzc && j<B.nzc)
1083  {
1084  if(A.jc[i] > B.jc[j])
1085  {
1086  j++;
1087  if (allowANulls)
1088  {
1089  IU prevnz = curnz;
1090  temp.jc[curnzc++] = B.jc[j-1];
1091  for(IU k = B.cp[j-1]; k< B.cp[j]; ++k)
1092  {
1093  if (do_op(ANullVal, B.numx[k], true, false))
1094  {
1095  temp.ir[curnz] = B.ir[k];
1096  temp.numx[curnz++] = __binary_op(ANullVal, B.numx[k], true, false);
1097  }
1098  }
1099  //temp.cp[curnzc] = temp.cp[curnzc-1] + (B.cp[j] - B.cp[j-1]);
1100  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1101  }
1102  }
1103  else if(A.jc[i] < B.jc[j])
1104  {
1105  i++;
1106  if (allowBNulls)
1107  {
1108  IU prevnz = curnz;
1109  temp.jc[curnzc++] = A.jc[i-1];
1110  for(IU k = A.cp[i-1]; k< A.cp[i]; k++)
1111  {
1112  if (do_op(A.numx[k], BNullVal, false, true))
1113  {
1114  temp.ir[curnz] = A.ir[k];
1115  temp.numx[curnz++] = __binary_op(A.numx[k], BNullVal, false, true);
1116  }
1117  }
1118  //temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
1119  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1120  }
1121  }
1122  else
1123  {
1124  temp.jc[curnzc++] = A.jc[i];
1125  IU ii = A.cp[i];
1126  IU jj = B.cp[j];
1127  IU prevnz = curnz;
1128  while (ii < A.cp[i+1] && jj < B.cp[j+1])
1129  {
1130  if (A.ir[ii] < B.ir[jj])
1131  {
1132  if (allowBNulls && do_op(A.numx[ii], BNullVal, false, true))
1133  {
1134  temp.ir[curnz] = A.ir[ii];
1135  temp.numx[curnz++] = __binary_op(A.numx[ii++], BNullVal, false, true);
1136  }
1137  else
1138  ii++;
1139  }
1140  else if (A.ir[ii] > B.ir[jj])
1141  {
1142  if (allowANulls && do_op(ANullVal, B.numx[jj], true, false))
1143  {
1144  temp.ir[curnz] = B.ir[jj];
1145  temp.numx[curnz++] = __binary_op(ANullVal, B.numx[jj++], true, false);
1146  }
1147  else
1148  jj++;
1149  }
1150  else
1151  {
1152  if (allowIntersect && do_op(A.numx[ii], B.numx[jj], false, false))
1153  {
1154  temp.ir[curnz] = A.ir[ii];
1155  temp.numx[curnz++] = __binary_op(A.numx[ii++], B.numx[jj++], false, false); // might include zeros
1156  }
1157  else
1158  {
1159  ii++;
1160  jj++;
1161  }
1162  }
1163  }
1164  while (ii < A.cp[i+1])
1165  {
1166  if (allowBNulls && do_op(A.numx[ii], BNullVal, false, true))
1167  {
1168  temp.ir[curnz] = A.ir[ii];
1169  temp.numx[curnz++] = __binary_op(A.numx[ii++], BNullVal, false, true);
1170  }
1171  else
1172  ii++;
1173  }
1174  while (jj < B.cp[j+1])
1175  {
1176  if (allowANulls && do_op(ANullVal, B.numx[jj], true, false))
1177  {
1178  temp.ir[curnz] = B.ir[jj];
1179  temp.numx[curnz++] = __binary_op(ANullVal, B.numx[jj++], true, false);
1180  }
1181  else
1182  jj++;
1183  }
1184  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1185  ++i;
1186  ++j;
1187  }
1188  }
1189  while(allowBNulls && i< A.nzc) // remaining A elements after B ran out
1190  {
1191  IU prevnz = curnz;
1192  temp.jc[curnzc++] = A.jc[i++];
1193  for(IU k = A.cp[i-1]; k< A.cp[i]; ++k)
1194  {
1195  if (do_op(A.numx[k], BNullVal, false, true))
1196  {
1197  temp.ir[curnz] = A.ir[k];
1198  temp.numx[curnz++] = __binary_op(A.numx[k], BNullVal, false, true);
1199  }
1200  }
1201  //temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
1202  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1203  }
1204  while(allowANulls && j < B.nzc) // remaining B elements after A ran out
1205  {
1206  IU prevnz = curnz;
1207  temp.jc[curnzc++] = B.jc[j++];
1208  for(IU k = B.cp[j-1]; k< B.cp[j]; ++k)
1209  {
1210  if (do_op(ANullVal, B.numx[k], true, false))
1211  {
1212  temp.ir[curnz] = B.ir[k];
1213  temp.numx[curnz++] = __binary_op(ANullVal, B.numx[k], true, false);
1214  }
1215  }
1216  //temp.cp[curnzc] = temp.cp[curnzc-1] + (B.cp[j] - B.cp[j-1]);
1217  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1218  }
1219  temp.Resize(curnzc, curnz);
1220  return temp;
1221 }
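// ---- Illustrative sketch, not part of Friends.h: the four-argument convention expected by
// do_op and __binary_op above. The trailing flags report whether the A or B operand is missing
// at that position, and allowANulls/allowBNulls/allowIntersect decide which positions are even
// considered. Both functors below are hypothetical examples, not CombBLAS types.
struct KeepWhereAExists                    // a do_op: produce output only where A has an entry
{
    bool operator()(double a, double b, bool aIsNull, bool bIsNull) const
    {
        return !aIsNull;
    }
};
struct AddTreatingNullAsZero               // a __binary_op: missing operands contribute zero
{
    double operator()(double a, double b, bool aIsNull, bool bIsNull) const
    {
        return (aIsNull ? 0.0 : a) + (bIsNull ? 0.0 : b);
    }
};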
1222 
1223 template <typename RETT, typename IU, typename NU1, typename NU2, typename _BinaryOperation, typename _BinaryPredicate>
1224 SpDCCols<IU,RETT> EWiseApply (const SpDCCols<IU,NU1> & A, const SpDCCols<IU,NU2> & B, _BinaryOperation __binary_op, _BinaryPredicate do_op, bool allowANulls, bool allowBNulls, const NU1& ANullVal, const NU2& BNullVal, const bool allowIntersect)
1225 {
1226  assert(A.m == B.m);
1227  assert(A.n == B.n);
1228 
1229  Dcsc<IU, RETT> * tdcsc = new Dcsc<IU, RETT>(EWiseApply<RETT>(A.dcsc, B.dcsc, __binary_op, do_op, allowANulls, allowBNulls, ANullVal, BNullVal, allowIntersect));
1230  return SpDCCols<IU, RETT> (A.m , A.n, tdcsc);
1231 }
1232 
1233 
1234 }
1235 
1236 #endif