COMBINATORIAL_BLAS  1.6
Friends.h
1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 6/15/2017 ---------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc --------------------------*/
6 /****************************************************************/
7 /*
8  Copyright (c) 2010-2017, The Regents of the University of California
9 
10  Permission is hereby granted, free of charge, to any person obtaining a copy
11  of this software and associated documentation files (the "Software"), to deal
12  in the Software without restriction, including without limitation the rights
13  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  copies of the Software, and to permit persons to whom the Software is
15  furnished to do so, subject to the following conditions:
16 
17  The above copyright notice and this permission notice shall be included in
18  all copies or substantial portions of the Software.
19 
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  THE SOFTWARE.
27  */
28 
29 
30 #ifndef _FRIENDS_H_
31 #define _FRIENDS_H_
32 
33 #include <iostream>
34 #include "SpMat.h" // Best to include the base class first
35 #include "SpHelper.h"
36 #include "StackEntry.h"
37 #include "Isect.h"
38 #include "Deleter.h"
39 #include "SpImpl.h"
40 #include "SpParHelper.h"
41 #include "Compare.h"
42 #include "CombBLAS.h"
43 #include "PreAllocatedSPA.h"
44 
45 namespace combblas {
46 
47 template <class IU, class NU>
48 class SpTuples;
49 
50 template <class IU, class NU>
51 class SpDCCols;
52 
53 template <class IU, class NU>
54 class Dcsc;
55 
56 /*************************************************************************************************/
57 /**************************** SHARED ADDRESS SPACE FRIEND FUNCTIONS ******************************/
58 /****************************** MULTITHREADED LOGIC ALSO GOES HERE *******************************/
59 /*************************************************************************************************/
60 
61 
63 template <typename SR, typename IU, typename NU, typename RHS, typename LHS>
64 void dcsc_gespmv (const SpDCCols<IU, NU> & A, const RHS * x, LHS * y)
65 {
66  if(A.nnz > 0)
67  {
68  for(IU j =0; j<A.dcsc->nzc; ++j) // for all nonzero columns
69  {
70  IU colid = A.dcsc->jc[j];
71  for(IU i = A.dcsc->cp[j]; i< A.dcsc->cp[j+1]; ++i)
72  {
73  IU rowid = A.dcsc->ir[i];
74  SR::axpy(A.dcsc->numx[i], x[colid], y[rowid]);
75  }
76  }
77  }
78 }
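// ---- Illustrative sketch, not part of Friends.h: a minimal plus-times semiring over double,
// shaped after the way the SR template parameter is used above (SR::id(), SR::add(), SR::axpy()).
// The name PlusTimesSRd is made up for this example; CombBLAS ships its own semiring classes.
struct PlusTimesSRd
{
    static double id() { return 0.0; }                                        // additive identity
    static double add(const double & a, const double & b) { return a + b; }
    static double multiply(const double & a, const double & b) { return a * b; }
    static void axpy(double a, const double & x, double & y) { y = add(y, multiply(a, x)); }
};
// With such a type, a call would look like dcsc_gespmv<PlusTimesSRd>(A, x, y).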
79 
81 template <typename SR, typename IU, typename NU, typename RHS, typename LHS>
82 void dcsc_gespmv_threaded (const SpDCCols<IU, NU> & A, const RHS * x, LHS * y)
83 {
84  if(A.nnz > 0)
85  {
86  int nthreads=1;
87  #ifdef _OPENMP
88  #pragma omp parallel
89  {
90  nthreads = omp_get_num_threads();
91  }
92  #endif
93 
94  IU nlocrows = A.getnrow();
95  LHS ** tomerge = SpHelper::allocate2D<LHS>(nthreads, nlocrows);
96  auto id = SR::id();
97 
98  for(int i=0; i<nthreads; ++i)
99  {
100  std::fill_n(tomerge[i], nlocrows, id);
101  }
102 
103  #pragma omp parallel for
104  for(IU j =0; j<A.dcsc->nzc; ++j) // for all nonzero columns
105  {
106  int curthread = 0; // without OpenMP there is a single thread, which must use buffer 0
107  #ifdef _OPENMP
108  curthread = omp_get_thread_num();
109  #endif
110 
111  LHS * loc2merge = tomerge[curthread];
112 
113  IU colid = A.dcsc->jc[j];
114  for(IU i = A.dcsc->cp[j]; i< A.dcsc->cp[j+1]; ++i)
115  {
116  IU rowid = A.dcsc->ir[i];
117  SR::axpy(A.dcsc->numx[i], x[colid], loc2merge[rowid]);
118  }
119  }
120 
121  #pragma omp parallel for
122  for(IU j=0; j < nlocrows; ++j)
123  {
124  for(int i=0; i< nthreads; ++i)
125  {
126  y[j] = SR::add(y[j], tomerge[i][j]);
127  }
128  }
129  SpHelper::deallocate2D(tomerge, nthreads);
130  }
131 }
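// ---- Illustrative sketch, not part of Friends.h: the thread-private accumulate-then-reduce
// pattern used by dcsc_gespmv_threaded, restated on plain std::vector buffers. Assumes OpenMP
// and <vector>/<utility>/<omp.h>; the function and variable names are made up for the example.
void accumulate_reduce(int nrows, const std::vector< std::pair<int,double> > & contribs, std::vector<double> & y)
{
    int nthreads = 1;
    #pragma omp parallel
    {
        nthreads = omp_get_num_threads();
    }
    std::vector< std::vector<double> > partial(nthreads, std::vector<double>(nrows, 0.0));

    #pragma omp parallel for
    for (long k = 0; k < (long) contribs.size(); ++k)
    {
        int t = omp_get_thread_num();
        partial[t][contribs[k].first] += contribs[k].second;  // race-free: each thread owns one buffer
    }
    #pragma omp parallel for
    for (int r = 0; r < nrows; ++r)
    {
        for (int t = 0; t < nthreads; ++t)
            y[r] += partial[t][r];                            // reduce the per-thread buffers into y
    }
}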
132 
133 
138 template <typename SR, typename IU, typename NUM, typename DER, typename IVT, typename OVT>
139 int generic_gespmv_threaded (const SpMat<IU,NUM,DER> & A, const int32_t * indx, const IVT * numx, int32_t nnzx,
140  int32_t * & sendindbuf, OVT * & sendnumbuf, int * & sdispls, int p_c, PreAllocatedSPA<OVT> & SPA)
141 {
142  // FACTS: Split boundaries (for multithreaded execution) are independent of recipient boundaries
143  // Two splits might create output to the same recipient (needs to be merged)
144  // However, each split's output is distinct (no duplicate elimination is needed after merge)
145 
146  sdispls = new int[p_c](); // initialize to zero (as all indy might be empty)
147  if(A.getnnz() > 0 && nnzx > 0)
148  {
149  int splits = A.getnsplit();
150  if(splits > 0)
151  {
152  int32_t nlocrows = static_cast<int32_t>(A.getnrow());
153  int32_t perpiece = nlocrows / splits;
154  std::vector< std::vector< int32_t > > indy(splits);
155  std::vector< std::vector< OVT > > numy(splits);
156 
157  // Parallelize with OpenMP
158  #ifdef _OPENMP
159  #pragma omp parallel for // num_threads(6)
160  #endif
161  for(int i=0; i<splits; ++i)
162  {
163  if(SPA.initialized)
164  {
165  if(i != splits-1)
166  SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), perpiece, indx, numx, nnzx, indy[i], numy[i], i*perpiece, SPA.V_localy[i], SPA.V_isthere[i], SPA.V_inds[i]);
167  else
168  SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), nlocrows - perpiece*i, indx, numx, nnzx, indy[i], numy[i], i*perpiece, SPA.V_localy[i], SPA.V_isthere[i], SPA.V_inds[i]);
169  }
170  else
171  {
172  if(i != splits-1)
173  SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), perpiece, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
174  else
175  SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), nlocrows - perpiece*i, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
176  }
177  }
178 
179  std::vector<int> accum(splits+1, 0);
180  for(int i=0; i<splits; ++i)
181  accum[i+1] = accum[i] + indy[i].size();
182 
183  sendindbuf = new int32_t[accum[splits]];
184  sendnumbuf = new OVT[accum[splits]];
185  int32_t perproc = nlocrows / p_c;
186  int32_t last_rec = p_c-1;
187 
188  // keep recipients of last entries in each split (-1 for an empty split)
189  // so that we can delete indy[] and numy[] contents as soon as they are processed
190  std::vector<int32_t> end_recs(splits);
191  for(int i=0; i<splits; ++i)
192  {
193  if(indy[i].empty())
194  end_recs[i] = -1;
195  else
196  end_recs[i] = std::min(indy[i].back() / perproc, last_rec);
197  }
198  #ifdef _OPENMP
199  #pragma omp parallel for // num_threads(6)
200  #endif
201  for(int i=0; i<splits; ++i)
202  {
203  if(!indy[i].empty()) // guarantee that .begin() and .end() are not null
204  {
205  // FACT: Data is sorted, so if the recipient of begin is the same as the recipient of end,
206  // then all of the data goes to the same processor
207  int32_t beg_rec = std::min( indy[i].front() / perproc, last_rec);
208 
209  // We have to test the previous "split", to see if we are marking a "recipient head"
210  // set displacement markers for the completed (previous) buffers only
211  if(i != 0)
212  {
213  int k = i-1;
214  while (k >= 0 && end_recs[k] == -1) k--; // loop backwards until seeing a non-empty split
215  if(k >= 0) // we found a non-empty split
216  {
217  std::fill(sdispls+end_recs[k]+1, sdispls+beg_rec+1, accum[i]); // last entry to be set is sdispls[beg_rec]
218  }
219  // else fill sdispls[1...beg_rec] with zero (already done)
220  }
221  // else set sdispls[0] to zero (already done)
222  if(beg_rec == end_recs[i]) // fast case
223  {
224  std::transform(indy[i].begin(), indy[i].end(), indy[i].begin(), [=](int32_t val) { return val - perproc*beg_rec; }); // convert to receiver's local indices
225  std::copy(indy[i].begin(), indy[i].end(), sendindbuf+accum[i]);
226  std::copy(numy[i].begin(), numy[i].end(), sendnumbuf+accum[i]);
227  }
228  else // slow case
229  {
230  // FACT: No matter how many splits or threads, there will be only one "recipient head"
231  // Therefore there are no race conditions for marking send displacements (sdispls)
232  int end = indy[i].size();
233  for(int cur=0; cur< end; ++cur)
234  {
235  int32_t cur_rec = std::min( indy[i][cur] / perproc, last_rec);
236  while(beg_rec != cur_rec)
237  {
238  sdispls[++beg_rec] = accum[i] + cur; // first entry to be set is sdispls[beg_rec+1]
239  }
240  sendindbuf[ accum[i] + cur ] = indy[i][cur] - perproc*beg_rec; // convert to receiver's local index
241  sendnumbuf[ accum[i] + cur ] = numy[i][cur];
242  }
243  }
244  std::vector<int32_t>().swap(indy[i]);
245  std::vector<OVT>().swap(numy[i]);
246  bool lastnonzero = true; // am I the last nonzero split?
247  for(int k=i+1; k < splits; ++k)
248  {
249  if(end_recs[k] != -1)
250  lastnonzero = false;
251  }
252  if(lastnonzero)
253  std::fill(sdispls+end_recs[i]+1, sdispls+p_c, accum[i+1]);
254  } // end_if(!indy[i].empty)
255  } // end parallel for
256  return accum[splits];
257  }
258  else
259  {
260  std::cout << "Something is wrong, splits should be nonzero for multithreaded execution" << std::endl;
261  return 0;
262  }
263  }
264  else
265  {
266  sendindbuf = NULL;
267  sendnumbuf = NULL;
268  return 0;
269  }
270 }
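// ---- Illustrative sketch, not part of Friends.h: the recipient arithmetic used above.
// Rows are block-distributed over p_c recipients in chunks of perproc = nlocrows / p_c,
// and the last recipient absorbs the remainder. Names below are made up for the example.
struct Destination { int32_t rec; int32_t locind; };

inline Destination MapRowToRecipient(int32_t rowind, int32_t nlocrows, int32_t p_c)
{
    int32_t perproc  = nlocrows / p_c;
    int32_t last_rec = p_c - 1;
    int32_t rec      = std::min(rowind / perproc, last_rec);  // same formula as in the loop above
    Destination d = { rec, rowind - rec * perproc };          // receiver's local index
    return d;
}
// e.g. nlocrows = 10, p_c = 3 gives perproc = 3; row 9 goes to recipient 2 with local index 3.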
271 
272 
279 template <typename SR, typename IU, typename NUM, typename DER, typename IVT, typename OVT>
280 void generic_gespmv_threaded_setbuffers (const SpMat<IU,NUM,DER> & A, const int32_t * indx, const IVT * numx, int32_t nnzx,
281  int32_t * sendindbuf, OVT * sendnumbuf, int * cnts, int * dspls, int p_c)
282 {
283  if(A.getnnz() > 0 && nnzx > 0)
284  {
285  int splits = A.getnsplit();
286  if(splits > 0)
287  {
288  std::vector< std::vector<int32_t> > indy(splits);
289  std::vector< std::vector< OVT > > numy(splits);
290  int32_t nlocrows = static_cast<int32_t>(A.getnrow());
291  int32_t perpiece = nlocrows / splits;
292 
293  #ifdef _OPENMP
294  #pragma omp parallel for
295  #endif
296  for(int i=0; i<splits; ++i)
297  {
298  if(i != splits-1)
299  SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), perpiece, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
300  else
301  SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), nlocrows - perpiece*i, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
302  }
303 
304  int32_t perproc = nlocrows / p_c;
305  int32_t last_rec = p_c-1;
306 
307  // keep recipients of last entries in each split (-1 for an empty split)
308  // so that we can delete indy[] and numy[] contents as soon as they are processed
309  std::vector<int32_t> end_recs(splits);
310  for(int i=0; i<splits; ++i)
311  {
312  if(indy[i].empty())
313  end_recs[i] = -1;
314  else
315  end_recs[i] = std::min(indy[i].back() / perproc, last_rec);
316  }
317 
318  int ** loc_rec_cnts = new int *[splits];
319  #ifdef _OPENMP
320  #pragma omp parallel for
321  #endif
322  for(int i=0; i<splits; ++i)
323  {
324  loc_rec_cnts[i] = new int[p_c](); // thread-local recipient data
325  if(!indy[i].empty()) // guarantee that .begin() and .end() are not null
326  {
327  int32_t cur_rec = std::min( indy[i].front() / perproc, last_rec);
328  int32_t lastdata = (cur_rec+1) * perproc; // one past last entry that goes to this current recipient
329  for(typename std::vector<int32_t>::iterator it = indy[i].begin(); it != indy[i].end(); ++it)
330  {
331 
332  if( ( (*it) >= lastdata ) && cur_rec != last_rec )
333  {
334  cur_rec = std::min( (*it) / perproc, last_rec);
335  lastdata = (cur_rec+1) * perproc;
336  }
337  ++loc_rec_cnts[i][cur_rec];
338  }
339  }
340  }
341  #ifdef _OPENMP
342  #pragma omp parallel for
343  #endif
344  for(int i=0; i<splits; ++i)
345  {
346  if(!indy[i].empty()) // guarantee that .begin() and .end() are not null
347  {
348  // FACT: Data is sorted, so if the recipient of begin is the same as the recipient of end,
349  // then all of the data goes to the same processor
350  int32_t beg_rec = std::min( indy[i].front() / perproc, last_rec);
351  int32_t alreadysent = 0; // already sent per recipient
352  for(int before = i-1; before >= 0; before--)
353  alreadysent += loc_rec_cnts[before][beg_rec];
354 
355  if(beg_rec == end_recs[i]) // fast case
356  {
357  std::transform(indy[i].begin(), indy[i].end(), indy[i].begin(), [=](int32_t val) { return val - perproc*beg_rec; }); // convert to receiver's local indices
358  std::copy(indy[i].begin(), indy[i].end(), sendindbuf + dspls[beg_rec] + alreadysent);
359  std::copy(numy[i].begin(), numy[i].end(), sendnumbuf + dspls[beg_rec] + alreadysent);
360  }
361  else // slow case
362  {
363  int32_t cur_rec = beg_rec;
364  int32_t lastdata = (cur_rec+1) * perproc; // one past last entry that goes to this current recipient
365  for(typename std::vector<int32_t>::iterator it = indy[i].begin(); it != indy[i].end(); ++it)
366  {
367  if( ( (*it) >= lastdata ) && cur_rec != last_rec )
368  {
369  cur_rec = std::min( (*it) / perproc, last_rec);
370  lastdata = (cur_rec+1) * perproc;
371 
372  // if this split switches to a new recipient after sending some data
373  // then it's sure that no data has been sent to that recipient yet
374  alreadysent = 0;
375  }
376  sendindbuf[ dspls[cur_rec] + alreadysent ] = (*it) - perproc*cur_rec; // convert to receiver's local index
377  sendnumbuf[ dspls[cur_rec] + (alreadysent++) ] = *(numy[i].begin() + (it-indy[i].begin()));
378  }
379  }
380  }
381  }
382  // Deallocate the per-thread recipient counts serially once all threads complete
383  for(int i=0; i< splits; ++i)
384  {
385  for(int j=0; j< p_c; ++j)
386  cnts[j] += loc_rec_cnts[i][j];
387  delete [] loc_rec_cnts[i];
388  }
389  delete [] loc_rec_cnts;
390  }
391  else
392  {
393  std::cout << "Something is wrong, splits should be nonzero for multithreaded execution" << std::endl;
394  }
395  }
396 }
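// ---- Illustrative sketch, not part of Friends.h: the count-then-place scheme used above.
// Each split first counts how many entries it sends to every recipient (loc_rec_cnts);
// a split then writes its data for recipient r at dspls[r] plus whatever the earlier
// splits already placed for that recipient. Names below are made up for the example.
inline int WriteOffset(const std::vector<int> & dspls,                 // recipient displacements
                       const std::vector< std::vector<int> > & cnt,    // cnt[split][recipient]
                       int split, int rec)
{
    int alreadysent = 0;
    for (int before = 0; before < split; ++before)
        alreadysent += cnt[before][rec];        // entries earlier splits send to the same recipient
    return dspls[rec] + alreadysent;            // first free slot for this split and recipient
}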
397 
401 template <typename SR, typename MIND, typename VIND, typename DER, typename NUM, typename IVT, typename OVT>
402 void generic_gespmv (const SpMat<MIND,NUM,DER> & A, const VIND * indx, const IVT * numx, VIND nnzx, std::vector<VIND> & indy, std::vector<OVT> & numy, PreAllocatedSPA<OVT> & SPA)
403 {
404  if(A.getnnz() > 0 && nnzx > 0)
405  {
406  if(A.getnsplit() > 0)
407  {
408  std::cout << "Call dcsc_gespmv_threaded instead" << std::endl;
409  }
410  else
411  {
412  SpMXSpV<SR>(*(A.GetInternal()), (VIND) A.getnrow(), indx, numx, nnzx, indy, numy, SPA);
413  }
414  }
415 }
416 
420 template <typename SR, typename IU, typename DER, typename NUM, typename IVT, typename OVT>
421 void generic_gespmv (const SpMat<IU,NUM,DER> & A, const int32_t * indx, const IVT * numx, int32_t nnzx,
422  int32_t * indy, OVT * numy, int * cnts, int * dspls, int p_c, bool indexisvalue)
423 {
424  if(A.getnnz() > 0 && nnzx > 0)
425  {
426  if(A.getnsplit() > 0)
427  {
428  SpParHelper::Print("Call dcsc_gespmv_threaded instead\n");
429  }
430  else
431  {
432  SpMXSpV<SR>(*(A.GetInternal()), (int32_t) A.getnrow(), indx, numx, nnzx, indy, numy, cnts, dspls, p_c);
433  }
434  }
435 }
436 
437 
438 template<typename IU>
439 void BooleanRowSplit(SpDCCols<IU, bool> & A, int numsplits)
440 {
441  A.splits = numsplits;
442  IU perpiece = A.m / A.splits;
443  std::vector<IU> prevcolids(A.splits, -1); // previous column id's are set to -1
444  std::vector<IU> nzcs(A.splits, 0);
445  std::vector<IU> nnzs(A.splits, 0);
446  std::vector < std::vector < std::pair<IU,IU> > > colrowpairs(A.splits);
447  if(A.nnz > 0 && A.dcsc != NULL)
448  {
449  for(IU i=0; i< A.dcsc->nzc; ++i)
450  {
451  for(IU j = A.dcsc->cp[i]; j< A.dcsc->cp[i+1]; ++j)
452  {
453  IU colid = A.dcsc->jc[i];
454  IU rowid = A.dcsc->ir[j];
455  IU owner = std::min(rowid / perpiece, static_cast<IU>(A.splits-1));
456  colrowpairs[owner].push_back(std::make_pair(colid, rowid - owner*perpiece));
457 
458  if(prevcolids[owner] != colid)
459  {
460  prevcolids[owner] = colid;
461  ++nzcs[owner];
462  }
463  ++nnzs[owner];
464  }
465  }
466  }
467  delete A.dcsc; // reclaim memory
468  //copy(nzcs.begin(), nzcs.end(), ostream_iterator<IU>(cout," " )); cout << endl;
469  //copy(nnzs.begin(), nnzs.end(), ostream_iterator<IU>(cout," " )); cout << endl;
470  A.dcscarr = new Dcsc<IU,bool>*[A.splits];
471 
472  // To be parallelized with OpenMP
473  for(int i=0; i< A.splits; ++i)
474  {
475  sort(colrowpairs[i].begin(), colrowpairs[i].end()); // sort w.r.t. columns
476  A.dcscarr[i] = new Dcsc<IU,bool>(nnzs[i],nzcs[i]);
477  std::fill(A.dcscarr[i]->numx, A.dcscarr[i]->numx+nnzs[i], static_cast<bool>(1));
478  IU curnzc = 0; // number of nonzero columns constructed so far
479  IU cindex = colrowpairs[i][0].first;
480  IU rindex = colrowpairs[i][0].second;
481 
482  A.dcscarr[i]->ir[0] = rindex;
483  A.dcscarr[i]->jc[curnzc] = cindex;
484  A.dcscarr[i]->cp[curnzc++] = 0;
485 
486  for(IU j=1; j<nnzs[i]; ++j)
487  {
488  cindex = colrowpairs[i][j].first;
489  rindex = colrowpairs[i][j].second;
490 
491  A.dcscarr[i]->ir[j] = rindex;
492  if(cindex != A.dcscarr[i]->jc[curnzc-1])
493  {
494  A.dcscarr[i]->jc[curnzc] = cindex;
495  A.dcscarr[i]->cp[curnzc++] = j;
496  }
497  }
498  A.dcscarr[i]->cp[curnzc] = nnzs[i];
499  }
500 }
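// ---- Illustrative sketch, not part of Friends.h: how the (jc, cp, ir) arrays are built from
// column-sorted (column, row) pairs, as the loop above does for each split. Uses std::vector
// instead of the preallocated Dcsc arrays; names are made up for the example.
void PairsToDcscArrays(const std::vector< std::pair<int,int> > & colrow,  // sorted by column
                       std::vector<int> & jc, std::vector<int> & cp, std::vector<int> & ir)
{
    for (size_t j = 0; j < colrow.size(); ++j)
    {
        ir.push_back(colrow[j].second);                       // row index of the j-th nonzero
        if (jc.empty() || colrow[j].first != jc.back())       // a new nonzero column starts here
        {
            jc.push_back(colrow[j].first);
            cp.push_back((int) j);
        }
    }
    cp.push_back((int) colrow.size());                        // sentinel: one past the last nonzero
}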
501 
502 
509 template<class SR, class NUO, class IU, class NU1, class NU2>
510 SpTuples<IU, NUO> * Tuples_AnXBt
511  (const SpDCCols<IU, NU1> & A,
512  const SpDCCols<IU, NU2> & B,
513  bool clearA = false, bool clearB = false)
514 {
515  IU mdim = A.m;
516  IU ndim = B.m; // B is already transposed
517 
518  if(A.isZero() || B.isZero())
519  {
520  if(clearA) delete const_cast<SpDCCols<IU, NU1> *>(&A);
521  if(clearB) delete const_cast<SpDCCols<IU, NU2> *>(&B);
522  return new SpTuples< IU, NUO >(0, mdim, ndim); // just return an empty matrix
523  }
524  Isect<IU> *isect1, *isect2, *itr1, *itr2, *cols, *rows;
525  SpHelper::SpIntersect(*(A.dcsc), *(B.dcsc), cols, rows, isect1, isect2, itr1, itr2);
526 
527  IU kisect = static_cast<IU>(itr1-isect1); // size of the intersection ((itr1-isect1) == (itr2-isect2))
528  if(kisect == 0)
529  {
530  if(clearA) delete const_cast<SpDCCols<IU, NU1> *>(&A);
531  if(clearB) delete const_cast<SpDCCols<IU, NU2> *>(&B);
532  DeleteAll(isect1, isect2, cols, rows);
533  return new SpTuples< IU, NUO >(0, mdim, ndim);
534  }
535 
536  StackEntry< NUO, std::pair<IU,IU> > * multstack;
537 
538  IU cnz = SpHelper::SpCartesian< SR > (*(A.dcsc), *(B.dcsc), kisect, isect1, isect2, multstack);
539  DeleteAll(isect1, isect2, cols, rows);
540 
541  if(clearA) delete const_cast<SpDCCols<IU, NU1> *>(&A);
542  if(clearB) delete const_cast<SpDCCols<IU, NU2> *>(&B);
543  return new SpTuples<IU, NUO> (cnz, mdim, ndim, multstack);
544 }
545 
552 template<class SR, class NUO, class IU, class NU1, class NU2>
553 SpTuples<IU, NUO> * Tuples_AnXBn
554  (const SpDCCols<IU, NU1> & A,
555  const SpDCCols<IU, NU2> & B,
556  bool clearA = false, bool clearB = false)
557 {
558  IU mdim = A.m;
559  IU ndim = B.n;
560  if(A.isZero() || B.isZero())
561  {
562  return new SpTuples<IU, NUO>(0, mdim, ndim);
563  }
564  StackEntry< NUO, std::pair<IU,IU> > * multstack;
565  IU cnz = SpHelper::SpColByCol< SR > (*(A.dcsc), *(B.dcsc), A.n, multstack);
566 
567  if(clearA)
568  delete const_cast<SpDCCols<IU, NU1> *>(&A);
569  if(clearB)
570  delete const_cast<SpDCCols<IU, NU2> *>(&B);
571 
572  return new SpTuples<IU, NUO> (cnz, mdim, ndim, multstack);
573 }
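// ---- Illustrative sketch, not part of Friends.h: the column-by-column formulation behind
// Tuples_AnXBn. Column j of C is a combination of the columns of A selected by the nonzeros
// of column j of B. A toy CSC struct and a std::map accumulator (needs <map>) stand in for the
// Dcsc and heap machinery of SpHelper::SpColByCol; all names below are made up for the example.
struct ToyCsc { std::vector<int> colptr, rowind; std::vector<double> val; };

std::vector< std::map<int,double> > SpGemmColByCol(const ToyCsc & A, const ToyCsc & B, int ncolsB)
{
    std::vector< std::map<int,double> > C(ncolsB);             // one accumulator per output column
    for (int j = 0; j < ncolsB; ++j)
    {
        for (int kb = B.colptr[j]; kb < B.colptr[j+1]; ++kb)   // nonzeros B(k,j) in column j
        {
            int k = B.rowind[kb];
            for (int ka = A.colptr[k]; ka < A.colptr[k+1]; ++ka)
                C[j][A.rowind[ka]] += A.val[ka] * B.val[kb];   // C(i,j) += A(i,k) * B(k,j)
        }
    }
    return C;
}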
574 
575 
576 template<class SR, class NUO, class IU, class NU1, class NU2>
577 SpTuples<IU, NUO> * Tuples_AtXBt
578  (const SpDCCols<IU, NU1> & A,
579  const SpDCCols<IU, NU2> & B,
580  bool clearA = false, bool clearB = false)
581 {
582  IU mdim = A.n;
583  IU ndim = B.m;
584  std::cout << "Tuples_AtXBt function has not been implemented yet!" << std::endl;
585 
586  return new SpTuples<IU, NUO> (0, mdim, ndim);
587 }
588 
589 template<class SR, class NUO, class IU, class NU1, class NU2>
590 SpTuples<IU, NUO> * Tuples_AtXBn
591  (const SpDCCols<IU, NU1> & A,
592  const SpDCCols<IU, NU2> & B,
593  bool clearA = false, bool clearB = false)
594 {
595  IU mdim = A.n;
596  IU ndim = B.n;
597  std::cout << "Tuples_AtXBn function has not been implemented yet!" << std::endl;
598 
599  return new SpTuples<IU, NUO> (0, mdim, ndim);
600 }
601 
602 // Performs a balanced merge of the array of SpTuples
603 // Assumes the input parameters are already column sorted
604 template<class SR, class IU, class NU>
605 SpTuples<IU,NU> MergeAll( const std::vector<SpTuples<IU,NU> *> & ArrSpTups, IU mstar = 0, IU nstar = 0, bool delarrs = false )
606 {
607  int hsize = ArrSpTups.size();
608  if(hsize == 0)
609  {
610  return SpTuples<IU,NU>(0, mstar,nstar);
611  }
612  else
613  {
614  mstar = ArrSpTups[0]->m;
615  nstar = ArrSpTups[0]->n;
616  }
617  for(int i=1; i< hsize; ++i)
618  {
619  if((mstar != ArrSpTups[i]->m) || nstar != ArrSpTups[i]->n)
620  {
621  std::cerr << "Dimensions do not match on MergeAll()" << std::endl;
622  return SpTuples<IU,NU>(0,0,0);
623  }
624  }
625  if(hsize > 1)
626  {
627  ColLexiCompare<IU,int> heapcomp;
628  std::tuple<IU, IU, int> * heap = new std::tuple<IU, IU, int> [hsize]; // (rowindex, colindex, source-id)
629  IU * curptr = new IU[hsize];
630  std::fill_n(curptr, hsize, static_cast<IU>(0));
631  IU estnnz = 0;
632 
633  for(int i=0; i< hsize; ++i)
634  {
635  estnnz += ArrSpTups[i]->getnnz();
636  heap[i] = std::make_tuple(std::get<0>(ArrSpTups[i]->tuples[0]), std::get<1>(ArrSpTups[i]->tuples[0]), i);
637  }
638  std::make_heap(heap, heap+hsize, std::not2(heapcomp));
639 
640  std::tuple<IU, IU, NU> * ntuples = new std::tuple<IU,IU,NU>[estnnz];
641  IU cnz = 0;
642 
643  while(hsize > 0)
644  {
645  std::pop_heap(heap, heap + hsize, std::not2(heapcomp)); // result is stored in heap[hsize-1]
646  int source = std::get<2>(heap[hsize-1]);
647 
648  if( (cnz != 0) &&
649  ((std::get<0>(ntuples[cnz-1]) == std::get<0>(heap[hsize-1])) && (std::get<1>(ntuples[cnz-1]) == std::get<1>(heap[hsize-1]))) )
650  {
651  std::get<2>(ntuples[cnz-1]) = SR::add(std::get<2>(ntuples[cnz-1]), ArrSpTups[source]->numvalue(curptr[source]++));
652  }
653  else
654  {
655  ntuples[cnz++] = ArrSpTups[source]->tuples[curptr[source]++];
656  }
657 
658  if(curptr[source] != ArrSpTups[source]->getnnz()) // That array has not been depleted
659  {
660  heap[hsize-1] = std::make_tuple(std::get<0>(ArrSpTups[source]->tuples[curptr[source]]),
661  std::get<1>(ArrSpTups[source]->tuples[curptr[source]]), source);
662  std::push_heap(heap, heap+hsize, std::not2(heapcomp));
663  }
664  else
665  {
666  --hsize;
667  }
668  }
669  SpHelper::ShrinkArray(ntuples, cnz);
670  DeleteAll(heap, curptr);
671 
672  if(delarrs)
673  {
674  for(size_t i=0; i<ArrSpTups.size(); ++i)
675  delete ArrSpTups[i];
676  }
677  return SpTuples<IU,NU> (cnz, mstar, nstar, ntuples);
678  }
679  else
680  {
681  SpTuples<IU,NU> ret = *ArrSpTups[0];
682  if(delarrs)
683  delete ArrSpTups[0];
684  return ret;
685  }
686 }
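// ---- Illustrative sketch, not part of Friends.h: the k-way merge with on-the-fly duplicate
// addition that MergeAll performs, restated over plain column-sorted (row, column, value)
// vectors with std::priority_queue (needs <queue>, <tuple>, <functional>). Plus stands in for
// SR::add; all names below are made up for the example.
std::vector< std::tuple<int,int,double> >
MergeSortedTriples(const std::vector< std::vector< std::tuple<int,int,double> > > & lists)
{
    typedef std::pair< std::pair<int,int>, int > HeapItem;     // ((column, row) key, list id)
    std::priority_queue< HeapItem, std::vector<HeapItem>, std::greater<HeapItem> > heap;
    std::vector<size_t> cur(lists.size(), 0);
    for (int i = 0; i < (int) lists.size(); ++i)
        if (!lists[i].empty())
            heap.push(HeapItem(std::make_pair(std::get<1>(lists[i][0]), std::get<0>(lists[i][0])), i));

    std::vector< std::tuple<int,int,double> > out;
    while (!heap.empty())
    {
        int src = heap.top().second;
        heap.pop();
        const std::tuple<int,int,double> & t = lists[src][cur[src]++];
        if (!out.empty() && std::get<0>(out.back()) == std::get<0>(t) && std::get<1>(out.back()) == std::get<1>(t))
            std::get<2>(out.back()) += std::get<2>(t);          // same (row, column): add the values
        else
            out.push_back(t);
        if (cur[src] < lists[src].size())                       // this list is not depleted yet
            heap.push(HeapItem(std::make_pair(std::get<1>(lists[src][cur[src]]), std::get<0>(lists[src][cur[src]])), src));
    }
    return out;
}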
687 
693 template <typename IU, typename NU1, typename NU2>
694 Dcsc<IU, typename promote_trait<NU1,NU2>::T_promote> EWiseMult (const Dcsc<IU,NU1> & A, const Dcsc<IU,NU2> * B, bool exclude)
695 {
696  typedef typename promote_trait<NU1,NU2>::T_promote N_promote;
697  IU estnzc, estnz;
698  if(exclude)
699  {
700  estnzc = A.nzc;
701  estnz = A.nz;
702  }
703  else
704  {
705  estnzc = std::min(A.nzc, B->nzc);
706  estnz = std::min(A.nz, B->nz);
707  }
708 
709  Dcsc<IU,N_promote> temp(estnz, estnzc);
710 
711  IU curnzc = 0;
712  IU curnz = 0;
713  IU i = 0;
714  IU j = 0;
715  temp.cp[0] = 0;
716 
717  if(!exclude) // A = A .* B
718  {
719  while(i< A.nzc && B != NULL && j<B->nzc)
720  {
721  if(A.jc[i] > B->jc[j]) ++j;
722  else if(A.jc[i] < B->jc[j]) ++i;
723  else
724  {
725  IU ii = A.cp[i];
726  IU jj = B->cp[j];
727  IU prevnz = curnz;
728  while (ii < A.cp[i+1] && jj < B->cp[j+1])
729  {
730  if (A.ir[ii] < B->ir[jj]) ++ii;
731  else if (A.ir[ii] > B->ir[jj]) ++jj;
732  else
733  {
734  temp.ir[curnz] = A.ir[ii];
735  temp.numx[curnz++] = A.numx[ii++] * B->numx[jj++];
736  }
737  }
738  if(prevnz < curnz) // at least one nonzero exists in this column
739  {
740  temp.jc[curnzc++] = A.jc[i];
741  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
742  }
743  ++i;
744  ++j;
745  }
746  }
747  }
748  else // A = A .* not(B)
749  {
750  while(i< A.nzc && B != NULL && j< B->nzc)
751  {
752  if(A.jc[i] > B->jc[j]) ++j;
753  else if(A.jc[i] < B->jc[j])
754  {
755  temp.jc[curnzc++] = A.jc[i++];
756  for(IU k = A.cp[i-1]; k< A.cp[i]; k++)
757  {
758  temp.ir[curnz] = A.ir[k];
759  temp.numx[curnz++] = A.numx[k];
760  }
761  temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
762  }
763  else
764  {
765  IU ii = A.cp[i];
766  IU jj = B->cp[j];
767  IU prevnz = curnz;
768  while (ii < A.cp[i+1] && jj < B->cp[j+1])
769  {
770  if (A.ir[ii] > B->ir[jj]) ++jj;
771  else if (A.ir[ii] < B->ir[jj])
772  {
773  temp.ir[curnz] = A.ir[ii];
774  temp.numx[curnz++] = A.numx[ii++];
775  }
776  else // nonzero present in both A and B: excluded from the result
777  {
778  ++ii;
779  ++jj;
780  }
781  }
782  while (ii < A.cp[i+1])
783  {
784  temp.ir[curnz] = A.ir[ii];
785  temp.numx[curnz++] = A.numx[ii++];
786  }
787 
788  if(prevnz < curnz) // at least one nonzero exists in this column
789  {
790  temp.jc[curnzc++] = A.jc[i];
791  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
792  }
793  ++i;
794  ++j;
795  }
796  }
797  while(i< A.nzc)
798  {
799  temp.jc[curnzc++] = A.jc[i++];
800  for(IU k = A.cp[i-1]; k< A.cp[i]; ++k)
801  {
802  temp.ir[curnz] = A.ir[k];
803  temp.numx[curnz++] = A.numx[k];
804  }
805  temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
806  }
807  }
808 
809  temp.Resize(curnzc, curnz);
810  return temp;
811 }
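// ---- Illustrative sketch, not part of Friends.h: the semantics of the exclude flag above,
// restated on std::map "sparse vectors" (needs <map>). With exclude == false the result keeps
// the positions present in both operands (A .* B); with exclude == true it keeps the entries
// of A whose positions are absent from B (A .* not(B)). Names are made up for the example.
std::map<int,double> EWiseMultMaps(const std::map<int,double> & A, const std::map<int,double> & B, bool exclude)
{
    std::map<int,double> C;
    for (std::map<int,double>::const_iterator it = A.begin(); it != A.end(); ++it)
    {
        std::map<int,double>::const_iterator bit = B.find(it->first);
        if (!exclude && bit != B.end())
            C[it->first] = it->second * bit->second;   // position nonzero in both: multiply
        else if (exclude && bit == B.end())
            C[it->first] = it->second;                 // position missing from B: keep A's value
    }
    return C;
}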
812 
813 template <typename N_promote, typename IU, typename NU1, typename NU2, typename _BinaryOperation>
814 Dcsc<IU, N_promote> EWiseApply(const Dcsc<IU,NU1> & A, const Dcsc<IU,NU2> * B, _BinaryOperation __binary_op, bool notB, const NU2& defaultBVal)
815 {
816  //typedef typename promote_trait<NU1,NU2>::T_promote N_promote;
817  IU estnzc, estnz;
818  if(notB)
819  {
820  estnzc = A.nzc;
821  estnz = A.nz;
822  }
823  else
824  {
825  estnzc = std::min(A.nzc, B->nzc);
826  estnz = std::min(A.nz, B->nz);
827  }
828 
829  Dcsc<IU,N_promote> temp(estnz, estnzc);
830 
831  IU curnzc = 0;
832  IU curnz = 0;
833  IU i = 0;
834  IU j = 0;
835  temp.cp[0] = 0;
836 
837  if(!notB) // A = A .* B
838  {
839  while(i< A.nzc && B != NULL && j<B->nzc)
840  {
841  if(A.jc[i] > B->jc[j]) ++j;
842  else if(A.jc[i] < B->jc[j]) ++i;
843  else
844  {
845  IU ii = A.cp[i];
846  IU jj = B->cp[j];
847  IU prevnz = curnz;
848  while (ii < A.cp[i+1] && jj < B->cp[j+1])
849  {
850  if (A.ir[ii] < B->ir[jj]) ++ii;
851  else if (A.ir[ii] > B->ir[jj]) ++jj;
852  else
853  {
854  temp.ir[curnz] = A.ir[ii];
855  temp.numx[curnz++] = __binary_op(A.numx[ii++], B->numx[jj++]);
856  }
857  }
858  if(prevnz < curnz) // at least one nonzero exists in this column
859  {
860  temp.jc[curnzc++] = A.jc[i];
861  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
862  }
863  ++i;
864  ++j;
865  }
866  }
867  }
868  else // A = A .* not(B)
869  {
870  while(i< A.nzc && B != NULL && j< B->nzc)
871  {
872  if(A.jc[i] > B->jc[j]) ++j;
873  else if(A.jc[i] < B->jc[j])
874  {
875  temp.jc[curnzc++] = A.jc[i++];
876  for(IU k = A.cp[i-1]; k< A.cp[i]; k++)
877  {
878  temp.ir[curnz] = A.ir[k];
879  temp.numx[curnz++] = __binary_op(A.numx[k], defaultBVal);
880  }
881  temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
882  }
883  else
884  {
885  IU ii = A.cp[i];
886  IU jj = B->cp[j];
887  IU prevnz = curnz;
888  while (ii < A.cp[i+1] && jj < B->cp[j+1])
889  {
890  if (A.ir[ii] > B->ir[jj]) ++jj;
891  else if (A.ir[ii] < B->ir[jj])
892  {
893  temp.ir[curnz] = A.ir[ii];
894  temp.numx[curnz++] = __binary_op(A.numx[ii++], defaultBVal);
895  }
896  else // nonzero present in both A and B: excluded from the result
897  {
898  ++ii;
899  ++jj;
900  }
901  }
902  while (ii < A.cp[i+1])
903  {
904  temp.ir[curnz] = A.ir[ii];
905  temp.numx[curnz++] = __binary_op(A.numx[ii++], defaultBVal);
906  }
907 
908  if(prevnz < curnz) // at least one nonzero exists in this column
909  {
910  temp.jc[curnzc++] = A.jc[i];
911  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
912  }
913  ++i;
914  ++j;
915  }
916  }
917  while(i< A.nzc)
918  {
919  temp.jc[curnzc++] = A.jc[i++];
920  for(IU k = A.cp[i-1]; k< A.cp[i]; ++k)
921  {
922  temp.ir[curnz] = A.ir[k];
923  temp.numx[curnz++] = __binary_op(A.numx[k], defaultBVal);
924  }
925  temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
926  }
927  }
928 
929  temp.Resize(curnzc, curnz);
930  return temp;
931 }
932 
933 
934 template<typename IU, typename NU1, typename NU2>
935 SpDCCols<IU, typename promote_trait<NU1,NU2>::T_promote> EWiseMult (const SpDCCols<IU,NU1> & A, const SpDCCols<IU,NU2> & B, bool exclude)
936 {
937  typedef typename promote_trait<NU1,NU2>::T_promote N_promote;
938  assert(A.m == B.m);
939  assert(A.n == B.n);
940 
941  Dcsc<IU, N_promote> * tdcsc = NULL;
942  if(A.nnz > 0 && B.nnz > 0)
943  {
944  tdcsc = new Dcsc<IU, N_promote>(EWiseMult(*(A.dcsc), B.dcsc, exclude));
945  return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
946  }
947  else if (A.nnz > 0 && exclude) // && B.nnz == 0
948  {
949  tdcsc = new Dcsc<IU, N_promote>(EWiseMult(*(A.dcsc), (const Dcsc<IU,NU2>*)NULL, exclude));
950  return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
951  }
952  else
953  {
954  return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
955  }
956 }
957 
958 
959 template<typename N_promote, typename IU, typename NU1, typename NU2, typename _BinaryOperation>
960 SpDCCols<IU, N_promote> EWiseApply (const SpDCCols<IU,NU1> & A, const SpDCCols<IU,NU2> & B, _BinaryOperation __binary_op, bool notB, const NU2& defaultBVal)
961 {
962  //typedef typename promote_trait<NU1,NU2>::T_promote N_promote;
963  assert(A.m == B.m);
964  assert(A.n == B.n);
965 
966  Dcsc<IU, N_promote> * tdcsc = NULL;
967  if(A.nnz > 0 && B.nnz > 0)
968  {
969  tdcsc = new Dcsc<IU, N_promote>(EWiseApply<N_promote>(*(A.dcsc), B.dcsc, __binary_op, notB, defaultBVal));
970  return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
971  }
972  else if (A.nnz > 0 && notB) // && B.nnz == 0
973  {
974  tdcsc = new Dcsc<IU, N_promote>(EWiseApply<N_promote>(*(A.dcsc), (const Dcsc<IU,NU2>*)NULL, __binary_op, notB, defaultBVal));
975  return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
976  }
977  else
978  {
979  return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
980  }
981 }
982 
992 template <typename RETT, typename IU, typename NU1, typename NU2, typename _BinaryOperation, typename _BinaryPredicate>
993 Dcsc<IU, RETT> EWiseApply(const Dcsc<IU,NU1> * Ap, const Dcsc<IU,NU2> * Bp, _BinaryOperation __binary_op, _BinaryPredicate do_op, bool allowANulls, bool allowBNulls, const NU1& ANullVal, const NU2& BNullVal, const bool allowIntersect)
994 {
995  if (Ap == NULL && Bp == NULL)
996  return Dcsc<IU,RETT>(0, 0);
997 
998  if (Ap == NULL && Bp != NULL)
999  {
1000  if (!allowANulls)
1001  return Dcsc<IU,RETT>(0, 0);
1002 
1003  const Dcsc<IU,NU2> & B = *Bp;
1004  IU estnzc = B.nzc;
1005  IU estnz = B.nz;
1006  Dcsc<IU,RETT> temp(estnz, estnzc);
1007 
1008  IU curnzc = 0;
1009  IU curnz = 0;
1010  //IU i = 0;
1011  IU j = 0;
1012  temp.cp[0] = 0;
1013  while(j<B.nzc)
1014  {
1015  // Mirrors the branch of the combined loop below that handles missing A entries.
1016  j++;
1017  IU prevnz = curnz;
1018  temp.jc[curnzc++] = B.jc[j-1];
1019  for(IU k = B.cp[j-1]; k< B.cp[j]; ++k)
1020  {
1021  if (do_op(ANullVal, B.numx[k], true, false))
1022  {
1023  temp.ir[curnz] = B.ir[k];
1024  temp.numx[curnz++] = __binary_op(ANullVal, B.numx[k], true, false);
1025  }
1026  }
1027  //temp.cp[curnzc] = temp.cp[curnzc-1] + (B.cp[j] - B.cp[j-1]);
1028  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1029  }
1030  temp.Resize(curnzc, curnz);
1031  return temp;
1032  }
1033 
1034  if (Ap != NULL && Bp == NULL)
1035  {
1036  if (!allowBNulls)
1037  return Dcsc<IU,RETT>(0, 0);
1038 
1039  const Dcsc<IU,NU1> & A = *Ap;
1040  IU estnzc = A.nzc;
1041  IU estnz = A.nz;
1042  Dcsc<IU,RETT> temp(estnz, estnzc);
1043 
1044  IU curnzc = 0;
1045  IU curnz = 0;
1046  IU i = 0;
1047  //IU j = 0;
1048  temp.cp[0] = 0;
1049  while(i< A.nzc)
1050  {
1051  i++;
1052  IU prevnz = curnz;
1053  temp.jc[curnzc++] = A.jc[i-1];
1054  for(IU k = A.cp[i-1]; k< A.cp[i]; k++)
1055  {
1056  if (do_op(A.numx[k], BNullVal, false, true))
1057  {
1058  temp.ir[curnz] = A.ir[k];
1059  temp.numx[curnz++] = __binary_op(A.numx[k], BNullVal, false, true);
1060  }
1061  }
1062  //temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
1063  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1064  }
1065  temp.Resize(curnzc, curnz);
1066  return temp;
1067  }
1068 
1069  // both A and B are non-NULL at this point
1070  const Dcsc<IU,NU1> & A = *Ap;
1071  const Dcsc<IU,NU2> & B = *Bp;
1072 
1073  IU estnzc = A.nzc + B.nzc;
1074  IU estnz = A.nz + B.nz;
1075  Dcsc<IU,RETT> temp(estnz, estnzc);
1076 
1077  IU curnzc = 0;
1078  IU curnz = 0;
1079  IU i = 0;
1080  IU j = 0;
1081  temp.cp[0] = 0;
1082  while(i< A.nzc && j<B.nzc)
1083  {
1084  if(A.jc[i] > B.jc[j])
1085  {
1086  j++;
1087  if (allowANulls)
1088  {
1089  IU prevnz = curnz;
1090  temp.jc[curnzc++] = B.jc[j-1];
1091  for(IU k = B.cp[j-1]; k< B.cp[j]; ++k)
1092  {
1093  if (do_op(ANullVal, B.numx[k], true, false))
1094  {
1095  temp.ir[curnz] = B.ir[k];
1096  temp.numx[curnz++] = __binary_op(ANullVal, B.numx[k], true, false);
1097  }
1098  }
1099  //temp.cp[curnzc] = temp.cp[curnzc-1] + (B.cp[j] - B.cp[j-1]);
1100  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1101  }
1102  }
1103  else if(A.jc[i] < B.jc[j])
1104  {
1105  i++;
1106  if (allowBNulls)
1107  {
1108  IU prevnz = curnz;
1109  temp.jc[curnzc++] = A.jc[i-1];
1110  for(IU k = A.cp[i-1]; k< A.cp[i]; k++)
1111  {
1112  if (do_op(A.numx[k], BNullVal, false, true))
1113  {
1114  temp.ir[curnz] = A.ir[k];
1115  temp.numx[curnz++] = __binary_op(A.numx[k], BNullVal, false, true);
1116  }
1117  }
1118  //temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
1119  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1120  }
1121  }
1122  else
1123  {
1124  temp.jc[curnzc++] = A.jc[i];
1125  IU ii = A.cp[i];
1126  IU jj = B.cp[j];
1127  IU prevnz = curnz;
1128  while (ii < A.cp[i+1] && jj < B.cp[j+1])
1129  {
1130  if (A.ir[ii] < B.ir[jj])
1131  {
1132  if (allowBNulls && do_op(A.numx[ii], BNullVal, false, true))
1133  {
1134  temp.ir[curnz] = A.ir[ii];
1135  temp.numx[curnz++] = __binary_op(A.numx[ii++], BNullVal, false, true);
1136  }
1137  else
1138  ii++;
1139  }
1140  else if (A.ir[ii] > B.ir[jj])
1141  {
1142  if (allowANulls && do_op(ANullVal, B.numx[jj], true, false))
1143  {
1144  temp.ir[curnz] = B.ir[jj];
1145  temp.numx[curnz++] = __binary_op(ANullVal, B.numx[jj++], true, false);
1146  }
1147  else
1148  jj++;
1149  }
1150  else
1151  {
1152  if (allowIntersect && do_op(A.numx[ii], B.numx[jj], false, false))
1153  {
1154  temp.ir[curnz] = A.ir[ii];
1155  temp.numx[curnz++] = __binary_op(A.numx[ii++], B.numx[jj++], false, false); // might include zeros
1156  }
1157  else
1158  {
1159  ii++;
1160  jj++;
1161  }
1162  }
1163  }
1164  while (ii < A.cp[i+1])
1165  {
1166  if (allowBNulls && do_op(A.numx[ii], BNullVal, false, true))
1167  {
1168  temp.ir[curnz] = A.ir[ii];
1169  temp.numx[curnz++] = __binary_op(A.numx[ii++], BNullVal, false, true);
1170  }
1171  else
1172  ii++;
1173  }
1174  while (jj < B.cp[j+1])
1175  {
1176  if (allowANulls && do_op(ANullVal, B.numx[jj], true, false))
1177  {
1178  temp.ir[curnz] = B.ir[jj];
1179  temp.numx[curnz++] = __binary_op(ANullVal, B.numx[jj++], true, false);
1180  }
1181  else
1182  jj++;
1183  }
1184  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1185  ++i;
1186  ++j;
1187  }
1188  }
1189  while(allowBNulls && i< A.nzc) // remaining A elements after B ran out
1190  {
1191  IU prevnz = curnz;
1192  temp.jc[curnzc++] = A.jc[i++];
1193  for(IU k = A.cp[i-1]; k< A.cp[i]; ++k)
1194  {
1195  if (do_op(A.numx[k], BNullVal, false, true))
1196  {
1197  temp.ir[curnz] = A.ir[k];
1198  temp.numx[curnz++] = __binary_op(A.numx[k], BNullVal, false, true);
1199  }
1200  }
1201  //temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
1202  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1203  }
1204  while(allowANulls && j < B.nzc) // remaining B elements after A ran out
1205  {
1206  IU prevnz = curnz;
1207  temp.jc[curnzc++] = B.jc[j++];
1208  for(IU k = B.cp[j-1]; k< B.cp[j]; ++k)
1209  {
1210  if (do_op(ANullVal, B.numx[k], true, false))
1211  {
1212  temp.ir[curnz] = B.ir[k];
1213  temp.numx[curnz++] = __binary_op(ANullVal, B.numx[k], true, false);
1214  }
1215  }
1216  //temp.cp[curnzc] = temp.cp[curnzc-1] + (B.cp[j] - B.cp[j-1]);
1217  temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1218  }
1219  temp.Resize(curnzc, curnz);
1220  return temp;
1221 }
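// ---- Illustrative sketch, not part of Friends.h: the four-argument convention expected by
// do_op and __binary_op above. The trailing flags report whether the A or B operand is missing
// at that position, and allowANulls/allowBNulls/allowIntersect decide which positions are even
// considered. Both functors below are hypothetical examples, not CombBLAS types.
struct KeepWhereAExists                    // a do_op: produce output only where A has an entry
{
    bool operator()(double a, double b, bool aIsNull, bool bIsNull) const
    {
        return !aIsNull;
    }
};
struct AddTreatingNullAsZero               // a __binary_op: missing operands contribute zero
{
    double operator()(double a, double b, bool aIsNull, bool bIsNull) const
    {
        return (aIsNull ? 0.0 : a) + (bIsNull ? 0.0 : b);
    }
};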
1222 
1223 template <typename RETT, typename IU, typename NU1, typename NU2, typename _BinaryOperation, typename _BinaryPredicate>
1224 SpDCCols<IU,RETT> EWiseApply (const SpDCCols<IU,NU1> & A, const SpDCCols<IU,NU2> & B, _BinaryOperation __binary_op, _BinaryPredicate do_op, bool allowANulls, bool allowBNulls, const NU1& ANullVal, const NU2& BNullVal, const bool allowIntersect)
1225 {
1226  assert(A.m == B.m);
1227  assert(A.n == B.n);
1228 
1229  Dcsc<IU, RETT> * tdcsc = new Dcsc<IU, RETT>(EWiseApply<RETT>(A.dcsc, B.dcsc, __binary_op, do_op, allowANulls, allowBNulls, ANullVal, BNullVal, allowIntersect));
1230  return SpDCCols<IU, RETT> (A.m , A.n, tdcsc);
1231 }
1232 
1233 
1234 }
1235 
1236 #endif