Compressed Sparse Blocks  1.2
 All Classes Files Functions Variables Typedefs Friends Macros Pages
csbsym.h
Go to the documentation of this file.
1 #ifndef _CSBSYM_H
2 #define _CSBSYM_H
3 
4 #include <cmath>
5 #include <vector>
6 #include <algorithm>
7 #include <numeric> // for std:accumulate()
8 #include <limits> // C++ style numeric_limits<T>
9 #include <ostream>
10 #include <iterator>
11 #include "csc.h"
12 #include "mortoncompare.h"
13 
14 using namespace std;
15 
16 
17 inline void atomicallyIncrementDouble(volatile double *target, const double by){
18  asm volatile(
19  "movq %0, %%rax \n\t" // rax = *(%0)
20  "xorpd %%xmm0, %%xmm0 \n\t" // xmm0 = [0.0,0.0]
21  "movsd %1, %%xmm0\n\t" // xmm0[lo] = *(%1)
22  "1:\n\t"
23  // rax (containing *target) was last set at startup or by a failed cmpxchg
24  "movq %%rax, %%xmm1\n\t" // xmm1[lo] = rax
25  "addsd %%xmm0, %%xmm1\n\t" // xmm1 = xmm0 + xmm1 = by + xmm1
26  "movq %%xmm1, %%r8 \n\t" // r8 = xmm1[lo]
27  "lock cmpxchgq %%r8, %0\n\t" // if(*(%0)==rax){ZF=1;*(%0)=r8}else{ZF=0;rax=*(%0);}
28  "jnz 1b\n\t" // jump back if failed (ZF=0)
29  : "=m"(*target) // outputs
30  : "m"(by) // inputs
31  : "cc", "memory", "%rax", "%r8", "%xmm0", "%xmm1" // clobbered
32  );
33  return;
34 }
35 
36 /* Symmetric CSB implementation
37 ** Only upper triangle is stored
38 ** top[i][0] gives the ith diagonal block for every i
39 ** Since this class works only for symmetric (hence square) matrices,
40 ** each compressed sparse block is (lowbits+1)x(lowbits+1) and ncsb = nbr = nbc
41 */
42 template <class NT, class IT>
43 class CsbSym
44 {
45 public:
46  CsbSym ():nz(0), n(0), ncsb(0) {} // default constructor (dummy)
47 
48  CsbSym (const CsbSym<NT, IT> & rhs); // copy constructor
49  ~CsbSym();
50  CsbSym<NT,IT> & operator=(const CsbSym<NT,IT> & rhs); // assignment operator
51  CsbSym (Csc<NT, IT> & csc, int workers);
52 
53  ofstream & PrintStats(ofstream & outfile) const;
54  ofstream & Dump(ofstream & outfile) const;
55  IT colsize() const { return n;}
56  IT rowsize() const { return n;}
57  bool isPar() const { return ispar; }
58 
59 private:
60  void Init(int workers, IT forcelogbeta = 0);
61  void SeqSpMV(const NT * __restrict x, NT * __restrict y) const;
62  void BMult(IT** chunks, IT start, IT end, const NT * __restrict x, NT * __restrict y, IT ysize) const;
63 
64  void BlockPar(IT start, IT end, const NT * __restrict subx, const NT * __restrict subx_mirror,
65  NT * __restrict suby, NT * __restrict suby_mirror, IT rangebeg, IT rangeend, IT cutoff) const;
66  void BlockTriPar(IT start, IT end, const NT * __restrict subx, NT * __restrict suby, IT rangebeg, IT rangeend, IT cutoff) const;
67 
68  void SortBlocks(pair<IT, pair<IT,IT> > * pairarray, NT * val);
69  void DivideIterationSpace(IT * & lspace, IT * & rspace, IT & lsize, IT & rsize, IT size, IT d) const;
70 
71  void MultAddAtomics(NT * __restrict y, const NT * __restrict x, const IT d) const;
72  void MultDiag(NT * __restrict y, const NT * __restrict x, const IT d) const;
73  void MultMainDiag(NT * __restrict y, const NT * __restrict x) const;
74 
75  float Imbalance(IT d) const;
76  IT nzsum(IT d) const;
77 
78  IT ** top ; // pointers array (indexed by higher-order bits of the coordinate index), size = nbr*(nbc+1)
79  IT * bot; // contains lower-order bits of the coordinate index, size nnz
80  NT * num; // contains numerical values, size nnz
81 
82  vector< pair<IT,NT> > diagonal;
83 
84  bool ispar;
85  IT nz; // # nonzeros
86  IT n; // #{rows} = #{columns}
87  IT blcrange; // range indexed by one block
88 
89  IT ncsb; // #{block rows) = #{block cols}
90 
91  IT nlowbits; // # lower order bits (for both rows and columns)
92  IT nhighbits;
93  IT highmask; // mask with the first log(n)/2 bits = 1 and the other bits = 0
94  IT lowmask;
95 
96  MortCompSym<IT> mortoncmp; // comparison operator w.r.t. the (inverted N)-morton layout
97 
98  template <typename NU, typename IU>
99  friend void csbsym_gespmv (const CsbSym<NU, IU> & A, const NU * x, NU * y);
100 };
101 
102 
103 #include "friends.h"
104 #include "csbsym.cpp"
105 #endif
106 
void csbsym_gespmv(const CsbSym< NT, IT > &A, const NT *__restrict x, NT *__restrict y)
Definition: friends.h:281
void atomicallyIncrementDouble(volatile double *target, const double by)
Definition: csbsym.h:17
IT rowsize() const
Definition: csbsym.h:56
Definition: csbsym.h:43
Definition: csc.h:15
IT colsize() const
Definition: csbsym.h:55
CsbSym()
Definition: csbsym.h:46
bool isPar() const
Definition: csbsym.h:57