Compressed Sparse Blocks  1.2
bicsb.h
Go to the documentation of this file.
1 #ifndef _BICSB_H
2 #define _BICSB_H
3
4 #include <cmath>
5 #include <vector>
6 #include <algorithm>
7 #include <numeric> // for std:accumulate()
8 #include <limits> // C++ style numeric_limits<T>
9 #include <tuple>
10 #include "csc.h"
11 #include "mortoncompare.h"
12
13 using namespace std;
14
15 // CSB variant where nonzeros "within each block" are sorted w.r.t. the bit-interleaved order
16 // Implementer's (Aydin) notes:
17 // - to ensure correctness in BlockPar, we use square blocks (lowcolmask = highcolmask)
18 template <class NT, class IT>
19 class BiCsb
20 {
21 public:
22  BiCsb ():nz(0), m(0), n(0), nbc(0), nbr(0) {} // default constructor (dummy)
23
24  BiCsb (IT size,IT rows, IT cols, int workers);
25  BiCsb (IT size,IT rows, IT cols, IT * ri, IT * ci, NT * val, int workers, IT forcelogbeta = 0);
26
27  BiCsb (const BiCsb<NT, IT> & rhs); // copy constructor
28  ~BiCsb();
29  BiCsb<NT,IT> & operator=(const BiCsb<NT,IT> & rhs); // assignment operator
30  BiCsb (Csc<NT, IT> & csc, int workers, IT forcelogbeta = 0);
31
32  ofstream & PrintStats(ofstream & outfile) const;
33  IT colsize() const { return n;}
34  IT rowsize() const { return m;}
35  IT numnonzeros() const { return nz; }
36  bool isPar() const { return ispar; }
37
38 private:
39  void Init(int workers, IT forcelogbeta = 0);
40
41  template <typename SR, typename RHS, typename LHS>
42  void SubSpMV(IT * btop, IT bstart, IT bend, const RHS * __restrict x, LHS * __restrict suby) const;
43
44  template <typename SR, typename RHS, typename LHS>
45  void SubSpMVTrans(IT col, IT rowstart, IT rowend, const RHS * __restrict x, LHS * __restrict suby) const;
46
47  template <typename SR, typename RHS, typename LHS>
48  void SubSpMVTrans(const vector< tuple<IT,IT,IT> > & chunk, const RHS * __restrict x, LHS * __restrict suby) const;
49
50  template <typename SR, typename RHS, typename LHS>
51  void BMult(IT** chunks, IT start, IT end, const RHS * __restrict x, LHS * __restrict y, IT ysize) const;
52
53  template <typename SR, typename RHS, typename LHS>
54  void BTransMult(vector< vector< tuple<IT,IT,IT> > * > & chunks, IT start, IT end, const RHS * __restrict x, LHS * __restrict y, IT ysize) const;
55
56  template <typename SR, typename RHS, typename LHS>
57  void BlockPar(IT start, IT end, const RHS * __restrict subx, LHS * __restrict suby,
58  IT rangebeg, IT rangeend, IT cutoff) const;
59
60  template <typename SR, typename RHS, typename LHS>
61  void BlockParT(IT start, IT end, const RHS * __restrict subx, LHS * __restrict suby,
62  IT rangebeg, IT rangeend, IT cutoff) const;
63
64  void SortBlocks(pair<IT, pair<IT,IT> > * pairarray, NT * val);
65
66  IT ** top ; // pointers array (indexed by higher-order bits of the coordinate index), size ~= ntop+1
67  IT * bot; // contains lower-order bits of the coordinate index, size nnz
68  NT * num; // contains numerical values, size nnz
69
70  bool ispar;
71  IT nz; // # nonzeros
72  IT m; // # rows
73  IT n; // # columns
74  IT blcrange; // range indexed by one block
75
76  IT nbc; // #{column blocks} = #{blocks in any block row}
77  IT nbr; // #{block rows)
78
79  IT rowlowbits; // # lower order bits for rows
80  IT rowhighbits;
81  IT highrowmask; // mask with the first log(m)/2 bits = 1 and the other bits = 0
83
84  IT collowbits; // # lower order bits for columns
85  IT colhighbits;
86  IT highcolmask; // mask with the first log(n)/2 bits = 1 and the other bits = 0
88
89  MortonCompare<IT> mortoncmp; // comparison operator w.r.t. the N-morton layout
90
91  template <typename SR, typename NU, typename IU, typename RHS, typename LHS>
92  friend void bicsb_gespmv (const BiCsb<NU, IU> & A, const RHS * x, LHS * y);
93
94  template <typename SR, typename NU, typename IU, typename RHS, typename LHS>
95  friend void bicsb_gespmvt (const BiCsb<NU, IU> & A, const RHS * __restrict x, LHS * __restrict y);
96
97  template <class CSB>
98  friend float RowImbalance(const CSB & A); // just befriend the BiCsb instantiation
99
100  template <typename NU, typename IU>
101  friend float ColImbalance(const BiCsb<NU, IU> & A);
102 };
103
104
105 // Partial template specialization
106 template <class IT>
107 class BiCsb<bool,IT>
108 {
109 public:
110  BiCsb ():nz(0), m(0), n(0), nbc(0), nbr(0) {} // default constructor (dummy)
111
112  BiCsb (IT size,IT rows, IT cols, int workers);
113  BiCsb (IT size,IT rows, IT cols, IT * ri, IT * ci, int workers, IT forcelogbeta = 0);
114
115  BiCsb (const BiCsb<bool, IT> & rhs); // copy constructor
116  ~BiCsb();
117  BiCsb<bool,IT> & operator=(const BiCsb<bool,IT> & rhs); // assignment operator
118
119  template <typename NT>
120  BiCsb (Csc<NT, IT> & csc, int workers);
121
122  IT colsize() const { return n;}
123  IT rowsize() const { return m;}
124  IT numnonzeros() const { return nz; }
125  bool isPar() const { return ispar; }
126
127 private:
128  void Init(int workers, IT forcelogbeta = 0);
129
130  template <typename SR, typename RHS, typename LHS>
131  void SubSpMV(IT * btop, IT bstart, IT bend, const RHS * __restrict x, LHS * __restrict suby) const;
132
133  template <typename SR, typename RHS, typename LHS>
134  void SubSpMVTrans(IT col, IT rowstart, IT rowend, const RHS * __restrict x, LHS * __restrict suby) const;
135
136  template <typename SR, typename RHS, typename LHS>
137  void SubSpMVTrans(const vector< tuple<IT,IT,IT> > & chunk, const RHS * __restrict x, LHS * __restrict suby) const;
138
139  template <typename SR, typename RHS, typename LHS>
140  void BMult(IT ** chunks, IT start, IT end, const RHS * __restrict x, LHS * __restrict y, IT ysize) const;
141
142  template <typename SR, typename RHS, typename LHS>
143  void BTransMult(vector< vector< tuple<IT,IT,IT> > * > & chunks, IT start, IT end, const RHS * __restrict x, LHS * __restrict y, IT ysize) const;
144
145  template <typename SR, typename RHS, typename LHS>
146  void BlockPar(IT start, IT end, const RHS * __restrict subx, LHS * __restrict suby,
147  IT rangebeg, IT rangeend, IT cutoff) const;
148
149  template <typename SR, typename RHS, typename LHS>
150  void BlockParT(IT start, IT end, const RHS * __restrict subx, LHS * __restrict suby,
151  IT rangebeg, IT rangeend, IT cutoff) const;
152
153  void SortBlocks(pair<IT, pair<IT,IT> > * pairarray);
154
155  IT ** top ; // pointers array (indexed by higher-order bits of the coordinate index), size ~= ntop+1
156  IT * bot; // contains lower-order bits of the coordinate index, size nnz
157
158  bool ispar;
159  IT nz; // # nonzeros
160  IT m; // # rows
161  IT n; // # columns
162  IT blcrange; // range indexed by one block
163
164  IT nbc; // #{column blocks} = #{blocks in any block row}
165  IT nbr; // #{block rows)
166
167  IT rowlowbits; // # lower order bits for rows
168  IT rowhighbits;
169  IT highrowmask; // mask with the first log(m)/2 bits = 1 and the other bits = 0
171
172  IT collowbits; // # lower order bits for columns
173  IT colhighbits;
174  IT highcolmask; // mask with the first log(n)/2 bits = 1 and the other bits = 0
176
177  MortonCompare<IT> mortoncmp; // comparison operator w.r.t. the N-morton layout
178
179  template <typename SR, typename NU, typename IU, typename RHS, typename LHS>
180  friend void bicsb_gespmv (const BiCsb<NU, IU> & A, const RHS * __restrict x, LHS * __restrict y);
181
182  template <typename SR, typename NU, typename IU, typename RHS, typename LHS>
183  friend void bicsb_gespmvt (const BiCsb<NU, IU> & A, const RHS * __restrict x, LHS * __restrict y);
184
185  template <class CSB>
186  friend float RowImbalance(const CSB & A); // befriend any CSB instantiation
187
188  template <typename NU, typename IU>
189  friend float ColImbalance(const BiCsb<NU, IU> & A);
190 };
191
192 #include "friends.h"
193 #include "bicsb.cpp"
194 #endif
void bicsb_gespmv(const BiCsb< NT, IT > &A, const RHS *__restrict x, LHS *__restrict y)
Definition: friends.h:113
BiCsb()
Definition: bicsb.h:22
IT rowsize() const
Definition: bicsb.h:34
IT rowsize() const
Definition: bicsb.h:123
float ColImbalance(const BiCsb< NT, IT > &A)
Definition: friends.h:414
IT numnonzeros() const
Definition: bicsb.h:35
IT colsize() const
Definition: bicsb.h:33
void bicsb_gespmvt(const BiCsb< NT, IT > &A, const RHS *__restrict x, LHS *__restrict y)
Definition: friends.h:184
Definition: csc.h:15
IT numnonzeros() const
Definition: bicsb.h:124
float RowImbalance(const CSB &A)
Definition: friends.h:400
Definition: bicsb.h:19
IT colsize() const
Definition: bicsb.h:122
bool isPar() const
Definition: bicsb.h:36
bool isPar() const
Definition: bicsb.h:125