20 "xorpd %%xmm0, %%xmm0 \n\t"
21 "movsd %1, %%xmm0\n\t"
24 "movq %%rax, %%xmm1\n\t"
25 "addsd %%xmm0, %%xmm1\n\t"
26 "movq %%xmm1, %%r8 \n\t"
27 "lock cmpxchgq %%r8, %0\n\t"
31 :
"cc",
"memory",
"%rax",
"%r8",
"%xmm0",
"%xmm1"
42 template <
class NT,
class IT>
// Const member: takes an output file stream by reference and returns an
// ofstream reference. The definition is not visible in this listing.
// NOTE(review): presumably writes matrix statistics to `outfile` and returns
// that same stream for chaining -- confirm against the definition.
53 ofstream & PrintStats(ofstream & outfile)
const;
// Const member: takes an output file stream by reference and returns an
// ofstream reference. The definition is not visible in this listing.
// NOTE(review): presumably dumps the internal arrays to `outfile` for
// debugging -- confirm against the definition.
54 ofstream & Dump(ofstream & outfile)
const;
// Accessor: returns the current value of the `ispar` member without
// modifying the object.
// NOTE(review): `ispar` presumably records whether the parallel code path
// is enabled -- confirm where it is set.
57 bool isPar()
const {
return ispar; }
// Non-const member taking a worker count and an optional `forcelogbeta`
// (defaults to 0 when the caller omits it).
// NOTE(review): presumably sets up the block layout for `workers` threads,
// with 0 meaning "choose the block-size exponent automatically" -- confirm.
60 void Init(
int workers, IT forcelogbeta = 0);
// Const member: reads from `x` (pointer-to-const) and may write through `y`.
// Both pointers are `__restrict`, so the caller promises they do not alias.
// NOTE(review): presumably the sequential sparse matrix-vector product
// accumulating A*x into y -- confirm against the definition.
61 void SeqSpMV(
const NT * __restrict x, NT * __restrict y)
const;
// Const member: takes an array of IT pointers (`chunks`), a [start, end)-style
// pair of IT bounds, a read-only input `x`, a writable output `y`, and `ysize`.
// `x` and `y` are `__restrict`, so the caller promises they do not alias.
// NOTE(review): whether `end` is inclusive or exclusive, and whether `ysize`
// is the length of `y`, cannot be told from this declaration -- confirm.
62 void BMult(IT** chunks, IT start, IT end,
const NT * __restrict x, NT * __restrict y, IT ysize)
const;
// Const member operating on two read-only inputs (`subx`, `subx_mirror`) and
// two writable outputs (`suby`, `suby_mirror`), all declared `__restrict`
// (caller promises no aliasing), plus IT bounds `start`/`end`,
// `rangebeg`/`rangeend`, and a `cutoff`.
// NOTE(review): the "mirror" pair presumably serves the transposed half of a
// symmetric block, and `cutoff` presumably limits recursive subdivision --
// confirm against the definition.
64 void BlockPar(IT start, IT end,
const NT * __restrict subx,
const NT * __restrict subx_mirror,
65 NT * __restrict suby, NT * __restrict suby_mirror, IT rangebeg, IT rangeend, IT cutoff)
const;
// Const member with the same bound/cutoff parameters as BlockPar but a single
// read-only input `subx` and a single writable output `suby` (both
// `__restrict`, so the caller promises they do not alias).
// NOTE(review): presumably the variant for triangular (diagonal) blocks, where
// no separate mirror vectors are needed -- confirm against the definition.
66 void BlockTriPar(IT start, IT end,
const NT * __restrict subx, NT * __restrict suby, IT rangebeg, IT rangeend, IT cutoff)
const;
// Non-const member taking an array of (IT, (IT, IT)) pairs and an array of NT
// values; array lengths are not passed, so they must be known to the object.
// NOTE(review): presumably reorders nonzeros into block order using the pair
// keys -- confirm the meaning of each pair component against the definition.
68 void SortBlocks(pair<IT, pair<IT,IT> > * pairarray, NT * val);
// Const member. `lspace`, `rspace`, `lsize` and `rsize` are taken by non-const
// reference, so the callee can rebind the pointers and change the sizes;
// `size` and `d` are passed by value.
// NOTE(review): presumably splits an iteration space of `size` items for
// diagonal `d` into left/right halves returned via the reference parameters;
// ownership of any memory assigned to `lspace`/`rspace` is not visible here --
// confirm against the definition.
69 void DivideIterationSpace(IT * & lspace, IT * & rspace, IT & lsize, IT & rsize, IT size, IT d)
const;
// Const member: writable output `y`, read-only input `x` (both `__restrict`,
// so the caller promises they do not alias), and an IT value `d`.
// NOTE(review): the name suggests the updates to `y` use atomic additions and
// that `d` selects a diagonal -- confirm against the definition.
71 void MultAddAtomics(NT * __restrict y,
const NT * __restrict x,
const IT d)
const;
// Const member with the same parameter list as MultAddAtomics: writable `y`,
// read-only `x` (both `__restrict`), and an IT value `d`.
// NOTE(review): presumably multiplies the block diagonal selected by `d`
// without atomics -- confirm against the definition.
72 void MultDiag(NT * __restrict y,
const NT * __restrict x,
const IT d)
const;
// Const member: writable output `y` and read-only input `x`, both
// `__restrict` (caller promises they do not alias). Takes no diagonal index.
// NOTE(review): presumably handles the main block diagonal -- confirm against
// the definition.
73 void MultMainDiag(NT * __restrict y,
const NT * __restrict x)
const;
// Const member returning a float computed for the IT argument `d`.
// NOTE(review): presumably a load-imbalance ratio for diagonal `d`; the exact
// metric is not visible here -- confirm against the definition.
75 float Imbalance(IT d)
const;
// Sequence of (IT, NT) pairs.
// NOTE(review): presumably (index, value) entries of the matrix's main
// diagonal stored apart from the blocks -- confirm where it is filled.
82 vector< pair<IT,NT> > diagonal;
98 template <
typename NU,
typename IU>
void csbsym_gespmv(const CsbSym< NT, IT > &A, const NT *__restrict x, NT *__restrict y)
void atomicallyIncrementDouble(volatile double *target, const double by)