Definition: SSEspmv.cpp:71
ssp_s16
signed short ssp_s16
Definition: SSEspmv.cpp:56
ssp_m128
Definition: SSEspmv.cpp:68
atomicallyIncrementDouble
void atomicallyIncrementDouble(volatile double *target, const double by)
Definition: SSEspmv.cpp:146
popcountall
void popcountall(const unsigned char *__restrict M, unsigned *__restrict counts, size_t n)
Definition: SSEspmv.cpp:1232
] & 0x0F);
1245  counts[7+i*8] = __builtin_popcount(M[7+i*8] & 0x0F);
1246  }
1247  for(size_t i=nn*8; i<n; ++i)
1248  {
1249  counts[i] = __builtin_popcount(M[i] & 0x0F);
1250  }
1251 }
1252 
/**
 * Per-element population count for an array of 16-bit mask words.
 *
 * For every index k in [0, n), writes the number of set bits of M[k]
 * into counts[k].
 *
 * @param M       input array of n mask words (only read)
 * @param counts  output array receiving n bit counts
 * @param n       number of elements in M and counts
 */
void popcountall(const unsigned short * __restrict M, unsigned * __restrict counts, size_t n)
{
    // Largest multiple of 8 that does not exceed n.
    const size_t blocked_end = (n / 8) * 8;
    size_t k = 0;

    // Main part: handle the words in groups of eight.
    while (k < blocked_end)
    {
        for (size_t lane = 0; lane < 8; ++lane)
        {
            counts[k + lane] = __builtin_popcount(M[k + lane]);
        }
        k += 8;
    }

    // Remainder: the last n % 8 words.
    for (; k < n; ++k)
    {
        counts[k] = __builtin_popcount(M[k]);
    }
}
1272 
/**
 * Per-element population count for an array of 64-bit mask words.
 *
 * For every index i in [0, n), writes the number of set bits of M[i]
 * into counts[i].
 *
 * @param M       input array of n 64-bit mask words (only read)
 * @param counts  output array receiving n bit counts
 * @param n       number of elements in M and counts
 */
void popcountall(const uint64_t * __restrict M, unsigned * __restrict counts, size_t n)
{
    // __builtin_popcountll takes unsigned long long (at least 64 bits).
    // The 'l' variant takes unsigned long, which is only 32 bits on some
    // ABIs and would drop the upper half of each mask word there.
    size_t nn = n/8;
    for(size_t i=0; i<nn; ++i)
    {
        // Eight words per iteration.
        counts[i*8] = __builtin_popcountll(M[i*8]);
        counts[1+i*8] = __builtin_popcountll(M[1+i*8]);
        counts[2+i*8] = __builtin_popcountll(M[2+i*8]);
        counts[3+i*8] = __builtin_popcountll(M[3+i*8]);
        counts[4+i*8] = __builtin_popcountll(M[4+i*8]);
        counts[5+i*8] = __builtin_popcountll(M[5+i*8]);
        counts[6+i*8] = __builtin_popcountll(M[6+i*8]);
        counts[7+i*8] = __builtin_popcountll(M[7+i*8]);
    }
    // Remaining n % 8 words.
    for(size_t i=nn*8; i<n; ++i)
    {
        counts[i] = __builtin_popcountll(M[i]);
    }
}
1292 
1293 
1294 
ssp_u32
unsigned int ssp_u32
Definition: SSEspmv.cpp:60
SSEspmv
void SSEspmv(const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lcmask, unsigned lrmask, unsigned clbits)
Definition: SSEspmv.cpp:395
ssp_s64
signed long long ssp_s64
Definition: SSEspmv.cpp:65
ssp_u16
unsigned short ssp_u16
Definition: SSEspmv.cpp:57
ssp_m128::f64
ssp_f64 f64[2]
Definition: SSEspmv.cpp:76
masktable16
const unsigned short masktable16[16]
Definition: SSEspmv.cpp:46
ssp_u8
unsigned char ssp_u8
Definition: SSEspmv.cpp:54
ssp_m128::i
__m128i i
Definition: SSEspmv.cpp:72
masktable64
const uint64_t masktable64[64]
Definition: SSEspmv.cpp:28
ssp_f32
float ssp_f32
Definition: SSEspmv.cpp:62
ssp_s8
signed char ssp_s8
Definition: SSEspmv.cpp:53
ssp_f64
double ssp_f64
Definition: SSEspmv.cpp:63
ssp_m128::f
__m128 f
Definition: SSEspmv.cpp:70
symcsr
void symcsr(const double *__restrict V, const uint64_t *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlowbits)
Definition: SSEspmv.cpp:165
ssp_blendv_pd_SSE2
__m128d ssp_blendv_pd_SSE2(__m128d a, __m128d b, __m128d mask)
Definition: SSEspmv.cpp:92
SSEsym
void SSEsym(const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *__restrict Y, unsigned lowmask, unsigned nlbits)
Definition: SSEspmv.cpp:266
BitReverse
unsigned short BitReverse(unsigned short v)
Definition: SSEspmv.cpp:136
ssp_s32
signed int ssp_s32
Definition: SSEspmv.cpp:59
ssp_u64
unsigned long long ssp_u64
Definition: SSEspmv.cpp:66
BitReverseTable64
const unsigned char BitReverseTable64[]
Definition: SSEspmv.cpp:122
ssp_m128::d
__m128d d
Definition: SSEspmv.cpp:71
ssp_s16
signed short ssp_s16
Definition: SSEspmv.cpp:56
ssp_m128
Definition: SSEspmv.cpp:68
atomicallyIncrementDouble
void atomicallyIncrementDouble(volatile double *target, const double by)
Definition: SSEspmv.cpp:146
popcountall
void popcountall(const unsigned char *__restrict M, unsigned *__restrict counts, size_t n)
Definition: SSEspmv.cpp:1232
] & 0x0F);
1245  counts[7+i*8] = __builtin_popcount(M[7+i*8] & 0x0F);
1246  }
1247  for(size_t i=nn*8; i<n; ++i)
1248  {
1249  counts[i] = __builtin_popcount(M[i] & 0x0F);
1250  }
1251 }
1252 
/**
 * Per-element population count for an array of 16-bit mask words.
 *
 * For every index k in [0, n), writes the number of set bits of M[k]
 * into counts[k].
 *
 * @param M       input array of n mask words (only read)
 * @param counts  output array receiving n bit counts
 * @param n       number of elements in M and counts
 */
void popcountall(const unsigned short * __restrict M, unsigned * __restrict counts, size_t n)
{
    // Largest multiple of 8 that does not exceed n.
    const size_t blocked_end = (n / 8) * 8;
    size_t k = 0;

    // Main part: handle the words in groups of eight.
    while (k < blocked_end)
    {
        for (size_t lane = 0; lane < 8; ++lane)
        {
            counts[k + lane] = __builtin_popcount(M[k + lane]);
        }
        k += 8;
    }

    // Remainder: the last n % 8 words.
    for (; k < n; ++k)
    {
        counts[k] = __builtin_popcount(M[k]);
    }
}
1272 
/**
 * Per-element population count for an array of 64-bit mask words.
 *
 * For every index i in [0, n), writes the number of set bits of M[i]
 * into counts[i].
 *
 * @param M       input array of n 64-bit mask words (only read)
 * @param counts  output array receiving n bit counts
 * @param n       number of elements in M and counts
 */
void popcountall(const uint64_t * __restrict M, unsigned * __restrict counts, size_t n)
{
    // __builtin_popcountll takes unsigned long long (at least 64 bits).
    // The 'l' variant takes unsigned long, which is only 32 bits on some
    // ABIs and would drop the upper half of each mask word there.
    size_t nn = n/8;
    for(size_t i=0; i<nn; ++i)
    {
        // Eight words per iteration.
        counts[i*8] = __builtin_popcountll(M[i*8]);
        counts[1+i*8] = __builtin_popcountll(M[1+i*8]);
        counts[2+i*8] = __builtin_popcountll(M[2+i*8]);
        counts[3+i*8] = __builtin_popcountll(M[3+i*8]);
        counts[4+i*8] = __builtin_popcountll(M[4+i*8]);
        counts[5+i*8] = __builtin_popcountll(M[5+i*8]);
        counts[6+i*8] = __builtin_popcountll(M[6+i*8]);
        counts[7+i*8] = __builtin_popcountll(M[7+i*8]);
    }
    // Remaining n % 8 words.
    for(size_t i=nn*8; i<n; ++i)
    {
        counts[i] = __builtin_popcountll(M[i]);
    }
}
1292 
1293 
1294 
ssp_u32
unsigned int ssp_u32
Definition: SSEspmv.cpp:60
SSEspmv
void SSEspmv(const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lcmask, unsigned lrmask, unsigned clbits)
Definition: SSEspmv.cpp:395
ssp_s64
signed long long ssp_s64
Definition: SSEspmv.cpp:65
ssp_u16
unsigned short ssp_u16
Definition: SSEspmv.cpp:57
ssp_m128::f64
ssp_f64 f64[2]
Definition: SSEspmv.cpp:76
masktable16
const unsigned short masktable16[16]
Definition: SSEspmv.cpp:46
ssp_u8
unsigned char ssp_u8
Definition: SSEspmv.cpp:54
ssp_m128::i
__m128i i
Definition: SSEspmv.cpp:72
masktable64
const uint64_t masktable64[64]
Definition: SSEspmv.cpp:28
ssp_f32
float ssp_f32
Definition: SSEspmv.cpp:62
ssp_s8
signed char ssp_s8
Definition: SSEspmv.cpp:53
ssp_f64
double ssp_f64
Definition: SSEspmv.cpp:63
ssp_m128::f
__m128 f
Definition: SSEspmv.cpp:70
symcsr
void symcsr(const double *__restrict V, const uint64_t *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlowbits)
Definition: SSEspmv.cpp:165
ssp_blendv_pd_SSE2
__m128d ssp_blendv_pd_SSE2(__m128d a, __m128d b, __m128d mask)
Definition: SSEspmv.cpp:92
SSEsym
void SSEsym(const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *__restrict Y, unsigned lowmask, unsigned nlbits)
Definition: SSEspmv.cpp:266
BitReverse
unsigned short BitReverse(unsigned short v)
Definition: SSEspmv.cpp:136
ssp_s32
signed int ssp_s32
Definition: SSEspmv.cpp:59
ssp_u64
unsigned long long ssp_u64
Definition: SSEspmv.cpp:66
BitReverseTable64
const unsigned char BitReverseTable64[]
Definition: SSEspmv.cpp:122
ssp_m128::d
__m128d d
Definition: SSEspmv.cpp:71
ssp_s16
signed short ssp_s16
Definition: SSEspmv.cpp:56
ssp_m128
Definition: SSEspmv.cpp:68
atomicallyIncrementDouble
void atomicallyIncrementDouble(volatile double *target, const double by)
Definition: SSEspmv.cpp:146
popcountall
void popcountall(const unsigned char *__restrict M, unsigned *__restrict counts, size_t n)
Definition: SSEspmv.cpp:1232
] & 0x0F);
1245  counts[7+i*8] = __builtin_popcount(M[7+i*8] & 0x0F);
1246  }
1247  for(size_t i=nn*8; i<n; ++i)
1248  {
1249  counts[i] = __builtin_popcount(M[i] & 0x0F);
1250  }
1251 }
1252 
/**
 * Per-element population count for an array of 16-bit mask words.
 *
 * For every index k in [0, n), writes the number of set bits of M[k]
 * into counts[k].
 *
 * @param M       input array of n mask words (only read)
 * @param counts  output array receiving n bit counts
 * @param n       number of elements in M and counts
 */
void popcountall(const unsigned short * __restrict M, unsigned * __restrict counts, size_t n)
{
    // Largest multiple of 8 that does not exceed n.
    const size_t blocked_end = (n / 8) * 8;
    size_t k = 0;

    // Main part: handle the words in groups of eight.
    while (k < blocked_end)
    {
        for (size_t lane = 0; lane < 8; ++lane)
        {
            counts[k + lane] = __builtin_popcount(M[k + lane]);
        }
        k += 8;
    }

    // Remainder: the last n % 8 words.
    for (; k < n; ++k)
    {
        counts[k] = __builtin_popcount(M[k]);
    }
}
1272 
/**
 * Per-element population count for an array of 64-bit mask words.
 *
 * For every index i in [0, n), writes the number of set bits of M[i]
 * into counts[i].
 *
 * @param M       input array of n 64-bit mask words (only read)
 * @param counts  output array receiving n bit counts
 * @param n       number of elements in M and counts
 */
void popcountall(const uint64_t * __restrict M, unsigned * __restrict counts, size_t n)
{
    // __builtin_popcountll takes unsigned long long (at least 64 bits).
    // The 'l' variant takes unsigned long, which is only 32 bits on some
    // ABIs and would drop the upper half of each mask word there.
    size_t nn = n/8;
    for(size_t i=0; i<nn; ++i)
    {
        // Eight words per iteration.
        counts[i*8] = __builtin_popcountll(M[i*8]);
        counts[1+i*8] = __builtin_popcountll(M[1+i*8]);
        counts[2+i*8] = __builtin_popcountll(M[2+i*8]);
        counts[3+i*8] = __builtin_popcountll(M[3+i*8]);
        counts[4+i*8] = __builtin_popcountll(M[4+i*8]);
        counts[5+i*8] = __builtin_popcountll(M[5+i*8]);
        counts[6+i*8] = __builtin_popcountll(M[6+i*8]);
        counts[7+i*8] = __builtin_popcountll(M[7+i*8]);
    }
    // Remaining n % 8 words.
    for(size_t i=nn*8; i<n; ++i)
    {
        counts[i] = __builtin_popcountll(M[i]);
    }
}
1292 
1293 
1294 
ssp_u32
unsigned int ssp_u32
Definition: SSEspmv.cpp:60
SSEspmv
void SSEspmv(const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lcmask, unsigned lrmask, unsigned clbits)
Definition: SSEspmv.cpp:395
ssp_s64
signed long long ssp_s64
Definition: SSEspmv.cpp:65
ssp_u16
unsigned short ssp_u16
Definition: SSEspmv.cpp:57
ssp_m128::f64
ssp_f64 f64[2]
Definition: SSEspmv.cpp:76
masktable16
const unsigned short masktable16[16]
Definition: SSEspmv.cpp:46
ssp_u8
unsigned char ssp_u8
Definition: SSEspmv.cpp:54
ssp_m128::i
__m128i i
Definition: SSEspmv.cpp:72
masktable64
const uint64_t masktable64[64]
Definition: SSEspmv.cpp:28
ssp_f32
float ssp_f32
Definition: SSEspmv.cpp:62
ssp_s8
signed char ssp_s8
Definition: SSEspmv.cpp:53
ssp_f64
double ssp_f64
Definition: SSEspmv.cpp:63
ssp_m128::f
__m128 f
Definition: SSEspmv.cpp:70
symcsr
void symcsr(const double *__restrict V, const uint64_t *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlowbits)
Definition: SSEspmv.cpp:165
ssp_blendv_pd_SSE2
__m128d ssp_blendv_pd_SSE2(__m128d a, __m128d b, __m128d mask)
Definition: SSEspmv.cpp:92
SSEsym
void SSEsym(const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *__restrict Y, unsigned lowmask, unsigned nlbits)
Definition: SSEspmv.cpp:266
BitReverse
unsigned short BitReverse(unsigned short v)
Definition: SSEspmv.cpp:136
ssp_s32
signed int ssp_s32
Definition: SSEspmv.cpp:59
ssp_u64
unsigned long long ssp_u64
Definition: SSEspmv.cpp:66
BitReverseTable64
const unsigned char BitReverseTable64[]
Definition: SSEspmv.cpp:122
ssp_m128::d
__m128d d
Definition: SSEspmv.cpp:71
ssp_s16
signed short ssp_s16
Definition: SSEspmv.cpp:56
ssp_m128
Definition: SSEspmv.cpp:68
atomicallyIncrementDouble
void atomicallyIncrementDouble(volatile double *target, const double by)
Definition: SSEspmv.cpp:146
popcountall
void popcountall(const unsigned char *__restrict M, unsigned *__restrict counts, size_t n)
Definition: SSEspmv.cpp:1232
] & 0x0F);
1245  counts[7+i*8] = __builtin_popcount(M[7+i*8] & 0x0F);
1246  }
1247  for(size_t i=nn*8; i<n; ++i)
1248  {
1249  counts[i] = __builtin_popcount(M[i] & 0x0F);
1250  }
1251 }
1252 
/**
 * Per-element population count for an array of 16-bit mask words.
 *
 * For every index k in [0, n), writes the number of set bits of M[k]
 * into counts[k].
 *
 * @param M       input array of n mask words (only read)
 * @param counts  output array receiving n bit counts
 * @param n       number of elements in M and counts
 */
void popcountall(const unsigned short * __restrict M, unsigned * __restrict counts, size_t n)
{
    // Largest multiple of 8 that does not exceed n.
    const size_t blocked_end = (n / 8) * 8;
    size_t k = 0;

    // Main part: handle the words in groups of eight.
    while (k < blocked_end)
    {
        for (size_t lane = 0; lane < 8; ++lane)
        {
            counts[k + lane] = __builtin_popcount(M[k + lane]);
        }
        k += 8;
    }

    // Remainder: the last n % 8 words.
    for (; k < n; ++k)
    {
        counts[k] = __builtin_popcount(M[k]);
    }
}
1272 
/**
 * Per-element population count for an array of 64-bit mask words.
 *
 * For every index i in [0, n), writes the number of set bits of M[i]
 * into counts[i].
 *
 * @param M       input array of n 64-bit mask words (only read)
 * @param counts  output array receiving n bit counts
 * @param n       number of elements in M and counts
 */
void popcountall(const uint64_t * __restrict M, unsigned * __restrict counts, size_t n)
{
    // __builtin_popcountll takes unsigned long long (at least 64 bits).
    // The 'l' variant takes unsigned long, which is only 32 bits on some
    // ABIs and would drop the upper half of each mask word there.
    size_t nn = n/8;
    for(size_t i=0; i<nn; ++i)
    {
        // Eight words per iteration.
        counts[i*8] = __builtin_popcountll(M[i*8]);
        counts[1+i*8] = __builtin_popcountll(M[1+i*8]);
        counts[2+i*8] = __builtin_popcountll(M[2+i*8]);
        counts[3+i*8] = __builtin_popcountll(M[3+i*8]);
        counts[4+i*8] = __builtin_popcountll(M[4+i*8]);
        counts[5+i*8] = __builtin_popcountll(M[5+i*8]);
        counts[6+i*8] = __builtin_popcountll(M[6+i*8]);
        counts[7+i*8] = __builtin_popcountll(M[7+i*8]);
    }
    // Remaining n % 8 words.
    for(size_t i=nn*8; i<n; ++i)
    {
        counts[i] = __builtin_popcountll(M[i]);
    }
}
1292 
1293 
1294 
ssp_u32
unsigned int ssp_u32
Definition: SSEspmv.cpp:60
SSEspmv
void SSEspmv(const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lcmask, unsigned lrmask, unsigned clbits)
Definition: SSEspmv.cpp:395
ssp_s64
signed long long ssp_s64
Definition: SSEspmv.cpp:65
ssp_u16
unsigned short ssp_u16
Definition: SSEspmv.cpp:57
ssp_m128::f64
ssp_f64 f64[2]
Definition: SSEspmv.cpp:76
masktable16
const unsigned short masktable16[16]
Definition: SSEspmv.cpp:46
ssp_u8
unsigned char ssp_u8
Definition: SSEspmv.cpp:54
ssp_m128::i
__m128i i
Definition: SSEspmv.cpp:72
masktable64
const uint64_t masktable64[64]
Definition: SSEspmv.cpp:28
ssp_f32
float ssp_f32
Definition: SSEspmv.cpp:62
ssp_s8
signed char ssp_s8
Definition: SSEspmv.cpp:53