size_t n)
1256 for(
size_t i=0; i<nn; ++i)
1258 counts[i*8] = __builtin_popcount(M[i*8]);
1259 counts[1+i*8] = __builtin_popcount(M[1+i*8]);
1260 counts[2+i*8] = __builtin_popcount(M[2+i*8]);
1261 counts[3+i*8] = __builtin_popcount(M[3+i*8]);
1262 counts[4+i*8] = __builtin_popcount(M[4+i*8]);
1263 counts[5+i*8] = __builtin_popcount(M[5+i*8]);
1264 counts[6+i*8] = __builtin_popcount(M[6+i*8]);
1265 counts[7+i*8] = __builtin_popcount(M[7+i*8]);
1267 for(
size_t i=nn*8; i<n; ++i)
1269 counts[i] = __builtin_popcount(M[i]);
/*
 * Store the population count (number of set bits) of each of the n 64-bit
 * words in M into the matching slot of counts.
 *
 * M      - input words; only read. Must hold at least n elements.
 * counts - output array; counts[i] receives the bit count of M[i].
 *          Must hold at least n elements and, per the __restrict
 *          qualifiers, must not overlap M.
 * n      - number of words to process; n == 0 writes nothing.
 */
void popcountall(
const uint64_t * __restrict M,
unsigned * __restrict counts,
size_t n)
{
	/* Number of complete groups of eight words; the n % 8 leftovers are
	 * handled by the second loop. */
	size_t nn = n / 8;

	/*
	 * __builtin_popcountll takes unsigned long long, which is at least 64
	 * bits on every target.  __builtin_popcountl takes unsigned long, which
	 * is only 32 bits on LLP64 and ILP32 targets and would silently ignore
	 * the upper half of each word there.
	 */
	for (size_t i = 0; i < nn; ++i)
	{
		counts[i*8]   = (unsigned)__builtin_popcountll(M[i*8]);
		counts[1+i*8] = (unsigned)__builtin_popcountll(M[1+i*8]);
		counts[2+i*8] = (unsigned)__builtin_popcountll(M[2+i*8]);
		counts[3+i*8] = (unsigned)__builtin_popcountll(M[3+i*8]);
		counts[4+i*8] = (unsigned)__builtin_popcountll(M[4+i*8]);
		counts[5+i*8] = (unsigned)__builtin_popcountll(M[5+i*8]);
		counts[6+i*8] = (unsigned)__builtin_popcountll(M[6+i*8]);
		counts[7+i*8] = (unsigned)__builtin_popcountll(M[7+i*8]);
	}

	/* Remaining words that did not fill a whole group of eight. */
	for (size_t i = nn*8; i < n; ++i)
	{
		counts[i] = (unsigned)__builtin_popcountll(M[i]);
	}
}
void SSEspmv(const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lcmask, unsigned lrmask, unsigned clbits)
const unsigned short masktable16[16]
const uint64_t masktable64[64]