size_t n)
 1256     for(
size_t i=0; i<nn; ++i)
 
 1258         counts[i*8] = __builtin_popcount(M[i*8]);
 
 1259         counts[1+i*8] = __builtin_popcount(M[1+i*8]);
 
 1260         counts[2+i*8] = __builtin_popcount(M[2+i*8]);
 
 1261         counts[3+i*8] = __builtin_popcount(M[3+i*8]);
 
 1262         counts[4+i*8] = __builtin_popcount(M[4+i*8]);
 
 1263         counts[5+i*8] = __builtin_popcount(M[5+i*8]);
 
 1264         counts[6+i*8] = __builtin_popcount(M[6+i*8]);
 
 1265         counts[7+i*8] = __builtin_popcount(M[7+i*8]);
 
 1267     for(
size_t i=nn*8; i<n; ++i)
 
 1269         counts[i] = __builtin_popcount(M[i]);
 
 1273 void popcountall(
const uint64_t * __restrict M, 
unsigned * __restrict counts, 
size_t n)
 
 1276     for(
size_t i=0; i<nn; ++i)
 
 1278         counts[i*8] = __builtin_popcountl(M[i*8]);
 
 1279         counts[1+i*8] = __builtin_popcountl(M[1+i*8]);
 
 1280         counts[2+i*8] = __builtin_popcountl(M[2+i*8]);
 
 1281         counts[3+i*8] = __builtin_popcountl(M[3+i*8]);
 
 1282         counts[4+i*8] = __builtin_popcountl(M[4+i*8]);
 
 1283         counts[5+i*8] = __builtin_popcountl(M[5+i*8]);
 
 1284         counts[6+i*8] = __builtin_popcountl(M[6+i*8]);
 
 1285         counts[7+i*8] = __builtin_popcountl(M[7+i*8]);
 
 1287     for(
size_t i=nn*8; i<n; ++i)
 
 1289         counts[i] = __builtin_popcountl(M[i]);
 
void SSEspmv(const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lcmask, unsigned lrmask, unsigned clbits)
const unsigned short masktable16[16]
const uint64_t masktable64[64]