ass="keywordtype">int ssize)
317
{
318
p_fetch = 0;
319
int
* __restrict a = (
int
*) addr;
320
const
int
inc =
CLSIZE
/ssize;
// number of elements in one cache line
321
322
// Grab every 64th address
323
for
(
int
i=0; i<total; i+=inc)
324
{
325
p_fetch += a[i];
326
}
327
}
328
329
330
331
template
<
class
T,
class
ITYPE>
332
void
Sym<T, ITYPE>::Transpose
()
333
{
334
// when we jump to the next block in the same block-column, we move leaddim positions inside "top" array
335
// leadim ~= sqrt(n) => number of blocks in each block-row
336
ITYPE leaddim = lowcolmask+1;
337
Sym
symT(nz, m, n);
// create empty transposed object
338
339
ITYPE k = 0;
340
ITYPE cnz = 0;
341
342
for
(ITYPE j = 0; j < leaddim; ++j)
// scan columns of top-level structure (~sqrt(n) iterations)
343
{
344
for
(ITYPE i = j; i < ntop ; i += leaddim)
// iterates ~ sqrt(m) times within the block column
345
{
346
symT.top[k++] = cnz;
347
cnz += top[i+1]-top[i];
348
}
349
}
350
symT.top[k] = cnz;
351
352
// Embarrassingly parallel sort of indices to get new bottom array
353
// ITYPE nindex = (highmask & csc.ir [i]) | ((highmask & bot) >> 4);
354
}
355
nextpoweroftwo
unsigned int nextpoweroftwo(unsigned int v)