ass="keywordtype">int ssize)
317 {
318  p_fetch = 0;
319  int * __restrict a = (int*) addr;
320  const int inc = CLSIZE/ssize; // number of elements in one cache line
321 
322  // Grab every 64th address
323  for(int i=0; i<total; i+=inc)
324  {
325  p_fetch += a[i];
326  }
327 }
328 
329 
330 
331 template <class T, class ITYPE>
332 void Sym<T, ITYPE>::Transpose()
333 {
334  // when we jump to the next block in the same block-column, we move leaddim positions inside "top" array
335  // leadim ~= sqrt(n) => number of blocks in each block-row
336  ITYPE leaddim = lowcolmask+1;
337  Sym symT(nz, m, n); // create empty transposed object
338 
339  ITYPE k = 0;
340  ITYPE cnz = 0;
341 
342  for(ITYPE j = 0; j < leaddim; ++j) // scan columns of top-level structure (~sqrt(n) iterations)
343  {
344  for(ITYPE i = j; i < ntop ; i += leaddim) // iterates ~ sqrt(m) times within the block column
345  {
346  symT.top[k++] = cnz;
347  cnz += top[i+1]-top[i];
348  }
349  }
350  symT.top[k] = cnz;
351 
352  // Embarrassingly parallel sort of indices to get new bottom array
353  // ITYPE nindex = (highmask & csc.ir [i]) | ((highmask & bot) >> 4);
354 }
355 
nextpoweroftwo
unsigned int nextpoweroftwo(unsigned int v)