#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <climits>
#include <iostream>
#include <cmath>
#include <vector>
#include <mmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#include <pmmintrin.h>
#include <cilk/cilk_api.h>
#include <cilk/cilk.h>

Go to the source code of this file.
Classes | |
| struct | int_least_helper< Category > |
| struct | int_least_helper< 8 > |
| struct | int_least_helper< 4 > |
| struct | int_least_helper< 2 > |
| struct | thread_data |
| struct | absdiff< T > |
Macros | |
| #define | __int64 long long |
| #define | SYNCHED __cilkrts_synched() |
| #define | DETECT __cilkscreen_enable_checking() |
| #define | ENDDETECT __cilkscreen_disable_checking() |
| #define | WORKERS __cilkrts_get_nworkers() |
| #define | UNROLL 1 |
| #define | RHSDIM 1 |
| #define | BALANCETH 2 |
| #define | RBDIM 8 |
| #define | RBSIZE (RBDIM*RBDIM) |
| #define | SLACKNESS 8 |
| #define | KBYTE 1024 |
| #define | L2SIZE (256*KBYTE / RHSDIM) |
| #define | CLSIZE 64 |
| #define | BREAKEVEN 4 |
| #define | MINNNZTOPAR 128 |
| #define | BREAKNRB (8/RBDIM) |
| #define | MINNRBTOPAR (256/RBDIM) |
| #define | LOGSERIAL 15 |
| #define | ROLLING 20 |
| #define | EPSILON 0.0001 |
| #define | REPEAT 10 |
| #define | absdiff(x, y) ( (x) > (y) ? (x-y) : (y-x)) |
Functions | |
| CILK_EXPORT __CILKRTS_NOTHROW int | __cilkrts_synched (void) |
| template<typename MTYPE > | |
| MTYPE | GetMaskTable (unsigned int index) |
| template<> | |
| uint64_t | GetMaskTable< uint64_t > (unsigned int index) |
| template<> | |
| unsigned short | GetMaskTable< unsigned short > (unsigned int index) |
| template<> | |
| unsigned char | GetMaskTable< unsigned char > (unsigned int index) |
| void | popcountall (const uint64_t *__restrict M, unsigned *__restrict count, size_t size) |
| void | popcountall (const unsigned short *__restrict M, unsigned *__restrict count, size_t size) |
| void | popcountall (const unsigned char *__restrict M, unsigned *__restrict count, size_t size) |
| template<typename T > | |
| void | printhistogram (const T *scansum, size_t size, unsigned bins) |
| unsigned int | highestbitset (unsigned __int64 v) |
| template<typename MTYPE > | |
| unsigned | prescan (unsigned *a, MTYPE *const M, int n) |
| unsigned char * | aligned_malloc (uint64_t size) |
| void | aligned_free (unsigned char *ptr) |
| template<typename ITYPE > | |
| ITYPE | CumulativeSum (ITYPE *arr, ITYPE size) |
| template<typename T > | |
| T | machineEpsilon () |
| template<typename _ForwardIter , typename T > | |
| void | iota (_ForwardIter __first, _ForwardIter __last, T __value) |
| template<typename T , typename I > | |
| T ** | allocate2D (I m, I n) |
| template<typename T , typename I > | |
| void | deallocate2D (T **array, I m) |
| template<int D> | |
| void | MultAdd (double &a, const double &b, const double &c) |
| template<typename ITYPE > | |
| ITYPE | BitInterleaveLow (ITYPE x, ITYPE y) |
| template<typename ITYPE , typename OTYPE > | |
| OTYPE | BitInterleave (ITYPE x, ITYPE y) |
| template<unsigned BASE> | |
| unsigned | IntPower (unsigned exponent) |
| template<> | |
| unsigned | IntPower< 2 > (unsigned exponent) |
| template<typename T > | |
| bool | IsPower2 (T x) |
| unsigned int | nextpoweroftwo (unsigned int v) |
| __int64 | highestbitset (__int64 v) |
| unsigned int | highestbitset (unsigned int v) |
| int | highestbitset (int v) |
| unsigned int | getModulo (unsigned int n, unsigned int d) |
| unsigned int | getDivident (unsigned int n, unsigned int d) |
Variables | |
| void * | address |
| void * | base |
| const uint64_t | masktable64 [64] |
| const unsigned short | masktable16 [16] |
| const unsigned char | masktable4 [4] = { 0x08, 0x04, 0x02, 0x01 } |
| unsigned | rmasks [32] |
| #define SYNCHED __cilkrts_synched() |
| CILK_EXPORT __CILKRTS_NOTHROW int __cilkrts_synched | ( | void | ) |
T** allocate2D (I m, I n)
OTYPE BitInterleave (ITYPE x, ITYPE y)
ITYPE BitInterleaveLow (ITYPE x, ITYPE y)
ITYPE CumulativeSum (ITYPE *arr, ITYPE size)
void deallocate2D (T **array, I m)
|
inline |
| MTYPE GetMaskTable | ( | unsigned int | index | ) |
| uint64_t GetMaskTable< uint64_t > | ( | unsigned int | index | ) |
| unsigned char GetMaskTable< unsigned char > | ( | unsigned int | index | ) |
| unsigned short GetMaskTable< unsigned short > | ( | unsigned int | index | ) |
|
inline |
|
inline |
|
inline |
void iota (_ForwardIter __first, _ForwardIter __last, T __value)
void MultAdd (double &a, const double &b, const double &c)
void popcountall (const uint64_t *__restrict M, unsigned *__restrict count, size_t size)
Definition at line 1273 of file SSEspmv.cpp.
void popcountall (const unsigned short *__restrict M, unsigned *__restrict count, size_t size)
Definition at line 1253 of file SSEspmv.cpp.
void popcountall (const unsigned char *__restrict M, unsigned *__restrict count, size_t size)
Definition at line 1232 of file SSEspmv.cpp.
unsigned prescan (unsigned *a, MTYPE *const M, int n)
void printhistogram (const T *scansum, size_t size, unsigned bins)
| const unsigned short masktable16[16] |
| const unsigned char masktable4[4] = { 0x08, 0x04, 0x02, 0x01 } |
| const uint64_t masktable64[64] |
| unsigned rmasks[32] |
1.8.6