#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <climits>
#include <iostream>
#include <cmath>
#include <vector>
#include <mmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#include <pmmintrin.h>
#include <cilk/cilk_api.h>
#include <cilk/cilk.h>
Go to the source code of this file.
Classes
struct int_least_helper< Category >
struct int_least_helper< 8 >
struct int_least_helper< 4 >
struct int_least_helper< 2 >
struct thread_data
struct absdiff< T >
Macros
#define __int64 long long
#define SYNCHED __cilkrts_synched()
#define DETECT __cilkscreen_enable_checking()
#define ENDDETECT __cilkscreen_disable_checking()
#define WORKERS __cilkrts_get_nworkers()
#define UNROLL 1
#define RHSDIM 1
#define BALANCETH 2
#define RBDIM 8
#define RBSIZE (RBDIM*RBDIM)
#define SLACKNESS 8
#define KBYTE 1024
#define L2SIZE (256*KBYTE / RHSDIM)
#define CLSIZE 64
#define BREAKEVEN 4
#define MINNNZTOPAR 128
#define BREAKNRB (8/RBDIM)
#define MINNRBTOPAR (256/RBDIM)
#define LOGSERIAL 15
#define ROLLING 20
#define EPSILON 0.0001
#define REPEAT 10
#define absdiff(x, y) ( (x) > (y) ? (x-y) : (y-x))
Functions
CILK_EXPORT __CILKRTS_NOTHROW int __cilkrts_synched (void)
template<typename MTYPE > MTYPE GetMaskTable (unsigned int index)
template<> uint64_t GetMaskTable< uint64_t > (unsigned int index)
template<> unsigned short GetMaskTable< unsigned short > (unsigned int index)
template<> unsigned char GetMaskTable< unsigned char > (unsigned int index)
void popcountall (const uint64_t *__restrict M, unsigned *__restrict count, size_t size)
void popcountall (const unsigned short *__restrict M, unsigned *__restrict count, size_t size)
void popcountall (const unsigned char *__restrict M, unsigned *__restrict count, size_t size)
template<typename T > void printhistogram (const T *scansum, size_t size, unsigned bins)
unsigned int highestbitset (unsigned __int64 v)
template<typename MTYPE > unsigned prescan (unsigned *a, MTYPE *const M, int n)
unsigned char * aligned_malloc (uint64_t size)
void aligned_free (unsigned char *ptr)
template<typename ITYPE > ITYPE CumulativeSum (ITYPE *arr, ITYPE size)
template<typename T > T machineEpsilon ()
template<typename _ForwardIter , typename T > void iota (_ForwardIter __first, _ForwardIter __last, T __value)
template<typename T , typename I > T ** allocate2D (I m, I n)
template<typename T , typename I > void deallocate2D (T **array, I m)
template<int D> void MultAdd (double &a, const double &b, const double &c)
template<typename ITYPE > ITYPE BitInterleaveLow (ITYPE x, ITYPE y)
template<typename ITYPE , typename OTYPE > OTYPE BitInterleave (ITYPE x, ITYPE y)
template<unsigned BASE> unsigned IntPower (unsigned exponent)
template<> unsigned IntPower< 2 > (unsigned exponent)
template<typename T > bool IsPower2 (T x)
unsigned int nextpoweroftwo (unsigned int v)
__int64 highestbitset (__int64 v)
unsigned int highestbitset (unsigned int v)
int highestbitset (int v)
unsigned int getModulo (unsigned int n, unsigned int d)
unsigned int getDivident (unsigned int n, unsigned int d)
Variables
void * address
void * base
const uint64_t masktable64 [64]
const unsigned short masktable16 [16]
const unsigned char masktable4 [4] = { 0x08, 0x04, 0x02, 0x01 }
unsigned rmasks [32]
#define SYNCHED __cilkrts_synched() |
CILK_EXPORT __CILKRTS_NOTHROW int __cilkrts_synched (void)
T** allocate2D (I m, I n)
OTYPE BitInterleave (ITYPE x, ITYPE y)
ITYPE BitInterleaveLow (ITYPE x, ITYPE y)
ITYPE CumulativeSum (ITYPE *arr, ITYPE size)
void deallocate2D (T **array, I m)
|
inline |
MTYPE GetMaskTable (unsigned int index)
uint64_t GetMaskTable< uint64_t > (unsigned int index)
unsigned char GetMaskTable< unsigned char > (unsigned int index)
unsigned short GetMaskTable< unsigned short > (unsigned int index)
|
inline |
|
inline |
|
inline |
void iota (_ForwardIter __first, _ForwardIter __last, T __value)
void MultAdd (double &a, const double &b, const double &c)
void popcountall (const uint64_t *__restrict M, unsigned *__restrict count, size_t size)
Definition at line 1273 of file SSEspmv.cpp.
void popcountall (const unsigned short *__restrict M, unsigned *__restrict count, size_t size)
Definition at line 1253 of file SSEspmv.cpp.
void popcountall (const unsigned char *__restrict M, unsigned *__restrict count, size_t size)
Definition at line 1232 of file SSEspmv.cpp.
unsigned prescan (unsigned *a, MTYPE *const M, int n)
void printhistogram (const T *scansum, size_t size, unsigned bins)
const unsigned short masktable16[16] |
const unsigned char masktable4[4] = { 0x08, 0x04, 0x02, 0x01 } |
const uint64_t masktable64[64] |
unsigned rmasks[32] |