#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <iostream>
#include <algorithm>
#include <iterator>
#include <mmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#include <pmmintrin.h>
#include <tmmintrin.h>
#include <smmintrin.h>
#include <nmmintrin.h>
#include <wmmintrin.h>
#include <ammintrin.h>
Go to the source code of this file.
Classes | |
| union | ssp_m128 |
Typedefs | |
| typedef signed char | ssp_s8 |
| typedef unsigned char | ssp_u8 |
| typedef signed short | ssp_s16 |
| typedef unsigned short | ssp_u16 |
| typedef signed int | ssp_s32 |
| typedef unsigned int | ssp_u32 |
| typedef float | ssp_f32 |
| typedef double | ssp_f64 |
| typedef signed long long | ssp_s64 |
| typedef unsigned long long | ssp_u64 |
Functions | |
| __m128d | ssp_blendv_pd_SSE2 (__m128d a, __m128d b, __m128d mask) |
| unsigned short | BitReverse (unsigned short v) |
| void | atomicallyIncrementDouble (volatile double *target, const double by) |
| void | symcsr (const double *__restrict V, const uint64_t *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlowbits) |
| void | symcsr (const double *__restrict V, const unsigned short *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlowbits) |
| void | symcsr (const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlowbits) |
| void | SSEsym (const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *__restrict Y, unsigned lowmask, unsigned nlbits) |
| void | SSEsym (const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlbits) |
| void | SSEspmv (const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lcmask, unsigned lrmask, unsigned clbits) |
| void | SSEsym (const double *__restrict V, const uint64_t *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lowmask, unsigned nlbits) |
| void | SSEsym (const double *__restrict V, const uint64_t *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *__restrict Y, double *__restrict YT, unsigned lowmask, unsigned nlbits) |
| void | SSEsym (const double *__restrict V, const unsigned short *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lowmask, unsigned nlbits) |
| void | SSEsym (const double *__restrict V, const unsigned short *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlbits) |
| void | SSEspmv (const double *__restrict V, const unsigned short *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lcmask, unsigned lrmask, unsigned clbits) |
| void | SSEspmv (const double *__restrict V, const uint64_t *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lcmask, unsigned lrmask, unsigned clbits) |
| void | popcountall (const unsigned char *__restrict M, unsigned *__restrict counts, size_t n) |
| void | popcountall (const unsigned short *__restrict M, unsigned *__restrict counts, size_t n) |
| void | popcountall (const uint64_t *__restrict M, unsigned *__restrict counts, size_t n) |
Variables | |
| const uint64_t | masktable64 [64] |
| const unsigned short | masktable16 [16] |
| const unsigned char | BitReverseTable64 [] |
| typedef float ssp_f32 |
Definition at line 62 of file SSEspmv.cpp.
| typedef double ssp_f64 |
Definition at line 63 of file SSEspmv.cpp.
| typedef signed short ssp_s16 |
Definition at line 56 of file SSEspmv.cpp.
| typedef signed int ssp_s32 |
Definition at line 59 of file SSEspmv.cpp.
| typedef signed long long ssp_s64 |
Definition at line 65 of file SSEspmv.cpp.
| typedef signed char ssp_s8 |
Definition at line 53 of file SSEspmv.cpp.
| typedef unsigned short ssp_u16 |
Definition at line 57 of file SSEspmv.cpp.
| typedef unsigned int ssp_u32 |
Definition at line 60 of file SSEspmv.cpp.
| typedef unsigned long long ssp_u64 |
Definition at line 66 of file SSEspmv.cpp.
| typedef unsigned char ssp_u8 |
Definition at line 54 of file SSEspmv.cpp.
| void atomicallyIncrementDouble | ( | volatile double * | target, |
| const double | by | ||
| ) | inline |
Definition at line 146 of file SSEspmv.cpp.
| unsigned short BitReverse | ( | unsigned short | v | ) |
Definition at line 136 of file SSEspmv.cpp.
| void popcountall | ( | const unsigned char *__restrict | M, |
| unsigned *__restrict | counts, | ||
| size_t | n | ||
| ) |
Definition at line 1232 of file SSEspmv.cpp.
| void popcountall | ( | const unsigned short *__restrict | M, |
| unsigned *__restrict | counts, | ||
| size_t | n | ||
| ) |
Definition at line 1253 of file SSEspmv.cpp.
| void popcountall | ( | const uint64_t *__restrict | M, |
| unsigned *__restrict | counts, | ||
| size_t | n | ||
| ) |
Definition at line 1273 of file SSEspmv.cpp.
| void SSEspmv | ( | const double *__restrict | V, |
| const unsigned char *__restrict | M, | ||
| const unsigned *__restrict | bot, | ||
| const unsigned | nrb, | ||
| const double *__restrict | X, | ||
| double * | Y, | ||
| unsigned | lcmask, | ||
| unsigned | lrmask, | ||
| unsigned | clbits | ||
| ) |
SpMV (usually used as a subroutine) using bitmasked register blocks. This version works only with double values, unsigned indices, and 2x2 register blocks.
| [in] | nrb | number of register blocks for this compressed sparse block only |
| [in] | bot | the local part of the bottom array, i.e. {lower row bits}.{higher row bits} |
Definition at line 395 of file SSEspmv.cpp.
| void SSEspmv | ( | const double *__restrict | V, |
| const unsigned short *__restrict | M, | ||
| const unsigned *__restrict | bot, | ||
| const unsigned | nrb, | ||
| const double *__restrict | X, | ||
| double * | Y, | ||
| unsigned | lcmask, | ||
| unsigned | lrmask, | ||
| unsigned | clbits | ||
| ) |
SpMV (usually used as a subroutine) using bitmasked register blocks. This version works only with double values, unsigned indices, and 4x4 register blocks.
| [in] | nrb | number of register blocks for this compressed sparse block only |
| [in] | bot | the local part of the bottom array, i.e. {lower row bits}.{higher row bits} |
Definition at line 1047 of file SSEspmv.cpp.
| void SSEspmv | ( | const double *__restrict | V, |
| const uint64_t *__restrict | M, | ||
| const unsigned *__restrict | bot, | ||
| const unsigned | nrb, | ||
| const double *__restrict | X, | ||
| double * | Y, | ||
| unsigned | lcmask, | ||
| unsigned | lrmask, | ||
| unsigned | clbits | ||
| ) |
Definition at line 1120 of file SSEspmv.cpp.
| void SSEsym | ( | const double *__restrict | V, |
| const unsigned char *__restrict | M, | ||
| const unsigned *__restrict | bot, | ||
| const unsigned | nrb, | ||
| const double *__restrict | X, | ||
| double *__restrict | Y, | ||
| unsigned | lowmask, | ||
| unsigned | nlbits | ||
| ) |
Symmetric SpMV inner kernel using bitmasked register blocks: 2-by-2, potentially diagonal case (X == XT and Y == YT). We can still use the __restrict keyword because we only use one alias for both X and XT.
Definition at line 266 of file SSEspmv.cpp.
| void SSEsym | ( | const double *__restrict | V, |
| const unsigned char *__restrict | M, | ||
| const unsigned *__restrict | bot, | ||
| const unsigned | nrb, | ||
| const double *__restrict | X, | ||
| const double *__restrict | XT, | ||
| double * | Y, | ||
| double * | YT, | ||
| unsigned | lowmask, | ||
| unsigned | nlbits | ||
| ) |
Symmetric SpMV inner kernel using bitmasked register blocks: 2-by-2, general case (X != XT and Y != YT). Assumes strict-aliasing on X and Y.
Definition at line 329 of file SSEspmv.cpp.
| void SSEsym | ( | const double *__restrict | V, |
| const uint64_t *__restrict | M, | ||
| const unsigned *__restrict | bot, | ||
| const unsigned | nrb, | ||
| const double *__restrict | X, | ||
| double * | Y, | ||
| unsigned | lowmask, | ||
| unsigned | nlbits | ||
| ) |
Definition at line 439 of file SSEspmv.cpp.
| void SSEsym | ( | const double *__restrict | V, |
| const uint64_t *__restrict | M, | ||
| const unsigned *__restrict | bot, | ||
| const unsigned | nrb, | ||
| const double *__restrict | X, | ||
| const double *__restrict | XT, | ||
| double *__restrict | Y, | ||
| double *__restrict | YT, | ||
| unsigned | lowmask, | ||
| unsigned | nlbits | ||
| ) |
Definition at line 642 of file SSEspmv.cpp.
| void SSEsym | ( | const double *__restrict | V, |
| const unsigned short *__restrict | M, | ||
| const unsigned *__restrict | bot, | ||
| const unsigned | nrb, | ||
| const double *__restrict | X, | ||
| double * | Y, | ||
| unsigned | lowmask, | ||
| unsigned | nlbits | ||
| ) |
Definition at line 844 of file SSEspmv.cpp.
| void SSEsym | ( | const double *__restrict | V, |
| const unsigned short *__restrict | M, | ||
| const unsigned *__restrict | bot, | ||
| const unsigned | nrb, | ||
| const double *__restrict | X, | ||
| const double *__restrict | XT, | ||
| double * | Y, | ||
| double * | YT, | ||
| unsigned | lowmask, | ||
| unsigned | nlbits | ||
| ) |
Definition at line 931 of file SSEspmv.cpp.
| __m128d ssp_blendv_pd_SSE2 | ( | __m128d | a, |
| __m128d | b, | ||
| __m128d | mask | ||
| ) | inline |
{SSE2,_mm_blendv_pd} ISSUE: Does not short-circuit, i.e. it loads 'a' regardless of the mask value. Question: Does the original blendv_pd (in SSE4.1) short-circuit?
Definition at line 92 of file SSEspmv.cpp.
| void symcsr | ( | const double *__restrict | V, |
| const uint64_t *__restrict | M, | ||
| const unsigned *__restrict | bot, | ||
| const unsigned | nrb, | ||
| const double *__restrict | X, | ||
| const double *__restrict | XT, | ||
| double * | Y, | ||
| double * | YT, | ||
| unsigned | lowmask, | ||
| unsigned | nlowbits | ||
| ) |
Definition at line 165 of file SSEspmv.cpp.
| void symcsr | ( | const double *__restrict | V, |
| const unsigned short *__restrict | M, | ||
| const unsigned *__restrict | bot, | ||
| const unsigned | nrb, | ||
| const double *__restrict | X, | ||
| const double *__restrict | XT, | ||
| double * | Y, | ||
| double * | YT, | ||
| unsigned | lowmask, | ||
| unsigned | nlowbits | ||
| ) |
Definition at line 200 of file SSEspmv.cpp.
| void symcsr | ( | const double *__restrict | V, |
| const unsigned char *__restrict | M, | ||
| const unsigned *__restrict | bot, | ||
| const unsigned | nrb, | ||
| const double *__restrict | X, | ||
| const double *__restrict | XT, | ||
| double * | Y, | ||
| double * | YT, | ||
| unsigned | lowmask, | ||
| unsigned | nlowbits | ||
| ) |
Definition at line 222 of file SSEspmv.cpp.
| const unsigned char BitReverseTable64[] |
Definition at line 122 of file SSEspmv.cpp.
| const unsigned short masktable16[16] |
Definition at line 46 of file SSEspmv.cpp.
| const uint64_t masktable64[64] |
Definition at line 28 of file SSEspmv.cpp.
1.8.6