#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <iostream>
#include <algorithm>
#include <iterator>
#include <mmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#include <pmmintrin.h>
#include <tmmintrin.h>
#include <smmintrin.h>
#include <nmmintrin.h>
#include <wmmintrin.h>
#include <ammintrin.h>
Go to the source code of this file.
Classes | |
union | ssp_m128 |
Typedefs | |
typedef signed char | ssp_s8 |
typedef unsigned char | ssp_u8 |
typedef signed short | ssp_s16 |
typedef unsigned short | ssp_u16 |
typedef signed int | ssp_s32 |
typedef unsigned int | ssp_u32 |
typedef float | ssp_f32 |
typedef double | ssp_f64 |
typedef signed long long | ssp_s64 |
typedef unsigned long long | ssp_u64 |
Functions | |
__m128d | ssp_blendv_pd_SSE2 (__m128d a, __m128d b, __m128d mask) |
unsigned short | BitReverse (unsigned short v) |
void | atomicallyIncrementDouble (volatile double *target, const double by) |
void | symcsr (const double *__restrict V, const uint64_t *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlowbits) |
void | symcsr (const double *__restrict V, const unsigned short *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlowbits) |
void | symcsr (const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlowbits) |
void | SSEsym (const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *__restrict Y, unsigned lowmask, unsigned nlbits) |
void | SSEsym (const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlbits) |
void | SSEspmv (const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lcmask, unsigned lrmask, unsigned clbits) |
void | SSEsym (const double *__restrict V, const uint64_t *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lowmask, unsigned nlbits) |
void | SSEsym (const double *__restrict V, const uint64_t *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *__restrict Y, double *__restrict YT, unsigned lowmask, unsigned nlbits) |
void | SSEsym (const double *__restrict V, const unsigned short *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lowmask, unsigned nlbits) |
void | SSEsym (const double *__restrict V, const unsigned short *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlbits) |
void | SSEspmv (const double *__restrict V, const unsigned short *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lcmask, unsigned lrmask, unsigned clbits) |
void | SSEspmv (const double *__restrict V, const uint64_t *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lcmask, unsigned lrmask, unsigned clbits) |
void | popcountall (const unsigned char *__restrict M, unsigned *__restrict counts, size_t n) |
void | popcountall (const unsigned short *__restrict M, unsigned *__restrict counts, size_t n) |
void | popcountall (const uint64_t *__restrict M, unsigned *__restrict counts, size_t n) |
Variables | |
const uint64_t | masktable64 [64] |
const unsigned short | masktable16 [16] |
const unsigned char | BitReverseTable64 [] |
typedef float ssp_f32 |
Definition at line 62 of file SSEspmv.cpp.
typedef double ssp_f64 |
Definition at line 63 of file SSEspmv.cpp.
typedef signed short ssp_s16 |
Definition at line 56 of file SSEspmv.cpp.
typedef signed int ssp_s32 |
Definition at line 59 of file SSEspmv.cpp.
typedef signed long long ssp_s64 |
Definition at line 65 of file SSEspmv.cpp.
typedef signed char ssp_s8 |
Definition at line 53 of file SSEspmv.cpp.
typedef unsigned short ssp_u16 |
Definition at line 57 of file SSEspmv.cpp.
typedef unsigned int ssp_u32 |
Definition at line 60 of file SSEspmv.cpp.
typedef unsigned long long ssp_u64 |
Definition at line 66 of file SSEspmv.cpp.
typedef unsigned char ssp_u8 |
Definition at line 54 of file SSEspmv.cpp.
void atomicallyIncrementDouble | ( | volatile double * | target, |
const double | by | ||
) | inline |
Definition at line 146 of file SSEspmv.cpp.
unsigned short BitReverse | ( | unsigned short | v | ) |
Definition at line 136 of file SSEspmv.cpp.
void popcountall | ( | const unsigned char *__restrict | M, |
unsigned *__restrict | counts, | ||
size_t | n | ||
) |
Definition at line 1232 of file SSEspmv.cpp.
void popcountall | ( | const unsigned short *__restrict | M, |
unsigned *__restrict | counts, | ||
size_t | n | ||
) |
Definition at line 1253 of file SSEspmv.cpp.
void popcountall | ( | const uint64_t *__restrict | M, |
unsigned *__restrict | counts, | ||
size_t | n | ||
) |
Definition at line 1273 of file SSEspmv.cpp.
void SSEspmv | ( | const double *__restrict | V, |
const unsigned char *__restrict | M, | ||
const unsigned *__restrict | bot, | ||
const unsigned | nrb, | ||
const double *__restrict | X, | ||
double * | Y, | ||
unsigned | lcmask, | ||
unsigned | lrmask, | ||
unsigned | clbits | ||
) |
SpMV (usually used as a subroutine) using bitmasked register blocks. This version works only with double values, unsigned indices, and 2x2 register blocks.
[in] | nrb | number of register blocks for this compressed sparse block only |
[in] | bot | the local part of the bottom array, i.e. {lower row bits}.{higher row bits} |
Definition at line 395 of file SSEspmv.cpp.
void SSEspmv | ( | const double *__restrict | V, |
const unsigned short *__restrict | M, | ||
const unsigned *__restrict | bot, | ||
const unsigned | nrb, | ||
const double *__restrict | X, | ||
double * | Y, | ||
unsigned | lcmask, | ||
unsigned | lrmask, | ||
unsigned | clbits | ||
) |
SpMV (usually used as a subroutine) using bitmasked register blocks. This version works only with double values, unsigned indices, and 4x4 register blocks.
[in] | nrb | number of register blocks for this compressed sparse block only |
[in] | bot | the local part of the bottom array, i.e. {lower row bits}.{higher row bits} |
Definition at line 1047 of file SSEspmv.cpp.
void SSEspmv | ( | const double *__restrict | V, |
const uint64_t *__restrict | M, | ||
const unsigned *__restrict | bot, | ||
const unsigned | nrb, | ||
const double *__restrict | X, | ||
double * | Y, | ||
unsigned | lcmask, | ||
unsigned | lrmask, | ||
unsigned | clbits | ||
) |
Definition at line 1120 of file SSEspmv.cpp.
void SSEsym | ( | const double *__restrict | V, |
const unsigned char *__restrict | M, | ||
const unsigned *__restrict | bot, | ||
const unsigned | nrb, | ||
const double *__restrict | X, | ||
double *__restrict | Y, | ||
unsigned | lowmask, | ||
unsigned | nlbits | ||
) |
Symmetric SpMV inner kernel using bitmasked register blocks: 2-by-2, potentially diagonal case (X == XT and Y == YT). We can still use the __restrict keyword because we only use one alias for both X and XT.
Definition at line 266 of file SSEspmv.cpp.
void SSEsym | ( | const double *__restrict | V, |
const unsigned char *__restrict | M, | ||
const unsigned *__restrict | bot, | ||
const unsigned | nrb, | ||
const double *__restrict | X, | ||
const double *__restrict | XT, | ||
double * | Y, | ||
double * | YT, | ||
unsigned | lowmask, | ||
unsigned | nlbits | ||
) |
Symmetric SpMV inner kernel using bitmasked register blocks: 2-by-2, general case (X != XT and Y != YT). Assumes strict aliasing on X and Y.
Definition at line 329 of file SSEspmv.cpp.
void SSEsym | ( | const double *__restrict | V, |
const uint64_t *__restrict | M, | ||
const unsigned *__restrict | bot, | ||
const unsigned | nrb, | ||
const double *__restrict | X, | ||
double * | Y, | ||
unsigned | lowmask, | ||
unsigned | nlbits | ||
) |
Definition at line 439 of file SSEspmv.cpp.
void SSEsym | ( | const double *__restrict | V, |
const uint64_t *__restrict | M, | ||
const unsigned *__restrict | bot, | ||
const unsigned | nrb, | ||
const double *__restrict | X, | ||
const double *__restrict | XT, | ||
double *__restrict | Y, | ||
double *__restrict | YT, | ||
unsigned | lowmask, | ||
unsigned | nlbits | ||
) |
Definition at line 642 of file SSEspmv.cpp.
void SSEsym | ( | const double *__restrict | V, |
const unsigned short *__restrict | M, | ||
const unsigned *__restrict | bot, | ||
const unsigned | nrb, | ||
const double *__restrict | X, | ||
double * | Y, | ||
unsigned | lowmask, | ||
unsigned | nlbits | ||
) |
Definition at line 844 of file SSEspmv.cpp.
void SSEsym | ( | const double *__restrict | V, |
const unsigned short *__restrict | M, | ||
const unsigned *__restrict | bot, | ||
const unsigned | nrb, | ||
const double *__restrict | X, | ||
const double *__restrict | XT, | ||
double * | Y, | ||
double * | YT, | ||
unsigned | lowmask, | ||
unsigned | nlbits | ||
) |
Definition at line 931 of file SSEspmv.cpp.
__m128d ssp_blendv_pd_SSE2 | ( | __m128d | a, |
__m128d | b, | ||
__m128d | mask | ||
) | inline |
{SSE2,_mm_blendv_pd} ISSUE: Does not short-circuit, i.e. it loads 'a' regardless of the mask value. Question: Does the original blendv_pd (in SSE4.1) short-circuit?
Definition at line 92 of file SSEspmv.cpp.
void symcsr | ( | const double *__restrict | V, |
const uint64_t *__restrict | M, | ||
const unsigned *__restrict | bot, | ||
const unsigned | nrb, | ||
const double *__restrict | X, | ||
const double *__restrict | XT, | ||
double * | Y, | ||
double * | YT, | ||
unsigned | lowmask, | ||
unsigned | nlowbits | ||
) |
Definition at line 165 of file SSEspmv.cpp.
void symcsr | ( | const double *__restrict | V, |
const unsigned short *__restrict | M, | ||
const unsigned *__restrict | bot, | ||
const unsigned | nrb, | ||
const double *__restrict | X, | ||
const double *__restrict | XT, | ||
double * | Y, | ||
double * | YT, | ||
unsigned | lowmask, | ||
unsigned | nlowbits | ||
) |
Definition at line 200 of file SSEspmv.cpp.
void symcsr | ( | const double *__restrict | V, |
const unsigned char *__restrict | M, | ||
const unsigned *__restrict | bot, | ||
const unsigned | nrb, | ||
const double *__restrict | X, | ||
const double *__restrict | XT, | ||
double * | Y, | ||
double * | YT, | ||
unsigned | lowmask, | ||
unsigned | nlowbits | ||
) |
Definition at line 222 of file SSEspmv.cpp.
const unsigned char BitReverseTable64[] |
Definition at line 122 of file SSEspmv.cpp.
const unsigned short masktable16[16] |
Definition at line 46 of file SSEspmv.cpp.
const uint64_t masktable64[64] |
Definition at line 28 of file SSEspmv.cpp.