Compressed Sparse Blocks  1.2
 All Classes Files Functions Variables Typedefs Friends Macros Pages
Classes | Typedefs | Functions | Variables
SSEspmv.cpp File Reference
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <iostream>
#include <algorithm>
#include <iterator>
#include <mmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#include <pmmintrin.h>
#include <tmmintrin.h>
#include <smmintrin.h>
#include <nmmintrin.h>
#include <wmmintrin.h>
#include <ammintrin.h>

Go to the source code of this file.

Classes

union  ssp_m128
 

Typedefs

typedef signed char ssp_s8
 
typedef unsigned char ssp_u8
 
typedef signed short ssp_s16
 
typedef unsigned short ssp_u16
 
typedef signed int ssp_s32
 
typedef unsigned int ssp_u32
 
typedef float ssp_f32
 
typedef double ssp_f64
 
typedef signed long long ssp_s64
 
typedef unsigned long long ssp_u64
 

Functions

__m128d ssp_blendv_pd_SSE2 (__m128d a, __m128d b, __m128d mask)
 
unsigned short BitReverse (unsigned short v)
 
void atomicallyIncrementDouble (volatile double *target, const double by)
 
void symcsr (const double *__restrict V, const uint64_t *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlowbits)
 
void symcsr (const double *__restrict V, const unsigned short *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlowbits)
 
void symcsr (const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlowbits)
 
void SSEsym (const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *__restrict Y, unsigned lowmask, unsigned nlbits)
 
void SSEsym (const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlbits)
 
void SSEspmv (const double *__restrict V, const unsigned char *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lcmask, unsigned lrmask, unsigned clbits)
 
void SSEsym (const double *__restrict V, const uint64_t *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lowmask, unsigned nlbits)
 
void SSEsym (const double *__restrict V, const uint64_t *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *__restrict Y, double *__restrict YT, unsigned lowmask, unsigned nlbits)
 
void SSEsym (const double *__restrict V, const unsigned short *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lowmask, unsigned nlbits)
 
void SSEsym (const double *__restrict V, const unsigned short *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, const double *__restrict XT, double *Y, double *YT, unsigned lowmask, unsigned nlbits)
 
void SSEspmv (const double *__restrict V, const unsigned short *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lcmask, unsigned lrmask, unsigned clbits)
 
void SSEspmv (const double *__restrict V, const uint64_t *__restrict M, const unsigned *__restrict bot, const unsigned nrb, const double *__restrict X, double *Y, unsigned lcmask, unsigned lrmask, unsigned clbits)
 
void popcountall (const unsigned char *__restrict M, unsigned *__restrict counts, size_t n)
 
void popcountall (const unsigned short *__restrict M, unsigned *__restrict counts, size_t n)
 
void popcountall (const uint64_t *__restrict M, unsigned *__restrict counts, size_t n)
 

Variables

const uint64_t masktable64 [64]
 
const unsigned short masktable16 [16]
 
const unsigned char BitReverseTable64 []
 

Typedef Documentation

typedef float ssp_f32

Definition at line 62 of file SSEspmv.cpp.

typedef double ssp_f64

Definition at line 63 of file SSEspmv.cpp.

typedef signed short ssp_s16

Definition at line 56 of file SSEspmv.cpp.

typedef signed int ssp_s32

Definition at line 59 of file SSEspmv.cpp.

typedef signed long long ssp_s64

Definition at line 65 of file SSEspmv.cpp.

typedef signed char ssp_s8

Definition at line 53 of file SSEspmv.cpp.

typedef unsigned short ssp_u16

Definition at line 57 of file SSEspmv.cpp.

typedef unsigned int ssp_u32

Definition at line 60 of file SSEspmv.cpp.

typedef unsigned long long ssp_u64

Definition at line 66 of file SSEspmv.cpp.

typedef unsigned char ssp_u8

Definition at line 54 of file SSEspmv.cpp.

Function Documentation

void atomicallyIncrementDouble ( volatile double *  target,
const double  by 
)
inline

Definition at line 146 of file SSEspmv.cpp.

unsigned short BitReverse ( unsigned short  v)

Definition at line 136 of file SSEspmv.cpp.

void popcountall ( const unsigned char *__restrict  M,
unsigned *__restrict  counts,
size_t  n 
)

Definition at line 1232 of file SSEspmv.cpp.

void popcountall ( const unsigned short *__restrict  M,
unsigned *__restrict  counts,
size_t  n 
)

Definition at line 1253 of file SSEspmv.cpp.

void popcountall ( const uint64_t *__restrict  M,
unsigned *__restrict  counts,
size_t  n 
)

Definition at line 1273 of file SSEspmv.cpp.

void SSEspmv ( const double *__restrict  V,
const unsigned char *__restrict  M,
const unsigned *__restrict  bot,
const unsigned  nrb,
const double *__restrict  X,
double *  Y,
unsigned  lcmask,
unsigned  lrmask,
unsigned  clbits 
)

SpMV (usually used as a subroutine) using bitmasked register blocks. This version works only with double values, unsigned indices, and 2x2 register blocks.

Parameters
[in] nrb: number of register blocks for this compressed sparse block only
[in] bot: the local part of the bottom array, i.e. {lower row bits}.{higher row bits}
Attention
SSEspmv should only be called within a single compressed sparse block, and X and Y should already be partially indexed by the higher-order bits. We don't need any template specialization based on the register block size because M's type differs for different block sizes, which gives us overloaded definitions instead.

Definition at line 395 of file SSEspmv.cpp.

void SSEspmv ( const double *__restrict  V,
const unsigned short *__restrict  M,
const unsigned *__restrict  bot,
const unsigned  nrb,
const double *__restrict  X,
double *  Y,
unsigned  lcmask,
unsigned  lrmask,
unsigned  clbits 
)

SpMV (usually used as a subroutine) using bitmasked register blocks. This version works only with double values, unsigned indices, and 4x4 register blocks.

Parameters
[in] nrb: number of register blocks for this compressed sparse block only
[in] bot: the local part of the bottom array, i.e. {lower row bits}.{higher row bits}
Attention
SSEspmv should only be called within a single compressed sparse block, and X and Y should already be partially indexed by the higher-order bits. We don't need any template specialization based on the register block size because M's type differs for different block sizes, which gives us overloaded definitions instead.

Definition at line 1047 of file SSEspmv.cpp.

void SSEspmv ( const double *__restrict  V,
const uint64_t *__restrict  M,
const unsigned *__restrict  bot,
const unsigned  nrb,
const double *__restrict  X,
double *  Y,
unsigned  lcmask,
unsigned  lrmask,
unsigned  clbits 
)

Definition at line 1120 of file SSEspmv.cpp.

void SSEsym ( const double *__restrict  V,
const unsigned char *__restrict  M,
const unsigned *__restrict  bot,
const unsigned  nrb,
const double *__restrict  X,
double *__restrict  Y,
unsigned  lowmask,
unsigned  nlbits 
)

Symmetric SpMV inner kernel using bitmasked register blocks: 2-by-2, potentially diagonal case (X == XT and Y == YT). We can still use the __restrict keyword because we only use one alias for both X and XT.

Definition at line 266 of file SSEspmv.cpp.

void SSEsym ( const double *__restrict  V,
const unsigned char *__restrict  M,
const unsigned *__restrict  bot,
const unsigned  nrb,
const double *__restrict  X,
const double *__restrict  XT,
double *  Y,
double *  YT,
unsigned  lowmask,
unsigned  nlbits 
)

Symmetric SpMV inner kernel using bitmasked register blocks: 2-by-2, general case (X != XT and Y != YT). Assumes strict aliasing on X and Y.

Definition at line 329 of file SSEspmv.cpp.

void SSEsym ( const double *__restrict  V,
const uint64_t *__restrict  M,
const unsigned *__restrict  bot,
const unsigned  nrb,
const double *__restrict  X,
double *  Y,
unsigned  lowmask,
unsigned  nlbits 
)

Definition at line 439 of file SSEspmv.cpp.

void SSEsym ( const double *__restrict  V,
const uint64_t *__restrict  M,
const unsigned *__restrict  bot,
const unsigned  nrb,
const double *__restrict  X,
const double *__restrict  XT,
double *__restrict  Y,
double *__restrict  YT,
unsigned  lowmask,
unsigned  nlbits 
)

Definition at line 642 of file SSEspmv.cpp.

void SSEsym ( const double *__restrict  V,
const unsigned short *__restrict  M,
const unsigned *__restrict  bot,
const unsigned  nrb,
const double *__restrict  X,
double *  Y,
unsigned  lowmask,
unsigned  nlbits 
)

Definition at line 844 of file SSEspmv.cpp.

void SSEsym ( const double *__restrict  V,
const unsigned short *__restrict  M,
const unsigned *__restrict  bot,
const unsigned  nrb,
const double *__restrict  X,
const double *__restrict  XT,
double *  Y,
double *  YT,
unsigned  lowmask,
unsigned  nlbits 
)

Definition at line 931 of file SSEspmv.cpp.

__m128d ssp_blendv_pd_SSE2 ( __m128d  a,
__m128d  b,
__m128d  mask 
)
inline

{SSE2,_mm_blendv_pd} ISSUE: Does not short-circuit, i.e. it loads 'a' regardless of the mask value. Question: Does the original blendv_pd (in SSE4.1) short-circuit?

Definition at line 92 of file SSEspmv.cpp.

void symcsr ( const double *__restrict  V,
const uint64_t *__restrict  M,
const unsigned *__restrict  bot,
const unsigned  nrb,
const double *__restrict  X,
const double *__restrict  XT,
double *  Y,
double *  YT,
unsigned  lowmask,
unsigned  nlowbits 
)

Definition at line 165 of file SSEspmv.cpp.

void symcsr ( const double *__restrict  V,
const unsigned short *__restrict  M,
const unsigned *__restrict  bot,
const unsigned  nrb,
const double *__restrict  X,
const double *__restrict  XT,
double *  Y,
double *  YT,
unsigned  lowmask,
unsigned  nlowbits 
)

Definition at line 200 of file SSEspmv.cpp.

void symcsr ( const double *__restrict  V,
const unsigned char *__restrict  M,
const unsigned *__restrict  bot,
const unsigned  nrb,
const double *__restrict  X,
const double *__restrict  XT,
double *  Y,
double *  YT,
unsigned  lowmask,
unsigned  nlowbits 
)

Definition at line 222 of file SSEspmv.cpp.

Variable Documentation

const unsigned char BitReverseTable64[]
Initial value:
=
{
0x0, 0x20, 0x10, 0x30, 0x8, 0x28, 0x18, 0x38,
0x4, 0x24, 0x14, 0x34, 0xc, 0x2c, 0x1c, 0x3c,
0x2, 0x22, 0x12, 0x32, 0xa, 0x2a, 0x1a, 0x3a,
0x6, 0x26, 0x16, 0x36, 0xe, 0x2e, 0x1e, 0x3e,
0x1, 0x21, 0x11, 0x31, 0x9, 0x29, 0x19, 0x39,
0x5, 0x25, 0x15, 0x35, 0xd, 0x2d, 0x1d, 0x3d,
0x3, 0x23, 0x13, 0x33, 0xb, 0x2b, 0x1b, 0x3b,
0x7, 0x27, 0x17, 0x37, 0xf, 0x2f, 0x1f, 0x3f
}

Definition at line 122 of file SSEspmv.cpp.

const unsigned short masktable16[16]
Initial value:
= {0x8000, 0x4000, 0x2000, 0x1000, 0x0800, 0x0400, 0x0200, 0x0100,
0x0080, 0x0040, 0x0020, 0x0010, 0x0008, 0x0004, 0x0002, 0x0001 }

Definition at line 46 of file SSEspmv.cpp.

const uint64_t masktable64[64]
Initial value:
= {0x8000000000000000, 0x4000000000000000, 0x2000000000000000, 0x1000000000000000,
0x0800000000000000, 0x0400000000000000, 0x0200000000000000, 0x0100000000000000,
0x0080000000000000, 0x0040000000000000, 0x0020000000000000, 0x0010000000000000,
0x0008000000000000, 0x0004000000000000, 0x0002000000000000, 0x0001000000000000,
0x0000800000000000, 0x0000400000000000, 0x0000200000000000, 0x0000100000000000,
0x0000080000000000, 0x0000040000000000, 0x0000020000000000, 0x0000010000000000,
0x0000008000000000, 0x0000004000000000, 0x0000002000000000, 0x0000001000000000,
0x0000000800000000, 0x0000000400000000, 0x0000000200000000, 0x0000000100000000,
0x0000000080000000, 0x0000000040000000, 0x0000000020000000, 0x0000000010000000,
0x0000000008000000, 0x0000000004000000, 0x0000000002000000, 0x0000000001000000,
0x0000000000800000, 0x0000000000400000, 0x0000000000200000, 0x0000000000100000,
0x0000000000080000, 0x0000000000040000, 0x0000000000020000, 0x0000000000010000,
0x0000000000008000, 0x0000000000004000, 0x0000000000002000, 0x0000000000001000,
0x0000000000000800, 0x0000000000000400, 0x0000000000000200, 0x0000000000000100,
0x0000000000000080, 0x0000000000000040, 0x0000000000000020, 0x0000000000000010,
0x0000000000000008, 0x0000000000000004, 0x0000000000000002, 0x0000000000000001 }

Definition at line 28 of file SSEspmv.cpp.