dnl performs y = beta*y + alpha*A*x /* performs y = beta*y + alpha*A*x */ #include "cblas.h" include(cblas.m4.h) #define MAX(a,b) (((a) > (b)) ? (a) : (b)) #define MIN(a,b) (((a) < (b)) ? (a) : (b)) dnl dnl ---------------------------------------------------------------------- dnl Usage: SYMV(aby_type, a_type, x_type, sum_type, tmp_type) dnl j = n-1 i=n-1 dnl ... y[j] = SUM beta * y[j] + alpha * SUM (a[i,j]*x[i]) dnl j = 0 i=0 dnl aby_type : the type and precision of alpha, beta and r dnl a_type : the type and precision of x dnl x_type : the type and precision of y dnl sum_type : the type and precision of auxiliary variable sum dnl tmp_type : the type and precision of auxiliary variable tmp dnl Each type and precision specifier can be one of dnl real_S ... real and single dnl real_D ... real and double dnl real_I ... real and indigenous dnl real_E ... real and extra dnl complex_S ... complex and single dnl complex_D ... complex and double dnl complex_I ... complex and indigeneous dnl complex_E ... complex and extra dnl ---------------------------------------------------------------------- dnl define(`SYMV', `{ int i, j, ky, kx, jy, jx, ix, iy, tmpind = 0; PTR_CAST(y, $1, `') PTR_CAST(a, $2, `const') PTR_CAST(x, $3, `const') SCALAR_CAST(alpha, $1) SCALAR_CAST(beta, $1) DECLARE(y_ii, $1) DECLARE(y_jj, $1) DECLARE(aij, $2) DECLARE(x_ii, $3) DECLARE(prod1, $1) /* case y = beta * y */ DECLARE(prod, $4) DECLARE(sum, $4) DECLARE(tmp1, $5) DECLARE(tmp2, $5) /* checks to see if any of arguments are wrong */ if ((n <=0) || (incy ==0) || (incx ==0) || (uplo != blas_upper && uplo != blas_lower) || (lda < MAX(1, n))) { printf("there is an error in symv"); return; } /* checks to see if we can return y with no calcs */ if ((n == 0) || ((TEST_0(alpha_i, $1) && (TEST_1(beta_i, $1))))){ return; } /* Set up start points in x and y */ INC_ADJUST(incx, $3) INC_ADJUST(incy, $1) ky = 0; kx = 0; if(incx < 0) kx = 0- (n - 1) * incx; if(incy < 0) ky = 0- (n - 1) * incy; /* if alpha equals 0, multiply y = y*beta*/ if(TEST_0(alpha_i, $1)){ if(TEST_0(beta_i, $1)){ iy = ky; for (i = 0; i < n; ++i){ SET_ZERO_VECTOR_ELEMENT(y_i, iy, $1) /* y[iy] = 0.0 */ iy = iy + incy; } } else{ for(i = 0; i < n; i++){ GET_VECTOR_ELEMENT(y_ii, y_i, iy, $1) MUL(prod1, $1, y_ii, $1, beta_i, $1) /* prod1 = beta*y[iy]*/ SET_VECTOR_ELEMENT(y_i, iy, prod1, `$1') /* y[iy] = prod1 */ iy = iy + incy; } } return; } else{ if((order == blas_colmajor) && (uplo == blas_upper) || (order == blas_rowmajor) && (uplo == blas_lower)){ /* case where a is stored as col-major and upper or row-major and lower */ jy = ky; for(j = 0; j < n; ++j){ ZERO(sum, $4) tmpind = j*lda; /* set tmpind to appropriate row */ ix = kx; for(i = 0; i <= j; i++){ /* gets the elements of a from start of row to diag */ GET_VECTOR_ELEMENT(x_ii, x_i, ix, $3) GET_VECTOR_ELEMENT(aij, a_i, tmpind, $2) MUL(prod, $4, x_ii, $3, aij, $2) /* prod = a[tmpind]*x[i] */ ADD(sum, $4, sum, $4, prod, $4) /* sum = sum+prod */ ix = ix+incx; tmpind++; /* set tmpind to next element */ } tmpind = j+(j+1)*lda; /* set tmpind to appropriate column */ for(i= j+1; i < n; i++){ /* gets the elements of a from diagonal to end */ GET_VECTOR_ELEMENT(x_ii, x_i, ix, $3) GET_VECTOR_ELEMENT(aij, a_i, tmpind, $2) MUL(prod, $4, x_ii, $3, aij, $2) /* prod = a[tmpind]*x[i] */ ADD(sum, $4, sum, $4, prod, $4) /* sum = sum+prod */ ix = ix+incx; tmpind += lda; /* set tmpind to get next element */ } MUL(tmp1, $5, sum, $4, alpha_i, $1) /* tmp1 = sum*alpha */ GET_VECTOR_ELEMENT(y_jj, y_i, jy, $1) MUL(tmp2, $5, beta_i, $1, y_jj, $1) /* tmp2 = y[jy]*beta */ ADD(tmp1, $5, tmp1, $5, tmp2, $5) /* tmp1 = tmp1+tmp2 */ SET_ROUND_VECTOR_ELEMENT(y_i, jy, tmp1, $5) /* y[jy] = tmp1 */ jy = jy + incy; } } /* case where a is col-major and lower or row-major and upper */ else{ jy = ky; for(j = 0; j < n; j++){ ZERO(sum, $4) ix = kx; tmpind = j; /* set tmpind to initial row */ for(i = 0; i <= j; i++){ /*gets the elements of a from start of row to diag */ GET_VECTOR_ELEMENT(x_ii, x_i, ix, $3) GET_VECTOR_ELEMENT(aij, a_i, tmpind, $2) MUL(prod, $4, x_ii, $3, aij, $2) /* prod = a[tmpind]*x[i] */ ADD(sum, $4, sum, $4, prod, $4) /* sum = sum+prod */ ix = ix+incx; tmpind += lda; /* set tmpind to get next element */ } tmpind = (j+1) + j*lda; /* set tmpind to initial element */ for(i= j+1; i < n; i++){ /* gets the elements of a from diag to end */ GET_VECTOR_ELEMENT(x_ii, x_i, ix, $3) GET_VECTOR_ELEMENT(aij, a_i, tmpind, $2) MUL(prod, $4, x_ii, $3, aij, $2) /* prod = a[tmpind]*x[i] */ ADD(sum, $4, sum, $4, prod, $4) /* sum = sum+prod */ ix = ix+incx; tmpind++; /* set tmpind to get next element */ } MUL(tmp1, $5, sum, $4, alpha_i, $1) /* tmp1 = sum*alpha */ GET_VECTOR_ELEMENT(y_jj, y_i, jy, $1) MUL(tmp2, $5, beta_i, $1, y_jj, $1) /* tmp2 = y[jy]*beta */ ADD(tmp1, $5, tmp1, $5, tmp2, $5) /* tmp1 = tmp1+tmp2 */ SET_ROUND_VECTOR_ELEMENT(y_i, jy, tmp1, $5) /* y[jy] = tmp1 */ jy = jy + incy; } } } }') dnl dnl ---------------------------------------------------------------------- dnl Usage: TOP_SYMV(aby_type, a_type, x_type) ... generate the top level dnl SYMV routine without the suffix _x, i.e., prec is dnl not present. dnl Each type specifier can be one of dnl s ... real and single dnl d ... real and double dnl c ... complex and single dnl z ... complex and double dnl ---------------------------------------------------------------------- dnl define(`TOP_SYMV', `ifelse( `$2&&$3', `$1&&$1', `void c_$1symv(enum blas_order_type order, enum blas_uplo_type uplo,x int n, $1_scalar alpha, $2_array a, int lda, $3_array x, int incx, $1_scalar beta, $1_array y, int incy) { SYMV($1_type, $2_type, $3_type, $1_type, $1_type) } /* end c_$1symv */', `void c_$1symv_$2_$3(enum blas_order_type order, enum blas_uplo_type uplo, int n, $1_scalar alpha, $2_array a[], int lda, $3_array x, int incx, $1_scalar beta, $1_array y, int incy) { SYMV($1_type, $2_type, $3_type, $1_type, $1_type); } /* end c_$1symv_$2_$3 */')') dnl dnl ---------------------------------------------------------------------- dnl Usage: SWITCH_prec($1, $2, $3, $4, $5, $6, $7, $8, $9) ... generate dnl a 3-way switch statement based on prec. dnl $4 and $5 are the types of 'sum' and 'tmp' in single case. dnl $6 and $7 are the types of 'sum' and 'tmp' in double/indigenous case. dnl $7 and $8 are the types of 'sum' and 'tmp' in extra case. dnl ---------------------------------------------------------------------- define(`SWITCH_prec', `switch ( prec ) { case blas_prec_single: SYMV($1_type, $2_type, $3_type, $4, $5) break; case blas_prec_double: case blas_prec_indigenous: SYMV($1_type, $2_type, $3_type, $6, $7) break; case blas_prec_extra: SYMV($1_type, $2_type, $3_type, $8, $9) break; }')dnl dnl dnl ---------------------------------------------------------------------- dnl Usage: SYMV_X_BODY(aby_type, a_type, x_type) ... generate the function dnl body of the dot product routine with the suffix _x, i.e., dnl prec is present. dnl Each type specifier can be one of dnl s ... real and single dnl d ... real and double dnl c ... complex and single dnl z ... complex and double dnl ---------------------------------------------------------------------- dnl define(`SYMV_X_BODY', `ifelse( `$1&&$2&&$3', `s&&s&&s', `SWITCH_prec($1, $2, $3, real_S, real_S, real_D, real_D, real_E, real_E)', `$1&&$2&&$3', `d&&d&&d', `SWITCH_prec($1, $2, $3, real_D, real_D, real_D, real_D, real_E, real_E)', `$1&&$2&&$3', `c&&c&&c', `SWITCH_prec($1, $2, $3, complex_S, complex_S, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `z&&z&&z', `SWITCH_prec($1, $2, $3, complex_D, complex_D, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `d&&s&&s', `SWITCH_prec($1, $2, $3, real_S, real_D, real_D, real_D, real_E, real_E)', `$1&&$2&&$3', `d&&s&&d', `SWITCH_prec($1, $2, $3, real_D, real_D, real_D, real_D, real_E, real_E)', `$1&&$2&&$3', `d&&d&&s', `SWITCH_prec($1, $2, $3, real_D, real_D, real_D, real_D, real_E, real_E)', `$1&&$2&&$3', `z&&c&&c', `SWITCH_prec($1, $2, $3, complex_S, complex_D, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `z&&c&&z', `SWITCH_prec($1, $2, $3, complex_D, complex_D, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `z&&z&&c', `SWITCH_prec($1, $2, $3, complex_D, complex_D, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `c&&s&&s', `SWITCH_prec($1, $2, $3, real_S, complex_S, real_D, complex_D, real_E, complex_E)', `$1&&$2&&$3', `c&&s&&c', `SWITCH_prec($1, $2, $3, complex_S, complex_S, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `c&&c&&s', `SWITCH_prec($1, $2, $3, complex_S, complex_S, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `z&&d&&d', `SWITCH_prec($1, $2, $3, real_D, complex_D, real_D, complex_D, real_E, complex_E)', `$1&&$2&&$3', `z&&d&&z', `SWITCH_prec($1, $2, $3, complex_D, complex_D, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `z&&z&&d', `SWITCH_prec($1, $2, $3, complex_D, complex_D, complex_D, complex_D, complex_E, complex_E)')') dnl dnl dnl ---------------------------------------------------------------------- dnl Usage: TOP_SYMV_X(abr_prec, x_prec, y_prec) ... generate the top level dnl dot product routine with the suffix _x, i.e., prec is present. dnl ---------------------------------------------------------------------- dnl define(`TOP_SYMV_X', `ifelse( `$2&&$3', `$1&&$1', `void c_$1SYMV_x(enum blas_order_type order, enum blas_uplo_type uplo, int n, $1_scalar alpha, $2_array a, int lda, $3_array x, int incx, $1_scalar beta, $1_array y, int incy, enum blas_prec_type prec) { SYMV_X_BODY($1, $2, $3) } /* end c_$1SYMV_x */', `void c_$1SYMV_$2_$3_x(enum blas_order_type order, enum blas_uplo_type uplo, int n, $1_scalar alpha, $2_array a, int lda, $3_array x, int incx, $1_scalar beta, $1_array y, int incy, enum blas_prec_type prec) { SYMV_X_BODY($1, $2, $3) } /* end c_$1SYMV_$2_$3_x */')') dnl dnl ---------------------------------------------------------------------- dnl Invoke each function dnl ---------------------------------------------------------------------- TOP_SYMV(s, s, s) TOP_SYMV(d, d, d) TOP_SYMV(z, z, z) TOP_SYMV(c, c, c) TOP_SYMV(d, s, s) TOP_SYMV(d, s, d) TOP_SYMV(d, d, s) TOP_SYMV(z, c, c) TOP_SYMV(z, c, z) TOP_SYMV(z, z, c) TOP_SYMV(c, s, s) TOP_SYMV(c, s, c) TOP_SYMV(c, c, s) TOP_SYMV(z, d, d) TOP_SYMV(z, d, z) TOP_SYMV(z, z, d) TOP_SYMV_X(s, s, s) TOP_SYMV_X(d, d, d) TOP_SYMV_X(z, z, z) TOP_SYMV_X(c, c, c) TOP_SYMV_X(d, s, s) TOP_SYMV_X(d, s, d) TOP_SYMV_X(d, d, s) TOP_SYMV_X(z, c, c) TOP_SYMV_X(z, c, z) TOP_SYMV_X(z, z, c) TOP_SYMV_X(c, s, s) TOP_SYMV_X(c, s, c) TOP_SYMV_X(c, c, s) TOP_SYMV_X(z, d, d) TOP_SYMV_X(z, d, z) TOP_SYMV_X(z, z, d)