dnl Perform r = beta*r + alpha*x^T*y dnl #include "cblas.h" include(cblas.m4.h) dnl dnl ---------------------------------------------------------------------- dnl Usage: DOT(abr_type, x_type, y_type, sum_type, tmp_type) dnl i=n-1 dnl ... r = beta * r + alpha * SUM (x[i]*y[i]) dnl i=0 dnl abr_type : the type and precision of alpha, beta and r dnl x_type : the type and precision of x dnl y_type : the type and precision of y dnl sum_type : the type and precision of auxiliary variables prod/sum dnl tmp_type : the type and precision of auxiliary variable tmp dnl Each type and precision specifier can be one of dnl real_S ... real and single dnl real_D ... real and double dnl real_I ... real and indigenous dnl real_E ... real and extra dnl complex_S ... complex and single dnl complex_D ... complex and double dnl complex_I ... complex and indigeneous dnl complex_E ... complex and extra dnl ---------------------------------------------------------------------- define(`DOT', `{ int i, ix = 0, iy = 0; PTR_CAST(r, $1, `') PTR_CAST(x, $2, `const') PTR_CAST(y, $3, `const') SCALAR_CAST(alpha, $1) SCALAR_CAST(beta, $1) DECLARE(x_ii, $2) DECLARE(y_ii, $3) DECLARE(r_v, $1) DECLARE(prod, $4) DECLARE(sum, $4) DECLARE(tmp1, $5) DECLARE(tmp2, $5) if ( n <= 0 ) { ZERO_OUT(r_i, $1) return; } GET_VECTOR_ELEMENT(r_v, r_i, 0, $1) ZERO(sum, $4) /* sum = 0 */ INC_ADJUST(incx, $2) INC_ADJUST(incy, $3) if ( incx < 0 ) ix = (-n+1)*incx; if ( incy < 0 ) iy = (-n+1)*incx; for (i = 0; i < n; ++i) { GET_VECTOR_ELEMENT(x_ii, x_i, ix, $2) GET_VECTOR_ELEMENT(y_ii, y_i, iy, $3) MUL(prod, $4, x_ii, $2, y_ii, $3) /* prod = x[i]*y[i] */ ADD(sum, $4, sum, $4, prod, $4) /* sum = sum+prod */ ix += incx; iy += incy; } /* endfor */ MUL(tmp1, $5, sum, $4, alpha_i, $1) /* tmp1 = sum*alpha */ MUL(tmp2, $5, r_v, $1, beta_i, $1) /* tmp2 = r*beta */ ADD(tmp1, $5, tmp1, $5, tmp2, $5) /* tmp1 = tmp1+tmp2 */ ROUND(r, $1, tmp1, $5) /* r = tmp1 */ }') dnl dnl dnl ********************************************************************** dnl * The following macros are used for routines without _x: * dnl * DOT_BODY * dnl * TOP_DOT * dnl ********************************************************************** dnl dnl ---------------------------------------------------------------------- dnl Usage: DOT_BODY(abr_type, x_type, y_type) ... generate the function dnl body of the dot product routine without the suffix _x, i.e., dnl prec is not present. dnl Each type specifier can be one of dnl s ... real and single dnl d ... real and double dnl c ... complex and single dnl z ... complex and double dnl ---------------------------------------------------------------------- dnl define(`DOT_BODY', `ifelse( `$1&&$2&&$3', `s&&s&&s', `DOT($1_type, $2_type, $3_type, real_S, real_S)', `$1&&$2&&$3', `d&&d&&d', `DOT($1_type, $2_type, $3_type, real_D, real_D)', `$1&&$2&&$3', `c&&c&&c', `DOT($1_type, $2_type, $3_type, complex_S, complex_S)', `$1&&$2&&$3', `z&&z&&z', `DOT($1_type, $2_type, $3_type, complex_D, complex_D)', `$1&&$2&&$3', `d&&s&&s', `DOT($1_type, $2_type, $3_type, real_D, real_D)', `$1&&$2&&$3', `d&&s&&d', `DOT($1_type, $2_type, $3_type, real_D, real_D)', `$1&&$2&&$3', `d&&d&&s', `DOT($1_type, $2_type, $3_type, real_D, real_D)', `$1&&$2&&$3', `z&&c&&c', `DOT($1_type, $2_type, $3_type, complex_D, complex_D)', `$1&&$2&&$3', `z&&c&&z', `DOT($1_type, $2_type, $3_type, complex_D, complex_D)', `$1&&$2&&$3', `z&&z&&c', `DOT($1_type, $2_type, $3_type, complex_D, complex_D)', `$1&&$2&&$3', `c&&s&&s', `DOT($1_type, $2_type, $3_type, real_S, complex_S)', `$1&&$2&&$3', `c&&s&&c', `DOT($1_type, $2_type, $3_type, complex_S, complex_S)', `$1&&$2&&$3', `c&&c&&s', `DOT($1_type, $2_type, $3_type, complex_S, complex_S)', `$1&&$2&&$3', `z&&d&&d', `DOT($1_type, $2_type, $3_type, real_D, complex_D)', `$1&&$2&&$3', `z&&d&&z', `DOT($1_type, $2_type, $3_type, complex_D, complex_D)', `$1&&$2&&$3', `z&&z&&d', `DOT($1_type, $2_type, $3_type, complex_D, complex_D)')') dnl dnl dnl ---------------------------------------------------------------------- dnl Usage: TOP_DOT(abr_type, x_type, y_type) ... generate the top level dnl dot product routine without the suffix _x, i.e., prec is dnl not present. dnl Each type specifier can be one of dnl s ... real and single dnl d ... real and double dnl c ... complex and single dnl z ... complex and double dnl ---------------------------------------------------------------------- dnl define(`TOP_DOT', `ifelse( `$2&&$3', `$1&&$1', `void c_$1dot(enum blas_conjugate conj, int n, $1_scalar alpha, const $2_array x, int incx, $1_scalar beta, const $3_array y, int incy, $1_array r) DOT_BODY($1, $2, $3) /* end c_$1dot */', `void c_$1dot_$2_$3(enum blas_conjugate conj, int n, $1_scalar alpha, const $2_array x, int incx, $1_scalar beta, const $3_array y, int incy, $1_array r) DOT_BODY($1, $2, $3) /* end c_$1dot_$2_$3 */')') dnl dnl dnl ********************************************************************** dnl * * dnl * The following macros are used for routines ending with _x: * dnl * SWITCH_prec * dnl * DOT_X_BODY * dnl * TOP_DOT_X * dnl ********************************************************************** dnl dnl ---------------------------------------------------------------------- dnl Usage: SWITCH_prec($1, $2, $3, $4, $5, $6, $7, $8, $9) ... generate dnl a 3-way switch statement based on prec. dnl $4 and $5 are the types of 'prod/sum' and 'tmp' in single case. dnl $6 and $7 are the types of 'prod/sum' and 'tmp' in double case. dnl $8 and $9 are the types of 'prod/sum' and 'tmp' in extra case. dnl ---------------------------------------------------------------------- define(`SWITCH_prec', `switch ( prec ) { case blas_prec_single: DOT($1_type, $2_type, $3_type, $4, $5) break; case blas_prec_double: case blas_prec_indigenous: DOT($1_type, $2_type, $3_type, $6, $7) break; case blas_prec_extra: DOT($1_type, $2_type, $3_type, $8, $9) break; }')dnl dnl dnl ---------------------------------------------------------------------- dnl Usage: DOT_X_BODY(abr_type, x_type, y_type) ... generate the function dnl body of the dot product routine with the suffix _x, i.e., dnl prec is present. dnl Each type specifier can be one of dnl s ... real and single dnl d ... real and double dnl c ... complex and single dnl z ... complex and double dnl ---------------------------------------------------------------------- dnl define(`DOT_X_BODY', `ifelse( `$1&&$2&&$3', `s&&s&&s', `SWITCH_prec($1, $2, $3, real_S, real_S, real_D, real_D, real_E, real_E)', `$1&&$2&&$3', `d&&d&&d', `SWITCH_prec($1, $2, $3, real_D, real_D, real_D, real_D, real_E, real_E)', `$1&&$2&&$3', `c&&c&&c', `SWITCH_prec($1, $2, $3, complex_S, complex_S, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `z&&z&&z', `SWITCH_prec($1, $2, $3, complex_D, complex_D, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `d&&s&&s', `SWITCH_prec($1, $2, $3, real_D, real_D, real_D, real_D, real_E, real_E)', `$1&&$2&&$3', `d&&s&&d', `SWITCH_prec($1, $2, $3, real_D, real_D, real_D, real_D, real_E, real_E)', `$1&&$2&&$3', `d&&d&&s', `SWITCH_prec($1, $2, $3, real_D, real_D, real_D, real_D, real_E, real_E)', `$1&&$2&&$3', `z&&c&&c', `SWITCH_prec($1, $2, $3, complex_D, complex_D, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `z&&c&&z', `SWITCH_prec($1, $2, $3, complex_D, complex_D, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `z&&z&&c', `SWITCH_prec($1, $2, $3, complex_D, complex_D, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `c&&s&&s', `SWITCH_prec($1, $2, $3, real_S, complex_S, real_D, complex_D, real_E, complex_E)', `$1&&$2&&$3', `c&&s&&c', `SWITCH_prec($1, $2, $3, complex_S, complex_S, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `c&&c&&s', `SWITCH_prec($1, $2, $3, complex_S, complex_S, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `z&&d&&d', `SWITCH_prec($1, $2, $3, real_D, complex_D, real_D, complex_D, real_E, complex_E)', `$1&&$2&&$3', `z&&d&&z', `SWITCH_prec($1, $2, $3, complex_D, complex_D, complex_D, complex_D, complex_E, complex_E)', `$1&&$2&&$3', `z&&z&&d', `SWITCH_prec($1, $2, $3, complex_D, complex_D, complex_D, complex_D, complex_E, complex_E)')') dnl dnl dnl ---------------------------------------------------------------------- dnl Usage: TOP_DOT_X(abr_prec, x_prec, y_prec) ... generate the top level dnl dot product routine with the suffix _x, i.e., prec is present. dnl ---------------------------------------------------------------------- dnl define(`TOP_DOT_X', `ifelse( `$2&&$3', `$1&&$1', `void c_$1dot_x(enum blas_conjugate conj, int n, $1_scalar alpha, const $2_array x, int incx, $1_scalar beta, const $3_array y, int incy, $1_array r, enum blas_prec_type prec) { DOT_X_BODY($1, $2, $3) } /* end c_$1dot_x */', `void c_$1dot_$2_$3_x(enum blas_conjugate conj, int n, $1_scalar alpha, const $2_array x, int incx, $1_scalar beta, const $3_array y, int incy, $1_array r, enum blas_prec_type prec) { DOT_X_BODY($1, $2, $3) } /* end c_$1dot_$2_$3_x */')') dnl dnl ---------------------------------------------------------------------- dnl Invoke each function dnl ---------------------------------------------------------------------- TOP_DOT(s, s, s) TOP_DOT(d, d, d) TOP_DOT(c, c, c) TOP_DOT(z, z, z) TOP_DOT(d, s, s) TOP_DOT(d, s, d) TOP_DOT(d, d, s) TOP_DOT(z, c, c) TOP_DOT(z, c, z) TOP_DOT(z, z, c) TOP_DOT(c, s, s) TOP_DOT(c, s, c) TOP_DOT(c, c, s) TOP_DOT(z, d, d) TOP_DOT(z, z, d) TOP_DOT(z, d, z) TOP_DOT_X(s, s, s) TOP_DOT_X(d, d, d) TOP_DOT_X(c, c, c) TOP_DOT_X(z, z, z) TOP_DOT_X(d, s, s) TOP_DOT_X(d, s, d) TOP_DOT_X(d, d, s) TOP_DOT_X(z, c, c) TOP_DOT_X(z, c, z) TOP_DOT_X(z, z, c) TOP_DOT_X(c, s, s) TOP_DOT_X(c, s, c) TOP_DOT_X(c, c, s) TOP_DOT_X(z, d, d) TOP_DOT_X(z, d, z) TOP_DOT_X(z, z, d)