dnl Perform r = beta*r + alpha*x^T*y
dnl
#include "cblas.h"
include(cblas.m4.h)
dnl
dnl ----------------------------------------------------------------------
dnl Usage: DOT(abr_type, x_type, y_type, sum_type, tmp_type)
dnl        			      i=n-1
dnl        ... r = beta * r + alpha * SUM  (x[i]*y[i])
dnl				      i=0
dnl        abr_type : the type and precision of alpha, beta and r
dnl 	   x_type   : the type and precision of x
dnl 	   y_type   : the type and precision of y
dnl 	   sum_type : the type and precision of auxiliary variables prod/sum
dnl 	   tmp_type : the type and precision of auxiliary variable tmp
dnl Each type and precision specifier can be one of
dnl        real_S 	... real and single
dnl	   real_D 	... real and double
dnl	   real_I 	... real and indigenous
dnl	   real_E 	... real and extra
dnl	   complex_S	... complex and single
dnl	   complex_D	... complex and double
dnl	   complex_I	... complex and indigeneous
dnl	   complex_E	... complex and extra
dnl ----------------------------------------------------------------------
define(`DOT',
    `{  
	int i, ix = 0, iy = 0;
	PTR_CAST(r, $1, `')
	PTR_CAST(x, $2, `const')
	PTR_CAST(y, $3, `const')
	SCALAR_CAST(alpha, $1)
	SCALAR_CAST(beta, $1)
	DECLARE(x_ii, $2)
	DECLARE(y_ii, $3)
	DECLARE(r_v, $1)
    	DECLARE(prod, $4)
    	DECLARE(sum, $4)
    	DECLARE(tmp1, $5)
    	DECLARE(tmp2, $5)

	if ( n <= 0 ) {
	    ZERO_OUT(r_i, $1)
	    return; 
  	}
	GET_VECTOR_ELEMENT(r_v, r_i, 0, $1)
    	ZERO(sum, $4) /* sum = 0 */
	INC_ADJUST(incx, $2)
	INC_ADJUST(incy, $3)
	if ( incx < 0 ) ix = (-n+1)*incx;
	if ( incy < 0 ) iy = (-n+1)*incx;
	for (i = 0; i < n; ++i) {
	    GET_VECTOR_ELEMENT(x_ii, x_i, ix, $2)
	    GET_VECTOR_ELEMENT(y_ii, y_i, iy, $3)
	    MUL(prod, $4, x_ii, $2, y_ii, $3) /* prod = x[i]*y[i] */
	    ADD(sum, $4, sum, $4, prod, $4) /* sum = sum+prod */
	    ix += incx;
	    iy += incy;
        } /* endfor */
	MUL(tmp1, $5, sum, $4, alpha_i, $1) /* tmp1 = sum*alpha */
	MUL(tmp2, $5, r_v, $1, beta_i, $1) /* tmp2 = r*beta */
	ADD(tmp1, $5, tmp1, $5, tmp2, $5) /* tmp1 = tmp1+tmp2 */
	ROUND(r, $1, tmp1, $5) /* r = tmp1 */
    }') dnl
dnl
dnl **********************************************************************
dnl * The following macros are used for routines without _x: 	       	 *
dnl *   	DOT_BODY						 *
dnl *		TOP_DOT							 *
dnl **********************************************************************
dnl
dnl ----------------------------------------------------------------------
dnl Usage: DOT_BODY(abr_type, x_type, y_type) ... generate the function
dnl        body of the dot product routine without the suffix _x, i.e.,
dnl        prec is not present.
dnl Each type specifier can be one of
dnl        s 	... real and single
dnl	   d 	... real and double
dnl	   c	... complex and single
dnl	   z	... complex and double
dnl ----------------------------------------------------------------------
dnl
define(`DOT_BODY', `ifelse(
	`$1&&$2&&$3', `s&&s&&s',
	`DOT($1_type, $2_type, $3_type, real_S, real_S)',
	`$1&&$2&&$3', `d&&d&&d',
	`DOT($1_type, $2_type, $3_type, real_D, real_D)',
	`$1&&$2&&$3', `c&&c&&c',
	`DOT($1_type, $2_type, $3_type, complex_S, complex_S)',
	`$1&&$2&&$3', `z&&z&&z',
	`DOT($1_type, $2_type, $3_type, complex_D, complex_D)',
	`$1&&$2&&$3', `d&&s&&s',
	`DOT($1_type, $2_type, $3_type, real_D, real_D)',
	`$1&&$2&&$3', `d&&s&&d',
	`DOT($1_type, $2_type, $3_type, real_D, real_D)',
	`$1&&$2&&$3', `d&&d&&s',
	`DOT($1_type, $2_type, $3_type, real_D, real_D)',
	`$1&&$2&&$3', `z&&c&&c',
	`DOT($1_type, $2_type, $3_type, complex_D, complex_D)',
	`$1&&$2&&$3', `z&&c&&z',
	`DOT($1_type, $2_type, $3_type, complex_D, complex_D)',
	`$1&&$2&&$3', `z&&z&&c',
	`DOT($1_type, $2_type, $3_type, complex_D, complex_D)',
	`$1&&$2&&$3', `c&&s&&s',
	`DOT($1_type, $2_type, $3_type, real_S, complex_S)',
	`$1&&$2&&$3', `c&&s&&c',
	`DOT($1_type, $2_type, $3_type, complex_S, complex_S)',
	`$1&&$2&&$3', `c&&c&&s',
	`DOT($1_type, $2_type, $3_type, complex_S, complex_S)',
	`$1&&$2&&$3', `z&&d&&d',
	`DOT($1_type, $2_type, $3_type, real_D, complex_D)',
	`$1&&$2&&$3', `z&&d&&z',
	`DOT($1_type, $2_type, $3_type, complex_D, complex_D)',
	`$1&&$2&&$3', `z&&z&&d',
	`DOT($1_type, $2_type, $3_type, complex_D, complex_D)')') dnl
dnl
dnl ----------------------------------------------------------------------
dnl Usage: TOP_DOT(abr_type, x_type, y_type) ... generate the top level
dnl        dot product routine without the suffix _x, i.e., prec is
dnl        not present.
dnl Each type specifier can be one of
dnl        s 	... real and single
dnl	   d 	... real and double
dnl	   c	... complex and single
dnl	   z	... complex and double
dnl ----------------------------------------------------------------------
dnl
define(`TOP_DOT', `ifelse(
   	`$2&&$3', `$1&&$1',
	`void c_$1dot(enum blas_conjugate conj,
		        int n, 
			$1_scalar alpha, 
			const $2_array x, int incx, 
			$1_scalar beta,
			const $3_array y, int incy, 
			$1_array r)
	DOT_BODY($1, $2, $3) /* end c_$1dot */',
	`void c_$1dot_$2_$3(enum blas_conjugate conj,
			int n, 
			$1_scalar alpha, 
			const $2_array x, int incx, 
			$1_scalar beta,
			const $3_array y, int incy,
			$1_array r)
	DOT_BODY($1, $2, $3) /* end c_$1dot_$2_$3 */')') dnl
dnl
dnl **********************************************************************
dnl *									 *
dnl * The following macros are used for routines ending with _x:	 *
dnl *		SWITCH_prec					 	 *
dnl *		DOT_X_BODY						 *
dnl *		TOP_DOT_X						 *
dnl **********************************************************************
dnl 
dnl ----------------------------------------------------------------------
dnl Usage: SWITCH_prec($1, $2, $3, $4, $5, $6, $7, $8, $9) ... generate
dnl        a 3-way switch statement based on prec.
dnl        $4 and $5 are the types of 'prod/sum' and 'tmp' in single case.
dnl        $6 and $7 are the types of 'prod/sum' and 'tmp' in double case.
dnl        $8 and $9 are the types of 'prod/sum' and 'tmp' in extra case.
dnl ----------------------------------------------------------------------
define(`SWITCH_prec', 
       `switch ( prec ) {
  	case blas_prec_single:
    	    DOT($1_type, $2_type, $3_type, $4, $5)
	    break;
	case blas_prec_double:
        case blas_prec_indigenous:
    	    DOT($1_type, $2_type, $3_type, $6, $7)
	    break;
    	case blas_prec_extra:
	    DOT($1_type, $2_type, $3_type, $8, $9)
            break;
       }')dnl
dnl
dnl ----------------------------------------------------------------------
dnl Usage: DOT_X_BODY(abr_type, x_type, y_type) ... generate the function
dnl        body of the dot product routine with the suffix _x, i.e.,
dnl        prec is present.
dnl Each type specifier can be one of
dnl        s 	... real and single
dnl	   d 	... real and double
dnl	   c	... complex and single
dnl	   z	... complex and double
dnl ----------------------------------------------------------------------
dnl
define(`DOT_X_BODY', `ifelse(
	`$1&&$2&&$3', `s&&s&&s',
	`SWITCH_prec($1, $2, $3, real_S, real_S,
		real_D, real_D, real_E, real_E)',
	`$1&&$2&&$3', `d&&d&&d',
	`SWITCH_prec($1, $2, $3, 
		real_D, real_D, real_D, real_D, real_E, real_E)',
	`$1&&$2&&$3', `c&&c&&c',
	`SWITCH_prec($1, $2, $3, complex_S, complex_S,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `z&&z&&z',
	`SWITCH_prec($1, $2, $3, complex_D, complex_D,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `d&&s&&s',
	`SWITCH_prec($1, $2, $3, real_D, real_D,
		real_D, real_D, real_E, real_E)',
	`$1&&$2&&$3', `d&&s&&d',
	`SWITCH_prec($1, $2, $3, real_D, real_D,
		real_D, real_D, real_E, real_E)',
	`$1&&$2&&$3', `d&&d&&s',
	`SWITCH_prec($1, $2, $3, real_D, real_D,
		real_D, real_D, real_E, real_E)',
	`$1&&$2&&$3', `z&&c&&c',
	`SWITCH_prec($1, $2, $3, complex_D, complex_D,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `z&&c&&z',
	`SWITCH_prec($1, $2, $3, complex_D, complex_D,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `z&&z&&c',
	`SWITCH_prec($1, $2, $3, complex_D, complex_D,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `c&&s&&s',
	`SWITCH_prec($1, $2, $3, real_S, complex_S,
		real_D, complex_D, real_E, complex_E)',
	`$1&&$2&&$3', `c&&s&&c',
	`SWITCH_prec($1, $2, $3, complex_S, complex_S,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `c&&c&&s',
	`SWITCH_prec($1, $2, $3, complex_S, complex_S,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `z&&d&&d',
	`SWITCH_prec($1, $2, $3, real_D, complex_D,
		real_D, complex_D, real_E, complex_E)',
	`$1&&$2&&$3', `z&&d&&z',
	`SWITCH_prec($1, $2, $3, complex_D, complex_D,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `z&&z&&d',
	`SWITCH_prec($1, $2, $3, complex_D, complex_D,
		complex_D, complex_D, complex_E, complex_E)')') dnl
dnl
dnl ----------------------------------------------------------------------
dnl Usage: TOP_DOT_X(abr_prec, x_prec, y_prec) ... generate the top level
dnl        dot product routine with the suffix _x, i.e., prec is present.
dnl ----------------------------------------------------------------------
dnl
define(`TOP_DOT_X', `ifelse(
   	`$2&&$3', `$1&&$1',
	`void c_$1dot_x(enum blas_conjugate conj,
			int n, 
			$1_scalar alpha, 
			const $2_array x, int incx, 
			$1_scalar beta,
			const $3_array y, int incy, 
			$1_array r,
			enum blas_prec_type prec)
	{
	    DOT_X_BODY($1, $2, $3)
	} /* end c_$1dot_x */',
	`void c_$1dot_$2_$3_x(enum blas_conjugate conj,
			int n, 
			$1_scalar alpha, 
			const $2_array x, int incx, 
			$1_scalar beta,
			const $3_array y, int incy,
			$1_array r,
			enum blas_prec_type prec)
	{
	    DOT_X_BODY($1, $2, $3)
	} /* end c_$1dot_$2_$3_x */')')
dnl
dnl ----------------------------------------------------------------------
dnl Invoke each function
dnl ----------------------------------------------------------------------
TOP_DOT(s, s, s)
TOP_DOT(d, d, d)
TOP_DOT(c, c, c)
TOP_DOT(z, z, z)

TOP_DOT(d, s, s)
TOP_DOT(d, s, d)
TOP_DOT(d, d, s)

TOP_DOT(z, c, c)
TOP_DOT(z, c, z)
TOP_DOT(z, z, c)

TOP_DOT(c, s, s)
TOP_DOT(c, s, c)
TOP_DOT(c, c, s)

TOP_DOT(z, d, d)
TOP_DOT(z, z, d)
TOP_DOT(z, d, z)


TOP_DOT_X(s, s, s)
TOP_DOT_X(d, d, d)
TOP_DOT_X(c, c, c)
TOP_DOT_X(z, z, z)

TOP_DOT_X(d, s, s)
TOP_DOT_X(d, s, d)
TOP_DOT_X(d, d, s)

TOP_DOT_X(z, c, c)
TOP_DOT_X(z, c, z)
TOP_DOT_X(z, z, c)

TOP_DOT_X(c, s, s)
TOP_DOT_X(c, s, c)
TOP_DOT_X(c, c, s)

TOP_DOT_X(z, d, d)
TOP_DOT_X(z, d, z)
TOP_DOT_X(z, z, d)