dnl performs y = beta*y + alpha*A*x 
/* performs y = beta*y + alpha*A*x */
#include "cblas.h"
include(cblas.m4.h)
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#define MIN(a,b) (((a) < (b)) ? (a) : (b))

dnl
dnl ----------------------------------------------------------------------
dnl Usage: SYMV(aby_type, a_type, x_type, sum_type, tmp_type)
dnl        	     j = n-1		          i=n-1
dnl        ... y[j] = SUM   beta * y[j] + alpha * SUM  (a[i,j]*x[i])
dnl		     j = 0		          i=0
dnl        aby_type : the type and precision of alpha, beta and r
dnl 	   a_type   : the type and precision of x
dnl 	   x_type   : the type and precision of y
dnl 	   sum_type : the type and precision of auxiliary variable sum
dnl 	   tmp_type : the type and precision of auxiliary variable tmp
dnl Each type and precision specifier can be one of
dnl        real_S 	... real and single
dnl	   real_D 	... real and double
dnl	   real_I 	... real and indigenous
dnl	   real_E 	... real and extra
dnl	   complex_S	... complex and single
dnl	   complex_D	... complex and double
dnl	   complex_I	... complex and indigeneous
dnl	   complex_E	... complex and extra
dnl ----------------------------------------------------------------------
dnl

define(`SYMV',
        `{
        int i, j, ky, kx, jy, jx, ix, iy, tmpind = 0;
        PTR_CAST(y, $1, `')
        PTR_CAST(a, $2, `const')
        PTR_CAST(x, $3, `const')
        SCALAR_CAST(alpha, $1)
        SCALAR_CAST(beta, $1)
        DECLARE(y_ii, $1)
        DECLARE(y_jj, $1)
        DECLARE(aij, $2)
        DECLARE(x_ii, $3)
        DECLARE(prod1, $1)   /* case y = beta * y */
        DECLARE(prod, $4)
        DECLARE(sum, $4)
        DECLARE(tmp1, $5)
	DECLARE(tmp2, $5)
   
   /* checks to see if any of arguments are wrong */  
   
     if ((n <=0) ||
        (incy ==0) || 
        (incx ==0) || 
        (uplo != blas_upper && uplo != blas_lower) ||
        (lda < MAX(1, n))) {
        printf("there is an error in symv");
        return;
      }
   
   /* checks to see if we can return y with no calcs */  
   
     if ((n == 0) || 
        ((TEST_0(alpha_i, $1) && (TEST_1(beta_i, $1))))){
        return;
     }
   
   /* Set up start points in x and y  */
   
   INC_ADJUST(incx, $3)
   INC_ADJUST(incy, $1)
   ky = 0;
   kx = 0;
   if(incx < 0) kx = 0- (n - 1) * incx;
   if(incy < 0) ky = 0- (n - 1) * incy;

   /* if alpha equals 0, multiply y = y*beta*/  
   if(TEST_0(alpha_i, $1)){
    if(TEST_0(beta_i, $1)){
      iy = ky;
      for (i = 0; i < n; ++i){
        SET_ZERO_VECTOR_ELEMENT(y_i, iy, $1)   /* y[iy] = 0.0 */
        iy = iy + incy;
      }
    }
    else{
    for(i = 0; i < n; i++){
     GET_VECTOR_ELEMENT(y_ii, y_i, iy, $1)
     MUL(prod1, $1, y_ii, $1, beta_i, $1)     /* prod1 = beta*y[iy]*/
     SET_VECTOR_ELEMENT(y_i, iy, prod1, `$1') /* y[iy] = prod1 */
     iy = iy + incy;  
    }
   }
   return;
  }
  else{              
    if((order == blas_colmajor) && (uplo == blas_upper) ||
       (order == blas_rowmajor) && (uplo == blas_lower)){
        /* case where a is stored as col-major and upper or 
           row-major and lower */
         jy = ky; 
         for(j = 0; j < n; ++j){
           ZERO(sum, $4)
           tmpind = j*lda;    /* set tmpind to appropriate row */
           ix = kx;
           for(i = 0; i <= j; i++){
            /* gets the elements of a from start of row to diag */
             GET_VECTOR_ELEMENT(x_ii, x_i, ix, $3)
             GET_VECTOR_ELEMENT(aij, a_i, tmpind, $2)
	     MUL(prod, $4, x_ii, $3, aij, $2)   /* prod = a[tmpind]*x[i] */
             ADD(sum, $4, sum, $4, prod, $4)    /* sum = sum+prod */
             ix = ix+incx;
             tmpind++;       /* set tmpind to next element */
           }
           tmpind = j+(j+1)*lda;  /* set tmpind to appropriate column */
           for(i= j+1; i < n; i++){
           /* gets the elements of a from diagonal to end */
             GET_VECTOR_ELEMENT(x_ii, x_i, ix, $3)
             GET_VECTOR_ELEMENT(aij, a_i, tmpind, $2)
	     MUL(prod, $4, x_ii, $3, aij, $2)  /* prod = a[tmpind]*x[i] */
             ADD(sum, $4, sum, $4, prod, $4)   /* sum = sum+prod */
             ix = ix+incx;
             tmpind += lda;     /* set tmpind to get next element */
           }
            MUL(tmp1, $5, sum, $4, alpha_i, $1)   /* tmp1 = sum*alpha */
            GET_VECTOR_ELEMENT(y_jj, y_i, jy, $1)
            MUL(tmp2, $5, beta_i, $1, y_jj, $1)   /* tmp2 = y[jy]*beta */
            ADD(tmp1, $5, tmp1, $5, tmp2, $5)    /* tmp1 = tmp1+tmp2 */
            SET_ROUND_VECTOR_ELEMENT(y_i, jy, tmp1, $5) /* y[jy] = tmp1 */
            jy = jy + incy;
         }
        }
     /* case where a is col-major and lower or row-major and upper */
      else{ 
         jy = ky; 
         for(j = 0; j < n; j++){
           ZERO(sum, $4)
           ix = kx;
           tmpind = j;    /* set tmpind to initial row */
           for(i = 0; i <= j; i++){
             /*gets the elements of a from start of row to diag */
             GET_VECTOR_ELEMENT(x_ii, x_i, ix, $3)
             GET_VECTOR_ELEMENT(aij, a_i, tmpind, $2)
	     MUL(prod, $4, x_ii, $3, aij, $2)  /* prod = a[tmpind]*x[i] */
             ADD(sum, $4, sum, $4, prod, $4)   /* sum = sum+prod */
             ix = ix+incx;
             tmpind += lda;   /* set tmpind to get next element */
           }
           tmpind = (j+1) + j*lda;  /* set tmpind to initial element */
           for(i= j+1; i < n; i++){
             /* gets the elements of a from diag to end */
             GET_VECTOR_ELEMENT(x_ii, x_i, ix, $3)
             GET_VECTOR_ELEMENT(aij, a_i, tmpind, $2)
	     MUL(prod, $4, x_ii, $3, aij, $2)  /* prod = a[tmpind]*x[i] */
             ADD(sum, $4, sum, $4, prod, $4)   /* sum = sum+prod */
             ix = ix+incx; 
             tmpind++;            /* set tmpind to get next element */
           }
            MUL(tmp1, $5, sum, $4, alpha_i, $1) /* tmp1 = sum*alpha */
            GET_VECTOR_ELEMENT(y_jj, y_i, jy, $1)
            MUL(tmp2, $5, beta_i, $1, y_jj, $1)  /* tmp2 = y[jy]*beta */
            ADD(tmp1, $5, tmp1, $5, tmp2, $5)    /* tmp1 = tmp1+tmp2 */
            SET_ROUND_VECTOR_ELEMENT(y_i, jy, tmp1, $5) /* y[jy] = tmp1 */
            jy = jy + incy;
         }
        }
    }   
}')


dnl
dnl ----------------------------------------------------------------------
dnl Usage: TOP_SYMV(aby_type, a_type, x_type) ... generate the top level
dnl        SYMV routine without the suffix _x, i.e., prec is
dnl        not present.
dnl Each type specifier can be one of
dnl        s 	... real and single
dnl	   d 	... real and double
dnl	   c	... complex and single
dnl	   z	... complex and double
dnl ----------------------------------------------------------------------
dnl
define(`TOP_SYMV', `ifelse(
        `$2&&$3', `$1&&$1',
        `void c_$1symv(enum blas_order_type order,
			 enum blas_uplo_type uplo,x
                         int n,
                         $1_scalar alpha,
                         $2_array a,
                         int lda, 
                         $3_array x, int incx,
                         $1_scalar beta,
                         $1_array y, int incy)
        {
            SYMV($1_type, $2_type, $3_type, $1_type, $1_type)
        } /* end c_$1symv */',
        `void c_$1symv_$2_$3(enum blas_order_type order,
			enum blas_uplo_type uplo,
                        int n,
                        $1_scalar alpha,
                        $2_array a[],
                        int lda,
                        $3_array x, int incx,
                        $1_scalar beta,
                        $1_array y, int incy)
        {
            SYMV($1_type, $2_type, $3_type, $1_type, $1_type);
        } /* end c_$1symv_$2_$3 */')')
dnl
dnl ----------------------------------------------------------------------
dnl Usage: SWITCH_prec($1, $2, $3, $4, $5, $6, $7, $8, $9) ... generate
dnl        a 3-way switch statement based on prec.
dnl        $4 and $5 are the types of 'sum' and 'tmp' in single case.
dnl        $6 and $7 are the types of 'sum' and 'tmp' in double/indigenous case.
dnl        $7 and $8 are the types of 'sum' and 'tmp' in extra case.
dnl ----------------------------------------------------------------------
define(`SWITCH_prec', 
       `switch ( prec ) {
  	case blas_prec_single:
    	    SYMV($1_type, $2_type, $3_type, $4, $5)
	    break;
	case blas_prec_double:
        case blas_prec_indigenous:
    	    SYMV($1_type, $2_type, $3_type, $6, $7)
	    break;
    	case blas_prec_extra:
	    SYMV($1_type, $2_type, $3_type, $8, $9)
            break;
       }')dnl


dnl
dnl ----------------------------------------------------------------------
dnl Usage: SYMV_X_BODY(aby_type, a_type, x_type) ... generate the function
dnl        body of the dot product routine with the suffix _x, i.e.,
dnl        prec is present.
dnl Each type specifier can be one of
dnl        s 	... real and single
dnl	   d 	... real and double
dnl	   c	... complex and single
dnl	   z	... complex and double
dnl ----------------------------------------------------------------------
dnl
define(`SYMV_X_BODY', `ifelse(
	`$1&&$2&&$3', `s&&s&&s',
	`SWITCH_prec($1, $2, $3, real_S, real_S,
		real_D, real_D, real_E, real_E)',
	`$1&&$2&&$3', `d&&d&&d',
	`SWITCH_prec($1, $2, $3, 
		real_D, real_D, real_D, real_D, real_E, real_E)',
	`$1&&$2&&$3', `c&&c&&c',
	`SWITCH_prec($1, $2, $3, complex_S, complex_S,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `z&&z&&z',
	`SWITCH_prec($1, $2, $3, complex_D, complex_D,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `d&&s&&s',
	`SWITCH_prec($1, $2, $3, real_S, real_D,
		real_D, real_D, real_E, real_E)',
	`$1&&$2&&$3', `d&&s&&d',
	`SWITCH_prec($1, $2, $3, real_D, real_D,
		real_D, real_D, real_E, real_E)',
	`$1&&$2&&$3', `d&&d&&s',
	`SWITCH_prec($1, $2, $3, real_D, real_D,
		real_D, real_D, real_E, real_E)',
	`$1&&$2&&$3', `z&&c&&c',
	`SWITCH_prec($1, $2, $3, complex_S, complex_D,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `z&&c&&z',
	`SWITCH_prec($1, $2, $3, complex_D, complex_D,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `z&&z&&c',
	`SWITCH_prec($1, $2, $3, complex_D, complex_D,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `c&&s&&s',
	`SWITCH_prec($1, $2, $3, real_S, complex_S,
		real_D, complex_D, real_E, complex_E)',
	`$1&&$2&&$3', `c&&s&&c',
	`SWITCH_prec($1, $2, $3, complex_S, complex_S,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `c&&c&&s',
	`SWITCH_prec($1, $2, $3, complex_S, complex_S,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `z&&d&&d',
	`SWITCH_prec($1, $2, $3, real_D, complex_D,
		real_D, complex_D, real_E, complex_E)',
	`$1&&$2&&$3', `z&&d&&z',
	`SWITCH_prec($1, $2, $3, complex_D, complex_D,
		complex_D, complex_D, complex_E, complex_E)',
	`$1&&$2&&$3', `z&&z&&d',
	`SWITCH_prec($1, $2, $3, complex_D, complex_D,
		complex_D, complex_D, complex_E, complex_E)')') dnl

dnl
dnl ----------------------------------------------------------------------
dnl Usage: TOP_SYMV_X(abr_prec, x_prec, y_prec) ... generate the top level
dnl        dot product routine with the suffix _x, i.e., prec is present.
dnl ----------------------------------------------------------------------
dnl

define(`TOP_SYMV_X', `ifelse(
        `$2&&$3', `$1&&$1',
        `void c_$1SYMV_x(enum blas_order_type order,
			 enum blas_uplo_type uplo,
                         int n,
                         $1_scalar alpha,
                         $2_array a,
                         int lda, 
                         $3_array x, int incx,
                         $1_scalar beta,
                         $1_array y, int incy,
                         enum blas_prec_type prec)
        {
            SYMV_X_BODY($1, $2, $3)
        } /* end c_$1SYMV_x */',
        `void c_$1SYMV_$2_$3_x(enum blas_order_type order,
			enum blas_uplo_type uplo,
                        int n,
                        $1_scalar alpha,
                        $2_array a,
                        int lda,
                        $3_array x, int incx,
                        $1_scalar beta,
                        $1_array y, int incy,
                        enum blas_prec_type prec)
        {
            SYMV_X_BODY($1, $2, $3)
        } /* end c_$1SYMV_$2_$3_x */')')
	    
dnl
dnl ----------------------------------------------------------------------
dnl Invoke each function
dnl ----------------------------------------------------------------------
	        
TOP_SYMV(s, s, s)
TOP_SYMV(d, d, d)
TOP_SYMV(z, z, z)
TOP_SYMV(c, c, c)

TOP_SYMV(d, s, s)
TOP_SYMV(d, s, d)
TOP_SYMV(d, d, s)

TOP_SYMV(z, c, c)           
TOP_SYMV(z, c, z)              
TOP_SYMV(z, z, c)	 

TOP_SYMV(c, s, s)
TOP_SYMV(c, s, c)
TOP_SYMV(c, c, s)

TOP_SYMV(z, d, d) 
TOP_SYMV(z, d, z)
TOP_SYMV(z, z, d)

TOP_SYMV_X(s, s, s)
TOP_SYMV_X(d, d, d)
TOP_SYMV_X(z, z, z)
TOP_SYMV_X(c, c, c)

TOP_SYMV_X(d, s, s)
TOP_SYMV_X(d, s, d)
TOP_SYMV_X(d, d, s)

TOP_SYMV_X(z, c, c)           
TOP_SYMV_X(z, c, z)              
TOP_SYMV_X(z, z, c)	 

TOP_SYMV_X(c, s, s)
TOP_SYMV_X(c, s, c)
TOP_SYMV_X(c, c, s)

TOP_SYMV_X(z, d, d) 
TOP_SYMV_X(z, d, z)
TOP_SYMV_X(z, z, d)