;;; compute value of p(x),a polynomial in x, p(x)=a_n*x^n+...+a_0
;;; given a list A={a_n, a_{n-1}, ..., a_0)  and x, ALL numbers.
;;;

;;(eval-when (compile load eval) (declare (optimize (speed 3)(safety 0))))

(defun horner(A x)  
  ;; any common lisp numbers in A and x. 
  ;; order in A: highest degree down to constant term
  (let((q (car A)))
    (loop for i in (cdr A) do (setf q (+ i (* x q))))
    q))

;; next 2 programs, same computation as above, evaluate-poly is 10X slower
;; than hornerf,  each good only for double-floats.

#+ignore 
(defun evaluate-poly (p x) 
  (declare (double-float x)(optimize (speed 3)(safety 0)))
  (reduce #'(lambda (a c)(declare(double-float a c)) (+ c (* x a)))  p :initial-value 0.0d0))
;; in maxima eval_poly(p,x):=xreduce(lambda([a,c],a*x+c),p)

(defun hornerf(A x);;optimized for double floats in A and x (required!)
  (declare (double-float x)(optimize(speed 3)(safety 0)))
  (let((q (car A)))
  (declare (double-float q))
    (loop for i double-float in (cdr A) do 
			      (setf q (+ i (* x q))))
    q))

;; Maxima version.  Evaluate a polynomial from float coeflist and point

(defun $eval_poly_coeflist_f(A x)(hornerf (cdr A) (coerce x 'double-float)))

(defun $hornerfe(A x)(cons '(mlist)(multiple-value-list (hornerfe A x))))

;; horner-float-error  (See e.g. p 95 Higham) returns  value and error-bound

(defun hornerfe(A x)			;double floats in A and x
  (declare (double-float x)(optimize(speed 3)(safety 0)))
  (let*((y (car A))
	(mu (* 0.5d0 (abs y)))
	(xa (abs x)))
  (declare (double-float y mu xa))
    (loop for i double-float in (cdr A) do 
	  (setf y (+ i (* x y)))
	  (setf mu (+ (abs y) (* xa mu))))
    (values y (* double-float-epsilon (- (* 2.0d0 mu) (abs y))))))

;;;(hornerfe '(1.0d0 2.0d0 ) 5.0d0)  must be double floats
;; Maxima version.  Evaluate a polynomial and error from float coeflist and point
(defun $eval_poly_coeflist_fe(a x)
  (cons '($value_error simp) ;; for Maxima,returns value_error(val, errorbound)
	(multiple-value-list 
	 (hornerfe  (map 'list #'$float (cdr a))
		    ($float x)))))


;; A multivariate polynomial coefficient extractor:
;; 

;; poly_coeflist_var(polynomial expression,variable)

;;poly_coeflist_var(a*x+b/c,x)   returns coeflist(a,b/c).

;; This is pretty well defined if the expression is a polynomial in var.
;;
;; If not, proceed with caution.
;; If poly_coeflist_var(p,v) and p is actually a rational function (ratio)
;; and v is not in (the numerator of) p,
;; then the expression is treated as p*v^0.
;; This does not exclude the case where v is also in the denominator,
;; so also beware, it might then appear in the denominator of the
;; coefficients! For example,
;; poly_coeflist_var((a*x^2+1)/(x-1),x); returns coeflist(a/(x-1),0,1/(x-1)).
;; note there are 2 arguments: this version specifies a main variable.
;; If the polynomial p has only one variable, the second argument can be
;; omitted.  If  p includes several variables and the second
;; argument is omitted, the program chooses the first of the listofvars(p).

(defun $poly_coeflist_var(p &rest var)
  (setf var (if (null var) (cadr($listofvars p)) (car var)))
  (let* ((varlist nil)(genvar nil)
	 (r (ratrep p (list var)))
	 (den (cddr r))	
	 (hipow (second (cadr r)))
	 (ar (make-array (1+ hipow) :initial-element 0))
	 (pairlist (cdadr r)))
    (declare (special varlist genvar))
    (if (not (equal var (pdis (list (caadr r) 1 1))))
	(return-from $poly_coeflist_var (list '($coeflist simp) p))) 
    (loop for i on pairlist by #'cddr do
	  (setf (aref ar (car i))
		(rdis (ratreduce (cadr i) den) )))
    (cons '($coeflist simp) (nreverse (coerce ar 'list)))))

;; Here's a simpler case, done faster. Let p be a polynomial in ONE variable. 
;; poly_coeflist produce a list of coefficients as DOUBLE FLOATS.
;;The poly_coeflist(3*x^2+5*x+100)returns  coeflist(3,5,100).
;; For a univariate polynomial p,
;; poly_coeflist(p):= float(poly_coeflist_var(p))
;; but it is maybe 5X faster.

(defun $poly_coeflist(p)
  (let* ((r (ratf p))
	 (den (cddr r))		;denominator. better be number!
	 (hipow (second (cadr r)))
	 (ar (make-array (1+ hipow) :initial-element 0.0d0))
	 (pairlist (cdadr r)))
    (loop for i on pairlist by #'cddr do
	 (setf (aref ar (car i))
	   (coerce (/ (cadr i) den) 'double-float)))
    (cons '($coeflist simp) (nreverse (coerce ar 'list)))))

(defun $poly_coef_array(p)
  (if ($numberp p) p
  (let* ((r (ratf p))
	 (den (cddr r))		;denominator. better be number!
	 (hipow (second (cadr r)))
	 (ar (make-array (1+ hipow) :initial-element 0))
	 (pairlist (cdadr r)))
    (loop for i on pairlist by #'cddr do
	 (setf (aref ar (car i))
	   ($poly_coef_array(div (pdis(cadr i))(pdis den)) )))
    (cons '(mlist) (cons (pdis (list (caadr r) 1 1))  (nreverse (coerce ar 'list)))))))


;; Similar to the horner program but for Tchebysheff (Chebyshev) coefficients.
;; If you don't know about Chebyshev series, ignore this. Or learn about them.
;; They are interesting..

(defun eval_ts(a x) ;; clenshaw algorithm
  ;; evaluate p=sum'(a_nT_n(x), i=0 to N)  a is chebseries
  ;; sum' multiplies a_0 by 1/2
  ;; This works for any numeric data.
  ;; Without declarations, relatively slowly, if you know you have floats.
  
  (let* ((bj2 0)
		(bj1 0)
		(bj0 0))
	 
	   (setf a (reverse a))  
	   (loop while (cdr a) do
		 (setf bj0
		   (+ (* 2 x bj1)
		      (- bj2)
		      (pop a)))
		 (setf bj2 bj1 bj1 bj0))
	   (+ (* x bj1) (* 1/2 (pop a))(- bj2))))


(defun eval_tsf(a x) ;; clenshaw algorithm DOUBLE FLOATS ALL AROUND
  ;; evaluate p=sum'(a_nT_n(x), i=0 to N)  a is chebseries
  ;; sum' multiplies a_0 by 1/2
  ;; works. but only for floats
  (declare (optimize (speed 3)(safety 0))
	   (double-float x))
  
  (let* ((bj2 0.0d0)
		(bj1 0.0d0)
		(bj0 0.0d0))
	   (declare(double-float bj2 bj1 bj0))
	   (loop while (cdr a) do
		 (setf bj0
		   (+ (* 2.0d0 x bj1)
		      (- bj2)
		      (the double-float (pop a))))
		 (setf bj2 bj1 bj1 bj0))
	   (+ (* x bj1) (* 0.5d0(pop a))(- bj2))))

;; For this to work we need to convert 1/2 to ((rat) 1 2) and back. 
;; I must have written this before, but so what.

;; input is lisp number.  if 1/2 make it ((rat) 1 2)
(defun rat2max(x)(typecase x (ratio (list '(rat)(numerator x)(denominator x)))
			   (number  x)
			   (t (error "expected a number, not ~s" x))))

;; input is Maxima number. a lisp number or ((rat) 3 5) make it a lisp number 3/5
(defun max2rat(x)(typecase x 
			   (number  x)
			   (cons (if (eq (caar x) 'rat) (/ (cadr x ) (caddr x))))
			   (t (error "expected a number, not ~s" x))))
			;;; maxima access. aa is cheby ..remove $chebyseries header

;; the version callable from Maxima.  There are faster versions of eval_ts below
(defun $eval_ts(aa xx)
  (let ((a (mapcar #'max2rat (cdr aa)))
	(x (max2rat xx)))
    (rat2max (eval_ts a x))))

(defun $eval_tsf(aa xx)
  (let ((a (mapcar #'max2rat (cdr aa)))
	(x (coerce (max2rat xx) 'double-float)))
   (rat2max (eval_ts2 a x))))


;; in order to allow this to work in any lisp, not requiring maxima, we includ
;; el cheapo polynomial add and multiply.

;; if you are not doing division, this 
;; order of coefficients is harder than the reverse.
;; but what the hey.

;; here's a version of horner that generalizes  the point p
;; from just being a number to be a non-number.
;;That is, p might be something like x-3  or actually any polynomial..

(defun hornerpol(A x) ;; A and x are polynomials.  e.g. (4 3)  is 4*y+3
  (let((q (list (car A))))
    (loop for i in (cdr A) do  (setf q (polc+ i (pol* x q))))
    q))


;; The point of this is to develop mathematically
;; equivalent arithmetical calculations which are, practically speaking,
;; faster and especially more accurate.

;; (horner A x)   
(defun sh(A x diff) 
  (horner (hornerpol A (list 1 diff)) (- x diff))) ;; shift by diff. then shift back.
(defun shf(A x diff) (horner 
		      (mapcar #'(lambda(r)(coerce r 'double-float))(hornerpol A (list 1 diff)))
		      (- x diff))) ;; shift by diff exactly, float the coefs. then shift back.




(defun polc+ (i p)(let ((q (nreverse p)))(nreverse (cons (+ i (car q))(cdr q)))))

(defun pol+ (p1 p2)(let* ((lp1 (length p1)) ;; add 2 polys in one variable
			  (lp2 (length p2))
			  (ans (nreverse(append (if (> lp1 lp2) p1 p2) nil))) ; copy longest
			  (other (reverse (if (> lp1 lp2) p2 p1)))
			 (ptr ans))
		     (loop for k in other do (incf (car ptr) k)
					     (pop ptr))
		  ;;   (nreverse other)
		     (nreverse ans)))
		     
;; multiply 2 polynomials. not super efficient.

(defun pol* (p1 p2) ;reminder p1 = (hi_deg ... const). etc
  (let ((ans nil)
	(pad nil))
    (loop for k in (reverse p1) do 
	  (setf ans (pol+ 
		     ans
		     (append (mapcar #'(lambda(r)(* r k)) p2) pad)))
	  (push 0 pad))
    ans))

;; w20 in monomial basis exactly
;; computes product of (x-i), i=1 to n, SYMBOLICALLY. I.e. x is
;; the polynomial represented by list (1 0), x-9 is the list (1 -9) etc.

(defun w(n)
  (let ((ans '(1)))
    (loop for i from 1 to n do (setf ans (pol* ans (list 1 (- i)))))
    ans))

;; oh, some other polynomials..
(defun hfact(n)(do ((i 1 (1+ i))(r 1 (* i r)))((> i n) r)))


(defun s(n)				; sine of x, upto degree n
  (nreverse
   (loop for i from 0 to n 
     collect (if (evenp i) 0 (* (expt -1 (/ (1- i)2)) (/ 1 (hfact i)))))
 ))
						  
						  
(defun flwli (x) 
  ;; convert a list of rationals to double-floats
  ;; without loss of information (wli)
  ;; if not possible, prints msgs.
  
  (let ((m 0.0d0))
    (mapcar #'(lambda(r)
		(typecase r
		  (double-float r)
		  (t
		   (setf m (coerce r 'double-float))
		   (cond ((= r (rational m)) m)
		     (t(format t "~% rational ~s and float ~s differ by ~s"
			     r m
			     (coerce 
			      (abs  (- r (rational m)))
			      'double-float)) m)))))
		    x)))


;;(loop for i from 1.0d0 to 20.0d0 do (print (shf w20mbrat i 105/10)))
;;(loop for i from 1.0d0 to 20.0d0 do (print (shf w20mbrat i 10)))

;; return a program to compute the polynomial using a shifted "origin"


(defun shfprog(A diff)
  (let ((newcoefs (flwli (hornerpol A (list 1 diff)))))
    #'(lambda(x)(horner newcoefs (- x diff)))))

;; for Maxima, we hand in a name

(defun $shprog(A diff name)  
  (let ((newcoefs 
	 (flwli (hornerpol (cdr
			    ($poly_coeflist A)) (list 1 (max2rat diff))))))
    (setf (symbol-function name) 
      #'(lambda(x)(horner newcoefs (- x (max2rat diff)))))
    name))

;;  (setf (symbol-function 'p10) (shfprog w20mbrat 10))  then  (p10  12.5d0)  etc

(defun shfproge(A diff);; return function which computes value plus error
  (let ((newcoefs (flwli(hornerpol A (list 1 diff)))))
    #'(lambda(x)(hornerfe newcoefs (coerce (- x diff) 'double-float)))))

(defun $shproge(A diff name)  
  (let ((newcoefs 
	 (flwli (hornerpol (cdr
			    ($poly_coeflist A)) (list 1 (max2rat diff))))))
    (setf (symbol-function name) 
      #'(lambda(x)(cons '($val_error) (multiple-value-list
				       (hornerfe newcoefs (- x (max2rat diff))))))))
  name)

;; example
;;  (setf (symbol-function 'p10e) (shfproge w20mbrat 10))
;; (p10e 10.0d0) -> 0.0 0.0  for value, error




;; (setf w18mb (flwli (w 18)))  ;; gives no warning of conversion problem.  w19 and 20 do


#|
/*if powers(a,b,c,d) is an encoding of a+b*x+c*x^2+d*x^3,   with "x" understood, then converting
from power basis [exactly!] to chebshev basis is */

power2cheby(p):=
 block([pa,ans,n:length(p)],
  local(pa,ans),
  pa[i]:=inpart(p,i+1),
  ans[i]:=0,
  for r:0 thru n-1 do /* for each power r*/
   for q:0 thru r do (
     if evenp(r-q)then 
           ans[q]:ans[q]+pa[r]*2^(1-r)*binomial(r,(r-q)/2)),
	   apply ('chebseries, makelist(ans[i],i,0,n-1)))$
	   |#
;; pr is a list (a0 a1 a2 ...)  for a0+a1*x+a2*x^2+ ...

#+ignore
(defun power2cheby(pr) ;; pr is a list of double-floats?  or maybe rationals. 
  (let*((n (length pr))
	(pa  (make-array n :initial-contents pr))
	(ans (make-array n :initial-element 0)))
    (loop for r from 0 to (1- n) do
	  (loop for q from 0 to r do
		(if (evenp (- r q))
			   (incf (aref ans q)
				 (* (aref pa r)(expt 2 (- 1 r))(choose r (* 1/2 (- r q))))))))
    (coerce ans 'list)))

;; pr is list of coeffs, hi degree first.

(defun power2cheby(pr) ;; pr is a list of double-floats?  or maybe rationals. 
  (let*((n (length pr))
	(pa  (make-array n :initial-contents (nreverse pr)))
	(ans (make-array n :initial-element 0)))
    (loop for r from 0 to (1- n) do
	  (loop for q from 0 to r do
		(if (evenp (- r q))
			   (incf (aref ans q)
				 (* (aref pa r)(expt 2 (- 1 r))(choose r (* 1/2 (- r q))))))))
     (coerce ans 'list)))

;;;L is a list ((mlist) ...)

(defun $power2cheby(L)	
  (cons '($chebyseries) (mapcar #'rat2max  (power2cheby (mapcar #'max2rat (cdr L))))))

(defun $poly2cheby(p) ;; p is a maxima expression, a polynomial in 1 variable.
  (cons '($chebyseries) (power2cheby (cdr ($poly_coeflist_var p) ))))

(defun $poly2chebyf(p) ;; p is a maxima expression, a polynomial in 1 variable.
(cons '($chebyseries) (power2cheby (cdr ($poly_coeflist p) ))))


				 
(defun chopser(L n) ;; take only first n terms of list
 (loop for i from 0 to (1- n) collect (elt  L i)))	       
 
(defun choose (n k)
  (labels ((prod-enum (s e)
	     (do ((i s (1+ i)) (r 1 (* i r))) ((> i e) r)))
	   (fact (n) (prod-enum 1 n)))
    (/ (prod-enum (- (1+ n) k) n) (fact k))))
 

;; w18c chebyshev coefficients for w18

(defvar w18 (w 18))			; exact integer coefficients
(defvar $w18 (cons '($coeflist simp) w18))
(defvar w18c  (power2cheby w18))	; exact rational coeffs for cheby series
(defvar w18fl (flwli w18))		;float coefs. also exact
(defvar $w18fl (cons '($polcoefs) w18fl)) ;float coefs. also exact

(defvar $w18c (cons '($chebyseries) w18c))

;;  w18c scaled by 2^17.  These are all integers, but too many
;; digits to convert  100% to double-floats. 
(defvar w18c17(mapcar #'(lambda(r)(* r #.(expt 2 17))) w18c))

;; note (eval_tsx w18c 65/10)  and (horner w18 65/10) are equal,
;; 3287253918823875/262144
;; note (eval_tsx w18c 6)  and (horner w18 6) are equal, namely 0.

;; (flwli w18c) or (flwli w18c17)  gives some warnings . 9 of em/
(defvar w18cfl (flwli w18c))
(defvar w18fl (flwli w18))

;; note (eval_tsx w18cfl 65/10)  and (horner w18fl 65/10) are NOT equal,
;; w18cfl is NOT fully accurate because of rounding. 5 digits agree tho.
;; Also note (eval_tsx w18cfl 6)  is 10554.0 when it should be 0.

;; what about at other points?
;; (horner w18fl 6.5d0) is    1.2539867158d+10
;; (horner w18fl 65/10) is    1.2539867158d+10
;; (horner w18   6.5d0) is    1.2539878535552502d+10  (right)
;; (dfloat(horner w18 65/10)) 1.2539878535552502d+10  (right)
;; differs                          ************
;; w18fl is wrong not because of coefficients or x, but
;; because floating point arithmetic is used.
;; what about (sh w18fl 65/10 6)
;; or         (sh w18fl 6.5d0 6)
;; they return                1.2539878535552502d+10   (right)

;; but so does (sh w18fl 6.5d0 13)
;; even (sh w18fl 6.5d0 1)  returns a pretty good answer, (8 digits right)
;;                            1.2539878 9759375d+10
;; while (sh w18fl 6.5d0 0)   1.25398 67158d+10 
;; tho   (sh w18fl 6.5d0 20)  1.2 493240416d+10  is way worse
;; and   (sh w18fl 6.5d0 6.5) 1.2539867158d+10


;; o'course using w18cfl is wrong because the coeffs are slightly wrong..
;; (eval_tsx w18cfl 65/10)    1.2539 90375d+10
;; but if we shift the series to be valid between 5 and 7, cheby series works:
;; (setf (symbol-function 'f1) (power2chebysh w18fl 5 7))
;; (f1 6.5d0)                1.2539878535552502d+10  (100% right)
;; if we try to cover too much in one shift:
;;  (setf (symbol-function 'f1) (power2chebysh w18fl 1 18))
;; (f1 6.5d0)                1.25398 6152242871d+10  (too ambitious)
;; but if we do the translation exactly on the rational (actually integer)
;; coefficients w18, and then convert to floats...

;; (setf (symbol-function 'f1) (power2chebysh w18 1 18))
;; (f1 6.5d0)                1.25398785355522 46d+10 (rather close, 15 digits)
;;  (setf (symbol-function 'f1) (power2chebysh w18 4 8)) smaller range
;;(f1 6.5d0)                 1.2539878535552502d+10 (100% right)
;; 


;;; program to do sort of power2cheby,but shifted and scaled
(defun power2chebysh(L a b)  ;; use monomial base L, but for a<x<b
  (let* ((sum (+ b a))
	 (diff (- b a))
	 (newcoefs (flwli (power2cheby 
			    (hornerpol L 
				       (list (/ diff 2) (/ sum 2)))))))
    #'(lambda(x) (eval_tsx newcoefs (/ (- (* 2 x) sum) diff)))))

(defun power2chebyshX(L a b)  ;; use monomial base L, but for a<x<b exact
  (let* ((sum (+ b a))
	 (diff (- b a))
	 (newcoefs (power2cheby 
			    (hornerpol L 

				       (list (/ diff 2) (/ sum 2))))))
    #'(lambda(x) (eval_tsx newcoefs (/ (- (* 2 x) sum) diff)))))

(defun eval_tsder(aa x) ;; clenshaw algorithm for value and derivative
;; copied from echebser1
  
  (let* ((b2 0)	 (b1 0)	(b0 0) (c2 0) (c1 0)(c0 0)
	 (x2 (* x 2)) (nc 0) (a nil))
    (setf a (coerce aa 'vector)  nc (length a)
	  b0 (aref a (1- nc)) c0 b0)

    (loop for i from (- nc 2) downto 0 
	do
	  (setf	b2 b1 b1 b0  b0  (+ (* x2 b1)(- b2)(aref a i)) )
	  (unless (= i 0)
		   (setf c2 c1 c1 c0 c0 (+ b0 (* x2 c1) (- c2)))))

    (values  (* 1/2 (- b0 b2))		; value. right
	     (- c0 c2))))  ; first deriv


(defun eval_tsx(aa x) ;; clenshaw algorithm for value
;; copied from echebser0. no declarations
  
  (let* ((b2 0)	 (b1 0)	(b0 0)
	 (x2 (* x 2)) (nc 0) (a nil))
    (setf a (coerce aa 'vector)  nc (length a)
	  b0 (aref a (1- nc)) )
    (loop for i from (- nc 2) downto 0 
	do
	  (setf	b2 b1 b1 b0  b0  (+ (* x2 b1)(- b2)(aref a i)) )
	 )
     (* 1/2 (- b0 b2))		; value
     ))

(defun eval_tsxf(aa x) ;; clenshaw algorithm for value
  ;; copied from echebser0. everything double-float
  (declare (double-float x)(optimize (speed 3)(safety 0)))
  
  (let* ((b2 0.0d0) (b1 0.0d0)	(b0 0.0d0)
	 (x2 (* x 2)) (nc 0) (a nil))
    (declare(double-float b0 b1 b2 x2))
    
    (setf a (coerce aa 'vector)  nc (length a)
	  b0 (the double-float (aref a (1- nc)) ))
    (loop for i from (- nc 2) downto 0 
	do
	  (setf	b2 b1 b1 b0  b0  (+ (* x2 b1)(- b2)(the double-float (aref a i)) ))
	 )
     (* 0.5d0 (- b0 b2))		; value
     ))

(defun eval_tsxfe(aa x) ;; clenshaw algorithm for value and error bound
  ;; copied from echebser0. everything double-float
  ;; probably wrong, look at interval version...
  (declare (double-float x)(optimize (speed 3)(safety 0)))
  
  (let* ((b2 0.0d0) (b1 0.0d0)	(b0 0.0d0)
	 (e2 0.0d0) (e1 0.0d0)  (e0 0.0d0)
	 (pert (+ 1.0d0 double-float-epsilon)) ;perturbation
	 (x2 (* x 2)) (nc 0) (a nil))
    (declare(double-float b0 b1 b2 x2 e0 e1 e2 pert))
    
    (setf a (coerce aa 'vector)  nc (length a)
	  b0 (the double-float (aref a (1- nc)) ))
    (loop for i from (- nc 2) downto 0 
	do
	  (setf	b2 b1 b1 b0  
		b0  
		(+ (* x2 b1)(- b2)(the double-float (aref a i)) ))
	  (setf e2 e1 e1 e0 
		e0  (* pert(+ (abs(* x2 e1)) e2 (abs (the double-float (aref a i)) ))))
	 )
    (values  (* 0.5d0 (- b0 b2))	; value
	     (* 0.5d0 (+ e0 e1)))))  ;; very pessimistic bound for w18?


(defun power2chebyshi(L a b)  ;; use monomial base L, but for a<x<b interval
  (let* ((sum (+ b a))
	 (diff (- b a))
	 (newcoefs (flwli (power2cheby 
			    (hornerpol L 
				       (list (/ diff 2) (/ sum 2)))))))
    #'(lambda(x) (eval_tsxfe newcoefs (/ (- (* 2 x) sum) diff)))))
  

;;interval stuff

(defstruct (ri (:constructor ri (lo hi)))lo hi ) ;structure for real interval
(defmethod print-object ((a ri) stream)
  (format stream "[~a,~a]"  (pbg(ri-lo a))(pbg(ri-hi a))))
;; these are all crude. Better would be to set rounding mode.
;; each op could then have half the error, not a full epsilon.


#-gcl (defmethod bu ((k double-float)) (if (> k 0)(* k #.(+ 1 double-float-epsilon))
				   (* k #.(- 1 double-float-epsilon))))
(defmethod bu ((k (eql 0.0d0)))least-positive-normalized-double-float)

#-gcl(defmethod bd ((k double-float)) (if (< k 0)(* k #.(+ 1 double-float-epsilon))
				   (* k #.(- 1 double-float-epsilon))))
(defmethod bd ((k (eql 0.0d0))) least-negative-normalized-double-float)

(defun widen-ri (a b) (ri (bd a)(bu b)))


(defun symb (&rest args) (values (intern (apply #'mkstr args))))

(defun mkstr (&rest args)
  (with-output-to-string (s)(dolist (a args) (princ a s))))

(defmacro with-ri (struct1 names1  &body body)
  (let ((gs1 (gensym)))
    `(let ((,gs1 ,struct1))
       (let  ,(mapcar #'(lambda (f field)
			  `(,f (,(symb "RI-" field) ,gs1)))
		      names1
		      '(lo hi))
	 ,@body))))

(defmethod averr((k ri)) ;; 2 values: average and error of an interval
  (with-ri k (lo hi)
	   (values  (* 1/2 (+ hi lo))
		    (* 1/2 (- hi lo)))))


(defun eval_tsxi(aa x) ;; clenshaw algorithm for value as interval
  ;; copied from echebser0. everything double-float or interval
  ;(declare (double-float x)(optimize (speed 3)(safety 0)))
  
  (let* ((b2 0.0d0) (b1 0.0d0)	(b0 0.0d0)
	 (x2 (* x 2.0d0)) (nc 0) (a nil))
  ;  (declare(double-float b0 b1 b2 x2))
    
    (setf a (coerce aa 'vector)  nc (length a)
	  b0 (the double-float (aref a (1- nc)) ))
    (loop for i from (- nc 2) downto 0 
	do
	  (setf	b2 b1 b1 b0  b0 
		;;(int+ (int* x2 b1)(int- (the double-float (aref a i)) b2) )
		(int+ (int- (int* x2 b1) b2) (aref a i) )
		)
	  )
  ;;  (format t "~%  b2=~s" b2)
     (int* 0.5d0 (int- b0 b2))		; value
     ))


;; this stuff for intervals is adapted from c:/lisp/generic/interval.lisp





(defmacro with-ri2 (struct1 struct2 names1 names2 &body body)
  (let ((gs1 (gensym))
	(gs2 (gensym)))
    `(let ((,gs1 ,struct1)
	   (,gs2 ,struct2))
       (let ,(append 
	      (mapcar #'(lambda (f field)
			  `(,f (,(symb "RI-" field) ,gs1)))
		      names1
		      '(lo hi))
	      (mapcar #'(lambda (f field)
			  `(,f (,(symb "RI-" field) ,gs2)))
		      names2
		      '(lo hi)))
	 ,@body))))



(defmethod int+ ((r ri)(s ri)) ;; two arg
  ;; adding 2 intervals, just add their parts.
  (with-ri2 r s (lo1 hi1)(lo2 hi2) (widen-ri (+ lo1 lo2)(+ hi1 hi2))))


(defmethod int+ (r (s ri)) ;adding num+interval
  (with-ri s (lo1 hi1) (widen-ri (+ lo1 r)(+ hi1 r))))

(defmethod int+ ((s ri) r)
  (with-ri s (lo1 hi1) (widen-ri (+ lo1 r)(+ hi1 r))))

(defmethod int- ((r ri)(s ri)) ;; two arg
  ;; difference of 2 intervals, just diff their parts.
  (with-ri2 r s (lo1 hi1)(lo2 hi2) (widen-ri (- lo1 lo2)(- hi1 hi2))))

(defmethod int- ((s ri) r)
  (with-ri s (lo1 hi1) (widen-ri (- lo1 r)(- hi1 r))))

(defmethod int- (s (r ri))
  (with-ri r (lo1 hi1) (widen-ri (- s lo1)(- s hi1))))

(defmethod int- ( r s)  (let((p (- r s)))(widen-ri p p)))

(defmethod int*((r ri)(s ri))
  ;; multiplying intervals, try all 4, taking min and max.
  ;; could be done faster, e.g. if intervals are 0<lo<hi.
  (with-ri2 r s (lo1 hi1)(lo2 hi2)
	    (let ((prods (sort (list (* lo1 lo2)(* lo1 hi2)(* hi1 lo2)(* hi1 hi2)) #'<)))
	      (widen-ri (car prods)(fourth prods)))))
  
(defmethod int* ((s ri) r) (int* r s))  

(defmethod int* (r (s ri))
  ;; multiplying num by interval
  (with-ri s (lo1 hi1)
	   (let ((prods (sort (list (* r lo1)(* r hi1)) #'<)))
	     (widen-ri (car prods)(cadr prods)))))

(defmethod int* (r s)(let((p (* r s)))(widen-ri p p)))

#-allegro (defun pbg(x)x) ;; is there a CL standard way of identifying exceptional op?

#+allegro
(defun pbg(x) ;; print bad guy. This uses the mistake that infinities can be compared equal
  (if
      (badguy x)
      (case x
	((#.excl::*infinity-double*   #.excl::*infinity-single*)
	 "oo")
	((#.excl::*negative-infinity-double* #.excl::*negative-infinity-single*) 
	 "-oo")
	((#.excl::*nan-double*  #.excl::*nan-single*) 
	 "NaN")
	(otherwise x)) ;; not really a bad guy
    x))    

#+allegro 
(defun badguy(x) ;; this returns non-nil for single or double nans and infinities
 (excl::exceptional-floating-point-number-p x))	       


(defun power2chebyshXe(L a b)  ;; use monomial base L, but for a<x<b exact, with error
  (let* ((sum (+ b a))
	 (diff (- b a))
	 (newcoefs (flwli(power2cheby 
			    (hornerpol L 
				       (list (/ diff 2) (/ sum 2)))))))
    #'(lambda(x) (eval_tsxfe newcoefs (/ (- (* 2 x) sum) diff)))))

;; hackery

(defun dfloat(x)(coerce x 'double-float))
(defvar halfepsilon (expt 2 -54))
(defmethod bu ((k number)) 
  (* (rational k) (if (> k 0) #.(+ 1 halfepsilon)
				    #.(- 1 halfepsilon))))
(defmethod bd ((k number)) (* (rational k)(if (< k 0) #.(+ 1 halfepsilon)				   #.(- 1 halfepsilon))))




;; this unwinds the coefs into a nested arithmetic expression
;; and then compiles it.  Seems not to be detectably faster than
;; a loop
(defun hornerm(A name)			;A is list of numbers. name is e.g. 'w18fast
    
  (let ((q (car A))
	(x (gensym)))			; q will be arithmetic expression
  (loop for i double-float in (cdr A) do 
	(setf q `(+ ,(coerce i 'double-float) (* ,x ,q))))
 ; (print q)
  
 (eval  `(defun ,name(,x)(declare (double-float  ,x)(optimize(speed 3)(safety 0)))
	       ,q))
 (compile name)))


(defun $fast_poly_maker(q x name)
  (let ((z (gensym))
	($ratprint nil))
    (compile name `(lambda(,z)  (hornerf ' ,(cdr ($poly_coeflist_var_f q x))
					   (coerce ,z 'double-float)))))

  name)

(defun $fast_poly_maker_uc(q x name) ;;uncompiled. should be about as fast, since hornerf is compiled
  (let ((z (gensym))
	($ratprint nil))
    (setf (symbol-function name) 
      (coerce `(lambda(,z)  (hornerf ' ,(cdr ($poly_coeflist_var_f q x))
				       (coerce ,z 'double-float)))
	      'function)))
  name)

(defun $poly_coeflist_var_f (q x) ;; make the coefficients into floats
  (let ((res($poly_coeflist_var q x))
	)
    (cons (car res)(mapcar #'$float (cdr res))))) ;; for maxima

(defun $fast_poly_maker_err(q x  name)
  (let ((z (gensym))
	($ratprint nil))
    (compile name `(lambda(,z)  ($hornerfe ' ,($poly_coeflist_var q x)(coerce ,z 'double-float)))))
  name)

(defun $fast_poly_maker_err_uc(q x name) ;;uncompiled. should be about as fast.
  (let ((z (gensym))
	($ratprint nil))
    (setf (symbol-function name) 
      (coerce `(lambda(,z)  ($hornerfe ' ,($poly_coeflist_var_f q x)
				       (coerce ,z 'double-float)))
	      'function)))
  name)

;; could just do this:
;; coefs:poly2coeflist(expr))
;; fastexpr(z):= hornerf(coefs,z);
;; instead of   fast_poly_maker(expr,fastexpr)


;; f0(x):= ''horner((x+1)^10+1,x);
;; fast_poly_maker(f0(x),f1);

;; f1(3.0d0) is about 18X faster than f0(3.0d0) if hornerf is compiled...


;; (hornerm w18 'w18fast)  produces a program e.g. (w18fast 3.5d0)
;; marginally slower than (hornerf w18 3.5d0)



;; another cute version..




#| in maxima

evpol(p,x):= /* evaluate polynomial given by coefs via horner's rule*/
 xreduce( lambda([a,c],c+a*x), args(p), 0)$

ww(x):=x^2+x+1;

cs: poly2cheby(ww(x));
eval_ts(cs,45);
ww(45);

shprog(w18(x), 9, shift9)

wxplot2d(w18(x)-shift9(x),[x,1,18]);

|#


(defun eval_ts2(a x) ;; clenshaw algorithm
  ;; evaluate p=sum'(a_nT_n(x), i=0 to N)  a is chebseries
  ;; sum' multiplies a_0 by 1/2
  ;; works. but only for floats
  (declare (optimize (speed 3)(safety 0))
	   (double-float x))
  
  (let* ((bj2 0.0d0)
		(bj1 0.0d0)
		(bj0 0.0d0))
	   (declare(double-float bj2 bj1 bj0))
	   (setf a (reverse a))  ;; could hack this away. maybe later
	   (loop while (cdr a) do
		 (setf bj0
		   (+ (* 2.0d0 x bj1)
		      (- bj2)
		      (the double-float (pop a))))
		 (setf bj2 bj1 bj1 bj0))
	   (+ (* x bj1) (* 0.5d0(pop a))(- bj2))))


;; mucking about, trying to speed up chebyshev stuff.


;;  careful if you re-do stuff below.

#+ignore

(
 

;; 1+4*x+9*x^2+8*x^3+8*x^4;
		
;;(defun sfun(r)(eval_ts2 '(17.0d0 10.0d0 8.5d0 2.0d0 1.0d0) (coerce r 'double-float)))
;;(defun qfun(r) (+ 1.0d0 (* 4  r)(* 9 r r) (* 8 r r r)(* 8 r r r r)))
;;(defun hfun(r)(+ 1.0 (* r (+ 4 (* r (+ 9 (* r (+ 8 (* r 8)))))))))

(defun eval_ts2e(a x) 
  ;; clenshaw algorithm with roundoff error. a and x are by assumption exact.
  ;; if this program is correct, the analysis is crude.
  ;; evaluate p=sum'(a_nT_n(x), i=0 to N)  a is chebseries
  ;; sum' multiplies a_0 by 1/2
  ;; works.
  (declare ;;(optimize (speed 3)(safety 0))
	   (double-float x))
  
  (let* ((bj2 0.0d0)
	 (bj1 0.0d0)
	 (bj0 0.0d0)
	 (u0 0.0d0)(u1 0.0d0) (u2 0.0d0) (ai 0.0d0)
	 (xa (abs x))
	 )
	   (declare(double-float bj2 bj1 bj0 u0 u1 u2 ai xa))
	   (setf a (reverse a))  ;; could hack this away. maybe later
	   (loop while (cdr a) do
		 (setf bj0
		   (+ (* 2.0d0 x bj1)
		      (- bj2)
		      (the double-float (setf ai (pop a)))))
		 
		 ;; 2*x*bj1-bj2+a[i] expanded with (1+eps) e.g.
		 ;; taylor((2*x*bj1*(1+eps) -(bj2+a[i])*(1+eps))*(1+eps), eps,0, 3);
		 
		 ;; looks like 
		 ;;-bj2-a[i]+2*bj1*x+ (4*bj1*x-2*a[i]-2*bj2)*eps+...
		 ;;so  4*bj1*x   -2*ai -2*bj2
		 ;;       
		 ;;(2*u2+2)*abs(bj2)+(4*u1+4)*x*abs(bj1)+ai +O(eps^2)
		 ;; so  2*(1+u2)(abs((4*abs(bj1)*abs(x)*(1+u1)
		 ;;+abs(a[i]+abs(bj1)*(1+u2) ))*eps
		 
		 (setf u0(* double-float-epsilon  
			    
			    ;;(+ (* 2.0d0 (+ u2 1.0d0)(abs bj2))
			    ;;		     (* 4.0d0 (+ u1 1.0d0)xa(abs bj1))
			    ;;		     (abs ai))))
			      (+ (* 4.0d0 (abs bj1)(+ 1 u1) xa)
				 (* 2.0d0 (+ (* (abs bj2)(+ 1 u2))(abs ai))))
			    ))
		 
		 (setf bj2 bj1 bj1 bj0)
		 (setf u2 u1 u1 u0))	; end of loop
	   
	   (values  (+ (* x bj1) 
		       (* 0.5d0(setf ai (pop a)))
		       (- bj2))		;value
		    
		    ;;(2*u1*x+2*bj1*x-2*bj2(u2+1)+abs(ai))/2
		    
		    (* double-float-epsilon
		       (+ (* xa (+ u1 (abs bj1)))
			  (* (abs bj2)(+ u2 1.0d0))
			  (* 0.5d0 (abs ai)))))))

(defun eval_ts2rat(a x) ;; clenshaw algorithm using rational arithmetic
  ;; evaluate p=sum'(a_nT_n(x), i=0 to N)  a is chebseries
  ;; sum' multiplies a_0 by 1/2
  ;; works.  same as eval_tsx
  (let* ((bj2 0)
		(bj1 0)
		(bj0 0))

	   (setf a (reverse a))  ;; could hack this away. maybe later
	   (loop while (cdr a) do
		 (setf bj0
		   (+ (* 2 x bj1)
		      (- bj2)
		      (pop a)))
		 (setf bj2 bj1 bj1 bj0))
	   (+ (* x bj1) (* 1/2(pop a))(- bj2))))


(defun w18(x)(let((ans 1))(loop for i from 1 to 18 do (setf ans (* ans (- x i)))) ans))

;;; slightly hacked ..


(defun eval_ts3(aa x) ;; clenshaw algorithm
  ;; evaluate p=sum'(a_nT_n(x), i=0 to N)  a is chebseries
  ;; sum' multiplies a_0 by 1/2
  ;; works.
  (declare (optimize (speed 3)(safety 0))
	   (double-float x))
  
  (let* ((bj2 0.0d0)
	 (bj1 0.0d0)
	 (bj0 0.0d0)
	 (lasta (car aa))
	 a )
	   (declare(double-float bj2 bj1 bj0 lasta))
	   (setf a (reverse (cdr aa)))  ;; could hack this away. maybe later
	   (loop while a do
		 (setf bj0
		   (+ (* 2.0d0 x bj1)
		      (- bj2)
		      (the double-float (pop a))))
		 (setf bj2 bj1 bj1 bj0))
	   
	   (+ (* x bj1) (* 0.5d0 lasta)(- bj2))))


(defun eval_ts4(aa x) ;; clenshaw algorithm. everything floats. fastest
  ;; evaluate p=sum'(a_nT_n(x), i=0 to N)  a is chebseries
  ;; sum' multiplies a_0 by 1/2
  ;; works.
  (declare (optimize (speed 3)(safety 0))
	   (double-float x))
  
  (let* ((bj2 0.0d0)(bj1 0.0d0) (bj0 0.0d0) (twox (* 2.0d0 x) )
	 (lasta (car aa))
	 (a nil) (p nil))
	   (declare(double-float bj2 bj1 bj0 lasta twox))
	   (setf a (setf p (nreverse (cdr aa))))  ;; could hack this away. maybe later
	   
	   (loop while a do
		 (setf bj0
		   (+ (* twox bj1)
		      (- bj2)
		      (the double-float (pop a))))
		#+ignore (format t "~%ts4 bj0= ~s" bj0)
		 (setf bj2 bj1 bj1 bj0))
	   (nreverse p)
	   (+ (* x bj1) (* 0.5d0 lasta)(- bj2))))

(defun eval_tsrat(aa x) ;; clenshaw algorithm use for EXACT rational inputs.
  ;; evaluate p=sum'(a_nT_n(x), i=0 to N)  a is chebseries
  ;; sum' multiplies a_0 by 1/2
  ;; works.
  (declare (optimize (speed 3)(safety 0))
)
  
  (let* ((bj2 0)
	 (bj1 0)
	 (bj0 0)
	 (twox (* 2 x) )
	 (lasta (car aa))
	 (a nil) (p nil))

	   (setf a (setf p (nreverse (cdr aa))))  ;; could hack this away. maybe later
	   
	   (loop while a do
		 (setf bj0
		   (+ (* twox bj1)
		      (- bj2)
		      (pop a)))
		 (setf bj2 bj1 bj1 bj0))
	   (nreverse p)
	   (+ (* x bj1) (* 1/2 lasta)(- bj2))))


;; second try for error, use interval arithmetic??
;; coded to be self contained. Probably a bad idea..

(defvar perthi (+ 1.0d0 double-float-epsilon))
(defvar pertlo (- 1.0d0 double-float-epsilon))
(defvar pertarr #(#.(- 1.0d0 double-float-epsilon)
		  #.(+ 1.0d0  double-float-epsilon)))

(defun addints (&rest L)
  (let (( min 0.0d0)(max 0.0d0)) ;limits of resulting interval sum
    (loop for k in L do
	  (cond ((atom k) (incf min k)(incf max k))
		(t (incf min (first k))(incf max (second k)))))
    (list min max)))

(defun eval_ts4ee(aa x) ;; clenshaw algorithm  using INTERVAL ARITHMETIC ad hoc
  ;; evaluate p=sum'(a_nT_n(x), i=0 to N)  a is chebseries
  ;; sum' multiplies a_0 by 1/2
  ;; works.
  (declare (optimize (speed 3)(safety 0))
	   (double-float x))
  
  (let* ((bj2 '(0.0d0 0.0d0))
	 (bj1 '(0.0d0 0.0d0))
	 (bj0 '(0.0d0 0.0d0))
	 (twox (* 2.0d0 x) )
	 (lasta (car aa))
	 (a nil) (p nil) )
	   (declare(double-float lasta twox))
	   (setf a (setf p (nreverse (cdr aa))))  ;; could hack this away. maybe later
	   
	   (loop while a do
		 (setf bj0
		   (addints ;; (* twox bj1)
		   (sort (list (* twox (first bj1))(* twox (second bj1))) #'<) 
		    ;;(- bj2)
		      (mapcar #'- (reverse bj2))
		      (pop a)))
	
	   ;; hm, now we have bj0_ < bj0^ and 2 perturbations. 0<lo<1<hi
		 (setf bj0 (list (* pertlo (first bj0)) (* (second bj0) perthi)))
		  (format t "~% ts4ee bj0= ~s" bj0)
		 (setf bj2 bj1 bj1 bj0))
	   (nreverse p)
	   ;; (+ (* x bj1) (* 0.5d0 lasta) (- bj2))
	 #+ignore  (format t "~% answer= ~s ~% bj1=~s, bj2=~s " 
		   (+ (* x (mdpt bj1)) (* 0.5d0 lasta)(- (mdpt bj2)))
		   bj1 bj2)
	 (setf bj1 (addints (sort (mapcar #'(lambda(r)(* r x)) bj1) #'<)  ;; return center, error
			    (* 0.5d0 lasta)
			    (mapcar #'- (reverse bj2))		  ))
	 (format t "~% interval result ee=~s" bj1) 
	 (ctr_errf bj1)))


(defun mdptf(r)(* 0.5d0 (apply #'+ r)))  ;;midpoint
(defun ctr_errf(r) (list (* 0.5d0 (reduce #'+ r))(* -0.5d0 (reduce #'- r))))
(defun ctr_errr(r) (list (* 1/2 (reduce #'+ r))(* -1/2 (reduce #'- r))))
(defun mdptr(r)(* 1/2 (reduce #'+ r)))

(defun eval_ts4eerat(aa x eps) ;; clenshaw algorithm, rational,  using INTERVAL ARITHMETIC ad hoc
  ;; evaluate p=sum'(a_nT_n(x), i=0 to N)  aa is chebseries
  ;; sum' multiplies a_0 by 1/2
  ;; works.
  ;;  (declare (optimize (speed 3)(safety 0))	)
  ;; aa is list of integers or rationals. x is integer or rational. eps is rational or 0 maybe.
  
  (let* ((bj2 '(0 0))
	 (bj1 '(0 0))
	 (bj0 '(0 0))
	 (twox (* 2 x) )
	 (pertlo (- 1 eps)) (perthi(+ 1 eps))
	 (lasta (car aa))
	 (a nil) (p nil) )

	   (setf a (setf p (nreverse (cdr aa))))  ;; could hack this away. maybe later
	   
	   (loop while a do
		   (setf bj0(addints ;; (* twox bj1)
		   (sort (list (* twox (first bj1))(* twox (second bj1))) #'<) 
		    ;;(- bj2)
		      (mapcar #'- (reverse bj2))
		      (pop a)))
	   ;; hm, now we have bj0_ < bj0^ and 2 perturbations. 0<=lo<=1<=hi
		 (setf bj0 (list (* pertlo (first bj0)) (* (second bj0) perthi)))
		 (setf bj2 bj1 bj1 bj0))
	   (nreverse p)
	    #+ignore (format t "~% answer= ~s"
		   (+ (* x (mdpt bj1)) (* 0.5d0 lasta)(- (mdpt bj2)))
		   )
	 (ctr_errr  (addints (sort (mapcar #'(lambda(r)(* r x)) bj1) #'<)  ;; return center, error
		    (* 1/2 lasta)
		    (mapcar #'- (reverse bj2)) ))))



;;  (eval_ts4ee w20ts 3.0d0)
;;(-127488.0d0 137216.0d0)  ;; answer +- error,  doing double-float arithmetic. not so good, but
;; correct answer is definitely in there.
;; another point, 0.75
;; (eval_ts4ee w20ts 0.75d0)
;; (7.0619879085128 704d+16 3072.0d0)
;; right answer is 7.0619879085128 824d+16 , differs by 120, within bound of 3072.
;; we know it is right because 
;;(mapcar #'(lambda(r)(* 1.0d0 r)) (eval_ts4eerat w20tsrerat 3/4 0))
;;(7.0619879085128824d+16 0.0d0)

;; try testing near a known zero of w20, namely 1.0.

;; (eval_ts4ee w20ts 1.0d0) gives (-1024.0d0 6144.0d0), right answer is -460.02657318116735d0
;; w20tsrerat does not have a zero at 1.0 because of perturbations.
;; the zero is closer to  z = 1 - 3.6637359812630166d-15 = 9999999999999962183/10000000000000000000

;;  (mapcar #'(lambda(r)(* 1.0d0 r)) (eval_ts4eerat w20tsrerat z 0))
;; returns (-0.001296965073310622d0 0.0d0)

;; (eval_ts4ee w20ts (* 1.0d0 z))
;; returns (-512.0d0 4608.0d0)
;;  So it looks like the absolute error is bounded.  The relative error in the
;; approximation is going to be large because the actual value is so small.
;; By comparison,  (hornerfe w20mb 1.0d0) gives an answer of 0.0, but error bound 5672.
;; w20mb is a different polynomial though..

;; try horner at 10..
;;(hornerfe w20mb 10.0d0)
;;0.0d0
;;5672.234034882423d0
;;but that's a different polynomial from this one ...

;;(mapcar #'(lambda(r)(coerce r 'double-float)) (eval_ts4eerat w20tsrerat 10 0))
;;(4.0881513806276226d+8 0.0d0)

;;compare to

;; (eval_ts4ee w20ts 10.0d0)
;;(7.38368448d+8 1.163136192d+9)  ;; huge [valid] error bound.

;;hacking to speed it up
;;; BROKEN!

	   
	   
(defun addintsV  (target &rest L)
  (declare   (type (simple-array double-float(2)) target))
  (loop for k in L  do
	(cond 
	      ((numberp k) 
		 (incf (aref target 0)k)
		 (incf (aref target 1)k))
	      
	      (t (incf (aref target 0)(aref k 0))
		   (incf (aref target 1)(aref k 1)))) )
  target)





;; try to simplify, fix..




(defun eval_ts4eefast(aa x) ;; clenshaw algorithm  using INTERVAL ARITHMETIC ad hoc
  ;; evaluate p=sum'(a_nT_n(x), i=0 to N)  a is chebseries
  ;; sum' multiplies a_0 by 1/2

  
  (let* ((bj1 #(0.0d0 0.0d0))
	 (bj2 #(0.0d0 0.0d0))
	 bj0
	 (twox (* 2.0d0 x) )
	 (lasta (car aa))
	 (a nil) (p nil) )
	   (setf a (setf p (nreverse (cdr aa)))) 
	   (loop while a do 
		 (setf bj0
		   (addintsV  
		    (sort (map 'vector #'(lambda(r)(* twox r)) bj1) #'<) ; 2*x*bj1
		    (sort (map 'vector #'(lambda(r)(- r)) bj2) #'<) ;-bj2
		    (pop a)))
		;; 
		 (setf bj0 (map 'vector #'(lambda(r s)(* r s))
				bj0
				pertarr)) 

		 (setf bj0 (sort bj0 #'<)) ; in-place sort
		 ;;  (format t "~% ts4eefastx bj0= ~s" bj0)
		 (setf bj2 bj1 bj1 bj0)
		 ) ; loop end
	   (nreverse p)

	   ;; (+ (* x bj1) (* 0.5d0 lasta) (- bj2))
	  
	   (setf bj1
	     (addintsV	 
	    (sort (map 'vector #'(lambda(r)(* x r)) bj1) #'<) 
	    (nreverse (map 'vector #'(lambda(r)(- r)) bj2)) ; - bj2
	    (* 0.5d0 lasta)))		; also addthis in
			   
	   (format t "~%interval result fastx= ~s " bj1)			; the answer as an interval 
	  
	   (values (* 0.5d0 (reduce #'+ bj1))
		   (* -0.5d0 (reduce #'- bj1)))))


)

#|

pp(a,b):=wxdraw2d(
error_type = y,color=black,
key="error in horner w18",
line_width=2,
errors(makelist(cons(i/10.0d0,args(s0(i/10.0d0))),i,a*10,b*10)

    
    ));
    
    good(x):=''%;
    prod(x-i,i,1,18)$
shproge(good(x),0,s0);
    

    |#





  ;; ARRAY version
  ;; any common lisp numbers in A and x. 
;; order in A: highest degree down to constant term
;; works but not as fast as A as a list.  The aref takes too long?
(defun hornerA(A x)  
  (declare (optimize (speed 3)(safety 0))
	   (double-float x)
	   (type (simple-array double-float *) A))
  
  (let*((k (1- (length A)))
	(q (aref A 0)))
    (declare (double-float q)(fixnum k))
    (loop for  i fixnum from 1 to k
	do 
;;	  (print q)
	  (setf q (+ (the double-float (aref A i))(the double-float  (* x q)))))
    q))

;; wrong
#+ignore
(defun hornerA(A x)  
  (declare (optimize (speed 3)(safety 0))
	   (double-float x)
	   (type (simple-array t *) A))
  
  (let*((k (1- (length A)))
	(q (svref A 0)))
    (declare (double-float q)(fixnum k))
    (loop for  i fixnum from 1 to k
	do 
;;	  (print q)
	  (setf q (+ (the double-float (svref A i))(the double-float  (* x q)))))
    q))



#| can we do "automatic" error analysis by symbolically
entering values for x as  x+eps?  e.g. instead of p(2), try p(2+eps)
then use taylor, rat()  ratweight, or just look at coef of eps?

problem.. we need eps +2 eps to become 3 eps, (ok) but eps - eps should be 2 eps.
we don't know the sign of eps.  General rules...

u*eps + v*eps ->  (|u|+|v|)*eps
 more generally
 u*eps^n+v*eps^n ->  (|u|+|v|)*eps^n

 matchdeclare( r, lambda([m], is(not (m> 0))))$  /* eh, may return unknown? etc */
 matchdeclare(n, any)$
 maybe tellsimp(r*eps, -r*eps*x^b)

 naw... matchdeclare(r,negnump)
 negnump(x):=is (numberp(x)and x<0)
 let(r*eps,abs(r)*eps);
 letsimp(-3*eps^2*x^3);   e*eps^2*x^3 is answer.

 eh, doesn't work to get same error as hornerfe..
 

 for a more thorough discussion see
 
issac 1992

An Approach for Floating-Point Error Analysis
using Computer Algebra
Mark P.W. Mutrie
Richard H. Bartels
Bruce W. Char
|#