(eval-when (compile load) (declaim (optimize (speed 3)(safety 0)))) (load "fftwdsimple.fasl") ;; polynomial multiplication by FFT: pack 2 input arrays into one array ;; for a complex fft. ;; like 4f.lisp but returns answer in lists (defvar *fftbufsize* 8) (defvar *fftbuf* (make-array *fftbufsize* :element-type 'double-float :allocation :static-reclaimable )) (defvar *fftbuf2*(make-array *fftbufsize* :element-type 'double-float :allocation :static-reclaimable )) (defun fftpoly2(poly1 poly2 len) ;; poly1, poly2 inputs are pairs of arrays: ;; of exponents in increasing order, ;; and of coefs corresponding to those exponents. ;; len is desired length of transform, which is at least as large as degree of poly1,2 ;; but may be rounded up to next power of 2, or some other convenient FFT length ;; This program returns a transformed sequence suitable to ;; send into untangle to get the ;; fourier transforms of poly1 and poly2l of length len (let*((exps1 (car poly1)) (coefs1 (cdr poly1)) (exps2 (car poly2)) (coefs2 (cdr poly2)) (e 0) ;(c 0.0d0) (r *fftbuf*) ;; this is known to be at least 2*len in size big enough ;; we could do this in r, for this program.. ;;(ans(make-array (+ len len) :element-type 'double-float :displaced-to r)) ;;(ans *fftbuf*) ) (declare (fixnum len e ); (double-float c) (type (array double-float (*)) r) (optimize (speed 3)(safety 0))) ;; array is twice the length of the (real) input to make room for real & imag parts ;; copy real/imag parts of input into adjacent array locations and pre-normalize. ;; Fill with zeros first. (loop for i fixnum from 0 to (+ len len -1) do (setf (aref r i) 0.0d0)) ;; next, copy real numbers from poly1 (loop for i fixnum from 0 to (1- (length exps1)) do (setf e (aref exps1 i)) ;; assume coefs are already double-floats ... (setf (aref r (+ e e)) (float (aref coefs1 i) 1.0d0) )) (loop for i fixnum from 0 to (1- (length exps2)) do (setf e (aref exps2 i)) (setf (aref r (+ 1 e e)) (float (aref coefs2 i) 1.0d0) )) ;; we can do the fft plan here since estimate only will be cheap and not clobber r? (fftw_execute_dft (fftw_plan len r r -1 64) r r) ;; do the FFT r )) ;; faster yet? ;; um, broken (defun untangleprod2(f NN ) ;;; given an array *f*, of real/imag/real/imag doubles, untangle to a pair of ;;; fourier transforms, and multiply them pointwise ;;; according to http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM ;;; with these interleaved data r,s. fft(k,z) is the kth element of ;;; the array of fft coefs. (but we use 2 spots for real/ imag ;;; r= fft(k,r) = (fft(k,z)+conj(fft(N-k,z))/2 ;;; s= fft(k,s) =-i*(fft(k,z)-conj(fft(N-k,z))/2 ;;; they leave out what to do when k=0, since fft is defined from 0 to N-1. ;;; treat fft(N,z) == fft(0,z). ;;; so ans[q]=r[q]*s[q] ;;; returned as real/imag real/image etc. (declare (type (array double-float (*)) f) (fixnum NN) (optimize (speed 3)(safety 0))) ;; (format t "~%f=~s" f) (let*( ;;(ans2 *fftbuf*) ;; re-use buffer?? ;;(ans2 (make-array (+ NN NN) :element-type 'double-float :initial-element 0.0d0)) (ans2 *fftbuf2*) (ar 0.0d0)(ai 0.0d0)(br 0.0d0)(bi 0.0d0)(idx1 0)(idx2 0) (sr 0.0d0)(si 0.0d0)(rr 0.0d0)(ri 0.0d0)) (declare (type (array double-float (*)) ans2) (fixnum idx1 idx2) (double-float ar ai br bi sr si rr ri)) (setf (aref ans2 0) (* (aref f 0)(aref f 1))) (setf (aref ans2 1) 0.0d0) (loop for k fixnum from 1 to (1- NN) do (setf idx1 (+ k k)) (setf idx2 (* (- NN k) 2)) (setf ar (aref f idx1) ai (aref f (1+ idx1))) (setf br (aref f idx2) bi (aref f (+ idx2 1))) (setf rr (+ ar br) ri (- ai bi)) (setf sr (+ ai bi) si (- br ar)) ; (format t "r= ~s ~s, s= ~s ~s" rr ri sr si) (setf (aref ans2 idx1) (* 0.25d0 (- (* rr sr) (* ri si)))) (setf (aref ans2 (1+ idx1))(* 0.25d0 (+ (* rr si)(* ri sr))))) ;; this all accomplishes complex multiplication of relevant parts. ;; pieces unraveled for speed, instead of consing up complex numbers. ans2)) (defun mulfft3(r s) ;; multiply polynomials r,s using floating FFT ;;compute the size Z of the answer. Z is a power of 2. ;; compute complex array r, increased to size S ;; compute complex array r, increased to size S ;; compute two FFTs ;; multiply pointwise ;; compute inverse FFT * 1/n ;; convert back to array and return answer, coefficients rounded to integers. ;; The answer will be only approximate if the coefficients are too large or ;; too corrupted by round-off. (let* ((lr (length (car r))) (ls (length (car s))) ;; compute deg of r + degree of s +1 (lans (+ (aref (car r) (1- lr)) (aref (car s) (1- ls)) 1)) (z (ash 1 (ceiling (log lans 2))))) ; round up to power of 2 ;;; check the size of fftbuf here.. (if (< *fftbufsize* (+ z z)) ;; enough for real and imag part (setf *fftbuf* (make-array (setf *fftbufsize* (+ z z) ) ;; real and imag parts :element-type 'double-float :allocation :static-reclaimable ) *fftbuf2* ;; need another one, or could overlap ops in *fftbuf* with some fancier progs. (make-array (setf *fftbufsize* (+ z z) ) ;; real and imag parts :element-type 'double-float :allocation :static-reclaimable ))) ;; this next line does the whole job. (fftiR3(untangleprod2 (fftpoly2 r s z) z)))) (defun fftiR3(input) ;; this is the inverse FFT, retaining only REAL part of answer ;; input is array of real, imag, real, imag, ... ;; returns array of REAL parts of answer, double-float. ;; computes a plan, if there is not one. (declare (type (array double-float (*)) input)) (let*((len (ash (length input) -1)) (r input) ;;(anse (make-array len :element-type 'fixnum :fill-pointer 0)) (anse nil) ;;(ansc (make-array len :fill-pointer 0)) (ansc nil) (q len) (k 0)) (declare (fixnum len k q) ;; (type (array double-float (*)) ansc) ;; (type (array fixnum (*)) anse) (optimize (speed 3)(safety 0))) (fftw_execute_dft (fftw_plan len r r 1 64) r r) ;; do the FFT (loop for i fixnum from 0 to (1- (* 2 len)) by 2 do ;; copy real parts into answer (setf k (round (aref r i) q)) ;; (format t "~% r[i]=~s k=~s " (aref r i) k) (cond((= k 0) nil) (t (push (ash i -1) anse) (push k ansc)))) ;;(cons (nreverse anse)(nreverse ansc)) (cons anse ansc) )) ;;maxima version of pieces of this.. #| ;; normalization is 1, 1/n (slowFFT(x):= block([n:length(x)], map(lambda([k], sum(x[j]*exp((-2*%i*%pi*(j-1)*k)/n),j,1,n)), makelist(i,i,0,n-1))), slowFFTi(x):= block([n:length(x)], map(lambda([k], 1/n*sum(x[j]*exp((2*%i*%pi*(j-1)*k)/n),j,1,n)), makelist(i,i,0,n-1))), untangle(v):=block([n:length(v)], local(a,b,r), r[i]:=v[i+1], a[0]:realpart(r[0]), b[0]:imagpart(r[0]), a[i]:=1/2*(r[i]+conjugate(r[n-i])), b[i]:= -%i/2*(r[i]-conjugate(r[n-i])), [makelist(a[i],i,0,n-1), makelist(b[i],i,0,n-1)])) |# #| (defvar xv (cons (coerce (loop for i from 0 to 1999 collect i) 'vector) (coerce (loop for i from 0 to 1999 collect 1) 'vector))) (defvar xd (cons (coerce (loop for i from 0 to 1999 collect i) 'vector) (coerce (loop for i from 0 to 1999 collect 1.0d0) 'vector))) |#