;;; experimentation in encoding polynomials for faster multiplication. ;;; karatsuba but using a sparse encoding!! ;;; July, 2010 ;;; Richard Fateman ;; try to get it all to work with exponent array fixnums. (eval-when (compile load) (declaim (optimize (speed 3) (safety 0) (space 0) (compilation-speed 0))) (declaim (inline svref = eql make-array))) ;;An implementation of Karatsuba multiplication #|Assume p is of degree 2n-1, i.e. p= a*x^n+ b where a is degree n-1 and b is degree n-1. assume q is also of degree 2n-1, i.e. q= c*x^n+d compute U= (a+b)*(c+d) = a*c+a*d+b*c+b*d, a polynomial of degree k=2*(n-1) compute S= a*c of degree k compute T= b*d of degree k compute U = U-S-T = a*d+b*c of degree k Then V=p*q = S*x^(2n)+U*x^n + T. from first principles, V =p*q should be of degree 2*(2n-1) = 4n-2. from the form above, k+2n= 4n-2. note that U*x^n is of degree 2*(n-1)+n = 3n-1 so that it OVERLAPS the S*x^(2n) component. note that T is of degree 2*(n-1) = 2n-2 so that it OVERLAPS the U*x^(n) component. (the trick is that, used recursively to multiply a*c etc, this saves coefficient mults, instead of nm, max(n,m)^1.5) if p, q, are of degree (say) 10 or so, use conventional arithmetic. Just a guess. What about this 2n-1 business? Find the max degree of p, q. If it is odd, we are set. If it is even, 2n, do this: p=a*x^n+ b where b is of degree n-1, but a is of degree n. Same deal works. I think. |# ;;(defvar *karatlim* 12) ;;(defparameter *karatlim* 40) ;; an educated guess -- don't use karatsuba for small probs (defvar *karatlim* 4) ;; for testing ;; representation is (cons #(e0 e1 ....) #(c0 c1 ...)) ; array of expons and coefs, ascending degree. (defun degr(p) (aref (car p)(1- (length (car p))))) (defun truncatepoly(p v) ; break up polynomial p into two pieces at degree v. ;; (0 ...) (v ....).. actually, the 2nd value will be the first exponent >= v, not necessarily v. (declare (fixnum v)(optimize (speed 3)(safety 0))) (let ((spot(position v (car p) :test #'<=)) (carp (car p))(cdrp (cdr p))) (declare (fixnum spot)) (if (null spot)(values p '( #(). #())) (values (cons (make-array spot :displaced-to carp :element-type 'fixnum) (make-array spot :displaced-to cdrp)) (cons (make-array (- (length carp) spot ) :displaced-to carp :displaced-index-offset spot :element-type 'fixnum) (make-array (- (length carp) spot) :displaced-to cdrp :displaced-index-offset spot)))))) #+ignore (defun truncatepoly(p v) ; break up polynomial p into two pieces at degree v. ;; (0 ...) (v ....).. actually, the 2nd value will be the first exponent >= v, not necessarily v. (declare (fixnum v)(optimize (speed 3)(safety 0))) (let ((spot(position v (car p) :test #'<=)) (carp (car p))(cdrp (cdr p))) (declare (fixnum spot)) (if (null spot)(values p '( #(). #())) (values (cons (make-array spot :displaced-to carp) (make-array spot :displaced-to cdrp)) (cons (make-array (- (length carp) spot ) :displaced-to carp :displaced-index-offset spot ;:element-type 'fixnum ) (make-array (- (length carp) spot) :displaced-to cdrp :displaced-index-offset spot)))))) ;;#+ignore ;;; THIS IS IT (defun mul-ks(pp qq) ;; by Karatsuba., use displaced arrays ;; sparse rep. (let* ((plim (length (car pp))) ; number of terms in p (qlim (length (car qq)))) ; number of terms in q (declare (fixnum qlim plim)) (if (or (< plim *karatlim*) (< qlim *karatlim*)) ;; (mul-dense pp qq) #+ignore (mul-dense (cons(make-simple-array (car pp)) (make-simple-array (cdr pp))) (cons(make-simple-array (car qq)) (make-simple-array (cdr qq)))) (mul-dense-ns2 pp qq) ;; need to have a version of mul-dense that does not require simple arrays. ;; main program (let ((v (+ 1 (ash (max (degr pp)(degr qq)) -1)))) ;half of the max degree of inputs, +1 (multiple-value-bind (A B) (truncatepoly pp v) (setf B (kpolyshiftleft B (- v))) (multiple-value-bind (C D) (truncatepoly qq v) (setf D (kpolyshiftleft D (- v))) (let ((U (mul-ks (kpolyadd A B) (kpolyadd C D))) (TT (mul-ks A C)) (S (mul-ks B D)) (aa nil)) ; (format t "~%U=~s~%TT=~s~%S=~s" U TT S) (setf U (kpolysub U S)) (setf U (kpolysub U TT)) ; (format t "~%U=~s" U) (setf aa (kpolyaddshift TT U v)) (kpolyaddshift aa S (+ v v)) ))))))) #+ignore ;; can't use this.. (defun kpolyshiftleft(p k) ; like multiplying by x^k, destructive (let ((A (car p))) (loop for i fixnum from 0 below (length A) finally (return p) do (incf (aref A i) k) ))) (defun kpolyshiftleft(p k) ; like multiplying by x^k, NON-destructive (let ((A (make-array (length (car p)):element-type 'fixnum :initial-contents (car p)) )) (loop for i fixnum from 0 below (length A) finally (return (cons A (cdr p))) do (incf (aref A i) k) ))) (defun kpolyadd(p q) (let* ((exps nil)(coefs nil) (carp (car p)) ;exponents of p ascending (cdrp (cdr p)) (carq (car q)) (cdrq (cdr q)) (i (1- (length (car p)))) (j (1- (length (car q)))) (cc 0) (carpi (aref carp i)) (carqj (aref carq j))) (loop (cond ((< j 0) ; clean up (loop for k fixnum from i downto 0 do (push (aref carp k) exps) (push (aref cdrp k) coefs)) (return)) ((< i 0) ; clean up (loop for k fixnum from j downto 0 do (push (aref carq k) exps) (push (aref cdrq k) coefs)) (return))) ; (format t "~% i=~s j=~s" i j) (setf carpi (aref carp i) carqj (aref carq j)) (cond ((= carpi carqj) (cond ((= 0 (setf cc (+ (aref cdrp i)(aref cdrq j))))nil) (t (push carpi exps) (push cc coefs))) (decf i)(decf j)) ((> carpi carqj) (push carpi exps) (push (aref cdrp i) coefs) (decf i)) (t (push carqj exps) (push (aref cdrq j) coefs) (decf j)))) (cons (coerce exps '(array fixnum (*)))(coerce coefs '(array integer (*)))))) (defun kpolyaddshift(p q n) ;;p+ x^n*q (let* ((exps nil)(coefs nil) (carp (car p)) ;exponents of p ascending (cdrp (cdr p)) (carq (car q)) (cdrq (cdr q)) (i (1- (length (car p)))) (j (1- (length (car q)))) (cc 0) (carpi (aref carp i)) (carqj (aref carq j))) (loop (cond ((< j 0) ; clean up (loop for k fixnum from i downto 0 do (push (aref carp k) exps) (push (aref cdrp k) coefs)) (return)) ((< i 0) ; clean up (loop for k fixnum from j downto 0 do (push (+ n (aref carq k)) exps) (push (aref cdrq k) coefs)) (return))) ; (format t "~% i=~s j=~s" i j) (setf carpi (aref carp i) carqj (+ n (aref carq j))) (cond ((= carpi carqj) (cond ((= 0 (setf cc (+ (aref cdrp i)(aref cdrq j))))nil) (t (push carpi exps) (push cc coefs))) (decf i)(decf j)) ((> carpi carqj) (push carpi exps) (push (aref cdrp i) coefs) (decf i)) (t (push carqj exps) (push (aref cdrq j) coefs) (decf j)))) (cons (coerce exps '(array fixnum (*)) ) (coerce coefs '(array integer (*)))))) (defun kpolysub(p q) (let* ((exps nil)(coefs nil) (carp (car p)) ;exponents of p ascending (cdrp (cdr p)) (carq (car q)) (cdrq (cdr q)) (i (1- (length (car p)))) (j (1- (length (car q)))) (cc 0) (carpi (aref carp i)) (carqj (aref carq j))) (loop (cond ((< j 0) ; clean up (loop for k fixnum from i downto 0 do (push (aref carp k) exps) (push (aref cdrp k) coefs)) (return)) ((< i 0) ; clean up (loop for k fixnum from j downto 0 do (push (aref carq k) exps) (push (- (aref cdrq k)) coefs)) (return))) ; (format t "~% i=~s j=~s" i j) (setf carpi (aref carp i) carqj (aref carq j)) (cond ((= carpi carqj) (cond ((= 0 (setf cc (- (aref cdrp i)(aref cdrq j))))nil) (t (push carpi exps) (push cc coefs))) (decf i)(decf j)) ((> carpi carqj) (push carpi exps) (push (aref cdrp i) coefs) (decf i)) (t (push carqj exps) (push (aref cdrq j) coefs) (decf j)))) (cons (coerce exps '(array fixnum (*)) ) (coerce coefs '(array integer (*)))))) ;; benchmarks #| (setf ones (cons (coerce (loop for i from 0 to 999 collect i) '(array fixnum (*))) (coerce (loop for i from 1 to 1000 collect 1) 'array))) ;;; don't declare exponents to be fixnum.... (setf ones (cons (coerce (loop for i from 0 to 999 collect i) 'simple-array ) (coerce (loop for i from 1 to 1000 collect 1) 'simple-array))) (setf lones (cons (coerce (loop for i from 0 to 9 collect i) '(array fixnum (*))) (coerce (loop for i from 1 to 10 collect 1) 'array))) (setf jones (cons (coerce (loop for i from 0 to 9 collect i) '(array fixnum (*))) (coerce (loop for i from 1 to 10 collect 1) 'array))) (setf ss (cons (coerce (loop for i from 0 to 2 collect i) '(array fixnum (*))) (coerce (loop for i from 1 to 3 collect 1) 'array))) |# ;; copied from muldense.lisp ;; and changed.. #+ignore (defun mul-dense (x y) ;; (format t "~%x is ~s, . ~s y is ~s . ~s " (type-of (car x))(type-of (cdr x))(type-of (car y))(type-of (cdr y))) (if (typep (car x) '(simple-array fixnum)) nil (setf x (cons(coerce (car x) '(array fixnum (*))) (cdr x)))) ; (if (typep (cdr x) 'simple-array) nil (setf x (cons(car x) (coerce (cdr x) '(array integer (*)))))) (if (typep (car y) '(simple-array fixnum)) nil (setf y (cons(coerce (car y) '(array fixnum (*))) (cdr y)))) ; (if (typep (cdr y) 'simple-array) nil (setf y (cons(car y) (coerce (cdr y) '(array integer (*)))))) ;; (format t "~%after conversion ~%xp is ~s, . ~s y is ~s . ~s " (type-of (car x))(type-of (cdr x))(type-of (car y))(type-of (cdr y))) ;; simple for small degree dense ;; can be much faster than mul-naive, esp. if answer is dense ;; exponents and coefficients are integers. compare to mul-dense-fix ;; if they are not, mul-dense-fix's answer will be wrong! (if (< (length (car x))(length (car y))) ; reverse args if x is shorter (mul-dense y x) (let* ((xe (car x)) ;array of exponents of x (ye (car y)) ;array of exponents of y (xc (cdr x)) ;array of coefficients of x ;array of coefficients of y, put in TYPED array so we can iterate faster? ;; (yc (make-simple-array (cdr y))) (yc (cdr y)) (xei 0)(xci 0) (yl (length ye)) (numtermsX (length xe)) (numtermsY (length ye)) (maxXe (aref xe (1- numtermsX))) (maxYe (aref ye (1- numtermsY))) (ASize (+ 1 maxXe maxYe)) (dense-check (dense-check (* numtermsX numtermsY) ASize)) (ans (make-array dense-check :element-type 'integer ;; assumes coeffs are integers only ;; note: an integer can be a bignum. :initial-element 0))) (declare (fixnum yl) (type (array integer (*)) xc yc) (type (array fixnum (*)) xe ye ) (type (array integer (*)) xe ye) (type (array integer (*)) ans) (optimize (speed 3)(safety 0)) ) #+ignore (cond ((= 0 dense-check) (error "mul-dense is not be a good choice for polynomial mult of ~s by ~s terms, since it needs to allocate an array of size ~s" numtermsX numtermsY ASize) (error "leaving mult program, need to fill in some alternative"))) (dotimes (i (length (car x)) (DA2PA ans) ) (declare (fixnum i)) ; (print i) (setf xci (aref xc i)) (unless (= xci 0) (setf xei (aref xe i)) (dotimes(j yl) ;; multiply terms (declare (fixnum j)) (incf (aref ans (+ xei (aref ye j))) (* xci (aref yc j))))))))) ;;; another version with start/stop in arrays, rather than displaced arrays which slow up (defmacro expons(x) `(car ,x)) (defmacro coefs(x) `(cdr ,x)) (defvar mulbuf (make-array 2000 :element-type 'integer :initial-element 0)) (defun mul-denses (x y) ; simplest case of multiplyng full polynomials (mul-densess x y 0 (1-(length (car x))) 0 (1- (length (car y))))) (defun mul-densess (x y xbot xtop ybot ytop) ; multiply sections of polynomials ;;bot and top are indexes into polys x and y ;; expons(x) is the array of exponents. ;; coefs(x) is the array of coefficients, same length as coefs(x). (declare (fixnum xbot xtop ybot ytop) ;; (:explain :types) (optimize (speed 3)(safety 0))) (let ((xlen (- xtop xbot)) ; how many terms from x are we using? (ylen (- ytop ybot))) (if (< xlen ylen) ; reverse args if x is shorter (mul-densess y x ybot ytop xbot xtop) (let* ((xe (expons x)) ;array of exponents of x (ye (expons y)) ;array of exponents of y (xc (coefs x)) ;array of coefficients of x (yc (coefs y)) (xei 0)(xci 0) (maxXe (aref xe xtop)) ;maximum degree in x (maxYe (aref ye ytop)) (minXe (aref xe xbot)) ;minimum degree in x (minYe (aref ye ybot)) ;; allocate an array for all terms ;; with exponenets from minXe+minYe through maxXe+maxYe (ASize (+ 1 (- maxXe minXe)(- maxYe minYe))) ;; we could allocate this array from some buffer (ans mulbuf) ) (declare (fixnum ASize maxXe maxYe minXe minYe) (type (array integer (*)) xc yc ans) (type (array fixnum (*)) xe ye )) (if (< (length mulbuf) ASize) (setf ans (setf mulbuf (make-array ASize :element-type 'integer)))) (loop for i fixnum from 0 below ASize do (setf (aref ans i) 0)) (loop for i fixnum from xbot to xtop finally (return (DA2PAs ans ASize (+ minXe minYe))) do (setf xci (aref xc i)) ;; xci will not be zero by hypothesis (setf xei (aref xe i)) (loop for j fixnum from ybot to ytop do (incf (aref ans (+ xei (aref ye j))) (* xci (aref yc j))))))))) (defun DA2PAs (A size offset) ;;DENSE single array to Pair of Arrays ;; A is an ARRAY of (coef coef coef ...), implicit exponents (0 1 2 ...) ;; create a pair: one array of exponents and one of coefficients. ;; offset the exponenets by offset. (declare (type (array integer (*)) A) (optimize (speed 3)(safety 0))) (let* ( (n (count-if-not #'zerop A :start 0 :end size)) ;; on one example this line above takes 19% of the time! ;(n (1- (length A))) faster but not simplest. (exps (make-array n :element-type 'fixnum)) (coefs (make-array n :element-type 'integer)) (c 0)) (declare (fixnum n) (type (array integer (*)) coefs) (type (array fixnum (*)) exps) (optimize (speed 3)(safety 0))) (if (= 0 offset) (do ((i 0 (1+ i)) (j 0)) ((>= j n) (cons exps coefs)) (declare (fixnum i j)) (unless (eq 0 (setf c (aref A i))) (setf (aref exps j) i (aref coefs j)c) (incf j))) ;; non-zero offset (do ((i 0 (1+ i)) (j 0)) ((>= j n) (cons exps coefs)) (declare (fixnum i j)) (unless (eq 0 (setf c (aref A i))) (setf (aref exps j) (+ i offset) (aref coefs j)c) (incf j)))))) ;;; we need programs that add polys from bot to top, shifted. ;;; yeech. (defun foldpoly(x)(foldpolyat x (+ 1 (ash (degr x) -1)))) (defun foldpolyat(x h) ;; add the top and the bottom half of a polynomial. ;; for example, A+x^n*B where A,B are of degree < n --> A+B (add-densess x x 0 h (1- h) (1-(length (expons x))) (1- h))) (defun add-densess (x y xbot xtop ybot ytop offset) ; add sections of polynomials ;;bot and top are indexes into polys x and y ;; expons(x) is the array of exponents. ;; coefs(x) is the array of coefficients, same length as coefs(x). (declare (fixnum xbot xtop ybot ytop) ;; (:explain :types) (optimize (speed 3)(safety 0))) (let ((xe (expons x)) ;array of exponents of x (ye (expons y)) ;array of exponents of y (xc (coefs x)) ;array of coefficients of x (yc (coefs y))) (cond ((>= xbot (length xe)) (return-from add-densess (polyleftshift y offset))) ((>= ybot (length ye)) (return-from add-densess x)) ((>= xtop (length xe))(setf xtop (1- (length xe)))) ((>= ytop (length ye))(setf ytop (1- (length ye)))) ((> offset ybot) (error "illegal offset ~s in add-densess" offset))) (let*( (xlen (- xtop xbot)) ; how many terms from x are we using? (ylen (- ytop ybot)) (maxXe (aref xe xtop)) ;maximum degree in x (maxYe (aref ye ytop)) (minXe (aref xe xbot)) ;minimum degree in x (minYe (aref ye ybot)) ;; allocate an array for all terms ;; with exponenets from minXe+minYe through maxXe+maxYe (ASize (1+ (- (max maxXe maxYe)(min minXe minYe)))) ;; we could allocate this array from some buffer (ans mulbuf)) (declare (fixnum ASize maxXe maxYe minXe minYe) (type (array integer (*)) xc yc ans) (type (array fixnum (*)) xe ye )) ; (print ASize) (if (< (length mulbuf) ASize) (setf ans (setf mulbuf (make-array ASize :element-type 'integer)))) (loop for i fixnum from 0 below ASize do (setf (aref ans i) 0)) (loop for i fixnum from xbot below xtop do (setf (aref ans i) (aref xc i))) (loop for j fixnum from ybot below ytop do (incf (aref ans (- j offset)) (aref yc j))) (DA2PAs ans ASize 0))) ) #+ignore ;; not working (defun mul-ks(pp qq) ;; by Karatsuba., use displaced arrays ;; sparse rep. (let* ((plim (length (car pp))) ; number of terms in p (qlim (length (car qq)))) ; number of terms in q (declare (fixnum qlim plim)) (if (or (< plim *karatlim*) (< qlim *karatlim*)) (mul-dense pp qq) ;; main program (let ((v (+ 1 (ash (max (degr pp)(degr qq)) -1)))) ;half of the max degree of inputs, +1 (multiple-value-bind (A B) (truncatepoly pp v) (setf B (kpolyshiftleft B (- v))) (multiple-value-bind (C D) (truncatepoly qq v) (setf D (kpolyshiftleft D (- v))) (let ((U (mul-ks (foldpolyat pp v) (foldpolyat qq v))) (TT (mul-ks A C)) (S (mul-ks B D)) (aa nil)) ; (format t "~%U=~s~%TT=~s~%S=~s" U TT S) (setf U (kpolysub U S)) (setf U (kpolysub U TT)) ; (format t "~%U=~s" U) (setf aa (kpolyaddshift TT U v)) (kpolyaddshift aa S (+ v v)) ))))))) (defun tri(pp v) (multiple-value-bind (A B) (truncatepoly pp v) (setf B (kpolyshiftleft B (- v))) (kpolyadd A B))) ;; (try tt 5) should be same as (foldpolyat tt 5) ?? (defun tr2(b n)(declare (type (array integer (*)) b) (fixnum n) (optimize (speed 3)(safety 0))) (loop for i fixnum from 0 below n do (setf (aref b i) 0)) b) (defun tr3(b n)(declare (fixnum n) ;(type (array integer (*)) b) (optimize (speed 3)(safety 0))) (make-array n :element-type 'integer :initial-element 0)) (defun tr4(b n)(declare (type (array t (*)) b) (fixnum n) (optimize (speed 3)(safety 0))) (loop for i fixnum from 0 below n do (setf (aref b i) 0)) b) (defun tr5(b n)(declare (simple-array b) (fixnum n) (optimize (speed 3)(safety 0))) (loop for i fixnum from 0 below n do (setf (aref b i) 0)) b) (defun tr6(b n)(declare ;(simple-array b) (fixnum n) (optimize (speed 3)(safety 0))) (loop for i fixnum from 0 below n do (setf (aref b i) 0)) b) (defun tr7(b n)(declare (simple-array b) (fixnum n) (optimize (speed 3)(safety 0))) (loop for i fixnum from 0 below n do (setf (svref b i) 0)) b) (defun tr2b(b n)(declare (type (array integer (*)) b) (fixnum n) (optimize (speed 3)(safety 0))) (loop for i fixnum from 0 below n do (setf (svref b i) 0)) ;;; the winner!!! b) ;; 35 bytes (defun tr2(b n)(declare (type (array integer (*)) b) (fixnum n) (optimize (speed 3)(safety 0))) (loop for i fixnum from 0 below n do (setf (aref b i) 0)) b);; 59 bytes (defun tr2c(b n)(declare (type (simple-array fixnum (*)) b) (fixnum n) ;; (:explain :types :inlining) (optimize (speed 3)(safety 0))) (loop for i fixnum from 0 below n do (setf (aref b i) 0)) b);; 35 bytes (defun tr2d(b n)(declare (type (simple-array t (*)) b) (fixnum n) ;(:explain :types :inlining) (optimize (speed 3)(safety 0))) (loop for i fixnum from 0 below n do (setf (aref b i) 0)) b) (defun tr2e(b n)(declare (type (simple-array integer (*)) b) (fixnum n) ;(:explain :types :inlining) (optimize (speed 3)(safety 0))) (loop for i fixnum from 0 below n do (setf (aref b i) 0)) b) ;(defvar mulbuf (make-array 2000 :element-type 'integer :initial-element 0)) ;; simple-array (setf mulbufsa (make-array 2000 :initial-element 0)) ;; simple array not 'integer ;(print 'bye) ;; (time (dotimes (i 1000000)(tr3 mulbuf 1000))) ;; (time (dotimes (i 1000000)(tr4 mulbufsa 1000))) ;; (time (dotimes (i 1000000) (tr2 mulbuf 1000)))