;;; -*- Lisp -*- ;;; various ways of multiplying sparse polynomials. ;;; using lists, hashtables, trees, streams, heaps, and just hacking the naive method. ;;; ;;; buggy in this version, those similar correct versions are just slow. (eval-when (compile load) (declaim (optimize (speed 3)(safety 0)))) ;; set up a lexically available context for handling several ;; variables which would otherwise be global, and require more ;; effort to access from compiled code. ;; (defparameter *zcoefs-lim* 50) (defparameter *zcoefs* (make-array *zcoefs-lim*)) (defvar *x* nil) (defvar *y* nil) (defmacro heap-empty-p () "Returns non-NIL if & only if the heap contains no items." `(<= fill-pointer 1)) (defmacro hlessfun (a b) `(< (st-exp ,a) (st-exp ,b))) (defmacro lesser-child (a parent fp) "Return the index of the lesser child. If there's one child, return its index. If there are no children, return (FILL-POINTER (HEAP-A HEAP))." `(let* ((left;;(+ (the fixnum ,parent)(the fixnum ,parent)) (ash ,parent 1)) (right (1+ (the fixnum left)))) (declare (fixnum left right ,fp ,parent ) (optimize(speed 3)(safety 0)) (type (simple-array t (*)) ,a)) (cond ((>= left ,fp) ,fp) ((= right ,fp) left) ((hlessfun (aref ,a left) (aref ,a right)) left) (t right)))) (let ((*YYEXPS* nil) (*YYCOEFS* nil) (*YYLEN* 0) (fill-pointer 0)) (declare (fixnum *YYLEN* fill-pointer) (type (simple-array t (*)) *YYEXPS* *YYCOEFS*)) ;; input format: a univariate polynomial is represented by a Lisp cons ;; of 2 arrays. The first array is exponents, in ascending order. The ;; second array is corresponding coefficients. We expect that the ;; usual way of representing numbers in Lisp is used, whereby a small ;; number (in magnitude less than, say 2^29) is represented "in place" ;; shifted right a few bits so it is tagged as a small number; larger ;; numbers, of unlimited length, are pointed to by the entry in the ;; same field. Thus "short" arithmetic in Lisp is slightly ;; handicapped by the need to do some tag checking and shifting. ;;This makes the transition to arbitrary precision, as well as to ;; float, double-float, and even complex numbers programmatically ;; smoother. ;; If the Lisp compiler is informed (via declarations) that it has an array of small ;; numbers, it may dispense with the tags and generate code to use ordinary 32-bit (or ;; 64-bit) arithmetic. ;; example of the input format: 45x^10+25x^2+ 9 [note: 9 = 9x^0] ;; ( #(0 2 10) . #(9 25 45)) ;; Now to multiplication of polynomials. ;; First, the really obvious way, using a double-nested loop, ;; inserting each of the new exponent-coefficient pairs in ;; the right place in a linked list. (defun mul-naive0 (x y) (let* ((ans nil)(e 0)(c 0) (yl (length (car y))) (xe (car x)) ;array of exponents of x (ye (car y)) ;array of exponents of y (xc (cdr x)) ;array of coefficients of x (yc (cdr y))) ;array of coefficients of y (dotimes (i (length (car x)) ans) (dotimes(j yl) ;; multiply terms (setf e (+ (aref xe i)(aref ye j));add exponents c (* (aref xc i)(aref yc j));multiply coefficients ans (add-poly-term e c ans)))))) (defun add-poly-term(e c L) ;make a list of ( ... (exp . coef)...) (let ((ahead (cdr L))) (cond ((null ahead) (if L (setf (cdr L)(list (cons e c))) (setf L (list (cons e c))))) ((= e (caar ahead)) (incf (cdar ahead) c) ;add the new term into old coefficient (cdr L)) ((< e (caar ahead)) (setf (cdr L)(cons (cons e c) ahead)) (cdr L)) (t (add-poly-term e c (cdr L))))) L) ;;; This is almost all we need for a working program, but we should do this: ;;; Convert list back to the standard form for polynomials used by the input. ;;; Here is a program that does it: (defun L2PA (L) ;;List to Pair of Arrays ;; L is a LIST of (exp . coef) of length n. ;; create a pair: one array of exponents and one of coefficients. (let* ((n (length L)) (exps (make-array n)) (coefs (make-array n)) (count 0)) (declare (fixnum count n) (type (simple-array t (*)) exps coefs)) (dolist (i L (cons exps coefs)) (setf (aref exps count) (car i)) (setf (aref coefs count)(cdr i)) (incf count)))) ;; A nuance we will ignore for now is that this result may have some zero coefficients, ;; e.g. if we multiply (x+1)*(x-1). We can usually fix this cheaply. ;; The new program looks like this: (defun mul-naive1 (x y) (let* ((ans nil) (e 0)(c 0) (yl (length (car y))) (xe (car x)) (ye (car y)) (xc (cdr x)) (yc (cdr y))) (dotimes (i (length (car x)) (L2PA ans));; ** changed** (dotimes(j yl) ;; multiply terms (setf e (+ (aref xe i)(aref ye j)) c (* (aref xc i)(aref yc j)) ans (add-poly-term e c ans)))))) ;; This program is not nearly as fast as it could be, if we just make ;; a small change. Instead of looking to insert each new term into ;; the answer starting from the beginning, let us keep track of where ;; the just-previous insertion was made, and place the new insertion ;; to its right, on each sweep through the second argument. When we ;; start with the next term of the first argument, we start insertion ;; from the beginning of the answer. We also simplify the programming ;; of an insertion into the answer by initializing ans to a non-empty ;; list. ;; This change results in the following program: (defun mul-naive (x y);; naive but with a finger into the answer (let* ((ans (list (cons 0 0))) ; makes iteration neater (finger ans) (yl (length (the simple-array (car y)))) (xl (length (the simple-array (car x)))) (xe (car x)) (ye (car y)) (xc (cdr x)) (yc (cdr y))) (declare(fixnum yl xl) (type (simple-array t (*)) xe ye xc yc)) (dotimes (i xl (L2PA (cdr ans))) (dotimes(j yl) ;; multiply terms (setf finger (addterm-fin(+ (aref xe i)(aref ye j)) (* (aref xc i)(aref yc j)) finger))) (setf finger ans)))) (defun mul-naiveM (x y);; naive but with a finger into the answer (let* ((ans (list (cons 0 0))) ; makes iteration neater (finger ans) (yl (length (the simple-array (car y)))) (xl (length (the simple-array (car x)))) (xe (car x)) (ye (car y)) (xc (cdr x)) (yc (cdr y)) (xx 0)(ee 0)) (declare(fixnum yl xl) (type (simple-array t (*)) xe ye xc yc)) (dotimes (i xl (L2PA (cdr ans))) (setf xx (aref xc i) ee (aref xe i)) (dotimes(j yl) ;; multiply terms (setf finger (addterm-fin(+ ee(aref ye j)) (* xx(aref yc j)) finger))) (setf finger ans)))) ;; We slightly improve the addterm program that returns an ;; appropriate finger into the list for insertion. ;; Instead of the recursive version we use an iterative format. (defun addterm-fin(e c L) ;iterative version with do ; 258 bytes (do ((ahead (caadr L)(caadr L))) ((null ahead) (setf (cdr L)(list (cons e c)))) (cond ((> e ahead) (setf L (cdr L))) ((= e ahead) (incf (cdadr L) c) (return (cdr L))) (t (setf (cdr L)(cons (cons e c) (cdr L))); e < ahead. tack on end (return (cdr L)))))) ;; OK, mul-naive works. How much faster is it than mul-naive0? ;; On one example of a 500 X 1000 term multiplication, mul-naive takes 10.500 seconds. ;; The improved one takes 0.047 seconds, and is therefore 223 times faster. ;; Next step is to make a testing program. ;; First, we specify a program to create some random data. (defun make-racg(n d c) ;; make Random Array of increasing exponents of length n with ;; spacing k, with 0", 0.495501 ;;; million "=" 0.5 million "+". The number of coefficient operations ;;; is 0.5 million "*" and and 495,501 "+". Note that the number of ;;; terms in the answer is 500000-495501=4499. There are about 2.3 ;;; million exponent operations vs. 1 million coefficient operations. ;;; Converting the exponent arithmetic ONLY to fixnum and leaving the ;;; coefficients general, does not appear to improve the timing, at ;;; least in Allegro CL 8.0, so this was not pursued further. Other ;;; prospects are appealing however. The coefficient arithmetic ;;; could be done with (say) GMP arithmetic, an open-source fast bignum ;;; package, or MPFR, an open-source arbitrary precision float package. ;;; These type/coding issues are largely independent of the algorithm used ;;; for sparse polynomial multiplication. ;;; For the record, we show how to write the program for fixnums. (defun make-racf(n d);;fixnum only ;; make random array of increasing exponents of length n with ;; spacing k, with 0 e ahead) (setf L (cdr L))) ((= e ahead) (incf (the fixnum (cdadr L)) c) (return (cdr L))) (t (setf (cdr L)(cons (cons e c) (cdr L))); e < ahead. tack on end (return (cdr L)))))) (defun mul-naivef (x y);; naive but with a finger into the answer. fixnum only (let* ((ans (list (cons 0 0))) (finger ans) (yl (length (car y))) (xl (length (car x))) (xe (car x)) (ye (car y)) (xc (cdr x)) (yc (cdr y)) (xx 0)(ee 0)) (declare(fixnum yl xl xx ee) (type (simple-array fixnum (*)) xe ye xc yc)) (dotimes (i xl (L2PA (cdr ans))) (setf ee (aref xe i) xx (aref xc i)) (dotimes(j yl) ;; multiply terms (setf finger (addterm-finf (the fixnum (+ (the fixnum ee)(the fixnum(aref ye j)))) (the fixnum (* (the fixnum xx)(the fixnum (aref yc j)))) finger))) (setf finger ans)))) ;;;;;;;;;;;; enough for naive methods. Can we do better? ;;; A first hashing method for multiplication. (defun mul-hash0(x y) (let* ((ans (make-hash-table)) ;set up an empty hash table (yl (length (car y))) (xe (car x)) (ye (car y)) (xc (cdr x)) (yc (cdr y))) (dotimes (i (length (car x)) ans) (dotimes(j yl) ;; multiply terms and increment any existing entry ;; in the hash table by the new coefficient. If there ;; is not coefficient there, give it a default 0 value. (incf (gethash (+ (aref xe i)(aref ye j));key= exponent ans 0) ;default for missing value: 0 (* (aref xc i)(aref yc j));increment by prod of coefs. ))))) ;; There are 2 problems here. One: the result is a hash table, not our ;; standard form. We could live with this and have every polynomial ;; be stored in a hash table. Probably better is to translate from a ;; hash table to the standard form. We do this in mulhash0 below. ;; The second problem is that this program, while short, is not as ;; efficient in time as we can make it. not take into account various ;; ways of reducing hashing time. ;; Here is the program needed for the first fix: (defun sorthash(h);; utility for mulhash0, empty a hash table into an a-list (let ((ans nil)) (maphash #'(lambda (i v)(push (cons i v) ans)) h) (sort ans #'< :key #'car))) ;; Here is a much more sophisticated version. We even toss in the ;; nuance of eliminating zero coefficients. ;; Coefficients are stored in an array separate from the hash table, ;; so they can be allocated in an array of known type, like ;; double-float, or fixnum. For now we don't specify a particular ;; numeric type. This array may make memory locality better in some ;; situations, but it also has the beneficial side effect of ;; eliminating about half the hash code computation. We use the ;; address of data in this array to update, rather than re-hashing on ;; the key. The program is longer than strictly necessary: Some of the ;; code is almost never used, but allows us to do some storage ;; optimization "by hand". Common Lisp provides for expandable ;; arrays, but we can be faster by managing it by hand. #+ignore ;; This works, but allocates 3N list elements then copies into ;; 2N array elements. another definition (much longer, using a ;; hand-coded quicksort variant) is provided later. (defun sorthash1(h ar);; utility for mul-hash1 (let ((ans nil)) (maphash #'(lambda (i v) (setf v (aref ar v)) ; get the coefficient, by indirection (unless (= v 0);; normalize! remove 0 coefs (push (cons i v) ans))) h) (L2PA (sort ans #'< :key #'car)))) (defun mul-hash1 (x y);; exponents may be arbitrary integers. coefficients numbers. (let ((ans (make-hash-table :test 'eql));; eql is good enough for bignums too (xexps (car x)) (xcoefs (cdr x)) (yexps (car y)) (ycoefs (cdr y)) (xei 0) (xco 0) (zcoefs *zcoefs*) (zlim *zcoefs-lim*) (zcounter 0) (loc nil) (xy 0)) (declare (type (simple-array t (*)) xexps yexps); any exponents (type (simple-array t (*)) xcoefs ycoefs zcoefs); any coefs (fixnum zcounter zlim) (integer xei xy) (optimize (speed 3)(safety 0))) (dotimes (i (length xexps)(sorthash1 ans zcoefs)) (declare (fixnum i)) (setf xei (aref xexps i)) ;exponent for x (setf xco (aref xcoefs i)) ;coefficient corresponding to that exponent (dotimes (j (length yexps)) (declare (fixnum j)) ;; look in the hash table at location for this exponent (setf loc (gethash (setf xy (+ xei (aref yexps j))) ans)) (cond (loc ;; There is an entry in hash table for this ;; exponent. Update corresponding array location. Note: ;; hashtable is NOT updated. this happens about n*m ;; times, where n=size(x), m=size(y). (incf (aref zcoefs loc);; no number consing here either?? (* xco (aref ycoefs j)))) (t ;; if loc is nil, allocate a space in the answer array. ;; This happens relatively infrequently, about k times, where k=size(x*y). ;; Hashtable is updated. (setf (gethash xy ans) zcounter) ;; store the result in zcoefs array. (setf (aref zcoefs zcounter) (* xco (aref ycoefs j))); no number consing?? (incf zcounter) (cond ((= zcounter zlim) ;;if we reach this, we need more working space. ;; This is rarely used code. If global array is ;; big enough, it is never used. If global array ;; is too small, it is made bigger. (let* ((newsize (ash zlim 1)); double the size ;; (newarray (make-array newsize)) (newarray (make-array newsize :initial-element 0))) (declare (fixnum newsize) (type (simple-array t(*)) newarray)) ;;(format t "~% sizes: newarray ~s zcoefs ~s newsize ~s" ;; (length newarray)(length zcoefs) newsize) (dotimes ;(m newsize) ;wrong (m zlim) (declare (fixnum m)) (setf (aref newarray m)(aref zcoefs m))) (setf zcoefs newarray zlim newsize *zcoefs-lim* zlim *zcoefs* zcoefs)))))))))) ;; on the same data, 500 X 1000 -> 4450 terms ;;* ACL 8.0 GCL SBCL ;;* mul-naivef 29 ms ;;* mul-naive 47 ms 29 ms 47 ms ;;* mul-hash1 94 ms 1,120 ms 110 ms ;;* mul-hash0 203 ms ;;* mul9 1,562 ms 2,160 ms 687 ms ;;* mulstr 282 ms ? 203 ms ;; We slipped in mul-heap and mul9, to be discussed later. ;;; On different data, with random spacing of up to 500 between terms, ;;; we tried 500 X 1000 -> 255,364 terms [that is, VERY sparse ;;; input.], something else is going on. mul-heap now looks good ... ;;* ACL GCL SBCL ;;* mul-naive 2,400 ms 6,600ms 1,546 ms ;;* mul-hash1 1,173 ms ? 1,782 ms ;;* mul-heap 391 ms ? 297 ms ;;; On yet different data, with random spacing of up to 5000 between ;;; terms, we tried 500 X 1000 -> 459,416 terms [that is, VERY VERY ;;; sparse input.] Also rather large output. If there are 500k terms, ;;; there are 500k exponents and 500k coefficients. If each one ;;; occupies 4 bytes, we have 500k X 2 X 4 bytes or 2 megabytes. ;;; Given the additional space for storing numbers by indirection, and ;;; the extra space allocated for possible growth, the hash-table ;;; allocations are substantially larger. The 2 L2 caches on the Intel ;;; Pentium D CPU on which we are testing are each 1 megabyte. The ;;; heap multiplier (mul-heap6) has the significant advantage of ;;; generating each coefficient/exponent pair in the answer once, and ;;; not re-examining it. (Actually it will revisit locations because ;;; the program uses an array that must be dynamically grown -- it ;;; does not know how many terms will ultimately appearin the answer.) ;;; As for how large this polynomial is, it would probably take 1,500 ;;; pages to print out, double-sided, estimating 150 terms per ;;; side. Evaluating the polynomial at a floating-point point much ;;; outside of [-1,1] would probably fail from overflow. Finding zeros ;;; would likely be problematical, and it is unlikely that such a ;;; large polynomial would be a useful approximation to any data from ;;; measurements. Nevertheless, there apparently is interest from the ;;; direction of pure mathematics, in such creations. ;;; we show data for 3 implementations of Common Lisp, ;;; Allegro 8.0, GCL 2.6.8 and SBCL 1.0.13 ;;* ACL GCL SBCL ;;* mul-naive 3,891 ms 9,820 ms 4,422 ms ;;* mul-hash1 2,187 ms 2,970 ms 3,359 ms ;;* mul-heap 484 ms * 375 ms ;;* mul-naiveG 61,219 ms with GMP arithmetic ;; test0 500 1000 50,000 5 ;;* mul-naiveG 45,937 ms with GMP arithmetic ;; test0 500 1000 5,0000 5 ;;* mul-hash1G 7,800 ms with GMP arithmetic ;; small coeffs... ;;; ;;; note 1: * GCL error message. ;;; note 2: ? unknown. Too long to wait. ;;; note 3: Some program alterations that we ;;; found could speed up ACL, slowed down other programs especially ;;; GCL. Use of arrays and structures in GCL seem to be slow compared ;;; to lists. ;;; It seems that mul-heap is a BIG winner for ACL and SBCL, but GCL ;;; has problems with it. ;;; What IS this mul-heap?? It uses a heap, and we discuss it below. ;; It is one of a bunch of more advanced alternative storage ideas ;; (rather than a linked list) including binary radix trees, AVL ;; trees, skiplists, and other methods that we programmed. ;; Unfortunately, in our tests, these are all slower than ;; mul-naive for smallish problems. We know that this ;; mul-naive algorithm was used at least as early as 1966 by ;; William A. Martin, in programming the polynomial arithmetic system ;; for Macsyma at MIT. In this system a slightly different arrangement ;; was used. Rather than ((e . c)(e . c) ...) for exponents and ;; coefficients, they were arranged in one list (e c e c ...), and the ;; arrangement started with the highest degree with a non-zero ;; coefficient. The arrangement also allowed (recursively) for the ;; coefficients to themselves be polynomials. The timings show that on ;; large and very sparse problems the more elaborate alternatives ;; look better. ;; Since Monagan and Pearce advocate heap organizations, we describe ;; that next, and proceed on to mulstr. ;; For our demonstration here, a heap is just an array. Since we are ;; using only one heap, we have one fill-pointer we manage as a ;; lexically bound shared variable among all the programs using the ;; heap. A relatively straightforward implementation puts STREAMS into ;; a heap. What do we mean by a stream? We experimented with various ;; versions, which in Lisp can be made quite elegant by using ;; "continuations" -- in effect, designating as streams an object ;; which includes a program which when called produces further another ;; stream object -- including a program which can be called yet again, ;; ad infinitum. Using this kind of construction we can build streams ;; that interleave or merge results from two streams, and cascade them ;; recursively into the one final stream of results. Cute, and not ;; too shabby in terms of performance, but not the best. For low ;; storage usage and fast speed we found it adequate to use the ;; concept of streams, in this case, using just ordinary records of 5 ;; integers: the simple stream continuation function just increments a ;; pointer and otherwise is just multiplying and adding elements in a ;; stream. The simple version is below. (defstruct (st) ;; A kind of stream. each structure contains 5 items ;; which by reference to arrays of XX and YY , can burp out ;; successive exponents and coefficients of the product. (exp 0 :type integer) (coef 0 :type integer) (i 0 :type fixnum) (v 0 :type integer) (e 0 :type integer)); v, e are read-only ;;**** let was here**** ;; mularZ4 produces an array of "streams" of structures. ;; Each stream consists of 5 items: ;; current exponent, current coefficient, index of the next array ;; element of YY to use, multiplier for this stream, exponent ;; increment from XX for this stream. YY is the second polynomial. ;; Example: ;; Let X = a[0]*x^e[0]+ a[1]*x^e[1]+ .... ;; Let Y = b[0]*x^f[0]+ b[1]*x^f[1]+ .... ;; the first stream will represent a[0]* Y ;; the second stream will represent a[1]* Y ... ;; the first stream will start out as ;; { e[0]+f[0] , a[0]*b[0], a[0], 1, a[0] e[0]} = {exp,coef,i,v, e} ;; the second { e[1]+f[1], a[1]*b[0], 2, a[1], a[1], e[1]} ;; updating the first stream will do two things. Assuming Y has at least 2 terms, ;; 1. return exp and coef ;; 2. update the stream to contain { e[0]+f[1] , a[0]*b[1], 2, a[0], e[0]}. ;; If a stream runs off the end of Y, exp is returned as NIL rather than a number. ;; x and y are sparse polys in arrays as produced by (make-racg n ;; m). return array hack for heap management with a starter entry in ;; ans. ;; mularZ4 sets up an array of initialized streams, one for each term in x. ;; this setup takes time linear in the length of x, and is negligible. ;; It also allocates essentially all the memory for the heap maintenance. ;; The rest of the memory needed is allocated to store the answer. (defun make-simple-array (A) (if (typep A 'simple-array) A (make-array (length A) :element-type (array-element-type A) :initial-contents A))) (defun mularZ4(x y) (declare (optimize (safety 3)(speed 0))) (let* ((xexps (car x)) (xcoefs (cdr x)) (yexps (make-simple-array (car y))) (ycoefs(make-simple-array (cdr y))) (yexp0 (aref yexps 0)) (ycoef0 (aref ycoefs 0)) (ans (make-array (1+(length xexps)))) (ylen (length (car y))) (xe 0) (v 0)) ; (declare (fixnum i) (type (simple-array t (*)) ans xexps xcoefs yexps ycoefs)) (setf *YYEXPS* yexps *YYCOEFS* ycoefs *YYLEN* ylen) (setf (aref ans 0) (make-st :exp 0 :coef 0 :i ylen :v 0 :e 0)) (dotimes (j (length xexps) ans) (setf v (aref xcoefs j)) (setf xe (aref xexps j)) (setf (aref ans (1+ j)) (make-st :exp (+ xe yexp0) :coef (* v ycoef0) :i 1 :v v :e xe) )))) (defun zz(c) (let ((ans (make-array (1+ c) :initial-element (make-st :exp 0)))) (loop for i from 1 to c do (heap-insert ans (make-st :exp (random 250)) i)) ans)) (defun update-strheap (h) ;;return top of heap h, which is necessarily not empty. ;; take successor in stream of top of heap, and insert it in heap. (declare(type (simple-array t(*)) h)(optimize (speed 3)(safety 0))) (let* ((top (aref h 1)) (ta (st-exp top))(tb (st-coef top)) (newe (updatestrfun top))) ;; in case newe <= lefti and newe <= righti. put it at the top. ;; avoiding all heap programs ;; top of heap is location 1 (ignore 0) ;; left branch is location 2 ;; right branch is location 3 (cond ;if end of stream from this subproduct, remove top and make heap smaller! ((null newe) (heap-remove h)) ;; we've modified the top node! leave it there! ((and(<= newe (the integer (st-exp (aref h 2)))) (<= newe (the integer (st-exp (aref h 3)))))) ;; it's not the top node. remove it and put the (revised) node back in. (t (heap-insert h (heap-remove h) fill-pointer))); the new item doesn't fit there (values ta tb))) (defun mul-heap(x y);; hacked to insert at top of heap if possible (declare (optimize(speed 3)(safety 0))) (let* ((hh (if (< (length (car x))(length (car y)))(mularZ4 x y)(mularZ4 y x))) (alen 10) ; heuristic(* (length (car x))(length (car y)))) ;; initial heap (anse (make-array alen :adjustable t :element-type 'integer :fill-pointer 0)) (ansc (make-array alen :adjustable t :element-type 'integer :fill-pointer 0)) (preve 0) (prevc 0)) (declare (integer preve prevc) (type (array t(*)) anse ansc)) (setf fill-pointer (length hh)) (loop while (not (heap-empty-p)) do (multiple-value-bind (newe newc) (update-strheap hh) (declare (integer newe newc)) ; take next term off heap and insert its successor (cond ((= preve newe) ; is this same exponent as previous? (incf prevc newc)) ; if so, add coefficient. (t (unless (= prevc 0); normalize: remove zero coeffs (vector-push-extend prevc ansc) (vector-push-extend preve anse)) (setf prevc newc) (setf preve newe))))); initialize new coefficient (vector-push-extend prevc ansc) ;put in last results before exit (vector-push-extend preve anse) (cons anse ansc))) (defun updatestrfun(rec) ; record is a stream 5-tuple (declare(type (simple-array t(*)) *YYCOEFS* *YYEXPS*) (optimize (speed 3)(safety 0)) (type st rec)) (let ((i (st-i rec))) (cond ((>= i *YYLEN*) nil);; no more items on stream of products (t (incf (the integer (st-i rec))) (setf (st-coef rec) (* (the integer (st-v rec))(the integer (aref *YYCOEFS* i)))) (setf (st-exp rec) (+ (the integer (st-e rec))(the integer (aref *YYEXPS* i))); returns this val ))))) ;; We want to get fast access to a sort-of global variable "fill-pointer" ;; without incurring the overhead for a full global access. We can have ;; the best of both worlds by setting up a lexical environment in which ;; fill-pointer is bound, and defining programs in that environment. ;; The names of the programs (like percolate-down) are available in ;; any context including the global context. ;; (heap management parts of this were inspired by code that is ;; Copyright (c) 2002, 2003 Gene Michael Stover., GPL. ;; The code was found ;; at http://cybertiggyr.com/lisp-heap/ (defun percolate-down (aa hole xx fp) "Private. Move the HOLE down until it's in a location suitable for X. Return the new index of the hole." (let ((xxval (st-exp xx))) (declare (fixnum hole fp)(integer xxval) (type (simple-array t (*)) aa)(optimize (speed 3)(safety 0))) (do* ((child (lesser-child aa hole fp) (lesser-child aa hole fp)) (aaval (aref aa child)(aref aa child))) ((or (>= (the fixnum child) fp) (< xxval (the fixnum(st-exp aaval)))) hole) (declare (fixnum child) (type st aaval)) (setf (aref aa hole) aaval hole child)))) (defun percolate-up (a hole xx) "Private. Moves the HOLE until it's in a location suitable for holding X. Does not actually bind X to the HOLE. Returns the new index of the HOLE. The hole itself percolates down; it's the X that percolates up." (declare (type (simple-array t (*)) a)(fixnum hole) (optimize (speed 3)(safety 0))) (setf (aref a 0) xx) (do ((i hole parent) (parent (ash hole -1);;(floor (/ hole 2)) (ash parent -1);;(floor (/ parent 2)) )) ;; potential to speed up line below by declaration if a, x are fixnum, ((not (hlessfun xx (aref a parent))) i) (declare (fixnum i parent)) (setf (aref a i) (aref a parent)))) ;; important: initial-contents must have the first element duplicated. ;; e.g. (3 5 8) should be (3 3 5 8) (defun heap-insert (a xx fp) "Insert a new element into the heap. Returns the heap.";; rjf (declare (type (simple-array t (*)) a)(fixnum fill-pointer) (optimize (speed 3)(safety 0))) ;; (format t ".") (setf (aref a fp) nil) (incf fill-pointer) ; the global one ;; Move the hole from the end towards the front of the ;; queue until it is in the right position for the new ;; element. (setf (aref a (percolate-up a fp xx)) xx)) ;;; Later, a trick to try, if we believe Roman Pearce's comment: ;;; Instead of percolating down, see what is going to be inserted next, ;;; and try to insert it at the top. This saves a lot of time if it works. (defun heap-remove(a) ;; assumes non-empty!! if empty next answer is bogus. ;; answer after that is an error (fill pointer can't pop) (declare(type (simple-array t(*)) a)(optimize (speed 3)(safety 0))) (let* ((xx (aref a 1)) (fp (decf fill-pointer)) ;faster, fp is local now (last-object (aref a fp))) (declare (fixnum fp)) (setf (aref a (the fixnum (percolate-down a 1 last-object fp))) last-object) xx)) ;;; end of the heap stuff. Note that this has been hacked to provide ;;; less generality and some consequent advantages in time or ;;; space. The original package on which this is based is far more flexible. ;;; ;; end of lex environment for *YY..* ;; (heap management parts of this were inspired by code that is ;; Copyright (c) 2002, 2003 Gene Michael Stover., GPL. ;; The code was found ;; at http://cybertiggyr.com/lisp-heap/ #+SBCL (defmacro while (test &rest body) ;SBCL doesn't have "while". `(do () ((not ,test)) ,@body)) ;; Here is a multiplier similar to mul-heap but more straightforward in ;; that it doesn't use a heap. Just uses lisp linked list, as in ;; naive multiplication, for heap. ;; coefficient . program). The program generates the next exp/coef pair. ;;Manage streams of exponent/coeffs by keeping the linked list ;; sorted. Keep on taking the top off the list! It doesn't win ;; anywhere. (defun mulstr12X(x y);; manage "heap" as linked list! Use st records. (declare (optimize(speed 3)(safety 0))) (let* ((hha (if (< (length (car x))(length (car y)))(mularZ4 x y)(mularZ4 y x))) ;; initial sorted linked list (anse (make-array 10 :adjustable t :fill-pointer 0)) (ansc (make-array 10 :adjustable t :fill-pointer 0)) (preve 0) (prevc 0) (hhl (coerce hha 'list))) (declare (integer preve prevc)) (loop while (cdr hhl) do (multiple-value-bind (newe newc) (update-llx hhl) (declare (integer newe newc)) ;; (format t "~% inserting e=~s c=~s with preve=~s" newe newc preve ) ; take next term off heap and insert its successor (cond ((= preve newe) ; is this same exponent as previous? (incf prevc newc)) ; if so, add coefficient. (t (unless (= prevc 0); normalize: remove zero coeffs (vector-push-extend prevc ansc) (vector-push-extend preve anse)) (setf prevc newc) (setf preve newe))); initialize new coefficient )) (vector-push-extend prevc ansc) ;put in last results before exit (vector-push-extend preve anse) (cons anse ansc))) (defun update-llx (h) ;;return top of linked list, which is necessarily not empty. ;; take successor in stream of top of list, and insert it in the list. (declare(type (simple-array t(*)) h)(optimize (speed 3)(safety 0))) (let* ((rest (cdr h)) (top (car rest)) ; (car h) is a dummy link to make insertion easier. (ta (st-exp top))(tb (st-coef top) )) (cond((null (updatestrfun top)) ;; remove top; that's all (setf (cdr h)(cdr rest))) (t ; (format t "~% top=~s" top) (setf (cdr h)(cdr rest)) ; remove top from list (setf h (LL-insertxx rest (st-exp top) h)) )) ;re-insert (values ta tb))) (defun LL-insertxx(r exp L) ;make a list of ( ... (exp coef . rprog)...) (let ((ahead (cdr L))) ; compare key is exponent to compare in list (cond ((null ahead) (setf (cdr r) nil) (if L (setf (cdr L) r) (setf L r)) ) ((<= exp (st-exp (car ahead))) (setf (cdr r)ahead) (setf (cdr L) r) ) (t (LL-insertxx r exp (cdr L)))) L)) ;;; another piece, to make mul-hash1 faster, especially useful when the answer is huge. ;;; we don't want to keep generating terms in answer in a list and sorting it. ;;; we can generate terms in 2 arrays and sort, but we can't use lisp's sort. ;;; at least not easily. This version slows down GCL. (defun sorthash1(h ar) ;; utility for mul-hash1 (let* ((htc (hash-table-count h)) (anse (make-array htc :fill-pointer 0)) (ansc (make-array htc :fill-pointer 0))) (maphash #'(lambda (i v) (setf v (aref ar v)); get the coefficient, by indirection (unless (= v 0) ;; normalize! remove 0 coefs (vector-push i anse) (vector-push v ansc))) h) (qs anse ansc 0 (1- (length anse))) ;; qs is a quick sort we wrote, with a twist. (cons anse ansc) )) (defun qs(a b lo hi) ;; a re-implementation of quicksort, where entries in a are used as keys ; ;for sorting b too. (let ((pivotloc 0)) (declare(type (array t(*)) a b) (fixnum lo hi pivotloc)) (cond ((< lo hi) (setf pivotloc (partition a b lo hi)) (qs a b lo (1- pivotloc)) (qs a b (1+ pivotloc) hi) nil)))) (defmacro swapab(i j) `(let((temp& (aref a ,i))) (setf (aref a ,i) (aref a ,j)) (setf (aref a ,j) temp&) (setf temp& (aref b ,i)) ;also update b. this is why we wrote qs (setf (aref b ,i) (aref b ,j)) (setf (aref b ,j) temp&) )) (defun partition(a b lo hi) ;; the partition step in quicksort (declare(type (array t(*)) a b ) (fixnum lo hi)) (let ((left lo)(right hi)(pivot (aref a lo)) (bpivot (aref b lo))) ;pick a[lo] for pivot (declare(fixnum left right)) (loop while (< left right) do (while (< pivot (aref a right)) (decf right)) (while (and (<= (aref a left) pivot)(< left hi)) (incf left)) ;; at this point a[left]>pivot and a[right]<=pivot. ;; or left=hi (if (< left right) (swapab left right)) ) (setf (aref a lo) (aref a right)) (setf (aref b lo) (aref b right)) (setf (aref a right) pivot) (setf (aref b right) bpivot) ;also update b array. This is why we wrote qs right)) (defun test(n m &optional (d 5)(c 5)) ;try these parameters for now. (setf *x* (make-racg n d c)) (setf *y* (make-racg m d c)) (format t "~% mul-hash1 time - best hash table version") (time (mul-hash1 *x* *y*)) ; (format t "~% mul-heap time - use simple streams, in a heap array, insert from top sometimes") ; (time (mul-heap *x* *y*)) ; (format t "~% mul-heap3 time - use simple streams, sort the heap!") ; (time (mul-heap3 *x* *y*)) (format t "~% mul-naive time - just use insertion into single list") (time (mul-naive *x* *y*)) (format t "~% mul-naiveM time - just use insertion into single list") (time (mul-naiveM *x* *y*)) ;; minor optimizations ) (defun update-strheap2 (h) ;;return top of heap h, which is necessarily not empty. ;; take successor in stream of top of heap, and insert it in heap. (declare(type (simple-array t(*)) h)(optimize (speed 3)(safety 0))) (let* ((top (aref h 1)) (ta (st-exp top))(tb (st-coef top)) (newe (updatestrfun top))) (cond ;;if end of stream from this subproduct, remove top and make heap smaller! ((null newe) (heap-remove h)) ;; in case newe <= lefti and newe <= righti. put it at the top. ;; avoiding all heap programs ;; top of heap is location 1 (ignore 0) ;; left branch is location 2 ;; right branch is location 3 ;; if top node stays in place, just leave it there! ;; #+ignore ;; could be removed, not much change ((and(<= newe (the integer (st-exp (aref h 2)))) (<= newe (the integer (st-exp (aref h 3))))) ) ;; it's not the top node. remove it and put the (revised) node back in. ;; before we hack the heap to remove/insert this node h, ;; see if we can find another node in the heap that is exactly ;; the same key as the key for h. ;; if we find one (there may be many; we find one!) ;; then we update that node and insert the successor of h. ;; removing this next line makes the program way faster for sparse (t (chain-update newe h (1- fill-pointer));;**** new ;; WORKS ;;(chain-update newe h (ash fill-pointer -2));very fast but buggy. ;;(chain-update newe h fill-pointer) ;faster, but buggy! )) (values ta tb))) (defparameter ct1 0) ;testing (defun chain-update(newe hh fp) (declare(type (simple-array t(*)) hh) (integer newe) (fixnum fp) (optimize (speed 3)(safety 0))) ;; note, top of hh is already updated, and is non-nil (let ((spot ;;(find-in-heap newe hh fp) (find-in-heap newe hh (1+ fp)) ;;(ffinh newe hh (1+ fp)) )) (cond (spot ;; I think this is the "chain" operation ; (format t "*") ;happens mostly when dense result. ;; (incf (st-coef (aref hh spot)) (incf (st-coef spot) (st-coef (aref hh 1)));;return non-nil. saves heap op (if (null (updatestrfun (aref hh 1))) ; record is a stream 5-tuple or nil (heap-remove hh);; we have run out of this stream. remove it. (chain-update (st-exp (aref hh 1)) hh fp))) ;; no spot is found. (t ;;(incf ct1) ;; could comment out later (heap-insert hh (heap-remove hh) fp) )))) (defun find-in-heap (newe a fp) (declare (fixnum fp)(integer newe) (type (simple-array t (*)) a)) (let ((parval 0)) (declare (integer parval)) (do ((i 2 (1+ i))) ;;((>= i fp) nil) ; ((>= i 50) nil) ; (setf parval (st-exp (the st (aref a i)))) (cond ((< newe parval) (return nil));; not found; not in heap below here ((= newe parval) (return (aref a i))))))) (defun mul-heap3(x y);; hacked to insert at top of heap if possible, use CHAIN (declare (optimize(speed 3)(safety 0))) (let* ((hh (if (< (length (car x))(length (car y)))(mularZ4 x y)(mularZ4 y x))) (alen 10) ; heuristic(* (length (car x))(length (car y)))) ;; initial heap (anse (make-array alen :adjustable t :element-type 'integer :fill-pointer 0)) (ansc (make-array alen :adjustable t :element-type 'integer :fill-pointer 0)) (preve 0) (prevc 0)) (declare (integer preve prevc) (type (array t(*)) anse ansc hh)) (setf fill-pointer (length hh)) ;; (setf (aref hh 0)(aref source 0)) ;; (setf (aref hh 1)(aref source 1)) ;; heap is kept as small as possible, not loaded with source, as previous versions (loop while (not (heap-empty-p)) do (multiple-value-bind (newe newc) (update-strheap2 hh) (declare (integer newe newc)) ;; take next term off heap and insert ;; its neighbor to the right or its neighbor below. (cond ;; next clause is required. ((= preve newe) ; is this same exponent as previous? (incf prevc newc)) ; if so, add coefficient. (t (unless (= prevc 0); normalize: remove zero coeffs (vector-push-extend prevc ansc) (vector-push-extend preve anse)) (setf prevc newc) (setf preve newe) ;; big change here. Really bad idea. ;; (setf (aref hh 0) (aref hh 1)) ;; (setf hh(sort hh #'< :key #'st-exp)) )) ; initialize new coefficient )) (vector-push-extend prevc ansc) ;put in last results before exit (vector-push-extend preve anse) (cons anse ansc))) ;; buggy (defun test3() ;(n m &optional (d 5)(c 5)) ;try these parameters for now. (format t "~% mul-heap3 time - use simple streams, in a heap array, insert from top sometimes, chaining") (time (mul-heap3 *x* *y*)) ) (defun testmul12();(n m &optional (d 5)(c 5)) ;try these parameters for now. (format t "~% mulstr12X time - use simple streams , in a linear LIST") (time (mulstr12X *x* *y*)) ) ) ;; TRIP data #| Fast Sparse Multivariate Polynomial Multiplication Using Burst Tries Mickašel Gastineau and Jacques Laskar Paris Observatory - IMCCE - CNRS UMR8028 77, avenue Denfert Rochereau 75014 Paris gastineau@imcce.fr, laskar@imcce.fr V.N. Alexandrov et al. (Eds.): ICCS 2006, Part II, LNCS 3992, pp. 446–453, 2006. Table 1. Computation time and memory consumption of s1 (s1 + 1) with s1 = (1+x+y+z+t)25 using different memory managers. Polynomial is stored as a recursive list. Measured time (sum of user CPU time and system CPU time) is expressed in seconds and space used (size of the trip process in memory) in kilobytes. Machine time (s) memory (Kb) system rlalloc system rlalloc Intel Xeon 3.06Ghz - Linux 32-bits - glibc 2.3.4 108 83 22360 19688 Intel Xeon 3.6Ghz - Linux 64-bits - glibc 2.3.4 87 63 44656 33920 Apple G5 2.0Ghz - Mac os X 10.4 64-bits 225 186 28748 22504 better stuff from table 2 in same article, s2 = (1+x+y+z+t)^20 Computer algebra systems s1 (s1 +1) s2 (s2 + 1) time (s) memory (Kb) time (s) memory (Kb) PARI/GP 2.1.7 82.1 N/A 225.6 N/A PARI/GP 2.2.11 (GMP kernel) 67.8 N/A 177.6 N/A Singular 3.0.1 101.5 8383.5 42.8 982.9 TRIP 0.98 (recursive list) 54.6 7138.5 15.5 1370.0 TRIP 0.98 (recursive vector) 42.9 5953.7 16.5 1831.4 TRIP 0.98 (flat vector) 24.1 3609.1 13.3 721.2 |# #| our time, (time (mul-hash1 s2 s2p1)) ; cpu time (non-gc) 98,661 msec (00:01:38.661) user, 62 msec system ; cpu time (gc) 3,167 msec user, 0 msec system ; cpu time (total) 101,828 msec (00:01:41.828) user, 62 msec system ; real time 102,250 msec (00:01:42.250) ; space allocation: ; 2,112 cons cells, -3,090,540,784 other bytes, 0 static bytes (#(0 1 2 3 4 5 6 7 8 9 ...) . #(1 40 780 780 40 658008 658008 18643560 3838380 273438880 ...)) [4] cl-user(974): [4] cl-user(978): (time (mul-naive s2 s2p1)) ; cpu time (non-gc) 86,852 msec (00:01:26.852) user, 32 msec system ; cpu time (gc) 3,133 msec user, 0 msec system ; cpu time (total) 89,985 msec (00:01:29.985) user, 32 msec system ; real time 90,641 msec (00:01:30.641) ; space allocation: ; 271,406 cons cells, 1,196,507,336 other bytes, 16 static bytes [4] cl-user(980): (time (mulstr s2 s2p1)) ; cpu time (non-gc) 203,147 msec (00:03:23.147) user, 46 msec system ; cpu time (gc) 1,588 msec user, 0 msec system ; cpu time (total) 204,735 msec (00:03:24.735) user, 46 msec system ; real time 206,203 msec (00:03:26.203) ; space allocation: ; 1,327 cons cells, 1,197,434,912 other bytes, 0 static bytes ;;; puting fixnums in for exponents doe snot speed things up... ;; 1+x+y+z+t --> (setf r (cons #(0 1 100 10000 1000000)#(1 1 1 1 1))) [8] cl-user(88): (time (mulstr *s2* *s2p1*)) ; cpu time (non-gc) 237,750 msec (00:03:57.750) user, 125 msec system ; cpu time (gc) 12,719 msec user, 31 msec system ; cpu time (total) 250,469 msec (00:04:10.469) user, 156 msec system ; real time 252,375 msec (00:04:12.375) ; space allocation: ; 1,356 cons cells, 1,197,434,952 other bytes, 16 static bytes s2^20 has 135751 terms s2 has 10626 terms. We replaced the coefficient arithmetic in mul-naive with gmp4.1, giving mul-naiveg. Here's the timing. if we use addmul instead of 2 separate operations. (newmulg2) cl-user(378): (time (mul-naiveG *s2* *s2p1*)) ; cpu time (non-gc) 114,140 msec (00:01:54.140) user, 31 msec system ; cpu time (gc) 78 msec user, 0 msec system ; cpu time (total) 114,218 msec (00:01:54.218) user, 31 msec system ; real time 114,672 msec (00:01:54.672) ; space allocation: ; 407,374 cons cells, 11,946,512 other bytes, 0 static bytes (#(0 1 2 3 4 5 6 7 8 9 ...) . #(2{g} 60{g} 970{g} 11020{g} 96235{g} 673512{g} 3877140{g} if we use hashtable, it looks like this: [5] cl-user(412): (time (mul-hash1G *s2* *s2p1*)) ; cpu time (non-gc) 119,265 msec (00:01:59.265) user, 16 msec system ; cpu time (gc) 172 msec user, 0 msec system ; cpu time (total) 119,437 msec (00:01:59.437) user, 16 msec system ; real time 119,860 msec (00:01:59.860) ; space allocation: ; 135,820 cons cells, 21,396,096 other bytes, 0 static bytes (#(0 1 2 3 4 5 6 7 8 9 ...) . #(2{g} 60{g} 970{g} 11020{g} 96235{g} 673512{g} 3877140{g} |# (defvar *r* nil) (defvar *s2* nil) (defvar *s2p1* nil) (defun setup() (let ((k1 nil)(k2 nil)) (setf *r* (cons #(0 1 100 10000 1000000)#(1 1 1 1 1))); 1+x+y+z+t (setf *s2* (power *r* 20)) (setf k1 (make-array 10626 :initial-contents (car *s2*))) (setf k2 (make-array 10626 :initial-contents (cdr *s2*))) (setf (aref k2 0) 2) (setf *s2p1* (cons k1 k2)))) (defun setup2() ;; smaller problem (let ((k1 nil)(k2 nil)(r2 nil)(len 0)) (setf r2 (cons #(0 1 100 10000 )#(1 1 1 1))) ; 1+x+y+z ;; not +t (setf *s2* (power r2 20)) (setf len (length(car *s2*))) (setf k1 (make-array len :initial-contents (car *s2*))) (setf k2 (make-array len :initial-contents (cdr *s2*))) (setf (aref k2 0) 2) (setf *s2p1* (cons k1 k2)))) (defun power(p n)(if (eql n 1) p(mul-naive p (power p (1- n))))) (defun power2(p n)(if (eql n 1) p(mul-heap p (power2 p (1- n))))) (defun test0(n m &optional (d 5) (c 5)) ;try these parameters for now. (setf *x* (make-racg n d c)) (setf *y* (make-racg m d c)) (format t "~% mul-naive time - just use insertion into single list, not using GMP") (time (mul-naive *x* *y*)) ;; We will add more lines here later (format t "~% mul-hash1 time - hash1 not using GMP") (time (mul-hash1 *x* *y*)) (format t "~% mul-heap time - heapmerge not using GMP") (time (mul-heap *x* *y*)) ) (defun arr=(r s) (and (= (length r)(length s)) (dotimes (i (length r) t) (if (/= (aref r i)(aref s i)) (return nil))))) (defun pol=(r s)(and (arr= (car r)(car s)) (arr= (cdr r)(cdr s)))) (defun diff1(r s) (dotimes (i (min (length s)(length r)) 'nodiff) (cond ((= (aref r i)(aref s i))nil) (t (return (format nil "differs at ~s: ~s ~s" i (aref r i)(aref s i))))))) ;; slower, but another way of computing find-in-heap (defun ffinh(val ar end) (find val ar :start 2 :end end :key #'st-exp)) ;;;;;;;;fiddling, may, 2010. #|how can we write a loop that looks like this (loop for i from 0 to maxdegree ans[i]= sum of c[j]*c[k] such that j+k=i) = (loop for i from 0 to maxdegree ans[i]= sum of c[j]*c[k] such that j+k=i) ;; let E1 be exponent vector of P1, and E2 exponent vector of P2 (loop for i from (+ (min E1 E2)) to (max E1 E2) do jlist:=E1 for k on E2 do (expon:= car(jlist)+k, if expon=i then add c[car(jlist)]*c[k] to coef for k from E2[1] eh, just a search.. |#