;;; -*- Lisp -*- #| Multiplication of sparse polynomials P, Q, using the following strategy: Using a bit-vector as working storage, determine the location of all possible non-zero coefficients in the answer. Namely, insert a 1 in the vector for the sum of each exponent pair, r+s, where r is an exponent in P, and s is an exponent in Q. This, in effect, computes a "skeleton" for the answer, in an attempt to reduce the time to access a sparse data structure as we compute the product. If this bit-vector contains T ones, then allocate two arrays of length T, one for exponents E and one for coefficients C. Repeat the doubly-nested loop. Now for the sum of each exponent pair, q=r+s, compute the related coefficient product and add it in to the accumulated result. Finding the place i where q appears in E. Then add to C[i] the product of the associated coefficients in P and Q. Finding the place i is done by a binary search, but where upper/lower bounds for the search are provided by some simple calculations in the loop. |# (eval-when (compile load) (declaim (optimize (speed 3)(safety 0)))) ;; This method should not be used if the highest degree is ;; extravagant: We have to allocate a bit array of size equal to the ;; degree of the answer. This technique may win on a kind of ;; middling-sparse domain. If the degree is low and there are many ;; non-zero terms, a dense array makes a better structure. ;; If the degree is very high, a heap version should be better. (defun mul-naive-b (x y);; arrange the data structure by computing a skeleton of answer exponents ;; if hideg (computed below) is huge, we may not want to do this at all. (let* ((xe (car x)) (ye (car y)) (yl (length (the simple-array ye))) (xl (length (the simple-array xe))) (yemax (aref ye (1- yl))) (hideg (+ (aref xe (1- xl))yemax ));degree of answer ) (declare(fixnum yl xl hideg yemax) (type (simple-array t (*)) xe ye)) (cond ((> hideg 1000000) (format t "~%mul-naive-b can't be used: too high degree: ~s" hideg) (mul-heap5 x y) ;; probably use heap ) ;; ((< hideg (* 3000 xl yl)) ;; (format t "~%mul-naive-b shouldn't be used: too low degree: ~s" hideg) ;; probably use dense ;;(mul-dense x y) ) (t(let((ansbit (make-array (1+ hideg) :element-type 'bit :initial-element 0)) (xc (cdr x)) (yc (cdr y)) (low 0)(high 0) (j 0) (xei 0) (xci 0) (ee 0) (ae #()) (ac #()) (termcount 0)) (declare(fixnum xei j ee termcount low high yemax) (type (simple-array t (*)) xc yc ansy ans) (type (simple-array bit (*)) ansbit)) ;; exponent "skeleton" keeping track of maximum number of ;; non-zero coefs possible in termcount. (dotimes (i xl) (declare (fixnum i)) (setf xei (aref xe i)) (dotimes(j yl) ;; all could be done in parallel (declare (fixnum j)) (setf ee (the fixnum(+ xei(the fixnum (aref ye j))))) (unless (eq 1 (sbit ansbit ee)) (setf (sbit ansbit ee) 1) (incf termcount)))) ;; now we know exactly how many terms in answer, allocate storage. ;; If we determine that the number of terms is nearly the same as hideg, ;; we could divert this again and just use mul-dense. (cond ((>(* 4 termcount) hideg) ;25% of terms are non-zero (format t "~% using mul-dense; ~s terms, ~s degree" termcount hideg) (return-from mul-naive-b (mul-dense x y)))) (setf ae (make-array termcount)) (setf ac (make-array termcount :initial-element 0)) ;; initialize exponents in answer. j=0 to start (dotimes (i (1+ hideg) ae) ;;all could be done in parallel (declare(fixnum i)) (unless (zerop (aref ansbit i)) (setf (aref ae j) i) (incf j))) ;; finally, do the multiplications in place (dotimes (i xl (cons ae ac)) (declare (fixnum i)) (setf xei (aref xe i) xci (aref xc i)) ;; use low and high to delimit the binary search to possible matches (setf low -1) (setf high (1+ (binsearch-updateLH ae ac (+ xei yemax) 0 0 termcount))) (dotimes (j yl) ;; all could be done in parallel (declare (fixnum j)) (setf low (binsearch-updateLH ae ac (the fixnum(+ xei(the fixnum (aref ye j)))) (* (aref yc j) xci) (1+ low) high))))))))) (defun binsearch-updateLH (indexA valA ind val lo hi) ;; Binary search and update, with low and high search parameters. ;; All exponents are in array indexA, all values in array valA ;; find j such that ind=indexA[j]. Then add val to element valA[j] ;; by construction there is such a j between lo and hi. Also indexA is sorted. (declare (fixnum ind lo hi) (type (simple-array t (*)) indexA valA)) (let ((p 0) (midpt 0)) (declare (fixnum p midpt)) (loop (setf p (ash (- hi lo) -1)) (cond((eq p 0);; either lo=hi or lo=hi-1 ;; all values are in the array, so we found it. (incf (aref valA lo) val) (return lo))) (setf p (+ lo p)) (setf midpt (the fixnum (aref indexA p))); midpoint, about. (cond ((eq midpt ind) (incf (aref valA p) val) (return p));; the only other way out ((< ind midpt) (setf hi p)) (t (setf lo p)))))) #| sample polynomial: (#(2 5 6 7 9) . #(2 1 3 3 1)) has exponents 2,5,6,7,9 and corresponding coeffs 2,1,3,3,1. make-racg is defined in newmul.fasl. (setf q (make-racg 5000 100 5)r (make-racg 1000 100 5)) cl-user(254): (time (mul-naive-b q r)) ; cpu time (non-gc) 1,171 msec user, 0 msec system cl-user(253): (time (mul-dense q r)) ; cpu time (non-gc) 235 msec user, 0 msec system cl-user(286): (time (mul-heap5 q r)) ; cpu time (non-gc) 3,375 msec user, 0 msec system (setf q (make-racg 5000 10000 5)r (make-racg 1000 10000 5)) cl-user(291): (time (mul-heap5 q r)) ; cpu time (non-gc) 5,359 msec user, 0 msec system cl-user(290): (time (mul-dense q r)) ; cpu time (non-gc) 2,375 msec user, 0 msec system cl-user(293): (time (mul-rtree4 q r)) ; cpu time (non-gc) 22,064 msec user, 140 msec system cl-user(295): (time (mul-naive q r)) ; cpu time (non-gc) 458,844 msec (00:07:38.844) user, 157 msec system |#