;;; -*- mode: lisp; syntax: common-lisp; package: gmp; base: 10 -*- ;; author: richard fateman, jan, 2006 ;; this file is based in part on lisp/loadgmp2.cl ?2002? ;; which also has polynomial stuff in it. ;; working univariate polynomial stuff is in lisp/polymultx.cl though. (eval-when '(load) (require "ga")(provide "gmp")) (in-package :gmp) (eval-when (compile load) (declaim (optimize (speed 3) (safety 0) (space 0) (compilation-speed 0))) ) ;; if i learned about uffi i suppose it could be done in a uniform way ;; for any of the participating common lisp implementations. ;; load a working gmp library. ;;(load "h:/lisp/generic/p4/gmp.dll") ;; if this is a pentium 4 ;;(load "h:/lisp/generic/p3/gmp.dll") ;; if this is a pentium 3 ;;(load "h:/lisp/generic/p0/gmp.dll") ;; if this is a pentium #|trying to clear the air about different versions of gmp 4.1.4 for different architectures. consider the computation of 20,000! by the program k4, below. how much does it matter which library we use? we tested this first bunch on a pentium 4. (we do the computation 10 times) (time (progn (dotimes (i 10)(k4 20000)) nil)) with generic code for any i86, written in c: 1,219 ms. pentium-generic/gmp.dll with some code specifically for pentium 0: 1,250 ms. p0/gmp.dll with some code specifically for pentium 3: 641 ms. p3/gmp.dll with some code specifically for pentium 4: 437 ms. p4/gmp.dll using a pentium 3 with generic code for any i86, written in c: 2,083 ms. pentium-generic/gmp.dll with some code specifically for pentium 0: 792 ms. p0/gmp.dll with some code specifically for pentium 3: 791 ms. p3/gmp.dll with some code specifically for pentium 4: p4/gmp.dll oh, using mpz_fac, directly from gmp 661 ms. ;;(progn (setf x (into 0))(mpzfac (gmpz-z x) 20000)) using sprfact, 588 ms. using gkg, 831 ms. ?? using gmaxfactx 779 ms ?? |# ;;(load "gmp.dll") ;; one should work.. ;; the next section's syntax is particular for allegro cl. (eval-when (compile load eval) (ff:def-foreign-type mpz (:array :int)) (ff:def-foreign-call (mpz_init "__gmpz_init") ((x (cl::* :int))) :returning :int :arg-checking nil :call-direct t) (ff:def-foreign-call (mpz_init2 "__gmpz_init2") ((x (cl::* :int)) (len :int) ) ;; space for how many bits? :returning :int :arg-checking nil :call-direct t) (ff:def-foreign-call (mpz_fac "__gmpz_fac_ui") ;; factorial ((x (cl::* :int)) (n :int) ) ;; compute n! :returning :int :arg-checking nil :call-direct t) (ff:def-foreign-call (mpz_set_str "__gmpz_set_str") ;changes value of mpz x. ((x mpz (simple-array (signed-byte 32) (3))) (s (cl::* :char)) (base :int)) :strings-convert t :returning :int :arg-checking nil) ;; :call-direct t (ff:def-foreign-call (mpz_get_str "__gmpz_get_str") ((s :int) (base :int) (op mpz (simple-array (signed-byte 32) (3)))) ;;i think it is ok to let gmp allocate the space for this string. :returning ((cl::* :char) ) :arg-checking nil :call-direct t) (ff:def-foreign-call (mpz_get_d "__gmpz_get_d") ((x mpz (simple-array (signed-byte 32) (3)))) :returning :double :arg-checking nil :call-direct t) (ff:def-foreign-call;; remove gmp number from memory (mpz_clear "__gmpz_clear") ((x mpz (simple-array (signed-byte 32) (3)))) :returning :int :arg-checking nil :call-direct t) (ff:def-foreign-call;; compare (mpz_cmp "__gmpz_cmp") ((x mpz (simple-array (signed-byte 32) (3))) (y mpz (simple-array (signed-byte 32) (3))) ); returns pos if x>y, zero if x=y, neg if x0. If b=0, what do we do? (defmethod qr((a gmpz)(b gmpz)) (let ((r (create_mpz_zero)) (q (create_mpz_zero))) (mpz_fdiv_qr (gmpz-z q)(gmpz-z r)(gmpz-z a)(gmpz-z b)) (values r q)) ) ;;(defarithmetic expt) hm. can't do negative exponent. ;; not sure we want to do ;; (expt GMP GMP) (defmethod ga::two-arg-expt ((z gmpz) (u fixnum)) (assert (>= u 0)) (let ((ans (alloc-gmpz))) (mpz_pow_ui (gmpz-z ans) (gmpz-z z) u) ans)) (defmethod ga::two-arg-expt ((z gmpz) (u gmpz)) (error "not implemented gmpz ^gmpz") (assert (>= u 0)) (let ((ans (alloc-gmpz))) ;; not implemented. if u is small, use previous. ;; (mpz_pow (gmpz-z ans) (gmpz-z z)(gmpz-z u)) ans)) (defmethod ga::1+((z gmpz))(+ 1 z)) (defmethod ga::1-((z gmpz))(- z 1)) (defun fact(x)(let((ans (alloc-gmpz))) ;; factorial (mpz_fac (gmpz-z ans) x) ans)) (defmethod ga::ash((z gmpz)(n fixnum)) ;; arithmetic shift (let((ans (alloc-gmpz))) (cond ((> n 0) (mpz_mul_2exp (gmpz-z ans) (gmpz-z z) n) ans) ((< n 0) (mpz_rshift (gmpz-z ans) (gmpz-z z) (- n)) ans) (t z)))) (defun probably_primep(z &optional(reps 5)) ;; 2=yes, prime. 0= no, composite. 1= probably prime. ;; increase reps for more repetitions of the test. (setf z (gmp::into z)) (mpz_probab_prime_p (gmpz-z z) reps)) (defun next_prime(z) ;; 2=yes, prime. 0= no, composite. 1= probably prime. ;; increase reps for more repetitions of the test. (let ((ans (alloc-gmpz))) (setf z (gmp::into z)) (mpz_nextprime (gmpz-z ans) (gmpz-z z)) ans)) (defun gmp_gcd (a b) (let((ans (alloc-gmpz))) (setf a (gmpz-z(into a))) (setf b (gmpz-z(into b))) ;; make both into gmpz numbers if not already (mpz_gcd (gmpz-z ans) a b) ans)) (defun gmp_gcdext (a b) (let((ss (alloc-gmpz)) (tt (alloc-gmpz)) (gg (alloc-gmpz))) (setf a (gmpz-z(into a))) (setf b (gmpz-z(into b))) ;; make both into gmpz numbers if not already (mpz_gcdext (gmpz-z gg) (gmpz-z ss)(gmpz-z tt) a b) (values gg ss tt))) ;returns g,s,t such that g=a*s+b*t and g>0 (defmethod ga::evenp((z gmpz)) (> (mpz_divisible_2exp_p (gmpz-z z) 1) 0)) (defmethod ga::oddp((z gmpz)) (= (mpz_divisible_2exp_p (gmpz-z z) 1) 0)) (defmacro defcomparison (op) (let ((two-arg (intern (concatenate 'string "two-arg-" (symbol-name op)) :ga )) (cl-op (tocl op))) `(progn ;; only extra methods not in ga are defined here. (defmethod ,two-arg ((arg1 gmpz) (arg2 gmpz)) (,cl-op (mpz_cmp(gmpz-z arg1)(gmpz-z arg2)) 0)) (defmethod ,two-arg ((arg1 real) (arg2 gmpz)) (let ((x (lisp2gmpz arg1))) (,cl-op (mpz_cmp(gmpz-z x)(gmpz-z arg2)) 0))) (defmethod ,two-arg ((arg1 gmpz) (arg2 real)) (let ((y (lisp2gmpz arg2))) (,cl-op (mpz_cmp(gmpz-z arg2) (gmpz-z y)) 0))) (compile ',two-arg) (compile ',op) ',op))) (defcomparison >) (defcomparison =) (defcomparison <) (defcomparison <=) (defcomparison >=) ;; programs that decompose an Allegro CL number into an ;; array of 16 bit quantities suitable for piling into GMP are ;; in the appendix to papers/polysbyGMP.tex. Copied here, too. (defmethod lisp2gmpz ((x integer)) ;; but not fixnum (let* ((out (create_mpz_zero)) ;the wrapper (r (gmpz-z out)) ;the inside (a (btoa x));; bignum to array. (out2(create_mpz_zero)) (h (gmpz-z out2))) (do ((i (1- (length a))(1- i))) ((< i 0) r) (declare (fixnum i)) (mpz_set_si h (aref a i)) (mpz_mul_2exp r r 16) (mpz_add r r h)) (if (< x 0) (setf (aref r 1)(- (aref r 1)))) out)) ;;B to A: Bignum or fixnum to Array of bigits, 16 bits (defun btoa(x) (if (excl::fixnump x) (vector (abs x)) (let* ((size (bignum-size x)) (ans (make-array size :element-type '(unsigned-byte 16)))) (declare (optimize (speed 3)(safety 0)(debug 0))(fixnum size)) (do ((i 0 (1+ i))) ((= i size) ans) (declare (fixnum i)) (setf (aref ans i)(nth-bigit x i)))))) (eval-when (compile load)(require :llstructs)) (defun nth-bigit (x n) (sys:memref x -14 (* 2 n) :unsigned-word)) (defun bignum-size(x) ;; in 16-bit quantities (excl::bm_size x)) ;; this program is not used... (defun atob(a) ; array to POSITIVE lisp (bignum) (let ((num 0)) (do ((i (1- (length a)) (1- i))) ((< i 0) num) (setf num (+ (aref a i) (ash num 16)))))) ;;;;;;;;;;;;;; programs for largish factorials ;;; just using gmp:: (defun gmpfac(x) (let ((r (into 0))) (assert (typep x 'fixnum)) (mpz_fac (gmpz-z r) x) r)) ;;;; combining gmp, memoization, split-recursive, (defparameter oldfacts (make-array 0 :adjustable t :fill-pointer t)) (defun kgmin (n m min) ;; (k n 1 1) is n! (declare (fixnum n m min)) (cond ((< n min) 1) ((<= n m) n) ((and (evenp m)(evenp n)(evenp min)) (ash (kgmin (ash n -1) (ash m -1) (ash min -1)) (1+ (ash (- n min) -1)))) (t (* (kgmin n (ash m 1) min) (kgmin (- n m)(ash m 1) min))))) ;; (* (gkgmin 100 1 50)(kgmin 49 1 1)) is the same as (gkg 100 1 1) (defun gkgmin(n min &aux (aa (alloc-gmpz)));; (declare (fixnum n)(optimize (speed 3)(safety 0)(debug 0))) (let ((res nil) (shift 0) (L (ceiling (cl::log n 2))) (a (gmpz-z aa))) (declare (fixnum L shift n)) (labels ((k (n m ans res min) (declare (fixnum n m)) (cond ((cl::< n min) (mpz_set_si ans 1)) ((cl::<= n m)(mpz_set_si ans n)) ((and (evenp n)(evenp m)(evenp min)) (cl::incf shift (1+ (ash (- n min) -1))) (k (ash n -1) (ash m -1) ans res (ash min -1))) (t(let ((b (car res))) (k (cl::- n m) (setq m (ash m 1)) b (cdr res) min) (k n m ans (cdr res) min) (mpz_mul ans ans b) ans))))) (dotimes (i L)(push (gmpz-z (alloc-gmpz)) res));set up resource (k n 1 a res min) (mpz_mul_2exp a a shift) aa))) ;;main program. (defun gkmemfac(n) (if (null oldfacts) (clearfact)) (let ((z (lookupfact n))) ;; find z, the largest integer <= n such that we know z! (if (= (car z) n)(cdr z) (rememberfact n (* (gkgmin n (1+ (car z))) (cdr z)))))) (defun lookupfact(n) (loop for i from 0 to (length oldfacts) do (if (<= (car (aref oldfacts i)) n) (return (aref oldfacts i))))) (defun rememberfact(n f) (vector-push-extend (cons n f) oldfacts) (setf oldfacts (sort oldfacts #'> :key #'car)) f) (defun clearfact()(setf oldfacts (make-array 0 :adjustable t :fill-pointer t)) (vector-push-extend (cons 0 (into 1)) oldfacts)) ;(clearfact) ; clear it now. #| The model for generic arithmetic and overloading here is not congruent with the gmp design in the following way, which mostly affects efficiency. We are superimposing a "functional" model on a "state-based" one, and in the transition we are creating temporary objects too often. With GC finalizations, these extra objects will go away without additional programmer effort, but they are still created. A closer adherence to the computation model would probably be better. Example: in our model we can compute (setf a (+ a (* b c))) but before we complete the operation we must compute separately, t1=b*c, t2=a+t1, and then pointing a to t2, and then clearing the old value of a. This can be done without any intermediate (Lisp) storage by this idiom: (mpz_addmul (gmpz-z a) (gmpz-z b) (gmpz-z c)) This assumes that no other access route is associated with a's value. ............. ;;A digression on Factorial functions, benchmarks, timing, etc. ;;see paper .. h:/papers/factorial.tex or ;;http://www.cs.berkeley.edu/~fateman/papers/factorial.pdf ;; factorial gmp version, vs usual ;; These functions are variants on the usual, as it happens, not ;; very fast ways of computing factorial. We include these to ;; show how to take a simple function and elaborate upon it ;; to make better (i.e. more efficient) use of gmp. (defun f(x)(if (zerop x) (cs 1) (* x (f (1- x))))) ;functional version (defun g(x)(if (zerop x) 1 (* x (g (1- x))))) ; ordinary bignum version (defun h(x) ;; a version that avoids allocating extra space. (let ((ans (into 1)) (m (into 0))) (dotimes (i x ans) (mpz_add_ui (gmpz-z m)(gmpz-z m) 1) ;; add integer 1 to gmp m (mpz_mul (gmpz-z ans)(gmpz-z ans) (gmpz-z m))))) ;; set ans <- m*ans ;; A hacked up version of h, Since m isn't likely to be ;; larger than a fixnum. Who can compute factorial of (2^29) ? (defun h7(x);; compute factorial with gmpz (assert (and (excl::fixnump x) (> x 0))) (let* ((ans (cs 1)) ;; make space for the answer. (g (gmpz-z ans))) ;; g is the inside of gmpz number ans. (declare (fixnum m x)) (do ((i 1 (cl::1+ i))) ((cl::= i x) ans) (declare (fixnum i)) (mpz_mul_si g g i) ;; compute g <- g*i ))) ;;; an observation: we are not really benchmarking Bignum X Bignum, since ;;; if you look at how we are computing factorial, we really are ;;; doing Bignum X signed-32bit-integer. This may or may not be ;;; what you really want to test. ;;; On a pentium 3, (h7 20000) takes 489 ms. The GMP factorial ;;; program takes 67 ms. Is this because C is faster than Lisp? ;;; Well, that's not the reason, because a DIFFERENT way of ;;; computing factorial, in Lisp, but using arithmetic from GMP ;;; is about as fast. Here's a recursive factorial that is especially nice for large numbers if it is efficient to multiply two numbers of about equal size, and probably what Mathematica uses. Actually, even on ordinary Lisp it is a better algorithm because much of the work in the lower levels of the tree that is eventually all multiplied together, is done in ordinary single-word fixnum arithmetic, so some of the dealing with bigfloats is avoided. Other approaches are almost immediately thrown into the bignum arena having to multiply smallish numbers (like n) by large large numbers: so most work is bignum arithmetic. (defun k (n m) ;; (k n 1) is n! (if (<= n m) (cs n) (* (k n (* 2 m)) (k (- n m)(* 2 m))))) ;; or declared better and using shift instead of mult by 2 ... (defun k2 (n m) ;; (k n 1) is n! (declare (fixnum n m)) (if (cl::<= n m) (cs n) (* (k2 (cl::- n m)(setf m (ash m 1))) (k2 n m) ))) ;; Now here's a mysterious factorial which uses no extra storage, but its ;; not faster because it multiplies small X big. (defun kfact(n)(kfact1 n 1 1)) ;; version for integers (defun kfact1 (n m ans) ;; (kfact n 1 1) is n! (declare (fixnum n m)) (if (<= n m) (* n ans) (let ((q (ash m 1))) (kfact1 n q (kfact1 (- n m) q ans))))) ;; Here's a nicer version showing how a minor piece of ;; representation hacking in GMPZ could be done (defun k3top(n) ;;(k3top n) is n! pretty fast (let ((a (alloc-gmpz))) (k3 n 1 (gmpz-z a)) a)) (defun k3 (n m ans) ;; ans is a gmp number "insides" (declare (fixnum n m)) (if(cl::<= n m)(mpz_set_si ans n) (let ((b (gmpz-z(alloc-gmpz)))) (k3 (- n m) (setf m(ash m 1)) b) (k3 n m ans) (mpz_mul ans ans b)))) ;; The problem with k3 above is that it uses lots of storage, O(n). It is ;; using (alloc-gmpz) too much. Most of it very briefly. In reality ;; only a few distinct storage locations, O(log_2(n)) are needed at any ;; one time. The next program k4 pre-allocates sufficient cells and then ;; manages them explicitly. We also used labels and local environment ;; to capture the resource, res, which is a vector of scratch space for gmpzs. ;; It was surprisingly easy to set up scratch space here. This program k4 ;; is about 3X faster than k3. (defun k4(n);; using resource instead of allocation each time. Faster!! (declare (fixnum n)) (let ((res nil)) (labels ((k4i (n m ans) ; ans is a gmp number. (declare (fixnum n m)) (if(cl::<= n m)(mpz_set_si ans n) (let ((b (vector-pop res))) (k4i (cl::- n m) (setf m(ash m 1)) b) (k4i n m ans) (mpz_mul ans ans b) (incf (fill-pointer res)) ; (vector-push b gmpres) )))) (let* ((L (ceiling (cl::log n 2))) (a (alloc-gmpz))) (declare (fixnum L)) (setf res (make-array L :fill-pointer 0)) ;; allocate enough space for temps. ;; fill the temps with gmp numbers, zeros. Just the insides though. (dotimes (i L)(vector-push (gmpz-z (alloc-gmpz)) res)) ;; the spaces in this vector will get enlarged as needed, pretty fast. (k4i n 1 (gmpz-z a)) a)))) ;;; from luschny, translated from Java to Lisp (defun gsprfact(n) (declare (fixnum n)) (let* ((pp (into 1)) (p (gmpz-z pp)) (rr (into 1)) (r (gmpz-z rr)) (NN 1) (mlog2n (-(floor (log n 2)))) (h 0) (shift 0) (high 1) (len 0) (ans (gmpz-z (into 1))) (res (make-array(1+ (- mlog2n)) :fill-pointer 0))) (declare (fixnum mlog2n log2n h shift high len NN)) (labels ((gprod(n ans) (declare (fixnum n)) (let ((m (ash n -1))) (cond ((cl::= m 0) (mpz_set_si ans (incf NN 2))) ((cl::= n 2) (mpz_set_si ans (cl::* (cl::incf NN 2)(cl::incf NN 2)))) (t (let ((b (vector-pop res))) ;b is a temporary gmpz (gprod m b) ;set b (gprod (cl::- n m) ans) ;set ans (mpz_mul ans b ans) ;set ans (cl::incf (fill-pointer res)) ; remove b ))) ans))) (dotimes (i (1+ (- mlog2n))) (vector-push (gmpz-z (alloc-gmpz2 256)) res)) ;initialize (loop while (cl::/= h n) do (cl::incf shift h) (setf h (ash n mlog2n)) (cl::incf mlog2n) (setf len high) (setf high (if (oddp h) h (cl::1- h))) (setf len (ash (cl::- high len) -1)) (cond ((cl::> len 0) (mpz_mul p p (gprod len ans)) (mpz_mul r r p) ))) (mpz_mul_2exp r r shift) rr))) ;; split-recursive like the program above, but more lisp-like. (defun gkg(n &optional (aa (alloc-gmpz)));put answer in 2nd arg, if there (declare (fixnum n)(optimize (speed 3)(safety 0)(debug 0))) (let ((res nil) (shift 0) (L (ceiling (cl::log n 2))) (a (gmpz-z aa))) (declare (fixnum L shift n)) (labels ((k (n m ans res) (declare (fixnum n m)) (cond ((and (cl::evenp n)(cl::> m 1)) (cl::incf shift (cl::ash n -1)) (k (cl::ash n -1) (cl::ash m -1) ans res)) ((cl::<= n m)(mpz_set_si ans n)) (t(let ((b (car res))) (k (cl::- n m) (setq m (cl::ash m 1)) b (cdr res)) (k n m ans (cdr res)) (mpz_mul ans ans b) ans))))) (dotimes (i L)(push (gmpz-z (alloc-gmpz)) res));set up resource (k n 1 a res) (mpz_mul_2exp a a shift) aa))) To see many more ideas for how to compute factorials of very large numbers relatively fast, some involving factoring, see http://www.luschny.de/math/factorial/index.html or our own paper www.cs.berkeley.edu/~fateman/papers/factorial.pdf This bit of discussion here goes to show that our attempt to use a traditional factorial program for a benchmark does not represent the most efficient way of computing the answer, but just a way of using computer resources in some systematic fashion, watching how some resource utilization varies as we modify some relatively simple parameter such as language implementation, language usage, choice of computer, etc. ... end of digression on Factorial computation. Further notes: after writing the QD interface, and revising the MPFR interface to look like the QD interface, the question arises as to whether we should revise GMP interface to look the same. Since the GMP and MPFR arithmetic are likely to be much slower in their actual computing, inefficiency in the linkage would be much more tolerable. Do we need to write with-temps and dsetv for GMP? I think not. We could do a wrapper for gmp rationals, I guess. Or make interfaces for the number-theoretic stuff in gmp. |#