;;; compile inside qd, some tests to see what the qd allocation
;;; overhead might be

(in-package :qd)

;; The macros used below must be available while this file is being
;; compiled; otherwise they end up being expanded at run time (see the
;; notes at the end of the file).
(eval-when (:compile-toplevel) (load "qd.fasl")) ; need the macros

(defun time-sin (n)
  "Evaluate (sin m1) N times, where m1 is (into 1); every result is discarded.
Returns NIL."
  (declare (fixnum n))
  (let ((m1 (into 1)))
    (dotimes (i n)
      ;; just use lisp
      (sin (the aqd m1)))))		; this uses the OO defmethod for sine

(defun time-sin2 (n)
  "Call qd_sin N times on the arrays returned by aqd-q for (into 1) and for
one aqd made up front, so no new aqd is made inside the loop.  Returns that aqd."
  ;; avoid the allocation
  (declare (fixnum n))
  (let* ((m1 (into 1))
	 (ans (make-aqd))
	 (in (aqd-q m1))
	 (ina (aqd-q ans)))
    (declare (optimize speed)
	     ;; was `inat', which names no variable in this function
	     (type (simple-array double-float (4)) in ina))
    (dotimes (i n ans)
      (qd_sin in ina))))		; this uses the raw c entry point directly

(defun time-empty (n)
  "A program to use to time the empty loop, compiled.
Performs the same setup as time-sin2, then loops N times doing only
(identity 1).  Returns the aqd made during setup."
  (declare (fixnum n))
  (let* ((m1 (into 1))
	 (ans (make-aqd))
	 (in (aqd-q m1))
	 (ina (aqd-q ans)))
    ;; in and ina are bound only to mirror the setup of time-sin2
    (declare (optimize speed)
	     (ignore in ina)
	     (type (simple-array double-float (4)) in ina))
    (dotimes (i n ans)
      ;; just counting
      (identity 1))))

(defun time-make-aqd (n)
  "A program to use to time make-aqd.
Performs the same setup as time-sin2, then calls (make-aqd) N times,
discarding each result.  Returns the aqd made during setup."
  (declare (fixnum n))
  (let* ((m1 (into 1))
	 (ans (make-aqd))
	 (in (aqd-q m1))
	 (ina (aqd-q ans)))
    ;; in and ina are bound only to mirror the setup of time-sin2
    (declare (optimize speed)
	     (ignore in ina)
	     (type (simple-array double-float (4)) in ina))
    (dotimes (i n ans)
      ;; the only work in the loop is the make-aqd call
      (make-aqd))))

(defun time-sin3 (n)
  "Evaluate (dsetv ans (sin m1)) N times with m1 = (into 1).  Returns ans."
  ;; use our destructive set operation
  (declare (fixnum n))
  (let* ((m1 (into 1))
	 (ans (make-aqd)))
    (dotimes (i n ans)
      ;; this is just as fast as time-sin2
      (dsetv ans (sin m1)))))

(defun time-sin4 (n)
  "Evaluate (setf ans (with-temps (sin m1))) N times with m1 = (into 1).
Returns the final value of ans."
  (declare (fixnum n))
  (let* ((m1 (into 1))
	 (ans (make-aqd)))
    (dotimes (i n ans)
      ;; use our with-temps hack
      (setf ans (with-temps (sin m1))))))

#|
Fundamentally, for any of the time-sin* programs compiled correctly
[careful: must set up macros at compile time]
the cost for any of these sin(1) loops is about the same, though
the storage allocation / deallocation varies wildly among them.
For example, to run the loop 10,000 times,
  time-sin4 allocates 10 cons cells, 168 other bytes, 0 static bytes
  time-sin  allocates 100,054 cons cells, 721,424 other bytes, 0 static bytes

And compiled wrong, with macroexpansion at runtime, this same program
(time-sin4) takes 12% more time:
  time-sin4 allocates 770,076 cons cells, 3,601,816 other bytes, 0 static bytes

How can it be that such a big difference in what is apparently going on
between correctly compiled code doesn't seem to affect the run time much?

Here's why:
the empty loop, (time-empty 1000000), takes 15 ms.
the loop doing 1,000,000 allocations of aqd takes 267+124(GC)=391 ms.

So in the loop running 10,000 times, the aqd allocation is about 4 ms
(391 ms / 100).
The loop actually computing sin(1) 10,000 times is about 3,500 ms.
So the extra 4 ms doesn't show up in 3500+4.

How does the sin(1) time here compare to sin(1.0d0)?
The double-float version seems to be about 350 times faster.
It uses 1 cons cell, 480,000 other bytes.
|#