;;; How bad is Allegro CL 4.1 at compiling floating point (on SPARC)?
;;; It seems to use about twice as many instructions as necessary in this
;;; example.

;;; Consider the following 2 functions:
;;; test1 computes (x^2+y^2)/(x+y), and test2 computes x/y.
;;; They both need the same number of instructions to set up and return --
;;; 96 bytes worth.

;; The test2 function uses 4 bytes (a single DIV.S) in the middle (byte 60).
;   60: DIV.S %f30,%f28,%f30

;; The test1 function uses 44 bytes.
;; (The listing is kept as a bare top-level string, used here as a block
;; comment; evaluating it has no effect.)
"
  60: MOVE.S %f22,%f28
  64: MOVE.S %f22,%f30
  68: MUL.S %f30,%f28,%f26 ;x^2
  72: MOVE.S %f24,%f28
  76: MOVE.S %f24,%f30
  80: MUL.S %f30,%f28,%f30 ;y^2
  84: ADD.S %f30,%f26,%f26 ;(x^2+y^2)
  88: MOVE.S %f22,%f28
  92: MOVE.S %f24,%f30
  96: ADD.S %f30,%f28,%f30 ;x+y
 100: DIV.S %f30,%f26,%f30
"

;; NOTE(review): the sizes quoted on the two defuns below (136 and 96) look
;; like the offset of the last instruction rather than the code size -- the
;; full test2 listing runs from offset 0 through 96, i.e. 25 instructions =
;; 100 bytes, and by the same count test1 would be 140.  Confirm before
;; quoting them as sizes.

;; The source forms are deliberately left exactly as they were compiled for
;; the listings; only the layout has been restored.

;; test1: returns (x^2 + y^2) / (x + y) for single-float X and Y.
(defun test1 (x y) ;; 136 bytes
  (declare (optimize (speed 3) (safety 0))
           (single-float x y)
           (inline / * +))
  (/ (+ (* x x) (* y y))
     (+ x y)))

;; test2: returns x / y for single-float X and Y.
(defun test2 (x y) ;; 96 bytes
  (declare (optimize (speed 3) (safety 0))
           (single-float x y)
           (inline / * +))
  (/ x y))

;;; I think that test1 could be compiled to fewer instructions, like this:
"
  60: MUL.S %f22,%f22,%f26 ;x^2
  64: MUL.S %f24,%f24,%f30 ;y^2
  68: ADD.S %f30,%f26,%f26 ;(x^2+y^2)
  72: ADD.S %f22,%f24,%f30 ;x+y
  76: DIV.S %f30,%f26,%f30 ; (x^2+y^2)/(x+y)
"

;; Doing stuff in double looks painful because each move is 2 instructions
;; instead of just one.
;; I don't know if any of the prolog/epilog could be dispensed with
;; under the circumstances...

;;; ......... Here is test2 in full detail...
(disassemble 'test2)
;; disassembly of #
;; formals: X Y
;; code vector @ #xd68fcc:
;;    0: save    #x-88,%o6
;;    4: sethi   %o7,#x203        ; #x80c00, 527360
;;    8: or      #x1e7,%o7
;;   12: move.l  %o7,-12(%i6)
;;   16: or      #x8,%o7
;;   20: move.l  %o7,-16(%i6)
;;   24: sethi   %o7,#x203        ; #x80c00, 527360
;;   28: or      #x2b7,%o7
;;   32: move.l  %o7,-40(%i6)
;;   36: or      #x8,%o7
;;   40: move.l  %o7,-36(%i6)
;;   44: move.l  %i0,%l1
;;   48: move.l  %i1,%l0
;;   52: move.s  -2(%l0),%f30
;;   56: move.s  -2(%l1),%f28
;;   60: DIV.S   %f30,%f28,%f30
;;   64: move.l  235(%g4),%o7     ; NEW-FLOAT
;;   68: move.l  #x0,%o0
;;   72: jmpl    0(%o7),%o7
;;   76: move.l  #x1,%g3
;;   80: move.s  %f30,-2(%o0)
;;   84: move.l  #x1,%g3
;;   88: move.l  %g0,-12(%i6)
;;   92: jmpl    8(%i7),%g0
;;   96: restore %g0,%o0