;;; How bad is Allegro CL 4.1 at compiling floating-point code on SPARC?
;;; It seems to use about twice as many instructions as necessary in this
;;; example.

;;;  Consider the following two functions:
;;;  test1 computes (x^2+y^2)/(x+y), and test2 computes x/y.
;;;  They both need the same number of instructions to set up and return --
;;;  96 bytes' worth.

;; The test2 function uses 4 bytes (a single DIV.S) in the middle (byte 60).
;  60:	DIV.S	%f30,%f28,%f30
;; The test1 function uses 44 bytes

" 60:	MOVE.S	%f22,%f28
  64:	MOVE.S	%f22,%f30
  68:	MUL.S	%f30,%f28,%f26  ;x^2
  72:	MOVE.S	%f24,%f28
  76:	MOVE.S	%f24,%f30
  80:	MUL.S	%f30,%f28,%f30  ;y^2
  84:	ADD.S	%f30,%f26,%f26  ;(x^2+y^2)
  88:	MOVE.S	%f22,%f28
  92:	MOVE.S	%f24,%f30
  96:	ADD.S	%f30,%f28,%f30  ;x+y
 100:	DIV.S	%f30,%f26,%f30 "


;; TEST1: return (x^2 + y^2) / (x + y) for two single-float arguments.
;; This is the larger of the two compiler test cases in this file; the
;; arithmetic core of its compiled code is the MOVE.S/MUL.S/ADD.S/DIV.S
;; sequence quoted above.  The form is kept exactly as written because the
;; quoted instruction counts refer to this source.
(defun test1 (x y) ;; 136 bytes
  ;; Ask for maximum speed with no safety checks, declare both arguments
  ;; as single-floats, and request that the arithmetic operators be inlined.
  (declare (optimize (speed 3)(safety 0))
	   (single-float x y)
	   (inline / * +))
  ;; Numerator: sum of squares.  Denominator: plain sum.
  (/  (+ (* x x)(* y y)) (+ x y)))

;; TEST2: return x / y for two single-float arguments.
;; This is the baseline case: its body compiles to a single DIV.S, so the
;; rest of its code vector shows the fixed setup/return overhead.  Its full
;; disassembly is listed at the end of this file.
(defun test2 (x y)  ;;96 bytes
  ;; Same declarations as TEST1 so that the two compiled functions differ
  ;; only in their arithmetic.
  (declare (optimize (speed 3)(safety 0))
	   (single-float x y)
	   (inline / * +))
  (/ x y))

;;; I think that test1 could be compiled to fewer instructions, like this:

" 
  60:	MUL.S	%f22,%f22,%f26  ;x^2
  64:	MUL.S	%f24,%f24,%f30  ;y^2
  68:	ADD.S	%f30,%f26,%f26  ;(x^2+y^2)
  72:	ADD.S	%f22,%f24,%f30  ;x+y
  76:	DIV.S	%f30,%f26,%f30  ; (x^2+y^2)/(x+y)
"

;; Doing the same thing in double-float looks painful, because each move
;; takes 2 instructions instead of just one.

;; I don't know if any of the prolog/epilog could be dispensed with
;; under the circumstances...

.........
Here is test2 in full detail...

(disassemble 'test2)
;; disassembly of #<Function (:ANONYMOUS-LAMBDA 85) @ #xd691d6>
;; formals: X Y

;; code vector @ #xd68fcc:
   0:	save	#x-88,%o6
   4:	sethi	%o7,#x203	; #x80c00, 527360
   8:	or	#x1e7,%o7
  12:	move.l	%o7,-12(%i6)
  16:	or	#x8,%o7
  20:	move.l	%o7,-16(%i6)
  24:	sethi	%o7,#x203	; #x80c00, 527360
  28:	or	#x2b7,%o7
  32:	move.l	%o7,-40(%i6)
  36:	or	#x8,%o7
  40:	move.l	%o7,-36(%i6)
  44:	move.l	%i0,%l1
  48:	move.l	%i1,%l0
  52:	move.s	-2(%l0),%f30
  56:	move.s	-2(%l1),%f28
  60:	DIV.S	%f30,%f28,%f30
  64:	move.l	235(%g4),%o7	; NEW-FLOAT
  68:	move.l	#x0,%o0
  72:	jmpl	0(%o7),%o7
  76:	move.l	#x1,%g3
  80:	move.s	%f30,-2(%o0)
  84:	move.l	#x1,%g3
  88:	move.l	%g0,-12(%i6)
  92:	jmpl	8(%i7),%g0
  96:	restore	%g0,%o0