;;; 4x3-eval.lisp

;;; Approximate utility function for the 4x3 MDP.

;;;; Features

(defun 4x3-heaven-inverse-distance (state)
  "Feature: reciprocal of the x+y-distance from STATE to the square (@ 4 3)."
  ;; NOTE(review): if x+y-distance can return 0 (presumably when STATE is
  ;; the (4 3) square itself), the division below fails -- confirm callers
  ;; never evaluate that state.
  (let ((distance (x+y-distance state (@ 4 3))))
    ;; One-argument / returns the reciprocal of its argument.
    (/ distance)))

(defun 4x3-hell-inverse-distance (state)
  "Feature: reciprocal of the x+y-distance from STATE to the square (@ 4 2)."
  ;; NOTE(review): if x+y-distance can return 0 (presumably when STATE is
  ;; the (4 2) square itself), the division below fails -- confirm callers
  ;; never evaluate that state.
  (let ((distance (x+y-distance state (@ 4 2))))
    ;; One-argument / returns the reciprocal of its argument.
    (/ distance)))

(defun 4x3-heaven-inverse-distance-squared (state)
  "Feature: reciprocal of the squared x+y-distance from STATE to (@ 4 3)."
  ;; NOTE(review): a distance of 0 (presumably STATE = the (4 3) square)
  ;; makes the division below fail -- confirm callers never evaluate it.
  (let* ((distance (x+y-distance state (@ 4 3)))
         (distance-squared (square distance)))
    ;; One-argument / returns the reciprocal of its argument.
    (/ distance-squared)))

(defun 4x3-hell-inverse-distance-squared (state)
  "Feature: reciprocal of the squared x+y-distance from STATE to (@ 4 2)."
  ;; NOTE(review): a distance of 0 (presumably STATE = the (4 2) square)
  ;; makes the division below fail -- confirm callers never evaluate it.
  (let* ((distance (x+y-distance state (@ 4 2)))
         (distance-squared (square distance)))
    ;; One-argument / returns the reciprocal of its argument.
    (/ distance-squared)))


;; defparameter (unlike defvar with an initial value) re-assigns the
;; variable every time this form is evaluated, so reloading the file
;; always picks up the current list.
(defparameter *4x3-features*
  (list #'4x3-heaven-inverse-distance
        #'4x3-heaven-inverse-distance-squared
        #'4x3-hell-inverse-distance
        #'4x3-hell-inverse-distance-squared)
  "Feature functions combined by 4x3-eval; each takes a state and returns
a number.  The order must match the order of *4x3-weights*.")

;; defparameter (unlike defvar with an initial value) re-assigns the
;; variable every time this form is evaluated, so edited weights take
;; effect on reload.
(defparameter *4x3-weights*
  '(+0.4     ; 4x3-heaven-inverse-distance
    +0.4     ; 4x3-heaven-inverse-distance-squared
    -0.25    ; 4x3-hell-inverse-distance
    -0.5     ; 4x3-hell-inverse-distance-squared
    )
  "Weights used by 4x3-eval, one per entry of *4x3-features*, in the
same order.")


;;;; Approximate utility function

(defun 4x3-eval (state)
  "Approximate the utility of a 4x3-mdp STATE as the weighted sum of the
features in *4x3-features*, using the weights in *4x3-weights*."
  (let ((features *4x3-features*)
        (weights *4x3-weights*))
    (linear-eval state features weights)))

(defun linear-eval (state features weights)
  "Return the weighted sum of FEATURES applied to STATE.
FEATURES is a list of one-argument functions and WEIGHTS a list of
numbers; the i-th weight multiplies the i-th feature's value.  If the
lists differ in length, the extra elements of the longer one do not
contribute to the sum.  Returns 0 when there are no terms."
  (flet ((feature-value (feature)
           (funcall feature state)))
    ;; Evaluate every feature first, then pair the values with the weights.
    (let* ((feature-values (mapcar #'feature-value features))
           (weighted-terms (mapcar #'* weights feature-values)))
      (reduce #'+ weighted-terms))))