;;;; A simple policy agent for Markov decision processes (MDPs).

(defun make-mdp-agent (&key body name mdp program
                            (algorithm 'value-iteration-policy))
  "Build an MDP agent with the given BODY and NAME.
  When PROGRAM is supplied it is used as the agent program unchanged, and
  neither ALGORITHM nor MDP is consulted.  Otherwise the agent gets a default
  program that, on the first percept it receives, calls ALGORITHM on MDP to
  obtain a policy, remembers that policy, and from then on picks each action
  by looking up the percept's state in it.  ALGORITHM is therefore the policy
  builder, whereas PROGRAM is the thing that actually chooses actions."
  (flet ((policy-following-program ()
           ;; The policy lives in the closure and is built lazily, so
           ;; nothing is computed until the agent first has to act.
           (let ((cached-policy nil))
             #'(lambda (percept)
                 (unless cached-policy
                   (setf cached-policy (funcall algorithm mdp)))
                 (policy-choice (mdp-percept-state percept)
                                cached-policy)))))
    (new-mdp-agent
     :body body
     :name name
     ;; The default program is only constructed when no PROGRAM was given.
     :program (or program (policy-following-program)))))
		 
;;; Structure type for MDP agents.  It includes the AGENT structure and
;;; names its constructor NEW-MDP-AGENT (rather than a default name), which
;;; is the constructor MAKE-MDP-AGENT above calls.
(defstructure (mdp-agent (:include agent) (:constructor new-mdp-agent))
  ;; Starts at 0.  Presumably a running sum of the rewards the agent has
  ;; received; it is not updated anywhere in this section -- verify against
  ;; the code that uses this slot.
  (total-reward 0))