;;; Definitions for Markov decision processes (MDPs).
;;; An MDP is defined by initial state, transition model, rewards, and
;;; distinguished terminal states. Model and rewards are hash tables
;;; indexed by state (after application of the hash-key function).
;;; The entries in the model are alists keyed by action; each action is
;;; associated with an action model: basically a list of transitions.
;;; Markov chains (i.e., stochastic processes with no distinct agent)
;;; can be defined by allowing only a no-op action in the MDP.
;; An MDP problem instance. Model and rewards are keyed by the result of
;; applying hash-key to a state (see file header).
(defstruct mdp
initial-state ;; The initial state for the problem
(model (make-hash-table :test #'equal)) ;; Hashed state -> alist of (action . mdp-action-model); describes transition probabilities
(rewards (make-hash-table :test #'equal)) ;; Hashed state -> reward for that state
(terminal-states nil) ;; List of terminal states
(hash-key #'identity) ;; Function applied to a state to produce its hash key
name) ;; String, identifies the problem
;; Model for a single action in a single state. Represented as a plain
;; list (:type list), so slot order is part of the interface.
(defstruct (mdp-action-model (:type list))
(transitions nil) ;; List of transition records (see the transition struct)
(times-executed 0)) ;; Execution count; presumably maintained by learning code — confirm with callers
;; One possible outcome of executing an action. Represented as a plain
;; list (:type list), so slot order is part of the interface.
(defstruct (transition (:type list))
destination ;; State reached by this transition
probability ;; Probability of reaching destination given the action
(times-achieved 0)) ;; Count of observed occurrences; presumably for learning — confirm with callers
(defun action-model (a s M)
  "Return the action model for action a in state s according to
model M, or nil if state s has no entry for a."
  (let ((entry (assoc a (gethash s M) :test #'eq)))
    ;; entry is (action . action-model); rest of nil is nil, so a
    ;; missing action simply yields nil.
    (rest entry)))
(defun transitions (a s M)
  "Return the list of transitions that can result from executing
action a in state s, according to model M."
  (let ((model (action-model a s M)))
    (mdp-action-model-transitions model)))
(defun actions (s M)
  "Return the list of actions feasible in state s according to model M.
An unknown state yields nil."
  ;; Each model entry is an alist pair (action . action-model);
  ;; collect the action from each pair.
  (loop for entry in (gethash s M)
        collect (first entry)))