(defstruct (spider-mdp (:include mdp (reward-type 'sas) (gamme 1.0d0))) problem ; The underlying partially observable problem in the environment ) (defmethod actions ((mdp spider-mdp) state) (actions (spider-mdp-problem mdp) state)) (defmethod terminal? ((mdp spider-mdp) state) (goal-test (spider-mdp-problem mdp) state)) (defmethod state-hash-key ((mdp spider-mdp) state) "Return a canonical form for a state; can omit hidden cards since they are fixed by the visible cards. [[Fix for multisuit completed]]" (cons (spider-state-stacks state) (length (spider-state-completed state)))) (defmethod sas-reward ((mdp spider-mdp) state1 action state2) (step-cost (spider-mdp-problem mdp) state1 action state2)) (defmethod results ((mdp spider-mdp) move state) "Return enumerated distribution over outcome from move in state. Uses the deterministic result function for the underlying problem, which returns a new state possibly with some *unknown* cards exposed. These unknown cards are then chosen in all possible ways from the set of all hidden cards." (let* ((problem (spider-mdp-problem mdp)) (new (result problem move state)) (stacks (spider-state-stacks new)) (unknowns nil)) (loop for i from 0 to (1- (length stacks)) do (when (and (aref stacks i) (card-unknown? (first (aref stacks i)))) (push i unknowns))) (let* ((outcomes (all-spider-outcomes new unknowns)) (p (/ 1.0d0 (length outcomes)))) (mapcar #'(lambda (s) (cons s p)) outcomes)))) (defun all-spider-outcomes (state unknowns) (if (null unknowns) (list state) (let ((all-hidden-cards (spider-state-all-hidden-cards state))) (mapcan #'(lambda (card) (let ((new (copy-state state))) (setf (first (aref (spider-state-stacks new) (first unknowns))) (unhide card)) (setf (spider-state-all-hidden-cards new) (remove card all-hidden-cards)) (all-spider-outcomes new (rest unknowns)))) all-hidden-cards)))) (defmethod num-results ((mdp spider-mdp) move state) "Return the number of possible outcomes from move in state. Uses the deterministic result function for the underlying problem, which returns a new state possibly with some *unknown* cards exposed. Count the ways these could be chosen." (let* ((problem (spider-mdp-problem mdp)) (new (result problem move state)) (stacks (spider-state-stacks new)) (num-unknowns 0) (count 1) (num-hidden-cards (length (spider-state-all-hidden-cards state)))) (loop for i from 0 to (1- (length stacks)) do (when (and (aref stacks i) (card-unknown? (first (aref stacks i)))) (incf num-unknowns))) (loop for j from (1+ (- num-hidden-cards num-unknowns)) to num-hidden-cards do (setf count (* count j))) count)) (defmethod random-result ((mdp spider-mdp) move state) "Return a random outcome from move in state. Uses the deterministic result function for the underlying problem, which returns a new state possibly with some *unknown* cards exposed. These unknown cards are then sampled from the set of all hidden cards." (let* ((problem (spider-mdp-problem mdp)) (new (result problem move state)) (stacks (spider-state-stacks new)) (unknowns nil) (all-hidden-cards (spider-state-all-hidden-cards state))) (loop for i from 0 to (1- (length stacks)) do (when (and (aref stacks i) (card-unknown? (first (aref stacks i)))) (push i unknowns))) (let ((new-cards (if (= (length all-hidden-cards) (length unknowns)) (shuffle all-hidden-cards) (sample-without-replacement (length unknowns) all-hidden-cards)))) (mapc #'(lambda (i card) (setf (first (aref stacks i)) (unhide card))) unknowns new-cards)) new))