;;; learning/agents/policies.lisp
;;; Definitions for policies and choice functions used by reinforcement
;;; learning agents for exploration etc.

;;; Compute wacky policy table - a randomized policy uniformly distributed
;;; over possible actions

(defun wacky-policy (U M R)
  "Return a fresh EQUAL hash table giving a uniform random policy for M.

M must be a hash table; each of its values must be a list of conses.
For every key S in M, the result maps S to a list with one
(ITEM PROBABILITY) pair per entry of M's value for S, where ITEM is the
CAR of that entry and PROBABILITY is 1.0 divided by the number of entries.
A key whose value is the empty list maps to NIL.

U and R are accepted but ignored.
NOTE(review): presumably U is a utility table, M a model keyed by state
whose entries are keyed by action, and R a reward table - confirm against
callers."
  (declare (ignore U R))
  (let ((policy (make-hash-table :test #'equal)))
    (maphash #'(lambda (state entries)
                 ;; The entry count is the same for every entry of this
                 ;; state, so compute it once instead of once per entry.
                 (let ((n (length entries)))
                   (setf (gethash state policy)
                         ;; The division stays inside the per-entry lambda
                         ;; so an empty ENTRIES list never divides by zero.
                         (mapcar #'(lambda (entry)
                                     (list (car entry) (/ 1.0 n)))
                                 entries))))
             M)
    policy))