;;; decision tree learning algorithm - the standard "induction algorithm" ;;; returns a tree in the format ;;; (a1 (v11 . ) (v12 . )), bottoming out with goal values. ;;; currently handles only a single goal attribute (defun decision-tree-learning (problem) (dtl (learning-problem-examples problem) (learning-problem-attributes problem) (first (learning-problem-goals problem)))) (defun dtl (examples attributes goal &optional prior &aux (trees nil)) (cond ((null examples) prior) ((null attributes) (majority examples goal)) ((every #'(lambda (e) (eq (attribute-value goal e) (attribute-value goal (first examples)))) (rest examples)) (majority examples goal)) (t (let ((best (select-attribute examples attributes goal))) (dolist (v (rest best) (cons best trees)) (push (cons v (dtl (remove-if-not #'(lambda (e) (eq v (attribute-value best e))) examples) (remove best attributes) goal (majority examples goal))) trees)))))) (defun distribution (examples goal &aux (l (length (rest goal))) (d (make-list l :initial-element 0))) (dolist (e examples) (incf (nth (position (attribute-value goal e) (rest goal)) d))) (mapcar #'(lambda (n) (float (/ n (length examples)))) d)) (defun majority (examples goal) (the-biggest #'(lambda (v) (count v (mapcar #'(lambda (e) (attribute-value goal e)) examples))) (rest goal))) (defun select-attribute (examples attributes goal) (the-biggest #'(lambda (a) (information-value a examples goal)) attributes)) (defun information-value (a examples goal &aux (i (bits-required (distribution examples goal)))) (dolist (v (rest a) i) (let ((s (remove-if-not #'(lambda (e) (eq (attribute-value a e) v)) examples))) (when s (decf i (* (bits-required (distribution s goal)) (/ (length s) (length examples)))))))) (defun bits-required (d &aux (b 0)) (dolist (p d (- b)) (unless (= 0 p) (incf b (* p (log p 2)))))) ;;; dtpredict is the standard "performance element" that ;;; interfaces with the example-generation and learning-curve functions (defun dtpredict (dt example) (if (atom dt) (list dt) (dtpredict (cdr (assoc (attribute-value (car dt) example) (cdr dt))) example)))