package aima.examples; import java.util.*; import java.io.*; import aima.cs.machine.*; import aima.cs.learning.*; /** * A Risk vs. No-Risk game * * An example JavaBean environment. * The interaction between the environment and an agent * is executed as its own separate thread. * The interaction thread sleeps every percept-action pair * so that it's possible to see something happening. * * @author Daishi Harada (daishi@cs.berkeley.edu) */ public class RiskNoRiskGame extends OurButton implements intActionListener, Runnable { public RiskNoRiskGame() { super("RiskNoRiskGame"); stepCount = 0; epochCount = 1; score = 0.0; game = new intPFSM( numStates, numInputs, numOutputs, transitionProbs, outputProbs ); game.setState(0); } public void setStopped(boolean s) { stopped = s; } public boolean getStopped() { return (stopped); } public void setEpochCount(int n) { epochCount = n; } public int getEpochCount() { return (epochCount); } public synchronized void addStateReward_PerceptListener(StateReward_PerceptListener l) { perceptListeners.addElement(l); } public synchronized void removeStateReward_PerceptListener(StateReward_PerceptListener l) { perceptListeners.removeElement(l); } public synchronized void intActionPerformed(intActionEvent e) { if (verbosity > 0) { logStream.println("["+this.getClass().getName()+"].intActionPerformed()] Got "+e); } agentAction = e.getAction(); notify(); } /** * Deal with button press. * Note that this doesn't necessarily fire an action any more. */ public synchronized void fireAction() { if (stopped) { startRun(); } else { stopRun(); } } public synchronized void startRun() { if (runThread == null) { runThread = new Thread(this); runThread.start(); } stopped = false; notify(); } public synchronized void stopRun() { stopped = true; } /** * Main environment-agent interaction loop. */ public void run() { try { while (true) { synchronized(this) { if (verbosity > 1) { logStream.println("["+this.getClass().getName()+"].run()] Waiting for !stopped"); } while (stopped) { wait(); } agentAction = -1; } if (sendPercept()) { synchronized(this) { if (verbosity > 1) { logStream.println("["+this.getClass().getName()+"].run()] Waiting for agentAction >= 0"); } while (agentAction < 0) { wait(); } } if (game.getState() == 0) { score = (1-scoreDecay)*score+scoreDecay*(double)agentActionCorrect(agentAction); } int gameState = game.getState(); gameOutput = game.step(agentAction); if (gameState == 2) { sendTerminalPercept(); game.setState(0); epochCount++; stepCount = 0; } stepCount++; setLabel(Integer.toString(agentAction)+", "+Double.toString(score)); repaint(); } else { stopRun(); } Thread.sleep(sleepInterval); } } catch (InterruptedException e) { } } protected int agentActionCorrect(int action) { int correct; if (action == 0) { if (0.8 >= 0.5 / (1-discountFactor*0.5)) { correct = 1; } else { correct = 0; } } else { if (0.8 >= 0.5 / (1-discountFactor*0.5)) { correct = 0; } else { correct = 1; } } return (correct); } protected boolean sendPercept() { Vector targets; synchronized (this) { targets = (Vector) perceptListeners.clone(); } StateReward_PerceptEvent perceptEvt = new StateReward_PerceptEvent(this, game.getState(), (double)gameOutput); for (int i = 0; i < targets.size(); i++) { StateReward_PerceptListener target = (StateReward_PerceptListener)targets.elementAt(i); target.perceiveStateReward(perceptEvt); } if (targets.size() > 0) { if (verbosity > 2) { logStream.println("["+this.getClass().getName()+"].sendPercept()] returning true"); } return (true); } else { if (verbosity > 2) { logStream.println("["+this.getClass().getName()+"].sendPercept()] returning false"); } return (false); } } protected boolean sendTerminalPercept() { Vector targets; synchronized (this) { targets = (Vector) perceptListeners.clone(); } StateReward_PerceptEvent perceptEvt = new StateReward_PerceptEvent(this, -1, 0.0); for (int i = 0; i < targets.size(); i++) { StateReward_PerceptListener target = (StateReward_PerceptListener)targets.elementAt(i); target.perceiveStateReward(perceptEvt); } if (targets.size() > 0) { if (verbosity > 2) { logStream.println("["+this.getClass().getName()+"].sendPercept()] returning true"); } return (true); } else { if (verbosity > 2) { logStream.println("["+this.getClass().getName()+"].sendPercept()] returning false"); } return (false); } } public void setSleepInterval(int n) { sleepInterval = 125; } public int getSleepInterval() { return (sleepInterval); } public void setVerbosity(int n) { verbosity = n; } public int getVerbosity() { return (verbosity); } public void setScoreDecay(double d) { scoreDecay = d; } public double getScoreDecay() { return (scoreDecay); } public void setDiscountFactor(double d) { discountFactor = d; } public double getDiscountFactor() { return (discountFactor); } private double discountFactor; private Thread runThread; private boolean stopped = true; private int sleepInterval = 25; private Vector perceptListeners = new Vector(); private int gameOutput; private int stepCount; private int epochCount; private double score; private double scoreDecay; private int agentAction; private intPFSM game; private int numStates = 3; private int numInputs = 2; private int numOutputs = 2; private double[][][] transitionProbs = { { {0.0, 0.8, 0.2}, {0.5, 0.5, 0.0} }, { {0.0, 0.0, 1.0}, {0.0, 0.0, 1.0} }, { {0.0, 0.0, 1.0}, {0.0, 0.0, 1.0} } }; private double[][] outputProbs = { { 1.0, 0.0 }, { 0.0, 1.0 }, { 1.0, 0.0 } }; private int verbosity = 0; private PrintStream logStream = System.err; }