/**
 * Demo driver: builds a {@code GridWorldMDP}, runs value iteration on it through
 * {@code QValues}, prints one line per state with its value, then computes the
 * Q-values and prints one line per state with its Q-value entry.
 *
 * <p>Each output line has the form {@code "S: <s.getInt(0)> : <value>"}.
 *
 * <p>NOTE(review): the meaning of the four {@code GridWorldMDP} constructor arguments
 * and of the {@code -1} passed to {@code valueIteration} is not visible from here;
 * confirm against those classes before changing them.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    MDP mdp = new GridWorldMDP(1, -1, -0.01, 0.8);
    QValues qv = new QValues(mdp);
    qv.valueIteration(-1);

    // The pattern never changes, so build the formatter once instead of once per state.
    DecimalFormat df = new DecimalFormat("##.##############");
    for (State s : mdp.getStates()) {
        System.out.println("S: " + s.getInt(0) + " : " + df.format(qv.getValues().get(s)));
    }

    // Q-values are only requested after the state values above have been computed.
    qv.calcQValues();
    for (State s : mdp.getStates()) {
        System.out.println("S: " + s.getInt(0) + " : " + qv.getQvalues().get(s).toString());
    }
}
/**
 * Returns the maximum, over every action from {@code mdp.getActions()}, of
 *
 * <pre>
 *   sum over s' in mdp.getStates() of
 *       probTransition(s, a, s') * (reward(s) + gamma * values.get(s'))
 * </pre>
 *
 * <p>If there are no actions the result is {@link Double#NEGATIVE_INFINITY}.
 *
 * <p>When {@code s.getInt(0) == 7} the method also prints per-successor diagnostics
 * and the per-action sum to standard output; this hard-coded state looks like
 * leftover debugging.
 *
 * @param s the state whose best one-step backup is computed
 * @return the largest per-action sum, or {@code Double.NEGATIVE_INFINITY} when the
 *         MDP exposes no actions
 */
private double maxa(State s) {
    // Both of these depend only on s, so they are evaluated once rather than once
    // per (action, successor) pair.
    final boolean trace = s.getInt(0) == 7;
    final double reward = mdp.reward(s);

    double best = Double.NEGATIVE_INFINITY;
    for (ActionStep action : mdp.getActions()) {
        double total = 0;
        for (State next : mdp.getStates()) {
            StateActionState transition = new StateActionState(s, action, next);
            double term = mdp.probTransition(transition) * (reward + gamma * values.get(next));
            if (trace) {
                System.out.println("prob: " + mdp.probTransition(transition));
                // NOTE(review): this prints reward(next), but the term above uses
                // reward(s) — confirm which of the two is intended.
                System.out.println("R(s,a, s'): " + (mdp.reward(next)));
                System.out.println("future value: " + values.get(next));
            }
            total += term;
        }
        if (trace) {
            System.out.println(total);
        }
        if (total > best) {
            best = total;
        }
    }
    return best;
}