/**
 * Demo entry point: solves a grid-world MDP and dumps the results to standard output.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>construct a {@code GridWorldMDP} and a {@code QValues} instance wrapping it;</li>
 *   <li>call {@code valueIteration(-1)};</li>
 *   <li>print one line per state with {@code getInt(0)} of the state and its formatted value;</li>
 *   <li>call {@code calcQValues()} and print one line per state with its Q-value entry.</li>
 * </ol>
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {

    // NOTE(review): the four constructor arguments are presumably rewards and a transition
    // probability; their exact meaning is defined by GridWorldMDP -- confirm there.
    MDP world = new GridWorldMDP(1, -1, -0.01, 0.8);
    QValues solver = new QValues(world);

    // NOTE(review): -1 looks like a sentinel argument (e.g. "no limit") -- verify against
    // QValues.valueIteration.
    solver.valueIteration(-1);

    // One formatter is enough for every line; the pattern is the same for all states.
    DecimalFormat valueFormat = new DecimalFormat("##.##############");
    for (State state : world.getStates()) {
      String line = "S: " + state.getInt(0) + " : " + valueFormat.format(solver.getValues().get(state));
      System.out.println(line);
    }

    // Q-values are only printed after they have been (re)computed from the state values.
    solver.calcQValues();
    for (State state : world.getStates()) {
      String line = "S: " + state.getInt(0) + " : " + solver.getQvalues().get(state).toString();
      System.out.println(line);
    }
  }
  /**
   * Computes the best one-step backup for state {@code s} over all actions of the MDP.
   *
   * <p>For every action {@code a} returned by {@code mdp.getActions()} the method sums, over
   * every state {@code s'} returned by {@code mdp.getStates()}, the term
   * {@code probTransition(s, a, s') * (reward(s) + gamma * values.get(s'))}, and keeps the
   * largest such sum.
   *
   * <p>While {@code s.getInt(0) == 7}, diagnostic lines are written to standard output for
   * each successor state and for each action's total.
   *
   * @param s the state being backed up
   * @return the largest per-action sum, or {@link Double#NEGATIVE_INFINITY} if no sum exceeded
   *     that initial value (for instance when there are no actions)
   */
  private double maxa(State s) {
    double best = Double.NEGATIVE_INFINITY;

    for (ActionStep action : mdp.getActions()) {
      double expected = 0;

      for (State next : mdp.getStates()) {
        StateActionState transition = new StateActionState(s, action, next);
        double term =
            mdp.probTransition(transition) * (mdp.reward(s) + gamma * values.get(next));

        // Diagnostics for the state whose getInt(0) is 7.
        // NOTE(review): the line labelled "R(s,a, s')" prints reward(next), whereas the term
        // above uses reward(s) -- confirm which of the two is intended.
        if (s.getInt(0) == 7) {
          System.out.println("prob: " + mdp.probTransition(transition));
          System.out.println("R(s,a, s'): " + (mdp.reward(next)));
          System.out.println("future value: " + values.get(next));
        }

        expected += term;
      }

      // Diagnostics: the accumulated total for this action.
      if (s.getInt(0) == 7) {
        System.out.println(expected);
      }

      // Strict comparison: ties keep the earlier value.
      best = (expected > best) ? expected : best;
    }

    return best;
  }