Exemple #1
0
  /**
   * Writes the grid of optimal actions to {@code out}, one tab-separated row per line, followed
   * by two blank lines.
   *
   * <p>Rows are emitted from {@code ydim} down to 1 and columns from 1 up to {@code xdim}. For
   * each position:
   *
   * <ul>
   *   <li>if the table has no cell there, {@code X} is written;
   *   <li>if an action is found in {@code optimalAction}, its {@code printactionMap} label is
   *       written, with a trailing {@code *} when {@code optimalAction1} is supplied and holds a
   *       different action for the same position;
   *   <li>otherwise the cell's own content is written.
   * </ul>
   *
   * <p>An {@link IOException} is logged at SEVERE level and not propagated.
   *
   * @param xdim number of columns to print
   * @param ydim number of rows to print
   * @param optimalAction primary policy to render
   * @param optimalAction1 optional second policy to compare against; may be {@code null}
   * @param out destination writer (not closed by this method)
   */
  private static void printOptimal(
      int xdim,
      int ydim,
      Map<Block<Double>, BlockAction> optimalAction,
      Map<Block<Double>, BlockAction> optimalAction1,
      BufferedWriter out) {
    try {
      final String lineBreak = System.getProperty("line.separator");

      // Top row first, so the output reads like the grid is usually drawn.
      for (int row = ydim; row >= 1; row--) {
        for (int col = 1; col <= xdim; col++) {
          Block<Double> cell = table.getCellAt(col, row);
          if (cell == null) {
            out.write("X\t");
            continue;
          }

          BlockAction primary = getCellAction(col, row, optimalAction);
          String label = "";
          if (primary != null) {
            label = printactionMap.get(primary.toString());
          }

          if (optimalAction1 != null) {
            BlockAction secondary = getCellAction(col, row, optimalAction1);
            boolean bothPresent = primary != null && secondary != null;
            if (bothPresent && !secondary.equals(primary)) {
              // Mark positions where the two policies disagree.
              label = label + "*";
            }
          }

          if (primary == null) {
            // No action for this cell: show the cell's own content instead.
            out.write(cell.getContent().toString() + "\t");
          } else {
            out.write(label + "\t");
          }
        }
        out.write(lineBreak);
      }

      out.write(lineBreak);
      out.write(lineBreak);
    } catch (IOException ex) {
      Logger.getLogger(A3Main.class.getName()).log(Level.SEVERE, null, ex);
    }
  }
Exemple #2
0
  /**
   * Compares the values of two maps position by position, in each map's iteration order.
   *
   * <p>Keys are not compared; only the sequence of values returned by {@code values()} is.
   * The result is therefore only meaningful for maps with a predictable iteration order.
   *
   * <p>Maps of different sizes are never equal. Previously a shorter {@code b} caused an
   * {@link IndexOutOfBoundsException} and a longer {@code b} had its extra entries ignored.
   *
   * @param a first map
   * @param b second map
   * @return {@code true} if both maps have the same number of entries and every value in
   *     {@code a} equals the value at the same position in {@code b}; {@code false} otherwise
   */
  private static boolean compareMaps(
      Map<Block<Double>, BlockAction> a, Map<Block<Double>, BlockAction> b) {
    if (a.size() != b.size()) {
      return false;
    }

    List<BlockAction> values1 = new ArrayList<BlockAction>(a.values());
    List<BlockAction> values2 = new ArrayList<BlockAction>(b.values());

    for (int i = 0; i < values1.size(); i++) {
      BlockAction a1 = values1.get(i);
      BlockAction b1 = values2.get(i);

      // Null-safe comparison: two nulls match, a null never matches a non-null.
      boolean differs = (a1 == null) ? (b1 != null) : !a1.equals(b1);
      if (differs) {
        return false;
      }
    }

    return true;
  }
Exemple #3
0
  /**
   * Solves the grid problem with value iteration, simulates the resulting policy, and writes
   * the results to two files in the working directory.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Builds the table via {@code createTable(Double.class, 4, 3, -0.04)}, removes cell
   *       (2, 2), and sets cells (4, 3) and (4, 2) to 1.0 and -1.0 respectively.
   *   <li>Runs {@code VI.valueIteration} to obtain the utilities and the optimal action per cell.
   *   <li>Simulates 10, 100 and 1000 episodes from cell (4, 1), recording each total reward.
   *   <li>Follows the policy once with no randomness to get the reward along the intended path.
   *   <li>Writes reward histograms to {@code P2-histogram.txt}, and the utilities, first-run
   *       reward, mean/SD per batch and the deterministic-path reward to {@code P2-output.txt}.
   * </ol>
   *
   * <p>Both writers are closed even if a write or a helper call fails.
   *
   * @throws IOException if either output file cannot be opened or written
   */
  private static void runProblem2() throws IOException {
    final String nl = System.getProperty("line.separator");

    table = createTable(Double.class, 4, 3, -0.04);

    table.removeCell(2, 2);
    table.getCellAt(4, 3).setContent(1.0);
    table.getCellAt(4, 2).setContent(-1.0);

    MarkovProc<Block<Double>, BlockAction> mdp = createMDP(table);
    // NOTE(review): 1.0 is presumably the discount factor and 0.0001 the convergence
    // threshold -- confirm against VI.
    VI<Block<Double>, BlockAction> vi = new VI<Block<Double>, BlockAction>(1.0);

    Map<Block<Double>, BlockAction> optimalAction = new LinkedHashMap<Block<Double>, BlockAction>();

    Map<Block<Double>, Double> U = vi.valueIteration(mdp, 0.0001, optimalAction);

    Map<Double, Integer> run10 = new LinkedHashMap<Double, Integer>();
    Map<Double, Integer> run100 = new LinkedHashMap<Double, Integer>();
    Map<Double, Integer> run1000 = new LinkedHashMap<Double, Integer>();

    // Declared as ArrayList because they are handed to FindMean/FindSD unchanged.
    ArrayList<Double> rewardsHolder = new ArrayList<Double>();
    ArrayList<Double> rewardsHolder1 = new ArrayList<Double>();
    ArrayList<Double> rewardsHolder2 = new ArrayList<Double>();

    simulateEpisodes(10, optimalAction, run10, rewardsHolder);
    simulateEpisodes(100, optimalAction, run100, rewardsHolder1);
    simulateEpisodes(1000, optimalAction, run1000, rewardsHolder2);

    // The first episode of the 10-run batch is reported on its own.
    Double firstRunReward = rewardsHolder.get(0);

    // Follow the policy with no randomness: every chosen action succeeds.
    Block<Double> end1 = table.getCellAt(4, 3);
    Block<Double> end2 = table.getCellAt(4, 2);
    Block<Double> current = table.getCellAt(4, 1);
    Double exprewards = 0.0;

    while (!current.equals(end1) && !current.equals(end2)) {
      BlockAction a = optimalAction.get(current);
      exprewards += current.getContent();
      current = table.result(current, a);
    }

    exprewards += current.getContent();

    BufferedWriter out = new BufferedWriter(new FileWriter("P2-histogram.txt"));
    try {
      writeRewardHistogram(out, "10 runs", run10);
      writeRewardHistogram(out, "100 runs", run100);
      writeRewardHistogram(out, "1000 runs", run1000);
    } finally {
      out.close();
    }

    BufferedWriter out1 = new BufferedWriter(new FileWriter("P2-output.txt"));
    try {
      out1.write("Optimum utility when  -0.04");
      out1.write(nl);

      printUtil(4, 3, U, out1);

      out1.write(nl);
      out1.write("______________________________________________________");
      out1.write(nl);

      out1.write("First Run reward is " + roundString(firstRunReward.toString()));
      out1.write(nl);

      writeMeanAndSd(out1, "10", rewardsHolder);
      writeMeanAndSd(out1, "100", rewardsHolder1);
      writeMeanAndSd(out1, "1000", rewardsHolder2);
      out1.write(nl);

      out1.write("Expected reward from start state: ");
      out1.write(roundString(exprewards.toString()));
    } finally {
      out1.close();
    }
  }

  /**
   * Simulates {@code runs} episodes from cell (4, 1) until cell (4, 3) or (4, 2) is reached,
   * appending each episode's total reward to {@code rewardsHolder} and counting it in
   * {@code histogram}.
   *
   * <p>At each step the policy's action is taken when the random draw in [0, 100) is above 20,
   * its first right-angled action when the draw is in (10, 20], and its second right-angled
   * action otherwise.
   */
  private static void simulateEpisodes(
      int runs,
      Map<Block<Double>, BlockAction> policy,
      Map<Double, Integer> histogram,
      List<Double> rewardsHolder) {
    Block<Double> start = table.getCellAt(4, 1);
    Block<Double> end1 = table.getCellAt(4, 3);
    Block<Double> end2 = table.getCellAt(4, 2);

    for (int run = 0; run < runs; run++) {
      Block<Double> current = start;
      double rewards = 0.0;

      while (!current.equals(end1) && !current.equals(end2)) {
        BlockAction a = policy.get(current);
        rewards += current.getContent();
        double r = Math.random() * 100;
        if (r > 20.0) {
          current = table.result(current, a);
        } else if (r > 10.0) {
          current = table.result(current, a.getFirstRightAngledAction());
        } else {
          current = table.result(current, a.getSecondRightAngledAction());
        }
      }

      // The terminal cell's content is part of the episode's total.
      rewards += current.getContent();

      Integer seen = histogram.get(rewards);
      histogram.put(rewards, seen == null ? 1 : seen + 1);
      rewardsHolder.add(rewards);
    }
  }

  /**
   * Writes {@code title} on its own line, then one line per distinct reward in ascending
   * order: the reward text (cut to 7 characters when longer than 8), two tabs, and its count.
   */
  private static void writeRewardHistogram(
      BufferedWriter out, String title, Map<Double, Integer> histogram) throws IOException {
    final String nl = System.getProperty("line.separator");

    out.write(title);
    out.write(nl);

    for (Double rewards : new TreeSet<Double>(histogram.keySet())) {
      Integer count = histogram.get(rewards);

      String rewstr = rewards.toString();
      if (rewstr.length() > 8) {
        rewstr = rewstr.substring(0, 7);
      }
      out.write(rewstr + "\t\t" + count.toString());
      out.write(nl);
    }
  }

  /**
   * Writes the "Mean for N runs" and "SD for N runs" lines for one batch of rewards, where
   * {@code runsLabel} is the N shown in the output.
   */
  private static void writeMeanAndSd(
      BufferedWriter out, String runsLabel, ArrayList<Double> rewardsHolder) throws IOException {
    final String nl = System.getProperty("line.separator");

    out.write("Mean for " + runsLabel + " runs :");
    Double meanval = FindMean(rewardsHolder);
    out.write(roundString(meanval.toString()));
    out.write(nl);

    out.write("SD for " + runsLabel + " runs: ");
    Double sdVal = FindSD(rewardsHolder, meanval);
    out.write(roundString(sdVal.toString()));
    out.write(nl);
  }