private static void printOptimal( int xdim, int ydim, Map<Block<Double>, BlockAction> optimalAction, Map<Block<Double>, BlockAction> optimalAction1, BufferedWriter out) { try { // List<CellAction> values1 = new ArrayList<CellAction>(optimalAction.values()); // List<CellAction> values2 = new ArrayList<CellAction>(optimalAction1.values()); // Iterate through each cell and get the optimal policy for that cell(state) for (int j = ydim; j > 0; j--) { for (int i = 1; i <= xdim; i++) { Block<Double> c = table.getCellAt(i, j); String toWrite = ""; if (c != null) { BlockAction ca = getCellAction(i, j, optimalAction); if (ca != null) { toWrite = ca.toString(); toWrite = printactionMap.get(toWrite); } if (optimalAction1 != null) { BlockAction ca1 = getCellAction(i, j, optimalAction1); if (ca != null && ca1 != null) { if (ca1.equals(ca) == false) { toWrite = toWrite + "*"; } } } if (ca != null) { out.write(toWrite + "\t"); } else { out.write(c.getContent().toString() + "\t"); } } else { out.write("X\t"); } } out.write(System.getProperty("line.separator")); } out.write(System.getProperty("line.separator")); out.write(System.getProperty("line.separator")); } catch (IOException ex) { Logger.getLogger(A3Main.class.getName()).log(Level.SEVERE, null, ex); } }
private static boolean compareMaps( Map<Block<Double>, BlockAction> a, Map<Block<Double>, BlockAction> b) { List<BlockAction> values1 = new ArrayList<BlockAction>(a.values()); List<BlockAction> values2 = new ArrayList<BlockAction>(b.values()); // boolean equals = true; for (int i = 0; i < values1.size(); i++) { BlockAction a1 = values1.get(i); BlockAction b1 = values2.get(i); if (a1.equals(b1) == false) { return false; } } return true; }
private static void runProblem2() throws IOException { table = createTable(Double.class, 4, 3, -0.04); table.removeCell(2, 2); table.getCellAt(4, 3).setContent(1.0); table.getCellAt(4, 2).setContent(-1.0); MarkovProc<Block<Double>, BlockAction> mdp = createMDP(table); VI<Block<Double>, BlockAction> vi = new VI<Block<Double>, BlockAction>(1.0); Map<Block<Double>, BlockAction> optimalAction = new LinkedHashMap<Block<Double>, BlockAction>(); Map<Block<Double>, Double> U = vi.valueIteration(mdp, 0.0001, optimalAction); Map<Double, Integer> run10 = new LinkedHashMap(); Map<Double, Integer> run100 = new LinkedHashMap(); Map<Double, Integer> run1000 = new LinkedHashMap(); int i = 0; ArrayList<Double> rewardsHolder = new ArrayList<Double>(); ArrayList<Double> rewardsHolder1 = new ArrayList<Double>(); ArrayList<Double> rewardsHolder2 = new ArrayList<Double>(); Double firstRunReward = 0.0; while (i++ < 10) { Block<Double> start = table.getCellAt(4, 1); Block<Double> end1 = table.getCellAt(4, 3); Block<Double> end2 = table.getCellAt(4, 2); Block<Double> current = start; double rewards = 0.0; while (current.equals(end1) == false && current.equals(end2) == false) { BlockAction a = optimalAction.get(current); rewards += current.getContent(); double r = Math.random() * 100; if (r > 20.0) { current = table.result(current, a); } else if (r > 10.0 && r <= 20.0) { current = table.result(current, a.getFirstRightAngledAction()); } else { current = table.result(current, a.getSecondRightAngledAction()); } } rewards += current.getContent(); if (i == 1) { firstRunReward = rewards; } if (run10.containsKey(rewards)) { Integer k = run10.get(rewards); k++; run10.put(rewards, k); rewardsHolder.add(rewards); } else { run10.put(rewards, 1); rewardsHolder.add(rewards); } } i = 0; while (i++ < 100) { Block<Double> start = table.getCellAt(4, 1); Block<Double> end1 = table.getCellAt(4, 3); Block<Double> end2 = table.getCellAt(4, 2); Block<Double> current = start; double rewards = 0.0; while (current != end1 && current != end2) { BlockAction a = optimalAction.get(current); rewards += current.getContent(); double r = Math.random() * 100; if (r > 20.0) { current = table.result(current, a); } else if (r > 10.0 && r <= 20.0) { current = table.result(current, a.getFirstRightAngledAction()); } else { current = table.result(current, a.getSecondRightAngledAction()); } } rewards += current.getContent(); if (run100.containsKey(rewards)) { Integer k = run100.get(rewards); k++; run100.put(rewards, k); rewardsHolder1.add(rewards); } else { run100.put(rewards, 1); rewardsHolder1.add(rewards); } } i = 0; while (i++ < 1000) { Block<Double> start = table.getCellAt(4, 1); Block<Double> end1 = table.getCellAt(4, 3); Block<Double> end2 = table.getCellAt(4, 2); Block<Double> current = start; double rewards = 0.0; while (current != end1 && current != end2) { BlockAction a = optimalAction.get(current); rewards += current.getContent(); double r = Math.random() * 100; if (r > 20.0) { current = table.result(current, a); } else if (r > 10.0 && r <= 20.0) { current = table.result(current, a.getFirstRightAngledAction()); } else { current = table.result(current, a.getSecondRightAngledAction()); } } rewards += current.getContent(); if (run1000.containsKey(rewards)) { Integer k = run1000.get(rewards); k++; run1000.put(rewards, k); rewardsHolder2.add(rewards); } else { run1000.put(rewards, 1); rewardsHolder2.add(rewards); } } Block<Double> start = table.getCellAt(4, 1); Block<Double> end1 = table.getCellAt(4, 3); Block<Double> end2 = table.getCellAt(4, 2); Block<Double> current = start; Double exprewards = 0.0; while (current != end1 && current != end2) { BlockAction a = optimalAction.get(current); exprewards += current.getContent(); current = table.result(current, a); } exprewards += current.getContent(); FileWriter fstream = new FileWriter("P2-histogram.txt"); BufferedWriter out = new BufferedWriter(fstream); out.write("10 runs"); out.write(System.getProperty("line.separator")); TreeSet<Double> keys = new TreeSet<Double>(run10.keySet()); for (Double rewards : keys) { Integer count = run10.get(rewards); // do something String rewstr = rewards.toString(); if (rewards.toString().length() > 8) { rewstr = rewstr.substring(0, 7); } out.write(rewstr + "\t\t" + count.toString()); out.write(System.getProperty("line.separator")); // System.out.println(pairs.getKey() + " = " + pairs.getValue()); // it.remove(); // avoids a ConcurrentModificationException } out.write("100 runs"); out.write(System.getProperty("line.separator")); keys = new TreeSet<Double>(run100.keySet()); for (Double rewards : keys) { Integer count = run100.get(rewards); // do something String rewstr = rewards.toString(); if (rewards.toString().length() > 8) { rewstr = rewstr.substring(0, 7); } out.write(rewstr + "\t\t" + count.toString()); out.write(System.getProperty("line.separator")); // System.out.println(pairs.getKey() + " = " + pairs.getValue()); // it.remove(); // avoids a ConcurrentModificationException } out.write("1000 runs"); out.write(System.getProperty("line.separator")); keys = new TreeSet<Double>(run1000.keySet()); for (Double rewards : keys) { Integer count = run1000.get(rewards); // do something String rewstr = rewards.toString(); if (rewards.toString().length() > 8) { rewstr = rewstr.substring(0, 7); } out.write(rewstr + "\t\t" + count.toString()); out.write(System.getProperty("line.separator")); // System.out.println(pairs.getKey() + " = " + pairs.getValue()); // it.remove(); // avoids a ConcurrentModificationException } out.close(); FileWriter fstream1 = new FileWriter("P2-output.txt"); BufferedWriter out1 = new BufferedWriter(fstream1); out1.write("Optimum utility when -0.04"); out1.write(System.getProperty("line.separator")); printUtil(4, 3, U, out1); out1.write(System.getProperty("line.separator")); out1.write("______________________________________________________"); out1.write(System.getProperty("line.separator")); out1.write("First Run reward is " + roundString(firstRunReward.toString())); out1.write(System.getProperty("line.separator")); out1.write("Mean for 10 runs :"); // Set<Double> rewVec = (Set<Double>) run10.keySet(); Double meanval = FindMean(rewardsHolder); out1.write(roundString(meanval.toString())); out1.write(System.getProperty("line.separator")); out1.write("SD for 10 runs: "); Double sdVal = FindSD(rewardsHolder, meanval); out1.write(roundString(sdVal.toString())); out1.write(System.getProperty("line.separator")); out1.write("Mean for 100 runs :"); // rewVec = (Set<Double>) run100.keySet(); meanval = FindMean(rewardsHolder1); out1.write(roundString(meanval.toString())); out1.write(System.getProperty("line.separator")); out1.write("SD for 100 runs: "); sdVal = FindSD(rewardsHolder1, meanval); out1.write(roundString(sdVal.toString())); out1.write(System.getProperty("line.separator")); out1.write("Mean for 1000 runs :"); // rewVec = (Set<Double>) run1000.keySet(); meanval = FindMean(rewardsHolder2); out1.write(roundString(meanval.toString())); out1.write(System.getProperty("line.separator")); out1.write("SD for 1000 runs: "); sdVal = FindSD(rewardsHolder2, meanval); out1.write(roundString(sdVal.toString())); out1.write(System.getProperty("line.separator")); out1.write(System.getProperty("line.separator")); out1.write("Expected reward from start state: "); out1.write(roundString(exprewards.toString())); out1.close(); }