// Derives QFunctions for the given value function and simulates the // greedy policy for the given number of trials and steps per trial. // Returns final value of every trial. public ArrayList simulate(int trials, int steps, long rand_seed) { ArrayList values = new ArrayList(); _r = new Random(rand_seed); for (int trial = 1; trial <= trials; trial++) { System.out.println("\n -----------\n Trial " + trial + "\n -----------"); // Initialize state _state = new ArrayList(); _nVars = _mdp._alVars.size(); for (int c = 0; c < (_nVars << 1); c++) { _state.add("-"); } Iterator i = _mdp._alVars.iterator(); _vars = new TreeSet(); while (i.hasNext()) { String s = (String) i.next(); if (!s.endsWith("\'")) { Integer gid = (Integer) _mdp._tmVar2ID.get(s); _vars.add(gid); // Note: assign level (level is gid-1 b/c gids in order) _state.set(gid.intValue() - 1, _r.nextBoolean() ? TRUE : FALSE); } } // System.out.println(_mdp._context.printNode(_mdp._valueDD) + "\n" + _state); double reward = _mdp._context.evaluate(_mdp._rewardDD, _state); System.out.print(" " + PrintState(_state) + " " + MDP._df.format(reward)); // Run steps for (int step = 1; step <= steps; step++) { // Get action Action a; if (_bUseBasis) { a = getBasisAction(); } else { a = getAction(); } // Execute action executeAction(a); // Update reward reward = (_mdp._bdDiscount.doubleValue() * reward) + _mdp._context.evaluate(_mdp._rewardDD, _state); System.out.println(", a=" + a._sName); System.out.print( " " + PrintState(_state) + " " + MDP._df.format(reward) + ": " + "Step " + step); } values.add(new Double(reward)); System.out.println(); } return values; }
/**
 * Reads an integer {@code k} and a template word (letters and {@code '?'}) and prints a
 * palindrome obtained by replacing every {@code '?'}, such that each of the first
 * {@code k} lowercase letters occurs in it. Prints {@code IMPOSSIBLE} when two mirrored
 * positions hold different fixed letters, or when there are not enough fully free
 * mirrored pairs to place every letter that is still missing.
 *
 * <p>Input is taken from {@code nextInt()} / {@code nextToken()}; the answer is written
 * to {@code out}.
 */
void solve() {
    int k = nextInt();
    char[] c = nextToken().toCharArray();
    int n = c.length;

    // Two different fixed letters on mirrored positions can never be reconciled.
    for (int i = 0, j = n - 1; i < j; i++, j--) {
        if (c[i] != '?' && c[j] != '?' && c[i] != c[j]) {
            out.println("IMPOSSIBLE");
            return;
        }
    }

    // Start with all of the first k letters and drop those the template already has;
    // what remains must be placed on free positions.
    TreeSet<Character> missing = new TreeSet<Character>();
    for (int i = 0; i < k; i++) {
        missing.add((char) ('a' + i));
    }
    for (char ch : c) {
        if (ch != '?') {
            missing.remove(ch);
        }
    }

    // Walk the mirrored pairs from the centre outwards. A pair that is free on both
    // sides takes the largest letter still missing, or 'a' once nothing is missing.
    // Larger letters therefore end up nearer the centre and the outer free pairs
    // become 'a' (presumably to keep the result lexicographically small).
    for (int i = (n - 1) / 2, j = n - i - 1; i >= 0; i--, j++) {
        if (c[i] == '?' && c[j] == '?') {
            if (!missing.isEmpty()) {
                c[i] = c[j] = missing.pollLast();
            } else {
                c[i] = c[j] = 'a';
            }
        }
    }

    // Some required letter could not be placed on any free pair.
    if (!missing.isEmpty()) {
        out.println("IMPOSSIBLE");
        return;
    }

    // Pairs with exactly one free side simply copy the fixed letter across.
    for (int i = 0, j = n - 1; i < j; i++, j--) {
        if (c[i] == '?') {
            c[i] = c[j];
        } else if (c[j] == '?') {
            c[j] = c[i];
        }
    }

    out.println(new String(c));
}