// Build the MDP and value function, as well as the Q-functions for
// executing the greedy policy.
public MDPSim(String prob_file, String vfun_file) {

    _mdp = new MDP(prob_file, DD.TYPE_ADD);
    _bUseBasis = vfun_file.equalsIgnoreCase("basis");

    if (_bUseBasis) {
        _mdp._valueDD = null;
    } else {
        _mdp._valueDD = _mdp._context.buildDDFromUnorderedTree(
                MDPConverter.ADDFileToTree(vfun_file), _mdp._tmVar2ID);

        _qfuns = new HashMap();
        Iterator i = _mdp._hmName2Action.entrySet().iterator();
        while (i.hasNext()) {
            Map.Entry me = (Map.Entry) i.next();
            Action a = (Action) me.getValue();

            //////////////////////////////////////////////////////////////
            // Regress the current value function through each action
            //////////////////////////////////////////////////////////////
            Object qfun = _mdp.regress(
                    _mdp._context.remapGIDsInt(_mdp._valueDD, _mdp._hmPrimeRemap), a);

            System.out.println("Calculating Q-function for action: " + a._sName);
            // System.out.println(_mdp._context.printNode(qfun) + "\n");
            _qfuns.put(a._sName, qfun);
        }
    }

    System.out.println(_mdp);
}
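// For reference, the regress() call above performs a Bellman backup over
// ADDs. Below is a hedged tabular analogue of that computation (a
// hypothetical helper, not part of the MDP class; P_a, R_a, V, and gamma
// are plain arrays/scalars rather than decision diagrams):
//
//     Q_a(s) = R_a(s) + gamma * sum_{s'} P_a(s, s') * V(s')
//
private static double[] regressTabular(double[][] P_a, double[] R_a,
                                       double[] V, double gamma) {
    double[] q = new double[V.length];
    for (int s = 0; s < V.length; s++) {
        double expected = 0d;
        for (int sp = 0; sp < V.length; sp++)
            expected += P_a[s][sp] * V[sp];   // E[ V(s') | s, a ]
        q[s] = R_a[s] + gamma * expected;     // reward plus discounted future value
    }
    return q;
}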
// Determines the best action, given the Q-functions
public Action getAction() {

    Iterator i = _qfuns.keySet().iterator();
    Action best_act = null;
    // Start at -infinity rather than -1d so that all-negative Q-values
    // still yield an action.
    double best_val = Double.NEGATIVE_INFINITY;
    while (i.hasNext()) {
        String act_name = (String) i.next();
        Object qfun = _qfuns.get(act_name);
        Action a = (Action) _mdp._hmName2Action.get(act_name);
        double val = _mdp._context.evaluate(qfun, _state);
        // System.out.println(a._sName + " -> " + _mdp._context.printNode(qfun));
        // System.out.println("  " + a._sName + " -> " + _mdp._df.format(val));
        if (val > best_val) {
            best_act = a;
            best_val = val;
        }
    }

    // System.out.println("  Chose: " + best_act._sName);
    return best_act;
}
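// Hedged usage sketch: one way a caller might drive the greedy policy.
// Assumes a problem/value-function file pair on the command line and an
// external environment that sets _state before each decision and applies
// the chosen action afterward (neither is shown in this class):
public static void main(String[] args) {
    MDPSim sim = new MDPSim(args[0], args[1]);
    for (int t = 0; t < 100; t++) {
        // _state must already hold the current variable assignment
        Action a = sim.getAction(); // greedy w.r.t. the precomputed Q-functions
        System.out.println("Step " + t + ": " + a._sName);
        // ... environment would sample a successor state and update sim._state ...
    }
}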