/** * computes a hash-backed policy for every state visited along the greedy path of the UCT tree. */ public void computePolicyFromTree() { policy = new HashMap<StateHashTuple, GroundedAction>(); if (this.planner.getRoot() == null) { return; } // define policy for all states that are expanded along the greedy path of the UCT tree LinkedList<UCTStateNode> queue = new LinkedList<UCTStateNode>(); queue.add(planner.getRoot()); while (queue.size() > 0) { UCTStateNode snode = queue.poll(); if (!planner.containsActionPreference(snode)) { System.out.println( "UCT tree does not contain action preferences of the state queried by the UCTTreeWalkPolicy. Consider replanning with planFromState"); break; // policy ill defined } UCTActionNode choice = this.getQGreedyNode(snode); if (choice != null) { policy.put(snode.state, choice.action); // set the policy List<UCTStateNode> successors = choice.getAllSuccessors(); // queue up all possible successors of this action for (UCTStateNode suc : successors) { queue.offer(suc); } } } }
/**
 * Reports whether the policy map holds an action for the given state.
 * The policy is computed from the tree first if it has not been built yet.
 *
 * @param s the state to look up
 * @return {@code true} if a non-null action is mapped to the hash of {@code s}
 */
@Override
public boolean isDefinedFor(State s) {
    if (policy == null) {
        // lazily build the policy on first use
        this.computePolicyFromTree();
    }
    return policy.get(planner.stateHash(s)) != null;
}
/**
 * Returns the action the policy maps to the given state.
 * The policy is computed from the tree first if it has not been built yet.
 *
 * @param s the state to look up
 * @return the action stored for the hash of {@code s}
 * @throws PolicyUndefinedException if no action is stored for {@code s}
 */
@Override
public AbstractGroundedAction getAction(State s) {
    if (policy == null) {
        // lazily build the policy on first use
        this.computePolicyFromTree();
    }

    GroundedAction mapped = policy.get(planner.stateHash(s));
    if (mapped != null) {
        return mapped;
    }
    throw new PolicyUndefinedException();
}
@Override public List<ActionProb> getActionDistributionForState(State s) { if (policy == null) { this.computePolicyFromTree(); } GroundedAction ga = policy.get(planner.stateHash(s)); if (ga == null) { throw new PolicyUndefinedException(); } List<ActionProb> res = new ArrayList<Policy.ActionProb>(); res.add(new ActionProb(ga, 1.)); // greedy policy so only need to supply the mapped action return res; }