예제 #1
0
  /**
   * Rebuilds the cached state-to-action map from the planner's current UCT tree.
   *
   * <p>Starting at the planner's root node, each dequeued state node is mapped to the action of
   * the node returned by {@code getQGreedyNode}, and all successor state nodes of that action are
   * queued for the same treatment. The walk ends early, after printing a notice, at the first
   * node for which the planner reports no action preference. When the planner has no root, the
   * policy map is left empty.
   */
  public void computePolicyFromTree() {
    policy = new HashMap<StateHashTuple, GroundedAction>();

    if (this.planner.getRoot() == null) {
      return;
    }

    // breadth-first walk: only successors of the selected action node are ever enqueued
    LinkedList<UCTStateNode> frontier = new LinkedList<UCTStateNode>();
    frontier.add(planner.getRoot());

    while (!frontier.isEmpty()) {
      UCTStateNode current = frontier.poll();

      if (!planner.containsActionPreference(current)) {
        System.out.println(
            "UCT tree does not contain action preferences of the state queried by the UCTTreeWalkPolicy. Consider replanning with planFromState");
        return; // policy ill defined; nothing follows the loop, so stop here
      }

      UCTActionNode greedy = this.getQGreedyNode(current);
      if (greedy == null) {
        continue; // no selectable action for this node; leave it unmapped
      }

      policy.put(current.state, greedy.action);

      // enqueue every possible successor of the chosen action, in the order returned
      frontier.addAll(greedy.getAllSuccessors());
    }
  }
예제 #2
0
  /**
   * Reports whether the tree-derived policy holds an action for the given state. The policy map
   * is built through {@code computePolicyFromTree} the first time it is needed.
   *
   * @param s the state to look up
   * @return {@code true} if a non-null action is stored under the planner's hash of {@code s}
   */
  @Override
  public boolean isDefinedFor(State s) {
    if (policy == null) {
      this.computePolicyFromTree();
    }
    return policy.get(planner.stateHash(s)) != null;
  }
예제 #3
0
  /**
   * Returns the action the tree-derived policy maps to the given state. The policy map is built
   * through {@code computePolicyFromTree} the first time it is needed.
   *
   * @param s the state to look up
   * @return the action stored under the planner's hash of {@code s}
   * @throws PolicyUndefinedException if no action is stored for that state
   */
  @Override
  public AbstractGroundedAction getAction(State s) {
    if (policy == null) {
      this.computePolicyFromTree();
    }

    GroundedAction mapped = policy.get(planner.stateHash(s));
    if (mapped != null) {
      return mapped;
    }
    throw new PolicyUndefinedException();
  }
예제 #4
0
  /**
   * Returns the action distribution for the given state as a single-entry list: the mapped action
   * with probability 1. The policy map is built through {@code computePolicyFromTree} the first
   * time it is needed.
   *
   * @param s the state to look up
   * @return a new list holding one {@code ActionProb} for the mapped action
   * @throws PolicyUndefinedException if no action is stored for that state
   */
  @Override
  public List<ActionProb> getActionDistributionForState(State s) {
    if (policy == null) {
      this.computePolicyFromTree();
    }

    GroundedAction mapped = policy.get(planner.stateHash(s));
    if (mapped != null) {
      // the policy is deterministic, so the whole probability mass goes to the mapped action
      List<ActionProb> distribution = new ArrayList<Policy.ActionProb>();
      distribution.add(new ActionProb(mapped, 1.));
      return distribution;
    }
    throw new PolicyUndefinedException();
  }