  /**
   * Plans from the given state by iteratively deepening an f-limited depth-first search: each
   * iteration lowers the minimum allowed f-value (i.e., raises the allowed cost) to the best
   * f-value found beyond the previous limit, until a goal is found or every path dead ends.
   */
  @Override
  public void planFromState(State initialState) {

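    // first determine whether there is even a need to plan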
    StateHashTuple sih = this.stateHash(initialState);

    if (mapToStateIndex.containsKey(sih)) {
      return; // no need to plan since this is already solved
    }

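    // no plan is cached for this state, so begin the iterative-deepening search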
    PrioritizedSearchNode initialPSN = new PrioritizedSearchNode(sih, heuristic.h(initialState));
    double nextMinR = initialPSN.priority;

    PrioritizedSearchNode solutionNode = null;
    while (solutionNode == null) {

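      // run a depth-first search bounded below by the current f-limit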
      PrioritizedSearchNode cand = this.FLimtedDFS(initialPSN, nextMinR, 0.);
      if (cand == null) {
        return; // fail condition: every path leads to a dead end
      }

      // was the goal found within the limit?
      if (this.planEndNode(cand) && cand.priority >= nextMinR) {
        solutionNode = cand;
      }
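      // the next f-limit is the largest f-value that fell below the current limit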
      nextMinR = cand.priority;

      if (solutionNode == null) {
        DPrint.cl(debugCode, "Increase depth to F: " + nextMinR);
      }
    }

    // the search reached the goal; now follow back pointers to encode the plan into the policy
    this.encodePlanIntoPolicy(solutionNode);
  }
  /**
   * Recursive depth-first search that expands nodes down to an f-score limit; this is the
   * cost-bounded search step used by the iterative-deepening loop in planFromState.
   *
   * @param lastNode the node to expand
   * @param minR the minimum f-value (cumulative reward plus heuristic) below which the search is
   *     cut off (in other terms, the maximum allowed cost)
   * @param cumulatedReward the reward accumulated along the path to this node
   * @return a search node containing the goal state, or the highest f-valued node at which the
   *     search was cut off, or null if every path from this node leads to a dead end
   */
  protected PrioritizedSearchNode FLimtedDFS(
      PrioritizedSearchNode lastNode, double minR, double cumulatedReward) {

    if (lastNode.priority < minR) {
      return lastNode; // f-limit exceeded; return this boundary node to set the next limit
    }
    if (this.planEndNode(lastNode)) {
      return lastNode; // succeed condition
    }
    if (this.tf.isTerminal(lastNode.s.s)) {
      return null; // treat like a dead end if we're at a terminal state
    }

    State s = lastNode.s.s;

    // get all grounded actions applicable in this state
    List<GroundedAction> gas =
        Action.getAllApplicableGroundedActionsFromActionList(this.actions, s);

    // generate successor nodes
    List<PrioritizedSearchNode> successors = new ArrayList<PrioritizedSearchNode>(gas.size());
    List<Double> successorGs = new ArrayList<Double>(gas.size());
    for (GroundedAction ga : gas) {

      State ns = ga.executeIn(s);
      StateHashTuple nsh = this.stateHash(ns);

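      // f = reward accumulated so far (g) plus the heuristic estimate of the reward to go (h)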
      double r = rf.reward(s, ga, ns);
      double g = cumulatedReward + r;
      double hr = heuristic.h(ns);
      double f = g + hr;
      PrioritizedSearchNode pnsn = new PrioritizedSearchNode(nsh, ga, lastNode, f);

      // only add the successor if its state does not already appear on the current path
      if (this.lastStateOnPathIsNew(pnsn)) {
        successors.add(pnsn);
        successorGs.add(g);
      }
    }

    // sort the successors by f-score to traverse the most promising ones first
    Collections.sort(successors, nodeComparator);

    double maxCandR = Double.NEGATIVE_INFINITY;
    PrioritizedSearchNode bestCand = null;
    // since we want to expand the largest expected rewards first, traverse the f-ordered
    // successors in reverse order
    for (int i = successors.size() - 1; i >= 0; i--) {
      PrioritizedSearchNode snode = successors.get(i);
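      // recursively search below this successor under the same f-limit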
      PrioritizedSearchNode cand = this.FLimtedDFS(snode, minR, successorGs.get(i));
      if (cand != null && cand.priority > maxCandR) {
        bestCand = cand;
        maxCandR = cand.priority;
      }
    }

    return bestCand;
  }