  /**
   * Selects a next state for expansion when action a is applied in state s by randomly sampling
   * from the transition dynamics, with each outcome weighted by the margin between the lower and
   * upper bound value functions.
   *
   * @param s the source state of the transition
   * @param a the action applied in the source state
   * @return a {@link StateSelectionAndExpectedGap} object holding the next state to be expanded and
   *     the expected margin size of this transition.
   */
  protected StateSelectionAndExpectedGap getNextStateBySampling(State s, GroundedAction a) {

    List<TransitionProbability> tps = a.getTransitions(s);
    double sum = 0.;
    double[] weightedGap = new double[tps.size()];
    HashableState[] hashedStates = new HashableState[tps.size()];
    for (int i = 0; i < tps.size(); i++) {
      TransitionProbability tp = tps.get(i);
      HashableState nsh = this.hashingFactory.hashState(tp.s);
      hashedStates[i] = nsh;
      double gap = this.getGap(nsh);
      weightedGap[i] = tp.p * gap;
      sum += weightedGap[i];

    double roll = RandomFactory.getMapped(0).nextDouble();
    double cumSum = 0.;
    for (int i = 0; i < weightedGap.length; i++) {
      cumSum += weightedGap[i] / sum;
      if (roll < cumSum) {
        StateSelectionAndExpectedGap select =
            new StateSelectionAndExpectedGap(hashedStates[i], sum);
        return select;

    throw new RuntimeException("Error: probabilities in state selection did not sum to 1.");
  /**
   * Selects a next state for expansion when action a is applied in state s by choosing the
   * possible next state that has the largest margin between its lower and upper bound values.
   * Ties are broken randomly.
   *
   * @param s the source state of the transition
   * @param a the action applied in the source state
   * @return a {@link StateSelectionAndExpectedGap} object holding the next state to be expanded and
   *     the expected margin size of this transition.
   */
  protected StateSelectionAndExpectedGap getNextStateByMaxMargin(State s, GroundedAction a) {

    List<TransitionProbability> tps = a.getTransitions(s);
    double sum = 0.;
    double maxGap = Double.NEGATIVE_INFINITY;
    List<HashableState> maxStates = new ArrayList<HashableState>(tps.size());
    for (TransitionProbability tp : tps) {
      HashableState nsh = this.hashingFactory.hashState(tp.s);
      double gap = this.getGap(nsh);
      sum += tp.p * gap;
      if (gap == maxGap) {
      } else if (gap > maxGap) {
        maxGap = gap;

    int rint = RandomFactory.getMapped(0).nextInt(maxStates.size());
    StateSelectionAndExpectedGap select =
        new StateSelectionAndExpectedGap(maxStates.get(rint), sum);

    return select;