/**
 * Selects a next state for expansion when action a is applied in state s by randomly sampling
 * from the transition dynamics weighted by the margin of the lower and upper bound value
 * functions.
 * <p>
 * Each candidate next state is weighted by {@code tp.p * getGap(nextState)}. If those weights sum
 * to exactly zero (for example, every reachable state has a zero gap), normalizing them would
 * divide zero by zero, so the state is instead sampled from the unweighted transition
 * probabilities. The returned expected margin is the sum of the gap weights in either case.
 *
 * @param s the source state of the transition
 * @param a the action applied in the source state
 * @return a {@link StateSelectionAndExpectedGap} object holding the next state to be expanded and
 *         the expected margin size of this transition.
 * @throws RuntimeException if no state can be selected, i.e. the action has no transitions from
 *         s or no transition carries a positive share of the sampling distribution
 */
protected StateSelectionAndExpectedGap getNextStateBySampling(State s, GroundedAction a) {

    List<TransitionProbability> tps = a.getTransitions(s);
    int n = tps.size();

    double sum = 0.;
    double probSum = 0.;
    double[] weightedGap = new double[n];
    double[] probs = new double[n];
    HashableState[] hashedStates = new HashableState[n];
    for (int i = 0; i < n; i++) {
        TransitionProbability tp = tps.get(i);
        HashableState nsh = this.hashingFactory.hashState(tp.s);
        hashedStates[i] = nsh;
        double gap = this.getGap(nsh);
        weightedGap[i] = tp.p * gap;
        probs[i] = tp.p;
        sum += weightedGap[i];
        probSum += tp.p;
    }

    // With a zero total weight, weightedGap[i] / sum is NaN and no roll could ever select a
    // state; sample from the plain transition probabilities instead.
    double[] sampling = weightedGap;
    double normalizer = sum;
    if (sum == 0.) {
        sampling = probs;
        normalizer = probSum;
    }

    // Exactly one random draw, regardless of which distribution is used.
    double roll = RandomFactory.getMapped(0).nextDouble();
    double cumSum = 0.;
    int lastSupported = -1;
    for (int i = 0; i < n; i++) {
        double share = sampling[i] / normalizer;
        cumSum += share;
        if (roll < cumSum) {
            return new StateSelectionAndExpectedGap(hashedStates[i], sum);
        }
        if (share > 0.) {
            lastSupported = i;
        }
    }

    // Accumulated rounding error can leave cumSum marginally below 1, letting a roll very close
    // to 1 fall through the loop; resolve that to the last state with a positive share.
    if (lastSupported >= 0) {
        return new StateSelectionAndExpectedGap(hashedStates[lastSupported], sum);
    }

    throw new RuntimeException("Error: probabilities in state selection did not sum to 1.");
}
/**
 * Chooses the next state to expand after applying action a in state s by picking the possible
 * next state whose margin between the lower and upper bound value functions is largest. When
 * several next states share the largest margin, one of them is picked uniformly at random.
 *
 * @param s the source state of the transition
 * @param a the action applied in the source state
 * @return a {@link StateSelectionAndExpectedGap} object holding the next state to be expanded and
 *         the expected margin size of this transition (the transition-probability-weighted sum
 *         of the margins of all possible next states).
 */
protected StateSelectionAndExpectedGap getNextStateByMaxMargin(State s, GroundedAction a) {

    List<TransitionProbability> transitions = a.getTransitions(s);
    List<HashableState> candidates = new ArrayList<HashableState>(transitions.size());
    double largestGap = Double.NEGATIVE_INFINITY;
    double expectedGap = 0.;

    for (int i = 0; i < transitions.size(); i++) {
        TransitionProbability transition = transitions.get(i);
        HashableState hashedNext = this.hashingFactory.hashState(transition.s);
        double gap = this.getGap(hashedNext);
        expectedGap += transition.p * gap;

        if (gap > largestGap) {
            // Strictly better margin: discard the previous candidates and start over.
            candidates.clear();
            candidates.add(hashedNext);
            largestGap = gap;
        } else if (gap == largestGap) {
            // Exact tie with the current best margin: keep as an additional candidate.
            candidates.add(hashedNext);
        }
    }

    // Break ties uniformly at random among the states sharing the largest margin.
    int pick = RandomFactory.getMapped(0).nextInt(candidates.size());
    return new StateSelectionAndExpectedGap(candidates.get(pick), expectedGap);
}