/** * Check if the given action and state is a better policy * * @param current - The current state * @param newActionValue - The value of the new action * @return true if we have a better policy. False otherwise */ private boolean isBetterPolicy(State current, Double newActionValue) { if (!this.policy.keySet().contains(current)) { // No policy set for this yet return true; } if (current.getCost() <= newActionValue) { // New action has better cost return true; } return false; }
/** * Value generation * * @param state - The current state * @param action - The current action * @param Set<State> lookup - The set of states we are looking up * @return The total value for that state */ private Double valueGeneration(State state, Action action, Set<State> lookup) { Double maxValue = null; // The maximum value double currentVal = 0.0; // The current value List<Double> currentProb = new ArrayList<Double>(); for (State possible : this.possibleStates) { currentProb = transition(state, action, possible); for (int i = 0; i < currentProb.size(); ++i) { currentVal += currentProb.get(i) * (reward(possible) + this.spec.getDiscountFactor() * possible.getCost()); } if (maxValue == null) { // First time generating value maxValue = currentVal; } else { // Need to sum up these values maxValue += currentVal; } } return maxValue; }
/** * Checks if the difference between the values of the previous policy and the new policy are <= * val. * * <p>Also reassigns the old policy * * @param double val - The difference * @param Map<State, Action> newPolicy - The new policy * @return true if the minimum difference > val. false otherwise */ private boolean CheckDifference(double val, Map<State, Action> newPolicy) { Double currDiff, minDiff = null; // The minimum and current difference if (this.policy.size() != this.possibleStates.size()) { // Previous policy wasn't assigned yet return true; } for (State current : newPolicy.keySet()) { for (State old : this.policy.keySet()) { if (current.equals(old)) { currDiff = Math.abs(current.getTemporaryCost() - old.getCost()); if (minDiff == null || currDiff <= minDiff) { minDiff = currDiff; } break; } } } if (minDiff > val) { return true; } return false; }