/** * Transition function. Currently, we only get the list of probabilities of each item. * * @param current - The current state * @param action - The action * @param possible - The possible state * @return The list of probabilities where each index refers to the probability of that item */ private List<Double> transition(State current, Action action, State possible) { List<Double> probs = new ArrayList<Double>(); // Probabilities Map<Integer, Integer> currentStock, purchase, possibleStock; // Maps double currentProb; // The current probability Matrix currentMatrix; // The current probability matrix int row, column; // The row and column for (int i = 0; i < current.getState().size(); ++i) { currentProb = 0.0; currentStock = current.getState(); purchase = action.getPurchases(); possibleStock = possible.getState(); row = currentStock.get(i) + purchase.get(i); column = row - possible.getState().get(i); currentMatrix = this.probabilities.get(i); if (column < 0 || column >= currentMatrix.getNumCols() || row >= currentMatrix.getNumRows()) { // Invalid state probs.add(0.0); continue; } if (possibleStock.get(i) > 0 || (possibleStock.get(i) == 0 && column == 0)) { // Sufficiently provided currentProb = currentMatrix.get(row, column); } else if (possibleStock.get(i) == 0 && column > 0) { // Range of probabilities because user could have eaten plenty for (int j = column; j < currentMatrix.getNumCols(); ++j) { currentProb += currentMatrix.get(row, j); } } probs.add(currentProb); } return probs; }
/**
 * Wrap the given map in the container selected by {@code generateType} and store it in the
 * matching collection, unless an equal element is already present.
 *
 * <p>Type 0 builds a {@code State}: a new state gets its cost set to the sum of the map, is
 * registered in the policy with no action, and is added to the possible states. Type 1 builds an
 * {@code Action} and type 2 a {@code Consumption}. Any other type prints an error and terminates
 * the JVM with exit code 13.
 *
 * @param generateType - The type of item to add
 * @param toAdd - The map to add
 */
private void addToList(int generateType, Map<Integer, Integer> toAdd) {
  if (generateType == 0) {
    State candidate = new State(toAdd);
    if (!this.possibleStates.contains(candidate)) {
      candidate.setCost(sumOf(toAdd));
      this.policy.put(candidate, null);
      this.possibleStates.add(candidate);
    }
  } else if (generateType == 1) {
    Action candidate = new Action(toAdd);
    if (!this.possibleActions.contains(candidate)) {
      this.possibleActions.add(candidate);
    }
  } else if (generateType == 2) {
    Consumption candidate = new Consumption(toAdd);
    if (!this.possibleConsumptions.contains(candidate)) {
      this.possibleConsumptions.add(candidate);
    }
  } else {
    System.err.println("Invalid generation type");
    System.exit(13);
  }
}
/**
 * Replace the current policy with a copy of the given one.
 *
 * <p>Every state and action is re-created from its underlying map, and each copied state's cost
 * is taken from the temporary cost of the state it was copied from.
 *
 * @param newPolicy - The new policy
 */
private void copyPolicy(Map<State, Action> newPolicy) {
  this.policy = new HashMap<State, Action>();

  for (Map.Entry<State, Action> entry : newPolicy.entrySet()) {
    State source = entry.getKey();

    State stateCopy = new State(source.getState());
    stateCopy.setCost(source.getTemporaryCost());

    Action actionCopy = new Action(entry.getValue().getPurchases());
    this.policy.put(stateCopy, actionCopy);
  }
}
/**
 * Check if the given action is valid or not. An action is valid when it does not over-stock the
 * fridge, i.e. the items purchased plus the items already in the state do not exceed the fridge
 * capacity.
 *
 * @param currentAction - The action we are looking at
 * @param currentState - The state we are looking at
 * @return true if the purchases plus the current stock fit within the fridge capacity. false if
 *     they exceed it
 */
private boolean validAction(Action currentAction, State currentState) {
  int purchased = sumOf(currentAction.getPurchases());
  int stocked = sumOf(currentState.getState());
  return !((purchased + stocked) > fridge.getCapacity());
}
/**
 * Check if the end state is valid. Two conditions must hold:
 *
 * <ul>
 *   <li>the difference between the total item counts of both states is not greater than the
 *       number of item types multiplied by the maximum number of items per type, and
 *   <li>for every item, the difference between its quantity in the two states is not greater
 *       than the maximum number of items per type.
 * </ul>
 *
 * @param currentState The current state
 * @param endState The end state
 * @return true if the end state is valid. false otherwise
 */
private boolean IsValidEndState(State currentState, State endState) {
  Map<Integer, Integer> before = currentState.getState();
  Map<Integer, Integer> after = endState.getState();

  int totalDifference = Math.abs(sumOf(before) - sumOf(after));
  if (totalDifference > (this.fridge.getMaxTypes() * this.fridge.getMaxItemsPerType())) {
    return false;
  }

  // Compare item by item. Items are keyed by their index 0..size-1, the same convention the
  // transition function relies on.
  for (int i = 0; i < before.size(); ++i) {
    int itemDifference = Math.abs(before.get(i) - after.get(i));
    if (itemDifference > this.fridge.getMaxItemsPerType()) {
      return false;
    }
  }

  return true;
}
/** Generate optimal policy */ private void generateOptimalPolicy() { double startTime, currentTime; // Timer things boolean alreadyRunThrough = false; // Record if we've already done one run Set<State> toLookup; // The states to lookup Map<Action, Double> differentValues = new HashMap<Action, Double>(); // The different values for the current state Map<State, Action> newPolicy = new HashMap<State, Action>(); // New policy State newState; // New state for new policy Action best; // The best action startTime = Global.currentTime(); currentTime = Global.currentTime(); while ((currentTime - startTime) <= this.timeRemaining) { if (alreadyRunThrough) { toLookup = this.policy.keySet(); } else { toLookup = this.possibleStates; } for (State currentState : toLookup) { differentValues.clear(); // Reset different values for (Action currentAction : this.possibleActions) { if (!validAction(currentAction, currentState)) { continue; } differentValues.put( currentAction, valueGeneration(currentState, currentAction, toLookup)); } best = maxArg(differentValues); if (isBetterPolicy(currentState, differentValues.get(best))) { newState = new State(currentState.getState()); newState.setTemporaryCost(differentValues.get(best)); newPolicy.put(newState, best); } } if (CheckDifference(0.1, newPolicy)) { copyPolicy(newPolicy); newPolicy.clear(); alreadyRunThrough = true; } else { currentTime = Global.currentTime(); break; } } }
/** * Check if the given action and state is a better policy * * @param current - The current state * @param newActionValue - The value of the new action * @return true if we have a better policy. False otherwise */ private boolean isBetterPolicy(State current, Double newActionValue) { if (!this.policy.keySet().contains(current)) { // No policy set for this yet return true; } if (current.getCost() <= newActionValue) { // New action has better cost return true; } return false; }
/** * Value generation * * @param state - The current state * @param action - The current action * @param Set<State> lookup - The set of states we are looking up * @return The total value for that state */ private Double valueGeneration(State state, Action action, Set<State> lookup) { Double maxValue = null; // The maximum value double currentVal = 0.0; // The current value List<Double> currentProb = new ArrayList<Double>(); for (State possible : this.possibleStates) { currentProb = transition(state, action, possible); for (int i = 0; i < currentProb.size(); ++i) { currentVal += currentProb.get(i) * (reward(possible) + this.spec.getDiscountFactor() * possible.getCost()); } if (maxValue == null) { // First time generating value maxValue = currentVal; } else { // Need to sum up these values maxValue += currentVal; } } return maxValue; }
/** * Print out important values * * @param mode - Indicates the type of values to print out. */ private void printImportantValues(int mode) { Set<?> toPrint = null; // The set to print int currentIndex = 0; // The current index switch (mode) { case 0: // States System.err.println("Printing out states"); toPrint = this.possibleStates; break; case 1: // Actions System.err.println("Printing out actions"); toPrint = this.possibleActions; break; case 2: // Consumption System.err.println("Printing out consumptions"); toPrint = this.possibleConsumptions; break; case 3: // Policies for (State current : this.policy.keySet()) { System.out.println( "State " + (currentIndex + 1) + ": " + current.getState().toString() + ". Do Action: " + this.policy.get(current).getPurchases().toString()); currentIndex++; } return; default: // Invalid mode System.err.println("Invalid mode for printing"); System.exit(100); } if (toPrint == null) { System.err.println("Something really horrible went wrong"); System.exit(150); } for (Object current : toPrint) { System.out.println("Value: " + (currentIndex + 1) + current.toString()); currentIndex++; } }
/** * Checks if the difference between the values of the previous policy and the new policy are <= * val. * * <p>Also reassigns the old policy * * @param double val - The difference * @param Map<State, Action> newPolicy - The new policy * @return true if the minimum difference > val. false otherwise */ private boolean CheckDifference(double val, Map<State, Action> newPolicy) { Double currDiff, minDiff = null; // The minimum and current difference if (this.policy.size() != this.possibleStates.size()) { // Previous policy wasn't assigned yet return true; } for (State current : newPolicy.keySet()) { for (State old : this.policy.keySet()) { if (current.equals(old)) { currDiff = Math.abs(current.getTemporaryCost() - old.getCost()); if (minDiff == null || currDiff <= minDiff) { minDiff = currDiff; } break; } } } if (minDiff > val) { return true; } return false; }
/** * Reward function for a state. Currently, our reward is simply the total number of items in the * state minus the capacity of the fridge. * * <p>This means that we get the highest reward if we fully stock our fridge * * @param current - The current state * @return The immediate reward for being in this state */ private double reward(State current) { int total = sumOf(current.getState()); // The total number of items return total - this.fridge.getCapacity(); }