/** * Transition function. Currently, we only get the list of probabilities of each item. * * @param current - The current state * @param action - The action * @param possible - The possible state * @return The list of probabilities where each index refers to the probability of that item */ private List<Double> transition(State current, Action action, State possible) { List<Double> probs = new ArrayList<Double>(); // Probabilities Map<Integer, Integer> currentStock, purchase, possibleStock; // Maps double currentProb; // The current probability Matrix currentMatrix; // The current probability matrix int row, column; // The row and column for (int i = 0; i < current.getState().size(); ++i) { currentProb = 0.0; currentStock = current.getState(); purchase = action.getPurchases(); possibleStock = possible.getState(); row = currentStock.get(i) + purchase.get(i); column = row - possible.getState().get(i); currentMatrix = this.probabilities.get(i); if (column < 0 || column >= currentMatrix.getNumCols() || row >= currentMatrix.getNumRows()) { // Invalid state probs.add(0.0); continue; } if (possibleStock.get(i) > 0 || (possibleStock.get(i) == 0 && column == 0)) { // Sufficiently provided currentProb = currentMatrix.get(row, column); } else if (possibleStock.get(i) == 0 && column > 0) { // Range of probabilities because user could have eaten plenty for (int j = column; j < currentMatrix.getNumCols(); ++j) { currentProb += currentMatrix.get(row, j); } } probs.add(currentProb); } return probs; }
/**
 * Decide whether an action can be applied to a state without over-stocking the fridge.
 *
 * <p>The total of the action's purchases is added to the total of the state's stock and compared
 * against the fridge capacity.
 *
 * @param currentAction - The action we are looking at
 * @param currentState - The state we are looking at
 * @return true if purchases plus current stock fit within the capacity of the fridge. false if
 *     they would exceed it
 */
private boolean validAction(Action currentAction, State currentState) {
  final int purchased = sumOf(currentAction.getPurchases());
  final int stocked = sumOf(currentState.getState());
  return (purchased + stocked) <= fridge.getCapacity();
}
/**
 * Check if the end state is valid. Two conditions must hold:
 *
 * <ul>
 *   <li>the absolute difference between the item totals of both states is not greater than
 *       {@code getMaxTypes() * getMaxItemsPerType()} of the fridge, and
 *   <li>for each item, the absolute difference between the contents of both states is not
 *       greater than {@code getMaxItemsPerType()}.
 * </ul>
 *
 * <p>Item indices {@code 0 .. size-1} of the current state are used as keys into both states;
 * a key missing from either map fails on unboxing.
 *
 * @param currentState The current state
 * @param endState The end state
 * @return true if the end state is valid. false otherwise
 */
private boolean IsValidEndState(State currentState, State endState) {
  final Map<Integer, Integer> currentStock = currentState.getState();
  final Map<Integer, Integer> endStock = endState.getState();

  /* Overall check on the sums */
  final int totalDifference = Math.abs(sumOf(currentStock) - sumOf(endStock));
  if (totalDifference > (this.fridge.getMaxTypes() * this.fridge.getMaxItemsPerType())) {
    return false;
  }

  /* Per-item check: compare the quantities of item i, not the totals again */
  for (int i = 0; i < currentStock.size(); ++i) {
    final int itemDifference = Math.abs(currentStock.get(i) - endStock.get(i));
    if (itemDifference > this.fridge.getMaxItemsPerType()) {
      return false;
    }
  }
  return true;
}
/**
 * Replace {@code this.policy} with a copy of the given policy.
 *
 * <p>For every entry a new {@code State} is built from the key's state map, its cost is set to
 * the key's temporary cost, and a new {@code Action} is built from the value's purchases.
 *
 * @param newPolicy - The new policy
 */
private void copyPolicy(Map<State, Action> newPolicy) {
  this.policy = new HashMap<State, Action>();

  for (Map.Entry<State, Action> entry : newPolicy.entrySet()) {
    final State source = entry.getKey();

    /* Clone of the state, carrying the temporary cost over as its cost */
    final State clonedState = new State(source.getState());
    clonedState.setCost(source.getTemporaryCost());

    /* Clone of the action chosen for that state */
    final Action clonedAction = new Action(entry.getValue().getPurchases());

    this.policy.put(clonedState, clonedAction);
  }
}
/** Generate optimal policy */ private void generateOptimalPolicy() { double startTime, currentTime; // Timer things boolean alreadyRunThrough = false; // Record if we've already done one run Set<State> toLookup; // The states to lookup Map<Action, Double> differentValues = new HashMap<Action, Double>(); // The different values for the current state Map<State, Action> newPolicy = new HashMap<State, Action>(); // New policy State newState; // New state for new policy Action best; // The best action startTime = Global.currentTime(); currentTime = Global.currentTime(); while ((currentTime - startTime) <= this.timeRemaining) { if (alreadyRunThrough) { toLookup = this.policy.keySet(); } else { toLookup = this.possibleStates; } for (State currentState : toLookup) { differentValues.clear(); // Reset different values for (Action currentAction : this.possibleActions) { if (!validAction(currentAction, currentState)) { continue; } differentValues.put( currentAction, valueGeneration(currentState, currentAction, toLookup)); } best = maxArg(differentValues); if (isBetterPolicy(currentState, differentValues.get(best))) { newState = new State(currentState.getState()); newState.setTemporaryCost(differentValues.get(best)); newPolicy.put(newState, best); } } if (CheckDifference(0.1, newPolicy)) { copyPolicy(newPolicy); newPolicy.clear(); alreadyRunThrough = true; } else { currentTime = Global.currentTime(); break; } } }
/** * Print out important values * * @param mode - Indicates the type of values to print out. */ private void printImportantValues(int mode) { Set<?> toPrint = null; // The set to print int currentIndex = 0; // The current index switch (mode) { case 0: // States System.err.println("Printing out states"); toPrint = this.possibleStates; break; case 1: // Actions System.err.println("Printing out actions"); toPrint = this.possibleActions; break; case 2: // Consumption System.err.println("Printing out consumptions"); toPrint = this.possibleConsumptions; break; case 3: // Policies for (State current : this.policy.keySet()) { System.out.println( "State " + (currentIndex + 1) + ": " + current.getState().toString() + ". Do Action: " + this.policy.get(current).getPurchases().toString()); currentIndex++; } return; default: // Invalid mode System.err.println("Invalid mode for printing"); System.exit(100); } if (toPrint == null) { System.err.println("Something really horrible went wrong"); System.exit(150); } for (Object current : toPrint) { System.out.println("Value: " + (currentIndex + 1) + current.toString()); currentIndex++; } }
/** * Reward function for a state. Currently, our reward is simply the total number of items in the * state minus the capacity of the fridge. * * <p>This means that we get the highest reward if we fully stock our fridge * * @param current - The current state * @return The immediate reward for being in this state */ private double reward(State current) { int total = sumOf(current.getState()); // The total number of items return total - this.fridge.getCapacity(); }