/** * Transition function. Currently, we only get the list of probabilities of each item. * * @param current - The current state * @param action - The action * @param possible - The possible state * @return The list of probabilities where each index refers to the probability of that item */ private List<Double> transition(State current, Action action, State possible) { List<Double> probs = new ArrayList<Double>(); // Probabilities Map<Integer, Integer> currentStock, purchase, possibleStock; // Maps double currentProb; // The current probability Matrix currentMatrix; // The current probability matrix int row, column; // The row and column for (int i = 0; i < current.getState().size(); ++i) { currentProb = 0.0; currentStock = current.getState(); purchase = action.getPurchases(); possibleStock = possible.getState(); row = currentStock.get(i) + purchase.get(i); column = row - possible.getState().get(i); currentMatrix = this.probabilities.get(i); if (column < 0 || column >= currentMatrix.getNumCols() || row >= currentMatrix.getNumRows()) { // Invalid state probs.add(0.0); continue; } if (possibleStock.get(i) > 0 || (possibleStock.get(i) == 0 && column == 0)) { // Sufficiently provided currentProb = currentMatrix.get(row, column); } else if (possibleStock.get(i) == 0 && column > 0) { // Range of probabilities because user could have eaten plenty for (int j = column; j < currentMatrix.getNumCols(); ++j) { currentProb += currentMatrix.get(row, j); } } probs.add(currentProb); } return probs; }
/** * Find out the maximum value for a given map of actions * * @param actions - The map of actions to their values * @return Return the action with the largest value out of all the actions */ private Action maxArg(Map<Action, Double> actions) { Action largestValue = null; // The largest value for (Action current : actions.keySet()) { if (largestValue == null || (actions.get(largestValue) < actions.get(current))) { largestValue = current; } } return largestValue; }
/** * Return the sum of the integers in the list * * @param integerMap - The map of integers * @return - the sum of integers in the list */ private int sumOf(Map<Integer, Integer> integerList) { int sum = 0; // The sum for (Integer current : integerList.keySet()) { sum += integerList.get(current); } return sum; }
/** Generate optimal policy */ private void generateOptimalPolicy() { double startTime, currentTime; // Timer things boolean alreadyRunThrough = false; // Record if we've already done one run Set<State> toLookup; // The states to lookup Map<Action, Double> differentValues = new HashMap<Action, Double>(); // The different values for the current state Map<State, Action> newPolicy = new HashMap<State, Action>(); // New policy State newState; // New state for new policy Action best; // The best action startTime = Global.currentTime(); currentTime = Global.currentTime(); while ((currentTime - startTime) <= this.timeRemaining) { if (alreadyRunThrough) { toLookup = this.policy.keySet(); } else { toLookup = this.possibleStates; } for (State currentState : toLookup) { differentValues.clear(); // Reset different values for (Action currentAction : this.possibleActions) { if (!validAction(currentAction, currentState)) { continue; } differentValues.put( currentAction, valueGeneration(currentState, currentAction, toLookup)); } best = maxArg(differentValues); if (isBetterPolicy(currentState, differentValues.get(best))) { newState = new State(currentState.getState()); newState.setTemporaryCost(differentValues.get(best)); newPolicy.put(newState, best); } } if (CheckDifference(0.1, newPolicy)) { copyPolicy(newPolicy); newPolicy.clear(); alreadyRunThrough = true; } else { currentTime = Global.currentTime(); break; } } }
/**
 * Replace the current policy with a copy of the given one.
 *
 * <p>Each state is rebuilt from the source state's stock map and its cost is set to the source
 * state's temporary cost; each action is rebuilt from the source action's purchases.
 *
 * @param newPolicy - The policy to copy from
 */
private void copyPolicy(Map<State, Action> newPolicy) {
    this.policy = new HashMap<State, Action>();

    for (Map.Entry<State, Action> entry : newPolicy.entrySet()) {
        State source = entry.getKey();

        State copiedState = new State(source.getState());
        copiedState.setCost(source.getTemporaryCost());

        this.policy.put(copiedState, new Action(entry.getValue().getPurchases()));
    }
}
public List<Integer> generateShoppingList(List<Integer> inventory, int numWeeksLeft) { State current; // current state Map<Integer, Integer> purchases = new HashMap<Integer, Integer>(); // Purchases Map<Integer, Integer> map = new HashMap<Integer, Integer>(); // Mapping of integers List<Integer> shopping = new ArrayList<Integer>(); // Shopping items for (int i = 0; i < inventory.size(); ++i) { map.put(i, inventory.get(i)); } current = new State(map); purchases = this.policy.get(current).getPurchases(); for (int i = 0; i < inventory.size(); ++i) { shopping.add(purchases.get(i)); } return shopping; }