コード例 #1
0
  /**
   * Transition function. For every item index it looks up how likely it is to end up with the
   * quantity recorded in {@code possible} after applying {@code action} to {@code current}.
   *
   * <p>The per-item probability matrix is addressed with the stock after purchasing as the row
   * and the amount that must have been consumed as the column. Indices that fall outside the
   * matrix contribute a probability of {@code 0.0}.
   *
   * @param current - The current state
   * @param action - The action
   * @param possible - The possible state
   * @return The list of probabilities where each index refers to the probability of that item
   */
  private List<Double> transition(State current, Action action, State possible) {
    List<Double> result = new ArrayList<Double>();

    for (int item = 0; item < current.getState().size(); ++item) {
      Map<Integer, Integer> stockNow = current.getState();
      Map<Integer, Integer> bought = action.getPurchases();
      Map<Integer, Integer> stockAfter = possible.getState();

      // Row: what is in the fridge once the purchase arrives.
      int stocked = stockNow.get(item) + bought.get(item);
      // Column: how much has to disappear to reach the candidate state.
      int eaten = stocked - possible.getState().get(item);
      Matrix matrix = this.probabilities.get(item);

      boolean outOfRange =
          eaten < 0 || eaten >= matrix.getNumCols() || stocked >= matrix.getNumRows();
      if (outOfRange) { // Invalid state
        result.add(0.0);
        continue;
      }

      int remaining = stockAfter.get(item);
      double prob = 0.0;
      if (remaining > 0 || (remaining == 0 && eaten == 0)) {
        // Demand was fully met, so exactly one matrix cell applies.
        prob = matrix.get(stocked, eaten);
      } else if (remaining == 0) {
        // Item ran out (eaten > 0 here): any demand of at least `eaten` ends in this state,
        // so accumulate the rest of the row.
        for (int demand = eaten; demand < matrix.getNumCols(); ++demand) {
          prob += matrix.get(stocked, demand);
        }
      }
      result.add(prob);
    }
    return result;
  }
コード例 #2
0
  /**
   * Wraps the given map in the container selected by {@code generateType} and registers it in the
   * matching collection if an equal element is not already present.
   *
   * <p>Type 0 creates a {@code State} (also assigning its cost from {@code sumOf(toAdd)} and
   * seeding the policy with a {@code null} action), type 1 an {@code Action}, type 2 a
   * {@code Consumption}. Any other value prints an error and exits with status 13.
   *
   * @param generateType - The type of item to add
   * @param toAdd - The map to add
   */
  private void addToList(int generateType, Map<Integer, Integer> toAdd) {
    if (generateType == 0) {
      State candidate = new State(toAdd);
      if (this.possibleStates.contains(candidate)) {
        return; // Already known
      }
      candidate.setCost(sumOf(toAdd));
      this.policy.put(candidate, null);
      this.possibleStates.add(candidate);
    } else if (generateType == 1) {
      Action candidate = new Action(toAdd);
      if (this.possibleActions.contains(candidate)) {
        return; // Already known
      }
      this.possibleActions.add(candidate);
    } else if (generateType == 2) {
      Consumption candidate = new Consumption(toAdd);
      if (this.possibleConsumptions.contains(candidate)) {
        return; // Already known
      }
      this.possibleConsumptions.add(candidate);
    } else {
      System.err.println("Invalid generation type");
      System.exit(13);
    }
  }
コード例 #3
0
  /**
   * Replaces the current policy with a fresh map rebuilt from {@code newPolicy}.
   *
   * <p>Each entry is re-created: a new {@code State} is built from the source state's map and
   * receives the source's temporary cost as its cost, and a new {@code Action} is built from the
   * source action's purchases.
   *
   * @param newPolicy - The new policy
   */
  private void copyPolicy(Map<State, Action> newPolicy) {
    this.policy = new HashMap<State, Action>();

    for (Map.Entry<State, Action> entry : newPolicy.entrySet()) {
      State source = entry.getKey();
      State clonedState = new State(source.getState());
      // The temporary cost computed during the sweep becomes the stored cost.
      clonedState.setCost(source.getTemporaryCost());
      Action clonedAction = new Action(entry.getValue().getPurchases());
      this.policy.put(clonedState, clonedAction);
    }
  }
コード例 #4
0
  /**
   * Check if the given action is valid or not. An action is valid when taking it in the given
   * state does not over-stock the fridge.
   *
   * @param currentAction - The action we are looking at
   * @param currentState - The state we are looking at
   * @return true if the items already in the state plus the purchased items do not exceed the
   *     capacity of the fridge. false if the action would over-stock it
   */
  private boolean validAction(Action currentAction, State currentState) {
    int sumOfAction = sumOf(currentAction.getPurchases());
    int sumOfState = sumOf(currentState.getState());

    return (sumOfAction + sumOfState) <= this.fridge.getCapacity();
  }
コード例 #5
0
  /**
   * Check if the end state is valid. This means check that the difference between the sums of both
   * states are not greater than total number of items in fridge * maximum consumption of one item.
   *
   * <p>Will also check, for each item index, that the difference between the quantities held in
   * both states is not greater than the maximum consumption of one item.
   *
   * @param currentState The current state
   * @param endState The end state
   * @return true if the end state is valid. false otherwise
   */
  private boolean IsValidEndState(State currentState, State endState) {
    Map<Integer, Integer> currentStock = currentState.getState();
    Map<Integer, Integer> endStock = endState.getState();

    /* Overall bound on the change in total stock */
    int totalDifference = Math.abs(sumOf(currentStock) - sumOf(endStock));
    if (totalDifference > (this.fridge.getMaxTypes() * this.fridge.getMaxItemsPerType())) {
      return false;
    }

    /*
     * Per-item bound. Compares the quantities of item i in both states; the previous version
     * re-tested the total difference here and never looked at individual items.
     * Assumes both states are keyed by the same item indices 0..size-1.
     */
    for (int i = 0; i < currentStock.size(); ++i) {
      int itemDifference = Math.abs(currentStock.get(i) - endStock.get(i));
      if (itemDifference > this.fridge.getMaxItemsPerType()) {
        return false;
      }
    }

    return true;
  }
コード例 #6
0
  /**
   * Generate optimal policy.
   *
   * <p>Repeatedly sweeps over the states, evaluates every valid action for each state with {@code
   * valueGeneration}, and records the best action in a candidate policy whenever {@code
   * isBetterPolicy} accepts it. After each sweep the candidate replaces the current policy as
   * long as {@code CheckDifference} still reports a change; otherwise the loop stops. The loop
   * also stops once more than {@code timeRemaining} has elapsed, measured at the start of each
   * sweep.
   */
  private void generateOptimalPolicy() {
    boolean alreadyRunThrough = false; // Record if we've already done one run
    Set<State> toLookup; // The states to lookup
    Map<Action, Double> differentValues =
        new HashMap<Action, Double>(); // The different values for the current state
    Map<State, Action> newPolicy = new HashMap<State, Action>(); // New policy

    double startTime = Global.currentTime();
    // The clock is re-read before every sweep so the time budget is actually enforced;
    // previously the elapsed time was only refreshed right before leaving the loop.
    while ((Global.currentTime() - startTime) <= this.timeRemaining) {
      if (alreadyRunThrough) {
        toLookup = this.policy.keySet();
      } else {
        toLookup = this.possibleStates;
      }
      for (State currentState : toLookup) {
        differentValues.clear(); // Reset different values
        for (Action currentAction : this.possibleActions) {
          if (validAction(currentAction, currentState)) {
            differentValues.put(
                currentAction, valueGeneration(currentState, currentAction, toLookup));
          }
        }
        Action best = maxArg(differentValues);
        Double bestValue = differentValues.get(best);
        if (isBetterPolicy(currentState, bestValue)) {
          State newState = new State(currentState.getState());
          newState.setTemporaryCost(bestValue);
          newPolicy.put(newState, best);
        }
      }
      if (!CheckDifference(0.1, newPolicy)) {
        break; // Values stopped changing enough; keep the current policy
      }
      copyPolicy(newPolicy);
      newPolicy.clear();
      alreadyRunThrough = true;
    }
  }
コード例 #7
0
  /**
   * Decides whether a newly computed action value should be adopted for a state.
   *
   * <p>The answer is yes when the policy has no entry for the state, or when the cost stored on
   * the state is less than or equal to the new value.
   *
   * @param current - The current state
   * @param newActionValue - The value of the new action
   * @return true if we have a better policy. False otherwise
   */
  private boolean isBetterPolicy(State current, Double newActionValue) {
    // Short-circuit keeps the comparison from running for states without a policy entry.
    return !this.policy.containsKey(current) || current.getCost() <= newActionValue;
  }
コード例 #8
0
 /**
  * Value generation. Sums, over every possible next state, the per-item transition
  * probabilities multiplied by that state's immediate reward plus its discounted cost.
  *
  * @param state - The current state
  * @param action - The current action
  * @param lookup - The set of states we are looking up (currently unused; all of {@code
  *     possibleStates} is always scanned)
  * @return The total value for that state, or {@code null} when there are no possible states
  */
 private Double valueGeneration(State state, Action action, Set<State> lookup) {
   Double total = null; // Stays null until the first possible state is processed
   for (State possible : this.possibleStates) {
     List<Double> itemProbs = transition(state, action, possible);
     // Same for every item of this next state, so compute it once.
     double backedUp = reward(possible) + this.spec.getDiscountFactor() * possible.getCost();

     // Start from zero for each next state: carrying the previous states' sum over would add
     // their contribution to the total again on every iteration.
     double contribution = 0.0;
     for (double prob : itemProbs) {
       contribution += prob * backedUp;
     }

     total = (total == null) ? contribution : total + contribution;
   }
   return total;
 }
コード例 #9
0
  /**
   * Print out important values.
   *
   * <p>Mode 0 prints the possible states, 1 the possible actions, 2 the possible consumptions
   * and 3 the current policy (state and chosen purchases). Any other mode reports an error and
   * exits with status 100.
   *
   * @param mode - Indicates the type of values to print out.
   */
  private void printImportantValues(int mode) {
    if (mode == 3) { // Policies are printed in their own format
      int position = 1;
      for (State current : this.policy.keySet()) {
        System.out.println(
            "State "
                + position
                + ": "
                + current.getState().toString()
                + ". Do Action: "
                + this.policy.get(current).getPurchases().toString());
        position++;
      }
      return;
    }

    Set<?> toPrint = null; // The set to print
    if (mode == 0) { // States
      System.err.println("Printing out states");
      toPrint = this.possibleStates;
    } else if (mode == 1) { // Actions
      System.err.println("Printing out actions");
      toPrint = this.possibleActions;
    } else if (mode == 2) { // Consumption
      System.err.println("Printing out consumptions");
      toPrint = this.possibleConsumptions;
    } else { // Invalid mode
      System.err.println("Invalid mode for printing");
      System.exit(100);
    }

    if (toPrint == null) {
      System.err.println("Something really horrible went wrong");
      System.exit(150);
    }

    int position = 1;
    for (Object current : toPrint) {
      System.out.println("Value: " + position + current.toString());
      position++;
    }
  }
コード例 #10
0
  /**
   * Checks whether the values of the new policy still differ from the previous policy by more
   * than {@code val}.
   *
   * <p>For every state of {@code newPolicy} that has an equal state in the current policy, the
   * absolute difference between the new temporary cost and the old cost is taken; the smallest
   * of these differences is compared against {@code val}. This method only reads both policies.
   *
   * @param val - The difference threshold
   * @param newPolicy - The new policy
   * @return true if the previous policy does not yet cover every possible state, or if the
   *     minimum difference > val. false otherwise, including when no state of {@code newPolicy}
   *     matches the previous policy (nothing to compare, so nothing changed)
   */
  private boolean CheckDifference(double val, Map<State, Action> newPolicy) {
    if (this.policy.size() != this.possibleStates.size()) { // Previous policy wasn't assigned yet
      return true;
    }

    Double minDiff = null; // Smallest difference seen so far; null until a match is found
    for (State current : newPolicy.keySet()) {
      for (State old : this.policy.keySet()) {
        if (current.equals(old)) {
          double currDiff = Math.abs(current.getTemporaryCost() - old.getCost());
          if (minDiff == null || currDiff <= minDiff) {
            minDiff = currDiff;
          }
          break; // At most one key of the old policy can equal this state
        }
      }
    }

    // Guard against unboxing null when the new policy is empty or shares no state with the old
    // one; that used to throw a NullPointerException.
    return minDiff != null && minDiff > val;
  }
コード例 #11
0
  /**
   * Immediate reward of a state: the total number of items it holds minus the capacity of the
   * fridge.
   *
   * <p>The value grows with the number of stocked items, so a fully stocked fridge yields the
   * highest reward.
   *
   * @param current - The current state
   * @return The immediate reward for being in this state
   */
  private double reward(State current) {
    return sumOf(current.getState()) - this.fridge.getCapacity();
  }