Пример #1
0
  public boolean[] getAction() {

    // Take random action with some probability
    if (numGenerator.nextFloat() < getEpsilonGreedy()) {
      int randIndex = numGenerator.nextInt(possibleActions.size());
      action = possibleActions.get(randIndex);
    }
    // Otherwise return best action (calculated in integrateObservation)
    return action;
  }
Пример #2
0
  private boolean[] findBestAction(Environment env, int[] state) {
    ArrayList<boolean[]> allActions = getPossibleActions(env);

    // Calculate score for all possible next actions
    double[] qscores = new double[allActions.size()];
    for (int i = 0; i < allActions.size(); i++) {
      boolean[] action = allActions.get(i);
      StateActionPair sap = new StateActionPair(state, action);
      qscores[i] = evalScore(sap);
    }

    // Find ArgMax over all actions using calculated scores
    int ind = numGenerator.nextInt(allActions.size());
    boolean[] bestAction = allActions.get(ind);
    bestScore = qscores[ind];
    for (int i = 1; i < allActions.size(); i++) {
      if (qscores[i] > bestScore) {
        bestScore = qscores[i];
        bestAction = allActions.get(i);
      }
    }
    // System.out.println(bestScore);
    return (bestAction);
  }
Пример #3
0
 private double[][] extractFeatures(StateActionPair sap) {
   // Feature extractor
   int[] state = sap.getState();
   double[][] features = new double[1][numFeatures];
   int ind = 0;
   for (int i = 0; i < state.length; i++) {
     features[0][ind] = (state[i] == 0) ? 0.0 : 1.0;
     ind++;
   }
   for (int i = 0; i < possibleActions.size(); i++) {
     if (Arrays.equals(possibleActions.get(i), sap.getAction())) {
       features[0][ind] = 1.0;
       break;
     }
     ind++;
   }
   // Bias term
   features[0][numFeatures - 1] = 1.0;
   return (features);
 }