public boolean[] getAction() { // Take random action with some probability if (numGenerator.nextFloat() < getEpsilonGreedy()) { int randIndex = numGenerator.nextInt(possibleActions.size()); action = possibleActions.get(randIndex); } // Otherwise return best action (calculated in integrateObservation) return action; }
private boolean[] findBestAction(Environment env, int[] state) { ArrayList<boolean[]> allActions = getPossibleActions(env); // Calculate score for all possible next actions double[] qscores = new double[allActions.size()]; for (int i = 0; i < allActions.size(); i++) { boolean[] action = allActions.get(i); StateActionPair sap = new StateActionPair(state, action); qscores[i] = evalScore(sap); } // Find ArgMax over all actions using calculated scores int ind = numGenerator.nextInt(allActions.size()); boolean[] bestAction = allActions.get(ind); bestScore = qscores[ind]; for (int i = 1; i < allActions.size(); i++) { if (qscores[i] > bestScore) { bestScore = qscores[i]; bestAction = allActions.get(i); } } // System.out.println(bestScore); return (bestAction); }
private double[][] extractFeatures(StateActionPair sap) { // Feature extractor int[] state = sap.getState(); double[][] features = new double[1][numFeatures]; int ind = 0; for (int i = 0; i < state.length; i++) { features[0][ind] = (state[i] == 0) ? 0.0 : 1.0; ind++; } for (int i = 0; i < possibleActions.size(); i++) { if (Arrays.equals(possibleActions.get(i), sap.getAction())) { features[0][ind] = 1.0; break; } ind++; } // Bias term features[0][numFeatures - 1] = 1.0; return (features); }