/**
 * Learns the policy by performing value-iteration sweeps.
 *
 * @param sweeps = number of sweeps to execute
 * @throws OptimizationException
 */
public void learn(int sweeps) throws OptimizationException {
    largestDif = 0.0;
    // temporary table to store the new V-values
    StateRepV newV = new StateRepV(0.0, false);
    for (int i = 0; i < sweeps; i++) {
        double diff = 0.0;
        // for each state
        for (int state = 0; state < StateRepV.nrStates; state++) {
            // solve the set of equations
            double[] values = solveEquations(state);
            // calculate the difference in V-value
            if (state != 0) {
                diff = Math.abs(vValues.getV(state) - values[Action.nrActions]);
                if (diff > largestDif) {
                    largestDif = diff;
                }
                newV.setValue(state, values[Action.nrActions]);
            }
            // repair the values returned by the solver if necessary
            for (int a = 0; a < Action.nrActions; a++) {
                if (values[a] < 0.00000001) {
                    policy.setValue(state, Action.getAction(a), 0.00000001);
                } else if (values[a] > 0.99999999) {
                    policy.setValue(state, Action.getAction(a), 1.0);
                } else {
                    policy.setValue(state, Action.getAction(a), values[a]);
                }
            }
        }
        // put the new values in the V-table
        vValues = newV;
        newV = new StateRepV(init, false);
    }
}
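// The method above follows the standard synchronous value-iteration pattern: compute a
// full sweep of backed-up values into a temporary table, track the largest change, then
// swap the table in. Below is a minimal, self-contained sketch of that pattern, with a
// toy contraction standing in for solveEquations(state); it is illustrative only and not
// part of the class above.
public class ValueIterationSketch {
    public static void main(String[] args) {
        double[] v = new double[10];          // toy V-table, all states start at 0
        double tolerance = 1e-6;
        double largestDif;
        do {
            largestDif = 0.0;
            double[] newV = new double[v.length];
            for (int state = 0; state < v.length; state++) {
                // stand-in for solveEquations(state): a toy backup with fixed point V = 2
                double backedUp = 1.0 + 0.5 * v[state];
                largestDif = Math.max(largestDif, Math.abs(backedUp - v[state]));
                newV[state] = backedUp;
            }
            v = newV;                          // swap in the new table, as learn() does
        } while (largestDif > tolerance);
        System.out.println("converged to V = " + v[0]);
    }
}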
@Override
public void onStart() {
    super.onStart();

    SharedPreferences settings = PlaybackService.getSettings(this);
    if (mDisplayMode != Integer.parseInt(settings.getString(PrefKeys.DISPLAY_MODE, "2"))) {
        finish();
        startActivity(new Intent(this, FullPlaybackActivity.class));
    }

    mCoverPressAction = Action.getAction(settings, PrefKeys.COVER_PRESS_ACTION, Action.ToggleControls);
    mCoverLongPressAction = Action.getAction(settings, PrefKeys.COVER_LONGPRESS_ACTION, Action.PlayPause);
}
/**
 * Maximizes V while not violating the constraints.
 *
 * @param state = state the maximization has to take place for
 * @return = array with values for the pi's and V
 * @throws OptimizationException
 */
private double[] solveEquations(int state) throws OptimizationException {
    Collection<LinearConstraint> constraints = new ArrayList<LinearConstraint>();
    // for each possible action of the prey
    for (int preyAction = 0; preyAction < Action.nrActions; preyAction++) {
        // initialize the weights for this constraint
        double[] Q = new double[Action.nrActions + 1];
        // for each possible action of the predator
        for (int predAction = 0; predAction < Action.nrActions; predAction++) {
            int newStatePred = policy.getLinearIndexForAction(state, Action.getAction(predAction));
            int newStatePrey = policy.getLinearIndexForAction(newStatePred, Action.getReverseAction(preyAction));
            // calculate the expected reward R(s,a,o)
            double expReward = 0;
            if (preyAction == Action.Wait.getIntValue()) {
                expReward = policy.getReward(newStatePrey, false);
            } else {
                expReward = policy.getReward(newStatePrey, false) * (1.0 - Ptrip)
                        + policy.getReward(newStatePred, false) * Ptrip;
            }
            // add the weight to the constraint for this action combination
            if (preyAction == Action.Wait.getIntValue()) {
                Q[predAction] = expReward + learningRate * vValues.getV(newStatePrey);
            } else {
                Q[predAction] = expReward + learningRate * vValues.getV(newStatePrey) * (1.0 - Ptrip)
                        + learningRate * vValues.getV(newStatePred) * Ptrip;
            }
        }
        // add the constraint weight for V
        Q[Action.nrActions] = -1.0;
        // add the constraint
        constraints.add(new LinearConstraint(Q, Relationship.GEQ, 0));
    }
    // add the constraints that each action probability must be >= 0
    for (int predAction = 0; predAction < Action.nrActions; predAction++) {
        double[] constraintProb = new double[Action.nrActions + 1];
        Arrays.fill(constraintProb, 0.0);
        constraintProb[predAction] = 1.0;
        constraints.add(new LinearConstraint(constraintProb, Relationship.GEQ, 0));
    }
    // add the constraint that the action probabilities sum to one
    double[] sumToOne = new double[Action.nrActions + 1];
    Arrays.fill(sumToOne, 1.0);
    sumToOne[Action.nrActions] = 0.0;
    constraints.add(new LinearConstraint(sumToOne, Relationship.EQ, 1.0));
    // build the objective function: maximize V
    double[] objective = new double[Action.nrActions + 1];
    Arrays.fill(objective, 0.0);
    objective[Action.nrActions] = 1.0;
    LinearObjectiveFunction f = new LinearObjectiveFunction(objective, 0);
    // solve and return
    RealPointValuePair solution = new SimplexSolver().optimize(f, constraints, GoalType.MAXIMIZE, false);
    return solution.getPoint();
}
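// The constraints above encode the standard minimax linear program: maximize V subject to
// sum_a pi(a) * Q(s,a,o) >= V for every opponent action o, pi(a) >= 0, and sum_a pi(a) = 1,
// so the returned point holds the action probabilities followed by V in its last entry.
// Below is a self-contained sketch of the same formulation on a 2x2 zero-sum game
// (matching pennies), assuming the Apache Commons Math 2.x optimization API that the class
// names above suggest; the solver should return pi = (0.5, 0.5) with game value V = 0.
import java.util.ArrayList;
import java.util.Collection;
import org.apache.commons.math.optimization.GoalType;
import org.apache.commons.math.optimization.OptimizationException;
import org.apache.commons.math.optimization.RealPointValuePair;
import org.apache.commons.math.optimization.linear.LinearConstraint;
import org.apache.commons.math.optimization.linear.LinearObjectiveFunction;
import org.apache.commons.math.optimization.linear.Relationship;
import org.apache.commons.math.optimization.linear.SimplexSolver;

public class MinimaxLpSketch {
    public static void main(String[] args) throws OptimizationException {
        // payoff[a][o]: reward to the maximizer for action a against opponent action o
        double[][] payoff = { { 1.0, -1.0 }, { -1.0, 1.0 } };
        Collection<LinearConstraint> constraints = new ArrayList<LinearConstraint>();
        // variables are [pi_0, pi_1, V]; for each opponent action o: sum_a pi_a * payoff[a][o] - V >= 0
        for (int o = 0; o < 2; o++) {
            double[] c = { payoff[0][o], payoff[1][o], -1.0 };
            constraints.add(new LinearConstraint(c, Relationship.GEQ, 0));
        }
        // pi_a >= 0 for each action
        constraints.add(new LinearConstraint(new double[] { 1, 0, 0 }, Relationship.GEQ, 0));
        constraints.add(new LinearConstraint(new double[] { 0, 1, 0 }, Relationship.GEQ, 0));
        // probabilities sum to one
        constraints.add(new LinearConstraint(new double[] { 1, 1, 0 }, Relationship.EQ, 1.0));
        // objective: maximize V
        LinearObjectiveFunction f = new LinearObjectiveFunction(new double[] { 0, 0, 1 }, 0);
        RealPointValuePair solution = new SimplexSolver().optimize(f, constraints, GoalType.MAXIMIZE, false);
        double[] x = solution.getPoint();
        System.out.println("pi = (" + x[0] + ", " + x[1] + "), V = " + x[2]);
    }
}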
/**
 * Returns the probability distribution over actions for a given state in the environment.
 *
 * @param prey = position of the prey
 * @param predatorItself = position of the predator
 * @return probability distribution over actions
 */
@Override
public double[] policy(Position prey, Position predatorItself) {
    double[] pActions = new double[Action.nrActions];
    int linIndex = vValues.getLinearIndex(prey, predatorItself);
    for (int i = 0; i < Action.nrActions; i++) {
        int index = vValues.getMove(predatorItself, prey, i, false);
        pActions[index] = policy.getValue(linIndex, Action.getAction(i));
    }
    return pActions;
}
/**
 * Performs a move based on the policy of the predator.
 *
 * @param others = array list with the positions of the other agents (the prey at index 0)
 * @param isPrey = whether this agent is the prey
 */
@Override
public void doMove(ArrayList<Position> others, boolean isPrey) {
    preyPos = new Position(others.get(0));
    int linIndex = policy.getLinearIndexFromPositions(myPos, preyPos);
    double[] prob = policy.getStateActionPairValues(linIndex);
    // build the cumulative probability distribution over actions
    double[] probCum = new double[Action.nrActions];
    probCum[0] = prob[0];
    for (int i = 1; i < Action.nrActions; i++) {
        probCum[i] = probCum[i - 1] + prob[i];
    }
    probCum[Action.nrActions - 1] = 1.0;
    // sample an action from the distribution
    double p = Math.random();
    int action = -1;
    for (int i = 0; i < Action.nrActions; i++) {
        if (p <= probCum[i]) {
            action = i;
            break;
        }
    }
    myPos.adjustPosition(policy.getMove(myPos, preyPos, Action.getAction(action), false));
}
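// The action above is drawn by inverse-transform sampling: build the cumulative
// distribution, draw a uniform random number, and take the first index whose cumulative
// value covers it. Below is a minimal, self-contained sketch of that sampling step
// (an illustrative helper, not part of the class above).
import java.util.Random;

public class CategoricalSamplingSketch {
    /** Draws an index from a discrete distribution via its cumulative sums. */
    static int sample(double[] prob, Random rng) {
        double[] cum = new double[prob.length];
        cum[0] = prob[0];
        for (int i = 1; i < prob.length; i++) {
            cum[i] = cum[i - 1] + prob[i];
        }
        cum[prob.length - 1] = 1.0;       // guard against rounding error, as doMove() does
        double p = rng.nextDouble();
        for (int i = 0; i < cum.length; i++) {
            if (p <= cum[i]) {
                return i;
            }
        }
        return cum.length - 1;            // unreachable when the distribution sums to 1
    }

    public static void main(String[] args) {
        double[] prob = { 0.1, 0.2, 0.3, 0.4 };
        int[] counts = new int[prob.length];
        Random rng = new Random(42);
        for (int i = 0; i < 100000; i++) {
            counts[sample(prob, rng)]++;
        }
        // counts should be roughly 10000 / 20000 / 30000 / 40000
        System.out.println(java.util.Arrays.toString(counts));
    }
}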