/**
 * Prints a solution after maximizing v without violating the constraints (for debugging purposes).
 *
 * @param solution the solution returned by the simplex solver
 * @param nrVar    the number of probability variables (pi's) in the solution
 */
private void printSolution(RealPointValuePair solution, int nrVar) {
    for (int i = 0; i < nrVar; i++) {
        System.out.print("pi" + i + "=" + solution.getPoint()[i] + " ");
    }
    // both lines print the game value: once as the objective value and once as
    // the v component of the solution point; the two should agree
    System.out.println(" V = " + solution.getValue());
    System.out.println(" V = " + solution.getPoint()[nrVar] + "\n\n");
}
/**
 * Maximizes V while not violating the constraints.
 *
 * @param state the state the maximization has to take place for
 * @return array with the values for the pi's and v
 * @throws OptimizationException if the simplex solver cannot find a solution
 */
private double[] solveEquations(int state) throws OptimizationException {
    Collection<LinearConstraint> constraints = new ArrayList<LinearConstraint>();

    // for each possible action of the prey
    for (int preyAction = 0; preyAction < Action.nrActions; preyAction++) {
        // initialize the weights for this constraint
        double[] Q = new double[Action.nrActions + 1];

        // for each possible action of the predator
        for (int predAction = 0; predAction < Action.nrActions; predAction++) {
            int newStatePred = policy.getLinearIndexForAction(state, Action.getAction(predAction));
            int newStatePrey = policy.getLinearIndexForAction(newStatePred,
                    Action.getReverseAction(preyAction));

            // calculate the expected reward R(s,a,o); a waiting prey cannot trip
            double expReward;
            if (preyAction == Action.Wait.getIntValue()) {
                expReward = policy.getReward(newStatePrey, false);
            } else {
                expReward = policy.getReward(newStatePrey, false) * (1.0 - Ptrip)
                        + policy.getReward(newStatePred, false) * Ptrip;
            }

            // add the weight to the constraint for this action combination
            if (preyAction == Action.Wait.getIntValue()) {
                Q[predAction] = expReward + learningRate * vValues.getV(newStatePrey);
            } else {
                Q[predAction] = expReward
                        + learningRate * vValues.getV(newStatePrey) * (1.0 - Ptrip)
                        + learningRate * vValues.getV(newStatePred) * Ptrip;
            }
        }

        // coefficient -1 for V, so the constraint reads: sum_a pi_a * Q(s,a,o) - V >= 0
        Q[Action.nrActions] = -1.0;
        constraints.add(new LinearConstraint(Q, Relationship.GEQ, 0));
    }

    // add constraints that the probabilities need to be >= 0
    for (int predAction = 0; predAction < Action.nrActions; predAction++) {
        double[] constraintProb = new double[Action.nrActions + 1];
        constraintProb[predAction] = 1.0;
        constraints.add(new LinearConstraint(constraintProb, Relationship.GEQ, 0));
    }

    // add the constraint that the probabilities sum to one (V gets coefficient 0)
    double[] sumToOne = new double[Action.nrActions + 1];
    Arrays.fill(sumToOne, 1.0);
    sumToOne[Action.nrActions] = 0.0;
    constraints.add(new LinearConstraint(sumToOne, Relationship.EQ, 1.0));

    // build the objective function: maximize V
    double[] objective = new double[Action.nrActions + 1];
    objective[Action.nrActions] = 1.0;
    LinearObjectiveFunction f = new LinearObjectiveFunction(objective, 0);

    // solve and return
    RealPointValuePair solution = new SimplexSolver().optimize(f, constraints, GoalType.MAXIMIZE, false);
    return solution.getPoint();
}
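
/*
 * Illustrative sketch (not part of the original class): the same LP construction
 * applied to a plain zero-sum matrix game, rock-paper-scissors, as a sanity check
 * for the constraint layout used in solveEquations(). The method name
 * solveMatrixGameExample and the payoff matrix are hypothetical additions; the
 * sketch assumes the same Apache Commons Math 2.x types used above
 * (LinearConstraint, LinearObjectiveFunction, Relationship, GoalType,
 * SimplexSolver, RealPointValuePair). For rock-paper-scissors the solver should
 * return the uniform policy pi = (1/3, 1/3, 1/3) with game value V = 0.
 */
private double[] solveMatrixGameExample() throws OptimizationException {
    // payoff[a][o] = reward for our action a against opponent action o
    double[][] payoff = {
            {  0, -1,  1 },  // rock
            {  1,  0, -1 },  // paper
            { -1,  1,  0 },  // scissors
    };
    int n = payoff.length; // variables: pi_0 .. pi_{n-1}, then v
    Collection<LinearConstraint> constraints = new ArrayList<LinearConstraint>();

    // for each opponent action o: sum_a pi_a * payoff[a][o] - v >= 0
    for (int o = 0; o < n; o++) {
        double[] c = new double[n + 1];
        for (int a = 0; a < n; a++) {
            c[a] = payoff[a][o];
        }
        c[n] = -1.0;
        constraints.add(new LinearConstraint(c, Relationship.GEQ, 0));
    }

    // probabilities need to be >= 0
    for (int a = 0; a < n; a++) {
        double[] c = new double[n + 1];
        c[a] = 1.0;
        constraints.add(new LinearConstraint(c, Relationship.GEQ, 0));
    }

    // probabilities sum to one (v gets coefficient 0 in this row)
    double[] sumToOne = new double[n + 1];
    Arrays.fill(sumToOne, 0, n, 1.0);
    constraints.add(new LinearConstraint(sumToOne, Relationship.EQ, 1.0));

    // objective: maximize v
    double[] objective = new double[n + 1];
    objective[n] = 1.0;
    LinearObjectiveFunction f = new LinearObjectiveFunction(objective, 0);

    RealPointValuePair solution =
            new SimplexSolver().optimize(f, constraints, GoalType.MAXIMIZE, false);
    printSolution(solution, n); // expect pi_i = 1/3 for each action and V = 0
    return solution.getPoint();
}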