/**
 * Debugging aid: writes a solved linear program to standard output.
 * <p>
 * The first {@code nrVar} entries of the solution point are printed as
 * {@code pi0 .. pi(nrVar-1)}, followed by the solution's value and by the
 * entry of the solution point at index {@code nrVar}.
 * </p>
 *
 * @param solution = the solution to print
 * @param nrVar = the number of leading entries of the solution point that are printed as pi's
 */
private void printSolution(RealPointValuePair solution, int nrVar) {
    final double[] point = solution.getPoint();

    // one "pi<index>=<value> " token per variable, all on the same line
    int index = 0;
    while (index < nrVar) {
        System.out.print("pi" + index + "=" + point[index] + " ");
        index++;
    }

    // value reported by the solver, then the entry stored right after the pi's
    System.out.println(" V = " + solution.getValue());
    System.out.println(" V = " + point[nrVar] + "\n\n");
}
/** * Maximize V while not violating the constraints * * @param state = state the maximization has to take place for * @return = array with values for pi's and v * @throws OptimizationException */ private double[] solveEquations(int state) throws OptimizationException { Collection constraints = new ArrayList(); // for each possible action of the prey for (int preyAction = 0; preyAction < Action.nrActions; preyAction++) { // initialize weigths for this constraint double[] Q = new double[Action.nrActions + 1]; // for each possible action of the predator for (int predAction = 0; predAction < Action.nrActions; predAction++) { int newStatePred = policy.getLinearIndexForAction(state, Action.getAction(predAction)); int newStatePrey = policy.getLinearIndexForAction(newStatePred, Action.getReverseAction(preyAction)); // calculate expected reward R(s,a,o) double expReward = 0; if (preyAction == Action.Wait.getIntValue()) { expReward = policy.getReward(newStatePrey, false); } else { expReward = policy.getReward(newStatePrey, false) * (1.0 - Ptrip) + policy.getReward(newStatePred, false) * Ptrip; } // add weight to constraint for this combitnation if (preyAction == Action.Wait.getIntValue()) { Q[predAction] = expReward + learningRate * vValues.getV(newStatePrey); } else { Q[predAction] = expReward + learningRate * vValues.getV(newStatePrey) * (1.0 - Ptrip) + learningRate * vValues.getV(newStatePred) * Ptrip; } } // add constraint weight for V Q[Action.nrActions] = -1.0; // add constraint constraints.add(new LinearConstraint(Q, Relationship.GEQ, 0)); } // add constraints that probabilities need to be > 0 for (int predAction = 0; predAction < Action.nrActions; predAction++) { double[] constraintProb = new double[Action.nrActions + 1]; Arrays.fill(constraintProb, 0.0); constraintProb[predAction] = 1.0; constraints.add(new LinearConstraint(constraintProb, Relationship.GEQ, 0)); } // add total is zero constraint double[] totalZero = new double[Action.nrActions + 1]; 
Arrays.fill(totalZero, 1.0); totalZero[Action.nrActions] = 0.0; constraints.add(new LinearConstraint(totalZero, Relationship.EQ, 1.0)); // build objective function double[] objective = new double[Action.nrActions + 1]; Arrays.fill(objective, 0.0); objective[Action.nrActions] = 1.0; LinearObjectiveFunction f = new LinearObjectiveFunction(objective, 0); // solve and return RealPointValuePair solution = new SimplexSolver().optimize(f, constraints, GoalType.MAXIMIZE, false); return solution.getPoint(); }
/** * Evaluate all the non-evaluated points of the simplex. * * @param comparator comparator to use to sort simplex vertices from best to worst * @exception FunctionEvaluationException if no value can be computed for the parameters * @exception OptimizationException if the maximal number of evaluations is exceeded */ protected void evaluateSimplex(final Comparator<RealPointValuePair> comparator) throws FunctionEvaluationException, OptimizationException { // evaluate the objective function at all non-evaluated simplex points for (int i = 0; i < simplex.length; ++i) { final RealPointValuePair vertex = simplex[i]; final double[] point = vertex.getPointRef(); if (Double.isNaN(vertex.getValue())) { simplex[i] = new RealPointValuePair(point, evaluate(point), false); } } // sort the simplex from best to worst Arrays.sort(simplex, comparator); }
/** {@inheritDoc} */ @Override protected void iterateSimplex(final Comparator<RealPointValuePair> comparator) throws FunctionEvaluationException, OptimizationException { incrementIterationsCounter(); // the simplex has n+1 point if dimension is n final int n = simplex.length - 1; // interesting values final RealPointValuePair best = simplex[0]; final RealPointValuePair secondBest = simplex[n - 1]; final RealPointValuePair worst = simplex[n]; final double[] xWorst = worst.getPointRef(); // compute the centroid of the best vertices // (dismissing the worst point at index n) final double[] centroid = new double[n]; for (int i = 0; i < n; ++i) { final double[] x = simplex[i].getPointRef(); for (int j = 0; j < n; ++j) { centroid[j] += x[j]; } } final double scaling = 1.0 / n; for (int j = 0; j < n; ++j) { centroid[j] *= scaling; } // compute the reflection point final double[] xR = new double[n]; for (int j = 0; j < n; ++j) { xR[j] = centroid[j] + rho * (centroid[j] - xWorst[j]); } final RealPointValuePair reflected = new RealPointValuePair(xR, evaluate(xR), false); if ((comparator.compare(best, reflected) <= 0) && (comparator.compare(reflected, secondBest) < 0)) { // accept the reflected point replaceWorstPoint(reflected, comparator); } else if (comparator.compare(reflected, best) < 0) { // compute the expansion point final double[] xE = new double[n]; for (int j = 0; j < n; ++j) { xE[j] = centroid[j] + khi * (xR[j] - centroid[j]); } final RealPointValuePair expanded = new RealPointValuePair(xE, evaluate(xE), false); if (comparator.compare(expanded, reflected) < 0) { // accept the expansion point replaceWorstPoint(expanded, comparator); } else { // accept the reflected point replaceWorstPoint(reflected, comparator); } } else { if (comparator.compare(reflected, worst) < 0) { // perform an outside contraction final double[] xC = new double[n]; for (int j = 0; j < n; ++j) { xC[j] = centroid[j] + gamma * (xR[j] - centroid[j]); } final RealPointValuePair outContracted = new 
RealPointValuePair(xC, evaluate(xC), false); if (comparator.compare(outContracted, reflected) <= 0) { // accept the contraction point replaceWorstPoint(outContracted, comparator); return; } } else { // perform an inside contraction final double[] xC = new double[n]; for (int j = 0; j < n; ++j) { xC[j] = centroid[j] - gamma * (centroid[j] - xWorst[j]); } final RealPointValuePair inContracted = new RealPointValuePair(xC, evaluate(xC), false); if (comparator.compare(inContracted, worst) < 0) { // accept the contraction point replaceWorstPoint(inContracted, comparator); return; } } // perform a shrink final double[] xSmallest = simplex[0].getPointRef(); for (int i = 1; i < simplex.length; ++i) { final double[] x = simplex[i].getPoint(); for (int j = 0; j < n; ++j) { x[j] = xSmallest[j] + sigma * (x[j] - xSmallest[j]); } simplex[i] = new RealPointValuePair(x, Double.NaN, false); } evaluateSimplex(comparator); } }