/**
  * Debug helper that dumps a solution to standard output.
  *
  * <p>Writes the first {@code nrVar} coordinates of the solution point as {@code pi<i>=<value>}
  * entries on a single line, followed by the value of the solution and by the coordinate stored
  * at index {@code nrVar}, both labelled {@code V}.
  *
  * @param solution = the solution to print
  * @param nrVar = the number of pi variables in the solution; the coordinate at index
  *     {@code nrVar} is read as well
  */
 private void printSolution(RealPointValuePair solution, int nrVar) {
   final double[] coordinates = solution.getPoint();
   int index = 0;
   while (index < nrVar) {
     final StringBuilder entry = new StringBuilder("pi");
     entry.append(index).append("=").append(coordinates[index]).append(" ");
     System.out.print(entry.toString());
     index++;
   }
   System.out.println(" V = " + solution.getValue());
   // read only after the first V line has been written, so output order is unchanged
   final double lastCoordinate = coordinates[nrVar];
   System.out.println(" V = " + lastCoordinate + "\n\n");
 }
  /**
   * Builds and solves the linear program that maximizes V for one state without violating the
   * constraints.
   *
   * <p>The program has {@code Action.nrActions + 1} variables: one probability pi per predator
   * action, followed by V in the last position. Three groups of constraints are added, in this
   * order:
   *
   * <ol>
   *   <li>for every prey action: {@code sum over predator actions of pi * Q - V >= 0};
   *   <li>for every predator action: {@code pi >= 0};
   *   <li>the pi's sum to one.
   * </ol>
   *
   * @param state = state the maximization has to take place for
   * @return = array with the values for the pi's followed by V
   * @throws OptimizationException if the simplex solver throws it while optimizing
   */
  private double[] solveEquations(int state) throws OptimizationException {
    Collection<LinearConstraint> constraints = new ArrayList<LinearConstraint>();

    // one constraint for each possible action of the prey
    for (int preyAction = 0; preyAction < Action.nrActions; preyAction++) {
      // does not depend on the predator action, so evaluate it once per prey action
      final boolean preyWaits = preyAction == Action.Wait.getIntValue();

      // weights for this constraint: one per predator action plus one for V
      double[] Q = new double[Action.nrActions + 1];

      // for each possible action of the predator
      for (int predAction = 0; predAction < Action.nrActions; predAction++) {
        int newStatePred = policy.getLinearIndexForAction(state, Action.getAction(predAction));
        int newStatePrey =
            policy.getLinearIndexForAction(newStatePred, Action.getReverseAction(preyAction));

        if (preyWaits) {
          // Wait: only the state after both moves is used, without any Ptrip weighting
          double expReward = policy.getReward(newStatePrey, false);
          Q[predAction] = expReward + learningRate * vValues.getV(newStatePrey);
        } else {
          // otherwise reward and value are mixed over two outcomes: the state after both moves
          // (weight 1 - Ptrip) and the state after only the predator's move (weight Ptrip)
          // NOTE(review): presumably Ptrip is the chance that the prey's move fails - confirm
          double expReward =
              policy.getReward(newStatePrey, false) * (1.0 - Ptrip)
                  + policy.getReward(newStatePred, false) * Ptrip;
          Q[predAction] =
              expReward
                  + learningRate * vValues.getV(newStatePrey) * (1.0 - Ptrip)
                  + learningRate * vValues.getV(newStatePred) * Ptrip;
        }
      }

      // coefficient of V, giving: sum(pi * Q) - V >= 0
      Q[Action.nrActions] = -1.0;
      constraints.add(new LinearConstraint(Q, Relationship.GEQ, 0));
    }

    // every probability has to be >= 0
    for (int predAction = 0; predAction < Action.nrActions; predAction++) {
      // a new array is already zero-filled, so only the selected pi needs a coefficient
      double[] constraintProb = new double[Action.nrActions + 1];
      constraintProb[predAction] = 1.0;
      constraints.add(new LinearConstraint(constraintProb, Relationship.GEQ, 0));
    }

    // the probabilities have to sum to one; V (last position) keeps coefficient 0
    double[] sumToOne = new double[Action.nrActions + 1];
    Arrays.fill(sumToOne, 0, Action.nrActions, 1.0);
    constraints.add(new LinearConstraint(sumToOne, Relationship.EQ, 1.0));

    // objective function: V only
    double[] objective = new double[Action.nrActions + 1];
    objective[Action.nrActions] = 1.0;
    LinearObjectiveFunction f = new LinearObjectiveFunction(objective, 0);

    // solve and return
    // NOTE(review): the trailing false is expected to leave the variables (notably V) free to
    // be negative - confirm against the SimplexSolver version in use
    RealPointValuePair solution =
        new SimplexSolver().optimize(f, constraints, GoalType.MAXIMIZE, false);
    return solution.getPoint();
  }
  /**
   * Computes the objective value of every simplex vertex that has none yet, then orders the
   * simplex.
   *
   * <p>A vertex counts as non-evaluated when its stored value is {@code NaN}. Such a vertex is
   * replaced by a new pair that holds the same point array together with the freshly evaluated
   * value. Afterwards the whole simplex is sorted with the given comparator.
   *
   * @param comparator comparator to use to sort simplex vertices from best to worst
   * @exception FunctionEvaluationException if no value can be computed for the parameters
   * @exception OptimizationException if the maximal number of evaluations is exceeded
   */
  protected void evaluateSimplex(final Comparator<RealPointValuePair> comparator)
      throws FunctionEvaluationException, OptimizationException {

    for (int idx = 0; idx < simplex.length; idx++) {
      final RealPointValuePair current = simplex[idx];
      if (!Double.isNaN(current.getValue())) {
        // this vertex already carries a value
        continue;
      }
      final double[] coordinates = current.getPointRef();
      simplex[idx] = new RealPointValuePair(coordinates, evaluate(coordinates), false);
    }

    // order the vertices according to the comparator
    Arrays.sort(simplex, comparator);
  }
  /**
   * {@inheritDoc}
   *
   * <p>One iteration tries, in this order, a reflection of the worst vertex through the centroid
   * of the other vertices, an expansion, an outside or inside contraction, and finally a shrink
   * of all vertices towards the first one (the scheme used by the Nelder-Mead method). The
   * simplex is expected to be sorted from best to worst on entry.
   */
  @Override
  protected void iterateSimplex(final Comparator<RealPointValuePair> comparator)
      throws FunctionEvaluationException, OptimizationException {

    incrementIterationsCounter();

    // a simplex in dimension dim is made of dim + 1 vertices
    final int dim = simplex.length - 1;

    // vertices the decisions below are based on
    final RealPointValuePair bestVertex = simplex[0];
    final RealPointValuePair nextToWorstVertex = simplex[dim - 1];
    final RealPointValuePair worstVertex = simplex[dim];
    final double[] worstCoords = worstVertex.getPointRef();

    // centroid of every vertex except the worst one (index dim)
    final double[] center = new double[dim];
    for (int v = 0; v < dim; v++) {
      final double[] coords = simplex[v].getPointRef();
      for (int k = 0; k < dim; k++) {
        center[k] += coords[k];
      }
    }
    final double inverseCount = 1.0 / dim;
    for (int k = 0; k < dim; k++) {
      center[k] *= inverseCount;
    }

    // reflection of the worst vertex through the centroid
    final double[] reflectedCoords = new double[dim];
    for (int k = 0; k < dim; k++) {
      reflectedCoords[k] = center[k] + rho * (center[k] - worstCoords[k]);
    }
    final RealPointValuePair reflected =
        new RealPointValuePair(reflectedCoords, evaluate(reflectedCoords), false);

    // reflected point is no better than the best but better than the next-to-worst: keep it
    if (comparator.compare(bestVertex, reflected) <= 0
        && comparator.compare(reflected, nextToWorstVertex) < 0) {
      replaceWorstPoint(reflected, comparator);
      return;
    }

    // reflected point beats the best vertex: try to go further in the same direction
    if (comparator.compare(reflected, bestVertex) < 0) {
      final double[] expandedCoords = new double[dim];
      for (int k = 0; k < dim; k++) {
        expandedCoords[k] = center[k] + khi * (reflectedCoords[k] - center[k]);
      }
      final RealPointValuePair expanded =
          new RealPointValuePair(expandedCoords, evaluate(expandedCoords), false);

      // keep the expansion only when it improves on the reflection
      final RealPointValuePair accepted =
          (comparator.compare(expanded, reflected) < 0) ? expanded : reflected;
      replaceWorstPoint(accepted, comparator);
      return;
    }

    // contraction: outside when the reflection improves on the worst vertex, inside otherwise
    final boolean outside = comparator.compare(reflected, worstVertex) < 0;
    final double[] contractedCoords = new double[dim];
    for (int k = 0; k < dim; k++) {
      if (outside) {
        contractedCoords[k] = center[k] + gamma * (reflectedCoords[k] - center[k]);
      } else {
        contractedCoords[k] = center[k] - gamma * (center[k] - worstCoords[k]);
      }
    }
    final RealPointValuePair contracted =
        new RealPointValuePair(contractedCoords, evaluate(contractedCoords), false);

    // an outside contraction is measured against the reflection, an inside one against the worst
    final boolean contractionAccepted;
    if (outside) {
      contractionAccepted = comparator.compare(contracted, reflected) <= 0;
    } else {
      contractionAccepted = comparator.compare(contracted, worstVertex) < 0;
    }
    if (contractionAccepted) {
      replaceWorstPoint(contracted, comparator);
      return;
    }

    // nothing was accepted: shrink every vertex but the first one towards the first one
    final double[] anchorCoords = simplex[0].getPointRef();
    for (int v = 1; v < simplex.length; v++) {
      final double[] shrunkCoords = simplex[v].getPoint();
      for (int k = 0; k < dim; k++) {
        shrunkCoords[k] = anchorCoords[k] + sigma * (shrunkCoords[k] - anchorCoords[k]);
      }
      // NaN marks the moved vertex as non-evaluated for evaluateSimplex
      simplex[v] = new RealPointValuePair(shrunkCoords, Double.NaN, false);
    }
    evaluateSimplex(comparator);
  }