Code example #1
 /**
  * Prints the solution after maximizing V without violating the constraints, for debugging
  * purposes.
  *
  * @param solution = the solution returned by the solver
  * @param nrVar = the number of variables in the solution
  */
 private void printSolution(RealPointValuePair solution, int nrVar) {
   for (int i = 0; i < nrVar; i++) {
     System.out.print("pi" + i + "=" + solution.getPoint()[i] + " ");
   }
   System.out.println(" V = " + solution.getValue());
   System.out.println(" V = " + solution.getPoint()[nrVar] + "\n\n");
 }
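
For reference, a hedged usage sketch (not in the original code): in solveEquations in code example #2 below, the solver returns Action.nrActions probability values followed by V, so a debug call just before returning would presumably look like this.

   // Hypothetical debug call inside solveEquations, just before `return solution.getPoint();`
   printSolution(solution, Action.nrActions);
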
Code example #2
  /**
   * Maximize V while not violating the constraints
   *
   * @param state = the state for which the maximization takes place
   * @return array with the values for the pi's followed by V
   * @throws OptimizationException
   */
  private double[] solveEquations(int state) throws OptimizationException {
    Collection<LinearConstraint> constraints = new ArrayList<LinearConstraint>();
    // for each possible action of the prey
    for (int preyAction = 0; preyAction < Action.nrActions; preyAction++) {
      // initialize weights for this constraint
      double[] Q = new double[Action.nrActions + 1];
      // for each possible action of the predator
      for (int predAction = 0; predAction < Action.nrActions; predAction++) {
        int newStatePred = policy.getLinearIndexForAction(state, Action.getAction(predAction));
        int newStatePrey =
            policy.getLinearIndexForAction(newStatePred, Action.getReverseAction(preyAction));
        // calculate expected reward R(s,a,o)
        double expReward = 0;
        if (preyAction == Action.Wait.getIntValue()) {
          expReward = policy.getReward(newStatePrey, false);
        } else {
          expReward =
              policy.getReward(newStatePrey, false) * (1.0 - Ptrip)
                  + policy.getReward(newStatePred, false) * Ptrip;
        }
        // add weight to constraint for this combination
        if (preyAction == Action.Wait.getIntValue()) {
          Q[predAction] = expReward + learningRate * vValues.getV(newStatePrey);
        } else {
          Q[predAction] =
              expReward
                  + learningRate * vValues.getV(newStatePrey) * (1.0 - Ptrip)
                  + learningRate * vValues.getV(newStatePred) * Ptrip;
        }
      }
      // add constraint weight for V
      Q[Action.nrActions] = -1.0;
      // add constraint
      constraints.add(new LinearConstraint(Q, Relationship.GEQ, 0));
    }

    // add constraints that the probabilities must be >= 0
    for (int predAction = 0; predAction < Action.nrActions; predAction++) {
      double[] constraintProb = new double[Action.nrActions + 1];
      Arrays.fill(constraintProb, 0.0);
      constraintProb[predAction] = 1.0;
      constraints.add(new LinearConstraint(constraintProb, Relationship.GEQ, 0));
    }
    // add constraint that the probabilities sum to one (V gets a zero coefficient)
    double[] sumToOne = new double[Action.nrActions + 1];
    Arrays.fill(sumToOne, 1.0);
    sumToOne[Action.nrActions] = 0.0;
    constraints.add(new LinearConstraint(sumToOne, Relationship.EQ, 1.0));
    // build objective function
    double[] objective = new double[Action.nrActions + 1];
    Arrays.fill(objective, 0.0);
    objective[Action.nrActions] = 1.0;
    LinearObjectiveFunction f = new LinearObjectiveFunction(objective, 0);

    // solve and return
    RealPointValuePair solution =
        new SimplexSolver().optimize(f, constraints, GoalType.MAXIMIZE, false);
    return solution.getPoint();
  }
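
To show the same linear-programming technique in isolation, here is a minimal self-contained sketch, assuming Apache Commons Math 2.x on the classpath. The class name MatchingPenniesLP and the payoff matrix are invented for illustration and are not part of the original code. It maximizes the game value v for matching pennies under the same kinds of constraints as above: the expected payoff against each opponent action is at least v, each probability is non-negative, and the probabilities sum to one.

import java.util.ArrayList;
import java.util.Collection;
import org.apache.commons.math.optimization.GoalType;
import org.apache.commons.math.optimization.OptimizationException;
import org.apache.commons.math.optimization.RealPointValuePair;
import org.apache.commons.math.optimization.linear.LinearConstraint;
import org.apache.commons.math.optimization.linear.LinearObjectiveFunction;
import org.apache.commons.math.optimization.linear.Relationship;
import org.apache.commons.math.optimization.linear.SimplexSolver;

public class MatchingPenniesLP {
  public static void main(String[] args) throws OptimizationException {
    // Variables: pi0, pi1 (mixed-strategy probabilities) and v (game value), in that order.
    Collection<LinearConstraint> constraints = new ArrayList<LinearConstraint>();
    // Against each opponent action, the expected payoff must be at least v:
    // pi0 - pi1 - v >= 0 and -pi0 + pi1 - v >= 0.
    constraints.add(new LinearConstraint(new double[] {1, -1, -1}, Relationship.GEQ, 0));
    constraints.add(new LinearConstraint(new double[] {-1, 1, -1}, Relationship.GEQ, 0));
    // Probabilities are non-negative and sum to one (v itself is unrestricted).
    constraints.add(new LinearConstraint(new double[] {1, 0, 0}, Relationship.GEQ, 0));
    constraints.add(new LinearConstraint(new double[] {0, 1, 0}, Relationship.GEQ, 0));
    constraints.add(new LinearConstraint(new double[] {1, 1, 0}, Relationship.EQ, 1));
    // Objective: maximize v, the last variable.
    LinearObjectiveFunction f = new LinearObjectiveFunction(new double[] {0, 0, 1}, 0);

    RealPointValuePair solution =
        new SimplexSolver().optimize(f, constraints, GoalType.MAXIMIZE, false);
    // For matching pennies the maximin strategy is (0.5, 0.5) with value 0.
    System.out.println("pi0=" + solution.getPoint()[0] + " pi1=" + solution.getPoint()[1]
        + " v=" + solution.getValue());
  }
}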