/**
  * Runs the requested number of sweeps over every state, refreshing both the
  * V-table and the policy from the solution of each state's equations.
  *
  * <p>Within a sweep each state is passed to {@code solveEquations}. The entry
  * at index {@code Action.nrActions} of the returned array is used as the
  * state's new V-value; the entries before it are written to the policy, one
  * per action, after being clamped (see the inline comment). State 0 is
  * skipped for the V-value update, so its entry keeps whatever the scratch
  * table was constructed with. New V-values are collected in a scratch table
  * that replaces {@code vValues} only once the sweep has visited all states.
  *
  * <p>{@code largestDif} is reset once on entry and afterwards holds the
  * largest absolute V-value change observed over all sweeps of this call.
  *
  * <p>NOTE(review): the scratch table used by the first sweep is constructed
  * with 0.0, while the tables for later sweeps are constructed with
  * {@code init} - confirm that this difference is intended.
  *
  * @param sweeps number of sweeps to execute; zero or a negative value runs
  *     no sweep at all
  * @throws OptimizationException propagated from the per-state work
  *     (presumably raised by {@code solveEquations} - confirm)
  */
 public void learn(int sweeps) throws OptimizationException {
   // policy entries are kept inside [lowerBound, 1.0]
   final double lowerBound = 0.00000001;
   final double upperThreshold = 0.99999999;

   largestDif = 0.0;
   // scratch table receiving the V-values computed during the current sweep
   StateRepV nextV = new StateRepV(0.0, false);

   for (int remaining = sweeps; remaining > 0; remaining--) {
     for (int s = 0; s < StateRepV.nrStates; s++) {
       final double[] solution = solveEquations(s);

       // state 0 never gets a V-value update
       if (s != 0) {
         final double previous = vValues.getV(s);
         final double solved = solution[Action.nrActions];
         final double change = Math.abs(previous - solved);
         if (change > largestDif) {
           largestDif = change;
         }
         nextV.setValue(s, solved);
       }

       // the solver output may fall slightly outside the accepted range:
       // tiny (or negative) values are raised to the lower bound and values
       // just below (or above) one are set to exactly 1.0
       for (int a = 0; a < Action.nrActions; a++) {
         final double raw = solution[a];
         final double repaired;
         if (raw < lowerBound) {
           repaired = lowerBound;
         } else {
           repaired = (raw > upperThreshold) ? 1.0 : raw;
         }
         policy.setValue(s, Action.getAction(a), repaired);
       }
     }

     // publish this sweep's results and start the next sweep with a fresh table
     vValues = nextV;
     nextV = new StateRepV(init, false);
   }
 }