@Override
  public PerformanceVector evaluateIndividual(Individual individual) {
    double[] beta = individual.getValues();

    double fitness = 0.0d;
    for (Example example : exampleSet) {
      // linear predictor eta = beta . x (intercept, if used, is the last coefficient)
      double eta = 0.0d;
      int i = 0;
      for (Attribute attribute : example.getAttributes()) {
        double value = example.getValue(attribute);
        eta += beta[i] * value;
        i++;
      }
      if (addIntercept) {
        eta += beta[beta.length - 1];
      }

      // Log-likelihood contribution, computed in a numerically stable way:
      //   y*log(pi) + (1-y)*log(1-pi) == y*eta - log(1 + exp(eta))
      // The naive pi = exp(eta)/(1+exp(eta)) overflows to NaN for large eta, and
      // Math.log(pi) returns -Infinity once pi rounds to exactly 0 or 1.
      // softplus(eta) = log(1+exp(eta)) = max(eta,0) + log1p(exp(-|eta|)) never overflows.
      double classValue = example.getValue(label);
      double softplus = Math.max(eta, 0.0d) + Math.log1p(Math.exp(-Math.abs(eta)));
      double currentFitness = classValue * eta - softplus;
      double weightValue = 1.0d;
      if (weight != null) weightValue = example.getValue(weight);
      fitness += weightValue * currentFitness;
    }

    PerformanceVector performanceVector = new PerformanceVector();
    performanceVector.addCriterion(
        new EstimatedPerformance("log_reg_fitness", fitness, exampleSet.size(), false));
    return performanceVector;
  }
 @Override
 public int compare(AggregationIndividual i1, AggregationIndividual i2) {
   // Descending order by the fitness of criterion m. Comparing i2 against i1 directly
   // replaces the "(-1) * Double.compare(...)" anti-idiom (reversing via negation).
   PerformanceVector pv1 = i1.getPerformance();
   PerformanceVector pv2 = i2.getPerformance();
   return Double.compare(pv2.getCriterion(m).getFitness(), pv1.getCriterion(m).getFitness());
 }
示例#3
0
 @Override
 public Object clone() throws CloneNotSupportedException {
   // Deep copy: clone every contained averagable and carry the annotations over.
   PerformanceVector copy = new PerformanceVector();
   int count = size();
   for (int index = 0; index < count; index++) {
     Averagable original = getAveragable(index);
     copy.addAveragable((Averagable) original.clone());
   }
   copy.cloneAnnotationsFrom(this);
   return copy;
 }
 /**
  * Applies the given model to the example set and returns the resulting classification error
  * (average of the evaluator's main criterion).
  *
  * <p>NOTE(review): an OperatorCreationException is only printed to stderr and swallowed, with
  * {@code Double.NaN} returned as a sentinel — callers must check for NaN. Consider routing this
  * through the operator's logger instead of printStackTrace().
  */
 private double getError(ExampleSet exampleSet, Model model) throws OperatorException {
   // apply the model first so the evaluator sees the predicted labels
   exampleSet = model.apply(exampleSet);
   try {
     PerformanceEvaluator evaluator = OperatorService.createOperator(PerformanceEvaluator.class);
     evaluator.setParameter("classification_error", "true");
     PerformanceVector performance = evaluator.doWork(exampleSet);
     return performance.getMainCriterion().getAverage();
   } catch (OperatorCreationException e) {
     // best-effort: evaluator could not be created; NaN signals "no error available"
     e.printStackTrace();
     return Double.NaN;
   }
 }
 @Override
 public PerformanceVector evaluateIndividual(Individual individual) {
   // Wraps the raw fitness values delivered by the optimization function into
   // estimated performance criteria (1 value = plain fitness, 2-3 values = SVM parts).
   double[] fitnessValues = optimizationFunction.getFitness(individual.getValues(), ys, kernel);
   PerformanceVector result = new PerformanceVector();
   if (fitnessValues.length == 1) {
     result.addCriterion(new EstimatedPerformance("SVM_fitness", fitnessValues[0], 1, false));
     return result;
   }
   result.addCriterion(new EstimatedPerformance("alpha_sum", fitnessValues[0], 1, false));
   result.addCriterion(
       new EstimatedPerformance("svm_objective_function", fitnessValues[1], 1, false));
   if (fitnessValues.length == 3) {
     result.addCriterion(new EstimatedPerformance("alpha_label_sum", fitnessValues[2], 1, false));
   }
   return result;
 }
 /** Returns the estimated performances of this SVM. Does only work for classification tasks. */
 @Override
 public PerformanceVector getEstimatedPerformance() throws OperatorException {
   if (!pattern) {
     throw new UserError(
         this,
         912,
         this,
         "Cannot calculate leave one out estimation of error for regression tasks!");
   }
   // XiAlpha estimation delivers {error, precision, recall}; only the error is minimized.
   double[] estimation = ((SVMpattern) getSVM()).getXiAlphaEstimation(getKernel());
   PerformanceVector performance = new PerformanceVector();
   performance.addCriterion(new EstimatedPerformance("xialpha_error", estimation[0], 1, true));
   performance.addCriterion(new EstimatedPerformance("xialpha_precision", estimation[1], 1, false));
   performance.addCriterion(new EstimatedPerformance("xialpha_recall", estimation[2], 1, false));
   performance.setMainCriterionName("xialpha_error");
   return performance;
 }
 /** Delivers the fitness of the best individual as performance vector. */
 public PerformanceVector getOptimizationPerformance() {
   // Re-evaluate the best solution found so far and wrap the parts into criteria.
   double[] finalFitness = optimizationFunction.getFitness(getBestValuesEver(), ys, kernel);
   PerformanceVector result = new PerformanceVector();
   if (finalFitness.length == 1) {
     result.addCriterion(
         new EstimatedPerformance("svm_objective_function", finalFitness[0], 1, false));
     return result;
   }
   result.addCriterion(new EstimatedPerformance("alpha_sum", finalFitness[0], 1, false));
   result.addCriterion(
       new EstimatedPerformance("svm_objective_function", finalFitness[1], 1, false));
   if (finalFitness.length == 3) {
     result.addCriterion(new EstimatedPerformance("alpha_label_sum", finalFitness[2], 1, false));
   }
   return result;
 }
  /**
   * Checks whether the main criterion of the condition-input performance vector strictly exceeds
   * the threshold given as string {@code value}.
   */
  public boolean check(ProcessBranch operator, String value) throws OperatorException {
    if (value == null) {
      throw new UserError(operator, 205, ProcessBranch.PARAMETER_CONDITION_VALUE);
    }

    final double minFitness;
    try {
      minFitness = Double.parseDouble(value);
    } catch (NumberFormatException e) {
      throw new UserError(
          operator, 207, new Object[] {value, ProcessBranch.PARAMETER_CONDITION_VALUE, e});
    }

    // the condition holds when the main criterion's fitness is strictly above the threshold
    PerformanceVector performance = operator.getConditionInput(PerformanceVector.class);
    return performance.getMainCriterion().getFitness() > minFitness;
  }
 /**
  * Returns true if the second performance vector is better in all fitness criteria than the first
  * one (remember: the criteria should be maximized).
  */
 public static boolean isDominated(AggregationIndividual i1, AggregationIndividual i2) {
   PerformanceVector pv1 = i1.getPerformance();
   PerformanceVector pv2 = i2.getPerformance();
   // i2 dominates i1 iff it is at least as good in every criterion and strictly
   // better in at least one (empty vectors yield false).
   boolean atLeastAsGood = true;
   boolean strictlyBetter = false;
   for (int p = 0; p < pv1.getSize(); p++) {
     double first = pv1.getCriterion(p).getFitness();
     double second = pv2.getCriterion(p).getFitness();
     atLeastAsGood &= second >= first;
     strictlyBetter |= second > first;
   }
   return atLeastAsGood && strictlyBetter;
 }
  /**
   * Asserts that the similarity measure selected by {@code similarity} reproduces the expected
   * values for the prepared example pairs.
   *
   * <p>Fixes: the first two branches used the deprecated {@code assertEquals(double, double)}
   * overload (no tolerance) and passed the arguments in (actual, expected) order; all branches now
   * use (expected, actual, delta) consistently with the "Comparator" branch.
   */
  public void checkOutput(IOContainer output) throws MissingIOObjectException {

    if (similarity.equals("Tree")) {
      TreeDistance treedistance = output.get(TreeDistance.class);
      for (int i = 0; i < expectedValues.length; i++) {
        assertEquals(expectedValues[i], treedistance.similarity(first[i], second[i]), 0.00001);
      }
    }
    if (similarity.equals("Euclidean")) {
      EuclideanDistance euclideandistance = output.get(EuclideanDistance.class);
      for (int i = 0; i < expectedValues.length; i++) {
        assertEquals(expectedValues[i], euclideandistance.similarity(first[i], second[i]), 0.00001);
      }
    }
    if (similarity.equals("Comparator")) {
      PerformanceVector performancevector = output.get(PerformanceVector.class);
      assertEquals(
          expectedValues[0], performancevector.getCriterion("similarity").getAverage(), 0.00001);
    }
  }
示例#11
0
  /**
   * Creates a new performance vector if the given one is null. Adds a MDL criterion. If the
   * criterion was already part of the performance vector before it will be overwritten.
   */
  private PerformanceVector count(ExampleSet exampleSet, PerformanceVector performanceCriteria)
      throws OperatorException {
    PerformanceVector result =
        performanceCriteria == null ? new PerformanceVector() : performanceCriteria;

    int direction = getParameterAsInt(PARAMETER_OPTIMIZATION_DIRECTION);
    MDLCriterion mdlCriterion = new MDLCriterion(direction);
    mdlCriterion.startCounting(exampleSet, true);
    this.lastCount = mdlCriterion.getAverage();
    result.addCriterion(mdlCriterion);
    return result;
  }
  /**
   * Fills the data table with one row per (optionally non-dominated) individual of the population
   * and records a defensive copy of each individual's weights in {@code lastPopulation}, keyed by
   * the generated row id.
   *
   * <p>Fixes: local {@code StringBuffer} replaced by {@code StringBuilder} (no synchronization
   * needed in local scope); manual {@code System.arraycopy} replaced by {@code clone()}.
   *
   * @return the maximal number of criteria seen across the added individuals
   */
  public static int fillDataTable(
      SimpleDataTable dataTable,
      Map<String, double[]> lastPopulation,
      Population pop,
      boolean drawDominated) {
    lastPopulation.clear();
    dataTable.clear();
    int numberOfCriteria = 0;
    for (int i = 0; i < pop.getNumberOfIndividuals(); i++) {
      // skip dominated individuals unless they should be drawn as well
      boolean dominated = false;
      if (!drawDominated) {
        for (int j = 0; j < pop.getNumberOfIndividuals(); j++) {
          if (i == j) continue;
          if (NonDominatedSortingSelection.isDominated(pop.get(i), pop.get(j))) {
            dominated = true;
            break;
          }
        }
      }

      if (drawDominated || !dominated) {
        // row id of the form "<index> (<fitness1>, <fitness2>, ...)"
        StringBuilder id = new StringBuilder(i + " (");
        PerformanceVector current = pop.get(i).getPerformance();
        numberOfCriteria = Math.max(numberOfCriteria, current.getSize());
        double[] data = new double[current.getSize()];
        for (int d = 0; d < data.length; d++) {
          data[d] = current.getCriterion(d).getFitness();
          if (d != 0) id.append(", ");
          id.append(Tools.formatNumber(data[d]));
        }
        id.append(")");
        dataTable.add(new SimpleDataTableRow(data, id.toString()));
        // defensive copy so later mutation of the individual's weights cannot leak in
        double[] weights = pop.get(i).getWeights();
        lastPopulation.put(id.toString(), weights.clone());
      }
    }
    return numberOfCriteria;
  }
示例#13
0
  /**
   * Can be used by subclasses to set the performance of the example set. Resets all cached
   * performance fields to NaN first, then fills them from the given vector (main criterion plus
   * up to three positional criteria). A null vector leaves everything at NaN.
   */
  protected final void setResult(PerformanceVector pv) {
    this.lastMainPerformance = Double.NaN;
    this.lastMainVariance = Double.NaN;
    this.lastMainDeviation = Double.NaN;
    this.lastFirstPerformance = Double.NaN;
    this.lastSecondPerformance = Double.NaN;
    this.lastThirdPerformance = Double.NaN;

    if (pv != null) {
      // main result
      PerformanceCriterion mainCriterion = pv.getMainCriterion();
      if ((mainCriterion == null)
          && (pv.size() > 0)) { // use first if no main criterion was defined
        mainCriterion = pv.getCriterion(0);
      }
      if (mainCriterion != null) {
        this.lastMainPerformance = mainCriterion.getAverage();
        this.lastMainVariance = mainCriterion.getVariance();
        this.lastMainDeviation = mainCriterion.getStandardDeviation();
      }

      // positional criteria (duplicated blocks folded into one helper)
      this.lastFirstPerformance = criterionAverageOrNaN(pv, 0);
      this.lastSecondPerformance = criterionAverageOrNaN(pv, 1);
      this.lastThirdPerformance = criterionAverageOrNaN(pv, 2);
    }
  }

  /** Returns the average of the criterion at the given index, or NaN if it is absent or null. */
  private static double criterionAverageOrNaN(PerformanceVector pv, int index) {
    if (pv.size() <= index) {
      return Double.NaN;
    }
    PerformanceCriterion criterion = pv.getCriterion(index);
    return criterion == null ? Double.NaN : criterion.getAverage();
  }
  /**
   * Creates a new performance vector if the given one is null. Adds a new estimated criterion. If
   * the criterion was already part of the performance vector before it will be overwritten.
   */
  private PerformanceVector count(KernelModel model, PerformanceVector performanceCriteria)
      throws OperatorException {
    PerformanceVector result =
        performanceCriteria == null ? new PerformanceVector() : performanceCriteria;

    // count the support vectors carrying a non-zero alpha
    this.lastCount = 0;
    int supportVectorCount = model.getNumberOfSupportVectors();
    for (int i = 0; i < supportVectorCount; i++) {
      SupportVector sv = model.getSupportVector(i);
      if (Math.abs(sv.getAlpha()) > 0.0d) {
        this.lastCount++;
      }
    }
    boolean minimize =
        getParameterAsInt(PARAMETER_OPTIMIZATION_DIRECTION) == MDLCriterion.MINIMIZATION;
    result.addCriterion(
        new EstimatedPerformance("number_of_support_vectors", lastCount, 1, minimize));
    return result;
  }
  /**
   * Performs backward attribute elimination: in each round the attribute whose removal yields the
   * best performance is dropped, until the configured stopping behavior fires (plus a configurable
   * number of "speculative" extra rounds) or the maximal number of removals is reached. Delivers
   * the reduced example set, the best performance seen, and 0/1 attribute weights.
   */
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSetOriginal = exampleSetInput.getData(ExampleSet.class);
    ExampleSet exampleSet = (ExampleSet) exampleSetOriginal.clone();
    int numberOfAttributes = exampleSet.getAttributes().size();
    Attributes attributes = exampleSet.getAttributes();

    // at least one attribute must remain selected
    int maxNumberOfAttributes =
        Math.min(getParameterAsInt(PARAMETER_MAX_ATTRIBUTES), numberOfAttributes - 1);
    int maxNumberOfFails = getParameterAsInt(PARAMETER_ALLOWED_CONSECUTIVE_FAILS);
    int behavior = getParameterAsInt(PARAMETER_STOPPING_BEHAVIOR);

    boolean useRelativeIncrease =
        (behavior == WITH_DECREASE_EXCEEDS)
            ? getParameterAsBoolean(PARAMETER_USE_RELATIVE_DECREASE)
            : false;
    // BUGFIX: the maximal decrease must be read whenever the stopping behavior is
    // WITH_DECREASE_EXCEEDS. The previous guard was "if (useRelativeIncrease)", which made
    // the absolute-decrease branch of the ternary unreachable and silently left
    // maximalDecrease at 0 in absolute mode.
    double maximalDecrease = 0;
    if (behavior == WITH_DECREASE_EXCEEDS)
      maximalDecrease =
          useRelativeIncrease
              ? getParameterAsDouble(PARAMETER_MAX_RELATIVE_DECREASE)
              : getParameterAsDouble(PARAMETER_MAX_ABSOLUT_DECREASE);
    double alpha =
        (behavior == WITH_DECREASE_SIGNIFICANT) ? getParameterAsDouble(PARAMETER_ALPHA) : 0d;

    // remembering attributes and removing all from example set
    Attribute[] attributeArray = new Attribute[numberOfAttributes];
    int i = 0;
    Iterator<Attribute> iterator = attributes.iterator();
    while (iterator.hasNext()) {
      Attribute attribute = iterator.next();
      attributeArray[i] = attribute;
      i++;
    }

    boolean[] selected = new boolean[numberOfAttributes];
    Arrays.fill(selected, true);

    boolean earlyAbort = false;
    List<Integer> speculativeList = new ArrayList<Integer>(maxNumberOfFails);
    int numberOfFails = maxNumberOfFails;
    currentNumberOfFeatures = numberOfAttributes;
    currentAttributes = attributes;
    PerformanceVector lastPerformance = getPerformance(exampleSet);
    PerformanceVector bestPerformanceEver = lastPerformance;
    for (i = 0; i < maxNumberOfAttributes && !earlyAbort; i++) {
      // setting values for logging
      currentNumberOfFeatures = numberOfAttributes - i - 1;

      // performing a round: tentatively switch each still-selected attribute off and
      // keep the removal yielding the best performance
      int bestIndex = 0;
      PerformanceVector currentBestPerformance = null;
      for (int current = 0; current < numberOfAttributes; current++) {
        if (selected[current]) {
          // switching off
          attributes.remove(attributeArray[current]);
          currentAttributes = attributes;

          // evaluate performance
          PerformanceVector performance = getPerformance(exampleSet);
          if (currentBestPerformance == null || performance.compareTo(currentBestPerformance) > 0) {
            bestIndex = current;
            currentBestPerformance = performance;
          }

          // switching on
          attributes.addRegular(attributeArray[current]);
          currentAttributes = null; // removing reference
        }
      }
      double currentFitness = currentBestPerformance.getMainCriterion().getFitness();
      if (i != 0) {
        double lastFitness = lastPerformance.getMainCriterion().getFitness();
        // switch stopping behavior
        switch (behavior) {
          case WITH_DECREASE:
            if (lastFitness >= currentFitness) earlyAbort = true;
            break;
          case WITH_DECREASE_EXCEEDS:
            if (useRelativeIncrease) {
              // relative increase testing
              if (currentFitness < lastFitness - Math.abs(lastFitness * maximalDecrease))
                earlyAbort = true;
            } else {
              // absolute increase testing
              if (currentFitness < lastFitness - maximalDecrease) earlyAbort = true;
            }
            break;
          case WITH_DECREASE_SIGNIFICANT:
            AnovaCalculator calculator = new AnovaCalculator();
            calculator.setAlpha(alpha);

            PerformanceCriterion pc = currentBestPerformance.getMainCriterion();
            calculator.addGroup(pc.getAverageCount(), pc.getAverage(), pc.getVariance());
            pc = lastPerformance.getMainCriterion();
            calculator.addGroup(pc.getAverageCount(), pc.getAverage(), pc.getVariance());

            SignificanceTestResult result;
            try {
              result = calculator.performSignificanceTest();
            } catch (SignificanceCalculationException e) {
              throw new UserError(this, 920, e.getMessage());
            }
            if (lastFitness > currentFitness && result.getProbability() < alpha) earlyAbort = true;
        }
      }
      if (earlyAbort) {
        // check if there are some free tries left
        if (numberOfFails == 0) {
          break;
        }
        numberOfFails--;
        speculativeList.add(bestIndex);
        earlyAbort = false;

        // needs performance increase compared to better performance of current and last!
        if (currentBestPerformance.compareTo(lastPerformance) > 0)
          lastPerformance = currentBestPerformance;
      } else {
        // resetting maximal number of fails.
        numberOfFails = maxNumberOfFails;
        speculativeList.clear();
        lastPerformance = currentBestPerformance;
        bestPerformanceEver = currentBestPerformance;
      }

      // switching best index off
      attributes.remove(attributeArray[bestIndex]);
      selected[bestIndex] = false;
    }
    // add predictively removed attributes: speculative execution did not yield  good result
    for (Integer removeIndex : speculativeList) {
      selected[removeIndex] = true;
      attributes.addRegular(attributeArray[removeIndex]);
    }

    // weight 1 for kept attributes, 0 for eliminated ones
    AttributeWeights weights = new AttributeWeights();
    i = 0;
    for (Attribute attribute : attributeArray) {
      if (selected[i]) weights.setWeight(attribute.getName(), 1d);
      else weights.setWeight(attribute.getName(), 0d);
      i++;
    }

    exampleSetOutput.deliver(exampleSet);
    performanceOutput.deliver(bestPerformanceEver);
    weightsOutput.deliver(weights);
  }
示例#16
0
 public int compare(PerformanceVector av1, PerformanceVector av2) {
   // delegate the ordering to the vectors' main criteria
   return av1.getMainCriterion().compareTo(av2.getMainCriterion());
 }
 public static SimpleDataTable createDataTable(Population pop) {
   // the first individual's performance vector serves as prototype for the column names
   PerformanceVector prototype = pop.get(0).getPerformance();
   return new SimpleDataTable("Population", prototype.getCriteriaNames());
 }
  /**
   * Swing panel showing one card per performance criterion of the given vector, with a selection
   * list on the left used to switch between them. If the vector has no criteria, a placeholder
   * label is shown instead.
   *
   * @param performanceVector the vector whose criteria are displayed
   * @param container the IO container handed to the visualization components
   */
  public PerformanceVectorViewer(
      final PerformanceVector performanceVector, final IOContainer container) {
    setLayout(new BorderLayout());

    // all criteria: one card (scroll pane) per criterion, keyed by the criterion name
    final CardLayout cardLayout = new CardLayout();
    final JPanel mainPanel = new JPanel(cardLayout);
    add(mainPanel, BorderLayout.CENTER);
    List<String> criteriaNameList = new LinkedList<>();
    for (int i = 0; i < performanceVector.getSize(); i++) {
      PerformanceCriterion criterion = performanceVector.getCriterion(i);
      criteriaNameList.add(criterion.getName());
      JPanel component =
          ResultDisplayTools.createVisualizationComponent(
              criterion, container, "Performance Criterion", false);
      JScrollPane criterionPane = new ExtendedJScrollPane(component);
      criterionPane.setBorder(null);
      criterionPane.setBackground(Colors.WHITE);
      mainPanel.add(criterionPane, criterion.getName());
    }
    // no criteria at all: replace the card panel with a placeholder label
    if (criteriaNameList.isEmpty()) {
      remove(mainPanel);
      add(new ResourceLabel("result_view.no_criterions"));
      return;
    }
    String[] criteriaNames = new String[criteriaNameList.size()];
    criteriaNameList.toArray(criteriaNames);

    // selection list; widened to at least 150px for readability
    final JList<String> criteriaList =
        new JList<String>(criteriaNames) {

          private static final long serialVersionUID = 3031125186920370793L;

          @Override
          public Dimension getPreferredSize() {
            Dimension dim = super.getPreferredSize();
            dim.width = Math.max(150, dim.width);
            return dim;
          }
        };
    criteriaList.setCellRenderer(new CriterionListCellRenderer());
    criteriaList.setOpaque(false);

    criteriaList.setBorder(BorderFactory.createTitledBorder("Criterion"));
    criteriaList.setSelectionMode(ListSelectionModel.SINGLE_SELECTION);

    // selecting an entry flips the card layout to the matching criterion card
    criteriaList.addListSelectionListener(
        new ListSelectionListener() {

          @Override
          public void valueChanged(ListSelectionEvent e) {
            String selected = criteriaList.getSelectedValue();
            cardLayout.show(mainPanel, selected);
          }
        });

    JScrollPane listScrollPane = new ExtendedJScrollPane(criteriaList);
    listScrollPane.setBorder(BorderFactory.createEmptyBorder(0, 5, 0, 2));
    add(listScrollPane, BorderLayout.WEST);

    // select first criterion
    criteriaList.setSelectedIndices(new int[] {0});
  }
  /**
   * Grid search over all parameter combinations, followed by a quadratic (response surface)
   * optimization: a second-order model is fitted to the 3^k neighborhood of the best grid point
   * and its stationary point is evaluated. The better of grid best and quadratic optimum is
   * delivered.
   *
   * <p>BUGFIX: when the stationary point exceeded the overall parameter range and CLIP was
   * selected, the old code compared against the minimum value and therefore clipped a value above
   * the maximum down to the minimum (and vice versa); it now clips to the violated bound.
   *
   * @throws OperatorException if fewer than two combinations exist (user error 922) or an inner
   *     performance evaluation fails
   */
  @Override
  public void doWork() throws OperatorException {
    getParametersToOptimize();

    if (numberOfCombinations <= 1) {
      throw new UserError(this, 922);
    }

    int ifExceedsRegion = getParameterAsInt(PARAMETER_IF_EXCEEDS_REGION);
    int ifExceedsRange = getParameterAsInt(PARAMETER_IF_EXCEEDS_RANGE);

    // sort parameter values ascending, so values[j][0] is the minimum and
    // values[j][values[j].length - 1] the maximum of each parameter
    String[] valuesToSort;
    String s;
    double val1;
    double val2;
    int ind1;
    int ind2;
    for (int index = 0; index < numberOfParameters; index++) {
      valuesToSort = values[index];
      // straight-insertion-sort of valuesToSort
      for (ind1 = 0; ind1 < valuesToSort.length; ind1++) {
        val1 = Double.parseDouble(valuesToSort[ind1]);
        for (ind2 = ind1 + 1; ind2 < valuesToSort.length; ind2++) {
          val2 = Double.parseDouble(valuesToSort[ind2]);
          if (val1 > val2) {
            s = valuesToSort[ind1];
            valuesToSort[ind1] = valuesToSort[ind2];
            valuesToSort[ind2] = s;
            val1 = val2;
          }
        }
      }
    }
    int[] bestIndex = new int[numberOfParameters];
    ParameterSet[] allParameters = new ParameterSet[numberOfCombinations];
    int paramIndex = 0;
    // Test all parameter combinations
    best = null;

    // init operator progress (+ 1 for work after loop)
    getProgress().setTotal(allParameters.length + 1);
    while (true) {
      getLogger().fine("Using parameter set");
      // set all parameter values
      for (int j = 0; j < operators.length; j++) {
        operators[j].getParameters().setParameter(parameters[j], values[j][currentIndex[j]]);
        getLogger().fine(operators[j] + "." + parameters[j] + " = " + values[j][currentIndex[j]]);
      }

      PerformanceVector performance = getPerformanceVector();

      String[] currentValues = new String[parameters.length];
      for (int j = 0; j < parameters.length; j++) {
        currentValues[j] = values[j][currentIndex[j]];
      }
      allParameters[paramIndex] =
          new ParameterSet(operators, parameters, currentValues, performance);

      if (best == null || performance.compareTo(best.getPerformance()) > 0) {
        best = allParameters[paramIndex];
        for (int j = 0; j < numberOfParameters; j++) {
          bestIndex[j] = currentIndex[j];
        }
      }

      getProgress().step();

      // next parameter values (odometer-style increment over currentIndex)
      int k = 0;
      boolean ok = true;
      while (!(++currentIndex[k] < values[k].length)) {
        currentIndex[k] = 0;
        k++;
        if (k >= currentIndex.length) {
          ok = false;
          break;
        }
      }
      if (!ok) {
        break;
      }

      paramIndex++;
    }

    // start quadratic optimization: parameters with fewer than 3 values cannot contribute
    // a quadratic term and are skipped; the best index is moved off the grid border so a
    // full 3-point neighborhood exists in every optimized dimension
    int nrParameters = 0;
    for (int i = 0; i < numberOfParameters; i++) {

      if (values[i].length > 2) {
        log("Param " + i + ", bestI = " + bestIndex[i]);
        nrParameters++;
        if (bestIndex[i] == 0) {
          bestIndex[i]++;
        }
        if (bestIndex[i] == values[i].length - 1) {
          bestIndex[i]--;
        }
      } else {
        getLogger().warning("Parameter " + parameters[i] + " has less than 3 values, skipped.");
      }
    }

    if (nrParameters > 3) {
      getLogger()
          .warning("Optimization not recommended for more than 3 values. Check results carefully!");
    }

    if (nrParameters > 0) {
      // Set up the design matrix A for the 3^nrParameters plan and solve A*x = y
      // (x = new parameters). Then check whether the new parameters lie in the
      // admissible region:
      // - okay if inside the cube of the 3^k plan
      // - warning if only inside the given parameter range
      // - error otherwise
      int threetok = 1;
      for (int i = 0; i < nrParameters; i++) {
        threetok *= 3;
      }

      log("Optimising " + nrParameters + " parameters");

      Matrix designMatrix =
          new Matrix(threetok, nrParameters + nrParameters * (nrParameters + 1) / 2 + 1);
      Matrix y = new Matrix(threetok, 1);

      // position currentIndex at the "lower" corner of the 3^k neighborhood and compute
      // the linear index of that combination in allParameters
      paramIndex = 0;
      for (int i = numberOfParameters - 1; i >= 0; i--) {
        if (values[i].length > 2) {
          currentIndex[i] = bestIndex[i] - 1;
        } else {
          currentIndex[i] = bestIndex[i];
        }
        paramIndex = paramIndex * values[i].length + currentIndex[i];
      }

      // NOTE(review): the term loops below iterate i < nrParameters while indexing
      // values[i]/currentIndex[i], which are sized by numberOfParameters. If a parameter
      // with <= 2 values precedes one with > 2 values, the latter is never reached —
      // verify whether these bounds should be numberOfParameters.
      int row = 0;
      int c;
      while (row < designMatrix.getRowDimension()) {
        y.set(row, 0, allParameters[paramIndex].getPerformance().getMainCriterion().getFitness());

        designMatrix.set(row, 0, 1.0);
        c = 1;
        // compute A (linear terms)
        for (int i = 0; i < nrParameters; i++) {
          if (values[i].length > 2) {
            designMatrix.set(row, c, Double.parseDouble(values[i][currentIndex[i]]));
            c++;
          }
        }
        // compute C (pairwise interaction terms)
        for (int i = 0; i < nrParameters; i++) {
          if (values[i].length > 2) {
            for (int j = i + 1; j < nrParameters; j++) {
              if (values[j].length > 2) {
                designMatrix.set(
                    row,
                    c,
                    Double.parseDouble(values[i][currentIndex[i]])
                        * Double.parseDouble(values[j][currentIndex[j]]));
                c++;
              }
            }
          }
        }
        // compute Q (quadratic terms):
        for (int i = 0; i < nrParameters; i++) {
          if (values[i].length > 2) {
            designMatrix.set(
                row,
                c,
                Double.parseDouble(values[i][currentIndex[i]])
                    * Double.parseDouble(values[i][currentIndex[i]]));
            c++;
          }
        }

        // update currentIndex and paramIndex
        int k = 0;
        c = 1;
        while (k < numberOfParameters) {
          if (values[k].length > 2) {
            currentIndex[k]++;
            paramIndex += c;
            if (currentIndex[k] > bestIndex[k] + 1) {
              currentIndex[k] = bestIndex[k] - 1;
              paramIndex -= 3 * c;
              c *= values[k].length;
              k++;
            } else {
              break;
            }
          } else {
            c *= values[k].length;
            k++;
          }
        }
        row++;
      }

      // solve for the regression coefficients (least squares)
      Matrix beta = designMatrix.solve(y);
      for (int i = 0; i < designMatrix.getColumnDimension(); i++) {
        logWarning(" -- Writing " + beta.get(i, 0) + " at position " + i + " in vector b");
      }
      // generate matrix P~ (symmetric matrix of the quadratic form)
      Matrix p = new Matrix(nrParameters, nrParameters);
      int betapos = nrParameters + 1;
      for (int j = 0; j < nrParameters - 1; j++) {
        for (int i = 1 + j; i < nrParameters; i++) {
          p.set(i, j, beta.get(betapos, 0) * 0.5);
          p.set(j, i, beta.get(betapos, 0) * 0.5);
          betapos++;
        }
      }
      for (int i = 0; i < nrParameters; i++) {
        p.set(i, i, beta.get(betapos, 0));
        betapos++;
      }
      // generate vector y~ (negated half of the linear coefficients)
      Matrix y2 = new Matrix(nrParameters, 1);
      for (int i = 0; i < nrParameters; i++) {
        y2.set(i, 0, beta.get(i + 1, 0));
      }
      y2 = y2.times(-0.5);
      // get stationary point x
      Matrix x = new Matrix(nrParameters, 1);
      try {
        x = p.solve(y2);
      } catch (RuntimeException e) {
        logWarning("Quadratic optimization failed. (invalid matrix)");
      }

      String[] Qvalues = new String[numberOfParameters];
      int pc = 0;
      boolean ok = true;
      for (int j = 0; j < numberOfParameters; j++) {
        if (values[j].length > 2) {
          if (x.get(pc, 0) > Double.parseDouble(values[j][bestIndex[j] + 1])
              || x.get(pc, 0) < Double.parseDouble(values[j][bestIndex[j] - 1])) {
            logWarning(
                "Parameter "
                    + parameters[j]
                    + " exceeds region of interest ("
                    + x.get(pc, 0)
                    + ")");
            if (ifExceedsRegion == CLIP) {
              // clip to the violated bound of the region of interest
              if (x.get(pc, 0) > Double.parseDouble(values[j][bestIndex[j] + 1])) {
                x.set(pc, 0, Double.parseDouble(values[j][bestIndex[j] + 1]));
              } else {
                x.set(pc, 0, Double.parseDouble(values[j][bestIndex[j] - 1]));
              }
            } else if (ifExceedsRegion == FAIL) {
              ok = false;
            }
          }
          if (x.get(pc, 0) < Double.parseDouble(values[j][0])
              || x.get(pc, 0) > Double.parseDouble(values[j][values[j].length - 1])) {
            logWarning("Parameter " + parameters[j] + " exceeds range (" + x.get(pc, 0) + ")");
            if (ifExceedsRange == IGNORE) {
              // ignore error
              logWarning("  but no measures taken. Check parameters manually!");
            } else if (ifExceedsRange == CLIP) {
              // BUGFIX: clip to the violated bound. values[j] is sorted ascending, so
              // values[j][0] is the minimum and values[j][values[j].length - 1] the
              // maximum; the old comparison against values[j][0] clipped a value above
              // the maximum down to the minimum (and vice versa).
              if (x.get(pc, 0) > Double.parseDouble(values[j][values[j].length - 1])) {
                x.set(pc, 0, Double.parseDouble(values[j][values[j].length - 1]));
              } else {
                x.set(pc, 0, Double.parseDouble(values[j][0]));
              }
            } else {
              ok = false;
            }
          }

          Qvalues[j] = x.get(pc, 0) + "";
          pc++;
        } else {
          Qvalues[j] = values[j][bestIndex[j]];
        }
      }

      getLogger().info("Optimised parameter set:");
      for (int j = 0; j < operators.length; j++) {
        operators[j].getParameters().setParameter(parameters[j], Qvalues[j]);
        getLogger().info("  " + operators[j] + "." + parameters[j] + " = " + Qvalues[j]);
      }
      if (ok) {
        // evaluate the quadratic optimum and keep it only if it beats the grid best
        PerformanceVector qPerformance = super.getPerformanceVector();
        log("Old: " + best.getPerformance().getMainCriterion().getFitness());
        log("New: " + qPerformance.getMainCriterion().getFitness());
        if (qPerformance.compareTo(best.getPerformance()) > 0) {
          best = new ParameterSet(operators, parameters, Qvalues, qPerformance);
          log("Optimised parameter set does increase the performance");
        } else {
          log("Could not increase performance by quadratic optimization");
        }
      } else {
        getLogger()
            .warning("Parameters outside admissible range, not using optimised parameter set.");
      }
    } else {
      // Warning: no parameters to optimize
      getLogger().warning("No parameters to optimize");
    }
    // end quadratic optimization
    deliver(best);
    getProgress().complete();
  }
示例#20
0
  /**
   * Constructs a {@code Model} by repeatedly running a weak learner on consecutive batches of the
   * example stream, reweighting the training examples accordingly, and combining the hypotheses
   * using the available weighted performance values.
   *
   * <p>Per batch two candidate ensembles are maintained: {@code ensembleNewBatch} (continues
   * training on the newest batch) and {@code ensembleExtBatch} (drops the last base model and
   * retrains on an extended batch reaching back to {@code firstOpenBatch}). After each batch the
   * apparently better one — judged by prediction performance or, if configured, by a hold-out set —
   * is preferred.
   *
   * @param exampleSet the training examples, consumed batch-wise via the stream control attribute
   * @return the final ensemble: the extended-batch model if one exists, otherwise the new-batch one
   * @throws OperatorException if batch preparation, base learning, or model application fails
   */
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    this.runVector = new RunVector();
    BayBoostModel ensembleNewBatch = null;
    BayBoostModel ensembleExtBatch = null;
    // Base models and their probability estimates for the "new batch" ensemble. These
    // BayBoostBaseModelInfo objects are immutable, so the copies below need not be deep.
    final Vector<BayBoostBaseModelInfo> modelInfo = new Vector<BayBoostBaseModelInfo>();
    // Counterpart list for the "extended batch" ensemble.
    Vector<BayBoostBaseModelInfo> modelInfo2 = new Vector<BayBoostBaseModelInfo>();
    this.currentIteration = 0;
    int firstOpenBatch = 1;

    // Prepare the stream control attribute (marks which batch each example belongs to).
    final Attribute streamControlAttribute;
    {
      Attribute attr = null;
      if ((attr = exampleSet.getAttributes().get(STREAM_CONTROL_ATTRIB_NAME)) == null)
        streamControlAttribute =
            com.rapidminer.example.Tools.createSpecialAttribute(
                exampleSet, STREAM_CONTROL_ATTRIB_NAME, Ontology.INTEGER);
      else {
        streamControlAttribute = attr;
        logWarning(
            "Attribute with the (reserved) name of the stream control attribute exists. It is probably an old version created by this operator. Trying to recycle it... ");
        // Reset the recycled control attribute to 0 so stale batch numbers from a previous run of
        // this operator on the same ExampleSet cannot leak into this run:
        Iterator<Example> e = exampleSet.iterator();
        while (e.hasNext()) {
          e.next().setValue(streamControlAttribute, 0);
        }
      }
    }

    // Ensure a weight attribute exists; prepareWeights initializes it.
    if (exampleSet.getAttributes().getWeight() == null) {
      this.prepareWeights(exampleSet);
    }

    boolean estimateFavoursExtBatch = true;
    // *** The main loop, one iteration per batch: ***
    Iterator<Example> reader = exampleSet.iterator();
    while (reader.hasNext()) {
      // Increment batch number, collect the batch, and evaluate the performance of the
      // current model on it. prepareBatch also returns the class priors of the batch.
      double[] classPriors =
          this.prepareBatch(++this.currentIteration, reader, streamControlAttribute);

      ConditionedExampleSet trainingSet =
          new ConditionedExampleSet(
              exampleSet, new BatchFilterCondition(streamControlAttribute, this.currentIteration));

      final EstimatedPerformance estPerf;

      // Step 1: apply the ensemble model to the current batch (prediction phase),
      // evaluate and store the result.
      if (ensembleExtBatch != null) {
        // Apply the extended batch model first; evaluatePredictions yields the
        // unweighted performance.
        trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet);
        this.performance = evaluatePredictions(trainingSet);

        // then apply new batch model:
        trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
        double newBatchPerformance = evaluatePredictions(trainingSet);

        // Heuristic: report the performance of whichever model the previous hold-out
        // estimate favoured for predicting unclassified instances.
        if (estimateFavoursExtBatch == true)
          estPerf =
              new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false);
        else
          estPerf =
              new EstimatedPerformance("accuracy", newBatchPerformance, trainingSet.size(), false);

        // Continue with the better model: keeping modelInfo means keeping the new-batch
        // ensemble; otherwise modelInfo is overwritten with the extended-batch models.
        if (newBatchPerformance > this.performance) {
          this.performance = newBatchPerformance;
          firstOpenBatch = Math.max(1, this.currentIteration - 1);
        } else {
          modelInfo.clear();
          modelInfo.addAll(modelInfo2);
        }

      } else if (ensembleNewBatch != null) {
        trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
        this.performance = evaluatePredictions(trainingSet);
        firstOpenBatch = Math.max(1, this.currentIteration - 1);
        estPerf = new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false);
      } else estPerf = null; // no model ==> no prediction performance

      if (estPerf != null) {
        PerformanceVector perf = new PerformanceVector();
        perf.addAveragable(estPerf);
        this.runVector.addVector(perf);
      }

      // *** retraining phase ***
      // Step 2: First reconstruct the initial weighting, if necessary.
      if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS) == true) {
        this.rescalePriors(trainingSet, classPriors);
      }

      estimateFavoursExtBatch = true;
      // Step 3: Find better weights for existing models and continue training.
      if (modelInfo.size() > 0) {

        // Snapshot the current model list; BayBoostBaseModelInfo objects cannot be
        // changed, so no deep copy is required.
        modelInfo2 = new Vector<BayBoostBaseModelInfo>();
        for (BayBoostBaseModelInfo bbbmi : modelInfo) {
          modelInfo2.add(bbbmi);
        }

        // Separate a hold-out set by zeroing the control attribute of sampled examples,
        // which excludes them from the current batch condition.
        final double holdOutRatio = this.getParameterAsDouble(PARAMETER_FRACTION_HOLD_OUT_SET);
        Vector<Example> holdOutExamples = new Vector<Example>();
        if (holdOutRatio > 0) {
          RandomGenerator random = RandomGenerator.getRandomGenerator(this);
          Iterator<Example> randBatchReader = trainingSet.iterator();
          while (randBatchReader.hasNext()) {
            Example example = randBatchReader.next();
            if (random.nextDoubleInRange(0, 1) <= holdOutRatio) {
              example.setValue(streamControlAttribute, 0);
              holdOutExamples.add(example);
            }
          }
          // TODO: create new example set
          // trainingSet.updateCondition();
        }

        // Model 1: train one more base classifier on the current batch.
        boolean trainingExamplesLeft = this.adjustBaseModelWeights(trainingSet, modelInfo);
        if (trainingExamplesLeft) {
          // "trainingExamplesLeft" needs to be checked to avoid exceptions.
          // Anyway, learning does not make sense, otherwise.
          // NOTE(review): the failure result of trainAdditionalModel is deliberately
          // ignored here — confirm this is intended.
          if (!this.trainAdditionalModel(trainingSet, modelInfo)) {}
        }
        ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors);

        // Model 2: remove the last classifier, extend the batch, and train on the
        // extended batch. Because of the ">=" condition it is sufficient to remember
        // the opening batch.
        ExampleSet extendedBatch =
            new ConditionedExampleSet(
                exampleSet, new BatchFilterCondition(streamControlAttribute, firstOpenBatch));
        classPriors = this.prepareExtendedBatch(extendedBatch);
        if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS) == true) {
          this.rescalePriors(extendedBatch, classPriors);
        }
        modelInfo2.remove(modelInfo2.size() - 1);
        trainingExamplesLeft = this.adjustBaseModelWeights(extendedBatch, modelInfo2);
        // If no training examples are left: no need and chance to continue training.
        if (trainingExamplesLeft == false) {
          ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors);
        } else {
          boolean success = this.trainAdditionalModel(extendedBatch, modelInfo2);
          if (success) {
            ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors);
          } else {
            ensembleExtBatch = null;
            estimateFavoursExtBatch = false;
          }
        }

        // Re-attach the hold-out examples to the current batch and use them to decide
        // which of the two candidate ensembles to favour next iteration.
        if (holdOutRatio > 0) {
          Iterator hoEit = holdOutExamples.iterator();
          while (hoEit.hasNext()) {
            ((Example) hoEit.next()).setValue(streamControlAttribute, this.currentIteration);
          }
          // TODO: create new example set
          // trainingSet.updateCondition();

          if (ensembleExtBatch != null) {
            trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
            hoEit = holdOutExamples.iterator();
            int errors = 0;
            while (hoEit.hasNext()) {
              Example example = (Example) hoEit.next();
              // NOTE(review): exact double comparison of label indices — presumably both
              // are integral class indices, so != is safe here; confirm.
              if (example.getPredictedLabel() != example.getLabel()) errors++;
            }
            double newBatchErr = ((double) errors) / holdOutExamples.size();

            trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet);
            hoEit = holdOutExamples.iterator();
            errors = 0;
            while (hoEit.hasNext()) {
              Example example = (Example) hoEit.next();
              if (example.getPredictedLabel() != example.getLabel()) errors++;
            }
            double extBatchErr = ((double) errors) / holdOutExamples.size();

            estimateFavoursExtBatch = (extBatchErr <= newBatchErr);

            // Retrain the last base model's weight of the favoured ensemble on the
            // hold-out examples.
            if (estimateFavoursExtBatch) {
              ensembleExtBatch =
                  this.retrainLastWeight(ensembleExtBatch, trainingSet, holdOutExamples);
            } else
              ensembleNewBatch =
                  this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples);
          } else
            ensembleNewBatch =
                this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples);
        }
      } else {
        // First batch: no previous models, just train the initial base classifier.
        this.trainAdditionalModel(trainingSet, modelInfo);
        ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors);
        ensembleExtBatch = null;
        estimateFavoursExtBatch = false;
      }
    }
    this.restoreOldWeights(exampleSet);
    return (ensembleExtBatch == null ? ensembleNewBatch : ensembleExtBatch);
  }
  /**
   * Computes standard goodness-of-fit statistics for the fitted logistic regression model and
   * returns them as a {@code PerformanceVector}.
   *
   * <p>Reported criteria: log likelihood, restricted (intercept-only) log likelihood,
   * -2 log likelihood, model chi-squared, Pearson goodness of fit, Cox &amp; Snell and
   * Nagelkerke and McFadden R-squared, AIC, and BIC (AIC is the main criterion).
   *
   * @return performance vector holding all fit statistics, with "AIC" as main criterion
   */
  public PerformanceVector getPerformance() {
    double[] beta = getBestValuesEver();
    // Number of slope coefficients; when an intercept is fitted it occupies the last
    // entry of beta and is not a slope.
    double numberOfSlopes = addIntercept ? beta.length - 1 : beta.length;
    double logLikelihood = getBestFitnessEver();
    double restrictedLogLikelihood = 0.0d;
    double minusTwoLogLikelihood = 0.0d;
    double modelChiSquared = 0.0d;
    double goodnessOfFit = 0.0d;
    double coxSnellRSquared = 0.0d;
    double nagelkerkeRSquared = 0.0d;
    double mcfaddenRSquared = 0.0d;
    double AIC = 0.0d;
    double BIC = 0.0d;

    double weightSum = 0.0d;
    double positiveSum = 0.0d;
    for (Example example : exampleSet) {
      // Linear predictor eta = beta' x (+ intercept).
      double eta = 0.0d;
      int i = 0;
      for (Attribute attribute : example.getAttributes()) {
        double value = example.getValue(attribute);
        eta += beta[i] * value;
        i++;
      }
      if (addIntercept) {
        eta += beta[beta.length - 1];
      }
      // Numerically stable sigmoid. The previous form exp(eta) / (1 + exp(eta)) overflows to
      // Infinity / Infinity = NaN for large positive eta; 1 / (1 + exp(-eta)) is safe on both
      // tails (it saturates to 1.0 and 0.0 respectively).
      double pi = 1.0d / (1.0d + Math.exp(-eta));

      double classValue = example.getValue(label);
      // Pearson residual contribution; note this is infinite if pi saturates to exactly
      // 0 or 1 on an example, which mirrors a genuinely degenerate fit.
      double currentFit = (classValue - pi) * (classValue - pi) / (pi * (1 - pi));
      double weightValue = 1.0d;
      if (weight != null) weightValue = example.getValue(weight);
      weightSum += weightValue;
      positiveSum += weightValue * classValue;
      goodnessOfFit += weightValue * currentFit;
    }
    // Base rate of the positive class under the example weights.
    double pi0 = positiveSum / weightSum;
    if (addIntercept) {
      // xLogX defines 0 * log(0) == 0, so a degenerate class distribution (pi0 of 0 or 1)
      // yields a restricted log likelihood of 0 instead of NaN.
      restrictedLogLikelihood = weightSum * (xLogX(pi0) + xLogX(1 - pi0));
    } else {
      restrictedLogLikelihood = weightSum * Math.log(0.5);
    }
    minusTwoLogLikelihood = -2 * logLikelihood;
    modelChiSquared = 2 * (logLikelihood - restrictedLogLikelihood);
    coxSnellRSquared =
        1 - Math.pow(Math.exp(restrictedLogLikelihood) / Math.exp(logLikelihood), 2 / weightSum);
    nagelkerkeRSquared =
        coxSnellRSquared / (1 - Math.pow(Math.exp(restrictedLogLikelihood), 2 / weightSum));
    mcfaddenRSquared = 1 - logLikelihood / restrictedLogLikelihood;
    // Information criteria penalize the slopes plus one parameter for the intercept.
    AIC = -2 * logLikelihood + 2 * (numberOfSlopes + 1);
    BIC = -2 * logLikelihood + Math.log(weightSum) * (numberOfSlopes + 1);

    PerformanceVector estimatedPerformance = new PerformanceVector();
    estimatedPerformance.addCriterion(
        new EstimatedPerformance("log_likelihood", logLikelihood, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance(
            "restricted_log_likelihood", restrictedLogLikelihood, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance(
            "-2_log_likelihood", minusTwoLogLikelihood, exampleSet.size(), true));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance("model_chi_squared", modelChiSquared, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance("goodness_of_fit", goodnessOfFit, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance(
            "cox_snell_r_squared", coxSnellRSquared, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance(
            "nagelkerke_r_squared", nagelkerkeRSquared, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance("mcfadden_r_squared", mcfaddenRSquared, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance("AIC", AIC, exampleSet.size(), true));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance("BIC", BIC, exampleSet.size(), true));
    estimatedPerformance.setMainCriterionName("AIC");
    return estimatedPerformance;
  }

  /** Returns {@code x * log(x)} using the continuous extension {@code 0 * log(0) == 0}. */
  private static double xLogX(double x) {
    return x <= 0.0d ? 0.0d : x * Math.log(x);
  }
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    // Guard clauses: a prediction, a label, and a nominal label are all required.
    Attribute predictedLabel = exampleSet.getAttributes().getPredictedLabel();
    if (predictedLabel == null) {
      throw new UserError(this, 107);
    }
    Attribute label = exampleSet.getAttributes().getLabel();
    if (label == null) {
      throw new UserError(this, 105);
    }
    if (!label.isNominal()) {
      throw new UserError(this, 101, "CostEvaluator", label.getName());
    }

    double[][] costs = getParameterAsMatrix(PARAMETER_COST_MATRIX);

    // Optional explicit ordering of the label classes, mapping class name -> matrix index.
    Map<String, Integer> classIndices = null;
    if (isParameterSet(PARAMETER_CLASS_DEFINITION)) {
      String[] definedClasses =
          ParameterTypeEnumeration.transformString2Enumeration(
              getParameterAsString(PARAMETER_CLASS_DEFINITION));
      if (definedClasses.length > 0) {
        classIndices = new HashMap<String, Integer>();
        for (int index = 0; index < definedClasses.length; index++) {
          classIndices.put(definedClasses[index], index);
        }

        // Every possible label value must appear in the class definition.
        for (String value : label.getMapping().getValues()) {
          if (!classIndices.containsKey(value)) {
            throw new UserError(
                this, "performance_costs.class_order_definition_misses_value", value);
          }
        }

        // The cost matrix dimension must match the number of defined classes.
        if (costs.length != classIndices.size()) {
          throw new UserError(
              this,
              "performance_costs.cost_matrix_with_wrong_dimension",
              costs.length,
              classIndices.size());
        }
      }
    }

    // Measure the classification costs over all examples.
    MeasuredPerformance criterion =
        new ClassificationCostCriterion(costs, classIndices, label, predictedLabel);
    PerformanceVector performance = new PerformanceVector();
    performance.addCriterion(criterion);
    criterion.startCounting(exampleSet, false);
    for (Example example : exampleSet) {
      criterion.countExample(example);
    }

    // Expose the measured average cost as a logging value.
    lastCosts = criterion.getAverage();

    exampleSetOutput.deliver(exampleSet);
    performanceOutput.deliver(performance);
  }