Exemplo n.º 1
0
 /**
  * Resets the weight of every example in an extended batch to 1 and derives the class priors.
  *
  * <p>Works like prepareBatch, except that the complete example set is consumed and no batch
  * number is written. Each label is cast to {@code int} and used directly as an index into a
  * two-element counter array, so only the label indices 0 and 1 are supported.
  *
  * @param extendedBatch containing the extended batch
  * @return the relative frequencies of label 0 and label 1, in that order; both entries are NaN
  *     if the batch contains no examples
  */
 private double[] prepareExtendedBatch(ExampleSet extendedBatch) {
   final int[] tally = new int[2];
   for (Iterator<Example> cursor = extendedBatch.iterator(); cursor.hasNext(); ) {
     final Example current = cursor.next();
     current.setWeight(1);
     tally[(int) current.getLabel()]++;
   }
   // Every counted example landed in one of the two slots, so their sum is the batch size.
   final int total = tally[0] + tally[1];
   return new double[] {((double) tally[0]) / total, ((double) tally[1]) / total};
 }
Exemplo n.º 2
0
  /**
   * Reweights all examples so that both classes of the boolean target carry the same total
   * weight.
   *
   * <p>The priors are not computed here; they are supplied by the caller. Every example of class
   * {@code i} is assigned the weight {@code (1 / #classes) / classPriors[i]}, with
   * {@code #classes} fixed to 2.
   *
   * @param exampleSet the examples whose weights are overwritten
   * @param classPriors the relative frequency of class 0 and class 1, indexed by label
   */
  private void rescalePriors(ExampleSet exampleSet, double[] classPriors) {
    final int numClasses = 2;
    final double[] weightOfClass = new double[numClasses];
    for (int label = 0; label < numClasses; label++) {
      // Equivalent to (1 / numClasses) / classPriors[label].
      weightOfClass[label] = 1.0d / (numClasses * classPriors[label]);
    }

    for (Iterator<Example> cursor = exampleSet.iterator(); cursor.hasNext(); ) {
      final Example current = cursor.next();
      final int label = (int) current.getLabel();
      current.setWeight(weightOfClass[label]);
    }
  }
Exemplo n.º 3
0
  /**
   * Makes sure the example set has a weight attribute and that every example starts with
   * weight 1.
   *
   * <p>If no weight attribute exists, {@code oldWeights} is cleared and a new weight attribute is
   * created. Otherwise the current weights are copied into {@code oldWeights} (one slot per
   * example, in iteration order) before each weight is overwritten with 1.
   *
   * @param exampleSet the example set to prepare
   */
  protected void prepareWeights(ExampleSet exampleSet) {
    if (exampleSet.getAttributes().getWeight() == null) {
      // Nothing to back up: remember that, and let the framework add the attribute.
      this.oldWeights = null;
      com.rapidminer.example.Tools.createWeightAttribute(exampleSet);
      return;
    }

    // Back up the existing weights before resetting them.
    final double[] backup = new double[exampleSet.size()];
    this.oldWeights = backup;
    final Iterator<Example> cursor = exampleSet.iterator();
    int slot = 0;
    while (cursor.hasNext() && slot < backup.length) {
      final Example current = cursor.next();
      if (current != null) {
        backup[slot] = current.getWeight();
        current.setWeight(1);
      }
      // A null example still occupies its slot; the backed-up value stays 0.
      slot++;
    }
  }
Exemplo n.º 4
0
 /**
  * Collects the examples of one batch from the stream, tags them with the batch number, resets
  * their weights to 1 and computes the class priors of the batch.
  *
  * <p>At most {@code PARAMETER_BATCH_SIZE} examples are consumed; fewer if the stream runs out
  * first. Labels are cast to {@code int} and used as indices into a two-element counter array,
  * so only the label indices 0 and 1 are supported.
  *
  * @param currentBatchNum the batch number to be assigned to the examples
  * @param reader the {@code Iterator<Example>} with the cursor on the current point in the
  *     stream.
  * @param batchAttribute the attribute to write the batch number to
  * @return the relative frequencies of label 0 and label 1 among the consumed examples; both
  *     entries are NaN if no example was consumed
  */
 private double[] prepareBatch(
     int currentBatchNum, Iterator<Example> reader, Attribute batchAttribute)
     throws UndefinedParameterError {
   final int batchSize = this.getParameterAsInt(PARAMETER_BATCH_SIZE);
   final int[] tally = new int[2];
   // Number of examples actually taken from the stream for this batch.
   int consumed = 0;
   // Fill the buffer (next batch) until it is full or the stream is exhausted. Weights are
   // reset here in order to simulate sampling.
   while (consumed < batchSize && reader.hasNext()) {
     final Example current = reader.next();
     current.setValue(batchAttribute, currentBatchNum);
     current.setWeight(1);
     tally[(int) current.getLabel()]++;
     consumed++;
   }
   return new double[] {((double) tally[0]) / consumed, ((double) tally[1]) / consumed};
 }
Exemplo n.º 5
0
  /**
   * Rebuilds the ensemble with a freshly estimated contingency matrix for its latest base model.
   *
   * <p>All models except the last keep their stored contingency matrices; they are applied to
   * the example set one after another, each followed by a reweighting step. The last model is
   * then applied, every weight in the example set is set to 0, and the hold-out examples get
   * back the weights they had right after that application. The contingency matrix of the last
   * model is computed from the example set in that state.
   *
   * <p>NOTE(review): this presumably restricts the estimate to the hold-out examples, assuming
   * they are part of {@code exampleSet} and that zero-weight examples do not contribute;
   * confirm against WeightedPerformanceMeasures.
   *
   * @param ensemble the model whose last base model is re-evaluated
   * @param exampleSet the examples the models are applied to
   * @param holdOutSet the hold-out examples; every element is cast to {@code Example}
   * @return a new model with the same base models and priors, and an updated contingency matrix
   *     for the last base model
   * @throws OperatorException if applying a model or evaluating the performance fails
   */
  private BayBoostModel retrainLastWeight(
      BayBoostModel ensemble, ExampleSet exampleSet, Vector holdOutSet) throws OperatorException {
    // Only the side effect (all weights reset to 1) is used here; the returned priors are
    // ignored.
    this.prepareExtendedBatch(exampleSet);

    final int lastIndex = ensemble.getNumberOfModels() - 1;
    final Vector<BayBoostBaseModelInfo> modelInfo = new Vector<BayBoostBaseModelInfo>();
    final double[] priors = ensemble.getPriors();

    // Replay all earlier models with their existing contingency matrices.
    for (int position = 0; position < lastIndex; position++) {
      final Model baseModel = ensemble.getModel(position);
      final ContingencyMatrix matrix = ensemble.getContingencyMatrix(position);
      modelInfo.add(new BayBoostBaseModelInfo(baseModel, matrix));
      exampleSet = baseModel.apply(exampleSet);
      WeightedPerformanceMeasures.reweightExamples(exampleSet, matrix, false);
    }

    final Model latestModel = ensemble.getModel(lastIndex);
    exampleSet = latestModel.apply(exampleSet);

    // Step 1: remember the current weights of the hold-out examples.
    final double[] savedWeights = new double[holdOutSet.size()];
    int slot = 0;
    for (Object held : holdOutSet) {
      savedWeights[slot++] = ((Example) held).getWeight();
    }

    // Step 2: zero the weight of every example in the set.
    for (Iterator<Example> cursor = exampleSet.iterator(); cursor.hasNext(); ) {
      cursor.next().setWeight(0);
    }

    // Step 3: restore the hold-out weights saved in step 1.
    slot = 0;
    for (Object held : holdOutSet) {
      ((Example) held).setWeight(savedWeights[slot++]);
    }

    final WeightedPerformanceMeasures measures = new WeightedPerformanceMeasures(exampleSet);
    modelInfo.add(new BayBoostBaseModelInfo(latestModel, measures.getContingencyMatrix()));

    return new BayBoostModel(exampleSet, modelInfo, priors);
  }