/**
   * Iterates over all models and returns the class with maximum likelihood.
   *
   * @param origExampleSet the set of examples to be classified
   */
  @Override
  public ExampleSet performPrediction(ExampleSet origExampleSet, Attribute predictedLabel)
      throws OperatorException {
    final String attributePrefix = "AdaBoostModelPrediction";
    final int numLabels = predictedLabel.getMapping().size();
    final Attribute[] specialAttributes = new Attribute[numLabels];
    OperatorProgress progress = null;
    if (getShowProgress() && getOperator() != null && getOperator().getProgress() != null) {
      progress = getOperator().getProgress();
      progress.setTotal(100);
    }
    for (int i = 0; i < numLabels; i++) {
      specialAttributes[i] =
          com.rapidminer.example.Tools.createSpecialAttribute(
              origExampleSet, attributePrefix + i, Ontology.NUMERICAL);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (i + 1) / numLabels));
      }
    }

    Iterator<Example> reader = origExampleSet.iterator();
    int progressCounter = 0;
    while (reader.hasNext()) {
      Example example = reader.next();
      for (int i = 0; i < specialAttributes.length; i++) {
        example.setValue(specialAttributes[i], 0);
      }
      if (progress != null && ++progressCounter % OPERATOR_PROGRESS_STEPS == 0) {
        progress.setCompleted((int) (25.0 * progressCounter / origExampleSet.size()) + 25);
      }
    }

    reader = origExampleSet.iterator();
    for (int modelNr = 0; modelNr < this.getNumberOfModels(); modelNr++) {
      Model model = this.getModel(modelNr);
      ExampleSet exampleSet = (ExampleSet) origExampleSet.clone();
      exampleSet = model.apply(exampleSet);
      this.updateEstimates(exampleSet, modelNr, specialAttributes);
      PredictionModel.removePredictedLabel(exampleSet);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (modelNr + 1) / this.getNumberOfModels()) + 50);
      }
    }

    // Turn prediction weights into confidences and a crisp predcition:
    this.evaluateSpecialAttributes(origExampleSet, specialAttributes);

    // Clean up attributes:
    for (int i = 0; i < numLabels; i++) {
      origExampleSet.getAttributes().remove(specialAttributes[i]);
      origExampleSet.getExampleTable().removeAttribute(specialAttributes[i]);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (i + 1) / numLabels) + 75);
      }
    }

    return origExampleSet;
  }
Пример #2
0
  public IOObject[] apply() throws OperatorException {
    ExampleSet eSet = getInput(ExampleSet.class);

    // only warning, removing is done by createSpecialAttribute(...)
    Attribute idAttribute = eSet.getAttributes().getId();
    if (idAttribute != null) {
      logWarning("Overwriting old id attribute!");
    }

    // create new id attribute
    boolean nominalIds = getParameterAsBoolean(PARAMETER_CREATE_NOMINAL_IDS);
    idAttribute =
        Tools.createSpecialAttribute(
            eSet, Attributes.ID_NAME, nominalIds ? Ontology.NOMINAL : Ontology.INTEGER);

    // set IDs
    int currentId = 1;
    Iterator<Example> r = eSet.iterator();
    while (r.hasNext()) {
      Example example = r.next();
      example.setValue(
          idAttribute,
          nominalIds ? idAttribute.getMapping().mapString("id_" + currentId) : currentId);
      currentId++;
      checkForStop();
    }

    // initialize example visualizer
    Operator visualizer = null;
    try {
      visualizer = OperatorService.createOperator(ExampleVisualizationOperator.class);
    } catch (OperatorCreationException e) {
      logNote("Cannot initialize example visualizer, skipping...");
    }
    if (visualizer != null) visualizer.apply(new IOContainer(eSet));

    return new IOObject[] {eSet};
  }
Пример #3
0
  /**
   * Constructs a <code>Model</code> repeatedly running a weak learner, reweighting the training
   * example set accordingly, and combining the hypothesis using the available weighted performance
   * values.
   */
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    this.runVector = new RunVector();
    BayBoostModel ensembleNewBatch = null;
    BayBoostModel ensembleExtBatch = null;
    final Vector<BayBoostBaseModelInfo> modelInfo = new Vector<BayBoostBaseModelInfo>(); // for
    // models
    // and
    // their
    // probability
    // estimates
    Vector<BayBoostBaseModelInfo> modelInfo2 = new Vector<BayBoostBaseModelInfo>();
    this.currentIteration = 0;
    int firstOpenBatch = 1;

    // prepare the stream control attribute
    final Attribute streamControlAttribute;
    {
      Attribute attr = null;
      if ((attr = exampleSet.getAttributes().get(STREAM_CONTROL_ATTRIB_NAME)) == null)
        streamControlAttribute =
            com.rapidminer.example.Tools.createSpecialAttribute(
                exampleSet, STREAM_CONTROL_ATTRIB_NAME, Ontology.INTEGER);
      else {
        streamControlAttribute = attr;
        logWarning(
            "Attribute with the (reserved) name of the stream control attribute exists. It is probably an old version created by this operator. Trying to recycle it... ");
        // Resetting the stream control attribute values by overwriting
        // them with 0 avoids (unlikely)
        // problems in case the same ExampleSet is passed to this
        // operator over and over again:
        Iterator<Example> e = exampleSet.iterator();
        while (e.hasNext()) {
          e.next().setValue(streamControlAttribute, 0);
        }
      }
    }

    // and the weight attribute
    if (exampleSet.getAttributes().getWeight() == null) {
      this.prepareWeights(exampleSet);
    }

    boolean estimateFavoursExtBatch = true;
    // *** The main loop, one iteration per batch: ***
    Iterator<Example> reader = exampleSet.iterator();
    while (reader.hasNext()) {
      // increment batch number, collect batch and evaluate performance of
      // current model on batch
      double[] classPriors =
          this.prepareBatch(++this.currentIteration, reader, streamControlAttribute);

      ConditionedExampleSet trainingSet =
          new ConditionedExampleSet(
              exampleSet, new BatchFilterCondition(streamControlAttribute, this.currentIteration));

      final EstimatedPerformance estPerf;

      // Step 1: apply the ensemble model to the current batch (prediction
      // phase), evaluate and store result
      if (ensembleExtBatch != null) {
        // apply extended batch model first:
        trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet);
        this.performance = evaluatePredictions(trainingSet); // unweighted
        // performance;

        // then apply new batch model:
        trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
        double newBatchPerformance = evaluatePredictions(trainingSet);

        // heuristic: use extended batch model for predicting
        // unclassified instances
        if (estimateFavoursExtBatch == true)
          estPerf =
              new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false);
        else
          estPerf =
              new EstimatedPerformance("accuracy", newBatchPerformance, trainingSet.size(), false);

        // final double[] ensembleWeights;

        // continue with the better model:
        if (newBatchPerformance > this.performance) {
          this.performance = newBatchPerformance;
          firstOpenBatch = Math.max(1, this.currentIteration - 1);
          // ensembleWeights = ensembleNewBatch.getModelWeights();
        } else {
          modelInfo.clear();
          modelInfo.addAll(modelInfo2);
          // ensembleWeights = ensembleExtBatch.getModelWeights();
        }

      } else if (ensembleNewBatch != null) {
        trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
        this.performance = evaluatePredictions(trainingSet);
        firstOpenBatch = Math.max(1, this.currentIteration - 1);
        estPerf = new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false);
      } else estPerf = null; // no model ==> no prediction performance

      if (estPerf != null) {
        PerformanceVector perf = new PerformanceVector();
        perf.addAveragable(estPerf);
        this.runVector.addVector(perf);
      }

      // *** retraining phase ***
      // Step 2: First reconstruct the initial weighting, if necessary.
      if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS) == true) {
        this.rescalePriors(trainingSet, classPriors);
      }

      estimateFavoursExtBatch = true;
      // Step 3: Find better weights for existing models and continue
      // training
      if (modelInfo.size() > 0) {

        modelInfo2 = new Vector<BayBoostBaseModelInfo>();
        for (BayBoostBaseModelInfo bbbmi : modelInfo) {
          modelInfo2.add(bbbmi); // BayBoostBaseModelInfo objects
          // cannot be changed, no deep copy
          // required
        }

        // separate hold out set
        final double holdOutRatio = this.getParameterAsDouble(PARAMETER_FRACTION_HOLD_OUT_SET);
        Vector<Example> holdOutExamples = new Vector<Example>();
        if (holdOutRatio > 0) {
          RandomGenerator random = RandomGenerator.getRandomGenerator(this);
          Iterator<Example> randBatchReader = trainingSet.iterator();
          while (randBatchReader.hasNext()) {
            Example example = randBatchReader.next();
            if (random.nextDoubleInRange(0, 1) <= holdOutRatio) {
              example.setValue(streamControlAttribute, 0);
              holdOutExamples.add(example);
            }
          }
          // TODO: create new example set
          // trainingSet.updateCondition();
        }

        // model 1: train one more base classifier
        boolean trainingExamplesLeft = this.adjustBaseModelWeights(trainingSet, modelInfo);
        if (trainingExamplesLeft) {
          // "trainingExamplesLeft" needs to be checked to avoid
          // exceptions.
          // Anyway, learning does not make sense, otherwise.
          if (!this.trainAdditionalModel(trainingSet, modelInfo)) {}
        }
        ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors);

        // model 2: remove last classifier, extend batch, train on
        // extended batch
        ExampleSet extendedBatch = // because of the ">=" condition it
            // is sufficient to remember the
            // opening batch
            new ConditionedExampleSet(
                exampleSet, new BatchFilterCondition(streamControlAttribute, firstOpenBatch));
        classPriors = this.prepareExtendedBatch(extendedBatch);
        if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS) == true) {
          this.rescalePriors(extendedBatch, classPriors);
        }
        modelInfo2.remove(modelInfo2.size() - 1);
        trainingExamplesLeft = this.adjustBaseModelWeights(extendedBatch, modelInfo2);
        // If no training examples are left: no need and chance to
        // continue training.
        if (trainingExamplesLeft == false) {
          ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors);
        } else {
          boolean success = this.trainAdditionalModel(extendedBatch, modelInfo2);
          if (success) {
            ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors);
          } else {
            ensembleExtBatch = null;
            estimateFavoursExtBatch = false;
          }
        }

        if (holdOutRatio > 0) {
          Iterator hoEit = holdOutExamples.iterator();
          while (hoEit.hasNext()) {
            ((Example) hoEit.next()).setValue(streamControlAttribute, this.currentIteration);
          }
          // TODO: create new example set
          // trainingSet.updateCondition();

          if (ensembleExtBatch != null) {
            trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
            hoEit = holdOutExamples.iterator();
            int errors = 0;
            while (hoEit.hasNext()) {
              Example example = (Example) hoEit.next();
              if (example.getPredictedLabel() != example.getLabel()) errors++;
            }
            double newBatchErr = ((double) errors) / holdOutExamples.size();

            trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet);
            hoEit = holdOutExamples.iterator();
            errors = 0;
            while (hoEit.hasNext()) {
              Example example = (Example) hoEit.next();
              if (example.getPredictedLabel() != example.getLabel()) errors++;
            }
            double extBatchErr = ((double) errors) / holdOutExamples.size();

            estimateFavoursExtBatch = (extBatchErr <= newBatchErr);

            if (estimateFavoursExtBatch) {
              ensembleExtBatch =
                  this.retrainLastWeight(ensembleExtBatch, trainingSet, holdOutExamples);
            } else
              ensembleNewBatch =
                  this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples);
          } else
            ensembleNewBatch =
                this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples);
        }
      } else {
        this.trainAdditionalModel(trainingSet, modelInfo);
        ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors);
        ensembleExtBatch = null;
        estimateFavoursExtBatch = false;
      }
    }
    this.restoreOldWeights(exampleSet);
    return (ensembleExtBatch == null ? ensembleNewBatch : ensembleExtBatch);
  }