コード例 #1
0
  @Override
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    Kernel kernel = getKernel();
    kernel.init(exampleSet);

    double initLearnRate = getParameterAsDouble(PARAMETER_LEARNING_RATE);
    NominalMapping labelMapping = exampleSet.getAttributes().getLabel().getMapping();
    String classNeg = labelMapping.getNegativeString();
    String classPos = labelMapping.getPositiveString();
    double classValueNeg = labelMapping.getNegativeIndex();
    int numberOfAttributes = exampleSet.getAttributes().size();
    HyperplaneModel model = new HyperplaneModel(exampleSet, classNeg, classPos, kernel);
    model.init(new double[numberOfAttributes], 0);
    for (int round = 0; round <= getParameterAsInt(PARAMETER_ROUNDS); round++) {
      double learnRate = getLearnRate(round, getParameterAsInt(PARAMETER_ROUNDS), initLearnRate);
      Attributes attributes = exampleSet.getAttributes();
      for (Example example : exampleSet) {
        double prediction = model.predict(example);
        if (prediction != example.getLabel()) {
          double direction = (example.getLabel() == classValueNeg) ? -1 : 1;
          // adapting intercept
          model.setIntercept(model.getIntercept() + learnRate * direction);
          // adapting coefficients
          double coefficients[] = model.getCoefficients();
          int i = 0;
          for (Attribute attribute : attributes) {
            coefficients[i] += learnRate * direction * example.getValue(attribute);
            i++;
          }
        }
      }
    }
    return model;
  }
コード例 #2
0
  /** Creates a new evolutionary SVM optimization. */
  public ClassificationEvoOptimization(
      ExampleSet exampleSet, // training data
      Kernel kernel,
      double c, // SVM paras
      int initType, // start population creation type para
      int maxIterations,
      int generationsWithoutImprovement,
      int popSize, // GA paras
      int selectionType,
      double tournamentFraction,
      boolean keepBest, // selection paras
      int mutationType, // type of mutation
      double crossoverProb,
      boolean showConvergencePlot,
      boolean showPopulationPlot,
      ExampleSet holdOutSet,
      RandomGenerator random,
      LoggingHandler logging,
      Operator executingOperator) {
    super(
        EvoSVM.createBoundArray(0.0d, exampleSet.size()),
        EvoSVM.determineMax(c, kernel, exampleSet, selectionType, exampleSet.size()),
        popSize,
        exampleSet.size(),
        initType,
        maxIterations,
        generationsWithoutImprovement,
        selectionType,
        tournamentFraction,
        keepBest,
        mutationType,
        Double.NaN,
        crossoverProb,
        showConvergencePlot,
        showPopulationPlot,
        random,
        logging,
        executingOperator);
    this.exampleSet = exampleSet;
    this.holdOutSet = holdOutSet;
    this.populationSize = popSize;

    this.kernel = kernel;
    this.c = getMax(0);

    // label values
    this.ys = new double[exampleSet.size()];
    Iterator<Example> reader = exampleSet.iterator();
    int index = 0;
    Attribute label = exampleSet.getAttributes().getLabel();
    while (reader.hasNext()) {
      Example example = reader.next();
      ys[index++] = example.getLabel() == label.getMapping().getPositiveIndex() ? 1.0d : -1.0d;
    }

    // optimization function
    this.optimizationFunction =
        new ClassificationOptimizationFunction(selectionType == NON_DOMINATED_SORTING_SELECTION);
  }
コード例 #3
0
 /** returns the accuracy of the predictions for the given example set */
 private double evaluatePredictions(ExampleSet exampleSet) {
   Iterator<Example> reader = exampleSet.iterator();
   int count = 0;
   int correct = 0;
   while (reader.hasNext()) {
     count++;
     Example example = reader.next();
     if (example.getLabel() == example.getPredictedLabel()) correct++;
   }
   return ((double) correct) / count;
 }
コード例 #4
0
 public void apply(Example example) {
   if (applicable(example)) {
     double weight = 1.0d;
     if (example.getAttributes().getWeight() != null) {
       weight = example.getWeight();
     }
     coveredWeight += weight;
     if (example.getLabel()
         == example.getAttributes().getLabel().getMapping().getPositiveIndex()) {
       positiveWeight += weight;
     }
   }
 }
コード例 #5
0
 /**
  * Similar to prepareBatch, but for extended batches.
  *
  * @param extendedBatch containing the extended batch
  * @return the class priors of the batch
  */
 private double[] prepareExtendedBatch(ExampleSet extendedBatch) {
   int[] classCount = new int[2];
   Iterator<Example> reader = extendedBatch.iterator();
   while (reader.hasNext()) {
     Example example = reader.next();
     example.setWeight(1);
     classCount[(int) example.getLabel()]++;
   }
   double[] classPriors = new double[2];
   int sum = classCount[0] + classCount[1];
   classPriors[0] = ((double) classCount[0]) / sum;
   classPriors[1] = ((double) classCount[1]) / sum;
   return classPriors;
 }
コード例 #6
0
  /**
   * Computes the weighted class priors of the boolean target attribute and shifts weights so that
   * the priors are equal afterwards.
   */
  private void rescalePriors(ExampleSet exampleSet, double[] classPriors) {
    // The weights of class i are calculated as
    // (1 / #classes) / (#rel_freq_class_i)
    double[] weights = new double[2];
    for (int i = 0; i < weights.length; i++) {
      weights[i] = 1.0d / (weights.length * (classPriors[i]));
    }

    Iterator<Example> exRead = exampleSet.iterator();
    while (exRead.hasNext()) {
      Example example = exRead.next();
      example.setWeight(weights[(int) (example.getLabel())]);
    }
  }
コード例 #7
0
 /**
  * The preparation part collecting the examples of a batch, computing priors and resetting weights
  * to 1.
  *
  * @param currentBatchNum the batch number to be assigned to the examples
  * @param reader the <code>Iterator<Example></code> with the cursor on the current point in the
  *     stream.
  * @param batchAttribute the attribute to write the batch number to
  * @return the class priors of the batch
  */
 private double[] prepareBatch(
     int currentBatchNum, Iterator<Example> reader, Attribute batchAttribute)
     throws UndefinedParameterError {
   final int batchSize = this.getParameterAsInt(PARAMETER_BATCH_SIZE);
   int batchCount = 0;
   // Read and classify examples from stream, as long as the buffer (next
   // batch)
   // is not full. Examples are weighted at this point, in order to
   // simulate sampling.
   int[] classCount = new int[2];
   while ((batchCount++ < batchSize) && reader.hasNext()) {
     Example example = reader.next();
     example.setValue(batchAttribute, currentBatchNum);
     example.setWeight(1);
     classCount[(int) example.getLabel()]++;
   }
   double[] classPriors = new double[2];
   classPriors[0] = ((double) classCount[0]) / --batchCount;
   classPriors[1] = ((double) classCount[1]) / batchCount;
   return classPriors;
 }
コード例 #8
0
  private RuleModel getBestModel(
      Collection<RuleModel> models, ExampleSet exampleSet, boolean useExampleWeights) {
    Attribute exampleWeightAttribute =
        exampleSet.getAttributes().getSpecial(Attributes.WEIGHT_NAME);
    useExampleWeights = useExampleWeights && (exampleWeightAttribute != null);

    // calculating weighted error for rules
    double[] weightedError = new double[models.size()];
    double totalWeight = 0;
    for (Example example : exampleSet) {
      int i = 0;
      double currentWeight = 1;
      if (useExampleWeights) {
        currentWeight = example.getValue(exampleWeightAttribute);
      }
      double currentLabel = example.getLabel();
      totalWeight += currentWeight;
      for (RuleModel currentModel : models) {
        if (currentLabel != currentModel.getPrediction(example)) {
          weightedError[i] += currentWeight;
        }
        i++;
      }
    }

    // finding best rule
    int i = 0;
    double bestError = Double.POSITIVE_INFINITY;
    RuleModel bestModel = null;
    for (RuleModel currentModel : models) {
      if (weightedError[i] < bestError) {
        bestError = weightedError[i];
        bestModel = currentModel;
      }
      i++;
    }
    return bestModel;
  }
コード例 #9
0
  /**
   * Constructs a <code>Model</code> repeatedly running a weak learner, reweighting the training
   * example set accordingly, and combining the hypothesis using the available weighted performance
   * values.
   */
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    this.runVector = new RunVector();
    BayBoostModel ensembleNewBatch = null;
    BayBoostModel ensembleExtBatch = null;
    final Vector<BayBoostBaseModelInfo> modelInfo = new Vector<BayBoostBaseModelInfo>(); // for
    // models
    // and
    // their
    // probability
    // estimates
    Vector<BayBoostBaseModelInfo> modelInfo2 = new Vector<BayBoostBaseModelInfo>();
    this.currentIteration = 0;
    int firstOpenBatch = 1;

    // prepare the stream control attribute
    final Attribute streamControlAttribute;
    {
      Attribute attr = null;
      if ((attr = exampleSet.getAttributes().get(STREAM_CONTROL_ATTRIB_NAME)) == null)
        streamControlAttribute =
            com.rapidminer.example.Tools.createSpecialAttribute(
                exampleSet, STREAM_CONTROL_ATTRIB_NAME, Ontology.INTEGER);
      else {
        streamControlAttribute = attr;
        logWarning(
            "Attribute with the (reserved) name of the stream control attribute exists. It is probably an old version created by this operator. Trying to recycle it... ");
        // Resetting the stream control attribute values by overwriting
        // them with 0 avoids (unlikely)
        // problems in case the same ExampleSet is passed to this
        // operator over and over again:
        Iterator<Example> e = exampleSet.iterator();
        while (e.hasNext()) {
          e.next().setValue(streamControlAttribute, 0);
        }
      }
    }

    // and the weight attribute
    if (exampleSet.getAttributes().getWeight() == null) {
      this.prepareWeights(exampleSet);
    }

    boolean estimateFavoursExtBatch = true;
    // *** The main loop, one iteration per batch: ***
    Iterator<Example> reader = exampleSet.iterator();
    while (reader.hasNext()) {
      // increment batch number, collect batch and evaluate performance of
      // current model on batch
      double[] classPriors =
          this.prepareBatch(++this.currentIteration, reader, streamControlAttribute);

      ConditionedExampleSet trainingSet =
          new ConditionedExampleSet(
              exampleSet, new BatchFilterCondition(streamControlAttribute, this.currentIteration));

      final EstimatedPerformance estPerf;

      // Step 1: apply the ensemble model to the current batch (prediction
      // phase), evaluate and store result
      if (ensembleExtBatch != null) {
        // apply extended batch model first:
        trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet);
        this.performance = evaluatePredictions(trainingSet); // unweighted
        // performance;

        // then apply new batch model:
        trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
        double newBatchPerformance = evaluatePredictions(trainingSet);

        // heuristic: use extended batch model for predicting
        // unclassified instances
        if (estimateFavoursExtBatch == true)
          estPerf =
              new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false);
        else
          estPerf =
              new EstimatedPerformance("accuracy", newBatchPerformance, trainingSet.size(), false);

        // final double[] ensembleWeights;

        // continue with the better model:
        if (newBatchPerformance > this.performance) {
          this.performance = newBatchPerformance;
          firstOpenBatch = Math.max(1, this.currentIteration - 1);
          // ensembleWeights = ensembleNewBatch.getModelWeights();
        } else {
          modelInfo.clear();
          modelInfo.addAll(modelInfo2);
          // ensembleWeights = ensembleExtBatch.getModelWeights();
        }

      } else if (ensembleNewBatch != null) {
        trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
        this.performance = evaluatePredictions(trainingSet);
        firstOpenBatch = Math.max(1, this.currentIteration - 1);
        estPerf = new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false);
      } else estPerf = null; // no model ==> no prediction performance

      if (estPerf != null) {
        PerformanceVector perf = new PerformanceVector();
        perf.addAveragable(estPerf);
        this.runVector.addVector(perf);
      }

      // *** retraining phase ***
      // Step 2: First reconstruct the initial weighting, if necessary.
      if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS) == true) {
        this.rescalePriors(trainingSet, classPriors);
      }

      estimateFavoursExtBatch = true;
      // Step 3: Find better weights for existing models and continue
      // training
      if (modelInfo.size() > 0) {

        modelInfo2 = new Vector<BayBoostBaseModelInfo>();
        for (BayBoostBaseModelInfo bbbmi : modelInfo) {
          modelInfo2.add(bbbmi); // BayBoostBaseModelInfo objects
          // cannot be changed, no deep copy
          // required
        }

        // separate hold out set
        final double holdOutRatio = this.getParameterAsDouble(PARAMETER_FRACTION_HOLD_OUT_SET);
        Vector<Example> holdOutExamples = new Vector<Example>();
        if (holdOutRatio > 0) {
          RandomGenerator random = RandomGenerator.getRandomGenerator(this);
          Iterator<Example> randBatchReader = trainingSet.iterator();
          while (randBatchReader.hasNext()) {
            Example example = randBatchReader.next();
            if (random.nextDoubleInRange(0, 1) <= holdOutRatio) {
              example.setValue(streamControlAttribute, 0);
              holdOutExamples.add(example);
            }
          }
          // TODO: create new example set
          // trainingSet.updateCondition();
        }

        // model 1: train one more base classifier
        boolean trainingExamplesLeft = this.adjustBaseModelWeights(trainingSet, modelInfo);
        if (trainingExamplesLeft) {
          // "trainingExamplesLeft" needs to be checked to avoid
          // exceptions.
          // Anyway, learning does not make sense, otherwise.
          if (!this.trainAdditionalModel(trainingSet, modelInfo)) {}
        }
        ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors);

        // model 2: remove last classifier, extend batch, train on
        // extended batch
        ExampleSet extendedBatch = // because of the ">=" condition it
            // is sufficient to remember the
            // opening batch
            new ConditionedExampleSet(
                exampleSet, new BatchFilterCondition(streamControlAttribute, firstOpenBatch));
        classPriors = this.prepareExtendedBatch(extendedBatch);
        if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS) == true) {
          this.rescalePriors(extendedBatch, classPriors);
        }
        modelInfo2.remove(modelInfo2.size() - 1);
        trainingExamplesLeft = this.adjustBaseModelWeights(extendedBatch, modelInfo2);
        // If no training examples are left: no need and chance to
        // continue training.
        if (trainingExamplesLeft == false) {
          ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors);
        } else {
          boolean success = this.trainAdditionalModel(extendedBatch, modelInfo2);
          if (success) {
            ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors);
          } else {
            ensembleExtBatch = null;
            estimateFavoursExtBatch = false;
          }
        }

        if (holdOutRatio > 0) {
          Iterator hoEit = holdOutExamples.iterator();
          while (hoEit.hasNext()) {
            ((Example) hoEit.next()).setValue(streamControlAttribute, this.currentIteration);
          }
          // TODO: create new example set
          // trainingSet.updateCondition();

          if (ensembleExtBatch != null) {
            trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
            hoEit = holdOutExamples.iterator();
            int errors = 0;
            while (hoEit.hasNext()) {
              Example example = (Example) hoEit.next();
              if (example.getPredictedLabel() != example.getLabel()) errors++;
            }
            double newBatchErr = ((double) errors) / holdOutExamples.size();

            trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet);
            hoEit = holdOutExamples.iterator();
            errors = 0;
            while (hoEit.hasNext()) {
              Example example = (Example) hoEit.next();
              if (example.getPredictedLabel() != example.getLabel()) errors++;
            }
            double extBatchErr = ((double) errors) / holdOutExamples.size();

            estimateFavoursExtBatch = (extBatchErr <= newBatchErr);

            if (estimateFavoursExtBatch) {
              ensembleExtBatch =
                  this.retrainLastWeight(ensembleExtBatch, trainingSet, holdOutExamples);
            } else
              ensembleNewBatch =
                  this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples);
          } else
            ensembleNewBatch =
                this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples);
        }
      } else {
        this.trainAdditionalModel(trainingSet, modelInfo);
        ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors);
        ensembleExtBatch = null;
        estimateFavoursExtBatch = false;
      }
    }
    this.restoreOldWeights(exampleSet);
    return (ensembleExtBatch == null ? ensembleNewBatch : ensembleExtBatch);
  }