@Override public Model learn(ExampleSet exampleSet) throws OperatorException { Kernel kernel = getKernel(); kernel.init(exampleSet); double initLearnRate = getParameterAsDouble(PARAMETER_LEARNING_RATE); NominalMapping labelMapping = exampleSet.getAttributes().getLabel().getMapping(); String classNeg = labelMapping.getNegativeString(); String classPos = labelMapping.getPositiveString(); double classValueNeg = labelMapping.getNegativeIndex(); int numberOfAttributes = exampleSet.getAttributes().size(); HyperplaneModel model = new HyperplaneModel(exampleSet, classNeg, classPos, kernel); model.init(new double[numberOfAttributes], 0); for (int round = 0; round <= getParameterAsInt(PARAMETER_ROUNDS); round++) { double learnRate = getLearnRate(round, getParameterAsInt(PARAMETER_ROUNDS), initLearnRate); Attributes attributes = exampleSet.getAttributes(); for (Example example : exampleSet) { double prediction = model.predict(example); if (prediction != example.getLabel()) { double direction = (example.getLabel() == classValueNeg) ? -1 : 1; // adapting intercept model.setIntercept(model.getIntercept() + learnRate * direction); // adapting coefficients double coefficients[] = model.getCoefficients(); int i = 0; for (Attribute attribute : attributes) { coefficients[i] += learnRate * direction * example.getValue(attribute); i++; } } } } return model; }
/** Creates a new evolutionary SVM optimization. */ public ClassificationEvoOptimization( ExampleSet exampleSet, // training data Kernel kernel, double c, // SVM paras int initType, // start population creation type para int maxIterations, int generationsWithoutImprovement, int popSize, // GA paras int selectionType, double tournamentFraction, boolean keepBest, // selection paras int mutationType, // type of mutation double crossoverProb, boolean showConvergencePlot, boolean showPopulationPlot, ExampleSet holdOutSet, RandomGenerator random, LoggingHandler logging, Operator executingOperator) { super( EvoSVM.createBoundArray(0.0d, exampleSet.size()), EvoSVM.determineMax(c, kernel, exampleSet, selectionType, exampleSet.size()), popSize, exampleSet.size(), initType, maxIterations, generationsWithoutImprovement, selectionType, tournamentFraction, keepBest, mutationType, Double.NaN, crossoverProb, showConvergencePlot, showPopulationPlot, random, logging, executingOperator); this.exampleSet = exampleSet; this.holdOutSet = holdOutSet; this.populationSize = popSize; this.kernel = kernel; this.c = getMax(0); // label values this.ys = new double[exampleSet.size()]; Iterator<Example> reader = exampleSet.iterator(); int index = 0; Attribute label = exampleSet.getAttributes().getLabel(); while (reader.hasNext()) { Example example = reader.next(); ys[index++] = example.getLabel() == label.getMapping().getPositiveIndex() ? 1.0d : -1.0d; } // optimization function this.optimizationFunction = new ClassificationOptimizationFunction(selectionType == NON_DOMINATED_SORTING_SELECTION); }
/**
 * Returns the accuracy of the predictions for the given example set, i.e. the fraction of
 * examples whose label equals the predicted label. For an empty example set the result is NaN
 * (0.0 divided by 0).
 */
private double evaluatePredictions(ExampleSet exampleSet) {
  int total = 0;
  int hits = 0;
  for (Example current : exampleSet) {
    total++;
    if (current.getLabel() == current.getPredictedLabel()) {
      hits++;
    }
  }
  return ((double) hits) / total;
}
/**
 * Updates the coverage statistics with the given example. Examples that are not applicable are
 * ignored. Otherwise the example's weight (1 if the example has no weight attribute) is added to
 * the covered weight and, if the label is the positive class, also to the positive weight.
 */
public void apply(Example example) {
  if (!applicable(example)) {
    return;
  }

  final boolean hasWeightAttribute = example.getAttributes().getWeight() != null;
  final double exampleWeight = hasWeightAttribute ? example.getWeight() : 1.0d;

  coveredWeight += exampleWeight;
  if (example.getLabel()
      == example.getAttributes().getLabel().getMapping().getPositiveIndex()) {
    positiveWeight += exampleWeight;
  }
}
/**
 * Similar to prepareBatch, but for extended batches: resets the weight of every example to 1 and
 * counts how often each of the two label values occurs.
 *
 * <p>The label value is used directly as index into a two-element array, so label values other
 * than 0 and 1 are not supported here.
 *
 * @param extendedBatch containing the extended batch
 * @return the class priors of the batch (relative frequencies of label 0 and label 1)
 */
private double[] prepareExtendedBatch(ExampleSet extendedBatch) {
  final int[] labelCounts = new int[2];
  for (Example current : extendedBatch) {
    current.setWeight(1);
    final int labelIndex = (int) current.getLabel();
    labelCounts[labelIndex]++;
  }

  final int total = labelCounts[0] + labelCounts[1];
  return new double[] {((double) labelCounts[0]) / total, ((double) labelCounts[1]) / total};
}
/** * Computes the weighted class priors of the boolean target attribute and shifts weights so that * the priors are equal afterwards. */ private void rescalePriors(ExampleSet exampleSet, double[] classPriors) { // The weights of class i are calculated as // (1 / #classes) / (#rel_freq_class_i) double[] weights = new double[2]; for (int i = 0; i < weights.length; i++) { weights[i] = 1.0d / (weights.length * (classPriors[i])); } Iterator<Example> exRead = exampleSet.iterator(); while (exRead.hasNext()) { Example example = exRead.next(); example.setWeight(weights[(int) (example.getLabel())]); } }
/** * The preparation part collecting the examples of a batch, computing priors and resetting weights * to 1. * * @param currentBatchNum the batch number to be assigned to the examples * @param reader the <code>Iterator<Example></code> with the cursor on the current point in the * stream. * @param batchAttribute the attribute to write the batch number to * @return the class priors of the batch */ private double[] prepareBatch( int currentBatchNum, Iterator<Example> reader, Attribute batchAttribute) throws UndefinedParameterError { final int batchSize = this.getParameterAsInt(PARAMETER_BATCH_SIZE); int batchCount = 0; // Read and classify examples from stream, as long as the buffer (next // batch) // is not full. Examples are weighted at this point, in order to // simulate sampling. int[] classCount = new int[2]; while ((batchCount++ < batchSize) && reader.hasNext()) { Example example = reader.next(); example.setValue(batchAttribute, currentBatchNum); example.setWeight(1); classCount[(int) example.getLabel()]++; } double[] classPriors = new double[2]; classPriors[0] = ((double) classCount[0]) / --batchCount; classPriors[1] = ((double) classCount[1]) / batchCount; return classPriors; }
private RuleModel getBestModel( Collection<RuleModel> models, ExampleSet exampleSet, boolean useExampleWeights) { Attribute exampleWeightAttribute = exampleSet.getAttributes().getSpecial(Attributes.WEIGHT_NAME); useExampleWeights = useExampleWeights && (exampleWeightAttribute != null); // calculating weighted error for rules double[] weightedError = new double[models.size()]; double totalWeight = 0; for (Example example : exampleSet) { int i = 0; double currentWeight = 1; if (useExampleWeights) { currentWeight = example.getValue(exampleWeightAttribute); } double currentLabel = example.getLabel(); totalWeight += currentWeight; for (RuleModel currentModel : models) { if (currentLabel != currentModel.getPrediction(example)) { weightedError[i] += currentWeight; } i++; } } // finding best rule int i = 0; double bestError = Double.POSITIVE_INFINITY; RuleModel bestModel = null; for (RuleModel currentModel : models) { if (weightedError[i] < bestError) { bestError = weightedError[i]; bestModel = currentModel; } i++; } return bestModel; }
/** * Constructs a <code>Model</code> repeatedly running a weak learner, reweighting the training * example set accordingly, and combining the hypothesis using the available weighted performance * values. */ public Model learn(ExampleSet exampleSet) throws OperatorException { this.runVector = new RunVector(); BayBoostModel ensembleNewBatch = null; BayBoostModel ensembleExtBatch = null; final Vector<BayBoostBaseModelInfo> modelInfo = new Vector<BayBoostBaseModelInfo>(); // for // models // and // their // probability // estimates Vector<BayBoostBaseModelInfo> modelInfo2 = new Vector<BayBoostBaseModelInfo>(); this.currentIteration = 0; int firstOpenBatch = 1; // prepare the stream control attribute final Attribute streamControlAttribute; { Attribute attr = null; if ((attr = exampleSet.getAttributes().get(STREAM_CONTROL_ATTRIB_NAME)) == null) streamControlAttribute = com.rapidminer.example.Tools.createSpecialAttribute( exampleSet, STREAM_CONTROL_ATTRIB_NAME, Ontology.INTEGER); else { streamControlAttribute = attr; logWarning( "Attribute with the (reserved) name of the stream control attribute exists. It is probably an old version created by this operator. Trying to recycle it... 
"); // Resetting the stream control attribute values by overwriting // them with 0 avoids (unlikely) // problems in case the same ExampleSet is passed to this // operator over and over again: Iterator<Example> e = exampleSet.iterator(); while (e.hasNext()) { e.next().setValue(streamControlAttribute, 0); } } } // and the weight attribute if (exampleSet.getAttributes().getWeight() == null) { this.prepareWeights(exampleSet); } boolean estimateFavoursExtBatch = true; // *** The main loop, one iteration per batch: *** Iterator<Example> reader = exampleSet.iterator(); while (reader.hasNext()) { // increment batch number, collect batch and evaluate performance of // current model on batch double[] classPriors = this.prepareBatch(++this.currentIteration, reader, streamControlAttribute); ConditionedExampleSet trainingSet = new ConditionedExampleSet( exampleSet, new BatchFilterCondition(streamControlAttribute, this.currentIteration)); final EstimatedPerformance estPerf; // Step 1: apply the ensemble model to the current batch (prediction // phase), evaluate and store result if (ensembleExtBatch != null) { // apply extended batch model first: trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet); this.performance = evaluatePredictions(trainingSet); // unweighted // performance; // then apply new batch model: trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet); double newBatchPerformance = evaluatePredictions(trainingSet); // heuristic: use extended batch model for predicting // unclassified instances if (estimateFavoursExtBatch == true) estPerf = new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false); else estPerf = new EstimatedPerformance("accuracy", newBatchPerformance, trainingSet.size(), false); // final double[] ensembleWeights; // continue with the better model: if (newBatchPerformance > this.performance) { this.performance = newBatchPerformance; firstOpenBatch = Math.max(1, this.currentIteration - 
1); // ensembleWeights = ensembleNewBatch.getModelWeights(); } else { modelInfo.clear(); modelInfo.addAll(modelInfo2); // ensembleWeights = ensembleExtBatch.getModelWeights(); } } else if (ensembleNewBatch != null) { trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet); this.performance = evaluatePredictions(trainingSet); firstOpenBatch = Math.max(1, this.currentIteration - 1); estPerf = new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false); } else estPerf = null; // no model ==> no prediction performance if (estPerf != null) { PerformanceVector perf = new PerformanceVector(); perf.addAveragable(estPerf); this.runVector.addVector(perf); } // *** retraining phase *** // Step 2: First reconstruct the initial weighting, if necessary. if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS) == true) { this.rescalePriors(trainingSet, classPriors); } estimateFavoursExtBatch = true; // Step 3: Find better weights for existing models and continue // training if (modelInfo.size() > 0) { modelInfo2 = new Vector<BayBoostBaseModelInfo>(); for (BayBoostBaseModelInfo bbbmi : modelInfo) { modelInfo2.add(bbbmi); // BayBoostBaseModelInfo objects // cannot be changed, no deep copy // required } // separate hold out set final double holdOutRatio = this.getParameterAsDouble(PARAMETER_FRACTION_HOLD_OUT_SET); Vector<Example> holdOutExamples = new Vector<Example>(); if (holdOutRatio > 0) { RandomGenerator random = RandomGenerator.getRandomGenerator(this); Iterator<Example> randBatchReader = trainingSet.iterator(); while (randBatchReader.hasNext()) { Example example = randBatchReader.next(); if (random.nextDoubleInRange(0, 1) <= holdOutRatio) { example.setValue(streamControlAttribute, 0); holdOutExamples.add(example); } } // TODO: create new example set // trainingSet.updateCondition(); } // model 1: train one more base classifier boolean trainingExamplesLeft = this.adjustBaseModelWeights(trainingSet, modelInfo); if 
(trainingExamplesLeft) { // "trainingExamplesLeft" needs to be checked to avoid // exceptions. // Anyway, learning does not make sense, otherwise. if (!this.trainAdditionalModel(trainingSet, modelInfo)) {} } ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors); // model 2: remove last classifier, extend batch, train on // extended batch ExampleSet extendedBatch = // because of the ">=" condition it // is sufficient to remember the // opening batch new ConditionedExampleSet( exampleSet, new BatchFilterCondition(streamControlAttribute, firstOpenBatch)); classPriors = this.prepareExtendedBatch(extendedBatch); if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS) == true) { this.rescalePriors(extendedBatch, classPriors); } modelInfo2.remove(modelInfo2.size() - 1); trainingExamplesLeft = this.adjustBaseModelWeights(extendedBatch, modelInfo2); // If no training examples are left: no need and chance to // continue training. if (trainingExamplesLeft == false) { ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors); } else { boolean success = this.trainAdditionalModel(extendedBatch, modelInfo2); if (success) { ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors); } else { ensembleExtBatch = null; estimateFavoursExtBatch = false; } } if (holdOutRatio > 0) { Iterator hoEit = holdOutExamples.iterator(); while (hoEit.hasNext()) { ((Example) hoEit.next()).setValue(streamControlAttribute, this.currentIteration); } // TODO: create new example set // trainingSet.updateCondition(); if (ensembleExtBatch != null) { trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet); hoEit = holdOutExamples.iterator(); int errors = 0; while (hoEit.hasNext()) { Example example = (Example) hoEit.next(); if (example.getPredictedLabel() != example.getLabel()) errors++; } double newBatchErr = ((double) errors) / holdOutExamples.size(); trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet); hoEit 
= holdOutExamples.iterator(); errors = 0; while (hoEit.hasNext()) { Example example = (Example) hoEit.next(); if (example.getPredictedLabel() != example.getLabel()) errors++; } double extBatchErr = ((double) errors) / holdOutExamples.size(); estimateFavoursExtBatch = (extBatchErr <= newBatchErr); if (estimateFavoursExtBatch) { ensembleExtBatch = this.retrainLastWeight(ensembleExtBatch, trainingSet, holdOutExamples); } else ensembleNewBatch = this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples); } else ensembleNewBatch = this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples); } } else { this.trainAdditionalModel(trainingSet, modelInfo); ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors); ensembleExtBatch = null; estimateFavoursExtBatch = false; } } this.restoreOldWeights(exampleSet); return (ensembleExtBatch == null ? ensembleNewBatch : ensembleExtBatch); }