/**
 * Counterpart of {@code prepareBatch} for extended batches: walks over every example of the
 * given set, resets its weight to 1 and tallies how often each of the two label indices occurs.
 *
 * <p>Labels are cast to {@code int} and used as an index into a two-element counter, so only the
 * label indices 0 and 1 are supported. For an empty set both returned priors are {@code NaN}
 * (floating-point 0/0).
 *
 * @param extendedBatch the example set making up the extended batch
 * @return a two-element array holding the relative frequency of label index 0 and label index 1
 */
private double[] prepareExtendedBatch(ExampleSet extendedBatch) {
    final int[] labelTally = new int[2];

    for (Iterator<Example> cursor = extendedBatch.iterator(); cursor.hasNext(); ) {
        final Example current = cursor.next();
        current.setWeight(1);
        final int labelIndex = (int) current.getLabel();
        labelTally[labelIndex]++;
    }

    final int total = labelTally[0] + labelTally[1];
    return new double[] {
        ((double) labelTally[0]) / total,
        ((double) labelTally[1]) / total
    };
}
/** * Computes the weighted class priors of the boolean target attribute and shifts weights so that * the priors are equal afterwards. */ private void rescalePriors(ExampleSet exampleSet, double[] classPriors) { // The weights of class i are calculated as // (1 / #classes) / (#rel_freq_class_i) double[] weights = new double[2]; for (int i = 0; i < weights.length; i++) { weights[i] = 1.0d / (weights.length * (classPriors[i])); } Iterator<Example> exRead = exampleSet.iterator(); while (exRead.hasNext()) { Example example = exRead.next(); example.setWeight(weights[(int) (example.getLabel())]); } }
protected void prepareWeights(ExampleSet exampleSet) { Attribute weightAttr = exampleSet.getAttributes().getWeight(); if (weightAttr == null) { this.oldWeights = null; com.rapidminer.example.Tools.createWeightAttribute(exampleSet); } else { // Back up old weights this.oldWeights = new double[exampleSet.size()]; Iterator<Example> reader = exampleSet.iterator(); for (int i = 0; (reader.hasNext() && i < oldWeights.length); i++) { Example example = reader.next(); if (example != null) { this.oldWeights[i] = example.getWeight(); example.setWeight(1); } } } }
/** * The preparation part collecting the examples of a batch, computing priors and resetting weights * to 1. * * @param currentBatchNum the batch number to be assigned to the examples * @param reader the <code>Iterator<Example></code> with the cursor on the current point in the * stream. * @param batchAttribute the attribute to write the batch number to * @return the class priors of the batch */ private double[] prepareBatch( int currentBatchNum, Iterator<Example> reader, Attribute batchAttribute) throws UndefinedParameterError { final int batchSize = this.getParameterAsInt(PARAMETER_BATCH_SIZE); int batchCount = 0; // Read and classify examples from stream, as long as the buffer (next // batch) // is not full. Examples are weighted at this point, in order to // simulate sampling. int[] classCount = new int[2]; while ((batchCount++ < batchSize) && reader.hasNext()) { Example example = reader.next(); example.setValue(batchAttribute, currentBatchNum); example.setWeight(1); classCount[(int) example.getLabel()]++; } double[] classPriors = new double[2]; classPriors[0] = ((double) classCount[0]) / --batchCount; classPriors[1] = ((double) classCount[1]) / batchCount; return classPriors; }
private BayBoostModel retrainLastWeight( BayBoostModel ensemble, ExampleSet exampleSet, Vector holdOutSet) throws OperatorException { this.prepareExtendedBatch(exampleSet); // method fits by chance int modelNum = ensemble.getNumberOfModels(); Vector<BayBoostBaseModelInfo> modelInfo = new Vector<BayBoostBaseModelInfo>(); double[] priors = ensemble.getPriors(); for (int i = 0; i < modelNum - 1; i++) { Model model = ensemble.getModel(i); ContingencyMatrix cm = ensemble.getContingencyMatrix(i); modelInfo.add(new BayBoostBaseModelInfo(model, cm)); exampleSet = model.apply(exampleSet); WeightedPerformanceMeasures.reweightExamples(exampleSet, cm, false); } Model latestModel = ensemble.getModel(modelNum - 1); exampleSet = latestModel.apply(exampleSet); // quite ugly: double[] weights = new double[holdOutSet.size()]; Iterator it = holdOutSet.iterator(); int index = 0; while (it.hasNext()) { Example example = (Example) it.next(); weights[index++] = example.getWeight(); } Iterator<Example> reader = exampleSet.iterator(); while (reader.hasNext()) { reader.next().setWeight(0); } it = holdOutSet.iterator(); index = 0; while (it.hasNext()) { Example example = (Example) it.next(); example.setWeight(weights[index++]); } WeightedPerformanceMeasures wp = new WeightedPerformanceMeasures(exampleSet); modelInfo.add(new BayBoostBaseModelInfo(latestModel, wp.getContingencyMatrix())); return new BayBoostModel(exampleSet, modelInfo, priors); }