/** * Iterates over all models and returns the class with maximum likelihood. * * @param origExampleSet the set of examples to be classified */ @Override public ExampleSet performPrediction(ExampleSet origExampleSet, Attribute predictedLabel) throws OperatorException { final String attributePrefix = "AdaBoostModelPrediction"; final int numLabels = predictedLabel.getMapping().size(); final Attribute[] specialAttributes = new Attribute[numLabels]; OperatorProgress progress = null; if (getShowProgress() && getOperator() != null && getOperator().getProgress() != null) { progress = getOperator().getProgress(); progress.setTotal(100); } for (int i = 0; i < numLabels; i++) { specialAttributes[i] = com.rapidminer.example.Tools.createSpecialAttribute( origExampleSet, attributePrefix + i, Ontology.NUMERICAL); if (progress != null) { progress.setCompleted((int) (25.0 * (i + 1) / numLabels)); } } Iterator<Example> reader = origExampleSet.iterator(); int progressCounter = 0; while (reader.hasNext()) { Example example = reader.next(); for (int i = 0; i < specialAttributes.length; i++) { example.setValue(specialAttributes[i], 0); } if (progress != null && ++progressCounter % OPERATOR_PROGRESS_STEPS == 0) { progress.setCompleted((int) (25.0 * progressCounter / origExampleSet.size()) + 25); } } reader = origExampleSet.iterator(); for (int modelNr = 0; modelNr < this.getNumberOfModels(); modelNr++) { Model model = this.getModel(modelNr); ExampleSet exampleSet = (ExampleSet) origExampleSet.clone(); exampleSet = model.apply(exampleSet); this.updateEstimates(exampleSet, modelNr, specialAttributes); PredictionModel.removePredictedLabel(exampleSet); if (progress != null) { progress.setCompleted((int) (25.0 * (modelNr + 1) / this.getNumberOfModels()) + 50); } } // Turn prediction weights into confidences and a crisp predcition: this.evaluateSpecialAttributes(origExampleSet, specialAttributes); // Clean up attributes: for (int i = 0; i < numLabels; i++) { 
origExampleSet.getAttributes().remove(specialAttributes[i]); origExampleSet.getExampleTable().removeAttribute(specialAttributes[i]); if (progress != null) { progress.setCompleted((int) (25.0 * (i + 1) / numLabels) + 75); } } return origExampleSet; }
/** @see com.rapidminer.operator.OperatorChain#doWork() */ @Override public void doWork() throws OperatorException { List<Operator> nested = this.getImmediateChildren(); log.info("This StreamProcess has {} nested operators", nested.size()); for (Operator op : nested) { log.info(" op: {}", op); if (op instanceof DataStreamOperator) { log.info("Resetting stream-operator {}", op); ((DataStreamOperator) op).reset(); } } log.info("Starting some work in doWork()"); ExampleSet exampleSet = input.getData(ExampleSet.class); log.info("input is an example set with {} examples", exampleSet.size()); int i = 0; Iterator<Example> it = exampleSet.iterator(); while (it.hasNext()) { Example example = it.next(); log.info("Processing example {}", i); DataObject datum = StreamUtils.wrap(example); log.info("Wrapped data-object is: {}", datum); dataStream.deliver(datum); getSubprocess(0).execute(); inApplyLoop(); i++; } // super.doWork(); log.info("doWork() is finished."); }
/**
 * Computes mean and (population) variance for every attribute of the example set.
 *
 * <p>The result is keyed by the position of the attribute in the iteration order of {@code
 * exampleSet.getAttributes()}. The variance is computed as E[x^2] - E[x]^2; because that
 * difference can come out slightly negative through floating-point rounding (which would turn
 * into NaN as soon as a caller takes its square root), it is clamped at zero. For an empty
 * example set mean and variance are NaN.
 *
 * @param exampleSet the examples to summarise
 * @return a map from attribute position to its mean/variance pair
 */
private static Map<Integer, MeanVariance> createMeanVariances(
    com.rapidminer.example.ExampleSet exampleSet) {
  double[] sum = new double[exampleSet.getAttributes().size()];
  double[] squaredSum = new double[sum.length];

  // Single pass: accumulate sum and sum of squares per attribute position.
  Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
  while (reader.hasNext()) {
    com.rapidminer.example.Example example = reader.next();
    int a = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
      double value = example.getValue(attribute);
      sum[a] += value;
      squaredSum[a] += value * value;
      a++;
    }
  }

  final int numberOfExamples = exampleSet.size();
  Map<Integer, MeanVariance> meanVariances = new HashMap<Integer, MeanVariance>();
  for (int a = 0; a < sum.length; a++) {
    double mean = sum[a] / numberOfExamples;
    double meanOfSquares = squaredSum[a] / numberOfExamples;
    // Math.max keeps NaN (empty set) but removes tiny negative rounding residues.
    double variance = Math.max(0.0d, meanOfSquares - (mean * mean));
    meanVariances.put(a, new MeanVariance(mean, variance));
  }
  return meanVariances;
}
/** * Computes Kendall's tau-b rank correlation statistic, ignoring examples containing missing * values, with approximate comparisons. * * @param eSet the example set * @param a the first attribute to correlate * @param b the second attribute to correlate * @param fuzz values within +/- fuzz may be considered tied * @return Kendall's tau-b rank correlation * @throws OperatorException */ public static double tau_b(ExampleSet eSet, Attribute a, Attribute b, double fuzz) throws OperatorException { ExampleSet e = extract(eSet, a, b); // reduced example set FuzzyComp fc = new FuzzyComp(fuzz); int c = 0; // concordant pairs int d = 0; // discordant pairs int ta = 0; // pairs tied on a (only) int tb = 0; // pairs tied on b (only) int tc = 0; // pairs tied on both a and b int n = 0; // number of times iterator i is bumped Iterator<Example> i = e.iterator(); while (i.hasNext()) { // iterate through all possible pairs Example z1 = i.next(); n++; double x = z1.getValue(a); double y = z1.getValue(b); if (b.isNominal() && a != null) { String yString = b.getMapping().mapIndex((int) y); y = a.getMapping().getIndex(yString); } Iterator<Example> j = e.iterator(); for (int k = 0; k < n; k++) j.next(); // increment j to match i while (j.hasNext()) { // move on to subsequent examples Example z2 = j.next(); double xx = z2.getValue(a); double yy = z2.getValue(b); if (b.isNominal() && a != null) { String yyString = b.getMapping().mapIndex((int) yy); yy = a.getMapping().getIndex(yyString); } int xc = fc.compare(x, xx); int yc = fc.compare(y, yy); if (xc == 0) { if (yc == 0) tc++; // tied on both attributes else ta++; // tied only on a } else if (yc == 0) tb++; // tied only on b else if (xc == yc) c++; // concordant pair else d++; // discordant pair } } double num = c - d; double den = Math.sqrt((c + d + ta) * (c + d + tb)); if (den != 0) return num / den; else return 0; }
/** Creates a new evolutionary SVM optimization. */ public ClassificationEvoOptimization( ExampleSet exampleSet, // training data Kernel kernel, double c, // SVM paras int initType, // start population creation type para int maxIterations, int generationsWithoutImprovement, int popSize, // GA paras int selectionType, double tournamentFraction, boolean keepBest, // selection paras int mutationType, // type of mutation double crossoverProb, boolean showConvergencePlot, boolean showPopulationPlot, ExampleSet holdOutSet, RandomGenerator random, LoggingHandler logging, Operator executingOperator) { super( EvoSVM.createBoundArray(0.0d, exampleSet.size()), EvoSVM.determineMax(c, kernel, exampleSet, selectionType, exampleSet.size()), popSize, exampleSet.size(), initType, maxIterations, generationsWithoutImprovement, selectionType, tournamentFraction, keepBest, mutationType, Double.NaN, crossoverProb, showConvergencePlot, showPopulationPlot, random, logging, executingOperator); this.exampleSet = exampleSet; this.holdOutSet = holdOutSet; this.populationSize = popSize; this.kernel = kernel; this.c = getMax(0); // label values this.ys = new double[exampleSet.size()]; Iterator<Example> reader = exampleSet.iterator(); int index = 0; Attribute label = exampleSet.getAttributes().getLabel(); while (reader.hasNext()) { Example example = reader.next(); ys[index++] = example.getLabel() == label.getMapping().getPositiveIndex() ? 1.0d : -1.0d; } // optimization function this.optimizationFunction = new ClassificationOptimizationFunction(selectionType == NON_DOMINATED_SORTING_SELECTION); }
/**
 * Builds the SVM example store from a RapidMiner example set.
 *
 * <p>Alpha values and b start at zero (via the delegated constructor). For every example only the
 * values that are not the attribute's default are stored, together with their attribute
 * positions. Where a mean/variance entry exists for a position the value is standardised; a zero
 * variance maps the value to 0. Labels are stored when a label attribute is given (nominal labels
 * become +1 for the positive index and -1 otherwise), ids when an id attribute exists.
 */
public SVMExamples(
    com.rapidminer.example.ExampleSet exampleSet,
    Attribute labelAttribute,
    Map<Integer, MeanVariance> meanVariances) {
  this(exampleSet.size(), 0.0d);
  this.meanVarianceMap = meanVariances;

  Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
  Attribute idAttribute = exampleSet.getAttributes().getId();
  for (int row = 0; reader.hasNext(); row++) {
    com.rapidminer.example.Example current = reader.next();

    // Step 1: collect the non-default values, keyed by attribute position (insertion-ordered).
    Map<Integer, Double> sparseValues = new LinkedHashMap<Integer, Double>();
    int position = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
      double raw = current.getValue(attribute);
      boolean isDefault = com.rapidminer.example.Tools.isDefault(attribute.getDefault(), raw);
      if (!isDefault) {
        sparseValues.put(position, raw);
      }
      position++;
      // dim tracks the largest number of attribute positions seen so far.
      if (position > dim) {
        dim = position;
      }
    }

    // Step 2: copy them into the sparse arrays, standardising where statistics are known.
    atts[row] = new double[sparseValues.size()];
    index[row] = new int[sparseValues.size()];
    int slot = 0;
    for (Map.Entry<Integer, Double> entry : sparseValues.entrySet()) {
      Integer attributePosition = entry.getKey();
      index[row][slot] = attributePosition.intValue();

      double stored = entry.getValue().doubleValue();
      MeanVariance stats = meanVarianceMap.get(attributePosition);
      if (stats != null) {
        if (stats.getVariance() == 0.0d) {
          stored = 0.0d;
        } else {
          stored = (stored - stats.getMean()) / Math.sqrt(stats.getVariance());
        }
      }
      atts[row][slot] = stored;
      slot++;
    }

    // Step 3: label and id, if available.
    if (labelAttribute != null) {
      double label = current.getValue(labelAttribute);
      if (!labelAttribute.isNominal()) {
        ys[row] = label;
      } else if (label == labelAttribute.getMapping().getPositiveIndex()) {
        ys[row] = 1;
      } else {
        ys[row] = -1;
      }
    }
    if (idAttribute != null) {
      ids[row] = current.getValueAsString(idAttribute);
    }
  }
}
/** * Computes Kendall's tau-b rank correlation statistic, ignoring examples containing missing * values. * * @param eSet the example set * @param a the first attribute to correlate * @param b the second attribute to correlate * @return Kendall's tau-b rank correlation * @throws OperatorException */ public static double tau_b(ExampleSet eSet, Attribute a, Attribute b) throws OperatorException { ExampleSet e = extract(eSet, a, b); // reduced example set long c = 0; // concordant pairs long d = 0; // discordant pairs long ta = 0; // pairs tied on a (only) long tb = 0; // pairs tied on b (only) long tc = 0; // pairs tied on both a and b int n = 0; // number of times iterator i is bumped Iterator<Example> i = e.iterator(); while (i.hasNext()) { // iterate through all possible pairs Example z1 = i.next(); n++; double x = z1.getValue(a); double y = z1.getValue(b); if (b.isNominal() && a != null) { String yString = b.getMapping().mapIndex((int) y); y = a.getMapping().getIndex(yString); } Iterator<Example> j = e.iterator(); for (int k = 0; k < n; k++) j.next(); // increment j to match i while (j.hasNext()) { // move on to subsequent examples Example z2 = j.next(); double xx = z2.getValue(a); double yy = z2.getValue(b); if (b.isNominal() && a != null) { String yyString = b.getMapping().mapIndex((int) yy); yy = a.getMapping().getIndex(yyString); } if (x == xx) { if (y == yy) tc++; // tied on both attributes else ta++; // tied only on a } else if (y == yy) tb++; // tied only on b else if ((x > xx && y > yy) || (x < xx && y < yy)) c++; // concordant pair else d++; // discordant pair } } double num = c - d; double f1 = c + d + ta; double f2 = c + d + tb; double den = Math.sqrt(f1 * f2); if (den != 0) return num / den; else return 0; }
public Model learn(ExampleSet exampleSet) throws OperatorException { double value = 0.0; double[] confidences = null; int method = getParameterAsInt(PARAMETER_METHOD); Attribute label = exampleSet.getAttributes().getLabel(); if ((label.isNominal()) && ((method == MEDIAN) || (method == AVERAGE))) { logWarning( "Cannot use method '" + METHODS[method] + "' for nominal labels: changing to 'mode'!"); method = MODE; } else if ((!label.isNominal()) && (method == MODE)) { logWarning( "Cannot use method '" + METHODS[method] + "' for numerical labels: changing to 'average'!"); method = AVERAGE; } switch (method) { case MEDIAN: double[] labels = new double[exampleSet.size()]; Iterator<Example> r = exampleSet.iterator(); int counter = 0; while (r.hasNext()) { Example example = r.next(); labels[counter++] = example.getValue(example.getAttributes().getLabel()); } java.util.Arrays.sort(labels); value = labels[exampleSet.size() / 2]; break; case AVERAGE: exampleSet.recalculateAttributeStatistics(label); value = exampleSet.getStatistics(label, Statistics.AVERAGE); break; case MODE: exampleSet.recalculateAttributeStatistics(label); value = exampleSet.getStatistics(label, Statistics.MODE); confidences = new double[label.getMapping().size()]; for (int i = 0; i < confidences.length; i++) { confidences[i] = exampleSet.getStatistics(label, Statistics.COUNT, label.getMapping().mapIndex(i)) / exampleSet.size(); } break; case CONSTANT: value = getParameterAsDouble(PARAMETER_CONSTANT); break; case ATTRIBUTE: return new AttributeDefaultModel( exampleSet, getParameterAsString(PARAMETER_ATTRIBUTE_NAME)); default: // cannot happen throw new OperatorException("DefaultLearner: Unknown default method '" + method + "'!"); } log( "Default value is '" + (label.isNominal() ? label.getMapping().mapIndex((int) value) : value + "") + "'."); return new DefaultModel(exampleSet, value, confidences); }
/**
 * Returns the accuracy of the predictions for the given example set, i.e. the fraction of
 * examples whose label equals the predicted label. The result is NaN for an empty example set.
 */
private double evaluatePredictions(ExampleSet exampleSet) {
  int total = 0;
  int hits = 0;
  for (Iterator<Example> cursor = exampleSet.iterator(); cursor.hasNext(); ) {
    total++;
    Example current = cursor.next();
    boolean predictedCorrectly = current.getLabel() == current.getPredictedLabel();
    if (predictedCorrectly) {
      hits++;
    }
  }
  double numerator = hits;
  return numerator / total;
}
private void restoreOldWeights(ExampleSet exampleSet) { if (this.oldWeights != null) { // need to reset weights Iterator<Example> reader = exampleSet.iterator(); int i = 0; while (reader.hasNext() && i < this.oldWeights.length) { reader.next().setWeight(this.oldWeights[i++]); } } else { // need to delete the weights attribute Attribute weight = exampleSet.getAttributes().getWeight(); exampleSet.getAttributes().remove(weight); exampleSet.getExampleTable().removeAttribute(weight); } }
/** * Computes the weighted class priors of the boolean target attribute and shifts weights so that * the priors are equal afterwards. */ private void rescalePriors(ExampleSet exampleSet, double[] classPriors) { // The weights of class i are calculated as // (1 / #classes) / (#rel_freq_class_i) double[] weights = new double[2]; for (int i = 0; i < weights.length; i++) { weights[i] = 1.0d / (weights.length * (classPriors[i])); } Iterator<Example> exRead = exampleSet.iterator(); while (exRead.hasNext()) { Example example = exRead.next(); example.setWeight(weights[(int) (example.getLabel())]); } }
/**
 * Similar to prepareBatch, but for extended batches: resets the weight of every example to 1 and
 * counts how often each of the two label values occurs.
 *
 * @param extendedBatch containing the extended batch
 * @return the class priors of the batch (relative frequencies of label 0 and label 1)
 */
private double[] prepareExtendedBatch(ExampleSet extendedBatch) {
  int[] occurrences = new int[2];
  for (Iterator<Example> cursor = extendedBatch.iterator(); cursor.hasNext(); ) {
    Example current = cursor.next();
    current.setWeight(1);
    int labelIndex = (int) current.getLabel();
    occurrences[labelIndex]++;
  }

  int total = occurrences[0] + occurrences[1];
  double priorOfFirst = ((double) occurrences[0]) / total;
  double priorOfSecond = ((double) occurrences[1]) / total;
  return new double[] {priorOfFirst, priorOfSecond};
}
/**
 * Splits the example set into two partitions by thresholding one attribute: examples whose
 * attribute value is less than or equal to {@code value} go to partition 0, all others to
 * partition 1.
 */
public static SplittedExampleSet splitByAttribute(
    ExampleSet exampleSet, Attribute attribute, double value) {
  int[] assignment = new int[exampleSet.size()];
  int position = 0;
  for (Iterator<Example> cursor = exampleSet.iterator(); cursor.hasNext(); position++) {
    double observed = cursor.next().getValue(attribute);
    assignment[position] = (observed <= value) ? 0 : 1;
  }
  return new SplittedExampleSet(exampleSet, new Partition(assignment, 2));
}
/**
 * Adds the weight of model {@code modelNr} to the accumulator attribute of the class that the
 * model predicted, for every example. A NaN accumulator is logged and treated as 0; an infinite
 * accumulator is left untouched.
 */
private void updateEstimates(ExampleSet exampleSet, int modelNr, Attribute[] specialAttributes) {
  for (Iterator<Example> cursor = exampleSet.iterator(); cursor.hasNext(); ) {
    Example current = cursor.next();
    int predictedIndex = (int) current.getPredictedLabel();
    Attribute accumulator = specialAttributes[predictedIndex];

    double accumulated = current.getValue(accumulator);
    if (Double.isNaN(accumulated)) {
      logWarning("Found NaN confidence as intermediate prediction.");
      accumulated = 0;
    }
    if (Double.isInfinite(accumulated)) {
      continue;
    }
    current.setValue(accumulator, accumulated + this.getWeightForModel(modelNr));
  }
}
protected void prepareWeights(ExampleSet exampleSet) { Attribute weightAttr = exampleSet.getAttributes().getWeight(); if (weightAttr == null) { this.oldWeights = null; com.rapidminer.example.Tools.createWeightAttribute(exampleSet); } else { // Back up old weights this.oldWeights = new double[exampleSet.size()]; Iterator<Example> reader = exampleSet.iterator(); for (int i = 0; (reader.hasNext() && i < oldWeights.length); i++) { Example example = reader.next(); if (example != null) { this.oldWeights[i] = example.getWeight(); example.setWeight(1); } } } }
private void evaluateSpecialAttributes(ExampleSet exampleSet, Attribute[] specialAttributes) { Attribute predictedLabel = exampleSet.getAttributes().getPredictedLabel(); Iterator<Example> reader = exampleSet.iterator(); while (reader.hasNext()) { Example example = reader.next(); double sum = 0; double[] confidences = new double[specialAttributes.length]; double bestConf = -1; int bestLabel = 0; for (int n = 0; n < confidences.length; n++) { confidences[n] = example.getValue(specialAttributes[n]); if (confidences[n] > bestConf) { bestConf = confidences[n]; bestLabel = n; } } example.setValue( predictedLabel, predictedLabel.getMapping().mapString(this.getLabel().getMapping().mapIndex(bestLabel))); for (int n = 0; n < confidences.length; n++) { confidences[n] = Math.exp(confidences[n] - bestConf); // remember for normalization: sum += confidences[n]; } // Normalize and set confidence values for all classes: if (Double.isInfinite(sum) || Double.isNaN(sum)) { int best = (int) example.getPredictedLabel(); for (int k = 0; k < confidences.length; k++) { confidences[k] = 0; } confidences[best] = 1; } else { for (int k = 0; k < confidences.length; k++) { confidences[k] /= sum; example.setConfidence(predictedLabel.getMapping().mapIndex(k), confidences[k]); } } } }
private BayBoostModel retrainLastWeight( BayBoostModel ensemble, ExampleSet exampleSet, Vector holdOutSet) throws OperatorException { this.prepareExtendedBatch(exampleSet); // method fits by chance int modelNum = ensemble.getNumberOfModels(); Vector<BayBoostBaseModelInfo> modelInfo = new Vector<BayBoostBaseModelInfo>(); double[] priors = ensemble.getPriors(); for (int i = 0; i < modelNum - 1; i++) { Model model = ensemble.getModel(i); ContingencyMatrix cm = ensemble.getContingencyMatrix(i); modelInfo.add(new BayBoostBaseModelInfo(model, cm)); exampleSet = model.apply(exampleSet); WeightedPerformanceMeasures.reweightExamples(exampleSet, cm, false); } Model latestModel = ensemble.getModel(modelNum - 1); exampleSet = latestModel.apply(exampleSet); // quite ugly: double[] weights = new double[holdOutSet.size()]; Iterator it = holdOutSet.iterator(); int index = 0; while (it.hasNext()) { Example example = (Example) it.next(); weights[index++] = example.getWeight(); } Iterator<Example> reader = exampleSet.iterator(); while (reader.hasNext()) { reader.next().setWeight(0); } it = holdOutSet.iterator(); index = 0; while (it.hasNext()) { Example example = (Example) it.next(); example.setWeight(weights[index++]); } WeightedPerformanceMeasures wp = new WeightedPerformanceMeasures(exampleSet); modelInfo.add(new BayBoostBaseModelInfo(latestModel, wp.getContingencyMatrix())); return new BayBoostModel(exampleSet, modelInfo, priors); }
/** * Calculates ranks for an attribute. * * <p>Ranks are returned as double precision values, with 1 as the rank of the smallest value. * Values within +/- fuzz of each other may be considered tied. Tied values receive identical * ranks. Missing values receive rank NaN. * * <p>Note that application of the "fuzz" factor is dependent on the order of the observations in * the example set. For instance, if the first three values encountered are x, x+fuzz and * x+2*fuzz, the first two will be considered tied but the third will not, since x+2*fuzz is not * within +/- fuzz of x. * * @param eSet the example set * @param att the attribute to rank * @param fuzz values within +/- fuzz may be considered tied * @return a double precision array of ranks */ public static double[] rank(ExampleSet eSet, Attribute att, Attribute mappingAtt, double fuzz) { TreeMap<Double, ArrayList<Integer>> map; if (fuzz == 0.0) map = new TreeMap<Double, ArrayList<Integer>>(); else { FuzzyComp fc = new FuzzyComp(fuzz); map = new TreeMap<Double, ArrayList<Integer>>(fc); } double[] rank = new double[eSet.size()]; Iterator<Example> reader = eSet.iterator(); int i = 0; // example index // iterate through the example set while (reader.hasNext()) { // get the attribute values from the next example Example e = reader.next(); double x = e.getValue(att); if (att.isNominal() && mappingAtt != null) { String xString = att.getMapping().mapIndex((int) x); x = mappingAtt.getMapping().getIndex(xString); } // punt if either is missing if (Double.isNaN(x)) rank[i++] = Double.NaN; else { // insert x into the tree if (!map.containsKey(x)) // new key -- create a new entry in the map map.put(x, new ArrayList<Integer>()); map.get(x).add(i++); // add the index to the list } } // convert the map to ranks double r = 0; for (double x : map.keySet()) { ArrayList<Integer> y = map.get(x); double v = r + (1.0 + y.size()) / 2.0; for (int j : y) rank[j] = v; r += y.size(); } return rank; }
public IOObject[] apply() throws OperatorException { ExampleSet eSet = getInput(ExampleSet.class); // only warning, removing is done by createSpecialAttribute(...) Attribute idAttribute = eSet.getAttributes().getId(); if (idAttribute != null) { logWarning("Overwriting old id attribute!"); } // create new id attribute boolean nominalIds = getParameterAsBoolean(PARAMETER_CREATE_NOMINAL_IDS); idAttribute = Tools.createSpecialAttribute( eSet, Attributes.ID_NAME, nominalIds ? Ontology.NOMINAL : Ontology.INTEGER); // set IDs int currentId = 1; Iterator<Example> r = eSet.iterator(); while (r.hasNext()) { Example example = r.next(); example.setValue( idAttribute, nominalIds ? idAttribute.getMapping().mapString("id_" + currentId) : currentId); currentId++; checkForStop(); } // initialize example visualizer Operator visualizer = null; try { visualizer = OperatorService.createOperator(ExampleVisualizationOperator.class); } catch (OperatorCreationException e) { logNote("Cannot initialize example visualizer, skipping..."); } if (visualizer != null) visualizer.apply(new IOContainer(eSet)); return new IOObject[] {eSet}; }
/**
 * Applies the wrapped model and replaces its confidences by rescaled ones.
 *
 * <p>For every example the positive-class confidence is passed through {@code
 * PlattScaling.getLogOddsPosConfidence} and then through the sigmoid 1 / (1 + exp(v * A + B))
 * with the stored parameters A and B. The negative confidence is the complement, and the
 * prediction is the positive class whenever its scaled confidence is at least as large.
 */
@Override
public ExampleSet performPrediction(ExampleSet exampleSet, Attribute predictedLabel)
    throws OperatorException {
  Attribute label = this.getLabel();
  final int positiveIndex = label.getMapping().getPositiveIndex();
  final int negativeIndex = label.getMapping().getNegativeIndex();
  final String positiveName = label.getMapping().mapIndex(positiveIndex);
  final String negativeName = label.getMapping().mapIndex(negativeIndex);

  exampleSet = model.apply(exampleSet);

  for (Iterator<Example> cursor = exampleSet.iterator(); cursor.hasNext(); ) {
    Example current = cursor.next();
    double logOdds = PlattScaling.getLogOddsPosConfidence(current.getConfidence(positiveName));
    double exponent = logOdds * parameters.getA() + parameters.getB();
    double positiveConfidence = 1.0d / (1.0d + Math.exp(exponent));
    double negativeConfidence = 1.0d - positiveConfidence;

    int predictedIndex = negativeIndex;
    if (positiveConfidence >= negativeConfidence) {
      predictedIndex = positiveIndex;
    }
    current.setValue(predictedLabel, predictedIndex);
    current.setConfidence(positiveName, positiveConfidence);
    current.setConfidence(negativeName, negativeConfidence);
  }
  return exampleSet;
}
@Override protected void createMatrices() { List<Attribute> attributes = new ArrayList<Attribute>(exampleSet.getAttributes().size()); for (Attribute attribute : exampleSet.getAttributes()) { attributes.add((Attribute) attribute.clone()); } MemoryExampleTable table = new MemoryExampleTable(attributes); for (int x = 0; x < dimensions[0]; x++) { for (int y = 0; y < dimensions[1]; y++) { DataRow row = new DoubleArrayDataRow(net.getNodeWeights(new int[] {x, y})); table.addDataRow(row); } } ExampleSet set = table.createExampleSet(); this.classificationMatrix = new double[dimensions[0]][dimensions[1]]; try { set = model.apply(set); Iterator<Example> exampleIterator = set.iterator(); for (int x = 0; x < dimensions[0]; x++) { for (int y = 0; y < dimensions[1]; y++) { Example example = exampleIterator.next(); classificationMatrix[x][y] = example.getValue(example.getAttributes().getPredictedLabel()); } } } catch (OperatorException e) { // LogService.getGlobal().log("Cannot use Model for prediction of node label: " + // e.getMessage(), LogService.WARNING); LogService.getRoot() .log( Level.WARNING, "com.rapidminer.operator.visualization.SOMModelPlotter.using_model_for_prediction_error" + e.getMessage()); } super.createMatrices(); }
/**
 * Constructs a <code>Model</code> repeatedly running a weak learner, reweighting the training
 * example set accordingly, and combining the hypothesis using the available weighted performance
 * values.
 *
 * <p>The example set is consumed batch by batch through one shared iterator. For each batch two
 * candidate ensembles are maintained: a "new batch" ensemble trained on the current batch only
 * and an "extended batch" ensemble trained on all batches from {@code firstOpenBatch} on. Before
 * retraining, the existing ensembles are evaluated on the new batch and the result is recorded in
 * {@code runVector}. Optionally a random hold-out subset of the batch is used to decide which of
 * the two ensembles is favoured and to re-estimate the weight of its last model.
 *
 * @param exampleSet the complete stream of training examples
 * @return the extended batch ensemble if one exists after the last batch, otherwise the new batch
 *     ensemble
 * @throws OperatorException propagated from the operations performed during training
 */
public Model learn(ExampleSet exampleSet) throws OperatorException {
  this.runVector = new RunVector();
  BayBoostModel ensembleNewBatch = null;
  BayBoostModel ensembleExtBatch = null;
  // modelInfo: models and their probability estimates (new batch line of models)
  final Vector<BayBoostBaseModelInfo> modelInfo = new Vector<BayBoostBaseModelInfo>();
  // modelInfo2: the corresponding list for the extended batch ensemble
  Vector<BayBoostBaseModelInfo> modelInfo2 = new Vector<BayBoostBaseModelInfo>();
  this.currentIteration = 0;
  int firstOpenBatch = 1;

  // prepare the stream control attribute
  final Attribute streamControlAttribute;
  {
    Attribute attr = null;
    if ((attr = exampleSet.getAttributes().get(STREAM_CONTROL_ATTRIB_NAME)) == null)
      streamControlAttribute =
          com.rapidminer.example.Tools.createSpecialAttribute(
              exampleSet, STREAM_CONTROL_ATTRIB_NAME, Ontology.INTEGER);
    else {
      streamControlAttribute = attr;
      logWarning(
          "Attribute with the (reserved) name of the stream control attribute exists. It is probably an old version created by this operator. Trying to recycle it... ");
      // Resetting the stream control attribute values by overwriting
      // them with 0 avoids (unlikely)
      // problems in case the same ExampleSet is passed to this
      // operator over and over again:
      Iterator<Example> e = exampleSet.iterator();
      while (e.hasNext()) {
        e.next().setValue(streamControlAttribute, 0);
      }
    }
  }

  // and the weight attribute
  // NOTE(review): weights are only prepared when no weight attribute exists, while
  // restoreOldWeights(...) below runs unconditionally -- confirm that a pre-existing weight
  // attribute survives this method as intended.
  if (exampleSet.getAttributes().getWeight() == null) {
    this.prepareWeights(exampleSet);
  }

  boolean estimateFavoursExtBatch = true;
  // *** The main loop, one iteration per batch: ***
  Iterator<Example> reader = exampleSet.iterator();
  while (reader.hasNext()) {
    // increment batch number, collect batch and evaluate performance of
    // current model on batch
    // (prepareBatch receives the shared reader; presumably it advances it past the batch)
    double[] classPriors =
        this.prepareBatch(++this.currentIteration, reader, streamControlAttribute);
    ConditionedExampleSet trainingSet =
        new ConditionedExampleSet(
            exampleSet, new BatchFilterCondition(streamControlAttribute, this.currentIteration));

    final EstimatedPerformance estPerf;

    // Step 1: apply the ensemble model to the current batch (prediction
    // phase), evaluate and store result
    if (ensembleExtBatch != null) {
      // apply extended batch model first:
      trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet);
      this.performance = evaluatePredictions(trainingSet); // unweighted performance

      // then apply new batch model:
      trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
      double newBatchPerformance = evaluatePredictions(trainingSet);

      // heuristic: use extended batch model for predicting
      // unclassified instances
      if (estimateFavoursExtBatch == true)
        estPerf =
            new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false);
      else
        estPerf =
            new EstimatedPerformance("accuracy", newBatchPerformance, trainingSet.size(), false);

      // final double[] ensembleWeights;

      // continue with the better model:
      if (newBatchPerformance > this.performance) {
        this.performance = newBatchPerformance;
        firstOpenBatch = Math.max(1, this.currentIteration - 1);
        // ensembleWeights = ensembleNewBatch.getModelWeights();
      } else {
        // the extended batch ensemble is at least as good: continue from its model list
        modelInfo.clear();
        modelInfo.addAll(modelInfo2);
        // ensembleWeights = ensembleExtBatch.getModelWeights();
      }
    } else if (ensembleNewBatch != null) {
      trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
      this.performance = evaluatePredictions(trainingSet);
      firstOpenBatch = Math.max(1, this.currentIteration - 1);
      estPerf = new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false);
    } else estPerf = null; // no model ==> no prediction performance

    if (estPerf != null) {
      PerformanceVector perf = new PerformanceVector();
      perf.addAveragable(estPerf);
      this.runVector.addVector(perf);
    }

    // *** retraining phase ***
    // Step 2: First reconstruct the initial weighting, if necessary.
    if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS) == true) {
      this.rescalePriors(trainingSet, classPriors);
    }

    estimateFavoursExtBatch = true;
    // Step 3: Find better weights for existing models and continue
    // training
    if (modelInfo.size() > 0) {
      modelInfo2 = new Vector<BayBoostBaseModelInfo>();
      for (BayBoostBaseModelInfo bbbmi : modelInfo) {
        modelInfo2.add(bbbmi); // BayBoostBaseModelInfo objects
        // cannot be changed, no deep copy
        // required
      }

      // separate hold out set
      final double holdOutRatio = this.getParameterAsDouble(PARAMETER_FRACTION_HOLD_OUT_SET);
      Vector<Example> holdOutExamples = new Vector<Example>();
      if (holdOutRatio > 0) {
        RandomGenerator random = RandomGenerator.getRandomGenerator(this);
        Iterator<Example> randBatchReader = trainingSet.iterator();
        while (randBatchReader.hasNext()) {
          Example example = randBatchReader.next();
          if (random.nextDoubleInRange(0, 1) <= holdOutRatio) {
            // batch number 0 takes the example out of the batch filter for now
            example.setValue(streamControlAttribute, 0);
            holdOutExamples.add(example);
          }
        }
        // TODO: create new example set
        // trainingSet.updateCondition();
      }

      // model 1: train one more base classifier
      boolean trainingExamplesLeft = this.adjustBaseModelWeights(trainingSet, modelInfo);
      if (trainingExamplesLeft) {
        // "trainingExamplesLeft" needs to be checked to avoid
        // exceptions.
        // Anyway, learning does not make sense, otherwise.
        // (the boolean result of trainAdditionalModel is deliberately ignored here)
        if (!this.trainAdditionalModel(trainingSet, modelInfo)) {}
      }
      ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors);

      // model 2: remove last classifier, extend batch, train on
      // extended batch
      // because of the ">=" condition it is sufficient to remember the opening batch
      ExampleSet extendedBatch =
          new ConditionedExampleSet(
              exampleSet, new BatchFilterCondition(streamControlAttribute, firstOpenBatch));
      classPriors = this.prepareExtendedBatch(extendedBatch);
      if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS) == true) {
        this.rescalePriors(extendedBatch, classPriors);
      }
      modelInfo2.remove(modelInfo2.size() - 1);
      trainingExamplesLeft = this.adjustBaseModelWeights(extendedBatch, modelInfo2);
      // If no training examples are left: no need and chance to
      // continue training.
      if (trainingExamplesLeft == false) {
        ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors);
      } else {
        boolean success = this.trainAdditionalModel(extendedBatch, modelInfo2);
        if (success) {
          ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors);
        } else {
          ensembleExtBatch = null;
          estimateFavoursExtBatch = false;
        }
      }

      if (holdOutRatio > 0) {
        // put the hold-out examples back into the current batch
        Iterator hoEit = holdOutExamples.iterator();
        while (hoEit.hasNext()) {
          ((Example) hoEit.next()).setValue(streamControlAttribute, this.currentIteration);
        }
        // TODO: create new example set
        // trainingSet.updateCondition();

        if (ensembleExtBatch != null) {
          // error rate of the new batch ensemble on the hold-out examples
          trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
          hoEit = holdOutExamples.iterator();
          int errors = 0;
          while (hoEit.hasNext()) {
            Example example = (Example) hoEit.next();
            if (example.getPredictedLabel() != example.getLabel()) errors++;
          }
          double newBatchErr = ((double) errors) / holdOutExamples.size();

          // error rate of the extended batch ensemble on the hold-out examples
          trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet);
          hoEit = holdOutExamples.iterator();
          errors = 0;
          while (hoEit.hasNext()) {
            Example example = (Example) hoEit.next();
            if (example.getPredictedLabel() != example.getLabel()) errors++;
          }
          double extBatchErr = ((double) errors) / holdOutExamples.size();

          // ties go to the extended batch ensemble; only the favoured one is re-estimated
          estimateFavoursExtBatch = (extBatchErr <= newBatchErr);

          if (estimateFavoursExtBatch) {
            ensembleExtBatch =
                this.retrainLastWeight(ensembleExtBatch, trainingSet, holdOutExamples);
          } else
            ensembleNewBatch =
                this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples);
        } else
          ensembleNewBatch =
              this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples);
      }
    } else {
      // no models yet: train the very first one on the current batch
      this.trainAdditionalModel(trainingSet, modelInfo);
      ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors);
      ensembleExtBatch = null;
      estimateFavoursExtBatch = false;
    }
  }
  this.restoreOldWeights(exampleSet);
  return (ensembleExtBatch == null ? ensembleNewBatch : ensembleExtBatch);
}
/**
 * Creates an iterator over all examples by wrapping the parent's iterator in an {@code
 * AttributesExampleReader} bound to this example set.
 */
public Iterator<Example> iterator() {
  Iterator<Example> parentIterator = parent.iterator();
  return new AttributesExampleReader(parentIterator, this);
}
/** Returns a model containing all support vectors, i.e. the examples with non-zero alphas. */ private EvoSVMModel getModel(double[] alphas) { // calculate support vectors Iterator<Example> reader = exampleSet.iterator(); List<SupportVector> supportVectors = new ArrayList<SupportVector>(); int index = 0; while (reader.hasNext()) { double currentAlpha = alphas[index]; Example currentExample = reader.next(); if (currentAlpha != 0.0d) { double[] x = new double[exampleSet.getAttributes().size()]; int a = 0; for (Attribute attribute : exampleSet.getAttributes()) x[a++] = currentExample.getValue(attribute); supportVectors.add(new SupportVector(x, ys[index], currentAlpha)); } index++; } // calculate all sum values double[] sum = new double[exampleSet.size()]; reader = exampleSet.iterator(); index = 0; while (reader.hasNext()) { Example current = reader.next(); double[] x = new double[exampleSet.getAttributes().size()]; int a = 0; for (Attribute attribute : exampleSet.getAttributes()) x[a++] = current.getValue(attribute); sum[index] = kernel.getSum(supportVectors, x); index++; } // calculate b (from Stefan's mySVM code) double bSum = 0.0d; int bCounter = 0; for (int i = 0; i < alphas.length; i++) { if ((ys[i] * alphas[i] - c < -IS_ZERO) && (ys[i] * alphas[i] > IS_ZERO)) { bSum += ys[i] - sum[i]; bCounter++; } else if ((ys[i] * alphas[i] + c > IS_ZERO) && (ys[i] * alphas[i] < -IS_ZERO)) { bSum += ys[i] - sum[i]; bCounter++; } } if (bCounter == 0) { // unlikely bSum = 0.0d; for (int i = 0; i < alphas.length; i++) { if ((ys[i] * alphas[i] < IS_ZERO) && (ys[i] * alphas[i] > -IS_ZERO)) { bSum += ys[i] - sum[i]; bCounter++; } } if (bCounter == 0) { // even unlikelier bSum = 0.0d; for (int i = 0; i < alphas.length; i++) { bSum += ys[i] - sum[i]; bCounter++; } } } return new EvoSVMModel(exampleSet, supportVectors, kernel, bSum / bCounter); }