public Split getBestSplit(ExampleSet inputSet, Attribute attribute, String labelName) { SortedExampleSet exampleSet = new SortedExampleSet((ExampleSet) inputSet.clone(), attribute, SortedExampleSet.INCREASING); Attribute labelAttribute = exampleSet.getAttributes().getLabel(); int labelIndex = labelAttribute.getMapping().mapString(labelName); double oldLabel = Double.NaN; double bestSplit = Double.NaN; double lastValue = Double.NaN; double bestBenefit = Double.NEGATIVE_INFINITY; double bestTotalWeight = 0; int bestSplitType = Split.LESS_SPLIT; // initiating online counting of benefit: only 2 Datascans needed then criterion.reinitOnlineCounting(exampleSet); for (Example e : exampleSet) { double currentValue = e.getValue(attribute); double label = e.getValue(labelAttribute); if ((Double.isNaN(oldLabel)) || (oldLabel != label) && (lastValue != currentValue)) { double splitValue = (lastValue + currentValue) / 2.0d; double[] benefits; if (labelName == null) { benefits = criterion.getOnlineBenefit(e); } else { benefits = criterion.getOnlineBenefit(e, labelIndex); } // online method returns both possible relations in one array(greater / smaller) in one // array if ((benefits[0] > minValue) && (benefits[0] > 0) && (benefits[1] > 0) && ((benefits[0] > bestBenefit) || ((benefits[0] == bestBenefit) && (benefits[1] > bestTotalWeight)))) { bestBenefit = benefits[0]; bestSplit = splitValue; bestTotalWeight = benefits[1]; bestSplitType = Split.LESS_SPLIT; } if ((benefits[2] > minValue) && (benefits[2] > 0) && (benefits[3] > 0) && ((benefits[2] > bestBenefit) || ((benefits[2] == bestBenefit) && (benefits[3] > bestTotalWeight)))) { bestBenefit = benefits[2]; bestSplit = splitValue; bestTotalWeight = benefits[3]; bestSplitType = Split.GREATER_SPLIT; } oldLabel = label; } lastValue = currentValue; criterion.update(e); } return new Split(bestSplit, new double[] {bestBenefit, bestTotalWeight}, bestSplitType); }