@Override public PerformanceVector evaluateIndividual(Individual individual) { double[] beta = individual.getValues(); double fitness = 0.0d; for (Example example : exampleSet) { double eta = 0.0d; int i = 0; for (Attribute attribute : example.getAttributes()) { double value = example.getValue(attribute); eta += beta[i] * value; i++; } if (addIntercept) { eta += beta[beta.length - 1]; } double pi = Math.exp(eta) / (1 + Math.exp(eta)); double classValue = example.getValue(label); double currentFitness = classValue * Math.log(pi) + (1 - classValue) * Math.log(1 - pi); double weightValue = 1.0d; if (weight != null) weightValue = example.getValue(weight); fitness += weightValue * currentFitness; } PerformanceVector performanceVector = new PerformanceVector(); performanceVector.addCriterion( new EstimatedPerformance("log_reg_fitness", fitness, exampleSet.size(), false)); return performanceVector; }
@Override public int compare(AggregationIndividual i1, AggregationIndividual i2) { PerformanceVector pv1 = i1.getPerformance(); PerformanceVector pv2 = i2.getPerformance(); return (-1) * Double.compare(pv1.getCriterion(m).getFitness(), pv2.getCriterion(m).getFitness()); }
@Override public Object clone() throws CloneNotSupportedException { PerformanceVector av = new PerformanceVector(); for (int i = 0; i < size(); i++) { Averagable avg = getAveragable(i); av.addAveragable((Averagable) (avg).clone()); } av.cloneAnnotationsFrom(this); return av; }
private double getError(ExampleSet exampleSet, Model model) throws OperatorException { exampleSet = model.apply(exampleSet); try { PerformanceEvaluator evaluator = OperatorService.createOperator(PerformanceEvaluator.class); evaluator.setParameter("classification_error", "true"); PerformanceVector performance = evaluator.doWork(exampleSet); return performance.getMainCriterion().getAverage(); } catch (OperatorCreationException e) { e.printStackTrace(); return Double.NaN; } }
@Override public PerformanceVector evaluateIndividual(Individual individual) { double[] fitness = optimizationFunction.getFitness(individual.getValues(), ys, kernel); PerformanceVector performanceVector = new PerformanceVector(); if (fitness.length == 1) { performanceVector.addCriterion(new EstimatedPerformance("SVM_fitness", fitness[0], 1, false)); } else { performanceVector.addCriterion(new EstimatedPerformance("alpha_sum", fitness[0], 1, false)); performanceVector.addCriterion( new EstimatedPerformance("svm_objective_function", fitness[1], 1, false)); if (fitness.length == 3) performanceVector.addCriterion( new EstimatedPerformance("alpha_label_sum", fitness[2], 1, false)); } return performanceVector; }
/** Returns the estimated performance of this SVM. This only works for classification tasks. */ @Override public PerformanceVector getEstimatedPerformance() throws OperatorException { if (!pattern) throw new UserError( this, 912, this, "Cannot calculate leave one out estimation of error for regression tasks!"); double[] estVector = ((SVMpattern) getSVM()).getXiAlphaEstimation(getKernel()); PerformanceVector pv = new PerformanceVector(); pv.addCriterion(new EstimatedPerformance("xialpha_error", estVector[0], 1, true)); pv.addCriterion(new EstimatedPerformance("xialpha_precision", estVector[1], 1, false)); pv.addCriterion(new EstimatedPerformance("xialpha_recall", estVector[2], 1, false)); pv.setMainCriterionName("xialpha_error"); return pv; }
/** Delivers the fitness of the best individual as performance vector. */ public PerformanceVector getOptimizationPerformance() { double[] bestValuesEver = getBestValuesEver(); double[] finalFitness = optimizationFunction.getFitness(bestValuesEver, ys, kernel); PerformanceVector result = new PerformanceVector(); if (finalFitness.length == 1) { result.addCriterion( new EstimatedPerformance("svm_objective_function", finalFitness[0], 1, false)); } else { result.addCriterion(new EstimatedPerformance("alpha_sum", finalFitness[0], 1, false)); result.addCriterion( new EstimatedPerformance("svm_objective_function", finalFitness[1], 1, false)); if (finalFitness.length == 3) result.addCriterion(new EstimatedPerformance("alpha_label_sum", finalFitness[2], 1, false)); } return result; }
/** Checks whether the fitness of the main criterion of the performance vector received at the condition input exceeds the given minimum fitness value. */ public boolean check(ProcessBranch operator, String value) throws OperatorException { if (value == null) { throw new UserError(operator, 205, ProcessBranch.PARAMETER_CONDITION_VALUE); } double minFitness = Double.NEGATIVE_INFINITY; try { minFitness = Double.parseDouble(value); } catch (NumberFormatException e) { throw new UserError( operator, 207, new Object[] {value, ProcessBranch.PARAMETER_CONDITION_VALUE, e}); } PerformanceVector performance = operator.getConditionInput(PerformanceVector.class); return performance.getMainCriterion().getFitness() > minFitness; }
/** Returns true if the first individual is dominated by the second one, i.e. the second individual is at least as good in all fitness criteria and strictly better in at least one of them (remember: the criteria should be maximized). */ public static boolean isDominated(AggregationIndividual i1, AggregationIndividual i2) { PerformanceVector pv1 = i1.getPerformance(); PerformanceVector pv2 = i2.getPerformance(); double[][] performances = new double[pv1.getSize()][2]; for (int p = 0; p < performances.length; p++) { performances[p][0] = pv1.getCriterion(p).getFitness(); performances[p][1] = pv2.getCriterion(p).getFitness(); } boolean dominated = true; for (int p = 0; p < performances.length; p++) { dominated &= (performances[p][1] >= performances[p][0]); } boolean oneActuallyBetter = false; for (int p = 0; p < performances.length; p++) { oneActuallyBetter |= (performances[p][1] > performances[p][0]); } dominated &= oneActuallyBetter; return dominated; }
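/*
 * Illustration only (not part of the original class): a minimal sketch of how the dominance test
 * above is typically used to collect the non-dominated (Pareto) front of a population. It assumes
 * only the Population accessors already used elsewhere in this section (getNumberOfIndividuals(),
 * get(int)); the helper name and the returned list are hypothetical.
 */
public static List<AggregationIndividual> getNonDominatedFront(Population pop) {
  List<AggregationIndividual> front = new ArrayList<>();
  for (int i = 0; i < pop.getNumberOfIndividuals(); i++) {
    boolean dominated = false;
    for (int j = 0; j < pop.getNumberOfIndividuals(); j++) {
      // an individual is dropped as soon as any other individual dominates it
      if (i != j && isDominated(pop.get(i), pop.get(j))) {
        dominated = true;
        break;
      }
    }
    if (!dominated) {
      front.add(pop.get(i));
    }
  }
  return front;
}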
public void checkOutput(IOContainer output) throws MissingIOObjectException { if (similarity.equals("Tree")) { TreeDistance treedistance = output.get(TreeDistance.class); for (int i = 0; i < expectedValues.length; i++) { assertEquals(expectedValues[i], treedistance.similarity(first[i], second[i]), 0.00001); } } if (similarity.equals("Euclidean")) { EuclideanDistance euclideandistance = output.get(EuclideanDistance.class); for (int i = 0; i < expectedValues.length; i++) { assertEquals(expectedValues[i], euclideandistance.similarity(first[i], second[i]), 0.00001); } } if (similarity.equals("Comparator")) { PerformanceVector performancevector = output.get(PerformanceVector.class); assertEquals( expectedValues[0], performancevector.getCriterion("similarity").getAverage(), 0.00001); } }
/** Creates a new performance vector if the given one is null. Adds an MDL criterion. If the criterion was already part of the performance vector before, it will be overwritten. */ private PerformanceVector count(ExampleSet exampleSet, PerformanceVector performanceCriteria) throws OperatorException { if (performanceCriteria == null) performanceCriteria = new PerformanceVector(); MDLCriterion mdlCriterion = new MDLCriterion(getParameterAsInt(PARAMETER_OPTIMIZATION_DIRECTION)); mdlCriterion.startCounting(exampleSet, true); this.lastCount = mdlCriterion.getAverage(); performanceCriteria.addCriterion(mdlCriterion); return performanceCriteria; }
public static int fillDataTable( SimpleDataTable dataTable, Map<String, double[]> lastPopulation, Population pop, boolean drawDominated) { lastPopulation.clear(); dataTable.clear(); int numberOfCriteria = 0; for (int i = 0; i < pop.getNumberOfIndividuals(); i++) { boolean dominated = false; if (!drawDominated) { for (int j = 0; j < pop.getNumberOfIndividuals(); j++) { if (i == j) continue; if (NonDominatedSortingSelection.isDominated(pop.get(i), pop.get(j))) { dominated = true; break; } } } if (drawDominated || (!dominated)) { StringBuffer id = new StringBuffer(i + " ("); PerformanceVector current = pop.get(i).getPerformance(); numberOfCriteria = Math.max(numberOfCriteria, current.getSize()); double[] data = new double[current.getSize()]; for (int d = 0; d < data.length; d++) { data[d] = current.getCriterion(d).getFitness(); if (d != 0) id.append(", "); id.append(Tools.formatNumber(data[d])); } id.append(")"); dataTable.add(new SimpleDataTableRow(data, id.toString())); double[] weights = pop.get(i).getWeights(); double[] clone = new double[weights.length]; System.arraycopy(weights, 0, clone, 0, weights.length); lastPopulation.put(id.toString(), clone); } } return numberOfCriteria; }
/** Can be used by subclasses to set the performance of the example set. */
protected final void setResult(PerformanceVector pv) {
  this.lastMainPerformance = Double.NaN;
  this.lastMainVariance = Double.NaN;
  this.lastMainDeviation = Double.NaN;
  this.lastFirstPerformance = Double.NaN;
  this.lastSecondPerformance = Double.NaN;
  this.lastThirdPerformance = Double.NaN;
  if (pv != null) {
    // main result
    PerformanceCriterion mainCriterion = pv.getMainCriterion();
    if ((mainCriterion == null) && (pv.size() > 0)) {
      // use first if no main criterion was defined
      mainCriterion = pv.getCriterion(0);
    }
    if (mainCriterion != null) {
      this.lastMainPerformance = mainCriterion.getAverage();
      this.lastMainVariance = mainCriterion.getVariance();
      this.lastMainDeviation = mainCriterion.getStandardDeviation();
    }
    if (pv.size() >= 1) {
      PerformanceCriterion criterion = pv.getCriterion(0);
      if (criterion != null) {
        this.lastFirstPerformance = criterion.getAverage();
      }
    }
    if (pv.size() >= 2) {
      PerformanceCriterion criterion = pv.getCriterion(1);
      if (criterion != null) {
        this.lastSecondPerformance = criterion.getAverage();
      }
    }
    if (pv.size() >= 3) {
      PerformanceCriterion criterion = pv.getCriterion(2);
      if (criterion != null) {
        this.lastThirdPerformance = criterion.getAverage();
      }
    }
  }
}
/** Creates a new performance vector if the given one is null. Adds a new estimated criterion. If the criterion was already part of the performance vector before, it will be overwritten. */ private PerformanceVector count(KernelModel model, PerformanceVector performanceCriteria) throws OperatorException { if (performanceCriteria == null) performanceCriteria = new PerformanceVector(); this.lastCount = 0; int svNumber = model.getNumberOfSupportVectors(); for (int i = 0; i < svNumber; i++) { SupportVector sv = model.getSupportVector(i); if (Math.abs(sv.getAlpha()) > 0.0d) this.lastCount++; } EstimatedPerformance svCriterion = new EstimatedPerformance( "number_of_support_vectors", lastCount, 1, getParameterAsInt(PARAMETER_OPTIMIZATION_DIRECTION) == MDLCriterion.MINIMIZATION); performanceCriteria.addCriterion(svCriterion); return performanceCriteria; }
@Override
public void doWork() throws OperatorException {
  ExampleSet exampleSetOriginal = exampleSetInput.getData(ExampleSet.class);
  ExampleSet exampleSet = (ExampleSet) exampleSetOriginal.clone();
  int numberOfAttributes = exampleSet.getAttributes().size();
  Attributes attributes = exampleSet.getAttributes();

  int maxNumberOfAttributes =
      Math.min(getParameterAsInt(PARAMETER_MAX_ATTRIBUTES), numberOfAttributes - 1);
  int maxNumberOfFails = getParameterAsInt(PARAMETER_ALLOWED_CONSECUTIVE_FAILS);
  int behavior = getParameterAsInt(PARAMETER_STOPPING_BEHAVIOR);

  boolean useRelativeIncrease =
      (behavior == WITH_DECREASE_EXCEEDS)
          ? getParameterAsBoolean(PARAMETER_USE_RELATIVE_DECREASE)
          : false;
  double maximalDecrease = 0;
  if (behavior == WITH_DECREASE_EXCEEDS) {
    maximalDecrease =
        useRelativeIncrease
            ? getParameterAsDouble(PARAMETER_MAX_RELATIVE_DECREASE)
            : getParameterAsDouble(PARAMETER_MAX_ABSOLUT_DECREASE);
  }
  double alpha =
      (behavior == WITH_DECREASE_SIGNIFICANT) ? getParameterAsDouble(PARAMETER_ALPHA) : 0d;

  // remembering attributes and removing all from example set
  Attribute[] attributeArray = new Attribute[numberOfAttributes];
  int i = 0;
  Iterator<Attribute> iterator = attributes.iterator();
  while (iterator.hasNext()) {
    Attribute attribute = iterator.next();
    attributeArray[i] = attribute;
    i++;
  }

  boolean[] selected = new boolean[numberOfAttributes];
  Arrays.fill(selected, true);

  boolean earlyAbort = false;
  List<Integer> speculativeList = new ArrayList<Integer>(maxNumberOfFails);
  int numberOfFails = maxNumberOfFails;

  currentNumberOfFeatures = numberOfAttributes;
  currentAttributes = attributes;
  PerformanceVector lastPerformance = getPerformance(exampleSet);
  PerformanceVector bestPerformanceEver = lastPerformance;
  for (i = 0; i < maxNumberOfAttributes && !earlyAbort; i++) {
    // setting values for logging
    currentNumberOfFeatures = numberOfAttributes - i - 1;

    // performing a round
    int bestIndex = 0;
    PerformanceVector currentBestPerformance = null;
    for (int current = 0; current < numberOfAttributes; current++) {
      if (selected[current]) {
        // switching off
        attributes.remove(attributeArray[current]);
        currentAttributes = attributes;

        // evaluate performance
        PerformanceVector performance = getPerformance(exampleSet);
        if (currentBestPerformance == null || performance.compareTo(currentBestPerformance) > 0) {
          bestIndex = current;
          currentBestPerformance = performance;
        }

        // switching on
        attributes.addRegular(attributeArray[current]);
        currentAttributes = null; // removing reference
      }
    }
    double currentFitness = currentBestPerformance.getMainCriterion().getFitness();
    if (i != 0) {
      double lastFitness = lastPerformance.getMainCriterion().getFitness();
      // switch stopping behavior
      switch (behavior) {
        case WITH_DECREASE:
          if (lastFitness >= currentFitness) earlyAbort = true;
          break;
        case WITH_DECREASE_EXCEEDS:
          if (useRelativeIncrease) {
            // relative increase testing
            if (currentFitness < lastFitness - Math.abs(lastFitness * maximalDecrease))
              earlyAbort = true;
          } else {
            // absolute increase testing
            if (currentFitness < lastFitness - maximalDecrease) earlyAbort = true;
          }
          break;
        case WITH_DECREASE_SIGNIFICANT:
          AnovaCalculator calculator = new AnovaCalculator();
          calculator.setAlpha(alpha);
          PerformanceCriterion pc = currentBestPerformance.getMainCriterion();
          calculator.addGroup(pc.getAverageCount(), pc.getAverage(), pc.getVariance());
          pc = lastPerformance.getMainCriterion();
          calculator.addGroup(pc.getAverageCount(), pc.getAverage(), pc.getVariance());
          SignificanceTestResult result;
          try {
            result = calculator.performSignificanceTest();
          } catch (SignificanceCalculationException e) {
            throw new UserError(this, 920, e.getMessage());
          }
          if (lastFitness > currentFitness && result.getProbability() < alpha) earlyAbort = true;
      }
    }
    if (earlyAbort) {
      // check if there are some free tries left
      if (numberOfFails == 0) {
        break;
      }
      numberOfFails--;
      speculativeList.add(bestIndex);
      earlyAbort = false;

      // needs performance increase compared to better performance of current and last!
      if (currentBestPerformance.compareTo(lastPerformance) > 0)
        lastPerformance = currentBestPerformance;
    } else {
      // resetting maximal number of fails.
      numberOfFails = maxNumberOfFails;
      speculativeList.clear();
      lastPerformance = currentBestPerformance;
      bestPerformanceEver = currentBestPerformance;
    }

    // switching best index off
    attributes.remove(attributeArray[bestIndex]);
    selected[bestIndex] = false;
  }

  // add predictively removed attributes: speculative execution did not yield good result
  for (Integer removeIndex : speculativeList) {
    selected[removeIndex] = true;
    attributes.addRegular(attributeArray[removeIndex]);
  }

  AttributeWeights weights = new AttributeWeights();
  i = 0;
  for (Attribute attribute : attributeArray) {
    if (selected[i]) weights.setWeight(attribute.getName(), 1d);
    else weights.setWeight(attribute.getName(), 0d);
    i++;
  }

  exampleSetOutput.deliver(exampleSet);
  performanceOutput.deliver(bestPerformanceEver);
  weightsOutput.deliver(weights);
}
public int compare(PerformanceVector av1, PerformanceVector av2) { return av1.getMainCriterion().compareTo(av2.getMainCriterion()); }
public static SimpleDataTable createDataTable(Population pop) { PerformanceVector prototype = pop.get(0).getPerformance(); SimpleDataTable dataTable = new SimpleDataTable("Population", prototype.getCriteriaNames()); return dataTable; }
public PerformanceVectorViewer(
    final PerformanceVector performanceVector, final IOContainer container) {
  setLayout(new BorderLayout());

  // all criteria
  final CardLayout cardLayout = new CardLayout();
  final JPanel mainPanel = new JPanel(cardLayout);
  add(mainPanel, BorderLayout.CENTER);
  List<String> criteriaNameList = new LinkedList<>();
  for (int i = 0; i < performanceVector.getSize(); i++) {
    PerformanceCriterion criterion = performanceVector.getCriterion(i);
    criteriaNameList.add(criterion.getName());
    JPanel component =
        ResultDisplayTools.createVisualizationComponent(
            criterion, container, "Performance Criterion", false);
    JScrollPane criterionPane = new ExtendedJScrollPane(component);
    criterionPane.setBorder(null);
    criterionPane.setBackground(Colors.WHITE);
    mainPanel.add(criterionPane, criterion.getName());
  }

  if (criteriaNameList.isEmpty()) {
    remove(mainPanel);
    add(new ResourceLabel("result_view.no_criterions"));
    return;
  }

  String[] criteriaNames = new String[criteriaNameList.size()];
  criteriaNameList.toArray(criteriaNames);

  // selection list
  final JList<String> criteriaList =
      new JList<String>(criteriaNames) {

        private static final long serialVersionUID = 3031125186920370793L;

        @Override
        public Dimension getPreferredSize() {
          Dimension dim = super.getPreferredSize();
          dim.width = Math.max(150, dim.width);
          return dim;
        }
      };
  criteriaList.setCellRenderer(new CriterionListCellRenderer());
  criteriaList.setOpaque(false);
  criteriaList.setBorder(BorderFactory.createTitledBorder("Criterion"));
  criteriaList.setSelectionMode(ListSelectionModel.SINGLE_SELECTION);
  criteriaList.addListSelectionListener(
      new ListSelectionListener() {

        @Override
        public void valueChanged(ListSelectionEvent e) {
          String selected = criteriaList.getSelectedValue();
          cardLayout.show(mainPanel, selected);
        }
      });

  JScrollPane listScrollPane = new ExtendedJScrollPane(criteriaList);
  listScrollPane.setBorder(BorderFactory.createEmptyBorder(0, 5, 0, 2));
  add(listScrollPane, BorderLayout.WEST);

  // select first criterion
  criteriaList.setSelectedIndices(new int[] {0});
}
// start
@Override
public void doWork() throws OperatorException {
  getParametersToOptimize();
  if (numberOfCombinations <= 1) {
    throw new UserError(this, 922);
  }
  int ifExceedsRegion = getParameterAsInt(PARAMETER_IF_EXCEEDS_REGION);
  int ifExceedsRange = getParameterAsInt(PARAMETER_IF_EXCEEDS_RANGE);

  // sort parameter values
  String[] valuesToSort;
  String s;
  double val1;
  double val2;
  int ind1;
  int ind2;
  for (int index = 0; index < numberOfParameters; index++) {
    valuesToSort = values[index];
    // straight-insertion-sort of valuesToSort
    for (ind1 = 0; ind1 < valuesToSort.length; ind1++) {
      val1 = Double.parseDouble(valuesToSort[ind1]);
      for (ind2 = ind1 + 1; ind2 < valuesToSort.length; ind2++) {
        val2 = Double.parseDouble(valuesToSort[ind2]);
        if (val1 > val2) {
          s = valuesToSort[ind1];
          valuesToSort[ind1] = valuesToSort[ind2];
          valuesToSort[ind2] = s;
          val1 = val2;
        }
      }
    }
  }

  int[] bestIndex = new int[numberOfParameters];
  ParameterSet[] allParameters = new ParameterSet[numberOfCombinations];
  int paramIndex = 0;

  // Test all parameter combinations
  best = null;

  // init operator progress (+ 1 for work after loop)
  getProgress().setTotal(allParameters.length + 1);

  while (true) {
    getLogger().fine("Using parameter set");
    // set all parameter values
    for (int j = 0; j < operators.length; j++) {
      operators[j].getParameters().setParameter(parameters[j], values[j][currentIndex[j]]);
      getLogger().fine(operators[j] + "." + parameters[j] + " = " + values[j][currentIndex[j]]);
    }

    PerformanceVector performance = getPerformanceVector();
    String[] currentValues = new String[parameters.length];
    for (int j = 0; j < parameters.length; j++) {
      currentValues[j] = values[j][currentIndex[j]];
    }
    allParameters[paramIndex] = new ParameterSet(operators, parameters, currentValues, performance);

    if (best == null || performance.compareTo(best.getPerformance()) > 0) {
      best = allParameters[paramIndex];
      // bestIndex = currentIndex;
      for (int j = 0; j < numberOfParameters; j++) {
        bestIndex[j] = currentIndex[j];
      }
    }
    getProgress().step();

    // next parameter values
    int k = 0;
    boolean ok = true;
    while (!(++currentIndex[k] < values[k].length)) {
      currentIndex[k] = 0;
      k++;
      if (k >= currentIndex.length) {
        ok = false;
        break;
      }
    }
    if (!ok) {
      break;
    }
    paramIndex++;
  }

  // start quadratic optimization
  int nrParameters = 0;
  for (int i = 0; i < numberOfParameters; i++) {
    if (values[i].length > 2) {
      log("Param " + i + ", bestI = " + bestIndex[i]);
      nrParameters++;
      if (bestIndex[i] == 0) {
        bestIndex[i]++;
      }
      if (bestIndex[i] == values[i].length - 1) {
        bestIndex[i]--;
      }
    } else {
      getLogger().warning("Parameter " + parameters[i] + " has less than 3 values, skipped.");
    }
  }
  if (nrParameters > 3) {
    getLogger()
        .warning("Optimization not recommended for more than 3 values. Check results carefully!");
  }

  if (nrParameters > 0) {
    // Set up the design matrix A for the 3^nrParameters plan and solve A*x=y
    // x = new parameters
    // check whether the new parameters are within the admissible region
    // - OK if inside the cube of the 3^k plan
    // - warning if only inside the given parameter range
    // - error otherwise
    int threetok = 1;
    for (int i = 0; i < nrParameters; i++) {
      threetok *= 3;
    }

    log("Optimising " + nrParameters + " parameters");
    Matrix designMatrix =
        new Matrix(threetok, nrParameters + nrParameters * (nrParameters + 1) / 2 + 1);
    Matrix y = new Matrix(threetok, 1);
    paramIndex = 0;
    for (int i = numberOfParameters - 1; i >= 0; i--) {
      if (values[i].length > 2) {
        currentIndex[i] = bestIndex[i] - 1;
      } else {
        currentIndex[i] = bestIndex[i];
      }
      paramIndex = paramIndex * values[i].length + currentIndex[i];
    }

    int row = 0;
    int c;
    while (row < designMatrix.getRowDimension()) {
      y.set(row, 0, allParameters[paramIndex].getPerformance().getMainCriterion().getFitness());
      designMatrix.set(row, 0, 1.0);
      c = 1;
      // compute A
      for (int i = 0; i < nrParameters; i++) {
        if (values[i].length > 2) {
          designMatrix.set(row, c, Double.parseDouble(values[i][currentIndex[i]]));
          c++;
        }
      }
      // compute C
      for (int i = 0; i < nrParameters; i++) {
        if (values[i].length > 2) {
          for (int j = i + 1; j < nrParameters; j++) {
            if (values[j].length > 2) {
              designMatrix.set(
                  row,
                  c,
                  Double.parseDouble(values[i][currentIndex[i]])
                      * Double.parseDouble(values[j][currentIndex[j]]));
              c++;
            }
          }
        }
      }
      // compute Q:
      for (int i = 0; i < nrParameters; i++) {
        if (values[i].length > 2) {
          designMatrix.set(
              row,
              c,
              Double.parseDouble(values[i][currentIndex[i]])
                  * Double.parseDouble(values[i][currentIndex[i]]));
          c++;
        }
      }

      // update currentIndex and paramIndex
      int k = 0;
      c = 1;
      while (k < numberOfParameters) {
        if (values[k].length > 2) {
          currentIndex[k]++;
          paramIndex += c;
          if (currentIndex[k] > bestIndex[k] + 1) {
            currentIndex[k] = bestIndex[k] - 1;
            paramIndex -= 3 * c;
            c *= values[k].length;
            k++;
          } else {
            break;
          }
        } else {
          c *= values[k].length;
          k++;
        }
      }
      row++;
    }

    // solve the design matrix system for beta
    Matrix beta = designMatrix.solve(y);
    for (int i = 0; i < designMatrix.getColumnDimension(); i++) {
      logWarning(" -- Writing " + beta.get(i, 0) + " at position " + i + " in vector b");
    }

    // generate Matrix P~
    Matrix p = new Matrix(nrParameters, nrParameters);
    int betapos = nrParameters + 1;
    for (int j = 0; j < nrParameters - 1; j++) {
      for (int i = 1 + j; i < nrParameters; i++) {
        p.set(i, j, beta.get(betapos, 0) * 0.5);
        p.set(j, i, beta.get(betapos, 0) * 0.5);
        betapos++;
      }
    }
    for (int i = 0; i < nrParameters; i++) {
      p.set(i, i, beta.get(betapos, 0));
      betapos++;
    }

    // generate Matrix y~
    Matrix y2 = new Matrix(nrParameters, 1);
    for (int i = 0; i < nrParameters; i++) {
      y2.set(i, 0, beta.get(i + 1, 0));
    }
    y2 = y2.times(-0.5);

    // get stationary point x
    Matrix x = new Matrix(nrParameters, 1);
    try {
      x = p.solve(y2);
    } catch (RuntimeException e) {
      logWarning("Quadratic optimization failed. (invalid matrix)");
    }

    String[] Qvalues = new String[numberOfParameters];
    int pc = 0;
    boolean ok = true;
    for (int j = 0; j < numberOfParameters; j++) {
      if (values[j].length > 2) {
        if (x.get(pc, 0) > Double.parseDouble(values[j][bestIndex[j] + 1])
            || x.get(pc, 0) < Double.parseDouble(values[j][bestIndex[j] - 1])) {
          logWarning(
              "Parameter " + parameters[j] + " exceeds region of interest (" + x.get(pc, 0) + ")");
          if (ifExceedsRegion == CLIP) {
            // clip to bound
            if (x.get(pc, 0) > Double.parseDouble(values[j][bestIndex[j] + 1])) {
              x.set(pc, 0, Double.parseDouble(values[j][bestIndex[j] + 1]));
            } else {
              x.set(pc, 0, Double.parseDouble(values[j][bestIndex[j] - 1]));
            }
          } else if (ifExceedsRegion == FAIL) {
            ok = false;
          }
        }
        if (x.get(pc, 0) < Double.parseDouble(values[j][0])
            || x.get(pc, 0) > Double.parseDouble(values[j][values[j].length - 1])) {
          logWarning("Parameter " + parameters[j] + " exceeds range (" + x.get(pc, 0) + ")");
          if (ifExceedsRange == IGNORE) {
            // ignore error
            logWarning(" but no measures taken. Check parameters manually!");
          } else if (ifExceedsRange == CLIP) {
            // clip to the exceeded bound
            if (x.get(pc, 0) > Double.parseDouble(values[j][values[j].length - 1])) {
              x.set(pc, 0, Double.parseDouble(values[j][values[j].length - 1]));
            } else {
              x.set(pc, 0, Double.parseDouble(values[j][0]));
            }
          } else {
            ok = false;
          }
        }
        Qvalues[j] = x.get(pc, 0) + "";
        pc++;
      } else {
        Qvalues[j] = values[j][bestIndex[j]];
      }
    }

    getLogger().info("Optimised parameter set:");
    for (int j = 0; j < operators.length; j++) {
      operators[j].getParameters().setParameter(parameters[j], Qvalues[j]);
      getLogger().info(" " + operators[j] + "." + parameters[j] + " = " + Qvalues[j]);
    }

    if (ok) {
      PerformanceVector qPerformance = super.getPerformanceVector();
      log("Old: " + best.getPerformance().getMainCriterion().getFitness());
      log("New: " + qPerformance.getMainCriterion().getFitness());
      if (qPerformance.compareTo(best.getPerformance()) > 0) {
        best = new ParameterSet(operators, parameters, Qvalues, qPerformance);
        // log
        log("Optimised parameter set does increase the performance");
      } else {
        // different log
        log("Could not increase performance by quadratic optimization");
      }
    } else {
      // not ok
      getLogger()
          .warning("Parameters outside admissible range, not using optimised parameter set.");
    }
  } else {
    // Warning: no parameters to optimize
    getLogger().warning("No parameters to optimize");
  }
  // end quadratic optimization

  deliver(best);
  getProgress().complete();
}
/**
 * Constructs a <code>Model</code> by repeatedly running a weak learner, reweighting the training
 * example set accordingly, and combining the hypotheses using the available weighted performance
 * values.
 */
public Model learn(ExampleSet exampleSet) throws OperatorException {
  this.runVector = new RunVector();
  BayBoostModel ensembleNewBatch = null;
  BayBoostModel ensembleExtBatch = null;
  // for models and their probability estimates
  final Vector<BayBoostBaseModelInfo> modelInfo = new Vector<BayBoostBaseModelInfo>();
  Vector<BayBoostBaseModelInfo> modelInfo2 = new Vector<BayBoostBaseModelInfo>();
  this.currentIteration = 0;
  int firstOpenBatch = 1;

  // prepare the stream control attribute
  final Attribute streamControlAttribute;
  {
    Attribute attr = null;
    if ((attr = exampleSet.getAttributes().get(STREAM_CONTROL_ATTRIB_NAME)) == null) {
      streamControlAttribute =
          com.rapidminer.example.Tools.createSpecialAttribute(
              exampleSet, STREAM_CONTROL_ATTRIB_NAME, Ontology.INTEGER);
    } else {
      streamControlAttribute = attr;
      logWarning(
          "Attribute with the (reserved) name of the stream control attribute exists. It is probably an old version created by this operator. Trying to recycle it... ");
      // Resetting the stream control attribute values by overwriting them with 0 avoids (unlikely)
      // problems in case the same ExampleSet is passed to this operator over and over again:
      Iterator<Example> e = exampleSet.iterator();
      while (e.hasNext()) {
        e.next().setValue(streamControlAttribute, 0);
      }
    }
  }

  // and the weight attribute
  if (exampleSet.getAttributes().getWeight() == null) {
    this.prepareWeights(exampleSet);
  }

  boolean estimateFavoursExtBatch = true;
  // *** The main loop, one iteration per batch: ***
  Iterator<Example> reader = exampleSet.iterator();
  while (reader.hasNext()) {
    // increment batch number, collect batch and evaluate performance of current model on batch
    double[] classPriors =
        this.prepareBatch(++this.currentIteration, reader, streamControlAttribute);

    ConditionedExampleSet trainingSet =
        new ConditionedExampleSet(
            exampleSet, new BatchFilterCondition(streamControlAttribute, this.currentIteration));

    final EstimatedPerformance estPerf;

    // Step 1: apply the ensemble model to the current batch (prediction phase), evaluate and
    // store result
    if (ensembleExtBatch != null) {
      // apply extended batch model first:
      trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet);
      this.performance = evaluatePredictions(trainingSet); // unweighted performance

      // then apply new batch model:
      trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
      double newBatchPerformance = evaluatePredictions(trainingSet);

      // heuristic: use extended batch model for predicting unclassified instances
      if (estimateFavoursExtBatch == true) {
        estPerf = new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false);
      } else {
        estPerf =
            new EstimatedPerformance("accuracy", newBatchPerformance, trainingSet.size(), false);
      }

      // final double[] ensembleWeights;

      // continue with the better model:
      if (newBatchPerformance > this.performance) {
        this.performance = newBatchPerformance;
        firstOpenBatch = Math.max(1, this.currentIteration - 1);
        // ensembleWeights = ensembleNewBatch.getModelWeights();
      } else {
        modelInfo.clear();
        modelInfo.addAll(modelInfo2);
        // ensembleWeights = ensembleExtBatch.getModelWeights();
      }
    } else if (ensembleNewBatch != null) {
      trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
      this.performance = evaluatePredictions(trainingSet);
      firstOpenBatch = Math.max(1, this.currentIteration - 1);
      estPerf = new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false);
    } else {
      estPerf = null; // no model ==> no prediction performance
    }

    if (estPerf != null) {
      PerformanceVector perf = new PerformanceVector();
      perf.addAveragable(estPerf);
      this.runVector.addVector(perf);
    }

    // *** retraining phase ***
    // Step 2: First reconstruct the initial weighting, if necessary.
    if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS) == true) {
      this.rescalePriors(trainingSet, classPriors);
    }

    estimateFavoursExtBatch = true;

    // Step 3: Find better weights for existing models and continue training
    if (modelInfo.size() > 0) {
      modelInfo2 = new Vector<BayBoostBaseModelInfo>();
      for (BayBoostBaseModelInfo bbbmi : modelInfo) {
        // BayBoostBaseModelInfo objects cannot be changed, no deep copy required
        modelInfo2.add(bbbmi);
      }

      // separate hold out set
      final double holdOutRatio = this.getParameterAsDouble(PARAMETER_FRACTION_HOLD_OUT_SET);
      Vector<Example> holdOutExamples = new Vector<Example>();
      if (holdOutRatio > 0) {
        RandomGenerator random = RandomGenerator.getRandomGenerator(this);
        Iterator<Example> randBatchReader = trainingSet.iterator();
        while (randBatchReader.hasNext()) {
          Example example = randBatchReader.next();
          if (random.nextDoubleInRange(0, 1) <= holdOutRatio) {
            example.setValue(streamControlAttribute, 0);
            holdOutExamples.add(example);
          }
        }
        // TODO: create new example set
        // trainingSet.updateCondition();
      }

      // model 1: train one more base classifier
      boolean trainingExamplesLeft = this.adjustBaseModelWeights(trainingSet, modelInfo);
      if (trainingExamplesLeft) {
        // "trainingExamplesLeft" needs to be checked to avoid exceptions.
        // Anyway, learning does not make sense, otherwise.
        if (!this.trainAdditionalModel(trainingSet, modelInfo)) {}
      }
      ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors);

      // model 2: remove last classifier, extend batch, train on extended batch
      // because of the ">=" condition it is sufficient to remember the opening batch
      ExampleSet extendedBatch =
          new ConditionedExampleSet(
              exampleSet, new BatchFilterCondition(streamControlAttribute, firstOpenBatch));

      classPriors = this.prepareExtendedBatch(extendedBatch);
      if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS) == true) {
        this.rescalePriors(extendedBatch, classPriors);
      }
      modelInfo2.remove(modelInfo2.size() - 1);
      trainingExamplesLeft = this.adjustBaseModelWeights(extendedBatch, modelInfo2);

      // If no training examples are left: no need and chance to continue training.
      if (trainingExamplesLeft == false) {
        ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors);
      } else {
        boolean success = this.trainAdditionalModel(extendedBatch, modelInfo2);
        if (success) {
          ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors);
        } else {
          ensembleExtBatch = null;
          estimateFavoursExtBatch = false;
        }
      }

      if (holdOutRatio > 0) {
        Iterator hoEit = holdOutExamples.iterator();
        while (hoEit.hasNext()) {
          ((Example) hoEit.next()).setValue(streamControlAttribute, this.currentIteration);
        }
        // TODO: create new example set
        // trainingSet.updateCondition();

        if (ensembleExtBatch != null) {
          trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
          hoEit = holdOutExamples.iterator();
          int errors = 0;
          while (hoEit.hasNext()) {
            Example example = (Example) hoEit.next();
            if (example.getPredictedLabel() != example.getLabel()) errors++;
          }
          double newBatchErr = ((double) errors) / holdOutExamples.size();

          trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet);
          hoEit = holdOutExamples.iterator();
          errors = 0;
          while (hoEit.hasNext()) {
            Example example = (Example) hoEit.next();
            if (example.getPredictedLabel() != example.getLabel()) errors++;
          }
          double extBatchErr = ((double) errors) / holdOutExamples.size();

          estimateFavoursExtBatch = (extBatchErr <= newBatchErr);

          if (estimateFavoursExtBatch) {
            ensembleExtBatch =
                this.retrainLastWeight(ensembleExtBatch, trainingSet, holdOutExamples);
          } else {
            ensembleNewBatch =
                this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples);
          }
        } else {
          ensembleNewBatch = this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples);
        }
      }
    } else {
      this.trainAdditionalModel(trainingSet, modelInfo);
      ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors);
      ensembleExtBatch = null;
      estimateFavoursExtBatch = false;
    }
  }
  this.restoreOldWeights(exampleSet);

  return (ensembleExtBatch == null ? ensembleNewBatch : ensembleExtBatch);
}
public PerformanceVector getPerformance() { double[] beta = getBestValuesEver(); double numberOfSlopes = addIntercept ? beta.length - 1 : beta.length; double logLikelihood = getBestFitnessEver(); double restrictedLogLikelihood = 0.0d; double minusTwoLogLikelihood = 0.0d; double modelChiSquared = 0.0d; double goodnessOfFit = 0.0d; double coxSnellRSquared = 0.0d; double nagelkerkeRSquared = 0.0d; double mcfaddenRSquared = 0.0d; double AIC = 0.0d; double BIC = 0.0d; double weightSum = 0.0d; double positiveSum = 0.0d; for (Example example : exampleSet) { double eta = 0.0d; int i = 0; for (Attribute attribute : example.getAttributes()) { double value = example.getValue(attribute); eta += beta[i] * value; i++; } if (addIntercept) { eta += beta[beta.length - 1]; } double pi = Math.exp(eta) / (1 + Math.exp(eta)); double classValue = example.getValue(label); double currentFit = (classValue - pi) * (classValue - pi) / (pi * (1 - pi)); double weightValue = 1.0d; if (weight != null) weightValue = example.getValue(weight); weightSum += weightValue; positiveSum += weightValue * classValue; goodnessOfFit += weightValue * currentFit; } double pi0 = positiveSum / weightSum; if (addIntercept) { restrictedLogLikelihood = weightSum * (pi0 * Math.log(pi0) + (1 - pi0) * Math.log(1 - pi0)); } else { restrictedLogLikelihood = weightSum * Math.log(0.5); } minusTwoLogLikelihood = -2 * logLikelihood; modelChiSquared = 2 * (logLikelihood - restrictedLogLikelihood); coxSnellRSquared = 1 - Math.pow(Math.exp(restrictedLogLikelihood) / Math.exp(logLikelihood), 2 / weightSum); nagelkerkeRSquared = coxSnellRSquared / (1 - Math.pow(Math.exp(restrictedLogLikelihood), 2 / weightSum)); mcfaddenRSquared = 1 - logLikelihood / restrictedLogLikelihood; AIC = -2 * logLikelihood + 2 * (numberOfSlopes + 1); BIC = -2 * logLikelihood + Math.log(weightSum) * (numberOfSlopes + 1); PerformanceVector estimatedPerformance = new PerformanceVector(); estimatedPerformance.addCriterion( new EstimatedPerformance("log_likelihood", logLikelihood, exampleSet.size(), false)); estimatedPerformance.addCriterion( new EstimatedPerformance( "restricted_log_likelihood", restrictedLogLikelihood, exampleSet.size(), false)); estimatedPerformance.addCriterion( new EstimatedPerformance( "-2_log_likelihood", minusTwoLogLikelihood, exampleSet.size(), true)); estimatedPerformance.addCriterion( new EstimatedPerformance("model_chi_squared", modelChiSquared, exampleSet.size(), false)); estimatedPerformance.addCriterion( new EstimatedPerformance("goodness_of_fit", goodnessOfFit, exampleSet.size(), false)); estimatedPerformance.addCriterion( new EstimatedPerformance( "cox_snell_r_squared", coxSnellRSquared, exampleSet.size(), false)); estimatedPerformance.addCriterion( new EstimatedPerformance( "nagelkerke_r_squared", nagelkerkeRSquared, exampleSet.size(), false)); estimatedPerformance.addCriterion( new EstimatedPerformance("mcfadden_r_squared", mcfaddenRSquared, exampleSet.size(), false)); estimatedPerformance.addCriterion( new EstimatedPerformance("AIC", AIC, exampleSet.size(), true)); estimatedPerformance.addCriterion( new EstimatedPerformance("BIC", BIC, exampleSet.size(), true)); estimatedPerformance.setMainCriterionName("AIC"); return estimatedPerformance; }
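/*
 * Cross-check (illustrative only, not part of the class above): the two pseudo-R-squared measures
 * computed in getPerformance() can be evaluated directly on the log scale, which avoids the
 * under-/overflow of Math.exp(...) for large weighted example sets. The helper names and the
 * parameter n (the weight sum used as sample size) are hypothetical.
 */
static double coxSnellRSquared(double logLikelihood, double restrictedLogLikelihood, double n) {
  // Cox & Snell: 1 - (L_0 / L_1)^(2/n), evaluated as 1 - exp((2/n) * (ll_0 - ll_1))
  return 1.0 - Math.exp((2.0 / n) * (restrictedLogLikelihood - logLikelihood));
}

static double nagelkerkeRSquared(double logLikelihood, double restrictedLogLikelihood, double n) {
  // Nagelkerke: Cox & Snell R^2 rescaled by its maximum attainable value 1 - L_0^(2/n)
  return coxSnellRSquared(logLikelihood, restrictedLogLikelihood, n)
      / (1.0 - Math.exp((2.0 / n) * restrictedLogLikelihood));
}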
@Override
public void doWork() throws OperatorException {
  ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);
  Attribute predictedLabel = exampleSet.getAttributes().getPredictedLabel();
  if (predictedLabel == null) {
    throw new UserError(this, 107);
  }
  Attribute label = exampleSet.getAttributes().getLabel();
  if (label != null) {
    if (label.isNominal()) {
      double[][] costMatrix = getParameterAsMatrix(PARAMETER_COST_MATRIX);

      // build label ordering map
      Map<String, Integer> classOrderMap = null;
      if (isParameterSet(PARAMETER_CLASS_DEFINITION)) {
        String[] enumeratedValues =
            ParameterTypeEnumeration.transformString2Enumeration(
                getParameterAsString(PARAMETER_CLASS_DEFINITION));
        if (enumeratedValues.length > 0) {
          classOrderMap = new HashMap<String, Integer>();
          int i = 0;
          for (String className : enumeratedValues) {
            classOrderMap.put(className, i);
            i++;
          }
          // check whether each possible label occurred once
          for (String value : label.getMapping().getValues()) {
            if (!classOrderMap.containsKey(value)) {
              throw new UserError(
                  this, "performance_costs.class_order_definition_misses_value", value);
            }
          }
          // check whether the map has the same size as the cost matrix
          if (costMatrix.length != classOrderMap.size())
            throw new UserError(
                this,
                "performance_costs.cost_matrix_with_wrong_dimension",
                costMatrix.length,
                classOrderMap.size());
        }
      }
      MeasuredPerformance criterion =
          new ClassificationCostCriterion(costMatrix, classOrderMap, label, predictedLabel);
      PerformanceVector performance = new PerformanceVector();
      performance.addCriterion(criterion);
      // now measuring costs
      criterion.startCounting(exampleSet, false);
      for (Example example : exampleSet) {
        criterion.countExample(example);
      }
      // setting logging value
      lastCosts = criterion.getAverage();
      exampleSetOutput.deliver(exampleSet);
      performanceOutput.deliver(performance);
    } else {
      throw new UserError(this, 101, "CostEvaluator", label.getName());
    }
  } else {
    throw new UserError(this, 105);
  }
}