@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { // searching confidence attributes Attributes attributes = exampleSet.getAttributes(); Attribute predictedLabel = attributes.getPredictedLabel(); if (predictedLabel == null) { throw new UserError(this, 107); } NominalMapping mapping = predictedLabel.getMapping(); int numberOfLabels = mapping.size(); Attribute[] confidences = new Attribute[numberOfLabels]; String[] labelValue = new String[numberOfLabels]; int i = 0; for (String value : mapping.getValues()) { labelValue[i] = value; confidences[i] = attributes.getConfidence(value); if (confidences[i] == null) { throw new UserError(this, 154, value); } i++; } // generating new prediction attributes int k = Math.min(numberOfLabels, getParameterAsInt(PARAMETER_NUMBER_OF_RANKS)); Attribute[] kthPredictions = new Attribute[k]; Attribute[] kthConfidences = new Attribute[k]; for (i = 0; i < k; i++) { kthPredictions[i] = AttributeFactory.createAttribute(predictedLabel.getValueType()); kthPredictions[i].setName(predictedLabel.getName() + "_" + (i + 1)); kthPredictions[i].setMapping((NominalMapping) predictedLabel.getMapping().clone()); kthConfidences[i] = AttributeFactory.createAttribute(Ontology.REAL); kthConfidences[i].setName(Attributes.CONFIDENCE_NAME + "_" + (i + 1)); attributes.addRegular(kthPredictions[i]); attributes.addRegular(kthConfidences[i]); attributes.setSpecialAttribute(kthPredictions[i], Attributes.PREDICTION_NAME + "_" + (i + 1)); attributes.setSpecialAttribute(kthConfidences[i], Attributes.CONFIDENCE_NAME + "_" + (i + 1)); } exampleSet.getExampleTable().addAttributes(Arrays.asList(kthConfidences)); exampleSet.getExampleTable().addAttributes(Arrays.asList(kthPredictions)); // now setting values for (Example example : exampleSet) { ArrayList<Tupel<Double, Integer>> labelConfidences = new ArrayList<Tupel<Double, Integer>>(numberOfLabels); for (i = 0; i < numberOfLabels; i++) { labelConfidences.add(new Tupel<Double, 
Integer>(example.getValue(confidences[i]), i)); } Collections.sort(labelConfidences); for (i = 0; i < k; i++) { Tupel<Double, Integer> tupel = labelConfidences.get(numberOfLabels - i - 1); example.setValue( kthPredictions[i], tupel.getSecond()); // Can use index since mapping has been cloned from above example.setValue(kthConfidences[i], tupel.getFirst()); } } // deleting old prediction / confidences attributes.remove(predictedLabel); if (getParameterAsBoolean(PARAMETER_REMOVE_OLD_PREDICTIONS)) { for (i = 0; i < confidences.length; i++) { attributes.remove(confidences[i]); } } return exampleSet; }
/**
 * Reads an example set from the input port, validates it, builds ROC data with the configured
 * misclassification costs and delivers the unchanged example set together with a
 * {@code Threshold} built from the generator's best threshold.
 *
 * <p>Validation, in order: a label must exist (error 105), be nominal (101) and have exactly
 * two values (118); a predicted label must exist (107); if labels are defined explicitly, both
 * must be known to the label mapping (143); confidence attributes for the positive and the
 * negative class must exist (113).
 *
 * @throws OperatorException if one of the checks above fails or a downstream call fails
 */
@Override
public void doWork() throws OperatorException {
  ExampleSet data = exampleSetInput.getData(ExampleSet.class);

  // The label has to exist and be nominal.
  Attribute labelAttribute = data.getAttributes().getLabel();
  if (labelAttribute == null) {
    throw new UserError(this, 105);
  }
  if (!labelAttribute.isNominal()) {
    throw new UserError(this, 101, labelAttribute, "threshold finding");
  }

  // Exactly two classes are required.
  data.recalculateAttributeStatistics(labelAttribute);
  NominalMapping classes = labelAttribute.getMapping();
  if (classes.size() != 2) {
    Object[] errorArguments = {
      labelAttribute, Integer.valueOf(classes.getValues().size()), Integer.valueOf(2)
    };
    throw new UserError(this, 118, errorArguments);
  }

  if (data.getAttributes().getPredictedLabel() == null) {
    throw new UserError(this, 107);
  }

  boolean labelsDefinedExplicitly = getParameterAsBoolean(PARAMETER_DEFINE_LABELS);
  double costSecond = getParameterAsDouble(PARAMETER_MISCLASSIFICATION_COSTS_SECOND);
  double costFirst = getParameterAsDouble(PARAMETER_MISCLASSIFICATION_COSTS_FIRST);

  if (labelsDefinedExplicitly) {
    String firstName = getParameterAsString(PARAMETER_FIRST_LABEL);
    String secondName = getParameterAsString(PARAMETER_SECOND_LABEL);

    int firstIndex = classes.getIndex(firstName);
    if (firstIndex == -1) {
      throw new UserError(this, 143, firstName, labelAttribute.getName());
    }
    int secondIndex = classes.getIndex(secondName);
    if (secondIndex == -1) {
      throw new UserError(this, 143, secondName, labelAttribute.getName());
    }

    // The user's order is the reverse of the order in the data: exchange the two costs so
    // that they line up with the mapping.
    if (firstIndex > secondIndex) {
      double swapped = costFirst;
      costFirst = costSecond;
      costSecond = swapped;
    }
  }

  // Both classes need a confidence attribute.
  String positiveClass = classes.getPositiveString();
  if (data.getAttributes().getConfidence(positiveClass) == null) {
    throw new UserError(this, 113, Attributes.CONFIDENCE_NAME + "_" + positiveClass);
  }
  String negativeClass = classes.getNegativeString();
  if (data.getAttributes().getConfidence(negativeClass) == null) {
    throw new UserError(this, 113, Attributes.CONFIDENCE_NAME + "_" + negativeClass);
  }

  // Build the ROC data from the (possibly swapped) costs.
  ROCDataGenerator generator = new ROCDataGenerator(costFirst, costSecond);
  boolean weighted = getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS);
  ROCData rocCurve = generator.createROCData(data, weighted, ROCBias.getROCBiasParameter(this));

  // Optional plot dialog.
  if (getParameterAsBoolean(PARAMETER_SHOW_ROC_PLOT)) {
    generator.createROCPlotDialog(rocCurve, true, true);
  }

  // Deliver the results: the example set first, then the threshold.
  exampleSetOutput.deliver(data);
  thresholdOutput.deliver(
      new Threshold(generator.getBestThreshold(), negativeClass, positiveClass));
}