@Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // searching confidence attributes
    Attributes attributes = exampleSet.getAttributes();
    Attribute predictedLabel = attributes.getPredictedLabel();
    if (predictedLabel == null) {
      throw new UserError(this, 107);
    }

    NominalMapping mapping = predictedLabel.getMapping();
    int numberOfLabels = mapping.size();
    Attribute[] confidences = new Attribute[numberOfLabels];
    String[] labelValue = new String[numberOfLabels];
    int i = 0;
    for (String value : mapping.getValues()) {
      labelValue[i] = value;
      confidences[i] = attributes.getConfidence(value);
      if (confidences[i] == null) {
        throw new UserError(this, 154, value);
      }
      i++;
    }

    // generating new prediction attributes
    int k = Math.min(numberOfLabels, getParameterAsInt(PARAMETER_NUMBER_OF_RANKS));
    Attribute[] kthPredictions = new Attribute[k];
    Attribute[] kthConfidences = new Attribute[k];
    for (i = 0; i < k; i++) {
      kthPredictions[i] = AttributeFactory.createAttribute(predictedLabel.getValueType());
      kthPredictions[i].setName(predictedLabel.getName() + "_" + (i + 1));
      kthPredictions[i].setMapping((NominalMapping) predictedLabel.getMapping().clone());
      kthConfidences[i] = AttributeFactory.createAttribute(Ontology.REAL);
      kthConfidences[i].setName(Attributes.CONFIDENCE_NAME + "_" + (i + 1));
      attributes.addRegular(kthPredictions[i]);
      attributes.addRegular(kthConfidences[i]);
      attributes.setSpecialAttribute(kthPredictions[i], Attributes.PREDICTION_NAME + "_" + (i + 1));
      attributes.setSpecialAttribute(kthConfidences[i], Attributes.CONFIDENCE_NAME + "_" + (i + 1));
    }
    exampleSet.getExampleTable().addAttributes(Arrays.asList(kthConfidences));
    exampleSet.getExampleTable().addAttributes(Arrays.asList(kthPredictions));

    // now setting values
    for (Example example : exampleSet) {
      ArrayList<Tupel<Double, Integer>> labelConfidences =
          new ArrayList<Tupel<Double, Integer>>(numberOfLabels);
      for (i = 0; i < numberOfLabels; i++) {
        labelConfidences.add(new Tupel<Double, Integer>(example.getValue(confidences[i]), i));
      }
      Collections.sort(labelConfidences);
      for (i = 0; i < k; i++) {
        Tupel<Double, Integer> tupel = labelConfidences.get(numberOfLabels - i - 1);
        example.setValue(
            kthPredictions[i],
            tupel.getSecond()); // Can use index since mapping has been cloned from above
        example.setValue(kthConfidences[i], tupel.getFirst());
      }
    }

    // deleting old prediction / confidences
    attributes.remove(predictedLabel);
    if (getParameterAsBoolean(PARAMETER_REMOVE_OLD_PREDICTIONS)) {
      for (i = 0; i < confidences.length; i++) {
        attributes.remove(confidences[i]);
      }
    }

    return exampleSet;
  }
コード例 #2
0
  @Override
  public void doWork() throws OperatorException {
    // sanity checks
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    // checking preconditions
    Attribute label = exampleSet.getAttributes().getLabel();
    if (label == null) {
      throw new UserError(this, 105);
    }
    if (!label.isNominal()) {
      throw new UserError(this, 101, label, "threshold finding");
    }
    exampleSet.recalculateAttributeStatistics(label);
    NominalMapping mapping = label.getMapping();
    if (mapping.size() != 2) {
      throw new UserError(
          this,
          118,
          new Object[] {label, Integer.valueOf(mapping.getValues().size()), Integer.valueOf(2)});
    }
    if (exampleSet.getAttributes().getPredictedLabel() == null) {
      throw new UserError(this, 107);
    }
    boolean useExplictLabels = getParameterAsBoolean(PARAMETER_DEFINE_LABELS);

    double secondCost = getParameterAsDouble(PARAMETER_MISCLASSIFICATION_COSTS_SECOND);
    double firstCost = getParameterAsDouble(PARAMETER_MISCLASSIFICATION_COSTS_FIRST);
    if (useExplictLabels) {
      String firstLabel = getParameterAsString(PARAMETER_FIRST_LABEL);
      String secondLabel = getParameterAsString(PARAMETER_SECOND_LABEL);

      if (mapping.getIndex(firstLabel) == -1) {
        throw new UserError(this, 143, firstLabel, label.getName());
      }
      if (mapping.getIndex(secondLabel) == -1) {
        throw new UserError(this, 143, secondLabel, label.getName());
      }

      // if explicit order differs from order in data: internally swap costs.
      if (mapping.getIndex(firstLabel) > mapping.getIndex(secondLabel)) {
        double temp = firstCost;
        firstCost = secondCost;
        secondCost = temp;
      }
    }

    // check whether the confidence attributes are available
    if (exampleSet.getAttributes().getConfidence(mapping.getPositiveString()) == null) {
      throw new UserError(
          this, 113, Attributes.CONFIDENCE_NAME + "_" + mapping.getPositiveString());
    }
    if (exampleSet.getAttributes().getConfidence(mapping.getNegativeString()) == null) {
      throw new UserError(
          this, 113, Attributes.CONFIDENCE_NAME + "_" + mapping.getNegativeString());
    }
    // create ROC data
    ROCDataGenerator rocDataGenerator = new ROCDataGenerator(firstCost, secondCost);
    ROCData rocData =
        rocDataGenerator.createROCData(
            exampleSet,
            getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS),
            ROCBias.getROCBiasParameter(this));

    // create plotter
    if (getParameterAsBoolean(PARAMETER_SHOW_ROC_PLOT)) {
      rocDataGenerator.createROCPlotDialog(rocData, true, true);
    }

    // create and return output
    exampleSetOutput.deliver(exampleSet);
    thresholdOutput.deliver(
        new Threshold(
            rocDataGenerator.getBestThreshold(),
            mapping.getNegativeString(),
            mapping.getPositiveString()));
  }