Exemplo n.º 1
0
  /**
   * Trains a hyperplane model with a perceptron-style update rule.
   *
   * <p>The model starts with an all-zero coefficient vector and a zero intercept. The example set
   * is then scanned repeatedly; whenever the model's prediction differs from an example's label,
   * the intercept and every coefficient are shifted towards the example's class, scaled by the
   * learning rate of the current round.
   *
   * @param exampleSet the training data; the nominal mapping of its label supplies the negative
   *     and positive class
   * @return the trained {@link HyperplaneModel}
   * @throws OperatorException if one of the operator, kernel or model calls made here fails
   */
  @Override
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    Kernel kernel = getKernel();
    kernel.init(exampleSet);

    double initLearnRate = getParameterAsDouble(PARAMETER_LEARNING_RATE);
    Attributes attributes = exampleSet.getAttributes();
    NominalMapping labelMapping = attributes.getLabel().getMapping();
    String classNeg = labelMapping.getNegativeString();
    String classPos = labelMapping.getPositiveString();
    double classValueNeg = labelMapping.getNegativeIndex();

    HyperplaneModel model = new HyperplaneModel(exampleSet, classNeg, classPos, kernel);
    model.init(new double[attributes.size()], 0);

    // The round limit does not change during training, so read the parameter only once.
    int rounds = getParameterAsInt(PARAMETER_ROUNDS);
    // NOTE(review): the bound is inclusive, so rounds + 1 passes are made. Kept as in the
    // original; confirm against getLearnRate whether the final pass is intended.
    for (int round = 0; round <= rounds; round++) {
      double learnRate = getLearnRate(round, rounds, initLearnRate);
      for (Example example : exampleSet) {
        double prediction = model.predict(example);
        double label = example.getLabel();
        if (prediction == label) {
          continue; // correctly classified, nothing to adapt
        }

        // -1 pulls the hyperplane towards the negative class, +1 towards the positive class
        double direction = (label == classValueNeg) ? -1 : 1;
        double step = learnRate * direction;

        // adapting intercept
        model.setIntercept(model.getIntercept() + step);

        // adapting coefficients; they are visited in the iteration order of the attributes
        double[] coefficients = model.getCoefficients();
        int index = 0;
        for (Attribute attribute : attributes) {
          coefficients[index] += step * example.getValue(attribute);
          index++;
        }
      }
    }
    return model;
  }
  /**
   * Replaces the predicted label by ranked prediction/confidence attribute pairs.
   *
   * <p>For each rank {@code 1..k}, where {@code k} is the smaller of the number of label values
   * and {@code PARAMETER_NUMBER_OF_RANKS}, a prediction attribute and a confidence attribute are
   * created and registered as special attributes. For every example the label values are ordered
   * by their confidence, and the rank-{@code n} pair receives the value with the n-th highest
   * confidence. The original predicted label is always removed afterwards; the original
   * confidence attributes are removed only if {@code PARAMETER_REMOVE_OLD_PREDICTIONS} is set.
   *
   * @param exampleSet the example set carrying a predicted label and one confidence attribute
   *     per label value; it is modified in place
   * @return the same, modified example set
   * @throws OperatorException a {@link UserError} 107 if there is no predicted label, or 154 if a
   *     confidence attribute for one of the label values is missing
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // searching confidence attributes
    Attributes attributes = exampleSet.getAttributes();
    Attribute predictedLabel = attributes.getPredictedLabel();
    if (predictedLabel == null) {
      throw new UserError(this, 107);
    }

    NominalMapping mapping = predictedLabel.getMapping();
    int numberOfLabels = mapping.size();
    Attribute[] confidences = new Attribute[numberOfLabels];
    int labelIndex = 0;
    for (String value : mapping.getValues()) {
      Attribute confidence = attributes.getConfidence(value);
      if (confidence == null) {
        throw new UserError(this, 154, value);
      }
      confidences[labelIndex] = confidence;
      labelIndex++;
    }

    // generating new prediction attributes
    int k = Math.min(numberOfLabels, getParameterAsInt(PARAMETER_NUMBER_OF_RANKS));
    Attribute[] kthPredictions = new Attribute[k];
    Attribute[] kthConfidences = new Attribute[k];
    for (int rank = 0; rank < k; rank++) {
      String suffix = "_" + (rank + 1); // ranks are numbered from 1 in attribute names and roles

      Attribute prediction = AttributeFactory.createAttribute(predictedLabel.getValueType());
      prediction.setName(predictedLabel.getName() + suffix);
      prediction.setMapping((NominalMapping) predictedLabel.getMapping().clone());

      Attribute confidence = AttributeFactory.createAttribute(Ontology.REAL);
      confidence.setName(Attributes.CONFIDENCE_NAME + suffix);

      attributes.addRegular(prediction);
      attributes.addRegular(confidence);
      attributes.setSpecialAttribute(prediction, Attributes.PREDICTION_NAME + suffix);
      attributes.setSpecialAttribute(confidence, Attributes.CONFIDENCE_NAME + suffix);

      kthPredictions[rank] = prediction;
      kthConfidences[rank] = confidence;
    }
    exampleSet.getExampleTable().addAttributes(Arrays.asList(kthConfidences));
    exampleSet.getExampleTable().addAttributes(Arrays.asList(kthPredictions));

    // now setting values
    for (Example example : exampleSet) {
      // pair every confidence with the position of its label value, then sort the pairs
      ArrayList<Tupel<Double, Integer>> labelConfidences =
          new ArrayList<Tupel<Double, Integer>>(numberOfLabels);
      for (int label = 0; label < numberOfLabels; label++) {
        labelConfidences.add(
            new Tupel<Double, Integer>(example.getValue(confidences[label]), label));
      }
      Collections.sort(labelConfidences);

      // rank 0 is read from the end of the sorted list, rank 1 from the element before it, ...
      for (int rank = 0; rank < k; rank++) {
        Tupel<Double, Integer> tupel = labelConfidences.get(numberOfLabels - rank - 1);
        example.setValue(
            kthPredictions[rank],
            tupel.getSecond()); // Can use index since mapping has been cloned from above
        example.setValue(kthConfidences[rank], tupel.getFirst());
      }
    }

    // deleting old prediction / confidences
    attributes.remove(predictedLabel);
    if (getParameterAsBoolean(PARAMETER_REMOVE_OLD_PREDICTIONS)) {
      for (Attribute confidence : confidences) {
        attributes.remove(confidence);
      }
    }

    return exampleSet;
  }
  /**
   * Determines a threshold from ROC data built with the configured misclassification costs.
   *
   * <p>The input example set must have a nominal label with exactly two values, a predicted label
   * and confidence attributes for both classes. If labels are defined explicitly and their order
   * is the reverse of the order in the label mapping, the two misclassification costs are
   * swapped before the ROC data is generated. The unchanged example set and a {@link Threshold}
   * built from the generator's best threshold are delivered to the output ports.
   *
   * @throws OperatorException a {@link UserError} if one of the preconditions above is violated
   *     (codes 105, 101, 118, 107, 143 or 113)
   */
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    // checking preconditions on the label
    Attribute label = exampleSet.getAttributes().getLabel();
    if (label == null) {
      throw new UserError(this, 105);
    }
    if (!label.isNominal()) {
      throw new UserError(this, 101, label, "threshold finding");
    }
    exampleSet.recalculateAttributeStatistics(label);

    NominalMapping mapping = label.getMapping();
    if (mapping.size() != 2) {
      Object[] arguments = {
        label, Integer.valueOf(mapping.getValues().size()), Integer.valueOf(2)
      };
      throw new UserError(this, 118, arguments);
    }
    if (exampleSet.getAttributes().getPredictedLabel() == null) {
      throw new UserError(this, 107);
    }

    // reading cost parameters
    boolean useExplicitLabels = getParameterAsBoolean(PARAMETER_DEFINE_LABELS);
    double secondCost = getParameterAsDouble(PARAMETER_MISCLASSIFICATION_COSTS_SECOND);
    double firstCost = getParameterAsDouble(PARAMETER_MISCLASSIFICATION_COSTS_FIRST);

    if (useExplicitLabels) {
      String firstLabel = getParameterAsString(PARAMETER_FIRST_LABEL);
      String secondLabel = getParameterAsString(PARAMETER_SECOND_LABEL);

      // both explicitly named labels must be values of the label attribute
      int firstIndex = mapping.getIndex(firstLabel);
      if (firstIndex == -1) {
        throw new UserError(this, 143, firstLabel, label.getName());
      }
      int secondIndex = mapping.getIndex(secondLabel);
      if (secondIndex == -1) {
        throw new UserError(this, 143, secondLabel, label.getName());
      }

      // if explicit order differs from order in data: internally swap costs.
      if (firstIndex > secondIndex) {
        double swapped = secondCost;
        secondCost = firstCost;
        firstCost = swapped;
      }
    }

    // check whether the confidence attributes are available
    String positiveClass = mapping.getPositiveString();
    if (exampleSet.getAttributes().getConfidence(positiveClass) == null) {
      throw new UserError(this, 113, Attributes.CONFIDENCE_NAME + "_" + positiveClass);
    }
    String negativeClass = mapping.getNegativeString();
    if (exampleSet.getAttributes().getConfidence(negativeClass) == null) {
      throw new UserError(this, 113, Attributes.CONFIDENCE_NAME + "_" + negativeClass);
    }

    // create ROC data
    ROCDataGenerator rocDataGenerator = new ROCDataGenerator(firstCost, secondCost);
    boolean useExampleWeights = getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS);
    ROCData rocData =
        rocDataGenerator.createROCData(
            exampleSet, useExampleWeights, ROCBias.getROCBiasParameter(this));

    // create plotter
    if (getParameterAsBoolean(PARAMETER_SHOW_ROC_PLOT)) {
      rocDataGenerator.createROCPlotDialog(rocData, true, true);
    }

    // create and return output
    exampleSetOutput.deliver(exampleSet);
    Threshold threshold =
        new Threshold(rocDataGenerator.getBestThreshold(), negativeClass, positiveClass);
    thresholdOutput.deliver(threshold);
  }