/**
   * Applies the sorted and unmachted attribute list to the provided {@link Attributes}. All
   * unmachted attributes are removed from attributes and all {@link Attribute}s from the sorted
   * list are added in correct order.
   *
   * @param sortedAttributeList attributes that will be removed first and added in correct order
   *     afterwards.
   * @param unmachtedAttributes attributes that should be removed. May be <code>null</code> if no
   *     attributes should be removed.
   */
  private void applySortedAttributes(
      List<Attribute> sortedAttributeList,
      List<Attribute> unmachtedAttributes,
      Attributes attributes) {
    if (unmachtedAttributes != null) {
      for (Attribute unmachted : unmachtedAttributes) {
        attributes.remove(unmachted);
      }
    }

    for (Attribute attribute : sortedAttributeList) {
      AttributeRole role = attributes.getRole(attribute);
      attributes.remove(attribute);

      if (role.isSpecial()) {
        attributes.setSpecialAttribute(attribute, role.getSpecialName());
      } else { // regular
        attributes.addRegular(attribute);
      }
    }
  }
  @Override
  public ExampleSet applyOnData(ExampleSet exampleSet) throws OperatorException {
    Attributes attributes = exampleSet.getAttributes();

    // constructing new attributes with generic names, holding old ones, if old type wasn't real
    Attribute[] oldAttributes = new Attribute[attributes.size()];
    int i = 0;
    for (Attribute attribute : attributes) {
      oldAttributes[i] = attribute;
      i++;
    }
    Attribute[] newAttributes = new Attribute[attributes.size()];
    for (i = 0; i < newAttributes.length; i++) {
      newAttributes[i] = oldAttributes[i];
      if (oldAttributes[i].isNumerical())
        if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(oldAttributes[i].getValueType(), Ontology.REAL)) {
          newAttributes[i] = AttributeFactory.createAttribute(Ontology.REAL);
          exampleSet.getExampleTable().addAttribute(newAttributes[i]);
          attributes.addRegular(newAttributes[i]);
        }
    }

    // applying on data
    applyOnData(exampleSet, oldAttributes, newAttributes);

    // removing old attributes and change new attributes name to old ones if needed
    for (i = 0; i < oldAttributes.length; i++) {
      attributes.remove(oldAttributes[i]);
      // if attribute is new, then remove for later storing in correct order
      if (oldAttributes[i] != newAttributes[i]) attributes.remove(newAttributes[i]);
      attributes.addRegular(newAttributes[i]);
      newAttributes[i].setName(oldAttributes[i].getName());
    }

    return exampleSet;
  }
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSetOriginal = exampleSetInput.getData(ExampleSet.class);
    ExampleSet exampleSet = (ExampleSet) exampleSetOriginal.clone();
    int numberOfAttributes = exampleSet.getAttributes().size();
    Attributes attributes = exampleSet.getAttributes();

    int maxNumberOfAttributes =
        Math.min(getParameterAsInt(PARAMETER_MAX_ATTRIBUTES), numberOfAttributes - 1);
    int maxNumberOfFails = getParameterAsInt(PARAMETER_ALLOWED_CONSECUTIVE_FAILS);
    int behavior = getParameterAsInt(PARAMETER_STOPPING_BEHAVIOR);

    boolean useRelativeIncrease =
        (behavior == WITH_DECREASE_EXCEEDS)
            ? getParameterAsBoolean(PARAMETER_USE_RELATIVE_DECREASE)
            : false;
    double maximalDecrease = 0;
    if (useRelativeIncrease)
      maximalDecrease =
          useRelativeIncrease
              ? getParameterAsDouble(PARAMETER_MAX_RELATIVE_DECREASE)
              : getParameterAsDouble(PARAMETER_MAX_ABSOLUT_DECREASE);
    double alpha =
        (behavior == WITH_DECREASE_SIGNIFICANT) ? getParameterAsDouble(PARAMETER_ALPHA) : 0d;

    // remembering attributes and removing all from example set
    Attribute[] attributeArray = new Attribute[numberOfAttributes];
    int i = 0;
    Iterator<Attribute> iterator = attributes.iterator();
    while (iterator.hasNext()) {
      Attribute attribute = iterator.next();
      attributeArray[i] = attribute;
      i++;
    }

    boolean[] selected = new boolean[numberOfAttributes];
    Arrays.fill(selected, true);

    boolean earlyAbort = false;
    List<Integer> speculativeList = new ArrayList<Integer>(maxNumberOfFails);
    int numberOfFails = maxNumberOfFails;
    currentNumberOfFeatures = numberOfAttributes;
    currentAttributes = attributes;
    PerformanceVector lastPerformance = getPerformance(exampleSet);
    PerformanceVector bestPerformanceEver = lastPerformance;
    for (i = 0; i < maxNumberOfAttributes && !earlyAbort; i++) {
      // setting values for logging
      currentNumberOfFeatures = numberOfAttributes - i - 1;

      // performing a round
      int bestIndex = 0;
      PerformanceVector currentBestPerformance = null;
      for (int current = 0; current < numberOfAttributes; current++) {
        if (selected[current]) {
          // switching off
          attributes.remove(attributeArray[current]);
          currentAttributes = attributes;

          // evaluate performance
          PerformanceVector performance = getPerformance(exampleSet);
          if (currentBestPerformance == null || performance.compareTo(currentBestPerformance) > 0) {
            bestIndex = current;
            currentBestPerformance = performance;
          }

          // switching on
          attributes.addRegular(attributeArray[current]);
          currentAttributes = null; // removing reference
        }
      }
      double currentFitness = currentBestPerformance.getMainCriterion().getFitness();
      if (i != 0) {
        double lastFitness = lastPerformance.getMainCriterion().getFitness();
        // switch stopping behavior
        switch (behavior) {
          case WITH_DECREASE:
            if (lastFitness >= currentFitness) earlyAbort = true;
            break;
          case WITH_DECREASE_EXCEEDS:
            if (useRelativeIncrease) {
              // relative increase testing
              if (currentFitness < lastFitness - Math.abs(lastFitness * maximalDecrease))
                earlyAbort = true;
            } else {
              // absolute increase testing
              if (currentFitness < lastFitness - maximalDecrease) earlyAbort = true;
            }
            break;
          case WITH_DECREASE_SIGNIFICANT:
            AnovaCalculator calculator = new AnovaCalculator();
            calculator.setAlpha(alpha);

            PerformanceCriterion pc = currentBestPerformance.getMainCriterion();
            calculator.addGroup(pc.getAverageCount(), pc.getAverage(), pc.getVariance());
            pc = lastPerformance.getMainCriterion();
            calculator.addGroup(pc.getAverageCount(), pc.getAverage(), pc.getVariance());

            SignificanceTestResult result;
            try {
              result = calculator.performSignificanceTest();
            } catch (SignificanceCalculationException e) {
              throw new UserError(this, 920, e.getMessage());
            }
            if (lastFitness > currentFitness && result.getProbability() < alpha) earlyAbort = true;
        }
      }
      if (earlyAbort) {
        // check if there are some free tries left
        if (numberOfFails == 0) {
          break;
        }
        numberOfFails--;
        speculativeList.add(bestIndex);
        earlyAbort = false;

        // needs performance increase compared to better performance of current and last!
        if (currentBestPerformance.compareTo(lastPerformance) > 0)
          lastPerformance = currentBestPerformance;
      } else {
        // resetting maximal number of fails.
        numberOfFails = maxNumberOfFails;
        speculativeList.clear();
        lastPerformance = currentBestPerformance;
        bestPerformanceEver = currentBestPerformance;
      }

      // switching best index off
      attributes.remove(attributeArray[bestIndex]);
      selected[bestIndex] = false;
    }
    // add predictively removed attributes: speculative execution did not yield  good result
    for (Integer removeIndex : speculativeList) {
      selected[removeIndex] = true;
      attributes.addRegular(attributeArray[removeIndex]);
    }

    AttributeWeights weights = new AttributeWeights();
    i = 0;
    for (Attribute attribute : attributeArray) {
      if (selected[i]) weights.setWeight(attribute.getName(), 1d);
      else weights.setWeight(attribute.getName(), 0d);
      i++;
    }

    exampleSetOutput.deliver(exampleSet);
    performanceOutput.deliver(bestPerformanceEver);
    weightsOutput.deliver(weights);
  }
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // searching confidence attributes
    Attributes attributes = exampleSet.getAttributes();
    Attribute predictedLabel = attributes.getPredictedLabel();
    if (predictedLabel == null) {
      throw new UserError(this, 107);
    }

    NominalMapping mapping = predictedLabel.getMapping();
    int numberOfLabels = mapping.size();
    Attribute[] confidences = new Attribute[numberOfLabels];
    String[] labelValue = new String[numberOfLabels];
    int i = 0;
    for (String value : mapping.getValues()) {
      labelValue[i] = value;
      confidences[i] = attributes.getConfidence(value);
      if (confidences[i] == null) {
        throw new UserError(this, 154, value);
      }
      i++;
    }

    // generating new prediction attributes
    int k = Math.min(numberOfLabels, getParameterAsInt(PARAMETER_NUMBER_OF_RANKS));
    Attribute[] kthPredictions = new Attribute[k];
    Attribute[] kthConfidences = new Attribute[k];
    for (i = 0; i < k; i++) {
      kthPredictions[i] = AttributeFactory.createAttribute(predictedLabel.getValueType());
      kthPredictions[i].setName(predictedLabel.getName() + "_" + (i + 1));
      kthPredictions[i].setMapping((NominalMapping) predictedLabel.getMapping().clone());
      kthConfidences[i] = AttributeFactory.createAttribute(Ontology.REAL);
      kthConfidences[i].setName(Attributes.CONFIDENCE_NAME + "_" + (i + 1));
      attributes.addRegular(kthPredictions[i]);
      attributes.addRegular(kthConfidences[i]);
      attributes.setSpecialAttribute(kthPredictions[i], Attributes.PREDICTION_NAME + "_" + (i + 1));
      attributes.setSpecialAttribute(kthConfidences[i], Attributes.CONFIDENCE_NAME + "_" + (i + 1));
    }
    exampleSet.getExampleTable().addAttributes(Arrays.asList(kthConfidences));
    exampleSet.getExampleTable().addAttributes(Arrays.asList(kthPredictions));

    // now setting values
    for (Example example : exampleSet) {
      ArrayList<Tupel<Double, Integer>> labelConfidences =
          new ArrayList<Tupel<Double, Integer>>(numberOfLabels);
      for (i = 0; i < numberOfLabels; i++) {
        labelConfidences.add(new Tupel<Double, Integer>(example.getValue(confidences[i]), i));
      }
      Collections.sort(labelConfidences);
      for (i = 0; i < k; i++) {
        Tupel<Double, Integer> tupel = labelConfidences.get(numberOfLabels - i - 1);
        example.setValue(
            kthPredictions[i],
            tupel.getSecond()); // Can use index since mapping has been cloned from above
        example.setValue(kthConfidences[i], tupel.getFirst());
      }
    }

    // deleting old prediction / confidences
    attributes.remove(predictedLabel);
    if (getParameterAsBoolean(PARAMETER_REMOVE_OLD_PREDICTIONS)) {
      for (i = 0; i < confidences.length; i++) {
        attributes.remove(confidences[i]);
      }
    }

    return exampleSet;
  }
Beispiel #5
0
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    exampleSet.recalculateAllAttributeStatistics();

    Attributes attributes = exampleSet.getAttributes();
    if (attributeNames.length != attributes.size()) {
      throw new UserError(null, 133, numberOfComponents, attributes.size());
    }

    // remember attributes that have been removed during training. These will be removed lateron
    Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()];
    int d = 0;
    for (Attribute oldAttribute : getTrainingHeader().getAttributes()) {
      inputAttributes[d] = attributes.get(oldAttribute.getName());
      d++;
    }

    // determining number of used components
    int numberOfUsedComponents = -1;
    if (manualNumber) {
      numberOfUsedComponents = numberOfComponents;
    } else {
      if (varianceThreshold == 0.0d) {
        numberOfUsedComponents = -1;
      } else {
        numberOfUsedComponents = 0;
        while (cumulativeVariance[numberOfUsedComponents] < varianceThreshold) {
          numberOfUsedComponents++;
        }
        numberOfUsedComponents++;
        if (numberOfUsedComponents == eigenVectors.size()) {
          numberOfUsedComponents--;
        }
      }
    }
    if (numberOfUsedComponents == -1) {
      // keep all components
      numberOfUsedComponents = attributes.size();
    }

    // retrieve factors inside eigenVectors
    double[][] eigenValueFactors = new double[numberOfUsedComponents][attributeNames.length];
    for (int i = 0; i < numberOfUsedComponents; i++) {
      eigenValueFactors[i] = this.eigenVectors.get(i).getEigenvector();
    }

    // now build new attributes
    Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents];
    for (int i = 0; i < numberOfUsedComponents; i++) {
      derivedAttributes[i] = AttributeFactory.createAttribute("pc_" + (i + 1), Ontology.REAL);
      exampleSet.getExampleTable().addAttribute(derivedAttributes[i]);
      attributes.addRegular(derivedAttributes[i]);
    }

    // now iterator through all examples and derive value of new features
    double[] derivedValues = new double[numberOfUsedComponents];
    for (Example example : exampleSet) {
      // calculate values of new attributes with single scan over attributes
      d = 0;
      for (Attribute attribute : inputAttributes) {
        double attributeValue = example.getValue(attribute) - means[d];
        for (int i = 0; i < numberOfUsedComponents; i++) {
          derivedValues[i] += eigenValueFactors[i][d] * attributeValue;
        }
        d++;
      }

      // set values
      for (int i = 0; i < numberOfUsedComponents; i++) {
        example.setValue(derivedAttributes[i], derivedValues[i]);
      }

      // set values back
      Arrays.fill(derivedValues, 0);
    }

    // now remove attributes if needed
    if (!keepAttributes) {
      for (Attribute attribute : inputAttributes) {
        attributes.remove(attribute);
      }
    }

    return exampleSet;
  }
  @Override
  public ExampleSet apply(ExampleSet inputExampleSet) throws OperatorException {
    ExampleSet exampleSet = (ExampleSet) inputExampleSet.clone();
    Attributes attributes = exampleSet.getAttributes();
    if (attributeNames.length != attributes.size()) {
      throw new UserError(null, 133, numberOfComponents, attributes.size());
    }

    // remember attributes that have been removed during training. These will be removed lateron
    Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()];
    int d = 0;
    for (Attribute oldAttribute : getTrainingHeader().getAttributes()) {
      inputAttributes[d] = attributes.get(oldAttribute.getName());
      d++;
    }

    // determining number of used components
    int numberOfUsedComponents = -1;
    if (manualNumber) {
      numberOfUsedComponents = numberOfComponents;
    } else {
      if (proportionThreshold == 0.0d) {
        numberOfUsedComponents = -1;
      } else {
        numberOfUsedComponents = 0;
        while (cumulativeSingularValueProportion[numberOfUsedComponents] < proportionThreshold) {
          numberOfUsedComponents++;
        }
        numberOfUsedComponents++;
      }
    }
    // if nothing defined or number exceeds maximal number of possible components
    if (numberOfUsedComponents == -1 || numberOfUsedComponents > getNumberOfComponents()) {
      // keep all components
      numberOfUsedComponents = getNumberOfComponents();
    }

    // retrieve factors inside singularValueVectors
    double[][] singularValueFactors = new double[numberOfUsedComponents][attributeNames.length];
    double[][] vMatrixData = vMatrix.getArray();
    for (int i = 0; i < numberOfUsedComponents; i++) {
      double invertedSingularValue = 1d / singularValues[i];
      for (int j = 0; j < attributeNames.length; j++) {
        singularValueFactors[i][j] = vMatrixData[j][i] * invertedSingularValue;
      }
    }

    // now build new attributes
    Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents];
    for (int i = 0; i < numberOfUsedComponents; i++) {
      if (useLegacyNames) {
        derivedAttributes[i] = AttributeFactory.createAttribute("d" + i, Ontology.REAL);
      } else {
        derivedAttributes[i] = AttributeFactory.createAttribute("svd_" + (i + 1), Ontology.REAL);
      }
      exampleSet.getExampleTable().addAttribute(derivedAttributes[i]);
      attributes.addRegular(derivedAttributes[i]);
    }

    // now iterator through all examples and derive value of new features
    double[] derivedValues = new double[numberOfUsedComponents];
    for (Example example : exampleSet) {
      // calculate values of new attributes with single scan over attributes
      d = 0;
      for (Attribute attribute : inputAttributes) {
        double attributeValue = example.getValue(attribute);
        for (int i = 0; i < numberOfUsedComponents; i++) {
          derivedValues[i] += singularValueFactors[i][d] * attributeValue;
        }
        d++;
      }

      // set values
      for (int i = 0; i < numberOfUsedComponents; i++) {
        example.setValue(derivedAttributes[i], derivedValues[i]);
      }

      // set values back
      Arrays.fill(derivedValues, 0);
    }

    // now remove attributes if needed
    if (!keepAttributes) {
      for (Attribute attribute : inputAttributes) {
        attributes.remove(attribute);
      }
    }

    return exampleSet;
  }