@Override
  public AttributeWeights calculateWeights(ExampleSet exampleSet) throws OperatorException {
    // Gives every attribute yielded by iterating the example set's Attributes a weight equal to
    // the absolute value of its correlation with the label attribute.
    Attributes allAttributes = exampleSet.getAttributes();
    Attribute label = allAttributes.getLabel();
    boolean squared = getParameterAsBoolean(PARAMETER_SQUARED_CORRELATION);

    AttributeWeights result = new AttributeWeights(exampleSet);
    getProgress().setTotal(allAttributes.size());

    // Progress is reported only after more than PROGRESS_UPDATE_STEPS examples have been
    // accounted for since the previous report.
    final int examplesPerAttribute = exampleSet.size();
    int finishedAttributes = 0;
    int examplesSinceReport = 0;
    for (Attribute candidate : allAttributes) {
      double correlation = MathFunctions.correlation(exampleSet, label, candidate, squared);
      result.setWeight(candidate.getName(), Math.abs(correlation));

      finishedAttributes++;
      examplesSinceReport += examplesPerAttribute;
      if (examplesSinceReport > PROGRESS_UPDATE_STEPS) {
        getProgress().setCompleted(finishedAttributes);
        examplesSinceReport = 0;
      }
    }
    return result;
  }
  @Override
  public void doWork() throws OperatorException {
    // Turns the centroids of the incoming cluster model into an example set: one row per
    // cluster, one column per model attribute plus a nominal "cluster" column.
    CentroidClusterModel model = modelInput.getData(CentroidClusterModel.class);

    Attributes headerAttributes = model.getTrainingHeader().getAttributes();
    String[] names = model.getAttributeNames();
    final int dimensions = names.length;

    // Re-create each training attribute (same name and value type); nominal ones get a copy of
    // the original mapping.
    Attribute[] columns = new Attribute[dimensions + 1];
    for (int col = 0; col < dimensions; col++) {
      String name = names[col];
      Attribute source = headerAttributes.get(name);
      Attribute copy = AttributeFactory.createAttribute(name, source.getValueType());
      columns[col] = copy;
      if (source.isNominal()) {
        copy.setMapping((NominalMapping) source.getMapping().clone());
      }
    }
    Attribute clusterAttribute = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
    columns[dimensions] = clusterAttribute;

    // Fill the table with the centroid coordinates followed by the cluster's nominal index.
    MemoryExampleTable table = new MemoryExampleTable(columns);
    for (int cluster = 0; cluster < model.getNumberOfClusters(); cluster++) {
      double[] row = new double[dimensions + 1];
      System.arraycopy(model.getCentroidCoordinates(cluster), 0, row, 0, dimensions);
      row[dimensions] = clusterAttribute.getMapping().mapString("cluster_" + cluster);
      table.addDataRow(new DoubleArrayDataRow(row));
    }

    ExampleSet centroidSet = table.createExampleSet();
    centroidSet.getAttributes().setSpecialAttribute(clusterAttribute, Attributes.CLUSTER_NAME);

    modelOutput.deliver(model);
    exampleSetOutput.deliver(centroidSet);
  }
Exemplo n.º 3
0
  @Override
  public void doWork() throws OperatorException {
    // Reads (user, item, rating) triples from the input example set, trains a _slopeOne
    // recommender on them and delivers both the unchanged example set and the trained model.
    ExampleSet exampleSet = exampleSetInput.getData();

    IEntityMapping userMapping = new EntityMapping();
    IEntityMapping itemMapping = new EntityMapping();
    IRatings ratings = new Ratings();

    // All three roles are required; each missing role is reported with the same user error.
    if (exampleSet.getAttributes().getSpecial("user identification") == null
        || exampleSet.getAttributes().getSpecial("item identification") == null
        || exampleSet.getAttributes().getLabel() == null) {
      throw new UserError(this, 105);
    }

    Attributes attributes = exampleSet.getAttributes();
    Attribute userAttribute = attributes.getRole("user identification").getAttribute();
    Attribute itemAttribute = attributes.getRole("item identification").getAttribute();
    Attribute ratingAttribute = attributes.getLabel();

    for (Example example : exampleSet) {
      // Raw ids are truncated to int before being translated to internal ids.
      int internalUser = userMapping.ToInternalID((int) example.getValue(userAttribute));
      int internalItem = itemMapping.ToInternalID((int) example.getValue(itemAttribute));
      double rating = example.getValue(ratingAttribute);
      ratings.Add(internalUser, internalItem, rating);
    }

    _slopeOne recommender = new _slopeOne();
    recommender.user_mapping = userMapping;
    recommender.item_mapping = itemMapping;
    recommender.SetMinRating(getParameterAsInt("Min Rating"));
    recommender.SetMaxRating(recommender.GetMinRating() + getParameterAsInt("Range"));
    recommender.SetRatings(ratings);
    recommender.Train();

    exampleSetOutput.deliver(exampleSet);
    exampleSetOutput1.deliver(recommender);
  }
  /** Returns the example's values for its own attributes, in iteration order. */
  private double[] getExampleValues(Example example) {
    Attributes exampleAttributes = example.getAttributes();
    double[] result = new double[exampleAttributes.size()];
    int position = 0;
    for (Attribute current : exampleAttributes) {
      result[position++] = example.getValue(current);
    }
    return result;
  }
Exemplo n.º 5
0
 @Override
 public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
   // Removes, in place, every attribute that the attribute selector did not pick.
   Attributes attributes = exampleSet.getAttributes();
   Set<Attribute> selected = attributeSelector.getAttributeSubset(exampleSet, true);
   for (Iterator<Attribute> all = attributes.allAttributes(); all.hasNext(); ) {
     Attribute candidate = all.next();
     if (!selected.contains(candidate)) {
       all.remove();
     }
   }
   return exampleSet;
 }
  @Override
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    // Collects all examples with positive weight into a geometric data collection and wraps it
    // in a LocalPolynomialRegressionModel.
    DistanceMeasure measure = DistanceMeasures.createMeasure(this);
    measure.init(exampleSet);
    GeometricDataCollection<RegressionData> samples = new LinearList<RegressionData>(measure);

    boolean weighted = getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS);
    if (getParameterAsBoolean(PARAMETER_USE_ROBUST_ESTIMATION)) {
      // Robust estimation: weights are computed on a clone of the input and always used.
      weighted = true;
      try {
        LocalPolynomialExampleWeightingOperator weighting =
            OperatorService.createOperator(LocalPolynomialExampleWeightingOperator.class);
        exampleSet = weighting.doWork((ExampleSet) exampleSet.clone(), this);
      } catch (OperatorCreationException e) {
        throw new UserError(this, 904, "LocalPolynomialExampleWeighting", e.getMessage());
      }
    }

    Attributes attributes = exampleSet.getAttributes();
    Attribute label = attributes.getLabel();
    Attribute weightAttribute = attributes.getWeight();
    final boolean readWeights = weightAttribute != null && weighted;

    for (Example example : exampleSet) {
      double labelValue = example.getValue(label);
      double weight = readWeights ? example.getValue(weightAttribute) : 1d;

      // Examples without influence are skipped; a NaN weight also fails this test.
      if (!(weight > 0d)) {
        continue;
      }

      double[] point = new double[attributes.size()];
      int position = 0;
      for (Attribute attribute : attributes) {
        point[position++] = example.getValue(attribute);
      }
      samples.add(point, new RegressionData(point, labelValue, weight));
    }

    return new LocalPolynomialRegressionModel(
        exampleSet,
        samples,
        Neighborhoods.createNeighborhood(this),
        SmoothingKernels.createKernel(this),
        getParameterAsInt(PARAMETER_DEGREE),
        getParameterAsDouble(PARAMETER_RIDGE));
  }
 /**
  * Builds one representative value per attribute after recalculating all statistics: the minimum
  * for date/time attributes, the mode for nominal attributes and the average otherwise.
  */
 private double[] getMeanVector(ExampleSet exampleSet) {
   exampleSet.recalculateAllAttributeStatistics();
   Attributes attributes = exampleSet.getAttributes();
   double[] result = new double[attributes.size()];
   int position = 0;
   for (Attribute attribute : attributes) {
     boolean isDateTime =
         Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME);
     double representative;
     if (isDateTime) {
       representative = exampleSet.getStatistics(attribute, Statistics.MINIMUM);
     } else if (attribute.isNominal()) {
       representative = exampleSet.getStatistics(attribute, Statistics.MODE);
     } else {
       representative = exampleSet.getStatistics(attribute, Statistics.AVERAGE);
     }
     result[position++] = representative;
   }
   return result;
 }
 @Override
 public void init(ExampleSet exampleSet) throws OperatorException {
   // Both validation failures below are reported with this same message.
   final String notApplicable =
       "The bregman divergence you've choosen is not applicable for the dataset! Proceeding with the 'Squared Euclidean distance' bregman divergence.";

   super.init(exampleSet);
   Tools.onlyNumericalAttributes(exampleSet, "value based similarities");

   // The divergence is accepted only for exactly one attribute ...
   Attributes attributes = exampleSet.getAttributes();
   if (attributes.size() != 1) {
     throw new OperatorException(notApplicable);
   }

   // ... and only when every value is strictly positive.
   for (Example example : exampleSet) {
     for (Attribute attribute : attributes) {
       if (example.getValue(attribute) <= 0) {
         throw new OperatorException(notApplicable);
       }
     }
   }
 }
Exemplo n.º 9
0
  @Override
  public void doWork() throws OperatorException {
    // Reads (user, item) pairs from the input example set as positive-only feedback, trains a
    // Random recommender on them and delivers both the unchanged example set and the model.
    ExampleSet exampleSet = exampleSetInput.getData();

    IPosOnlyFeedback trainingData = new PosOnlyFeedback();
    IEntityMapping userMapping = new EntityMapping();
    IEntityMapping itemMapping = new EntityMapping();

    // Both id roles are required; a missing role is reported with user error 105.
    if (exampleSet.getAttributes().getSpecial("user identification") == null) {
      throw new UserError(this, 105);
    }
    if (exampleSet.getAttributes().getSpecial("item identification") == null) {
      throw new UserError(this, 105);
    }

    Attributes attributes = exampleSet.getAttributes();
    Attribute userAttribute = attributes.getRole("user identification").getAttribute();
    Attribute itemAttribute = attributes.getRole("item identification").getAttribute();

    for (Example example : exampleSet) {
      // Raw ids are truncated to int before being translated to internal ids.
      int userId = (int) example.getValue(userAttribute);
      int itemId = (int) example.getValue(itemAttribute);

      trainingData.Add(userMapping.ToInternalID(userId), itemMapping.ToInternalID(itemId));
      checkForStop();
    }

    // The former System.out.println of the max item/user ids was leftover debug output and has
    // been removed so the operator no longer writes to stdout.

    Random recommender = new Random();
    recommender.SetFeedback(trainingData);
    recommender.user_mapping = userMapping;
    recommender.item_mapping = itemMapping;
    recommender.Train();

    exampleSetOutput.deliver(exampleSet);
    exampleSetOutput1.deliver(recommender);
  }
Exemplo n.º 10
0
 /** Copies the example's values for the given attributes, in iteration order, into an array. */
 private double[] getAsDoubleArray(Example example, Attributes attributes) {
   double[] result = new double[attributes.size()];
   int position = 0;
   for (Attribute current : attributes) {
     result[position++] = example.getValue(current);
   }
   return result;
 }
Exemplo n.º 11
0
 /**
  * Returns the example's values for the given attributes with the matching entry of
  * {@code means} subtracted from each; {@code means} is indexed in attribute iteration order.
  */
 private double[] getAttributeValues(Example example, Attributes attributes, double[] means) {
   double[] centered = new double[attributes.size()];
   int position = 0;
   for (Attribute current : attributes) {
     centered[position] = example.getValue(current) - means[position];
     position++;
   }
   return centered;
 }
  /**
   * Applies the sorted and unmatched attribute lists to the provided {@link Attributes}. Every
   * unmatched attribute is removed; every attribute of the sorted list is removed and
   * immediately re-added in list order, keeping its special role if it had one.
   *
   * @param sortedAttributeList attributes that are removed and re-added in the given order.
   * @param unmachtedAttributes attributes that should be removed. May be <code>null</code> if no
   *     attributes should be removed.
   * @param attributes the attribute container that is modified in place.
   */
  private void applySortedAttributes(
      List<Attribute> sortedAttributeList,
      List<Attribute> unmachtedAttributes,
      Attributes attributes) {
    if (unmachtedAttributes != null) {
      for (Attribute toDrop : unmachtedAttributes) {
        attributes.remove(toDrop);
      }
    }

    for (Attribute sorted : sortedAttributeList) {
      // The role has to be looked up before the attribute is removed.
      AttributeRole previousRole = attributes.getRole(sorted);
      attributes.remove(sorted);

      if (!previousRole.isSpecial()) {
        attributes.addRegular(sorted);
      } else {
        attributes.setSpecialAttribute(sorted, previousRole.getSpecialName());
      }
    }
  }
Exemplo n.º 13
0
  /**
   * Writes {@code value} into the named attribute of the example: nominal attributes store the
   * mapped string, all others store the value parsed as a double.
   *
   * @throws UserError 111 if no attribute with that name exists, 211 if a non-nominal value
   *     cannot be parsed as a double.
   */
  private void setData(Example example, String attributeName, String value, Attributes attributes)
      throws UserError {
    Attribute target = attributes.get(attributeName);
    if (target == null) {
      throw new UserError(this, 111, attributeName);
    }

    if (target.isNominal()) {
      example.setValue(target, target.getMapping().mapString(value));
      return;
    }

    try {
      double parsed = Double.parseDouble(value);
      example.setValue(target, parsed);
    } catch (NumberFormatException e) {
      throw new UserError(this, 211, PARAMETER_VALUE, value);
    }
  }
  /**
   * Returns all attributes (as delivered by {@code allAttributes()}) whose name fully matches
   * the given regular expression.
   *
   * @throws OperatorException a UserError 206 if the expression is not a valid pattern.
   */
  private Attribute[] getMatchingAttributes(Attributes attributes, String regex)
      throws OperatorException {
    final Pattern namePattern;
    try {
      namePattern = Pattern.compile(regex);
    } catch (PatternSyntaxException e) {
      throw new UserError(this, 206, regex, e.getMessage());
    }

    List<Attribute> matches = new LinkedList<Attribute>();
    for (Iterator<Attribute> all = attributes.allAttributes(); all.hasNext(); ) {
      Attribute candidate = all.next();
      if (namePattern.matcher(candidate.getName()).matches()) {
        matches.add(candidate);
      }
    }

    // Callers get an array for faster indexed access.
    return matches.toArray(new Attribute[matches.size()]);
  }
  @Override
  public ExampleSet applyOnData(ExampleSet exampleSet) throws OperatorException {
    // Runs the three-argument applyOnData with a target attribute per source attribute.
    // Numerical attributes that are not of type REAL get a freshly created REAL target; all
    // others are their own target. Afterwards the targets take over the source names.
    Attributes attributes = exampleSet.getAttributes();

    // Snapshot of the current attributes in iteration order.
    Attribute[] oldAttributes = new Attribute[attributes.size()];
    int position = 0;
    for (Attribute attribute : attributes) {
      oldAttributes[position++] = attribute;
    }

    // Start with target == source, then swap in REAL replacements where required.
    Attribute[] newAttributes = oldAttributes.clone();
    for (int index = 0; index < newAttributes.length; index++) {
      Attribute source = oldAttributes[index];
      boolean needsRealTarget =
          source.isNumerical()
              && !Ontology.ATTRIBUTE_VALUE_TYPE.isA(source.getValueType(), Ontology.REAL);
      if (needsRealTarget) {
        Attribute replacement = AttributeFactory.createAttribute(Ontology.REAL);
        newAttributes[index] = replacement;
        exampleSet.getExampleTable().addAttribute(replacement);
        attributes.addRegular(replacement);
      }
    }

    applyOnData(exampleSet, oldAttributes, newAttributes);

    // Drop each source, re-add its target at the end (so the original order is rebuilt) and
    // give the target the source's name.
    for (int index = 0; index < oldAttributes.length; index++) {
      Attribute source = oldAttributes[index];
      Attribute target = newAttributes[index];
      attributes.remove(source);
      if (source != target) {
        // A replacement was already registered above; remove it before re-adding in order.
        attributes.remove(target);
      }
      attributes.addRegular(target);
      target.setName(source.getName());
    }

    return exampleSet;
  }
Exemplo n.º 16
0
  /**
   * Clusters the example set into {@code k} clusters using kernel evaluations between examples.
   *
   * <p>Examples start with random cluster assignments. Each optimization step recomputes, per
   * cluster, the summed weight and a kernel correction term, then reassigns every example to the
   * cluster with the smallest resulting distance. Iteration stops when the assignments no longer
   * change or after the configured maximum number of steps.
   *
   * @param exampleSet the examples to cluster; ids are created on it if missing, and a "cluster"
   *     attribute is added when {@code addsClusterAttribute()} is true.
   * @return the cluster model holding the final assignments.
   * @throws OperatorException a UserError 142 if the example set has fewer than {@code k}
   *     examples; other failures may come from the called helpers.
   */
  @Override
  public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    int k = getParameterAsInt(PARAMETER_K);
    int maxOptimizationSteps = getParameterAsInt(PARAMETER_MAX_OPTIMIZATION_STEPS);
    boolean useExampleWeights = getParameterAsBoolean(PARAMETER_USE_WEIGHTS);
    Kernel kernel = Kernel.createKernel(this);

    // init operator progress
    getProgress().setTotal(maxOptimizationSteps);

    // checking and creating ids if necessary
    Tools.checkAndCreateIds(exampleSet);

    // additional checks
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);

    if (exampleSet.size() < k) {
      throw new UserError(this, 142, k);
    }

    // extracting attribute names
    // NOTE(review): attributeNames is filled here but not read anywhere else in this method.
    Attributes attributes = exampleSet.getAttributes();
    ArrayList<String> attributeNames = new ArrayList<String>(attributes.size());
    for (Attribute attribute : attributes) {
      attributeNames.add(attribute.getName());
    }
    // NOTE(review): weightAttribute is used below whenever useExampleWeights is true without a
    // null check — confirm that a weight attribute is guaranteed in that case.
    Attribute weightAttribute = attributes.getWeight();

    RandomGenerator generator = RandomGenerator.getRandomGenerator(this);

    ClusterModel model =
        new ClusterModel(
            exampleSet,
            k,
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL),
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED));
    // init centroids
    // clusterAssignments[i] is the cluster index of the i-th example in iteration order.
    int[] clusterAssignments = new int[exampleSet.size()];

    for (int i = 0; i < exampleSet.size(); i++) {
      clusterAssignments[i] = generator.nextIntInRange(0, k);
    }

    // run optimization steps
    boolean stable = false;
    for (int step = 0; step < maxOptimizationSteps && !stable; step++) {
      // calculating cluster kernel properties
      // clusterWeights[c]: sum of example weights in cluster c.
      // clusterKernelCorrection[c]: weighted sum of kernel values over all ordered pairs of
      // examples in cluster c, later divided by clusterWeights[c]^2.
      double[] clusterWeights = new double[k];
      double[] clusterKernelCorrection = new double[k];
      int i = 0;
      for (Example firstExample : exampleSet) {
        double firstExampleWeight = useExampleWeights ? firstExample.getValue(weightAttribute) : 1d;
        double[] firstExampleValues = getAsDoubleArray(firstExample, attributes);
        clusterWeights[clusterAssignments[i]] += firstExampleWeight;
        // j tracks the position of secondExample so its assignment can be looked up.
        int j = 0;
        for (Example secondExample : exampleSet) {
          if (clusterAssignments[i] == clusterAssignments[j]) {
            double secondExampleWeight =
                useExampleWeights ? secondExample.getValue(weightAttribute) : 1d;
            clusterKernelCorrection[clusterAssignments[i]] +=
                firstExampleWeight
                    * secondExampleWeight
                    * kernel.calculateDistance(
                        firstExampleValues, getAsDoubleArray(secondExample, attributes));
          }
          j++;
        }
        i++;
      }
      // NOTE(review): a cluster with no members has clusterWeights[z] == 0, so this division
      // (and the one in the assignment loop below) yields NaN — confirm this is intended.
      for (int z = 0; z < k; z++) {
        clusterKernelCorrection[z] /= clusterWeights[z] * clusterWeights[z];
      }

      // assign examples to new centroids
      int[] newClusterAssignments = new int[exampleSet.size()];
      i = 0;
      for (Example example : exampleSet) {
        double[] exampleValues = getAsDoubleArray(example, attributes);
        // kernel value of the example with itself; identical for every candidate cluster
        double exampleKernelValue = kernel.calculateDistance(exampleValues, exampleValues);
        double nearestDistance = Double.POSITIVE_INFINITY;
        int nearestIndex = 0;
        for (int clusterIndex = 0; clusterIndex < k; clusterIndex++) {
          double distance = 0;
          // iterating over all examples in cluster to get kernel distance
          int j = 0;
          for (Example clusterExample : exampleSet) {
            if (clusterAssignments[j] == clusterIndex) {
              distance +=
                  (useExampleWeights ? clusterExample.getValue(weightAttribute) : 1d)
                      * kernel.calculateDistance(
                          getAsDoubleArray(clusterExample, attributes), exampleValues);
            }
            j++;
          }
          // distance = k(x,x) - 2 * (weighted kernel sum to cluster) / clusterWeight
          //            + clusterKernelCorrection
          distance *= -2d / clusterWeights[clusterIndex];
          // copy in outer loop
          distance += exampleKernelValue;
          distance += clusterKernelCorrection[clusterIndex];
          // strict '<' keeps the lowest cluster index on ties; NaN distances never win
          if (distance < nearestDistance) {
            nearestDistance = distance;
            nearestIndex = clusterIndex;
          }
        }
        newClusterAssignments[i] = nearestIndex;
        i++;
      }

      // finishing assignment
      // the clustering is stable when no example changed its cluster in this step
      stable = true;
      for (int j = 0; j < exampleSet.size() && stable; j++) {
        stable &= newClusterAssignments[j] == clusterAssignments[j];
      }
      clusterAssignments = newClusterAssignments;

      // trigger operator progress
      getProgress().step();
    }

    // setting last clustering into model
    model.setClusterAssignments(clusterAssignments, exampleSet);

    getProgress().complete();

    // optionally write the assignment into a new nominal "cluster" attribute
    if (addsClusterAttribute()) {
      Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
      exampleSet.getExampleTable().addAttribute(cluster);
      exampleSet.getAttributes().setCluster(cluster);
      int i = 0;
      for (Example example : exampleSet) {
        example.setValue(cluster, "cluster_" + clusterAssignments[i]);
        i++;
      }
    }
    return model;
  }
  /**
   * Performs backward attribute elimination on a clone of the input example set.
   *
   * <p>In every round each still-selected attribute is temporarily removed and the performance
   * of the remaining set is evaluated; the attribute whose removal gives the best performance is
   * then removed permanently. Depending on the stopping behavior a round may count as a failure;
   * up to the allowed number of consecutive failures is tolerated speculatively, and attributes
   * removed during a failed speculative streak are re-added at the end.
   *
   * <p>Delivers the reduced example set, the best performance that was accepted, and 0/1
   * attribute weights marking which attributes remain selected.
   *
   * @throws OperatorException a UserError 920 if the significance test cannot be calculated;
   *     other failures may come from the performance evaluation.
   */
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSetOriginal = exampleSetInput.getData(ExampleSet.class);
    ExampleSet exampleSet = (ExampleSet) exampleSetOriginal.clone();
    int numberOfAttributes = exampleSet.getAttributes().size();
    Attributes attributes = exampleSet.getAttributes();

    // At least one attribute always stays in the example set.
    int maxNumberOfAttributes =
        Math.min(getParameterAsInt(PARAMETER_MAX_ATTRIBUTES), numberOfAttributes - 1);
    int maxNumberOfFails = getParameterAsInt(PARAMETER_ALLOWED_CONSECUTIVE_FAILS);
    int behavior = getParameterAsInt(PARAMETER_STOPPING_BEHAVIOR);

    boolean useRelativeDecrease =
        behavior == WITH_DECREASE_EXCEEDS
            && getParameterAsBoolean(PARAMETER_USE_RELATIVE_DECREASE);
    // Fix: the threshold used to be read only when the relative variant was active, which left
    // the absolute threshold at 0 and made PARAMETER_MAX_ABSOLUT_DECREASE ineffective.
    double maximalDecrease = 0;
    if (behavior == WITH_DECREASE_EXCEEDS) {
      maximalDecrease =
          useRelativeDecrease
              ? getParameterAsDouble(PARAMETER_MAX_RELATIVE_DECREASE)
              : getParameterAsDouble(PARAMETER_MAX_ABSOLUT_DECREASE);
    }
    double alpha =
        (behavior == WITH_DECREASE_SIGNIFICANT) ? getParameterAsDouble(PARAMETER_ALPHA) : 0d;

    // remembering attributes in their original order
    Attribute[] attributeArray = new Attribute[numberOfAttributes];
    int i = 0;
    Iterator<Attribute> iterator = attributes.iterator();
    while (iterator.hasNext()) {
      attributeArray[i] = iterator.next();
      i++;
    }

    // selected[n] tells whether attributeArray[n] is still part of the example set
    boolean[] selected = new boolean[numberOfAttributes];
    Arrays.fill(selected, true);

    boolean earlyAbort = false;
    // indices removed during the current streak of tolerated failures
    List<Integer> speculativeList = new ArrayList<Integer>(maxNumberOfFails);
    int numberOfFails = maxNumberOfFails;
    currentNumberOfFeatures = numberOfAttributes;
    currentAttributes = attributes;
    PerformanceVector lastPerformance = getPerformance(exampleSet);
    PerformanceVector bestPerformanceEver = lastPerformance;
    for (i = 0; i < maxNumberOfAttributes && !earlyAbort; i++) {
      // setting values for logging
      currentNumberOfFeatures = numberOfAttributes - i - 1;

      // performing a round: try removing each remaining attribute on its own
      int bestIndex = 0;
      PerformanceVector currentBestPerformance = null;
      for (int current = 0; current < numberOfAttributes; current++) {
        if (selected[current]) {
          // switching off
          attributes.remove(attributeArray[current]);
          currentAttributes = attributes;

          // evaluate performance
          PerformanceVector performance = getPerformance(exampleSet);
          if (currentBestPerformance == null || performance.compareTo(currentBestPerformance) > 0) {
            bestIndex = current;
            currentBestPerformance = performance;
          }

          // switching on
          attributes.addRegular(attributeArray[current]);
          currentAttributes = null; // removing reference
        }
      }
      double currentFitness = currentBestPerformance.getMainCriterion().getFitness();
      // the first round is never treated as a failure
      if (i != 0) {
        double lastFitness = lastPerformance.getMainCriterion().getFitness();
        // switch stopping behavior
        switch (behavior) {
          case WITH_DECREASE:
            if (lastFitness >= currentFitness) {
              earlyAbort = true;
            }
            break;
          case WITH_DECREASE_EXCEEDS:
            if (useRelativeDecrease) {
              // relative decrease testing
              if (currentFitness < lastFitness - Math.abs(lastFitness * maximalDecrease)) {
                earlyAbort = true;
              }
            } else {
              // absolute decrease testing
              if (currentFitness < lastFitness - maximalDecrease) {
                earlyAbort = true;
              }
            }
            break;
          case WITH_DECREASE_SIGNIFICANT:
            AnovaCalculator calculator = new AnovaCalculator();
            calculator.setAlpha(alpha);

            PerformanceCriterion pc = currentBestPerformance.getMainCriterion();
            calculator.addGroup(pc.getAverageCount(), pc.getAverage(), pc.getVariance());
            pc = lastPerformance.getMainCriterion();
            calculator.addGroup(pc.getAverageCount(), pc.getAverage(), pc.getVariance());

            SignificanceTestResult result;
            try {
              result = calculator.performSignificanceTest();
            } catch (SignificanceCalculationException e) {
              throw new UserError(this, 920, e.getMessage());
            }
            if (lastFitness > currentFitness && result.getProbability() < alpha) {
              earlyAbort = true;
            }
            break;
          default:
            break;
        }
      }
      if (earlyAbort) {
        // check if there are some free tries left
        if (numberOfFails == 0) {
          break;
        }
        numberOfFails--;
        speculativeList.add(bestIndex);
        earlyAbort = false;

        // needs performance increase compared to better performance of current and last!
        if (currentBestPerformance.compareTo(lastPerformance) > 0) {
          lastPerformance = currentBestPerformance;
        }
      } else {
        // resetting maximal number of fails.
        numberOfFails = maxNumberOfFails;
        speculativeList.clear();
        lastPerformance = currentBestPerformance;
        bestPerformanceEver = currentBestPerformance;
      }

      // switching best index off
      attributes.remove(attributeArray[bestIndex]);
      selected[bestIndex] = false;
    }
    // re-add speculatively removed attributes: speculative execution did not yield good result
    for (Integer removeIndex : speculativeList) {
      selected[removeIndex] = true;
      attributes.addRegular(attributeArray[removeIndex]);
    }

    // weight 1 for attributes that stayed selected, 0 for eliminated ones
    AttributeWeights weights = new AttributeWeights();
    i = 0;
    for (Attribute attribute : attributeArray) {
      weights.setWeight(attribute.getName(), selected[i] ? 1d : 0d);
      i++;
    }

    exampleSetOutput.deliver(exampleSet);
    performanceOutput.deliver(bestPerformanceEver);
    weightsOutput.deliver(weights);
  }
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // Replaces the single predicted label by the k most confident predictions per example,
    // each with its own prediction and confidence attribute (suffix "_1" = most confident).
    Attributes attributes = exampleSet.getAttributes();
    Attribute predictedLabel = attributes.getPredictedLabel();
    if (predictedLabel == null) {
      throw new UserError(this, 107);
    }

    // Look up the confidence attribute of every label value, in mapping order.
    NominalMapping labelMapping = predictedLabel.getMapping();
    final int labelCount = labelMapping.size();
    Attribute[] confidences = new Attribute[labelCount];
    int slot = 0;
    for (String value : labelMapping.getValues()) {
      Attribute confidence = attributes.getConfidence(value);
      confidences[slot] = confidence;
      if (confidence == null) {
        throw new UserError(this, 154, value);
      }
      slot++;
    }

    // Create the ranked prediction / confidence attributes and register them as special.
    final int rankCount = Math.min(labelCount, getParameterAsInt(PARAMETER_NUMBER_OF_RANKS));
    Attribute[] rankedPredictions = new Attribute[rankCount];
    Attribute[] rankedConfidences = new Attribute[rankCount];
    for (int rank = 0; rank < rankCount; rank++) {
      String suffix = "_" + (rank + 1);

      Attribute prediction = AttributeFactory.createAttribute(predictedLabel.getValueType());
      prediction.setName(predictedLabel.getName() + suffix);
      prediction.setMapping((NominalMapping) predictedLabel.getMapping().clone());
      rankedPredictions[rank] = prediction;

      Attribute confidence = AttributeFactory.createAttribute(Ontology.REAL);
      confidence.setName(Attributes.CONFIDENCE_NAME + suffix);
      rankedConfidences[rank] = confidence;

      attributes.addRegular(prediction);
      attributes.addRegular(confidence);
      attributes.setSpecialAttribute(prediction, Attributes.PREDICTION_NAME + suffix);
      attributes.setSpecialAttribute(confidence, Attributes.CONFIDENCE_NAME + suffix);
    }
    exampleSet.getExampleTable().addAttributes(Arrays.asList(rankedConfidences));
    exampleSet.getExampleTable().addAttributes(Arrays.asList(rankedPredictions));

    // Per example: sort (confidence, label index) pairs and take the entries from the end of
    // the sorted list, i.e. rank 0 reads the last element.
    for (Example example : exampleSet) {
      ArrayList<Tupel<Double, Integer>> ranking =
          new ArrayList<Tupel<Double, Integer>>(labelCount);
      for (int label = 0; label < labelCount; label++) {
        ranking.add(new Tupel<Double, Integer>(example.getValue(confidences[label]), label));
      }
      Collections.sort(ranking);
      for (int rank = 0; rank < rankCount; rank++) {
        Tupel<Double, Integer> entry = ranking.get(labelCount - rank - 1);
        // The label index is valid for the new attribute because its mapping is a clone.
        example.setValue(rankedPredictions[rank], entry.getSecond());
        example.setValue(rankedConfidences[rank], entry.getFirst());
      }
    }

    // The old prediction is always removed; the old confidences only on request.
    attributes.remove(predictedLabel);
    if (getParameterAsBoolean(PARAMETER_REMOVE_OLD_PREDICTIONS)) {
      for (Attribute oldConfidence : confidences) {
        attributes.remove(oldConfidence);
      }
    }

    return exampleSet;
  }
Exemplo n.º 19
0
  /**
   * Projects a clone of the input example set onto the leading singular vectors of this model.
   *
   * <p>The number of derived attributes is either the manually configured number or the
   * smallest count whose cumulative singular value proportion reaches the threshold; it never
   * exceeds {@code getNumberOfComponents()}. The source attributes are removed afterwards unless
   * {@code keepAttributes} is set.
   *
   * @param inputExampleSet the data to transform; it is cloned, not modified.
   * @return the clone with the derived attributes added.
   * @throws OperatorException a UserError 133 if the number of attributes differs from the
   *     number seen during training.
   */
  @Override
  public ExampleSet apply(ExampleSet inputExampleSet) throws OperatorException {
    ExampleSet exampleSet = (ExampleSet) inputExampleSet.clone();
    Attributes attributes = exampleSet.getAttributes();
    if (attributeNames.length != attributes.size()) {
      throw new UserError(null, 133, numberOfComponents, attributes.size());
    }

    // Resolve the training attributes by name in the data to transform.
    // NOTE(review): an attribute missing from the input yields a null entry here, which is
    // passed to example.getValue below — confirm that callers guarantee matching names.
    Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()];
    int d = 0;
    for (Attribute oldAttribute : getTrainingHeader().getAttributes()) {
      inputAttributes[d] = attributes.get(oldAttribute.getName());
      d++;
    }

    // determining number of used components; -1 means "not defined, keep all"
    int numberOfUsedComponents = -1;
    if (manualNumber) {
      numberOfUsedComponents = numberOfComponents;
    } else if (proportionThreshold != 0.0d) {
      // Index of the first component whose cumulative proportion reaches the threshold. The
      // bound keeps the scan inside the array when the threshold is never reached (e.g. due to
      // rounding); the count is clamped below in that case.
      numberOfUsedComponents = 0;
      while (numberOfUsedComponents < cumulativeSingularValueProportion.length - 1
          && cumulativeSingularValueProportion[numberOfUsedComponents] < proportionThreshold) {
        numberOfUsedComponents++;
      }
      // convert index to count
      numberOfUsedComponents++;
    }
    // if nothing defined or number exceeds maximal number of possible components
    if (numberOfUsedComponents == -1 || numberOfUsedComponents > getNumberOfComponents()) {
      // keep all components
      numberOfUsedComponents = getNumberOfComponents();
    }

    // factor[i][j] = V[j][i] / singularValue[i]
    double[][] singularValueFactors = new double[numberOfUsedComponents][attributeNames.length];
    double[][] vMatrixData = vMatrix.getArray();
    for (int i = 0; i < numberOfUsedComponents; i++) {
      double invertedSingularValue = 1d / singularValues[i];
      for (int j = 0; j < attributeNames.length; j++) {
        singularValueFactors[i][j] = vMatrixData[j][i] * invertedSingularValue;
      }
    }

    // now build new attributes
    Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents];
    for (int i = 0; i < numberOfUsedComponents; i++) {
      String name = useLegacyNames ? "d" + i : "svd_" + (i + 1);
      derivedAttributes[i] = AttributeFactory.createAttribute(name, Ontology.REAL);
      exampleSet.getExampleTable().addAttribute(derivedAttributes[i]);
      attributes.addRegular(derivedAttributes[i]);
    }

    // now iterate through all examples and derive value of new features
    double[] derivedValues = new double[numberOfUsedComponents];
    for (Example example : exampleSet) {
      // calculate values of new attributes with single scan over attributes
      d = 0;
      for (Attribute attribute : inputAttributes) {
        double attributeValue = example.getValue(attribute);
        for (int i = 0; i < numberOfUsedComponents; i++) {
          derivedValues[i] += singularValueFactors[i][d] * attributeValue;
        }
        d++;
      }

      // set values
      for (int i = 0; i < numberOfUsedComponents; i++) {
        example.setValue(derivedAttributes[i], derivedValues[i]);
      }

      // reset the accumulator for the next example
      Arrays.fill(derivedValues, 0);
    }

    // now remove attributes if needed
    if (!keepAttributes) {
      for (Attribute attribute : inputAttributes) {
        attributes.remove(attribute);
      }
    }

    return exampleSet;
  }
Exemplo n.º 20
0
  /**
   * Projects the example set onto the leading eigenvectors of this model, in place.
   *
   * <p>The number of derived attributes is either the manually configured number or derived
   * from the cumulative variance threshold; it never exceeds the number of stored eigenvectors.
   * Each value is centered with the stored means before projection. The source attributes are
   * removed afterwards unless {@code keepAttributes} is set.
   *
   * @param exampleSet the data to transform; it is modified and returned.
   * @return the same example set with the derived "pc_n" attributes added.
   * @throws OperatorException a UserError 133 if the number of attributes differs from the
   *     number seen during training.
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    exampleSet.recalculateAllAttributeStatistics();

    Attributes attributes = exampleSet.getAttributes();
    if (attributeNames.length != attributes.size()) {
      throw new UserError(null, 133, numberOfComponents, attributes.size());
    }

    // Resolve the training attributes by name in the data to transform.
    // NOTE(review): an attribute missing from the input yields a null entry here, which is
    // passed to example.getValue below — confirm that callers guarantee matching names.
    Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()];
    int d = 0;
    for (Attribute oldAttribute : getTrainingHeader().getAttributes()) {
      inputAttributes[d] = attributes.get(oldAttribute.getName());
      d++;
    }

    // determining number of used components; -1 means "not defined, keep all"
    int numberOfUsedComponents = -1;
    if (manualNumber) {
      numberOfUsedComponents = numberOfComponents;
    } else if (varianceThreshold != 0.0d) {
      // Index of the first component whose cumulative variance reaches the threshold. The bound
      // keeps the scan inside the array when the threshold is never reached (e.g. rounding).
      numberOfUsedComponents = 0;
      while (numberOfUsedComponents < cumulativeVariance.length - 1
          && cumulativeVariance[numberOfUsedComponents] < varianceThreshold) {
        numberOfUsedComponents++;
      }
      // convert index to count
      numberOfUsedComponents++;
      // existing behavior: a threshold that would need every component drops the last one
      if (numberOfUsedComponents == eigenVectors.size()) {
        numberOfUsedComponents--;
      }
    }
    if (numberOfUsedComponents == -1) {
      // keep all components
      numberOfUsedComponents = attributes.size();
    }
    // Never ask for more components than there are eigenvectors; previously this ran past the
    // end of the eigenvector list.
    numberOfUsedComponents = Math.min(numberOfUsedComponents, eigenVectors.size());

    // one eigenvector per used component; rows reference the stored vectors directly
    double[][] eigenValueFactors = new double[numberOfUsedComponents][];
    for (int i = 0; i < numberOfUsedComponents; i++) {
      eigenValueFactors[i] = this.eigenVectors.get(i).getEigenvector();
    }

    // now build new attributes
    Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents];
    for (int i = 0; i < numberOfUsedComponents; i++) {
      derivedAttributes[i] = AttributeFactory.createAttribute("pc_" + (i + 1), Ontology.REAL);
      exampleSet.getExampleTable().addAttribute(derivedAttributes[i]);
      attributes.addRegular(derivedAttributes[i]);
    }

    // now iterate through all examples and derive value of new features
    double[] derivedValues = new double[numberOfUsedComponents];
    for (Example example : exampleSet) {
      // calculate values of new attributes with single scan over attributes
      d = 0;
      for (Attribute attribute : inputAttributes) {
        double attributeValue = example.getValue(attribute) - means[d];
        for (int i = 0; i < numberOfUsedComponents; i++) {
          derivedValues[i] += eigenValueFactors[i][d] * attributeValue;
        }
        d++;
      }

      // set values
      for (int i = 0; i < numberOfUsedComponents; i++) {
        example.setValue(derivedAttributes[i], derivedValues[i]);
      }

      // reset the accumulator for the next example
      Arrays.fill(derivedValues, 0);
    }

    // now remove attributes if needed
    if (!keepAttributes) {
      for (Attribute attribute : inputAttributes) {
        attributes.remove(attribute);
      }
    }

    return exampleSet;
  }