@Override
  public AttributeWeights calculateWeights(ExampleSet exampleSet) throws OperatorException {
    Attributes attributes = exampleSet.getAttributes();
    Attribute labelAttribute = attributes.getLabel();
    boolean useSquaredCorrelation = getParameterAsBoolean(PARAMETER_SQUARED_CORRELATION);

    AttributeWeights weights = new AttributeWeights(exampleSet);
    getProgress().setTotal(attributes.size());
    int progressCounter = 0;
    int exampleSetSize = exampleSet.size();
    int exampleCounter = 0;
    for (Attribute attribute : attributes) {
      double correlation =
          MathFunctions.correlation(exampleSet, labelAttribute, attribute, useSquaredCorrelation);
      weights.setWeight(attribute.getName(), Math.abs(correlation));
      progressCounter++;
      exampleCounter += exampleSetSize;
      if (exampleCounter > PROGRESS_UPDATE_STEPS) {
        exampleCounter = 0;
        getProgress().setCompleted(progressCounter);
      }
    }

    return weights;
  }
 private double[] getAsDoubleArray(Example example, Attributes attributes) {
   double[] values = new double[attributes.size()];
   int i = 0;
   for (Attribute attribute : attributes) {
     values[i] = example.getValue(attribute);
     i++;
   }
   return values;
 }
 private double[] getAttributeValues(Example example, Attributes attributes, double[] means) {
   double[] values = new double[attributes.size()];
   int x = 0;
   for (Attribute attribute : attributes) {
     values[x] = example.getValue(attribute) - means[x];
     x++;
   }
   return values;
 }
  private double[] getExampleValues(Example example) {
    Attributes attributes = example.getAttributes();
    double[] attributeValues = new double[attributes.size()];

    int i = 0;
    for (Attribute attribute : attributes) {
      attributeValues[i] = example.getValue(attribute);
      i++;
    }
    return attributeValues;
  }
  @Override
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    DistanceMeasure measure = DistanceMeasures.createMeasure(this);
    measure.init(exampleSet);
    GeometricDataCollection<RegressionData> data = new LinearList<RegressionData>(measure);

    // check if weights should be used
    boolean useWeights = getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS);
    // check if robust estimate should be performed: Then calculate weights and use it anyway
    if (getParameterAsBoolean(PARAMETER_USE_ROBUST_ESTIMATION)) {
      useWeights = true;
      LocalPolynomialExampleWeightingOperator weightingOperator;
      try {
        weightingOperator =
            OperatorService.createOperator(LocalPolynomialExampleWeightingOperator.class);
        exampleSet = weightingOperator.doWork((ExampleSet) exampleSet.clone(), this);
      } catch (OperatorCreationException e) {
        throw new UserError(this, 904, "LocalPolynomialExampleWeighting", e.getMessage());
      }
    }

    Attributes attributes = exampleSet.getAttributes();
    Attribute label = attributes.getLabel();
    Attribute weightAttribute = attributes.getWeight();
    for (Example example : exampleSet) {
      double[] values = new double[attributes.size()];
      double labelValue = example.getValue(label);
      double weight = 1d;
      if (weightAttribute != null && useWeights) {
        weight = example.getValue(weightAttribute);
      }

      // filter out examples without influence
      if (weight > 0d) {
        // copying example values
        int i = 0;
        for (Attribute attribute : attributes) {
          values[i] = example.getValue(attribute);
          i++;
        }

        // inserting into geometric data collection
        data.add(values, new RegressionData(values, labelValue, weight));
      }
    }
    return new LocalPolynomialRegressionModel(
        exampleSet,
        data,
        Neighborhoods.createNeighborhood(this),
        SmoothingKernels.createKernel(this),
        getParameterAsInt(PARAMETER_DEGREE),
        getParameterAsDouble(PARAMETER_RIDGE));
  }
  @Override
  public ExampleSet applyOnData(ExampleSet exampleSet) throws OperatorException {
    Attributes attributes = exampleSet.getAttributes();

    // constructing new attributes with generic names, holding old ones, if old type wasn't real
    Attribute[] oldAttributes = new Attribute[attributes.size()];
    int i = 0;
    for (Attribute attribute : attributes) {
      oldAttributes[i] = attribute;
      i++;
    }
    Attribute[] newAttributes = new Attribute[attributes.size()];
    for (i = 0; i < newAttributes.length; i++) {
      newAttributes[i] = oldAttributes[i];
      if (oldAttributes[i].isNumerical())
        if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(oldAttributes[i].getValueType(), Ontology.REAL)) {
          newAttributes[i] = AttributeFactory.createAttribute(Ontology.REAL);
          exampleSet.getExampleTable().addAttribute(newAttributes[i]);
          attributes.addRegular(newAttributes[i]);
        }
    }

    // applying on data
    applyOnData(exampleSet, oldAttributes, newAttributes);

    // removing old attributes and change new attributes name to old ones if needed
    for (i = 0; i < oldAttributes.length; i++) {
      attributes.remove(oldAttributes[i]);
      // if attribute is new, then remove for later storing in correct order
      if (oldAttributes[i] != newAttributes[i]) attributes.remove(newAttributes[i]);
      attributes.addRegular(newAttributes[i]);
      newAttributes[i].setName(oldAttributes[i].getName());
    }

    return exampleSet;
  }
 private double[] getMeanVector(ExampleSet exampleSet) {
   exampleSet.recalculateAllAttributeStatistics();
   Attributes attributes = exampleSet.getAttributes();
   double[] meanVector = new double[attributes.size()];
   int i = 0;
   for (Attribute attribute : attributes) {
     if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
       meanVector[i] = exampleSet.getStatistics(attribute, Statistics.MINIMUM);
     } else if (attribute.isNominal())
       meanVector[i] = exampleSet.getStatistics(attribute, Statistics.MODE);
     else meanVector[i] = exampleSet.getStatistics(attribute, Statistics.AVERAGE);
     i++;
   }
   return meanVector;
 }
 @Override
 public void init(ExampleSet exampleSet) throws OperatorException {
   super.init(exampleSet);
   Tools.onlyNumericalAttributes(exampleSet, "value based similarities");
   Attributes attributes = exampleSet.getAttributes();
   if (attributes.size() != 1)
     throw new OperatorException(
         "The bregman divergence you've choosen is not applicable for the dataset! Proceeding with the 'Squared Euclidean distance' bregman divergence.");
   for (Example example : exampleSet) {
     for (Attribute attribute : attributes) {
       if (example.getValue(attribute) <= 0)
         throw new OperatorException(
             "The bregman divergence you've choosen is not applicable for the dataset! Proceeding with the 'Squared Euclidean distance' bregman divergence.");
       ;
     }
   }
 }
  @Override
  public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    int k = getParameterAsInt(PARAMETER_K);
    int maxOptimizationSteps = getParameterAsInt(PARAMETER_MAX_OPTIMIZATION_STEPS);
    boolean useExampleWeights = getParameterAsBoolean(PARAMETER_USE_WEIGHTS);
    Kernel kernel = Kernel.createKernel(this);

    // init operator progress
    getProgress().setTotal(maxOptimizationSteps);

    // checking and creating ids if necessary
    Tools.checkAndCreateIds(exampleSet);

    // additional checks
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);

    if (exampleSet.size() < k) {
      throw new UserError(this, 142, k);
    }

    // extracting attribute names
    Attributes attributes = exampleSet.getAttributes();
    ArrayList<String> attributeNames = new ArrayList<String>(attributes.size());
    for (Attribute attribute : attributes) {
      attributeNames.add(attribute.getName());
    }
    Attribute weightAttribute = attributes.getWeight();

    RandomGenerator generator = RandomGenerator.getRandomGenerator(this);

    ClusterModel model =
        new ClusterModel(
            exampleSet,
            k,
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL),
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED));
    // init centroids
    int[] clusterAssignments = new int[exampleSet.size()];

    for (int i = 0; i < exampleSet.size(); i++) {
      clusterAssignments[i] = generator.nextIntInRange(0, k);
    }

    // run optimization steps
    boolean stable = false;
    for (int step = 0; step < maxOptimizationSteps && !stable; step++) {
      // calculating cluster kernel properties
      double[] clusterWeights = new double[k];
      double[] clusterKernelCorrection = new double[k];
      int i = 0;
      for (Example firstExample : exampleSet) {
        double firstExampleWeight = useExampleWeights ? firstExample.getValue(weightAttribute) : 1d;
        double[] firstExampleValues = getAsDoubleArray(firstExample, attributes);
        clusterWeights[clusterAssignments[i]] += firstExampleWeight;
        int j = 0;
        for (Example secondExample : exampleSet) {
          if (clusterAssignments[i] == clusterAssignments[j]) {
            double secondExampleWeight =
                useExampleWeights ? secondExample.getValue(weightAttribute) : 1d;
            clusterKernelCorrection[clusterAssignments[i]] +=
                firstExampleWeight
                    * secondExampleWeight
                    * kernel.calculateDistance(
                        firstExampleValues, getAsDoubleArray(secondExample, attributes));
          }
          j++;
        }
        i++;
      }
      for (int z = 0; z < k; z++) {
        clusterKernelCorrection[z] /= clusterWeights[z] * clusterWeights[z];
      }

      // assign examples to new centroids
      int[] newClusterAssignments = new int[exampleSet.size()];
      i = 0;
      for (Example example : exampleSet) {
        double[] exampleValues = getAsDoubleArray(example, attributes);
        double exampleKernelValue = kernel.calculateDistance(exampleValues, exampleValues);
        double nearestDistance = Double.POSITIVE_INFINITY;
        int nearestIndex = 0;
        for (int clusterIndex = 0; clusterIndex < k; clusterIndex++) {
          double distance = 0;
          // iterating over all examples in cluster to get kernel distance
          int j = 0;
          for (Example clusterExample : exampleSet) {
            if (clusterAssignments[j] == clusterIndex) {
              distance +=
                  (useExampleWeights ? clusterExample.getValue(weightAttribute) : 1d)
                      * kernel.calculateDistance(
                          getAsDoubleArray(clusterExample, attributes), exampleValues);
            }
            j++;
          }
          distance *= -2d / clusterWeights[clusterIndex];
          // copy in outer loop
          distance += exampleKernelValue;
          distance += clusterKernelCorrection[clusterIndex];
          if (distance < nearestDistance) {
            nearestDistance = distance;
            nearestIndex = clusterIndex;
          }
        }
        newClusterAssignments[i] = nearestIndex;
        i++;
      }

      // finishing assignment
      stable = true;
      for (int j = 0; j < exampleSet.size() && stable; j++) {
        stable &= newClusterAssignments[j] == clusterAssignments[j];
      }
      clusterAssignments = newClusterAssignments;

      // trigger operator progress
      getProgress().step();
    }

    // setting last clustering into model
    model.setClusterAssignments(clusterAssignments, exampleSet);

    getProgress().complete();

    if (addsClusterAttribute()) {
      Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
      exampleSet.getExampleTable().addAttribute(cluster);
      exampleSet.getAttributes().setCluster(cluster);
      int i = 0;
      for (Example example : exampleSet) {
        example.setValue(cluster, "cluster_" + clusterAssignments[i]);
        i++;
      }
    }
    return model;
  }
示例#10
0
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    exampleSet.recalculateAllAttributeStatistics();

    Attributes attributes = exampleSet.getAttributes();
    if (attributeNames.length != attributes.size()) {
      throw new UserError(null, 133, numberOfComponents, attributes.size());
    }

    // remember attributes that have been removed during training. These will be removed lateron
    Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()];
    int d = 0;
    for (Attribute oldAttribute : getTrainingHeader().getAttributes()) {
      inputAttributes[d] = attributes.get(oldAttribute.getName());
      d++;
    }

    // determining number of used components
    int numberOfUsedComponents = -1;
    if (manualNumber) {
      numberOfUsedComponents = numberOfComponents;
    } else {
      if (varianceThreshold == 0.0d) {
        numberOfUsedComponents = -1;
      } else {
        numberOfUsedComponents = 0;
        while (cumulativeVariance[numberOfUsedComponents] < varianceThreshold) {
          numberOfUsedComponents++;
        }
        numberOfUsedComponents++;
        if (numberOfUsedComponents == eigenVectors.size()) {
          numberOfUsedComponents--;
        }
      }
    }
    if (numberOfUsedComponents == -1) {
      // keep all components
      numberOfUsedComponents = attributes.size();
    }

    // retrieve factors inside eigenVectors
    double[][] eigenValueFactors = new double[numberOfUsedComponents][attributeNames.length];
    for (int i = 0; i < numberOfUsedComponents; i++) {
      eigenValueFactors[i] = this.eigenVectors.get(i).getEigenvector();
    }

    // now build new attributes
    Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents];
    for (int i = 0; i < numberOfUsedComponents; i++) {
      derivedAttributes[i] = AttributeFactory.createAttribute("pc_" + (i + 1), Ontology.REAL);
      exampleSet.getExampleTable().addAttribute(derivedAttributes[i]);
      attributes.addRegular(derivedAttributes[i]);
    }

    // now iterator through all examples and derive value of new features
    double[] derivedValues = new double[numberOfUsedComponents];
    for (Example example : exampleSet) {
      // calculate values of new attributes with single scan over attributes
      d = 0;
      for (Attribute attribute : inputAttributes) {
        double attributeValue = example.getValue(attribute) - means[d];
        for (int i = 0; i < numberOfUsedComponents; i++) {
          derivedValues[i] += eigenValueFactors[i][d] * attributeValue;
        }
        d++;
      }

      // set values
      for (int i = 0; i < numberOfUsedComponents; i++) {
        example.setValue(derivedAttributes[i], derivedValues[i]);
      }

      // set values back
      Arrays.fill(derivedValues, 0);
    }

    // now remove attributes if needed
    if (!keepAttributes) {
      for (Attribute attribute : inputAttributes) {
        attributes.remove(attribute);
      }
    }

    return exampleSet;
  }
示例#11
0
  @Override
  public ExampleSet apply(ExampleSet inputExampleSet) throws OperatorException {
    ExampleSet exampleSet = (ExampleSet) inputExampleSet.clone();
    Attributes attributes = exampleSet.getAttributes();
    if (attributeNames.length != attributes.size()) {
      throw new UserError(null, 133, numberOfComponents, attributes.size());
    }

    // remember attributes that have been removed during training. These will be removed lateron
    Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()];
    int d = 0;
    for (Attribute oldAttribute : getTrainingHeader().getAttributes()) {
      inputAttributes[d] = attributes.get(oldAttribute.getName());
      d++;
    }

    // determining number of used components
    int numberOfUsedComponents = -1;
    if (manualNumber) {
      numberOfUsedComponents = numberOfComponents;
    } else {
      if (proportionThreshold == 0.0d) {
        numberOfUsedComponents = -1;
      } else {
        numberOfUsedComponents = 0;
        while (cumulativeSingularValueProportion[numberOfUsedComponents] < proportionThreshold) {
          numberOfUsedComponents++;
        }
        numberOfUsedComponents++;
      }
    }
    // if nothing defined or number exceeds maximal number of possible components
    if (numberOfUsedComponents == -1 || numberOfUsedComponents > getNumberOfComponents()) {
      // keep all components
      numberOfUsedComponents = getNumberOfComponents();
    }

    // retrieve factors inside singularValueVectors
    double[][] singularValueFactors = new double[numberOfUsedComponents][attributeNames.length];
    double[][] vMatrixData = vMatrix.getArray();
    for (int i = 0; i < numberOfUsedComponents; i++) {
      double invertedSingularValue = 1d / singularValues[i];
      for (int j = 0; j < attributeNames.length; j++) {
        singularValueFactors[i][j] = vMatrixData[j][i] * invertedSingularValue;
      }
    }

    // now build new attributes
    Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents];
    for (int i = 0; i < numberOfUsedComponents; i++) {
      if (useLegacyNames) {
        derivedAttributes[i] = AttributeFactory.createAttribute("d" + i, Ontology.REAL);
      } else {
        derivedAttributes[i] = AttributeFactory.createAttribute("svd_" + (i + 1), Ontology.REAL);
      }
      exampleSet.getExampleTable().addAttribute(derivedAttributes[i]);
      attributes.addRegular(derivedAttributes[i]);
    }

    // now iterator through all examples and derive value of new features
    double[] derivedValues = new double[numberOfUsedComponents];
    for (Example example : exampleSet) {
      // calculate values of new attributes with single scan over attributes
      d = 0;
      for (Attribute attribute : inputAttributes) {
        double attributeValue = example.getValue(attribute);
        for (int i = 0; i < numberOfUsedComponents; i++) {
          derivedValues[i] += singularValueFactors[i][d] * attributeValue;
        }
        d++;
      }

      // set values
      for (int i = 0; i < numberOfUsedComponents; i++) {
        example.setValue(derivedAttributes[i], derivedValues[i]);
      }

      // set values back
      Arrays.fill(derivedValues, 0);
    }

    // now remove attributes if needed
    if (!keepAttributes) {
      for (Attribute attribute : inputAttributes) {
        attributes.remove(attribute);
      }
    }

    return exampleSet;
  }