@Override
  public void doWork() throws OperatorException {
    CentroidClusterModel model = modelInput.getData(CentroidClusterModel.class);

    Attributes trainAttributes = model.getTrainingHeader().getAttributes();
    String[] attributeNames = model.getAttributeNames();
    Attribute[] attributes = new Attribute[attributeNames.length + 1];
    for (int i = 0; i < attributeNames.length; i++) {
      Attribute originalAttribute = trainAttributes.get(attributeNames[i]);
      attributes[i] =
          AttributeFactory.createAttribute(attributeNames[i], originalAttribute.getValueType());
      if (originalAttribute.isNominal()) {
        attributes[i].setMapping((NominalMapping) originalAttribute.getMapping().clone());
      }
    }
    Attribute clusterAttribute = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
    attributes[attributes.length - 1] = clusterAttribute;

    MemoryExampleTable table = new MemoryExampleTable(attributes);
    for (int i = 0; i < model.getNumberOfClusters(); i++) {
      double[] data = new double[attributeNames.length + 1];
      System.arraycopy(model.getCentroidCoordinates(i), 0, data, 0, attributeNames.length);
      data[attributeNames.length] = clusterAttribute.getMapping().mapString("cluster_" + i);
      table.addDataRow(new DoubleArrayDataRow(data));
    }

    ExampleSet resultSet = table.createExampleSet();
    resultSet.getAttributes().setSpecialAttribute(clusterAttribute, Attributes.CLUSTER_NAME);

    modelOutput.deliver(model);
    exampleSetOutput.deliver(resultSet);
  }
Beispiel #2
0
  private void setData(Example example, String attributeName, String value, Attributes attributes)
      throws UserError {
    Attribute attribute = attributes.get(attributeName);
    if (attribute == null) {
      throw new UserError(this, 111, attributeName);
    }

    if (attribute.isNominal()) {
      example.setValue(attribute, attribute.getMapping().mapString(value));
    } else {
      try {
        double doubleValue = Double.parseDouble(value);
        example.setValue(attribute, doubleValue);
      } catch (NumberFormatException e) {
        throw new UserError(this, 211, PARAMETER_VALUE, value);
      }
    }
  }
Beispiel #3
0
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    exampleSet.recalculateAllAttributeStatistics();

    Attributes attributes = exampleSet.getAttributes();
    if (attributeNames.length != attributes.size()) {
      throw new UserError(null, 133, numberOfComponents, attributes.size());
    }

    // remember attributes that have been removed during training. These will be removed lateron
    Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()];
    int d = 0;
    for (Attribute oldAttribute : getTrainingHeader().getAttributes()) {
      inputAttributes[d] = attributes.get(oldAttribute.getName());
      d++;
    }

    // determining number of used components
    int numberOfUsedComponents = -1;
    if (manualNumber) {
      numberOfUsedComponents = numberOfComponents;
    } else {
      if (varianceThreshold == 0.0d) {
        numberOfUsedComponents = -1;
      } else {
        numberOfUsedComponents = 0;
        while (cumulativeVariance[numberOfUsedComponents] < varianceThreshold) {
          numberOfUsedComponents++;
        }
        numberOfUsedComponents++;
        if (numberOfUsedComponents == eigenVectors.size()) {
          numberOfUsedComponents--;
        }
      }
    }
    if (numberOfUsedComponents == -1) {
      // keep all components
      numberOfUsedComponents = attributes.size();
    }

    // retrieve factors inside eigenVectors
    double[][] eigenValueFactors = new double[numberOfUsedComponents][attributeNames.length];
    for (int i = 0; i < numberOfUsedComponents; i++) {
      eigenValueFactors[i] = this.eigenVectors.get(i).getEigenvector();
    }

    // now build new attributes
    Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents];
    for (int i = 0; i < numberOfUsedComponents; i++) {
      derivedAttributes[i] = AttributeFactory.createAttribute("pc_" + (i + 1), Ontology.REAL);
      exampleSet.getExampleTable().addAttribute(derivedAttributes[i]);
      attributes.addRegular(derivedAttributes[i]);
    }

    // now iterator through all examples and derive value of new features
    double[] derivedValues = new double[numberOfUsedComponents];
    for (Example example : exampleSet) {
      // calculate values of new attributes with single scan over attributes
      d = 0;
      for (Attribute attribute : inputAttributes) {
        double attributeValue = example.getValue(attribute) - means[d];
        for (int i = 0; i < numberOfUsedComponents; i++) {
          derivedValues[i] += eigenValueFactors[i][d] * attributeValue;
        }
        d++;
      }

      // set values
      for (int i = 0; i < numberOfUsedComponents; i++) {
        example.setValue(derivedAttributes[i], derivedValues[i]);
      }

      // set values back
      Arrays.fill(derivedValues, 0);
    }

    // now remove attributes if needed
    if (!keepAttributes) {
      for (Attribute attribute : inputAttributes) {
        attributes.remove(attribute);
      }
    }

    return exampleSet;
  }
  @Override
  public ExampleSet apply(ExampleSet inputExampleSet) throws OperatorException {
    ExampleSet exampleSet = (ExampleSet) inputExampleSet.clone();
    Attributes attributes = exampleSet.getAttributes();
    if (attributeNames.length != attributes.size()) {
      throw new UserError(null, 133, numberOfComponents, attributes.size());
    }

    // remember attributes that have been removed during training. These will be removed lateron
    Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()];
    int d = 0;
    for (Attribute oldAttribute : getTrainingHeader().getAttributes()) {
      inputAttributes[d] = attributes.get(oldAttribute.getName());
      d++;
    }

    // determining number of used components
    int numberOfUsedComponents = -1;
    if (manualNumber) {
      numberOfUsedComponents = numberOfComponents;
    } else {
      if (proportionThreshold == 0.0d) {
        numberOfUsedComponents = -1;
      } else {
        numberOfUsedComponents = 0;
        while (cumulativeSingularValueProportion[numberOfUsedComponents] < proportionThreshold) {
          numberOfUsedComponents++;
        }
        numberOfUsedComponents++;
      }
    }
    // if nothing defined or number exceeds maximal number of possible components
    if (numberOfUsedComponents == -1 || numberOfUsedComponents > getNumberOfComponents()) {
      // keep all components
      numberOfUsedComponents = getNumberOfComponents();
    }

    // retrieve factors inside singularValueVectors
    double[][] singularValueFactors = new double[numberOfUsedComponents][attributeNames.length];
    double[][] vMatrixData = vMatrix.getArray();
    for (int i = 0; i < numberOfUsedComponents; i++) {
      double invertedSingularValue = 1d / singularValues[i];
      for (int j = 0; j < attributeNames.length; j++) {
        singularValueFactors[i][j] = vMatrixData[j][i] * invertedSingularValue;
      }
    }

    // now build new attributes
    Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents];
    for (int i = 0; i < numberOfUsedComponents; i++) {
      if (useLegacyNames) {
        derivedAttributes[i] = AttributeFactory.createAttribute("d" + i, Ontology.REAL);
      } else {
        derivedAttributes[i] = AttributeFactory.createAttribute("svd_" + (i + 1), Ontology.REAL);
      }
      exampleSet.getExampleTable().addAttribute(derivedAttributes[i]);
      attributes.addRegular(derivedAttributes[i]);
    }

    // now iterator through all examples and derive value of new features
    double[] derivedValues = new double[numberOfUsedComponents];
    for (Example example : exampleSet) {
      // calculate values of new attributes with single scan over attributes
      d = 0;
      for (Attribute attribute : inputAttributes) {
        double attributeValue = example.getValue(attribute);
        for (int i = 0; i < numberOfUsedComponents; i++) {
          derivedValues[i] += singularValueFactors[i][d] * attributeValue;
        }
        d++;
      }

      // set values
      for (int i = 0; i < numberOfUsedComponents; i++) {
        example.setValue(derivedAttributes[i], derivedValues[i]);
      }

      // set values back
      Arrays.fill(derivedValues, 0);
    }

    // now remove attributes if needed
    if (!keepAttributes) {
      for (Attribute attribute : inputAttributes) {
        attributes.remove(attribute);
      }
    }

    return exampleSet;
  }