/**
 * Builds an example set containing one row per cluster centroid of the incoming
 * {@link CentroidClusterModel} and delivers it together with the untouched model.
 *
 * <p>The resulting table has one column for every attribute name reported by the model
 * (typed like the attribute of the same name in the training header, including a cloned
 * nominal mapping where applicable) plus a trailing nominal column named "cluster" that
 * is registered as the special cluster attribute.
 *
 * @throws OperatorException
 *             if the model cannot be retrieved from the input port
 */
@Override
public void doWork() throws OperatorException {
    CentroidClusterModel model = modelInput.getData(CentroidClusterModel.class);
    Attributes trainAttributes = model.getTrainingHeader().getAttributes();
    String[] attributeNames = model.getAttributeNames();
    final int regularCount = attributeNames.length;

    // One column per model attribute, mirroring the value type used during training.
    Attribute[] columns = new Attribute[regularCount + 1];
    int column = 0;
    for (String name : attributeNames) {
        Attribute source = trainAttributes.get(name);
        Attribute copy = AttributeFactory.createAttribute(name, source.getValueType());
        columns[column] = copy;
        if (source.isNominal()) {
            // Clone the mapping so the new column does not share it with the training header.
            copy.setMapping((NominalMapping) source.getMapping().clone());
        }
        column++;
    }

    // The last column carries the cluster label.
    Attribute clusterAttribute = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
    columns[regularCount] = clusterAttribute;

    MemoryExampleTable table = new MemoryExampleTable(columns);
    for (int cluster = 0; cluster < model.getNumberOfClusters(); cluster++) {
        double[] row = new double[regularCount + 1];
        double[] centroid = model.getCentroidCoordinates(cluster);
        System.arraycopy(centroid, 0, row, 0, regularCount);
        row[regularCount] = clusterAttribute.getMapping().mapString("cluster_" + cluster);
        table.addDataRow(new DoubleArrayDataRow(row));
    }

    ExampleSet resultSet = table.createExampleSet();
    resultSet.getAttributes().setSpecialAttribute(clusterAttribute, Attributes.CLUSTER_NAME);

    modelOutput.deliver(model);
    exampleSetOutput.deliver(resultSet);
}
/**
 * Writes {@code value} into the attribute called {@code attributeName} of the given example.
 *
 * <p>For nominal attributes the string is mapped through the attribute's nominal mapping;
 * for all other attributes it is parsed as a {@code double}.
 *
 * @param example
 *            the example whose value is set
 * @param attributeName
 *            name of the attribute to look up in {@code attributes}
 * @param value
 *            the textual value to store
 * @param attributes
 *            the attributes in which {@code attributeName} is resolved
 * @throws UserError
 *             with code 111 if no attribute of that name exists, or with code 211 if the
 *             attribute is not nominal and {@code value} is not a parsable number
 */
private void setData(Example example, String attributeName, String value, Attributes attributes) throws UserError {
    final Attribute target = attributes.get(attributeName);
    if (target == null) {
        throw new UserError(this, 111, attributeName);
    }

    if (target.isNominal()) {
        double mappedIndex = target.getMapping().mapString(value);
        example.setValue(target, mappedIndex);
        return;
    }

    try {
        example.setValue(target, Double.parseDouble(value));
    } catch (NumberFormatException e) {
        throw new UserError(this, 211, PARAMETER_VALUE, value);
    }
}
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { exampleSet.recalculateAllAttributeStatistics(); Attributes attributes = exampleSet.getAttributes(); if (attributeNames.length != attributes.size()) { throw new UserError(null, 133, numberOfComponents, attributes.size()); } // remember attributes that have been removed during training. These will be removed lateron Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()]; int d = 0; for (Attribute oldAttribute : getTrainingHeader().getAttributes()) { inputAttributes[d] = attributes.get(oldAttribute.getName()); d++; } // determining number of used components int numberOfUsedComponents = -1; if (manualNumber) { numberOfUsedComponents = numberOfComponents; } else { if (varianceThreshold == 0.0d) { numberOfUsedComponents = -1; } else { numberOfUsedComponents = 0; while (cumulativeVariance[numberOfUsedComponents] < varianceThreshold) { numberOfUsedComponents++; } numberOfUsedComponents++; if (numberOfUsedComponents == eigenVectors.size()) { numberOfUsedComponents--; } } } if (numberOfUsedComponents == -1) { // keep all components numberOfUsedComponents = attributes.size(); } // retrieve factors inside eigenVectors double[][] eigenValueFactors = new double[numberOfUsedComponents][attributeNames.length]; for (int i = 0; i < numberOfUsedComponents; i++) { eigenValueFactors[i] = this.eigenVectors.get(i).getEigenvector(); } // now build new attributes Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents]; for (int i = 0; i < numberOfUsedComponents; i++) { derivedAttributes[i] = AttributeFactory.createAttribute("pc_" + (i + 1), Ontology.REAL); exampleSet.getExampleTable().addAttribute(derivedAttributes[i]); attributes.addRegular(derivedAttributes[i]); } // now iterator through all examples and derive value of new features double[] derivedValues = new double[numberOfUsedComponents]; for (Example example : exampleSet) { // calculate values of new 
attributes with single scan over attributes d = 0; for (Attribute attribute : inputAttributes) { double attributeValue = example.getValue(attribute) - means[d]; for (int i = 0; i < numberOfUsedComponents; i++) { derivedValues[i] += eigenValueFactors[i][d] * attributeValue; } d++; } // set values for (int i = 0; i < numberOfUsedComponents; i++) { example.setValue(derivedAttributes[i], derivedValues[i]); } // set values back Arrays.fill(derivedValues, 0); } // now remove attributes if needed if (!keepAttributes) { for (Attribute attribute : inputAttributes) { attributes.remove(attribute); } } return exampleSet; }
@Override public ExampleSet apply(ExampleSet inputExampleSet) throws OperatorException { ExampleSet exampleSet = (ExampleSet) inputExampleSet.clone(); Attributes attributes = exampleSet.getAttributes(); if (attributeNames.length != attributes.size()) { throw new UserError(null, 133, numberOfComponents, attributes.size()); } // remember attributes that have been removed during training. These will be removed lateron Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()]; int d = 0; for (Attribute oldAttribute : getTrainingHeader().getAttributes()) { inputAttributes[d] = attributes.get(oldAttribute.getName()); d++; } // determining number of used components int numberOfUsedComponents = -1; if (manualNumber) { numberOfUsedComponents = numberOfComponents; } else { if (proportionThreshold == 0.0d) { numberOfUsedComponents = -1; } else { numberOfUsedComponents = 0; while (cumulativeSingularValueProportion[numberOfUsedComponents] < proportionThreshold) { numberOfUsedComponents++; } numberOfUsedComponents++; } } // if nothing defined or number exceeds maximal number of possible components if (numberOfUsedComponents == -1 || numberOfUsedComponents > getNumberOfComponents()) { // keep all components numberOfUsedComponents = getNumberOfComponents(); } // retrieve factors inside singularValueVectors double[][] singularValueFactors = new double[numberOfUsedComponents][attributeNames.length]; double[][] vMatrixData = vMatrix.getArray(); for (int i = 0; i < numberOfUsedComponents; i++) { double invertedSingularValue = 1d / singularValues[i]; for (int j = 0; j < attributeNames.length; j++) { singularValueFactors[i][j] = vMatrixData[j][i] * invertedSingularValue; } } // now build new attributes Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents]; for (int i = 0; i < numberOfUsedComponents; i++) { if (useLegacyNames) { derivedAttributes[i] = AttributeFactory.createAttribute("d" + i, Ontology.REAL); } else { derivedAttributes[i] = 
AttributeFactory.createAttribute("svd_" + (i + 1), Ontology.REAL); } exampleSet.getExampleTable().addAttribute(derivedAttributes[i]); attributes.addRegular(derivedAttributes[i]); } // now iterator through all examples and derive value of new features double[] derivedValues = new double[numberOfUsedComponents]; for (Example example : exampleSet) { // calculate values of new attributes with single scan over attributes d = 0; for (Attribute attribute : inputAttributes) { double attributeValue = example.getValue(attribute); for (int i = 0; i < numberOfUsedComponents; i++) { derivedValues[i] += singularValueFactors[i][d] * attributeValue; } d++; } // set values for (int i = 0; i < numberOfUsedComponents; i++) { example.setValue(derivedAttributes[i], derivedValues[i]); } // set values back Arrays.fill(derivedValues, 0); } // now remove attributes if needed if (!keepAttributes) { for (Attribute attribute : inputAttributes) { attributes.remove(attribute); } } return exampleSet; }