@Override public final void doWork() throws OperatorException { ExampleSet inputExampleSet = exampleSetInput.getData(ExampleSet.class); ExampleSet applySet = null; // check for needed copy of original exampleset if (originalOutput.isConnected() && writesIntoExistingData()) { int type = DataRowFactory.TYPE_DOUBLE_ARRAY; if (inputExampleSet.getExampleTable() instanceof MemoryExampleTable) { DataRowReader dataRowReader = inputExampleSet.getExampleTable().getDataRowReader(); if (dataRowReader.hasNext()) { type = dataRowReader.next().getType(); } } // check if type is supported to be copied if (type >= 0) { applySet = MaterializeDataInMemory.materializeExampleSet(inputExampleSet, type); } } if (applySet == null) applySet = (ExampleSet) inputExampleSet.clone(); // we apply on the materialized data, because writing can't take place in views anyway. ExampleSet result = apply(applySet); originalOutput.deliver(inputExampleSet); exampleSetOutput.deliver(result); }
/**
 * Adds a {@code DATE_TIME} attribute whose values are the values of the configured numerical
 * attribute shifted by the configured time offset. Missing values (NaN) stay missing.
 *
 * <p>Unless the "keep old attribute" parameter is set, the source attribute is removed and the
 * new attribute takes over its name and special role name; otherwise the new attribute is named
 * {@code <name>_AS_DATE}.
 *
 * @param exampleSet the example set to extend
 * @return the same example set instance
 * @throws OperatorException a {@code UserError} (code 111) if the configured attribute does not
 *     exist
 */
@Override
public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
  final String attributeName = getParameterAsString(PARAMETER_ATTRIBUTE_NAME);
  final Long offset = getParameterAsLong(PARMETER_TIME_OFFSET);

  final Attribute source = exampleSet.getAttributes().get(attributeName);
  if (source == null) {
    throw new UserError(this, 111, attributeName);
  }

  final Attribute dateAttribute = AttributeFactory.createAttribute(Ontology.DATE_TIME);
  exampleSet.getExampleTable().addAttribute(dateAttribute);
  exampleSet.getAttributes().addRegular(dateAttribute);

  for (Example row : exampleSet) {
    final double raw = row.getValue(source);
    // NaN is passed through untouched; everything else is shifted by the offset.
    final double shifted = Double.isNaN(raw) ? raw : raw + offset;
    row.setValue(dateAttribute, shifted);
  }

  if (getParameterAsBoolean(PARAMETER_KEEP_OLD_ATTRIBUTE)) {
    dateAttribute.setName(attributeName + "_AS_DATE");
  } else {
    // Replace the source attribute: same name, same special role name.
    final AttributeRole previousRole = exampleSet.getAttributes().getRole(source);
    exampleSet.getAttributes().remove(source);
    dateAttribute.setName(attributeName);
    exampleSet.getAttributes().setSpecialAttribute(dateAttribute, previousRole.getSpecialName());
  }
  return exampleSet;
}
/**
 * Replaces every numerical, non-integer attribute of the given example set by a new {@code
 * INTEGER} attribute of the same name. Values are either rounded or truncated, depending on the
 * "round" parameter; missing values stay missing.
 *
 * @param exampleSet the (already filtered) example set to convert
 * @return the same example set instance
 * @throws OperatorException propagated from parameter access
 */
@Override
public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
  final boolean round = getParameterAsBoolean(PARAMETER_ROUND);
  final List<Attribute> replacements = new LinkedList<Attribute>();

  final Iterator<Attribute> cursor = exampleSet.getAttributes().iterator();
  while (cursor.hasNext()) {
    final Attribute current = cursor.next();

    // Only numerical attributes that are not already integers are converted.
    if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(current.getValueType(), Ontology.NUMERICAL)
        || Ontology.ATTRIBUTE_VALUE_TYPE.isA(current.getValueType(), Ontology.INTEGER)) {
      continue;
    }

    final Attribute integerAttribute =
        AttributeFactory.createAttribute(current.getName(), Ontology.INTEGER);
    replacements.add(integerAttribute);
    exampleSet.getExampleTable().addAttribute(integerAttribute);

    for (Example row : exampleSet) {
      final double before = row.getValue(current);
      if (Double.isNaN(before)) {
        row.setValue(integerAttribute, Double.NaN);
        continue;
      }
      final long after = round ? Math.round(before) : (long) before;
      row.setValue(integerAttribute, after);
    }

    // Drop the old attribute through the iterator so that iteration stays valid.
    cursor.remove();
  }

  // The new attributes are registered only after iteration over the attributes has finished.
  for (Attribute replacement : replacements) {
    exampleSet.getAttributes().addRegular(replacement);
  }
  return exampleSet;
}
@Override public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException { // checking and creating ids if necessary Tools.checkAndCreateIds(exampleSet); // generating assignment RandomGenerator random = RandomGenerator.getRandomGenerator(this); int clusterAssignments[] = new int[exampleSet.size()]; int k = getParameterAsInt(PARAMETER_NUMBER_OF_CLUSTERS); for (int i = 0; i < exampleSet.size(); i++) { clusterAssignments[i] = random.nextInt(k); } ClusterModel model = new ClusterModel( exampleSet, k, getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL), getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED)); model.setClusterAssignments(clusterAssignments, exampleSet); // generating cluster attribute if (addsClusterAttribute()) { Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL); exampleSet.getExampleTable().addAttribute(cluster); exampleSet.getAttributes().setCluster(cluster); int i = 0; for (Example example : exampleSet) { example.setValue(cluster, "cluster_" + clusterAssignments[i]); i++; } } return model; }
/** * Iterates over all models and returns the class with maximum likelihood. * * @param origExampleSet the set of examples to be classified */ @Override public ExampleSet performPrediction(ExampleSet origExampleSet, Attribute predictedLabel) throws OperatorException { final String attributePrefix = "AdaBoostModelPrediction"; final int numLabels = predictedLabel.getMapping().size(); final Attribute[] specialAttributes = new Attribute[numLabels]; OperatorProgress progress = null; if (getShowProgress() && getOperator() != null && getOperator().getProgress() != null) { progress = getOperator().getProgress(); progress.setTotal(100); } for (int i = 0; i < numLabels; i++) { specialAttributes[i] = com.rapidminer.example.Tools.createSpecialAttribute( origExampleSet, attributePrefix + i, Ontology.NUMERICAL); if (progress != null) { progress.setCompleted((int) (25.0 * (i + 1) / numLabels)); } } Iterator<Example> reader = origExampleSet.iterator(); int progressCounter = 0; while (reader.hasNext()) { Example example = reader.next(); for (int i = 0; i < specialAttributes.length; i++) { example.setValue(specialAttributes[i], 0); } if (progress != null && ++progressCounter % OPERATOR_PROGRESS_STEPS == 0) { progress.setCompleted((int) (25.0 * progressCounter / origExampleSet.size()) + 25); } } reader = origExampleSet.iterator(); for (int modelNr = 0; modelNr < this.getNumberOfModels(); modelNr++) { Model model = this.getModel(modelNr); ExampleSet exampleSet = (ExampleSet) origExampleSet.clone(); exampleSet = model.apply(exampleSet); this.updateEstimates(exampleSet, modelNr, specialAttributes); PredictionModel.removePredictedLabel(exampleSet); if (progress != null) { progress.setCompleted((int) (25.0 * (modelNr + 1) / this.getNumberOfModels()) + 50); } } // Turn prediction weights into confidences and a crisp predcition: this.evaluateSpecialAttributes(origExampleSet, specialAttributes); // Clean up attributes: for (int i = 0; i < numLabels; i++) { 
origExampleSet.getAttributes().remove(specialAttributes[i]); origExampleSet.getExampleTable().removeAttribute(specialAttributes[i]); if (progress != null) { progress.setCompleted((int) (25.0 * (i + 1) / numLabels) + 75); } } return origExampleSet; }
/** * Helper method replacing <code>Model.createPredictedLabel(ExampleSet)</code> in order to lower * memory consumption. */ private static void createOrReplacePredictedLabelFor(ExampleSet exampleSet, Model model) { Attribute predictedLabel = exampleSet.getAttributes().getPredictedLabel(); if (predictedLabel != null) { // remove old predicted label exampleSet.getAttributes().remove(predictedLabel); exampleSet.getExampleTable().removeAttribute(predictedLabel); } // model.createPredictedLabel(exampleSet); // not longer necessary since // label creation is done by model.apply(...). }
private void restoreOldWeights(ExampleSet exampleSet) { if (this.oldWeights != null) { // need to reset weights Iterator<Example> reader = exampleSet.iterator(); int i = 0; while (reader.hasNext() && i < this.oldWeights.length) { reader.next().setWeight(this.oldWeights[i++]); } } else { // need to delete the weights attribute Attribute weight = exampleSet.getAttributes().getWeight(); exampleSet.getAttributes().remove(weight); exampleSet.getExampleTable().removeAttribute(weight); } }
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { int size = exampleSet.size(); // cannot bootstrap without any examples if (size < 1) { throw new UserError(this, 117); } RandomGenerator random = RandomGenerator.getRandomGenerator(this); switch (getParameterAsInt(PARAMETER_SAMPLE)) { case SAMPLE_ABSOLUTE: size = getParameterAsInt(PARAMETER_SAMPLE_SIZE); break; case SAMPLE_RELATIVE: size = (int) Math.round(exampleSet.size() * getParameterAsDouble(PARAMETER_SAMPLE_RATIO)); break; } int[] mapping = null; if (getParameterAsBoolean(PARAMETER_USE_WEIGHTS) && exampleSet.getAttributes().getWeight() != null) { mapping = MappedExampleSet.createWeightedBootstrappingMapping(exampleSet, size, random); } else { mapping = MappedExampleSet.createBootstrappingMapping(exampleSet, size, random); } // create and materialize example set ExampleSet mappedExampleSet = new MappedExampleSet(exampleSet, mapping, true); if (getCompatibilityLevel().isAbove(VERSION_6_4_0)) { int type = DataRowFactory.TYPE_DOUBLE_ARRAY; if (exampleSet.size() > 0) { type = exampleSet.getExampleTable().getDataRow(0).getType(); } mappedExampleSet = MaterializeDataInMemory.materializeExampleSet(mappedExampleSet, type); } return mappedExampleSet; }
@Override public ExampleSet applyOnData(ExampleSet exampleSet) throws OperatorException { Attributes attributes = exampleSet.getAttributes(); // constructing new attributes with generic names, holding old ones, if old type wasn't real Attribute[] oldAttributes = new Attribute[attributes.size()]; int i = 0; for (Attribute attribute : attributes) { oldAttributes[i] = attribute; i++; } Attribute[] newAttributes = new Attribute[attributes.size()]; for (i = 0; i < newAttributes.length; i++) { newAttributes[i] = oldAttributes[i]; if (oldAttributes[i].isNumerical()) if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(oldAttributes[i].getValueType(), Ontology.REAL)) { newAttributes[i] = AttributeFactory.createAttribute(Ontology.REAL); exampleSet.getExampleTable().addAttribute(newAttributes[i]); attributes.addRegular(newAttributes[i]); } } // applying on data applyOnData(exampleSet, oldAttributes, newAttributes); // removing old attributes and change new attributes name to old ones if needed for (i = 0; i < oldAttributes.length; i++) { attributes.remove(oldAttributes[i]); // if attribute is new, then remove for later storing in correct order if (oldAttributes[i] != newAttributes[i]) attributes.remove(newAttributes[i]); attributes.addRegular(newAttributes[i]); newAttributes[i].setName(oldAttributes[i].getName()); } return exampleSet; }
@Override public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException { int k = getParameterAsInt(PARAMETER_K); int maxOptimizationSteps = getParameterAsInt(PARAMETER_MAX_OPTIMIZATION_STEPS); boolean useExampleWeights = getParameterAsBoolean(PARAMETER_USE_WEIGHTS); Kernel kernel = Kernel.createKernel(this); // init operator progress getProgress().setTotal(maxOptimizationSteps); // checking and creating ids if necessary Tools.checkAndCreateIds(exampleSet); // additional checks Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]); if (exampleSet.size() < k) { throw new UserError(this, 142, k); } // extracting attribute names Attributes attributes = exampleSet.getAttributes(); ArrayList<String> attributeNames = new ArrayList<String>(attributes.size()); for (Attribute attribute : attributes) { attributeNames.add(attribute.getName()); } Attribute weightAttribute = attributes.getWeight(); RandomGenerator generator = RandomGenerator.getRandomGenerator(this); ClusterModel model = new ClusterModel( exampleSet, k, getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL), getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED)); // init centroids int[] clusterAssignments = new int[exampleSet.size()]; for (int i = 0; i < exampleSet.size(); i++) { clusterAssignments[i] = generator.nextIntInRange(0, k); } // run optimization steps boolean stable = false; for (int step = 0; step < maxOptimizationSteps && !stable; step++) { // calculating cluster kernel properties double[] clusterWeights = new double[k]; double[] clusterKernelCorrection = new double[k]; int i = 0; for (Example firstExample : exampleSet) { double firstExampleWeight = useExampleWeights ? 
firstExample.getValue(weightAttribute) : 1d; double[] firstExampleValues = getAsDoubleArray(firstExample, attributes); clusterWeights[clusterAssignments[i]] += firstExampleWeight; int j = 0; for (Example secondExample : exampleSet) { if (clusterAssignments[i] == clusterAssignments[j]) { double secondExampleWeight = useExampleWeights ? secondExample.getValue(weightAttribute) : 1d; clusterKernelCorrection[clusterAssignments[i]] += firstExampleWeight * secondExampleWeight * kernel.calculateDistance( firstExampleValues, getAsDoubleArray(secondExample, attributes)); } j++; } i++; } for (int z = 0; z < k; z++) { clusterKernelCorrection[z] /= clusterWeights[z] * clusterWeights[z]; } // assign examples to new centroids int[] newClusterAssignments = new int[exampleSet.size()]; i = 0; for (Example example : exampleSet) { double[] exampleValues = getAsDoubleArray(example, attributes); double exampleKernelValue = kernel.calculateDistance(exampleValues, exampleValues); double nearestDistance = Double.POSITIVE_INFINITY; int nearestIndex = 0; for (int clusterIndex = 0; clusterIndex < k; clusterIndex++) { double distance = 0; // iterating over all examples in cluster to get kernel distance int j = 0; for (Example clusterExample : exampleSet) { if (clusterAssignments[j] == clusterIndex) { distance += (useExampleWeights ? 
clusterExample.getValue(weightAttribute) : 1d) * kernel.calculateDistance( getAsDoubleArray(clusterExample, attributes), exampleValues); } j++; } distance *= -2d / clusterWeights[clusterIndex]; // copy in outer loop distance += exampleKernelValue; distance += clusterKernelCorrection[clusterIndex]; if (distance < nearestDistance) { nearestDistance = distance; nearestIndex = clusterIndex; } } newClusterAssignments[i] = nearestIndex; i++; } // finishing assignment stable = true; for (int j = 0; j < exampleSet.size() && stable; j++) { stable &= newClusterAssignments[j] == clusterAssignments[j]; } clusterAssignments = newClusterAssignments; // trigger operator progress getProgress().step(); } // setting last clustering into model model.setClusterAssignments(clusterAssignments, exampleSet); getProgress().complete(); if (addsClusterAttribute()) { Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL); exampleSet.getExampleTable().addAttribute(cluster); exampleSet.getAttributes().setCluster(cluster); int i = 0; for (Example example : exampleSet) { example.setValue(cluster, "cluster_" + clusterAssignments[i]); i++; } } return model; }
/** * Parses the provided expression and iterates over the {@link ExampleSet}, interprets attributes * as variables, evaluates the function and creates a new attribute with the given name that takes * the expression's value. The type of the attribute depends on the expression type and is {@link * Ontology#NOMINAL} for strings, {@link Ontology#INTEGER} for integers, {@link Ontology#REAL} for * reals, {@link Ontology#DATE_TIME} for Dates, and {@link Ontology#BINOMINAL} with values * "true" and "false" for booleans. If the executing operator is defined, * there will be a check for stop before the calculation of each example. * * @param exampleSet the example set to which the generated attribute is added * @param name the new attribute name * @param expression the expression used to generate attribute values * @param parser the expression parser used to parse the expression argument * @param resolver the example resolver which is used by the parser to resolve example values * @param executingOperator the operator calling this method. <code>null</code> is allowed. 
If not * null the operator will be used to check for stop * @throws ProcessStoppedException in case the process was stopped by the user * @throws ExpressionException in case parsing the expression fails */ public static Attribute addAttribute( ExampleSet exampleSet, String name, String expression, ExpressionParser parser, ExampleResolver resolver, Operator executingOperator) throws ProcessStoppedException, ExpressionException { // parse the expression Expression parsedExpression = parser.parse(expression); Attribute newAttribute = null; // if != null this needs to be overridden Attribute existingAttribute = exampleSet.getAttributes().get(name); StringBuffer appendix = new StringBuffer(); String targetName = name; if (existingAttribute != null) { // If an existing attribute will be overridden, first a unique temporary name has to be // generated by appending a random string to the attribute's name until it's a unique // attribute name. After the new attribute is build, it's name is set the 'targetName' // at the end of this method. 
// do { appendix.append(RandomGenerator.getGlobalRandomGenerator().nextString(5)); } while (exampleSet.getAttributes().get(name + appendix.toString()) != null); name = name + appendix.toString(); } ExpressionType resultType = parsedExpression.getExpressionType(); int ontology = resultType.getAttributeType(); if (ontology == Ontology.BINOMINAL) { newAttribute = AttributeFactory.createAttribute(name, Ontology.BINOMINAL); newAttribute.getMapping().mapString("false"); newAttribute.getMapping().mapString("true"); } else { newAttribute = AttributeFactory.createAttribute(name, ontology); } // set construction description newAttribute.setConstruction(expression); // add new attribute to table and example set exampleSet.getExampleTable().addAttribute(newAttribute); exampleSet.getAttributes().addRegular(newAttribute); // create attribute of correct type and all values for (Example example : exampleSet) { if (executingOperator != null) { executingOperator.checkForStop(); } // bind example to resolver resolver.bind(example); // calculate result try { switch (resultType) { case DOUBLE: case INTEGER: example.setValue(newAttribute, parsedExpression.evaluateNumerical()); break; case DATE: Date date = parsedExpression.evaluateDate(); example.setValue(newAttribute, date == null ? 
Double.NaN : date.getTime()); break; default: example.setValue(newAttribute, parsedExpression.evaluateNominal()); break; } } finally { // avoid memory leaks resolver.unbind(); } } // remove existing attribute (if necessary) if (existingAttribute != null) { AttributeRole oldRole = exampleSet.getAttributes().getRole(existingAttribute); exampleSet.getAttributes().remove(existingAttribute); newAttribute.setName(targetName); // restore role from old attribute to new attribute if (oldRole.isSpecial()) { exampleSet.getAttributes().setSpecialAttribute(newAttribute, oldRole.getSpecialName()); } } // update example resolver after meta data change resolver.addAttributeMetaData( new AttributeMetaData(exampleSet.getAttributes().getRole(newAttribute), exampleSet, true)); return newAttribute; }
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { // searching confidence attributes Attributes attributes = exampleSet.getAttributes(); Attribute predictedLabel = attributes.getPredictedLabel(); if (predictedLabel == null) { throw new UserError(this, 107); } NominalMapping mapping = predictedLabel.getMapping(); int numberOfLabels = mapping.size(); Attribute[] confidences = new Attribute[numberOfLabels]; String[] labelValue = new String[numberOfLabels]; int i = 0; for (String value : mapping.getValues()) { labelValue[i] = value; confidences[i] = attributes.getConfidence(value); if (confidences[i] == null) { throw new UserError(this, 154, value); } i++; } // generating new prediction attributes int k = Math.min(numberOfLabels, getParameterAsInt(PARAMETER_NUMBER_OF_RANKS)); Attribute[] kthPredictions = new Attribute[k]; Attribute[] kthConfidences = new Attribute[k]; for (i = 0; i < k; i++) { kthPredictions[i] = AttributeFactory.createAttribute(predictedLabel.getValueType()); kthPredictions[i].setName(predictedLabel.getName() + "_" + (i + 1)); kthPredictions[i].setMapping((NominalMapping) predictedLabel.getMapping().clone()); kthConfidences[i] = AttributeFactory.createAttribute(Ontology.REAL); kthConfidences[i].setName(Attributes.CONFIDENCE_NAME + "_" + (i + 1)); attributes.addRegular(kthPredictions[i]); attributes.addRegular(kthConfidences[i]); attributes.setSpecialAttribute(kthPredictions[i], Attributes.PREDICTION_NAME + "_" + (i + 1)); attributes.setSpecialAttribute(kthConfidences[i], Attributes.CONFIDENCE_NAME + "_" + (i + 1)); } exampleSet.getExampleTable().addAttributes(Arrays.asList(kthConfidences)); exampleSet.getExampleTable().addAttributes(Arrays.asList(kthPredictions)); // now setting values for (Example example : exampleSet) { ArrayList<Tupel<Double, Integer>> labelConfidences = new ArrayList<Tupel<Double, Integer>>(numberOfLabels); for (i = 0; i < numberOfLabels; i++) { labelConfidences.add(new Tupel<Double, 
Integer>(example.getValue(confidences[i]), i)); } Collections.sort(labelConfidences); for (i = 0; i < k; i++) { Tupel<Double, Integer> tupel = labelConfidences.get(numberOfLabels - i - 1); example.setValue( kthPredictions[i], tupel.getSecond()); // Can use index since mapping has been cloned from above example.setValue(kthConfidences[i], tupel.getFirst()); } } // deleting old prediction / confidences attributes.remove(predictedLabel); if (getParameterAsBoolean(PARAMETER_REMOVE_OLD_PREDICTIONS)) { for (i = 0; i < confidences.length; i++) { attributes.remove(confidences[i]); } } return exampleSet; }
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { // init char decimalPointCharacter = getParameterAsString(PARAMETER_DECIMAL_POINT_CHARACTER).charAt(0); Character groupingCharacter = null; if (isParameterSet(PARAMETER_NUMBER_GROUPING_CHARACTER)) { groupingCharacter = getParameterAsString(PARAMETER_NUMBER_GROUPING_CHARACTER).charAt(0); } Set<Attribute> attributeSet = attributeSelector.getAttributeSubset(exampleSet, false); int size = attributeSet.size(); int[] valueTypes = new int[size]; int index = 0; for (Attribute attribute : attributeSet) { valueTypes[index++] = attribute.getValueType(); } // guessing int[] guessedValueTypes = new int[valueTypes.length]; int checkedCounter = 0; for (Example example : exampleSet) { index = 0; for (Attribute attribute : attributeSet) { if (!attribute.isNominal() && !attribute.isNumerical()) { continue; } double originalValue = example.getValue(attribute); if (!Double.isNaN(originalValue)) { if (guessedValueTypes[index] != Ontology.NOMINAL) { try { String valueString = example.getValueAsString(attribute); if (!Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) { if (groupingCharacter != null) { valueString = valueString.replace(groupingCharacter.toString(), ""); } valueString = valueString.replace(decimalPointCharacter, '.'); double value = Double.parseDouble(valueString); if (guessedValueTypes[index] != Ontology.REAL) { if (Tools.isEqual(Math.round(value), value)) { guessedValueTypes[index] = Ontology.INTEGER; } else { guessedValueTypes[index] = Ontology.REAL; } } } } catch (NumberFormatException e) { guessedValueTypes[index] = Ontology.NOMINAL; checkedCounter++; } } } index++; } if (checkedCounter >= guessedValueTypes.length) { break; } } // the example set contains at least one example and the guessing was performed if (exampleSet.size() > 0) { valueTypes = guessedValueTypes; // new attributes List<AttributeRole> newAttributes = new LinkedList<AttributeRole>(); index = 0; for (Attribute 
attribute : attributeSet) { if (!attribute.isNominal() && !attribute.isNumerical()) { continue; } AttributeRole role = exampleSet.getAttributes().getRole(attribute); Attribute newAttribute = AttributeFactory.createAttribute(valueTypes[index]); exampleSet.getExampleTable().addAttribute(newAttribute); AttributeRole newRole = new AttributeRole(newAttribute); newRole.setSpecial(role.getSpecialName()); newAttributes.add(newRole); // copy data for (Example e : exampleSet) { double oldValue = e.getValue(attribute); if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueTypes[index], Ontology.NUMERICAL)) { if (!Double.isNaN(oldValue)) { String valueString = e.getValueAsString(attribute); if (Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) { e.setValue(newAttribute, Double.NaN); } else { if (groupingCharacter != null) { valueString = valueString.replace(groupingCharacter.toString(), ""); } valueString = valueString.replace(decimalPointCharacter, '.'); e.setValue(newAttribute, Double.parseDouble(valueString)); } } else { e.setValue(newAttribute, Double.NaN); } } else { if (!Double.isNaN(oldValue)) { String value = e.getValueAsString(attribute); e.setValue(newAttribute, newAttribute.getMapping().mapString(value)); } else { e.setValue(newAttribute, Double.NaN); } } } // delete attribute and rename the new attribute (due to deletion and data scans: no // more memory used :-) exampleSet.getExampleTable().removeAttribute(attribute); exampleSet.getAttributes().remove(role); newAttribute.setName(attribute.getName()); index++; } for (AttributeRole role : newAttributes) { if (role.isSpecial()) { exampleSet .getAttributes() .setSpecialAttribute(role.getAttribute(), role.getSpecialName()); } else { exampleSet.getAttributes().addRegular(role.getAttribute()); } } } return exampleSet; }
@Override public ExampleSet apply(ExampleSet inputExampleSet) throws OperatorException { ExampleSet exampleSet = (ExampleSet) inputExampleSet.clone(); Attributes attributes = exampleSet.getAttributes(); if (attributeNames.length != attributes.size()) { throw new UserError(null, 133, numberOfComponents, attributes.size()); } // remember attributes that have been removed during training. These will be removed lateron Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()]; int d = 0; for (Attribute oldAttribute : getTrainingHeader().getAttributes()) { inputAttributes[d] = attributes.get(oldAttribute.getName()); d++; } // determining number of used components int numberOfUsedComponents = -1; if (manualNumber) { numberOfUsedComponents = numberOfComponents; } else { if (proportionThreshold == 0.0d) { numberOfUsedComponents = -1; } else { numberOfUsedComponents = 0; while (cumulativeSingularValueProportion[numberOfUsedComponents] < proportionThreshold) { numberOfUsedComponents++; } numberOfUsedComponents++; } } // if nothing defined or number exceeds maximal number of possible components if (numberOfUsedComponents == -1 || numberOfUsedComponents > getNumberOfComponents()) { // keep all components numberOfUsedComponents = getNumberOfComponents(); } // retrieve factors inside singularValueVectors double[][] singularValueFactors = new double[numberOfUsedComponents][attributeNames.length]; double[][] vMatrixData = vMatrix.getArray(); for (int i = 0; i < numberOfUsedComponents; i++) { double invertedSingularValue = 1d / singularValues[i]; for (int j = 0; j < attributeNames.length; j++) { singularValueFactors[i][j] = vMatrixData[j][i] * invertedSingularValue; } } // now build new attributes Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents]; for (int i = 0; i < numberOfUsedComponents; i++) { if (useLegacyNames) { derivedAttributes[i] = AttributeFactory.createAttribute("d" + i, Ontology.REAL); } else { derivedAttributes[i] = 
AttributeFactory.createAttribute("svd_" + (i + 1), Ontology.REAL); } exampleSet.getExampleTable().addAttribute(derivedAttributes[i]); attributes.addRegular(derivedAttributes[i]); } // now iterator through all examples and derive value of new features double[] derivedValues = new double[numberOfUsedComponents]; for (Example example : exampleSet) { // calculate values of new attributes with single scan over attributes d = 0; for (Attribute attribute : inputAttributes) { double attributeValue = example.getValue(attribute); for (int i = 0; i < numberOfUsedComponents; i++) { derivedValues[i] += singularValueFactors[i][d] * attributeValue; } d++; } // set values for (int i = 0; i < numberOfUsedComponents; i++) { example.setValue(derivedAttributes[i], derivedValues[i]); } // set values back Arrays.fill(derivedValues, 0); } // now remove attributes if needed if (!keepAttributes) { for (Attribute attribute : inputAttributes) { attributes.remove(attribute); } } return exampleSet; }
/**
 * Returns the example table of the parent this object delegates to.
 *
 * @return whatever {@code parent.getExampleTable()} returns
 */
public ExampleTable getExampleTable() {
  final ExampleTable parentTable = parent.getExampleTable();
  return parentTable;
}
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { exampleSet.recalculateAllAttributeStatistics(); Attributes attributes = exampleSet.getAttributes(); if (attributeNames.length != attributes.size()) { throw new UserError(null, 133, numberOfComponents, attributes.size()); } // remember attributes that have been removed during training. These will be removed lateron Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()]; int d = 0; for (Attribute oldAttribute : getTrainingHeader().getAttributes()) { inputAttributes[d] = attributes.get(oldAttribute.getName()); d++; } // determining number of used components int numberOfUsedComponents = -1; if (manualNumber) { numberOfUsedComponents = numberOfComponents; } else { if (varianceThreshold == 0.0d) { numberOfUsedComponents = -1; } else { numberOfUsedComponents = 0; while (cumulativeVariance[numberOfUsedComponents] < varianceThreshold) { numberOfUsedComponents++; } numberOfUsedComponents++; if (numberOfUsedComponents == eigenVectors.size()) { numberOfUsedComponents--; } } } if (numberOfUsedComponents == -1) { // keep all components numberOfUsedComponents = attributes.size(); } // retrieve factors inside eigenVectors double[][] eigenValueFactors = new double[numberOfUsedComponents][attributeNames.length]; for (int i = 0; i < numberOfUsedComponents; i++) { eigenValueFactors[i] = this.eigenVectors.get(i).getEigenvector(); } // now build new attributes Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents]; for (int i = 0; i < numberOfUsedComponents; i++) { derivedAttributes[i] = AttributeFactory.createAttribute("pc_" + (i + 1), Ontology.REAL); exampleSet.getExampleTable().addAttribute(derivedAttributes[i]); attributes.addRegular(derivedAttributes[i]); } // now iterator through all examples and derive value of new features double[] derivedValues = new double[numberOfUsedComponents]; for (Example example : exampleSet) { // calculate values of new 
attributes with single scan over attributes d = 0; for (Attribute attribute : inputAttributes) { double attributeValue = example.getValue(attribute) - means[d]; for (int i = 0; i < numberOfUsedComponents; i++) { derivedValues[i] += eigenValueFactors[i][d] * attributeValue; } d++; } // set values for (int i = 0; i < numberOfUsedComponents; i++) { example.setValue(derivedAttributes[i], derivedValues[i]); } // set values back Arrays.fill(derivedValues, 0); } // now remove attributes if needed if (!keepAttributes) { for (Attribute attribute : inputAttributes) { attributes.remove(attribute); } } return exampleSet; }
public IOObject[] apply() throws OperatorException { ExampleSet exampleSet = getInput(ExampleSet.class); String labelName = getParameterAsString(PARAMETER_LABEL_NAME_STEM); int horizon = getParameterAsInt(PARAMETER_HORIZON); // TODO: check if appropriate label is there // TODO: check if window width is large enough // collect base names and attributes to remove, find label Attribute labelAttribute = null; List<String> baseNames = new LinkedList<String>(); List<Attribute> toRemove = new LinkedList<Attribute>(); int windowWidth = 0; for (Attribute attribute : exampleSet.getAttributes()) { if (attribute.getName().endsWith("-0")) { String baseName = attribute.getName().substring(0, attribute.getName().lastIndexOf("-")); baseNames.add(baseName); if (attribute.getName().startsWith(labelName)) { labelAttribute = attribute; } } int index = Integer.valueOf(attribute.getName().substring(attribute.getName().lastIndexOf("-") + 1)); windowWidth = Math.max(windowWidth, index); if ((index >= 0) && (index < horizon)) { toRemove.add(attribute); } } // remove horizon attributes for (Attribute attribute : toRemove) { exampleSet.getAttributes().remove(attribute); } // set label exampleSet.getAttributes().setLabel(labelAttribute); // transform all values relative to last known label attribute value and create base value // column if (getParameterAsBoolean(PARAMETER_RELATIVE_TRANSFORMATION)) { if (labelAttribute.isNumerical()) { Attribute baseValueAttribute = AttributeFactory.createAttribute("base_value", Ontology.REAL); exampleSet.getExampleTable().addAttribute(baseValueAttribute); exampleSet.getAttributes().setSpecialAttribute(baseValueAttribute, "base_value"); for (Example example : exampleSet) { // handle label String lastKnownLabelName = labelName + "-" + horizon; Attribute lastKnownLabelAttribute = exampleSet.getAttributes().get(lastKnownLabelName); double baseLabelValue = example.getValue(lastKnownLabelAttribute); example.setValue(baseValueAttribute, baseLabelValue); for (String 
baseName : baseNames) { String lastKnownBaseName = baseName + "-" + horizon; Attribute lastKnownBaseAttribute = exampleSet.getAttributes().get(lastKnownBaseName); double baseAttributeValue = example.getValue(lastKnownBaseAttribute); for (int w = horizon; w <= windowWidth; w++) { String currentName = baseName + "-" + w; Attribute currentAttribute = exampleSet.getAttributes().get(currentName); double currentValue = example.getValue(currentAttribute); example.setValue(currentAttribute, currentValue - baseAttributeValue); } } example.setValue(labelAttribute, example.getValue(labelAttribute) - baseLabelValue); } } } return new IOObject[] {exampleSet}; }