/** * Applies the sorted and unmachted attribute list to the provided {@link Attributes}. All * unmachted attributes are removed from attributes and all {@link Attribute}s from the sorted * list are added in correct order. * * @param sortedAttributeList attributes that will be removed first and added in correct order * afterwards. * @param unmachtedAttributes attributes that should be removed. May be <code>null</code> if no * attributes should be removed. */ private void applySortedAttributes( List<Attribute> sortedAttributeList, List<Attribute> unmachtedAttributes, Attributes attributes) { if (unmachtedAttributes != null) { for (Attribute unmachted : unmachtedAttributes) { attributes.remove(unmachted); } } for (Attribute attribute : sortedAttributeList) { AttributeRole role = attributes.getRole(attribute); attributes.remove(attribute); if (role.isSpecial()) { attributes.setSpecialAttribute(attribute, role.getSpecialName()); } else { // regular attributes.addRegular(attribute); } } }
@Override public ExampleSet applyOnData(ExampleSet exampleSet) throws OperatorException { Attributes attributes = exampleSet.getAttributes(); // constructing new attributes with generic names, holding old ones, if old type wasn't real Attribute[] oldAttributes = new Attribute[attributes.size()]; int i = 0; for (Attribute attribute : attributes) { oldAttributes[i] = attribute; i++; } Attribute[] newAttributes = new Attribute[attributes.size()]; for (i = 0; i < newAttributes.length; i++) { newAttributes[i] = oldAttributes[i]; if (oldAttributes[i].isNumerical()) if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(oldAttributes[i].getValueType(), Ontology.REAL)) { newAttributes[i] = AttributeFactory.createAttribute(Ontology.REAL); exampleSet.getExampleTable().addAttribute(newAttributes[i]); attributes.addRegular(newAttributes[i]); } } // applying on data applyOnData(exampleSet, oldAttributes, newAttributes); // removing old attributes and change new attributes name to old ones if needed for (i = 0; i < oldAttributes.length; i++) { attributes.remove(oldAttributes[i]); // if attribute is new, then remove for later storing in correct order if (oldAttributes[i] != newAttributes[i]) attributes.remove(newAttributes[i]); attributes.addRegular(newAttributes[i]); newAttributes[i].setName(oldAttributes[i].getName()); } return exampleSet; }
@Override public void doWork() throws OperatorException { ExampleSet exampleSetOriginal = exampleSetInput.getData(ExampleSet.class); ExampleSet exampleSet = (ExampleSet) exampleSetOriginal.clone(); int numberOfAttributes = exampleSet.getAttributes().size(); Attributes attributes = exampleSet.getAttributes(); int maxNumberOfAttributes = Math.min(getParameterAsInt(PARAMETER_MAX_ATTRIBUTES), numberOfAttributes - 1); int maxNumberOfFails = getParameterAsInt(PARAMETER_ALLOWED_CONSECUTIVE_FAILS); int behavior = getParameterAsInt(PARAMETER_STOPPING_BEHAVIOR); boolean useRelativeIncrease = (behavior == WITH_DECREASE_EXCEEDS) ? getParameterAsBoolean(PARAMETER_USE_RELATIVE_DECREASE) : false; double maximalDecrease = 0; if (useRelativeIncrease) maximalDecrease = useRelativeIncrease ? getParameterAsDouble(PARAMETER_MAX_RELATIVE_DECREASE) : getParameterAsDouble(PARAMETER_MAX_ABSOLUT_DECREASE); double alpha = (behavior == WITH_DECREASE_SIGNIFICANT) ? getParameterAsDouble(PARAMETER_ALPHA) : 0d; // remembering attributes and removing all from example set Attribute[] attributeArray = new Attribute[numberOfAttributes]; int i = 0; Iterator<Attribute> iterator = attributes.iterator(); while (iterator.hasNext()) { Attribute attribute = iterator.next(); attributeArray[i] = attribute; i++; } boolean[] selected = new boolean[numberOfAttributes]; Arrays.fill(selected, true); boolean earlyAbort = false; List<Integer> speculativeList = new ArrayList<Integer>(maxNumberOfFails); int numberOfFails = maxNumberOfFails; currentNumberOfFeatures = numberOfAttributes; currentAttributes = attributes; PerformanceVector lastPerformance = getPerformance(exampleSet); PerformanceVector bestPerformanceEver = lastPerformance; for (i = 0; i < maxNumberOfAttributes && !earlyAbort; i++) { // setting values for logging currentNumberOfFeatures = numberOfAttributes - i - 1; // performing a round int bestIndex = 0; PerformanceVector currentBestPerformance = null; for (int current = 0; current < numberOfAttributes; current++) { if (selected[current]) { // switching off attributes.remove(attributeArray[current]); currentAttributes = attributes; // evaluate performance PerformanceVector performance = getPerformance(exampleSet); if (currentBestPerformance == null || performance.compareTo(currentBestPerformance) > 0) { bestIndex = current; currentBestPerformance = performance; } // switching on attributes.addRegular(attributeArray[current]); currentAttributes = null; // removing reference } } double currentFitness = currentBestPerformance.getMainCriterion().getFitness(); if (i != 0) { double lastFitness = lastPerformance.getMainCriterion().getFitness(); // switch stopping behavior switch (behavior) { case WITH_DECREASE: if (lastFitness >= currentFitness) earlyAbort = true; break; case WITH_DECREASE_EXCEEDS: if (useRelativeIncrease) { // relative increase testing if (currentFitness < lastFitness - Math.abs(lastFitness * maximalDecrease)) earlyAbort = true; } else { // absolute increase testing if (currentFitness < lastFitness - maximalDecrease) earlyAbort = true; } break; case WITH_DECREASE_SIGNIFICANT: AnovaCalculator calculator = new AnovaCalculator(); calculator.setAlpha(alpha); PerformanceCriterion pc = currentBestPerformance.getMainCriterion(); calculator.addGroup(pc.getAverageCount(), pc.getAverage(), pc.getVariance()); pc = lastPerformance.getMainCriterion(); calculator.addGroup(pc.getAverageCount(), pc.getAverage(), pc.getVariance()); SignificanceTestResult result; try { result = calculator.performSignificanceTest(); } catch (SignificanceCalculationException e) { throw new UserError(this, 920, e.getMessage()); } if (lastFitness > currentFitness && result.getProbability() < alpha) earlyAbort = true; } } if (earlyAbort) { // check if there are some free tries left if (numberOfFails == 0) { break; } numberOfFails--; speculativeList.add(bestIndex); earlyAbort = false; // needs performance increase compared to better performance of current and last! if (currentBestPerformance.compareTo(lastPerformance) > 0) lastPerformance = currentBestPerformance; } else { // resetting maximal number of fails. numberOfFails = maxNumberOfFails; speculativeList.clear(); lastPerformance = currentBestPerformance; bestPerformanceEver = currentBestPerformance; } // switching best index off attributes.remove(attributeArray[bestIndex]); selected[bestIndex] = false; } // add predictively removed attributes: speculative execution did not yield good result for (Integer removeIndex : speculativeList) { selected[removeIndex] = true; attributes.addRegular(attributeArray[removeIndex]); } AttributeWeights weights = new AttributeWeights(); i = 0; for (Attribute attribute : attributeArray) { if (selected[i]) weights.setWeight(attribute.getName(), 1d); else weights.setWeight(attribute.getName(), 0d); i++; } exampleSetOutput.deliver(exampleSet); performanceOutput.deliver(bestPerformanceEver); weightsOutput.deliver(weights); }
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { // searching confidence attributes Attributes attributes = exampleSet.getAttributes(); Attribute predictedLabel = attributes.getPredictedLabel(); if (predictedLabel == null) { throw new UserError(this, 107); } NominalMapping mapping = predictedLabel.getMapping(); int numberOfLabels = mapping.size(); Attribute[] confidences = new Attribute[numberOfLabels]; String[] labelValue = new String[numberOfLabels]; int i = 0; for (String value : mapping.getValues()) { labelValue[i] = value; confidences[i] = attributes.getConfidence(value); if (confidences[i] == null) { throw new UserError(this, 154, value); } i++; } // generating new prediction attributes int k = Math.min(numberOfLabels, getParameterAsInt(PARAMETER_NUMBER_OF_RANKS)); Attribute[] kthPredictions = new Attribute[k]; Attribute[] kthConfidences = new Attribute[k]; for (i = 0; i < k; i++) { kthPredictions[i] = AttributeFactory.createAttribute(predictedLabel.getValueType()); kthPredictions[i].setName(predictedLabel.getName() + "_" + (i + 1)); kthPredictions[i].setMapping((NominalMapping) predictedLabel.getMapping().clone()); kthConfidences[i] = AttributeFactory.createAttribute(Ontology.REAL); kthConfidences[i].setName(Attributes.CONFIDENCE_NAME + "_" + (i + 1)); attributes.addRegular(kthPredictions[i]); attributes.addRegular(kthConfidences[i]); attributes.setSpecialAttribute(kthPredictions[i], Attributes.PREDICTION_NAME + "_" + (i + 1)); attributes.setSpecialAttribute(kthConfidences[i], Attributes.CONFIDENCE_NAME + "_" + (i + 1)); } exampleSet.getExampleTable().addAttributes(Arrays.asList(kthConfidences)); exampleSet.getExampleTable().addAttributes(Arrays.asList(kthPredictions)); // now setting values for (Example example : exampleSet) { ArrayList<Tupel<Double, Integer>> labelConfidences = new ArrayList<Tupel<Double, Integer>>(numberOfLabels); for (i = 0; i < numberOfLabels; i++) { labelConfidences.add(new Tupel<Double, Integer>(example.getValue(confidences[i]), i)); } Collections.sort(labelConfidences); for (i = 0; i < k; i++) { Tupel<Double, Integer> tupel = labelConfidences.get(numberOfLabels - i - 1); example.setValue( kthPredictions[i], tupel.getSecond()); // Can use index since mapping has been cloned from above example.setValue(kthConfidences[i], tupel.getFirst()); } } // deleting old prediction / confidences attributes.remove(predictedLabel); if (getParameterAsBoolean(PARAMETER_REMOVE_OLD_PREDICTIONS)) { for (i = 0; i < confidences.length; i++) { attributes.remove(confidences[i]); } } return exampleSet; }
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { exampleSet.recalculateAllAttributeStatistics(); Attributes attributes = exampleSet.getAttributes(); if (attributeNames.length != attributes.size()) { throw new UserError(null, 133, numberOfComponents, attributes.size()); } // remember attributes that have been removed during training. These will be removed lateron Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()]; int d = 0; for (Attribute oldAttribute : getTrainingHeader().getAttributes()) { inputAttributes[d] = attributes.get(oldAttribute.getName()); d++; } // determining number of used components int numberOfUsedComponents = -1; if (manualNumber) { numberOfUsedComponents = numberOfComponents; } else { if (varianceThreshold == 0.0d) { numberOfUsedComponents = -1; } else { numberOfUsedComponents = 0; while (cumulativeVariance[numberOfUsedComponents] < varianceThreshold) { numberOfUsedComponents++; } numberOfUsedComponents++; if (numberOfUsedComponents == eigenVectors.size()) { numberOfUsedComponents--; } } } if (numberOfUsedComponents == -1) { // keep all components numberOfUsedComponents = attributes.size(); } // retrieve factors inside eigenVectors double[][] eigenValueFactors = new double[numberOfUsedComponents][attributeNames.length]; for (int i = 0; i < numberOfUsedComponents; i++) { eigenValueFactors[i] = this.eigenVectors.get(i).getEigenvector(); } // now build new attributes Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents]; for (int i = 0; i < numberOfUsedComponents; i++) { derivedAttributes[i] = AttributeFactory.createAttribute("pc_" + (i + 1), Ontology.REAL); exampleSet.getExampleTable().addAttribute(derivedAttributes[i]); attributes.addRegular(derivedAttributes[i]); } // now iterator through all examples and derive value of new features double[] derivedValues = new double[numberOfUsedComponents]; for (Example example : exampleSet) { // calculate values of new attributes with single scan over attributes d = 0; for (Attribute attribute : inputAttributes) { double attributeValue = example.getValue(attribute) - means[d]; for (int i = 0; i < numberOfUsedComponents; i++) { derivedValues[i] += eigenValueFactors[i][d] * attributeValue; } d++; } // set values for (int i = 0; i < numberOfUsedComponents; i++) { example.setValue(derivedAttributes[i], derivedValues[i]); } // set values back Arrays.fill(derivedValues, 0); } // now remove attributes if needed if (!keepAttributes) { for (Attribute attribute : inputAttributes) { attributes.remove(attribute); } } return exampleSet; }
@Override public ExampleSet apply(ExampleSet inputExampleSet) throws OperatorException { ExampleSet exampleSet = (ExampleSet) inputExampleSet.clone(); Attributes attributes = exampleSet.getAttributes(); if (attributeNames.length != attributes.size()) { throw new UserError(null, 133, numberOfComponents, attributes.size()); } // remember attributes that have been removed during training. These will be removed lateron Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()]; int d = 0; for (Attribute oldAttribute : getTrainingHeader().getAttributes()) { inputAttributes[d] = attributes.get(oldAttribute.getName()); d++; } // determining number of used components int numberOfUsedComponents = -1; if (manualNumber) { numberOfUsedComponents = numberOfComponents; } else { if (proportionThreshold == 0.0d) { numberOfUsedComponents = -1; } else { numberOfUsedComponents = 0; while (cumulativeSingularValueProportion[numberOfUsedComponents] < proportionThreshold) { numberOfUsedComponents++; } numberOfUsedComponents++; } } // if nothing defined or number exceeds maximal number of possible components if (numberOfUsedComponents == -1 || numberOfUsedComponents > getNumberOfComponents()) { // keep all components numberOfUsedComponents = getNumberOfComponents(); } // retrieve factors inside singularValueVectors double[][] singularValueFactors = new double[numberOfUsedComponents][attributeNames.length]; double[][] vMatrixData = vMatrix.getArray(); for (int i = 0; i < numberOfUsedComponents; i++) { double invertedSingularValue = 1d / singularValues[i]; for (int j = 0; j < attributeNames.length; j++) { singularValueFactors[i][j] = vMatrixData[j][i] * invertedSingularValue; } } // now build new attributes Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents]; for (int i = 0; i < numberOfUsedComponents; i++) { if (useLegacyNames) { derivedAttributes[i] = AttributeFactory.createAttribute("d" + i, Ontology.REAL); } else { derivedAttributes[i] = AttributeFactory.createAttribute("svd_" + (i + 1), Ontology.REAL); } exampleSet.getExampleTable().addAttribute(derivedAttributes[i]); attributes.addRegular(derivedAttributes[i]); } // now iterator through all examples and derive value of new features double[] derivedValues = new double[numberOfUsedComponents]; for (Example example : exampleSet) { // calculate values of new attributes with single scan over attributes d = 0; for (Attribute attribute : inputAttributes) { double attributeValue = example.getValue(attribute); for (int i = 0; i < numberOfUsedComponents; i++) { derivedValues[i] += singularValueFactors[i][d] * attributeValue; } d++; } // set values for (int i = 0; i < numberOfUsedComponents; i++) { example.setValue(derivedAttributes[i], derivedValues[i]); } // set values back Arrays.fill(derivedValues, 0); } // now remove attributes if needed if (!keepAttributes) { for (Attribute attribute : inputAttributes) { attributes.remove(attribute); } } return exampleSet; }