@Override
public AttributeWeights calculateWeights(ExampleSet exampleSet) throws OperatorException {
    Attributes attributes = exampleSet.getAttributes();
    Attribute labelAttribute = attributes.getLabel();
    boolean useSquaredCorrelation = getParameterAsBoolean(PARAMETER_SQUARED_CORRELATION);

    AttributeWeights weights = new AttributeWeights(exampleSet);
    getProgress().setTotal(attributes.size());
    int progressCounter = 0;
    int exampleSetSize = exampleSet.size();
    int exampleCounter = 0;
    for (Attribute attribute : attributes) {
        double correlation = MathFunctions.correlation(exampleSet, labelAttribute, attribute, useSquaredCorrelation);
        weights.setWeight(attribute.getName(), Math.abs(correlation));
        progressCounter++;
        exampleCounter += exampleSetSize;
        if (exampleCounter > PROGRESS_UPDATE_STEPS) {
            exampleCounter = 0;
            getProgress().setCompleted(progressCounter);
        }
    }
    return weights;
}
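// Note on the weighting above: each attribute a is scored as w(a) = |corr(label, a)|.
// Assuming MathFunctions.correlation returns the squared coefficient when the flag is
// set, that value is already non-negative, so Math.abs only matters for plain r.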
@Override
public void doWork() throws OperatorException {
    CentroidClusterModel model = modelInput.getData(CentroidClusterModel.class);
    Attributes trainAttributes = model.getTrainingHeader().getAttributes();
    String[] attributeNames = model.getAttributeNames();
    Attribute[] attributes = new Attribute[attributeNames.length + 1];
    for (int i = 0; i < attributeNames.length; i++) {
        Attribute originalAttribute = trainAttributes.get(attributeNames[i]);
        attributes[i] = AttributeFactory.createAttribute(attributeNames[i], originalAttribute.getValueType());
        if (originalAttribute.isNominal()) {
            attributes[i].setMapping((NominalMapping) originalAttribute.getMapping().clone());
        }
    }
    Attribute clusterAttribute = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
    attributes[attributes.length - 1] = clusterAttribute;

    MemoryExampleTable table = new MemoryExampleTable(attributes);
    for (int i = 0; i < model.getNumberOfClusters(); i++) {
        double[] data = new double[attributeNames.length + 1];
        System.arraycopy(model.getCentroidCoordinates(i), 0, data, 0, attributeNames.length);
        data[attributeNames.length] = clusterAttribute.getMapping().mapString("cluster_" + i);
        table.addDataRow(new DoubleArrayDataRow(data));
    }

    ExampleSet resultSet = table.createExampleSet();
    resultSet.getAttributes().setSpecialAttribute(clusterAttribute, Attributes.CLUSTER_NAME);

    modelOutput.deliver(model);
    exampleSetOutput.deliver(resultSet);
}
@Override
public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData();
    IEntityMapping userMapping = new EntityMapping();
    IEntityMapping itemMapping = new EntityMapping();
    IRatings trainingData = new Ratings();

    if (exampleSet.getAttributes().getSpecial("user identification") == null) {
        throw new UserError(this, 105);
    }
    if (exampleSet.getAttributes().getSpecial("item identification") == null) {
        throw new UserError(this, 105);
    }
    if (exampleSet.getAttributes().getLabel() == null) {
        throw new UserError(this, 105);
    }

    Attributes attributes = exampleSet.getAttributes();
    Attribute userAttribute = attributes.getRole("user identification").getAttribute();
    Attribute itemAttribute = attributes.getRole("item identification").getAttribute();
    Attribute labelAttribute = attributes.getLabel();

    // map external user/item ids to internal ids and collect the ratings
    for (Example example : exampleSet) {
        int uid = userMapping.ToInternalID((int) example.getValue(userAttribute));
        int iid = itemMapping.ToInternalID((int) example.getValue(itemAttribute));
        double rating = example.getValue(labelAttribute);
        trainingData.Add(uid, iid, rating);
    }

    _slopeOne recommendAlg = new _slopeOne();
    recommendAlg.user_mapping = userMapping;
    recommendAlg.item_mapping = itemMapping;
    recommendAlg.SetMinRating(getParameterAsInt("Min Rating"));
    recommendAlg.SetMaxRating(recommendAlg.GetMinRating() + getParameterAsInt("Range"));
    recommendAlg.SetRatings(trainingData);
    recommendAlg.Train();

    exampleSetOutput.deliver(exampleSet);
    exampleSetOutput1.deliver(recommendAlg);
}
private double[] getExampleValues(Example example) {
    Attributes attributes = example.getAttributes();
    double[] attributeValues = new double[attributes.size()];
    int i = 0;
    for (Attribute attribute : attributes) {
        attributeValues[i] = example.getValue(attribute);
        i++;
    }
    return attributeValues;
}
@Override
public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    Attributes attributes = exampleSet.getAttributes();
    Set<Attribute> attributeSubset = attributeSelector.getAttributeSubset(exampleSet, true);
    Iterator<Attribute> iterator = attributes.allAttributes();
    while (iterator.hasNext()) {
        Attribute attribute = iterator.next();
        if (!attributeSubset.contains(attribute)) {
            iterator.remove();
        }
    }
    return exampleSet;
}
@Override
public Model learn(ExampleSet exampleSet) throws OperatorException {
    DistanceMeasure measure = DistanceMeasures.createMeasure(this);
    measure.init(exampleSet);
    GeometricDataCollection<RegressionData> data = new LinearList<RegressionData>(measure);

    // check if weights should be used
    boolean useWeights = getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS);
    // check if a robust estimate should be performed: then calculate example weights and use them anyway
    if (getParameterAsBoolean(PARAMETER_USE_ROBUST_ESTIMATION)) {
        useWeights = true;
        LocalPolynomialExampleWeightingOperator weightingOperator;
        try {
            weightingOperator = OperatorService.createOperator(LocalPolynomialExampleWeightingOperator.class);
            exampleSet = weightingOperator.doWork((ExampleSet) exampleSet.clone(), this);
        } catch (OperatorCreationException e) {
            throw new UserError(this, 904, "LocalPolynomialExampleWeighting", e.getMessage());
        }
    }

    Attributes attributes = exampleSet.getAttributes();
    Attribute label = attributes.getLabel();
    Attribute weightAttribute = attributes.getWeight();
    for (Example example : exampleSet) {
        double[] values = new double[attributes.size()];
        double labelValue = example.getValue(label);
        double weight = 1d;
        if (weightAttribute != null && useWeights) {
            weight = example.getValue(weightAttribute);
        }
        // filter out examples without influence
        if (weight > 0d) {
            // copying example values
            int i = 0;
            for (Attribute attribute : attributes) {
                values[i] = example.getValue(attribute);
                i++;
            }
            // inserting into geometric data collection
            data.add(values, new RegressionData(values, labelValue, weight));
        }
    }
    return new LocalPolynomialRegressionModel(exampleSet, data, Neighborhoods.createNeighborhood(this),
            SmoothingKernels.createKernel(this), getParameterAsInt(PARAMETER_DEGREE),
            getParameterAsDouble(PARAMETER_RIDGE));
}
private double[] getMeanVector(ExampleSet exampleSet) {
    exampleSet.recalculateAllAttributeStatistics();
    Attributes attributes = exampleSet.getAttributes();
    double[] meanVector = new double[attributes.size()];
    int i = 0;
    for (Attribute attribute : attributes) {
        if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
            meanVector[i] = exampleSet.getStatistics(attribute, Statistics.MINIMUM);
        } else if (attribute.isNominal()) {
            meanVector[i] = exampleSet.getStatistics(attribute, Statistics.MODE);
        } else {
            meanVector[i] = exampleSet.getStatistics(attribute, Statistics.AVERAGE);
        }
        i++;
    }
    return meanVector;
}
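// Minimal usage sketch (hypothetical; assumes this helper and the
// getAttributeValues(Example, Attributes, double[]) helper shown further below
// live in the same class): center every example by the statistics-based "mean"
// vector (mode for nominal, minimum for date-time) before a covariance or
// projection computation.
double[] means = getMeanVector(exampleSet);
for (Example example : exampleSet) {
    double[] centered = getAttributeValues(example, exampleSet.getAttributes(), means);
    // ... feed 'centered' into the downstream computation
}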
@Override
public void init(ExampleSet exampleSet) throws OperatorException {
    super.init(exampleSet);
    Tools.onlyNumericalAttributes(exampleSet, "value based similarities");
    Attributes attributes = exampleSet.getAttributes();
    if (attributes.size() != 1) {
        throw new OperatorException(
                "The Bregman divergence you have chosen is not applicable for the data set (it requires exactly one regular attribute)! Proceeding with the 'Squared Euclidean distance' Bregman divergence.");
    }
    // this divergence is only defined for strictly positive values
    for (Example example : exampleSet) {
        for (Attribute attribute : attributes) {
            if (example.getValue(attribute) <= 0) {
                throw new OperatorException(
                        "The Bregman divergence you have chosen is not applicable for the data set (all attribute values must be positive)! Proceeding with the 'Squared Euclidean distance' Bregman divergence.");
            }
        }
    }
}
@Override
public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData();
    IPosOnlyFeedback trainingData = new PosOnlyFeedback();
    IEntityMapping userMapping = new EntityMapping();
    IEntityMapping itemMapping = new EntityMapping();

    if (exampleSet.getAttributes().getSpecial("user identification") == null) {
        throw new UserError(this, 105);
    }
    if (exampleSet.getAttributes().getSpecial("item identification") == null) {
        throw new UserError(this, 105);
    }

    Attributes attributes = exampleSet.getAttributes();
    Attribute userAttribute = attributes.getRole("user identification").getAttribute();
    Attribute itemAttribute = attributes.getRole("item identification").getAttribute();

    // map external user/item ids to internal ids and collect the positive feedback
    for (Example example : exampleSet) {
        int uid = (int) example.getValue(userAttribute);
        int iid = (int) example.getValue(itemAttribute);
        trainingData.Add(userMapping.ToInternalID(uid), itemMapping.ToInternalID(iid));
        checkForStop();
    }
    System.out.println(trainingData.GetMaxItemID() + " " + trainingData.GetMaxUserID());

    // Random here is the random item recommender of the wrapped library, not java.util.Random
    Random recommendAlg = new Random();
    recommendAlg.SetFeedback(trainingData);
    recommendAlg.user_mapping = userMapping;
    recommendAlg.item_mapping = itemMapping;
    recommendAlg.Train();

    exampleSetOutput.deliver(exampleSet);
    exampleSetOutput1.deliver(recommendAlg);
}
private double[] getAsDoubleArray(Example example, Attributes attributes) {
    double[] values = new double[attributes.size()];
    int i = 0;
    for (Attribute attribute : attributes) {
        values[i] = example.getValue(attribute);
        i++;
    }
    return values;
}
private double[] getAttributeValues(Example example, Attributes attributes, double[] means) {
    double[] values = new double[attributes.size()];
    int i = 0;
    for (Attribute attribute : attributes) {
        values[i] = example.getValue(attribute) - means[i];
        i++;
    }
    return values;
}
/**
 * Applies the sorted and unmatched attribute lists to the provided {@link Attributes}. All
 * unmatched attributes are removed from attributes, and all {@link Attribute}s from the sorted
 * list are added in the correct order.
 *
 * @param sortedAttributeList attributes that will be removed first and re-added in the correct
 *            order afterwards.
 * @param unmatchedAttributes attributes that should be removed. May be <code>null</code> if no
 *            attributes should be removed.
 */
private void applySortedAttributes(List<Attribute> sortedAttributeList, List<Attribute> unmatchedAttributes,
        Attributes attributes) {
    if (unmatchedAttributes != null) {
        for (Attribute unmatched : unmatchedAttributes) {
            attributes.remove(unmatched);
        }
    }
    for (Attribute attribute : sortedAttributeList) {
        AttributeRole role = attributes.getRole(attribute);
        attributes.remove(attribute);
        if (role.isSpecial()) {
            attributes.setSpecialAttribute(attribute, role.getSpecialName());
        } else {
            // regular
            attributes.addRegular(attribute);
        }
    }
}
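// Minimal usage sketch for applySortedAttributes (all attribute names here are
// hypothetical): put "age" before "income" and drop "comment" entirely.
List<Attribute> sortedAttributeList = new ArrayList<>();
sortedAttributeList.add(attributes.get("age"));
sortedAttributeList.add(attributes.get("income"));
List<Attribute> unmatchedAttributes = new ArrayList<>();
unmatchedAttributes.add(attributes.get("comment"));
applySortedAttributes(sortedAttributeList, unmatchedAttributes, attributes);
// 'attributes' now lists "age" before "income"; "comment" has been removed.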
private void setData(Example example, String attributeName, String value, Attributes attributes) throws UserError {
    Attribute attribute = attributes.get(attributeName);
    if (attribute == null) {
        throw new UserError(this, 111, attributeName);
    }
    if (attribute.isNominal()) {
        example.setValue(attribute, attribute.getMapping().mapString(value));
    } else {
        try {
            double doubleValue = Double.parseDouble(value);
            example.setValue(attribute, doubleValue);
        } catch (NumberFormatException e) {
            throw new UserError(this, 211, PARAMETER_VALUE, value);
        }
    }
}
private Attribute[] getMatchingAttributes(Attributes attributes, String regex) throws OperatorException {
    Pattern pattern = null;
    try {
        pattern = Pattern.compile(regex);
    } catch (PatternSyntaxException e) {
        throw new UserError(this, 206, regex, e.getMessage());
    }
    List<Attribute> attributeList = new LinkedList<Attribute>();
    Iterator<Attribute> iterator = attributes.allAttributes();
    while (iterator.hasNext()) {
        Attribute attribute = iterator.next();
        if (pattern.matcher(attribute.getName()).matches()) {
            attributeList.add(attribute);
        }
    }
    // building an array of attributes for faster access
    Attribute[] attributesArray = new Attribute[attributeList.size()];
    attributesArray = attributeList.toArray(attributesArray);
    return attributesArray;
}
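// Usage sketch (the regex and attribute names are assumptions for illustration):
// collect all attributes whose names start with "sensor_" into an array for
// faster indexed access inside the example loop.
Attribute[] sensorAttributes = getMatchingAttributes(exampleSet.getAttributes(), "sensor_.*");
for (Example example : exampleSet) {
    for (Attribute attribute : sensorAttributes) {
        double value = example.getValue(attribute);
        // ... process value
    }
}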
@Override
public ExampleSet applyOnData(ExampleSet exampleSet) throws OperatorException {
    Attributes attributes = exampleSet.getAttributes();

    // construct new attributes with generic names; keep the old attribute unless its type is
    // numerical but not yet real
    Attribute[] oldAttributes = new Attribute[attributes.size()];
    int i = 0;
    for (Attribute attribute : attributes) {
        oldAttributes[i] = attribute;
        i++;
    }
    Attribute[] newAttributes = new Attribute[attributes.size()];
    for (i = 0; i < newAttributes.length; i++) {
        newAttributes[i] = oldAttributes[i];
        if (oldAttributes[i].isNumerical()
                && !Ontology.ATTRIBUTE_VALUE_TYPE.isA(oldAttributes[i].getValueType(), Ontology.REAL)) {
            newAttributes[i] = AttributeFactory.createAttribute(Ontology.REAL);
            exampleSet.getExampleTable().addAttribute(newAttributes[i]);
            attributes.addRegular(newAttributes[i]);
        }
    }

    // applying on data
    applyOnData(exampleSet, oldAttributes, newAttributes);

    // remove the old attributes and rename the new attributes to the old names if needed
    for (i = 0; i < oldAttributes.length; i++) {
        attributes.remove(oldAttributes[i]);
        // if the attribute is new, remove it as well so it can be re-added in the correct order
        if (oldAttributes[i] != newAttributes[i]) {
            attributes.remove(newAttributes[i]);
        }
        attributes.addRegular(newAttributes[i]);
        newAttributes[i].setName(oldAttributes[i].getName());
    }
    return exampleSet;
}
@Override
public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    int k = getParameterAsInt(PARAMETER_K);
    int maxOptimizationSteps = getParameterAsInt(PARAMETER_MAX_OPTIMIZATION_STEPS);
    boolean useExampleWeights = getParameterAsBoolean(PARAMETER_USE_WEIGHTS);
    Kernel kernel = Kernel.createKernel(this);

    // init operator progress
    getProgress().setTotal(maxOptimizationSteps);

    // checking and creating ids if necessary
    Tools.checkAndCreateIds(exampleSet);

    // additional checks
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);
    if (exampleSet.size() < k) {
        throw new UserError(this, 142, k);
    }

    // extracting attribute names
    Attributes attributes = exampleSet.getAttributes();
    ArrayList<String> attributeNames = new ArrayList<String>(attributes.size());
    for (Attribute attribute : attributes) {
        attributeNames.add(attribute.getName());
    }
    Attribute weightAttribute = attributes.getWeight();
    // guard against a missing weight attribute (would otherwise NPE below)
    useExampleWeights = useExampleWeights && weightAttribute != null;

    RandomGenerator generator = RandomGenerator.getRandomGenerator(this);

    ClusterModel model = new ClusterModel(exampleSet, k,
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL),
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED));

    // init centroids: assign every example to a random cluster
    int[] clusterAssignments = new int[exampleSet.size()];
    for (int i = 0; i < exampleSet.size(); i++) {
        clusterAssignments[i] = generator.nextIntInRange(0, k);
    }

    // run optimization steps
    boolean stable = false;
    for (int step = 0; step < maxOptimizationSteps && !stable; step++) {
        // calculating cluster kernel properties
        double[] clusterWeights = new double[k];
        double[] clusterKernelCorrection = new double[k];
        int i = 0;
        for (Example firstExample : exampleSet) {
            double firstExampleWeight = useExampleWeights ? firstExample.getValue(weightAttribute) : 1d;
            double[] firstExampleValues = getAsDoubleArray(firstExample, attributes);
            clusterWeights[clusterAssignments[i]] += firstExampleWeight;
            int j = 0;
            for (Example secondExample : exampleSet) {
                if (clusterAssignments[i] == clusterAssignments[j]) {
                    double secondExampleWeight = useExampleWeights ? secondExample.getValue(weightAttribute) : 1d;
                    clusterKernelCorrection[clusterAssignments[i]] += firstExampleWeight * secondExampleWeight
                            * kernel.calculateDistance(firstExampleValues, getAsDoubleArray(secondExample, attributes));
                }
                j++;
            }
            i++;
        }
        for (int z = 0; z < k; z++) {
            clusterKernelCorrection[z] /= clusterWeights[z] * clusterWeights[z];
        }

        // assign examples to new centroids
        int[] newClusterAssignments = new int[exampleSet.size()];
        i = 0;
        for (Example example : exampleSet) {
            double[] exampleValues = getAsDoubleArray(example, attributes);
            double exampleKernelValue = kernel.calculateDistance(exampleValues, exampleValues);
            double nearestDistance = Double.POSITIVE_INFINITY;
            int nearestIndex = 0;
            for (int clusterIndex = 0; clusterIndex < k; clusterIndex++) {
                double distance = 0;
                // iterating over all examples in the cluster to get the kernel distance
                int j = 0;
                for (Example clusterExample : exampleSet) {
                    if (clusterAssignments[j] == clusterIndex) {
                        distance += (useExampleWeights ? clusterExample.getValue(weightAttribute) : 1d)
                                * kernel.calculateDistance(getAsDoubleArray(clusterExample, attributes), exampleValues);
                    }
                    j++;
                }
                distance *= -2d / clusterWeights[clusterIndex];
                distance += exampleKernelValue;
                distance += clusterKernelCorrection[clusterIndex];
                if (distance < nearestDistance) {
                    nearestDistance = distance;
                    nearestIndex = clusterIndex;
                }
            }
            newClusterAssignments[i] = nearestIndex;
            i++;
        }

        // finishing assignment: stable if no assignment changed
        stable = true;
        for (int j = 0; j < exampleSet.size() && stable; j++) {
            stable &= newClusterAssignments[j] == clusterAssignments[j];
        }
        clusterAssignments = newClusterAssignments;

        // trigger operator progress
        getProgress().step();
    }

    // setting last clustering into model
    model.setClusterAssignments(clusterAssignments, exampleSet);
    getProgress().complete();

    if (addsClusterAttribute()) {
        Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
        exampleSet.getExampleTable().addAttribute(cluster);
        exampleSet.getAttributes().setCluster(cluster);
        int i = 0;
        for (Example example : exampleSet) {
            example.setValue(cluster, "cluster_" + clusterAssignments[i]);
            i++;
        }
    }
    return model;
}
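// Note on the distance computation above: with cluster weight sums W_c, the implicit
// squared distance of an example x to the centroid of cluster c is expanded purely
// in kernel evaluations (Kernel.calculateDistance acts as the kernel function K):
//
//   ||phi(x) - mu_c||^2 = K(x, x)
//                         - (2 / W_c) * sum_{i in c} w_i * K(x_i, x)
//                         + (1 / W_c^2) * sum_{i,l in c} w_i * w_l * K(x_i, x_l)
//
// exampleKernelValue is the first term, the scan scaled by -2d / clusterWeights[...]
// the second, and clusterKernelCorrection[...] precomputes the third per cluster.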
@Override
public void doWork() throws OperatorException {
    ExampleSet exampleSetOriginal = exampleSetInput.getData(ExampleSet.class);
    ExampleSet exampleSet = (ExampleSet) exampleSetOriginal.clone();
    int numberOfAttributes = exampleSet.getAttributes().size();
    Attributes attributes = exampleSet.getAttributes();

    int maxNumberOfAttributes = Math.min(getParameterAsInt(PARAMETER_MAX_ATTRIBUTES), numberOfAttributes - 1);
    int maxNumberOfFails = getParameterAsInt(PARAMETER_ALLOWED_CONSECUTIVE_FAILS);
    int behavior = getParameterAsInt(PARAMETER_STOPPING_BEHAVIOR);

    boolean useRelativeDecrease = behavior == WITH_DECREASE_EXCEEDS
            && getParameterAsBoolean(PARAMETER_USE_RELATIVE_DECREASE);
    // note: the original guard only set maximalDecrease when the relative option was active,
    // leaving the absolute threshold at 0; the threshold is needed for both variants
    double maximalDecrease = 0;
    if (behavior == WITH_DECREASE_EXCEEDS) {
        maximalDecrease = useRelativeDecrease ? getParameterAsDouble(PARAMETER_MAX_RELATIVE_DECREASE)
                : getParameterAsDouble(PARAMETER_MAX_ABSOLUT_DECREASE);
    }
    double alpha = (behavior == WITH_DECREASE_SIGNIFICANT) ? getParameterAsDouble(PARAMETER_ALPHA) : 0d;

    // remembering the attributes
    Attribute[] attributeArray = new Attribute[numberOfAttributes];
    int i = 0;
    Iterator<Attribute> iterator = attributes.iterator();
    while (iterator.hasNext()) {
        Attribute attribute = iterator.next();
        attributeArray[i] = attribute;
        i++;
    }

    boolean[] selected = new boolean[numberOfAttributes];
    Arrays.fill(selected, true);

    boolean earlyAbort = false;
    List<Integer> speculativeList = new ArrayList<Integer>(maxNumberOfFails);
    int numberOfFails = maxNumberOfFails;
    currentNumberOfFeatures = numberOfAttributes;
    currentAttributes = attributes;
    PerformanceVector lastPerformance = getPerformance(exampleSet);
    PerformanceVector bestPerformanceEver = lastPerformance;
    for (i = 0; i < maxNumberOfAttributes && !earlyAbort; i++) {
        // setting values for logging
        currentNumberOfFeatures = numberOfAttributes - i - 1;

        // performing a round: tentatively switch each remaining attribute off
        int bestIndex = 0;
        PerformanceVector currentBestPerformance = null;
        for (int current = 0; current < numberOfAttributes; current++) {
            if (selected[current]) {
                // switching off
                attributes.remove(attributeArray[current]);
                currentAttributes = attributes;

                // evaluate performance
                PerformanceVector performance = getPerformance(exampleSet);
                if (currentBestPerformance == null || performance.compareTo(currentBestPerformance) > 0) {
                    bestIndex = current;
                    currentBestPerformance = performance;
                }

                // switching on
                attributes.addRegular(attributeArray[current]);
                currentAttributes = null; // removing reference
            }
        }
        double currentFitness = currentBestPerformance.getMainCriterion().getFitness();
        if (i != 0) {
            double lastFitness = lastPerformance.getMainCriterion().getFitness();
            // switch stopping behavior
            switch (behavior) {
                case WITH_DECREASE:
                    if (lastFitness >= currentFitness) {
                        earlyAbort = true;
                    }
                    break;
                case WITH_DECREASE_EXCEEDS:
                    if (useRelativeDecrease) {
                        // relative decrease testing
                        if (currentFitness < lastFitness - Math.abs(lastFitness * maximalDecrease)) {
                            earlyAbort = true;
                        }
                    } else {
                        // absolute decrease testing
                        if (currentFitness < lastFitness - maximalDecrease) {
                            earlyAbort = true;
                        }
                    }
                    break;
                case WITH_DECREASE_SIGNIFICANT:
                    AnovaCalculator calculator = new AnovaCalculator();
                    calculator.setAlpha(alpha);
                    PerformanceCriterion pc = currentBestPerformance.getMainCriterion();
                    calculator.addGroup(pc.getAverageCount(), pc.getAverage(), pc.getVariance());
                    pc = lastPerformance.getMainCriterion();
                    calculator.addGroup(pc.getAverageCount(), pc.getAverage(), pc.getVariance());

                    SignificanceTestResult result;
                    try {
                        result = calculator.performSignificanceTest();
                    } catch (SignificanceCalculationException e) {
                        throw new UserError(this, 920, e.getMessage());
                    }
                    if (lastFitness > currentFitness && result.getProbability() < alpha) {
                        earlyAbort = true;
                    }
            }
        }
        if (earlyAbort) {
            // check if there are some free tries left
            if (numberOfFails == 0) {
                break;
            }
            numberOfFails--;
            speculativeList.add(bestIndex);
            earlyAbort = false;

            // a later round needs to beat the better of the current and last performance
            if (currentBestPerformance.compareTo(lastPerformance) > 0) {
                lastPerformance = currentBestPerformance;
            }
        } else {
            // resetting the maximal number of fails
            numberOfFails = maxNumberOfFails;
            speculativeList.clear();
            lastPerformance = currentBestPerformance;
            bestPerformanceEver = currentBestPerformance;
        }

        // switching best index off
        attributes.remove(attributeArray[bestIndex]);
        selected[bestIndex] = false;
    }

    // re-add speculatively removed attributes: the speculative execution did not yield a good result
    for (Integer removeIndex : speculativeList) {
        selected[removeIndex] = true;
        attributes.addRegular(attributeArray[removeIndex]);
    }

    AttributeWeights weights = new AttributeWeights();
    i = 0;
    for (Attribute attribute : attributeArray) {
        weights.setWeight(attribute.getName(), selected[i] ? 1d : 0d);
        i++;
    }

    exampleSetOutput.deliver(exampleSet);
    performanceOutput.deliver(bestPerformanceEver);
    weightsOutput.deliver(weights);
}
@Override
public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // searching confidence attributes
    Attributes attributes = exampleSet.getAttributes();
    Attribute predictedLabel = attributes.getPredictedLabel();
    if (predictedLabel == null) {
        throw new UserError(this, 107);
    }

    NominalMapping mapping = predictedLabel.getMapping();
    int numberOfLabels = mapping.size();
    Attribute[] confidences = new Attribute[numberOfLabels];
    String[] labelValue = new String[numberOfLabels];
    int i = 0;
    for (String value : mapping.getValues()) {
        labelValue[i] = value;
        confidences[i] = attributes.getConfidence(value);
        if (confidences[i] == null) {
            throw new UserError(this, 154, value);
        }
        i++;
    }

    // generating new prediction attributes
    int k = Math.min(numberOfLabels, getParameterAsInt(PARAMETER_NUMBER_OF_RANKS));
    Attribute[] kthPredictions = new Attribute[k];
    Attribute[] kthConfidences = new Attribute[k];
    for (i = 0; i < k; i++) {
        kthPredictions[i] = AttributeFactory.createAttribute(predictedLabel.getValueType());
        kthPredictions[i].setName(predictedLabel.getName() + "_" + (i + 1));
        kthPredictions[i].setMapping((NominalMapping) predictedLabel.getMapping().clone());
        kthConfidences[i] = AttributeFactory.createAttribute(Ontology.REAL);
        kthConfidences[i].setName(Attributes.CONFIDENCE_NAME + "_" + (i + 1));
        attributes.addRegular(kthPredictions[i]);
        attributes.addRegular(kthConfidences[i]);
        attributes.setSpecialAttribute(kthPredictions[i], Attributes.PREDICTION_NAME + "_" + (i + 1));
        attributes.setSpecialAttribute(kthConfidences[i], Attributes.CONFIDENCE_NAME + "_" + (i + 1));
    }
    exampleSet.getExampleTable().addAttributes(Arrays.asList(kthConfidences));
    exampleSet.getExampleTable().addAttributes(Arrays.asList(kthPredictions));

    // now setting values
    for (Example example : exampleSet) {
        ArrayList<Tupel<Double, Integer>> labelConfidences = new ArrayList<Tupel<Double, Integer>>(numberOfLabels);
        for (i = 0; i < numberOfLabels; i++) {
            labelConfidences.add(new Tupel<Double, Integer>(example.getValue(confidences[i]), i));
        }
        Collections.sort(labelConfidences);
        for (i = 0; i < k; i++) {
            Tupel<Double, Integer> tupel = labelConfidences.get(numberOfLabels - i - 1);
            // the index can be used directly since the mapping has been cloned above
            example.setValue(kthPredictions[i], tupel.getSecond());
            example.setValue(kthConfidences[i], tupel.getFirst());
        }
    }

    // deleting old prediction / confidences
    attributes.remove(predictedLabel);
    if (getParameterAsBoolean(PARAMETER_REMOVE_OLD_PREDICTIONS)) {
        for (i = 0; i < confidences.length; i++) {
            attributes.remove(confidences[i]);
        }
    }
    return exampleSet;
}
@Override
public ExampleSet apply(ExampleSet inputExampleSet) throws OperatorException {
    ExampleSet exampleSet = (ExampleSet) inputExampleSet.clone();
    Attributes attributes = exampleSet.getAttributes();
    if (attributeNames.length != attributes.size()) {
        throw new UserError(null, 133, numberOfComponents, attributes.size());
    }

    // remember the attributes that were used during training; these will be removed later on
    Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()];
    int d = 0;
    for (Attribute oldAttribute : getTrainingHeader().getAttributes()) {
        inputAttributes[d] = attributes.get(oldAttribute.getName());
        d++;
    }

    // determining the number of used components
    int numberOfUsedComponents = -1;
    if (manualNumber) {
        numberOfUsedComponents = numberOfComponents;
    } else {
        if (proportionThreshold == 0.0d) {
            numberOfUsedComponents = -1;
        } else {
            numberOfUsedComponents = 0;
            while (cumulativeSingularValueProportion[numberOfUsedComponents] < proportionThreshold) {
                numberOfUsedComponents++;
            }
            numberOfUsedComponents++;
        }
    }
    // if nothing was defined or the number exceeds the maximal number of possible components
    if (numberOfUsedComponents == -1 || numberOfUsedComponents > getNumberOfComponents()) {
        // keep all components
        numberOfUsedComponents = getNumberOfComponents();
    }

    // retrieve the factors inside singularValueVectors
    double[][] singularValueFactors = new double[numberOfUsedComponents][attributeNames.length];
    double[][] vMatrixData = vMatrix.getArray();
    for (int i = 0; i < numberOfUsedComponents; i++) {
        double invertedSingularValue = 1d / singularValues[i];
        for (int j = 0; j < attributeNames.length; j++) {
            singularValueFactors[i][j] = vMatrixData[j][i] * invertedSingularValue;
        }
    }

    // now build the new attributes
    Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents];
    for (int i = 0; i < numberOfUsedComponents; i++) {
        if (useLegacyNames) {
            derivedAttributes[i] = AttributeFactory.createAttribute("d" + i, Ontology.REAL);
        } else {
            derivedAttributes[i] = AttributeFactory.createAttribute("svd_" + (i + 1), Ontology.REAL);
        }
        exampleSet.getExampleTable().addAttribute(derivedAttributes[i]);
        attributes.addRegular(derivedAttributes[i]);
    }

    // now iterate through all examples and derive the values of the new features
    double[] derivedValues = new double[numberOfUsedComponents];
    for (Example example : exampleSet) {
        // calculate the values of the new attributes with a single scan over the attributes
        d = 0;
        for (Attribute attribute : inputAttributes) {
            double attributeValue = example.getValue(attribute);
            for (int i = 0; i < numberOfUsedComponents; i++) {
                derivedValues[i] += singularValueFactors[i][d] * attributeValue;
            }
            d++;
        }

        // set the values
        for (int i = 0; i < numberOfUsedComponents; i++) {
            example.setValue(derivedAttributes[i], derivedValues[i]);
        }

        // reset the buffer
        Arrays.fill(derivedValues, 0);
    }

    // now remove the original attributes if needed
    if (!keepAttributes) {
        for (Attribute attribute : inputAttributes) {
            attributes.remove(attribute);
        }
    }
    return exampleSet;
}
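// Note: inverting the singular values once in singularValueFactors means the
// per-example work above is a plain dot product per component:
//   svd_i(x) = (1 / sigma_i) * sum_j V[j][i] * x_j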
@Override
public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    exampleSet.recalculateAllAttributeStatistics();
    Attributes attributes = exampleSet.getAttributes();
    if (attributeNames.length != attributes.size()) {
        throw new UserError(null, 133, numberOfComponents, attributes.size());
    }

    // remember the attributes that were used during training; these will be removed later on
    Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()];
    int d = 0;
    for (Attribute oldAttribute : getTrainingHeader().getAttributes()) {
        inputAttributes[d] = attributes.get(oldAttribute.getName());
        d++;
    }

    // determining the number of used components
    int numberOfUsedComponents = -1;
    if (manualNumber) {
        numberOfUsedComponents = numberOfComponents;
    } else {
        if (varianceThreshold == 0.0d) {
            numberOfUsedComponents = -1;
        } else {
            numberOfUsedComponents = 0;
            while (cumulativeVariance[numberOfUsedComponents] < varianceThreshold) {
                numberOfUsedComponents++;
            }
            numberOfUsedComponents++;
            if (numberOfUsedComponents == eigenVectors.size()) {
                numberOfUsedComponents--;
            }
        }
    }
    if (numberOfUsedComponents == -1) {
        // keep all components
        numberOfUsedComponents = attributes.size();
    }

    // retrieve the factors inside eigenVectors
    double[][] eigenValueFactors = new double[numberOfUsedComponents][attributeNames.length];
    for (int i = 0; i < numberOfUsedComponents; i++) {
        eigenValueFactors[i] = this.eigenVectors.get(i).getEigenvector();
    }

    // now build the new attributes
    Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents];
    for (int i = 0; i < numberOfUsedComponents; i++) {
        derivedAttributes[i] = AttributeFactory.createAttribute("pc_" + (i + 1), Ontology.REAL);
        exampleSet.getExampleTable().addAttribute(derivedAttributes[i]);
        attributes.addRegular(derivedAttributes[i]);
    }

    // now iterate through all examples and derive the values of the new features
    double[] derivedValues = new double[numberOfUsedComponents];
    for (Example example : exampleSet) {
        // calculate the values of the new attributes with a single scan over the attributes
        d = 0;
        for (Attribute attribute : inputAttributes) {
            double attributeValue = example.getValue(attribute) - means[d];
            for (int i = 0; i < numberOfUsedComponents; i++) {
                derivedValues[i] += eigenValueFactors[i][d] * attributeValue;
            }
            d++;
        }

        // set the values
        for (int i = 0; i < numberOfUsedComponents; i++) {
            example.setValue(derivedAttributes[i], derivedValues[i]);
        }

        // reset the buffer
        Arrays.fill(derivedValues, 0);
    }

    // now remove the original attributes if needed
    if (!keepAttributes) {
        for (Attribute attribute : inputAttributes) {
            attributes.remove(attribute);
        }
    }
    return exampleSet;
}
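// Note: each principal component value is the dot product of the mean-centered
// example with the corresponding eigenvector,
//   pc_i(x) = sum_d e_i[d] * (x_d - mean_d),
// which is exactly what the nested loop above accumulates in derivedValues.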