@Override
public PerformanceVector evaluateIndividual(Individual individual) {
    double[] beta = individual.getValues();
    double fitness = 0.0d;
    for (Example example : exampleSet) {
        double eta = 0.0d;
        int i = 0;
        for (Attribute attribute : example.getAttributes()) {
            double value = example.getValue(attribute);
            eta += beta[i] * value;
            i++;
        }
        if (addIntercept) {
            eta += beta[beta.length - 1];
        }
        double pi = Math.exp(eta) / (1 + Math.exp(eta));
        double classValue = example.getValue(label);
        double currentFitness = classValue * Math.log(pi) + (1 - classValue) * Math.log(1 - pi);
        double weightValue = 1.0d;
        if (weight != null) {
            weightValue = example.getValue(weight);
        }
        fitness += weightValue * currentFitness;
    }
    PerformanceVector performanceVector = new PerformanceVector();
    performanceVector.addCriterion(
        new EstimatedPerformance("log_reg_fitness", fitness, exampleSet.size(), false));
    return performanceVector;
}
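// Note (not in the original source): the fitness computed above is the weighted Bernoulli
// log-likelihood of the logistic model,
//     L(beta) = sum_i w_i * [ y_i * log(pi_i) + (1 - y_i) * log(1 - pi_i) ],
// with pi_i = exp(eta_i) / (1 + exp(eta_i)) and eta_i the linear score of example i.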
/**
 * Creates a fresh example set of the given size from the RapidMiner example reader. The alpha
 * values and b are zero, the label will be set if it is known.
 */
public SVMExamples(
        com.rapidminer.example.ExampleSet exampleSet,
        Attribute labelAttribute,
        Map<Integer, MeanVariance> meanVariances) {
    this(exampleSet.size(), 0.0d);
    this.meanVarianceMap = meanVariances;
    Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
    Attribute idAttribute = exampleSet.getAttributes().getId();
    int exampleCounter = 0;
    while (reader.hasNext()) {
        com.rapidminer.example.Example current = reader.next();
        Map<Integer, Double> attributeMap = new LinkedHashMap<Integer, Double>();
        int a = 0;
        for (Attribute attribute : exampleSet.getAttributes()) {
            double value = current.getValue(attribute);
            if (!com.rapidminer.example.Tools.isDefault(attribute.getDefault(), value)) {
                attributeMap.put(a, value);
            }
            if ((a + 1) > dim) {
                dim = (a + 1);
            }
            a++;
        }
        atts[exampleCounter] = new double[attributeMap.size()];
        index[exampleCounter] = new int[attributeMap.size()];
        Iterator<Map.Entry<Integer, Double>> i = attributeMap.entrySet().iterator();
        int attributeCounter = 0;
        while (i.hasNext()) {
            Map.Entry<Integer, Double> e = i.next();
            Integer indexValue = e.getKey();
            Double attributeValue = e.getValue();
            index[exampleCounter][attributeCounter] = indexValue.intValue();
            double value = attributeValue.doubleValue();
            MeanVariance meanVariance = meanVarianceMap.get(indexValue);
            if (meanVariance != null) {
                if (meanVariance.getVariance() == 0.0d) {
                    value = 0.0d;
                } else {
                    value = (value - meanVariance.getMean()) / Math.sqrt(meanVariance.getVariance());
                }
            }
            atts[exampleCounter][attributeCounter] = value;
            attributeCounter++;
        }
        if (labelAttribute != null) {
            double label = current.getValue(labelAttribute);
            if (labelAttribute.isNominal()) {
                ys[exampleCounter] = (label == labelAttribute.getMapping().getPositiveIndex() ? 1 : -1);
            } else {
                ys[exampleCounter] = label;
            }
        }
        if (idAttribute != null) {
            ids[exampleCounter] = current.getValueAsString(idAttribute);
        }
        exampleCounter++;
    }
}
public Split getBestSplit(ExampleSet inputSet, Attribute attribute, String labelName) {
    SortedExampleSet exampleSet =
        new SortedExampleSet((ExampleSet) inputSet.clone(), attribute, SortedExampleSet.INCREASING);
    Attribute labelAttribute = exampleSet.getAttributes().getLabel();
    int labelIndex = labelAttribute.getMapping().mapString(labelName);
    double oldLabel = Double.NaN;
    double bestSplit = Double.NaN;
    double lastValue = Double.NaN;
    double bestBenefit = Double.NEGATIVE_INFINITY;
    double bestTotalWeight = 0;
    int bestSplitType = Split.LESS_SPLIT;

    // initialize online counting of the benefit: only two data scans are needed then
    criterion.reinitOnlineCounting(exampleSet);
    for (Example e : exampleSet) {
        double currentValue = e.getValue(attribute);
        double label = e.getValue(labelAttribute);
        if ((Double.isNaN(oldLabel)) || (oldLabel != label) && (lastValue != currentValue)) {
            double splitValue = (lastValue + currentValue) / 2.0d;
            double[] benefits;
            if (labelName == null) {
                benefits = criterion.getOnlineBenefit(e);
            } else {
                benefits = criterion.getOnlineBenefit(e, labelIndex);
            }
            // the online method returns both possible relations (greater / smaller) in one array
            if ((benefits[0] > minValue)
                    && (benefits[0] > 0)
                    && (benefits[1] > 0)
                    && ((benefits[0] > bestBenefit)
                        || ((benefits[0] == bestBenefit) && (benefits[1] > bestTotalWeight)))) {
                bestBenefit = benefits[0];
                bestSplit = splitValue;
                bestTotalWeight = benefits[1];
                bestSplitType = Split.LESS_SPLIT;
            }
            if ((benefits[2] > minValue)
                    && (benefits[2] > 0)
                    && (benefits[3] > 0)
                    && ((benefits[2] > bestBenefit)
                        || ((benefits[2] == bestBenefit) && (benefits[3] > bestTotalWeight)))) {
                bestBenefit = benefits[2];
                bestSplit = splitValue;
                bestTotalWeight = benefits[3];
                bestSplitType = Split.GREATER_SPLIT;
            }
            oldLabel = label;
        }
        lastValue = currentValue;
        criterion.update(e);
    }
    return new Split(bestSplit, new double[] {bestBenefit, bestTotalWeight}, bestSplitType);
}
@Override
public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData();
    IEntityMapping user_mapping = new EntityMapping();
    IEntityMapping item_mapping = new EntityMapping();
    IRatings training_data = new Ratings();

    if (exampleSet.getAttributes().getSpecial("user identification") == null) {
        throw new UserError(this, 105);
    }
    if (exampleSet.getAttributes().getSpecial("item identification") == null) {
        throw new UserError(this, 105);
    }
    if (exampleSet.getAttributes().getLabel() == null) {
        throw new UserError(this, 105);
    }

    Attributes Att = exampleSet.getAttributes();
    AttributeRole ur = Att.getRole("user identification");
    Attribute u = ur.getAttribute();
    AttributeRole ir = Att.getRole("item identification");
    Attribute i = ir.getAttribute();
    Attribute ui = Att.getLabel();

    for (Example example : exampleSet) {
        double j = example.getValue(u);
        int uid = user_mapping.ToInternalID((int) j);
        j = example.getValue(i);
        int iid = item_mapping.ToInternalID((int) j);
        double r = example.getValue(ui);
        training_data.Add(uid, iid, r);
    }

    _slopeOne recommendAlg = new _slopeOne();
    recommendAlg.user_mapping = user_mapping;
    recommendAlg.item_mapping = item_mapping;
    recommendAlg.SetMinRating(getParameterAsInt("Min Rating"));
    recommendAlg.SetMaxRating(recommendAlg.GetMinRating() + getParameterAsInt("Range"));
    recommendAlg.SetRatings(training_data);
    recommendAlg.Train();

    exampleSetOutput.deliver(exampleSet);
    exampleSetOutput1.deliver(recommendAlg);
}
public void update(Example example) {
    int labelIndex = (int) example.getValue(labelAttribute);
    if (weightAttribute != null) {
        double currentWeight = example.getValue(weightAttribute);
        labelWeights[labelIndex] += currentWeight;
        weight += currentWeight;
    } else {
        labelWeights[labelIndex] += 1d;
        weight += 1d;
    }
}
@Override
public Model learn(ExampleSet exampleSet) throws OperatorException {
    DistanceMeasure measure = DistanceMeasures.createMeasure(this);
    measure.init(exampleSet);
    GeometricDataCollection<RegressionData> data = new LinearList<RegressionData>(measure);

    // check if weights should be used
    boolean useWeights = getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS);
    // check if a robust estimation should be performed: then calculate the weights and use them anyway
    if (getParameterAsBoolean(PARAMETER_USE_ROBUST_ESTIMATION)) {
        useWeights = true;
        LocalPolynomialExampleWeightingOperator weightingOperator;
        try {
            weightingOperator =
                OperatorService.createOperator(LocalPolynomialExampleWeightingOperator.class);
            exampleSet = weightingOperator.doWork((ExampleSet) exampleSet.clone(), this);
        } catch (OperatorCreationException e) {
            throw new UserError(this, 904, "LocalPolynomialExampleWeighting", e.getMessage());
        }
    }

    Attributes attributes = exampleSet.getAttributes();
    Attribute label = attributes.getLabel();
    Attribute weightAttribute = attributes.getWeight();
    for (Example example : exampleSet) {
        double[] values = new double[attributes.size()];
        double labelValue = example.getValue(label);
        double weight = 1d;
        if (weightAttribute != null && useWeights) {
            weight = example.getValue(weightAttribute);
        }
        // filter out examples without influence
        if (weight > 0d) {
            // copy the example values
            int i = 0;
            for (Attribute attribute : attributes) {
                values[i] = example.getValue(attribute);
                i++;
            }
            // insert into the geometric data collection
            data.add(values, new RegressionData(values, labelValue, weight));
        }
    }
    return new LocalPolynomialRegressionModel(
        exampleSet,
        data,
        Neighborhoods.createNeighborhood(this),
        SmoothingKernels.createKernel(this),
        getParameterAsInt(PARAMETER_DEGREE),
        getParameterAsDouble(PARAMETER_RIDGE));
}
private double[] estimateVariance() {
    double[] beta = getBestValuesEver();
    Matrix hessian = new Matrix(beta.length, beta.length);
    for (Example example : exampleSet) {
        double[] values = new double[beta.length];
        double eta = 0.0d;
        int j = 0;
        for (Attribute attribute : example.getAttributes()) {
            double value = example.getValue(attribute);
            values[j] = value;
            eta += beta[j] * value;
            j++;
        }
        if (addIntercept) {
            values[beta.length - 1] = 1.0d;
            eta += beta[beta.length - 1];
        }
        double pi = Math.exp(eta) / (1 + Math.exp(eta));
        double weightValue = 1.0d;
        if (weight != null) {
            weightValue = example.getValue(weight);
        }
        for (int x = 0; x < beta.length; x++) {
            for (int y = 0; y < beta.length; y++) {
                // sum is second derivative of log likelihood function
                double h = hessian.get(x, y) - values[x] * values[y] * weightValue * pi * (1 - pi);
                hessian.set(x, y, h);
            }
        }
    }
    double[] variance = new double[beta.length];
    Matrix varianceCovarianceMatrix = null;
    try {
        // asymptotic variance-covariance matrix is inverse of hessian matrix
        varianceCovarianceMatrix = hessian.inverse();
    } catch (Exception e) {
        logging.logWarning("could not determine variance-covariance matrix, hessian is singular");
        for (int j = 0; j < beta.length; j++) {
            variance[j] = Double.NaN;
        }
        return variance;
    }
    for (int j = 0; j < beta.length; j++) {
        // get diagonal elements
        variance[j] = Math.abs(varianceCovarianceMatrix.get(j, j));
    }
    return variance;
}
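// Note (not in the original source): the loop above accumulates the observed Hessian of the
// logistic log-likelihood,
//     H[x][y] = - sum_i w_i * x_{i,x} * x_{i,y} * pi_i * (1 - pi_i),
// and the asymptotic coefficient variances are read off the diagonal of H^{-1}, taken in
// absolute value since H itself is negative semi-definite.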
private NeuralDataSet getTraining(ExampleSet exampleSet) {
    double[][] data = new double[exampleSet.size()][exampleSet.getAttributes().size()];
    double[][] labels = new double[exampleSet.size()][1];
    int index = 0;
    Attribute label = exampleSet.getAttributes().getLabel();
    this.attributeMin = new double[exampleSet.getAttributes().size()];
    this.attributeMax = new double[attributeMin.length];
    exampleSet.recalculateAllAttributeStatistics();
    int a = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
        this.attributeMin[a] = exampleSet.getStatistics(attribute, Statistics.MINIMUM);
        this.attributeMax[a] = exampleSet.getStatistics(attribute, Statistics.MAXIMUM);
        a++;
    }
    this.labelMin = exampleSet.getStatistics(label, Statistics.MINIMUM);
    this.labelMax = exampleSet.getStatistics(label, Statistics.MAXIMUM);

    for (Example example : exampleSet) {
        // attributes
        a = 0;
        for (Attribute attribute : exampleSet.getAttributes()) {
            if (attributeMin[a] != attributeMax[a]) {
                data[index][a] =
                    (example.getValue(attribute) - attributeMin[a]) / (attributeMax[a] - attributeMin[a]);
            } else {
                data[index][a] = example.getValue(attribute) - attributeMin[a];
            }
            a++;
        }
        // label
        if (label.isNominal()) {
            labels[index][0] = example.getValue(label);
        } else {
            if (labelMax != labelMin) {
                labels[index][0] = (example.getValue(label) - labelMin) / (labelMax - labelMin);
            } else {
                labels[index][0] = example.getValue(label) - labelMin;
            }
        }
        index++;
    }
    return new BasicNeuralDataSet(data, labels);
}
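// Note (not in the original source): attributes and a numerical label are rescaled to [0, 1]
// via min-max normalization, value' = (value - min) / (max - min); when min == max the
// unscaled difference value - min (always 0) is used instead to avoid a division by zero.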
@Override
public Model learn(ExampleSet exampleSet) throws OperatorException {
    Kernel kernel = getKernel();
    kernel.init(exampleSet);

    double initLearnRate = getParameterAsDouble(PARAMETER_LEARNING_RATE);
    NominalMapping labelMapping = exampleSet.getAttributes().getLabel().getMapping();
    String classNeg = labelMapping.getNegativeString();
    String classPos = labelMapping.getPositiveString();
    double classValueNeg = labelMapping.getNegativeIndex();
    int numberOfAttributes = exampleSet.getAttributes().size();
    HyperplaneModel model = new HyperplaneModel(exampleSet, classNeg, classPos, kernel);
    model.init(new double[numberOfAttributes], 0);

    for (int round = 0; round <= getParameterAsInt(PARAMETER_ROUNDS); round++) {
        double learnRate = getLearnRate(round, getParameterAsInt(PARAMETER_ROUNDS), initLearnRate);
        Attributes attributes = exampleSet.getAttributes();
        for (Example example : exampleSet) {
            double prediction = model.predict(example);
            if (prediction != example.getLabel()) {
                double direction = (example.getLabel() == classValueNeg) ? -1 : 1;
                // adapting intercept
                model.setIntercept(model.getIntercept() + learnRate * direction);
                // adapting coefficients
                double[] coefficients = model.getCoefficients();
                int i = 0;
                for (Attribute attribute : attributes) {
                    coefficients[i] += learnRate * direction * example.getValue(attribute);
                    i++;
                }
            }
        }
    }
    return model;
}
@Override
public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    String attributeName = getParameterAsString(PARAMETER_ATTRIBUTE_NAME);
    Long offset = getParameterAsLong(PARMETER_TIME_OFFSET);
    Attribute numericalAttribute = exampleSet.getAttributes().get(attributeName);
    if (numericalAttribute == null) {
        throw new UserError(this, 111, attributeName);
    }

    Attribute newAttribute = AttributeFactory.createAttribute(Ontology.DATE_TIME);
    exampleSet.getExampleTable().addAttribute(newAttribute);
    exampleSet.getAttributes().addRegular(newAttribute);

    for (Example example : exampleSet) {
        double value = example.getValue(numericalAttribute);
        if (Double.isNaN(value)) {
            example.setValue(newAttribute, value);
        } else {
            value += offset;
            example.setValue(newAttribute, value);
        }
    }

    if (!getParameterAsBoolean(PARAMETER_KEEP_OLD_ATTRIBUTE)) {
        AttributeRole oldRole = exampleSet.getAttributes().getRole(numericalAttribute);
        exampleSet.getAttributes().remove(numericalAttribute);
        newAttribute.setName(attributeName);
        exampleSet.getAttributes().setSpecialAttribute(newAttribute, oldRole.getSpecialName());
    } else {
        newAttribute.setName(attributeName + "_AS_DATE");
    }
    return exampleSet;
}
@Override
public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
    boolean round = getParameterAsBoolean(PARAMETER_ROUND);
    List<Attribute> newAttributes = new LinkedList<Attribute>();
    Iterator<Attribute> a = exampleSet.getAttributes().iterator();
    while (a.hasNext()) {
        Attribute attribute = a.next();
        if ((Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.NUMERICAL))
                && (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.INTEGER))) {
            Attribute newAttribute = AttributeFactory.createAttribute(attribute.getName(), Ontology.INTEGER);
            newAttributes.add(newAttribute);
            exampleSet.getExampleTable().addAttribute(newAttribute);

            for (Example example : exampleSet) {
                double originalValue = example.getValue(attribute);
                if (Double.isNaN(originalValue)) {
                    example.setValue(newAttribute, Double.NaN);
                } else {
                    long newValue = round ? Math.round(originalValue) : (long) originalValue;
                    example.setValue(newAttribute, newValue);
                }
            }
            a.remove();
        }
    }
    for (Attribute attribute : newAttributes) {
        exampleSet.getAttributes().addRegular(attribute);
    }
    return exampleSet;
}
private static Map<Integer, MeanVariance> createMeanVariances(
        com.rapidminer.example.ExampleSet exampleSet) {
    double[] sum = new double[exampleSet.getAttributes().size()];
    double[] squaredSum = new double[sum.length];
    Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
    while (reader.hasNext()) {
        com.rapidminer.example.Example example = reader.next();
        int a = 0;
        for (Attribute attribute : exampleSet.getAttributes()) {
            double value = example.getValue(attribute);
            sum[a] += value;
            squaredSum[a] += value * value;
            a++;
        }
    }
    Map<Integer, MeanVariance> meanVariances = new HashMap<Integer, MeanVariance>();
    for (int a = 0; a < sum.length; a++) {
        sum[a] /= exampleSet.size();
        squaredSum[a] /= exampleSet.size();
        meanVariances.put(a, new MeanVariance(sum[a], squaredSum[a] - (sum[a] * sum[a])));
    }
    return meanVariances;
}
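// Note (not in the original source): each attribute's variance is derived from the two running
// sums via the shortcut formula Var(X) = E[X^2] - (E[X])^2, which needs only a single pass
// over the example set.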
@Override
public double predict(Example example) throws OperatorException {
    int i = 0;
    double distance = intercept;
    // using kernel for distance calculation
    double[] values = new double[example.getAttributes().size()];
    for (Attribute currentAttribute : example.getAttributes()) {
        values[i] = example.getValue(currentAttribute);
        i++;
    }
    distance += kernel.calculateDistance(values, coefficients);

    if (getLabel().isNominal()) {
        int positiveMapping = getLabel().getMapping().mapString(classPositive);
        int negativeMapping = getLabel().getMapping().mapString(classNegative);
        boolean isApplying = example.getAttributes().getPredictedLabel() != null;
        if (isApplying) {
            example.setConfidence(classPositive, 1.0d / (1.0d + java.lang.Math.exp(-distance)));
            example.setConfidence(classNegative, 1.0d / (1.0d + java.lang.Math.exp(distance)));
        }
        if (distance < 0) {
            return negativeMapping;
        } else {
            return positiveMapping;
        }
    } else {
        return distance;
    }
}
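// Note (not in the original source): for nominal labels the confidences set above are the
// logistic transform of the signed distance to the hyperplane,
//     P(positive) = 1 / (1 + exp(-distance)),   P(negative) = 1 / (1 + exp(distance)),
// so the two confidences always sum to 1.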
/**
 * Computes Kendall's tau-b rank correlation statistic, ignoring examples containing missing
 * values, with approximate comparisons.
 *
 * @param eSet the example set
 * @param a the first attribute to correlate
 * @param b the second attribute to correlate
 * @param fuzz values within +/- fuzz may be considered tied
 * @return Kendall's tau-b rank correlation
 * @throws OperatorException
 */
public static double tau_b(ExampleSet eSet, Attribute a, Attribute b, double fuzz)
        throws OperatorException {
    ExampleSet e = extract(eSet, a, b); // reduced example set
    FuzzyComp fc = new FuzzyComp(fuzz);
    int c = 0;  // concordant pairs
    int d = 0;  // discordant pairs
    int ta = 0; // pairs tied on a (only)
    int tb = 0; // pairs tied on b (only)
    int tc = 0; // pairs tied on both a and b
    int n = 0;  // number of times iterator i is bumped
    Iterator<Example> i = e.iterator();
    while (i.hasNext()) { // iterate through all possible pairs
        Example z1 = i.next();
        n++;
        double x = z1.getValue(a);
        double y = z1.getValue(b);
        if (b.isNominal() && a != null) {
            String yString = b.getMapping().mapIndex((int) y);
            y = a.getMapping().getIndex(yString);
        }
        Iterator<Example> j = e.iterator();
        for (int k = 0; k < n; k++) {
            j.next(); // increment j to match i
        }
        while (j.hasNext()) { // move on to subsequent examples
            Example z2 = j.next();
            double xx = z2.getValue(a);
            double yy = z2.getValue(b);
            if (b.isNominal() && a != null) {
                String yyString = b.getMapping().mapIndex((int) yy);
                yy = a.getMapping().getIndex(yyString);
            }
            int xc = fc.compare(x, xx);
            int yc = fc.compare(y, yy);
            if (xc == 0) {
                if (yc == 0) {
                    tc++; // tied on both attributes
                } else {
                    ta++; // tied only on a
                }
            } else if (yc == 0) {
                tb++; // tied only on b
            } else if (xc == yc) {
                c++; // concordant pair
            } else {
                d++; // discordant pair
            }
        }
    }
    double num = c - d;
    double den = Math.sqrt((c + d + ta) * (c + d + tb));
    if (den != 0) {
        return num / den;
    } else {
        return 0;
    }
}
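// Note (not in the original source): with the pair counts above, the returned statistic is
// Kendall's tau-b,
//     tau_b = (c - d) / sqrt((c + d + ta) * (c + d + tb)),
// where pairs tied on both attributes (tc) enter neither the numerator nor the denominator.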
/** Returns true if the label was not defined. */
@Override
public boolean conditionOk(Example example) {
    return Double.isNaN(example.getValue(example.getAttributes().getLabel()));
}
@Override
public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData();
    IPosOnlyFeedback training_data = new PosOnlyFeedback();
    IEntityMapping user_mapping = new EntityMapping();
    IEntityMapping item_mapping = new EntityMapping();

    if (exampleSet.getAttributes().getSpecial("user identification") == null) {
        throw new UserError(this, 105);
    }
    if (exampleSet.getAttributes().getSpecial("item identification") == null) {
        throw new UserError(this, 105);
    }

    Attributes Att = exampleSet.getAttributes();
    AttributeRole ur = Att.getRole("user identification");
    Attribute u = ur.getAttribute();
    AttributeRole ir = Att.getRole("item identification");
    Attribute i = ir.getAttribute();

    for (Example example : exampleSet) {
        double j = example.getValue(u);
        int uid = (int) j;
        j = example.getValue(i);
        int iid = (int) j;
        training_data.Add(user_mapping.ToInternalID(uid), item_mapping.ToInternalID(iid));
        checkForStop();
    }
    System.out.println(training_data.GetMaxItemID() + " " + training_data.GetMaxUserID());

    Random recommendAlg = new Random();
    recommendAlg.SetFeedback(training_data);
    recommendAlg.user_mapping = user_mapping;
    recommendAlg.item_mapping = item_mapping;
    recommendAlg.Train();

    exampleSetOutput.deliver(exampleSet);
    exampleSetOutput1.deliver(recommendAlg);
}
/**
 * Computes Kendall's tau-b rank correlation statistic, ignoring examples containing missing
 * values.
 *
 * @param eSet the example set
 * @param a the first attribute to correlate
 * @param b the second attribute to correlate
 * @return Kendall's tau-b rank correlation
 * @throws OperatorException
 */
public static double tau_b(ExampleSet eSet, Attribute a, Attribute b) throws OperatorException {
    ExampleSet e = extract(eSet, a, b); // reduced example set
    long c = 0;  // concordant pairs
    long d = 0;  // discordant pairs
    long ta = 0; // pairs tied on a (only)
    long tb = 0; // pairs tied on b (only)
    long tc = 0; // pairs tied on both a and b
    int n = 0;   // number of times iterator i is bumped
    Iterator<Example> i = e.iterator();
    while (i.hasNext()) { // iterate through all possible pairs
        Example z1 = i.next();
        n++;
        double x = z1.getValue(a);
        double y = z1.getValue(b);
        if (b.isNominal() && a != null) {
            String yString = b.getMapping().mapIndex((int) y);
            y = a.getMapping().getIndex(yString);
        }
        Iterator<Example> j = e.iterator();
        for (int k = 0; k < n; k++) {
            j.next(); // increment j to match i
        }
        while (j.hasNext()) { // move on to subsequent examples
            Example z2 = j.next();
            double xx = z2.getValue(a);
            double yy = z2.getValue(b);
            if (b.isNominal() && a != null) {
                String yyString = b.getMapping().mapIndex((int) yy);
                yy = a.getMapping().getIndex(yyString);
            }
            if (x == xx) {
                if (y == yy) {
                    tc++; // tied on both attributes
                } else {
                    ta++; // tied only on a
                }
            } else if (y == yy) {
                tb++; // tied only on b
            } else if ((x > xx && y > yy) || (x < xx && y < yy)) {
                c++; // concordant pair
            } else {
                d++; // discordant pair
            }
        }
    }
    double num = c - d;
    double f1 = c + d + ta;
    double f2 = c + d + tb;
    double den = Math.sqrt(f1 * f2);
    if (den != 0) {
        return num / den;
    } else {
        return 0;
    }
}
public Model learn(ExampleSet exampleSet) throws OperatorException {
    double value = 0.0;
    double[] confidences = null;
    int method = getParameterAsInt(PARAMETER_METHOD);
    Attribute label = exampleSet.getAttributes().getLabel();
    if ((label.isNominal()) && ((method == MEDIAN) || (method == AVERAGE))) {
        logWarning("Cannot use method '" + METHODS[method] + "' for nominal labels: changing to 'mode'!");
        method = MODE;
    } else if ((!label.isNominal()) && (method == MODE)) {
        logWarning("Cannot use method '" + METHODS[method] + "' for numerical labels: changing to 'average'!");
        method = AVERAGE;
    }
    switch (method) {
        case MEDIAN:
            double[] labels = new double[exampleSet.size()];
            Iterator<Example> r = exampleSet.iterator();
            int counter = 0;
            while (r.hasNext()) {
                Example example = r.next();
                labels[counter++] = example.getValue(example.getAttributes().getLabel());
            }
            java.util.Arrays.sort(labels);
            value = labels[exampleSet.size() / 2];
            break;
        case AVERAGE:
            exampleSet.recalculateAttributeStatistics(label);
            value = exampleSet.getStatistics(label, Statistics.AVERAGE);
            break;
        case MODE:
            exampleSet.recalculateAttributeStatistics(label);
            value = exampleSet.getStatistics(label, Statistics.MODE);
            confidences = new double[label.getMapping().size()];
            for (int i = 0; i < confidences.length; i++) {
                confidences[i] =
                    exampleSet.getStatistics(label, Statistics.COUNT, label.getMapping().mapIndex(i))
                        / exampleSet.size();
            }
            break;
        case CONSTANT:
            value = getParameterAsDouble(PARAMETER_CONSTANT);
            break;
        case ATTRIBUTE:
            return new AttributeDefaultModel(exampleSet, getParameterAsString(PARAMETER_ATTRIBUTE_NAME));
        default:
            // cannot happen
            throw new OperatorException("DefaultLearner: Unknown default method '" + method + "'!");
    }
    log("Default value is '"
        + (label.isNominal() ? label.getMapping().mapIndex((int) value) : value + "")
        + "'.");
    return new DefaultModel(exampleSet, value, confidences);
}
private double[] getAttributeValues(Example example, Attributes attributes, double[] means) {
    double[] values = new double[attributes.size()];
    int x = 0;
    for (Attribute attribute : attributes) {
        values[x] = example.getValue(attribute) - means[x];
        x++;
    }
    return values;
}
private double[] getAsDoubleArray(Example example, Attributes attributes) {
    double[] values = new double[attributes.size()];
    int i = 0;
    for (Attribute attribute : attributes) {
        values[i] = example.getValue(attribute);
        i++;
    }
    return values;
}
@Override
public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // recall: difference = minuend - subtrahend
    // but the subtrahend is last on the ioobjects stack, so pop first
    ExampleSet subtrahendSet = subtrahendInput.getData(ExampleSet.class);
    ExampleSet minuendSet = exampleSet;
    subtrahendSet.remapIds();
    minuendSet.remapIds();
    Attribute minuendId = minuendSet.getAttributes().getId();
    Attribute subtrahendId = subtrahendSet.getAttributes().getId();

    // sanity checks
    if ((minuendId == null) || (subtrahendId == null)) {
        throw new UserError(this, 129);
    }
    if (minuendId.getValueType() != subtrahendId.getValueType()) {
        throw new UserError(
            this,
            120,
            new Object[] {
                subtrahendId.getName(),
                Ontology.VALUE_TYPE_NAMES[subtrahendId.getValueType()],
                Ontology.VALUE_TYPE_NAMES[minuendId.getValueType()]
            });
    }

    List<Integer> indices = new LinkedList<>();
    {
        int i = 0;
        for (Example example : minuendSet) {
            double id = example.getValue(minuendId);
            Example subtrahendExample = null;
            if (minuendId.isNominal()) {
                subtrahendExample =
                    subtrahendSet.getExampleFromId(
                        subtrahendId.getMapping().getIndex(minuendId.getMapping().mapIndex((int) id)));
            } else {
                subtrahendExample = subtrahendSet.getExampleFromId(id);
            }
            if (subtrahendExample == null) {
                indices.add(i);
            }
            i++;
        }
    }
    int[] indexArray = new int[indices.size()];
    for (int i = 0; i < indices.size(); i++) {
        indexArray[i] = indices.get(i);
    }
    ExampleSet minusSet = new MappedExampleSet(minuendSet, indexArray);
    return minusSet;
}
private double[] getExampleValues(Example example) {
    Attributes attributes = example.getAttributes();
    double[] attributeValues = new double[attributes.size()];
    int i = 0;
    for (Attribute attribute : attributes) {
        attributeValues[i] = example.getValue(attribute);
        i++;
    }
    return attributeValues;
}
/**
 * This method must be implemented by subclasses. Subclasses have to iterate over the example
 * set and, for each example, iterate over the oldAttributes array and set the new values on the
 * corresponding new attributes.
 */
protected void applyOnData(
        ExampleSet exampleSet, Attribute[] oldAttributes, Attribute[] newAttributes) {
    // copying data
    for (Example example : exampleSet) {
        for (int i = 0; i < oldAttributes.length; i++) {
            if (oldAttributes[i].isNumerical()) {
                example.setValue(
                    newAttributes[i],
                    computeValue(oldAttributes[i], example.getValue(oldAttributes[i])));
            }
        }
    }
}
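// Sketch (not part of the original class): one possible computeValue implementation for a
// subclass, assuming the (Attribute, double) signature used in applyOnData above; the log
// transformation here is purely illustrative.
@Override
protected double computeValue(Attribute oldAttribute, double oldValue) {
    // map missing or non-positive values to missing, otherwise apply the natural logarithm
    if (Double.isNaN(oldValue) || oldValue <= 0) {
        return Double.NaN;
    }
    return Math.log(oldValue);
}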
@Override
public void count(Example example, double weight) {
    double value = example.getValue(sourceAttribute);
    if (!Double.isNaN(value)) {
        if (frequencies != null) {
            frequencies[(int) value] += weight;
        } else {
            Double frequency = frequenciesMap.get(value);
            if (frequency == null) {
                frequenciesMap.put(value, weight);
            } else {
                frequenciesMap.put(value, frequency + weight);
            }
        }
    }
}
@Override
public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
    for (Example example : exampleSet) {
        for (Attribute attribute : exampleSet.getAttributes()) {
            if (attribute.isNumerical()) {
                double value = example.getValue(attribute);
                value = Math.abs(value);
                example.setValue(attribute, value);
            }
        }
    }
    return exampleSet;
}
public void reinitOnlineCounting(ExampleSet exampleSet) {
    // count all class weights once
    labelAttribute = exampleSet.getAttributes().getLabel();
    weightAttribute = exampleSet.getAttributes().getWeight();
    totalLabelWeights = new double[labelAttribute.getMapping().size()];
    totalWeight = 0d;
    if (exampleSet.getAttributes().getWeight() != null) {
        for (Example example : exampleSet) {
            double weight = example.getWeight();
            totalLabelWeights[(int) example.getValue(labelAttribute)] += weight;
        }
    } else {
        for (Example example : exampleSet) {
            totalLabelWeights[(int) example.getValue(labelAttribute)] += 1d;
        }
    }
    for (int i = 0; i < totalLabelWeights.length; i++) {
        totalWeight += totalLabelWeights[i];
    }

    // reset the online counters used for subtraction
    labelWeights = new double[labelAttribute.getMapping().size()];
    weight = 0;
}
public static SplittedExampleSet splitByAttribute(
        ExampleSet exampleSet, Attribute attribute, double value) {
    int[] elements = new int[exampleSet.size()];
    Iterator<Example> reader = exampleSet.iterator();
    int i = 0;
    while (reader.hasNext()) {
        Example example = reader.next();
        double currentValue = example.getValue(attribute);
        if (currentValue <= value) {
            elements[i++] = 0;
        } else {
            elements[i++] = 1;
        }
    }
    Partition partition = new Partition(elements, 2);
    return new SplittedExampleSet(exampleSet, partition);
}
@Override
public void init(ExampleSet exampleSet) throws OperatorException {
    super.init(exampleSet);
    Tools.onlyNumericalAttributes(exampleSet, "value based similarities");
    Attributes attributes = exampleSet.getAttributes();
    if (attributes.size() != 1) {
        throw new OperatorException(
            "The bregman divergence you've chosen is not applicable for the dataset! Proceeding with the 'Squared Euclidean distance' bregman divergence.");
    }
    for (Example example : exampleSet) {
        for (Attribute attribute : attributes) {
            if (example.getValue(attribute) <= 0) {
                throw new OperatorException(
                    "The bregman divergence you've chosen is not applicable for the dataset! Proceeding with the 'Squared Euclidean distance' bregman divergence.");
            }
        }
    }
}
public double[] vectorSubtraction(Example x, double[] y) {
    if (x.getAttributes().size() != y.length) {
        throw new RuntimeException(
            "Cannot subtract vectors: incompatible numbers of attributes ("
                + x.getAttributes().size() + " != " + y.length + ")!");
    }
    double[] result = new double[x.getAttributes().size()];
    int i = 0;
    for (Attribute att : x.getAttributes()) {
        result[i] = x.getValue(att) - y[i];
        i++;
    }
    return result;
}
private void updateEstimates(ExampleSet exampleSet, int modelNr, Attribute[] specialAttributes) {
    Iterator<Example> reader = exampleSet.iterator();
    while (reader.hasNext()) {
        Example example = reader.next();
        int predicted = (int) example.getPredictedLabel();
        double oldValue = example.getValue(specialAttributes[predicted]);
        if (Double.isNaN(oldValue)) {
            logWarning("Found NaN confidence as intermediate prediction.");
            oldValue = 0;
        }
        if (!Double.isInfinite(oldValue)) {
            example.setValue(specialAttributes[predicted], oldValue + this.getWeightForModel(modelNr));
        }
    }
}