/**
 * Replaces every numerical, non-integer attribute of the example set by a newly created integer
 * attribute carrying the same name. Missing values (NaN) stay missing; every other value is
 * either rounded or truncated, depending on the {@code PARAMETER_ROUND} parameter.
 *
 * @param exampleSet the example set whose attributes are converted in place
 * @return the same example set instance that was passed in
 * @throws OperatorException declared by the overridden method
 */
@Override
public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
  boolean useRounding = getParameterAsBoolean(PARAMETER_ROUND);
  List<Attribute> integerAttributes = new LinkedList<Attribute>();

  for (Iterator<Attribute> it = exampleSet.getAttributes().iterator(); it.hasNext(); ) {
    Attribute source = it.next();
    int valueType = source.getValueType();
    boolean numerical = Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueType, Ontology.NUMERICAL);
    if (!numerical || Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueType, Ontology.INTEGER)) {
      // only real-valued attributes are converted
      continue;
    }

    Attribute target = AttributeFactory.createAttribute(source.getName(), Ontology.INTEGER);
    integerAttributes.add(target);
    exampleSet.getExampleTable().addAttribute(target);

    for (Example example : exampleSet) {
      double original = example.getValue(source);
      if (Double.isNaN(original)) {
        example.setValue(target, Double.NaN);
        continue;
      }
      long converted;
      if (useRounding) {
        converted = Math.round(original);
      } else {
        converted = (long) original;
      }
      example.setValue(target, converted);
    }

    // drop the source attribute through the iterator so the iteration stays valid
    it.remove();
  }

  // the replacements are registered only after the iteration over the attributes has finished
  for (Attribute target : integerAttributes) {
    exampleSet.getAttributes().addRegular(target);
  }
  return exampleSet;
}
/**
 * Adds a date-time attribute whose values are those of the numerical attribute named by
 * {@code PARAMETER_ATTRIBUTE_NAME}, shifted by the configured time offset. Missing values (NaN)
 * are copied unchanged. Depending on {@code PARAMETER_KEEP_OLD_ATTRIBUTE} the new attribute
 * either takes over name and role of the source attribute or is named with the suffix
 * {@code _AS_DATE}.
 *
 * @param exampleSet the example set to extend
 * @return the same example set instance
 * @throws OperatorException a {@code UserError} (code 111) if the named attribute does not exist
 */
@Override
public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
  String sourceName = getParameterAsString(PARAMETER_ATTRIBUTE_NAME);
  Long timeOffset = getParameterAsLong(PARMETER_TIME_OFFSET);

  Attribute source = exampleSet.getAttributes().get(sourceName);
  if (source == null) {
    throw new UserError(this, 111, sourceName);
  }

  Attribute dateAttribute = AttributeFactory.createAttribute(Ontology.DATE_TIME);
  exampleSet.getExampleTable().addAttribute(dateAttribute);
  exampleSet.getAttributes().addRegular(dateAttribute);

  for (Example example : exampleSet) {
    double raw = example.getValue(source);
    // missing values are passed through without applying the offset
    double shifted = Double.isNaN(raw) ? raw : raw + timeOffset;
    example.setValue(dateAttribute, shifted);
  }

  if (getParameterAsBoolean(PARAMETER_KEEP_OLD_ATTRIBUTE)) {
    dateAttribute.setName(sourceName + "_AS_DATE");
  } else {
    // the new attribute replaces the source: same name, same special role name
    AttributeRole previousRole = exampleSet.getAttributes().getRole(source);
    exampleSet.getAttributes().remove(source);
    dateAttribute.setName(sourceName);
    exampleSet.getAttributes().setSpecialAttribute(dateAttribute, previousRole.getSpecialName());
  }
  return exampleSet;
}
/**
 * Computes the (optionally weighted) log-likelihood of the logistic model described by the
 * individual's values over all examples of {@code exampleSet} and reports it as the single
 * criterion {@code log_reg_fitness}.
 *
 * <p>The per-example term {@code y * log(pi) + (1 - y) * log(1 - pi)} with
 * {@code pi = exp(eta) / (1 + exp(eta))} is evaluated in the algebraically identical form
 * {@code y * eta - log(1 + exp(eta))}. The direct form yields NaN as soon as {@code pi}
 * saturates to exactly 0 or 1 (because of {@code 0 * -Infinity}) or {@code exp(eta)} overflows,
 * which made confidently correct predictions poison the whole fitness value.
 *
 * @param individual the individual whose values are used as coefficients; if
 *     {@code addIntercept} is set, the last value is the intercept
 * @return a performance vector holding the summed log-likelihood
 */
@Override
public PerformanceVector evaluateIndividual(Individual individual) {
  double[] beta = individual.getValues();
  double fitness = 0.0d;
  for (Example example : exampleSet) {
    // linear predictor
    double eta = 0.0d;
    int i = 0;
    for (Attribute attribute : example.getAttributes()) {
      eta += beta[i] * example.getValue(attribute);
      i++;
    }
    if (addIntercept) {
      eta += beta[beta.length - 1];
    }

    // log(1 + exp(eta)) without overflow: max(eta, 0) + log1p(exp(-|eta|))
    double logOnePlusExp = Math.max(eta, 0.0d) + Math.log1p(Math.exp(-Math.abs(eta)));

    double classValue = example.getValue(label);
    double currentFitness = classValue * eta - logOnePlusExp;

    double weightValue = 1.0d;
    if (weight != null) {
      weightValue = example.getValue(weight);
    }
    fitness += weightValue * currentFitness;
  }

  PerformanceVector performanceVector = new PerformanceVector();
  performanceVector.addCriterion(
      new EstimatedPerformance("log_reg_fitness", fitness, exampleSet.size(), false));
  return performanceVector;
}
/**
 * Computes mean and variance of every attribute of the example set in a single pass and returns
 * them keyed by the attribute's position in the iteration order of
 * {@code exampleSet.getAttributes()}.
 *
 * <p>The variance is derived as {@code E[x^2] - E[x]^2}. For (nearly) constant attributes this
 * difference can come out slightly negative through floating-point cancellation; it is clamped
 * to zero so that a later square root cannot turn it into NaN. A NaN variance (for example from
 * missing values or an empty example set) is left as NaN.
 *
 * @param exampleSet the examples to summarize
 * @return map from attribute position to its mean and variance
 */
private static Map<Integer, MeanVariance> createMeanVariances(
    com.rapidminer.example.ExampleSet exampleSet) {
  double[] sum = new double[exampleSet.getAttributes().size()];
  double[] squaredSum = new double[sum.length];

  for (com.rapidminer.example.Example example : exampleSet) {
    int a = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
      double value = example.getValue(attribute);
      sum[a] += value;
      squaredSum[a] += value * value;
      a++;
    }
  }

  int numberOfExamples = exampleSet.size();
  Map<Integer, MeanVariance> meanVariances = new HashMap<Integer, MeanVariance>();
  for (int a = 0; a < sum.length; a++) {
    double mean = sum[a] / numberOfExamples;
    double meanOfSquares = squaredSum[a] / numberOfExamples;
    // Math.max keeps NaN as NaN and only removes tiny negative rounding artifacts
    double variance = Math.max(0.0d, meanOfSquares - (mean * mean));
    meanVariances.put(a, new MeanVariance(mean, variance));
  }
  return meanVariances;
}
@Override public Model learn(ExampleSet exampleSet) throws OperatorException { Kernel kernel = getKernel(); kernel.init(exampleSet); double initLearnRate = getParameterAsDouble(PARAMETER_LEARNING_RATE); NominalMapping labelMapping = exampleSet.getAttributes().getLabel().getMapping(); String classNeg = labelMapping.getNegativeString(); String classPos = labelMapping.getPositiveString(); double classValueNeg = labelMapping.getNegativeIndex(); int numberOfAttributes = exampleSet.getAttributes().size(); HyperplaneModel model = new HyperplaneModel(exampleSet, classNeg, classPos, kernel); model.init(new double[numberOfAttributes], 0); for (int round = 0; round <= getParameterAsInt(PARAMETER_ROUNDS); round++) { double learnRate = getLearnRate(round, getParameterAsInt(PARAMETER_ROUNDS), initLearnRate); Attributes attributes = exampleSet.getAttributes(); for (Example example : exampleSet) { double prediction = model.predict(example); if (prediction != example.getLabel()) { double direction = (example.getLabel() == classValueNeg) ? -1 : 1; // adapting intercept model.setIntercept(model.getIntercept() + learnRate * direction); // adapting coefficients double coefficients[] = model.getCoefficients(); int i = 0; for (Attribute attribute : attributes) { coefficients[i] += learnRate * direction * example.getValue(attribute); i++; } } } } return model; }
@Override public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException { // checking and creating ids if necessary Tools.checkAndCreateIds(exampleSet); // generating assignment RandomGenerator random = RandomGenerator.getRandomGenerator(this); int clusterAssignments[] = new int[exampleSet.size()]; int k = getParameterAsInt(PARAMETER_NUMBER_OF_CLUSTERS); for (int i = 0; i < exampleSet.size(); i++) { clusterAssignments[i] = random.nextInt(k); } ClusterModel model = new ClusterModel( exampleSet, k, getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL), getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED)); model.setClusterAssignments(clusterAssignments, exampleSet); // generating cluster attribute if (addsClusterAttribute()) { Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL); exampleSet.getExampleTable().addAttribute(cluster); exampleSet.getAttributes().setCluster(cluster); int i = 0; for (Example example : exampleSet) { example.setValue(cluster, "cluster_" + clusterAssignments[i]); i++; } } return model; }
/** * Creates a {@link HistogramDataset} for this {@link Attribute}. * * @param exampleSet * @return */ private HistogramDataset createHistogramDataset(final ExampleSet exampleSet) { HistogramDataset dataset = new HistogramDataset(); double[] array = new double[exampleSet.size()]; int count = 0; for (Example example : exampleSet) { double value = example.getDataRow().get(getAttribute()); // don't use missing values because otherwise JFreeChart tries to plot them too which // can lead to false histograms if (!Double.isNaN(value)) { array[count++] = value; } } // add points to data set (if any) if (count > 0) { // truncate array if necessary if (count < array.length) { array = Arrays.copyOf(array, count); } dataset.addSeries( getAttribute().getName(), array, Math.min(array.length, MAX_BINS_HISTOGRAM)); } return dataset; }
/** Creates a new evolutionary SVM optimization. */ public ClassificationEvoOptimization( ExampleSet exampleSet, // training data Kernel kernel, double c, // SVM paras int initType, // start population creation type para int maxIterations, int generationsWithoutImprovement, int popSize, // GA paras int selectionType, double tournamentFraction, boolean keepBest, // selection paras int mutationType, // type of mutation double crossoverProb, boolean showConvergencePlot, boolean showPopulationPlot, ExampleSet holdOutSet, RandomGenerator random, LoggingHandler logging, Operator executingOperator) { super( EvoSVM.createBoundArray(0.0d, exampleSet.size()), EvoSVM.determineMax(c, kernel, exampleSet, selectionType, exampleSet.size()), popSize, exampleSet.size(), initType, maxIterations, generationsWithoutImprovement, selectionType, tournamentFraction, keepBest, mutationType, Double.NaN, crossoverProb, showConvergencePlot, showPopulationPlot, random, logging, executingOperator); this.exampleSet = exampleSet; this.holdOutSet = holdOutSet; this.populationSize = popSize; this.kernel = kernel; this.c = getMax(0); // label values this.ys = new double[exampleSet.size()]; Iterator<Example> reader = exampleSet.iterator(); int index = 0; Attribute label = exampleSet.getAttributes().getLabel(); while (reader.hasNext()) { Example example = reader.next(); ys[index++] = example.getLabel() == label.getMapping().getPositiveIndex() ? 1.0d : -1.0d; } // optimization function this.optimizationFunction = new ClassificationOptimizationFunction(selectionType == NON_DOMINATED_SORTING_SELECTION); }
/**
 * Creates the SVM examples from a RapidMiner example set. Each example is stored sparsely: only
 * values that are not the attribute's default are kept, together with the position of their
 * attribute. Stored values are standardized with the mean and variance registered for that
 * position, if any; a variance of exactly zero maps the value to 0. The label is stored if a
 * label attribute is given (nominal labels become +1 for the positive class and -1 otherwise),
 * and the id is stored if the example set has an id attribute.
 *
 * @param exampleSet the source examples
 * @param labelAttribute the label attribute, or {@code null} if labels should not be set
 * @param meanVariances mean and variance per attribute position, used for standardization
 */
public SVMExamples(
    com.rapidminer.example.ExampleSet exampleSet,
    Attribute labelAttribute,
    Map<Integer, MeanVariance> meanVariances) {
  this(exampleSet.size(), 0.0d);
  this.meanVarianceMap = meanVariances;

  Attribute idAttribute = exampleSet.getAttributes().getId();
  int row = 0;
  for (com.rapidminer.example.Example current : exampleSet) {
    // collect the non-default values of this example, keyed by attribute position
    Map<Integer, Double> sparseValues = new LinkedHashMap<Integer, Double>();
    int position = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
      double raw = current.getValue(attribute);
      if (!com.rapidminer.example.Tools.isDefault(attribute.getDefault(), raw)) {
        sparseValues.put(position, raw);
      }
      // dim tracks the largest number of attributes seen so far
      if (position + 1 > dim) {
        dim = position + 1;
      }
      position++;
    }

    atts[row] = new double[sparseValues.size()];
    index[row] = new int[sparseValues.size()];

    int slot = 0;
    for (Map.Entry<Integer, Double> entry : sparseValues.entrySet()) {
      Integer attributePosition = entry.getKey();
      index[row][slot] = attributePosition.intValue();

      double stored = entry.getValue().doubleValue();
      MeanVariance statistics = meanVarianceMap.get(attributePosition);
      if (statistics != null) {
        if (statistics.getVariance() == 0.0d) {
          stored = 0.0d;
        } else {
          stored = (stored - statistics.getMean()) / Math.sqrt(statistics.getVariance());
        }
      }
      atts[row][slot] = stored;
      slot++;
    }

    if (labelAttribute != null) {
      double labelValue = current.getValue(labelAttribute);
      if (labelAttribute.isNominal()) {
        boolean positive = labelValue == labelAttribute.getMapping().getPositiveIndex();
        ys[row] = positive ? 1 : -1;
      } else {
        ys[row] = labelValue;
      }
    }

    if (idAttribute != null) {
      ids[row] = current.getValueAsString(idAttribute);
    }
    row++;
  }
}
/**
 * Accepts exactly those examples whose label value is missing.
 *
 * @param example the example to check
 * @return {@code true} if the label value of the example is NaN, {@code false} otherwise
 */
@Override
public boolean conditionOk(Example example) {
  Attribute labelAttribute = example.getAttributes().getLabel();
  double labelValue = example.getValue(labelAttribute);
  return Double.isNaN(labelValue);
}
public Model learn(ExampleSet exampleSet) throws OperatorException { double value = 0.0; double[] confidences = null; int method = getParameterAsInt(PARAMETER_METHOD); Attribute label = exampleSet.getAttributes().getLabel(); if ((label.isNominal()) && ((method == MEDIAN) || (method == AVERAGE))) { logWarning( "Cannot use method '" + METHODS[method] + "' for nominal labels: changing to 'mode'!"); method = MODE; } else if ((!label.isNominal()) && (method == MODE)) { logWarning( "Cannot use method '" + METHODS[method] + "' for numerical labels: changing to 'average'!"); method = AVERAGE; } switch (method) { case MEDIAN: double[] labels = new double[exampleSet.size()]; Iterator<Example> r = exampleSet.iterator(); int counter = 0; while (r.hasNext()) { Example example = r.next(); labels[counter++] = example.getValue(example.getAttributes().getLabel()); } java.util.Arrays.sort(labels); value = labels[exampleSet.size() / 2]; break; case AVERAGE: exampleSet.recalculateAttributeStatistics(label); value = exampleSet.getStatistics(label, Statistics.AVERAGE); break; case MODE: exampleSet.recalculateAttributeStatistics(label); value = exampleSet.getStatistics(label, Statistics.MODE); confidences = new double[label.getMapping().size()]; for (int i = 0; i < confidences.length; i++) { confidences[i] = exampleSet.getStatistics(label, Statistics.COUNT, label.getMapping().mapIndex(i)) / exampleSet.size(); } break; case CONSTANT: value = getParameterAsDouble(PARAMETER_CONSTANT); break; case ATTRIBUTE: return new AttributeDefaultModel( exampleSet, getParameterAsString(PARAMETER_ATTRIBUTE_NAME)); default: // cannot happen throw new OperatorException("DefaultLearner: Unknown default method '" + method + "'!"); } log( "Default value is '" + (label.isNominal() ? label.getMapping().mapIndex((int) value) : value + "") + "'."); return new DefaultModel(exampleSet, value, confidences); }
/** * Iterates over all models and returns the class with maximum likelihood. * * @param origExampleSet the set of examples to be classified */ @Override public ExampleSet performPrediction(ExampleSet origExampleSet, Attribute predictedLabel) throws OperatorException { final String attributePrefix = "AdaBoostModelPrediction"; final int numLabels = predictedLabel.getMapping().size(); final Attribute[] specialAttributes = new Attribute[numLabels]; OperatorProgress progress = null; if (getShowProgress() && getOperator() != null && getOperator().getProgress() != null) { progress = getOperator().getProgress(); progress.setTotal(100); } for (int i = 0; i < numLabels; i++) { specialAttributes[i] = com.rapidminer.example.Tools.createSpecialAttribute( origExampleSet, attributePrefix + i, Ontology.NUMERICAL); if (progress != null) { progress.setCompleted((int) (25.0 * (i + 1) / numLabels)); } } Iterator<Example> reader = origExampleSet.iterator(); int progressCounter = 0; while (reader.hasNext()) { Example example = reader.next(); for (int i = 0; i < specialAttributes.length; i++) { example.setValue(specialAttributes[i], 0); } if (progress != null && ++progressCounter % OPERATOR_PROGRESS_STEPS == 0) { progress.setCompleted((int) (25.0 * progressCounter / origExampleSet.size()) + 25); } } reader = origExampleSet.iterator(); for (int modelNr = 0; modelNr < this.getNumberOfModels(); modelNr++) { Model model = this.getModel(modelNr); ExampleSet exampleSet = (ExampleSet) origExampleSet.clone(); exampleSet = model.apply(exampleSet); this.updateEstimates(exampleSet, modelNr, specialAttributes); PredictionModel.removePredictedLabel(exampleSet); if (progress != null) { progress.setCompleted((int) (25.0 * (modelNr + 1) / this.getNumberOfModels()) + 50); } } // Turn prediction weights into confidences and a crisp predcition: this.evaluateSpecialAttributes(origExampleSet, specialAttributes); // Clean up attributes: for (int i = 0; i < numLabels; i++) { 
origExampleSet.getAttributes().remove(specialAttributes[i]); origExampleSet.getExampleTable().removeAttribute(specialAttributes[i]); if (progress != null) { progress.setCompleted((int) (25.0 * (i + 1) / numLabels) + 75); } } return origExampleSet; }
public Split getBestSplit(ExampleSet inputSet, Attribute attribute, String labelName) { SortedExampleSet exampleSet = new SortedExampleSet((ExampleSet) inputSet.clone(), attribute, SortedExampleSet.INCREASING); Attribute labelAttribute = exampleSet.getAttributes().getLabel(); int labelIndex = labelAttribute.getMapping().mapString(labelName); double oldLabel = Double.NaN; double bestSplit = Double.NaN; double lastValue = Double.NaN; double bestBenefit = Double.NEGATIVE_INFINITY; double bestTotalWeight = 0; int bestSplitType = Split.LESS_SPLIT; // initiating online counting of benefit: only 2 Datascans needed then criterion.reinitOnlineCounting(exampleSet); for (Example e : exampleSet) { double currentValue = e.getValue(attribute); double label = e.getValue(labelAttribute); if ((Double.isNaN(oldLabel)) || (oldLabel != label) && (lastValue != currentValue)) { double splitValue = (lastValue + currentValue) / 2.0d; double[] benefits; if (labelName == null) { benefits = criterion.getOnlineBenefit(e); } else { benefits = criterion.getOnlineBenefit(e, labelIndex); } // online method returns both possible relations in one array(greater / smaller) in one // array if ((benefits[0] > minValue) && (benefits[0] > 0) && (benefits[1] > 0) && ((benefits[0] > bestBenefit) || ((benefits[0] == bestBenefit) && (benefits[1] > bestTotalWeight)))) { bestBenefit = benefits[0]; bestSplit = splitValue; bestTotalWeight = benefits[1]; bestSplitType = Split.LESS_SPLIT; } if ((benefits[2] > minValue) && (benefits[2] > 0) && (benefits[3] > 0) && ((benefits[2] > bestBenefit) || ((benefits[2] == bestBenefit) && (benefits[3] > bestTotalWeight)))) { bestBenefit = benefits[2]; bestSplit = splitValue; bestTotalWeight = benefits[3]; bestSplitType = Split.GREATER_SPLIT; } oldLabel = label; } lastValue = currentValue; criterion.update(e); } return new Split(bestSplit, new double[] {bestBenefit, bestTotalWeight}, bestSplitType); }
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { // recall: difference = minuend - subtrahend // but the subtrahend is last on the ioobjects stack, so pop first ExampleSet subtrahendSet = subtrahendInput.getData(ExampleSet.class); ExampleSet minuendSet = exampleSet; subtrahendSet.remapIds(); minuendSet.remapIds(); Attribute minuendId = minuendSet.getAttributes().getId(); Attribute subtrahendId = subtrahendSet.getAttributes().getId(); // sanity checks if ((minuendId == null) || (subtrahendId == null)) { throw new UserError(this, 129); } if (minuendId.getValueType() != subtrahendId.getValueType()) { throw new UserError( this, 120, new Object[] { subtrahendId.getName(), Ontology.VALUE_TYPE_NAMES[subtrahendId.getValueType()], Ontology.VALUE_TYPE_NAMES[minuendId.getValueType()] }); } List<Integer> indices = new LinkedList<>(); { int i = 0; for (Example example : minuendSet) { double id = example.getValue(minuendId); Example subtrahendExample = null; if (minuendId.isNominal()) { subtrahendExample = subtrahendSet.getExampleFromId( subtrahendId.getMapping().getIndex(minuendId.getMapping().mapIndex((int) id))); } else { subtrahendExample = subtrahendSet.getExampleFromId(id); } if (subtrahendExample == null) { indices.add(i); } i++; } } int[] indexArray = new int[indices.size()]; for (int i = 0; i < indices.size(); i++) { indexArray[i] = indices.get(i); } ExampleSet minusSet = new MappedExampleSet(minuendSet, indexArray); return minusSet; }
/**
 * Reads the input example set, converts every example into a (user, item, rating) triple using
 * the attributes with the special roles {@code user identification} and
 * {@code item identification} and the label, trains a {@code _slopeOne} recommender on these
 * ratings, and delivers both the unchanged example set and the trained recommender.
 *
 * @throws OperatorException a {@code UserError} 105 if one of the two special roles or the label
 *     is missing
 */
@Override
public void doWork() throws OperatorException {
  ExampleSet exampleSet = exampleSetInput.getData();

  IEntityMapping user_mapping = new EntityMapping();
  IEntityMapping item_mapping = new EntityMapping();
  IRatings training_data = new Ratings();

  // all three roles are required
  if (exampleSet.getAttributes().getSpecial("user identification") == null
      || exampleSet.getAttributes().getSpecial("item identification") == null
      || exampleSet.getAttributes().getLabel() == null) {
    throw new UserError(this, 105);
  }

  Attributes attributes = exampleSet.getAttributes();
  Attribute userAttribute = attributes.getRole("user identification").getAttribute();
  Attribute itemAttribute = attributes.getRole("item identification").getAttribute();
  Attribute ratingAttribute = attributes.getLabel();

  for (Example example : exampleSet) {
    // external ids are truncated to int before they are mapped to internal ids
    int userId = user_mapping.ToInternalID((int) example.getValue(userAttribute));
    int itemId = item_mapping.ToInternalID((int) example.getValue(itemAttribute));
    double rating = example.getValue(ratingAttribute);
    training_data.Add(userId, itemId, rating);
  }

  _slopeOne recommender = new _slopeOne();
  recommender.user_mapping = user_mapping;
  recommender.item_mapping = item_mapping;

  recommender.SetMinRating(getParameterAsInt("Min Rating"));
  recommender.SetMaxRating(recommender.GetMinRating() + getParameterAsInt("Range"));

  recommender.SetRatings(training_data);
  recommender.Train();

  exampleSetOutput.deliver(exampleSet);
  exampleSetOutput1.deliver(recommender);
}
/**
 * Returns the accuracy of the predictions for the given example set, i.e. the fraction of
 * examples whose label equals the predicted label. For an empty example set the result is NaN
 * (0 divided by 0 in floating point).
 *
 * @param exampleSet the examples carrying label and predicted label
 * @return the fraction of correctly predicted examples
 */
private double evaluatePredictions(ExampleSet exampleSet) {
  int total = 0;
  int hits = 0;
  for (Example example : exampleSet) {
    total++;
    if (example.getLabel() == example.getPredictedLabel()) {
      hits++;
    }
  }
  double hitsAsDouble = hits;
  return hitsAsDouble / total;
}
/**
 * Copies the values of all attributes returned by the example's attribute iteration into a new
 * array, in iteration order.
 *
 * @param example the example to read
 * @return the attribute values of the example
 */
private double[] getExampleValues(Example example) {
  Attributes exampleAttributes = example.getAttributes();
  double[] result = new double[exampleAttributes.size()];
  int position = 0;
  for (Attribute current : exampleAttributes) {
    result[position++] = example.getValue(current);
  }
  return result;
}
/**
 * Adds the example to the running counts: the weight of its label value and the total weight
 * are both increased by the example's weight, or by 1 if no weight attribute is set.
 *
 * @param example the example to account for
 */
public void update(Example example) {
  int labelIndex = (int) example.getValue(labelAttribute);
  double increment = (weightAttribute == null) ? 1d : example.getValue(weightAttribute);
  labelWeights[labelIndex] += increment;
  weight += increment;
}
/** * This method must be implemented by the subclasses. Subclasses have to iterate over the * exampleset and on each example iterate over the oldAttribute array and set the new values on * the corresponding new attribute */ protected void applyOnData( ExampleSet exampleSet, Attribute[] oldAttributes, Attribute[] newAttributes) { // copying data for (Example example : exampleSet) { for (int i = 0; i < oldAttributes.length; i++) { if (oldAttributes[i].isNumerical()) example.setValue( newAttributes[i], computeValue(oldAttributes[i], example.getValue(oldAttributes[i]))); } } }
/**
 * Creates a new undirected sparse graph and stores it in {@code graph}. If the example set has
 * an id attribute, one vertex per example (named by the example's id) is added and
 * {@code addEdges()} is called; otherwise the graph stays empty.
 *
 * @return the newly created graph
 */
public Graph<String, String> createGraph() {
  graph = new UndirectedSparseGraph<String, String>();
  Attribute idAttribute = exampleSet.getAttributes().getId();
  if (idAttribute == null) {
    // without ids there is nothing to name the vertices with
    return graph;
  }
  for (Example example : exampleSet) {
    String vertexName = example.getValueAsString(idAttribute);
    graph.addVertex(vertexName);
  }
  addEdges();
  return graph;
}
@Override public Model learn(ExampleSet exampleSet) throws OperatorException { DistanceMeasure measure = DistanceMeasures.createMeasure(this); measure.init(exampleSet); GeometricDataCollection<RegressionData> data = new LinearList<RegressionData>(measure); // check if weights should be used boolean useWeights = getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS); // check if robust estimate should be performed: Then calculate weights and use it anyway if (getParameterAsBoolean(PARAMETER_USE_ROBUST_ESTIMATION)) { useWeights = true; LocalPolynomialExampleWeightingOperator weightingOperator; try { weightingOperator = OperatorService.createOperator(LocalPolynomialExampleWeightingOperator.class); exampleSet = weightingOperator.doWork((ExampleSet) exampleSet.clone(), this); } catch (OperatorCreationException e) { throw new UserError(this, 904, "LocalPolynomialExampleWeighting", e.getMessage()); } } Attributes attributes = exampleSet.getAttributes(); Attribute label = attributes.getLabel(); Attribute weightAttribute = attributes.getWeight(); for (Example example : exampleSet) { double[] values = new double[attributes.size()]; double labelValue = example.getValue(label); double weight = 1d; if (weightAttribute != null && useWeights) { weight = example.getValue(weightAttribute); } // filter out examples without influence if (weight > 0d) { // copying example values int i = 0; for (Attribute attribute : attributes) { values[i] = example.getValue(attribute); i++; } // inserting into geometric data collection data.add(values, new RegressionData(values, labelValue, weight)); } } return new LocalPolynomialRegressionModel( exampleSet, data, Neighborhoods.createNeighborhood(this), SmoothingKernels.createKernel(this), getParameterAsInt(PARAMETER_DEGREE), getParameterAsDouble(PARAMETER_RIDGE)); }
/**
 * Replaces the value of every numerical attribute in every example by its absolute value.
 * Non-numerical attributes are left untouched.
 *
 * @param exampleSet the example set to modify in place
 * @return the same example set instance
 * @throws OperatorException declared by the overridden method
 */
@Override
public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
  for (Example example : exampleSet) {
    for (Attribute attribute : exampleSet.getAttributes()) {
      if (!attribute.isNumerical()) {
        continue;
      }
      example.setValue(attribute, Math.abs(example.getValue(attribute)));
    }
  }
  return exampleSet;
}
/**
 * Accounts for the example if {@code applicable(example)} holds: its weight (1 if the example
 * set has no weight attribute) is added to the covered weight and, if its label is the positive
 * class of the label mapping, to the positive weight as well.
 *
 * @param example the example to account for
 */
public void apply(Example example) {
  if (!applicable(example)) {
    return;
  }

  double exampleWeight = (example.getAttributes().getWeight() != null) ? example.getWeight() : 1.0d;
  coveredWeight += exampleWeight;

  int positiveIndex = example.getAttributes().getLabel().getMapping().getPositiveIndex();
  if (example.getLabel() == positiveIndex) {
    positiveWeight += exampleWeight;
  }
}
/** * Computes the weighted class priors of the boolean target attribute and shifts weights so that * the priors are equal afterwards. */ private void rescalePriors(ExampleSet exampleSet, double[] classPriors) { // The weights of class i are calculated as // (1 / #classes) / (#rel_freq_class_i) double[] weights = new double[2]; for (int i = 0; i < weights.length; i++) { weights[i] = 1.0d / (weights.length * (classPriors[i])); } Iterator<Example> exRead = exampleSet.iterator(); while (exRead.hasNext()) { Example example = exRead.next(); example.setWeight(weights[(int) (example.getLabel())]); } }
/**
 * Resets the weight of every example of the extended batch to 1 and computes the relative
 * frequency of the two classes.
 *
 * @param extendedBatch containing the extended batch
 * @return the class priors of the batch (index 0 and 1); both are NaN for an empty batch
 */
private double[] prepareExtendedBatch(ExampleSet extendedBatch) {
  int[] counts = new int[2];
  for (Example example : extendedBatch) {
    example.setWeight(1);
    int classIndex = (int) example.getLabel();
    counts[classIndex]++;
  }

  int total = counts[0] + counts[1];
  double[] priors = new double[2];
  for (int classIndex = 0; classIndex < priors.length; classIndex++) {
    priors[classIndex] = ((double) counts[classIndex]) / total;
  }
  return priors;
}
public LinkedList<String> getAllCategories(Attribute attribute) { LinkedList<String> allCategoryList = new LinkedList<String>(); Iterator<Example> reader = this.iterator(); while (reader.hasNext()) { Example example = reader.next(); String currentValue = example.getValueAsString(attribute); if (!inList(currentValue, allCategoryList)) allCategoryList.add(currentValue); } // return new SplittedExampleSet(exampleSet, partition); return allCategoryList; }
private double[] estimateVariance() { double[] beta = getBestValuesEver(); Matrix hessian = new Matrix(beta.length, beta.length); for (Example example : exampleSet) { double[] values = new double[beta.length]; double eta = 0.0d; int j = 0; for (Attribute attribute : example.getAttributes()) { double value = example.getValue(attribute); values[j] = value; eta += beta[j] * value; j++; } if (addIntercept) { values[beta.length - 1] = 1.0d; eta += beta[beta.length - 1]; } double pi = Math.exp(eta) / (1 + Math.exp(eta)); double weightValue = 1.0d; if (weight != null) weightValue = example.getValue(weight); for (int x = 0; x < beta.length; x++) { for (int y = 0; y < beta.length; y++) { // sum is second derivative of log likelihood function double h = hessian.get(x, y) - values[x] * values[y] * weightValue * pi * (1 - pi); hessian.set(x, y, h); } } } double[] variance = new double[beta.length]; Matrix varianceCovarianceMatrix = null; try { // asymptotic variance-covariance matrix is inverse of hessian matrix varianceCovarianceMatrix = hessian.inverse(); } catch (Exception e) { logging.logWarning("could not determine variance-covariance matrix, hessian is singular"); for (int j = 0; j < beta.length; j++) { variance[j] = Double.NaN; } return variance; } for (int j = 0; j < beta.length; j++) { // get diagonal elements variance[j] = Math.abs(varianceCovarianceMatrix.get(j, j)); } return variance; }
/**
 * Splits the example set into two partitions by comparing a numerical attribute with a
 * threshold: examples with a value less than or equal to {@code value} go to partition 0, all
 * others (including those with a NaN value, for which the comparison is false) to partition 1.
 *
 * @param exampleSet the examples to split
 * @param attribute the attribute to compare
 * @param value the threshold
 * @return the example set split into two partitions
 */
public static SplittedExampleSet splitByAttribute(
    ExampleSet exampleSet, Attribute attribute, double value) {
  int[] partitionOfExample = new int[exampleSet.size()];
  int position = 0;
  for (Example example : exampleSet) {
    partitionOfExample[position] = (example.getValue(attribute) <= value) ? 0 : 1;
    position++;
  }
  return new SplittedExampleSet(exampleSet, new Partition(partitionOfExample, 2));
}
/** * Computes Kendall's tau-b rank correlation statistic, ignoring examples containing missing * values, with approximate comparisons. * * @param eSet the example set * @param a the first attribute to correlate * @param b the second attribute to correlate * @param fuzz values within +/- fuzz may be considered tied * @return Kendall's tau-b rank correlation * @throws OperatorException */ public static double tau_b(ExampleSet eSet, Attribute a, Attribute b, double fuzz) throws OperatorException { ExampleSet e = extract(eSet, a, b); // reduced example set FuzzyComp fc = new FuzzyComp(fuzz); int c = 0; // concordant pairs int d = 0; // discordant pairs int ta = 0; // pairs tied on a (only) int tb = 0; // pairs tied on b (only) int tc = 0; // pairs tied on both a and b int n = 0; // number of times iterator i is bumped Iterator<Example> i = e.iterator(); while (i.hasNext()) { // iterate through all possible pairs Example z1 = i.next(); n++; double x = z1.getValue(a); double y = z1.getValue(b); if (b.isNominal() && a != null) { String yString = b.getMapping().mapIndex((int) y); y = a.getMapping().getIndex(yString); } Iterator<Example> j = e.iterator(); for (int k = 0; k < n; k++) j.next(); // increment j to match i while (j.hasNext()) { // move on to subsequent examples Example z2 = j.next(); double xx = z2.getValue(a); double yy = z2.getValue(b); if (b.isNominal() && a != null) { String yyString = b.getMapping().mapIndex((int) yy); yy = a.getMapping().getIndex(yyString); } int xc = fc.compare(x, xx); int yc = fc.compare(y, yy); if (xc == 0) { if (yc == 0) tc++; // tied on both attributes else ta++; // tied only on a } else if (yc == 0) tb++; // tied only on b else if (xc == yc) c++; // concordant pair else d++; // discordant pair } } double num = c - d; double den = Math.sqrt((c + d + ta) * (c + d + tb)); if (den != 0) return num / den; else return 0; }
/**
 * Subtracts the vector {@code y} component-wise from the attribute values of the example
 * {@code x}, taken in the iteration order of the example's attributes.
 *
 * @param x the example providing the minuend values
 * @param y the subtrahend vector; must have one entry per attribute of {@code x}
 * @return a new array holding {@code x - y}
 * @throws RuntimeException if the number of attributes differs from the length of {@code y}
 */
public double[] vectorSubtraction(Example x, double[] y) {
  if (x.getAttributes().size() != y.length) {
    throw new RuntimeException(
        "Cannot substract vectors: incompatible numbers of attributes ("
            + x.getAttributes().size()
            + " != "
            + y.length
            + ")!");
  }

  double[] difference = new double[x.getAttributes().size()];
  int position = 0;
  for (Attribute attribute : x.getAttributes()) {
    difference[position] = x.getValue(attribute) - y[position];
    position++;
  }
  return difference;
}