@Override public double predict(Example example) throws OperatorException { int i = 0; double distance = intercept; // using kernel for distance calculation double[] values = new double[example.getAttributes().size()]; for (Attribute currentAttribute : example.getAttributes()) { values[i] = example.getValue(currentAttribute); i++; } distance += kernel.calculateDistance(values, coefficients); if (getLabel().isNominal()) { int positiveMapping = getLabel().getMapping().mapString(classPositive); int negativeMapping = getLabel().getMapping().mapString(classNegative); boolean isApplying = example.getAttributes().getPredictedLabel() != null; if (isApplying) { example.setConfidence(classPositive, 1.0d / (1.0d + java.lang.Math.exp(-distance))); example.setConfidence(classNegative, 1.0d / (1.0d + java.lang.Math.exp(distance))); } if (distance < 0) { return negativeMapping; } else { return positiveMapping; } } else { return distance; } }
/**
 * Adds the given example's weight to the coverage statistics of this rule if the rule
 * is applicable; examples of the positive class additionally increase the positive weight.
 *
 * @param example the example to count
 */
public void apply(Example example) {
    if (!applicable(example)) {
        return;
    }
    // Use the example weight when a weight attribute exists, otherwise count it as 1.
    Attribute weightAttribute = example.getAttributes().getWeight();
    double exampleWeight = (weightAttribute != null) ? example.getWeight() : 1.0d;
    coveredWeight += exampleWeight;
    int positiveIndex = example.getAttributes().getLabel().getMapping().getPositiveIndex();
    if (example.getLabel() == positiveIndex) {
        positiveWeight += exampleWeight;
    }
}
/**
 * Evaluates a coefficient vector (individual) by the weighted log likelihood of the
 * logistic regression model it describes.
 *
 * @param individual candidate coefficients; if {@code addIntercept} is set, the last
 *     entry is the intercept
 * @return a performance vector containing the single criterion {@code log_reg_fitness}
 */
@Override
public PerformanceVector evaluateIndividual(Individual individual) {
    double[] beta = individual.getValues();
    double fitness = 0.0d;
    for (Example example : exampleSet) {
        // Linear predictor eta = beta . x (+ intercept stored as the last coefficient).
        double eta = 0.0d;
        int i = 0;
        for (Attribute attribute : example.getAttributes()) {
            eta += beta[i] * example.getValue(attribute);
            i++;
        }
        if (addIntercept) {
            eta += beta[beta.length - 1];
        }
        // Numerically stable logistic function: the original exp(eta) / (1 + exp(eta))
        // overflows to NaN (Infinity / Infinity) for large positive eta.
        double pi = eta >= 0
                ? 1.0d / (1.0d + Math.exp(-eta))
                : Math.exp(eta) / (1.0d + Math.exp(eta));
        double classValue = example.getValue(label);
        // Bernoulli log likelihood contribution of this example.
        double currentFitness = classValue * Math.log(pi) + (1 - classValue) * Math.log(1 - pi);
        double weightValue = 1.0d;
        if (weight != null) {
            weightValue = example.getValue(weight);
        }
        fitness += weightValue * currentFitness;
    }
    PerformanceVector performanceVector = new PerformanceVector();
    performanceVector.addCriterion(
            new EstimatedPerformance("log_reg_fitness", fitness, exampleSet.size(), false));
    return performanceVector;
}
/**
 * Subtracts the vector {@code y} component-wise from the attribute value vector of
 * the example {@code x}.
 *
 * @param x the example providing the minuend values
 * @param y the subtrahend vector; must have exactly one entry per attribute of {@code x}
 * @return the component-wise difference
 * @throws IllegalArgumentException if the dimensions do not match
 */
public double[] vectorSubtraction(Example x, double[] y) {
    if (x.getAttributes().size() != y.length) {
        // IllegalArgumentException is a RuntimeException, so existing catch clauses keep
        // working; this also fixes the "substract" typo in the message.
        throw new IllegalArgumentException(
                "Cannot subtract vectors: incompatible numbers of attributes ("
                        + x.getAttributes().size() + " != " + y.length + ")!");
    }
    double[] result = new double[x.getAttributes().size()];
    int i = 0;
    for (Attribute att : x.getAttributes()) {
        result[i] = x.getValue(att) - y[i];
        i++;
    }
    return result;
}
/** Returns true if the label value of the given example is missing (NaN). */
@Override
public boolean conditionOk(Example example) {
    Attribute labelAttribute = example.getAttributes().getLabel();
    return Double.isNaN(example.getValue(labelAttribute));
}
public Model learn(ExampleSet exampleSet) throws OperatorException { double value = 0.0; double[] confidences = null; int method = getParameterAsInt(PARAMETER_METHOD); Attribute label = exampleSet.getAttributes().getLabel(); if ((label.isNominal()) && ((method == MEDIAN) || (method == AVERAGE))) { logWarning( "Cannot use method '" + METHODS[method] + "' for nominal labels: changing to 'mode'!"); method = MODE; } else if ((!label.isNominal()) && (method == MODE)) { logWarning( "Cannot use method '" + METHODS[method] + "' for numerical labels: changing to 'average'!"); method = AVERAGE; } switch (method) { case MEDIAN: double[] labels = new double[exampleSet.size()]; Iterator<Example> r = exampleSet.iterator(); int counter = 0; while (r.hasNext()) { Example example = r.next(); labels[counter++] = example.getValue(example.getAttributes().getLabel()); } java.util.Arrays.sort(labels); value = labels[exampleSet.size() / 2]; break; case AVERAGE: exampleSet.recalculateAttributeStatistics(label); value = exampleSet.getStatistics(label, Statistics.AVERAGE); break; case MODE: exampleSet.recalculateAttributeStatistics(label); value = exampleSet.getStatistics(label, Statistics.MODE); confidences = new double[label.getMapping().size()]; for (int i = 0; i < confidences.length; i++) { confidences[i] = exampleSet.getStatistics(label, Statistics.COUNT, label.getMapping().mapIndex(i)) / exampleSet.size(); } break; case CONSTANT: value = getParameterAsDouble(PARAMETER_CONSTANT); break; case ATTRIBUTE: return new AttributeDefaultModel( exampleSet, getParameterAsString(PARAMETER_ATTRIBUTE_NAME)); default: // cannot happen throw new OperatorException("DefaultLearner: Unknown default method '" + method + "'!"); } log( "Default value is '" + (label.isNominal() ? label.getMapping().mapIndex((int) value) : value + "") + "'."); return new DefaultModel(exampleSet, value, confidences); }
/**
 * Copies all attribute values of the given example into a dense array, in attribute
 * iteration order.
 *
 * @param example the example to read
 * @return one value per attribute
 */
private double[] getExampleValues(Example example) {
    Attributes attributes = example.getAttributes();
    double[] result = new double[attributes.size()];
    int position = 0;
    for (Attribute attribute : attributes) {
        result[position++] = example.getValue(attribute);
    }
    return result;
}
private double[] estimateVariance() { double[] beta = getBestValuesEver(); Matrix hessian = new Matrix(beta.length, beta.length); for (Example example : exampleSet) { double[] values = new double[beta.length]; double eta = 0.0d; int j = 0; for (Attribute attribute : example.getAttributes()) { double value = example.getValue(attribute); values[j] = value; eta += beta[j] * value; j++; } if (addIntercept) { values[beta.length - 1] = 1.0d; eta += beta[beta.length - 1]; } double pi = Math.exp(eta) / (1 + Math.exp(eta)); double weightValue = 1.0d; if (weight != null) weightValue = example.getValue(weight); for (int x = 0; x < beta.length; x++) { for (int y = 0; y < beta.length; y++) { // sum is second derivative of log likelihood function double h = hessian.get(x, y) - values[x] * values[y] * weightValue * pi * (1 - pi); hessian.set(x, y, h); } } } double[] variance = new double[beta.length]; Matrix varianceCovarianceMatrix = null; try { // asymptotic variance-covariance matrix is inverse of hessian matrix varianceCovarianceMatrix = hessian.inverse(); } catch (Exception e) { logging.logWarning("could not determine variance-covariance matrix, hessian is singular"); for (int j = 0; j < beta.length; j++) { variance[j] = Double.NaN; } return variance; } for (int j = 0; j < beta.length; j++) { // get diagonal elements variance[j] = Math.abs(varianceCovarianceMatrix.get(j, j)); } return variance; }
/**
 * Builds the classification matrix for the SOM visualization: for every node of the
 * {@code dimensions[0] x dimensions[1]} net, the node's weight vector is turned into an
 * example, the model is applied to all of these examples, and the predicted label value
 * is stored per node. Finally delegates to the superclass to create its matrices.
 */
@Override protected void createMatrices() {
    // Clone the attributes so the temporary table does not share state with the example set.
    List<Attribute> attributes = new ArrayList<Attribute>(exampleSet.getAttributes().size());
    for (Attribute attribute : exampleSet.getAttributes()) {
        attributes.add((Attribute) attribute.clone());
    }
    // One data row per SOM node, containing that node's weight vector.
    MemoryExampleTable table = new MemoryExampleTable(attributes);
    for (int x = 0; x < dimensions[0]; x++) {
        for (int y = 0; y < dimensions[1]; y++) {
            DataRow row = new DoubleArrayDataRow(net.getNodeWeights(new int[] {x, y}));
            table.addDataRow(row);
        }
    }
    ExampleSet set = table.createExampleSet();
    this.classificationMatrix = new double[dimensions[0]][dimensions[1]];
    try {
        set = model.apply(set);
        // Iteration order matches the insertion order above (x-major, then y).
        Iterator<Example> exampleIterator = set.iterator();
        for (int x = 0; x < dimensions[0]; x++) {
            for (int y = 0; y < dimensions[1]; y++) {
                Example example = exampleIterator.next();
                classificationMatrix[x][y] = example.getValue(example.getAttributes().getPredictedLabel());
            }
        }
    } catch (OperatorException e) {
        // LogService.getGlobal().log("Cannot use Model for prediction of node label: " +
        // e.getMessage(), LogService.WARNING);
        // NOTE(review): the exception message is concatenated directly onto the i18n log key
        // with no separator or placeholder — looks unintended; confirm against the resource
        // bundle before changing.
        LogService.getRoot()
            .log(
                Level.WARNING,
                "com.rapidminer.operator.visualization.SOMModelPlotter.using_model_for_prediction_error"
                    + e.getMessage());
    }
    super.createMatrices();
}
/**
 * Returns true if the example's value for the configured attribute is less than or
 * equal to the split value.
 */
public boolean test(Example example) {
    Attribute attribute = example.getAttributes().get(getAttributeName());
    return example.getValue(attribute) <= value;
}
/** Returns true if any attribute value of the given example is missing (NaN). */
private boolean containsNAN(Example e) {
    for (Attribute attribute : e.getAttributes()) {
        double attributeValue = e.getValue(attribute);
        if (Double.isNaN(attributeValue)) {
            return true;
        }
    }
    return false;
}
/**
 * Computes standard logistic-regression performance measures for the best coefficient
 * vector found so far: log likelihoods, model chi squared, goodness of fit, the
 * Cox-Snell / Nagelkerke / McFadden pseudo R squared values, and the AIC and BIC
 * information criteria.
 *
 * @return a performance vector whose main criterion is AIC
 */
public PerformanceVector getPerformance() {
    double[] beta = getBestValuesEver();
    // If an intercept is fitted, the last coefficient is not a slope.
    double numberOfSlopes = addIntercept ? beta.length - 1 : beta.length;
    double logLikelihood = getBestFitnessEver();
    double restrictedLogLikelihood = 0.0d;
    double minusTwoLogLikelihood = 0.0d;
    double modelChiSquared = 0.0d;
    double goodnessOfFit = 0.0d;
    double coxSnellRSquared = 0.0d;
    double nagelkerkeRSquared = 0.0d;
    double mcfaddenRSquared = 0.0d;
    double AIC = 0.0d;
    double BIC = 0.0d;
    double weightSum = 0.0d;
    double positiveSum = 0.0d;
    for (Example example : exampleSet) {
        // Linear predictor eta = beta . x (+ intercept stored as the last coefficient).
        double eta = 0.0d;
        int i = 0;
        for (Attribute attribute : example.getAttributes()) {
            double value = example.getValue(attribute);
            eta += beta[i] * value;
            i++;
        }
        if (addIntercept) {
            eta += beta[beta.length - 1];
        }
        // Numerically stable logistic function: the original exp(eta) / (1 + exp(eta))
        // overflows to NaN (Infinity / Infinity) for large positive eta.
        double pi = eta >= 0
                ? 1.0d / (1.0d + Math.exp(-eta))
                : Math.exp(eta) / (1.0d + Math.exp(eta));
        double classValue = example.getValue(label);
        // Pearson-style contribution to the goodness-of-fit statistic.
        double currentFit = (classValue - pi) * (classValue - pi) / (pi * (1 - pi));
        double weightValue = 1.0d;
        if (weight != null) {
            weightValue = example.getValue(weight);
        }
        weightSum += weightValue;
        positiveSum += weightValue * classValue;
        goodnessOfFit += weightValue * currentFit;
    }
    double pi0 = positiveSum / weightSum;
    // Restricted model: intercept only (or the constant 0.5 when no intercept is fitted).
    if (addIntercept) {
        restrictedLogLikelihood = weightSum * (pi0 * Math.log(pi0) + (1 - pi0) * Math.log(1 - pi0));
    } else {
        restrictedLogLikelihood = weightSum * Math.log(0.5);
    }
    minusTwoLogLikelihood = -2 * logLikelihood;
    modelChiSquared = 2 * (logLikelihood - restrictedLogLikelihood);
    coxSnellRSquared =
            1 - Math.pow(Math.exp(restrictedLogLikelihood) / Math.exp(logLikelihood), 2 / weightSum);
    nagelkerkeRSquared =
            coxSnellRSquared / (1 - Math.pow(Math.exp(restrictedLogLikelihood), 2 / weightSum));
    mcfaddenRSquared = 1 - logLikelihood / restrictedLogLikelihood;
    // Information criteria penalize the number of estimated parameters (slopes + intercept).
    AIC = -2 * logLikelihood + 2 * (numberOfSlopes + 1);
    BIC = -2 * logLikelihood + Math.log(weightSum) * (numberOfSlopes + 1);
    PerformanceVector estimatedPerformance = new PerformanceVector();
    estimatedPerformance.addCriterion(
            new EstimatedPerformance("log_likelihood", logLikelihood, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
            new EstimatedPerformance(
                    "restricted_log_likelihood", restrictedLogLikelihood, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
            new EstimatedPerformance(
                    "-2_log_likelihood", minusTwoLogLikelihood, exampleSet.size(), true));
    estimatedPerformance.addCriterion(
            new EstimatedPerformance("model_chi_squared", modelChiSquared, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
            new EstimatedPerformance("goodness_of_fit", goodnessOfFit, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
            new EstimatedPerformance(
                    "cox_snell_r_squared", coxSnellRSquared, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
            new EstimatedPerformance(
                    "nagelkerke_r_squared", nagelkerkeRSquared, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
            new EstimatedPerformance("mcfadden_r_squared", mcfaddenRSquared, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
            new EstimatedPerformance("AIC", AIC, exampleSet.size(), true));
    estimatedPerformance.addCriterion(
            new EstimatedPerformance("BIC", BIC, exampleSet.size(), true));
    estimatedPerformance.setMainCriterionName("AIC");
    return estimatedPerformance;
}