@Override
 public double predict(Example example) throws OperatorException {
   // Collect the example's attribute values into a dense vector, in attribute order.
   double[] attributeValues = new double[example.getAttributes().size()];
   int index = 0;
   for (Attribute attribute : example.getAttributes()) {
     attributeValues[index++] = example.getValue(attribute);
   }

   // Function value: intercept plus the kernel distance between values and coefficients.
   double functionValue = intercept + kernel.calculateDistance(attributeValues, coefficients);

   if (!getLabel().isNominal()) {
     // Regression case: the raw function value is the prediction.
     return functionValue;
   }

   // Classification case: the sign of the function value selects the class index.
   int positiveIndex = getLabel().getMapping().mapString(classPositive);
   int negativeIndex = getLabel().getMapping().mapString(classNegative);
   boolean isApplying = example.getAttributes().getPredictedLabel() != null;
   if (isApplying) {
     // During model application also attach logistic confidences for both classes.
     example.setConfidence(classPositive, 1.0d / (1.0d + java.lang.Math.exp(-functionValue)));
     example.setConfidence(classNegative, 1.0d / (1.0d + java.lang.Math.exp(functionValue)));
   }
   return functionValue < 0 ? negativeIndex : positiveIndex;
 }
 /** Accumulates covered weight (and positive weight) for examples matched by this rule. */
 public void apply(Example example) {
   if (!applicable(example)) {
     return;
   }
   // Use the example's weight attribute if present, otherwise count the example once.
   double exampleWeight =
       (example.getAttributes().getWeight() != null) ? example.getWeight() : 1.0d;
   coveredWeight += exampleWeight;
   int positiveIndex = example.getAttributes().getLabel().getMapping().getPositiveIndex();
   if (example.getLabel() == positiveIndex) {
     positiveWeight += exampleWeight;
   }
 }
  /**
   * Evaluates one candidate coefficient vector by its (weighted) log likelihood under the
   * logistic regression model. When {@code addIntercept} is set, the last entry of the
   * individual's value vector is treated as the intercept.
   *
   * @param individual candidate holding the coefficient vector beta
   * @return a performance vector with the single criterion {@code log_reg_fitness}
   */
  @Override
  public PerformanceVector evaluateIndividual(Individual individual) {
    double[] beta = individual.getValues();

    double fitness = 0.0d;
    for (Example example : exampleSet) {
      // Linear predictor eta = beta . x (+ intercept).
      double eta = 0.0d;
      int i = 0;
      for (Attribute attribute : example.getAttributes()) {
        double value = example.getValue(attribute);
        eta += beta[i] * value;
        i++;
      }
      if (addIntercept) {
        eta += beta[beta.length - 1];
      }
      // Numerically stable logistic function: exp(eta) / (1 + exp(eta)) evaluates to
      // NaN (Infinity / Infinity) for large eta, while 1 / (1 + exp(-eta)) is the same
      // function and stays finite.
      double pi = 1.0d / (1.0d + Math.exp(-eta));

      // Per-example log likelihood: y*log(pi) + (1-y)*log(1-pi).
      double classValue = example.getValue(label);
      double currentFitness = classValue * Math.log(pi) + (1 - classValue) * Math.log(1 - pi);
      double weightValue = 1.0d;
      if (weight != null) {
        weightValue = example.getValue(weight);
      }
      fitness += weightValue * currentFitness;
    }

    PerformanceVector performanceVector = new PerformanceVector();
    performanceVector.addCriterion(
        new EstimatedPerformance("log_reg_fitness", fitness, exampleSet.size(), false));
    return performanceVector;
  }
// Example #4
 /**
  * Computes the element-wise difference between the attribute values of {@code x} and the
  * vector {@code y}.
  *
  * @param x example whose attribute values form the minuend vector
  * @param y subtrahend vector; must have exactly one entry per attribute of {@code x}
  * @return a new array containing {@code x[i] - y[i]} for every attribute, in attribute order
  * @throws IllegalArgumentException if the number of attributes does not match {@code y.length}
  */
 public double[] vectorSubtraction(Example x, double[] y) {
   if (x.getAttributes().size() != y.length) {
     // IllegalArgumentException (a RuntimeException subclass, so existing callers are
     // unaffected) states the contract violation precisely; also fixes the "substract" typo.
     throw new IllegalArgumentException(
         "Cannot subtract vectors: incompatible numbers of attributes ("
             + x.getAttributes().size()
             + " != "
             + y.length
             + ")!");
   }
   double[] result = new double[x.getAttributes().size()];
   int i = 0;
   for (Attribute att : x.getAttributes()) {
     result[i] = x.getValue(att) - y[i];
     i++;
   }
   return result;
 }
 /** Returns true if the label was not defined (its value is NaN) for the given example. */
 @Override
 public boolean conditionOk(Example example) {
   double labelValue = example.getValue(example.getAttributes().getLabel());
   return Double.isNaN(labelValue);
 }
 /**
  * Learns a default model that always predicts a single value, determined by the configured
  * method: the label's median, average, mode, a fixed constant, or another attribute's value.
  * Method choices incompatible with the label type are repaired with a warning.
  */
 public Model learn(ExampleSet exampleSet) throws OperatorException {
   double defaultValue = 0.0;
   double[] confidences = null;
   int method = getParameterAsInt(PARAMETER_METHOD);
   Attribute label = exampleSet.getAttributes().getLabel();

   // Repair method choices that do not fit the label type.
   if ((label.isNominal()) && ((method == MEDIAN) || (method == AVERAGE))) {
     logWarning(
         "Cannot use method '" + METHODS[method] + "' for nominal labels: changing to 'mode'!");
     method = MODE;
   } else if ((!label.isNominal()) && (method == MODE)) {
     logWarning(
         "Cannot use method '"
             + METHODS[method]
             + "' for numerical labels: changing to 'average'!");
     method = AVERAGE;
   }

   switch (method) {
     case MEDIAN:
       // Sort all label values and pick the middle element.
       double[] labelValues = new double[exampleSet.size()];
       int index = 0;
       for (Example example : exampleSet) {
         labelValues[index++] = example.getValue(example.getAttributes().getLabel());
       }
       java.util.Arrays.sort(labelValues);
       defaultValue = labelValues[exampleSet.size() / 2];
       break;
     case AVERAGE:
       exampleSet.recalculateAttributeStatistics(label);
       defaultValue = exampleSet.getStatistics(label, Statistics.AVERAGE);
       break;
     case MODE:
       // Most frequent class; class confidences are the relative class frequencies.
       exampleSet.recalculateAttributeStatistics(label);
       defaultValue = exampleSet.getStatistics(label, Statistics.MODE);
       confidences = new double[label.getMapping().size()];
       for (int i = 0; i < confidences.length; i++) {
         confidences[i] =
             exampleSet.getStatistics(label, Statistics.COUNT, label.getMapping().mapIndex(i))
                 / exampleSet.size();
       }
       break;
     case CONSTANT:
       defaultValue = getParameterAsDouble(PARAMETER_CONSTANT);
       break;
     case ATTRIBUTE:
       return new AttributeDefaultModel(
           exampleSet, getParameterAsString(PARAMETER_ATTRIBUTE_NAME));
     default:
       // cannot happen
       throw new OperatorException("DefaultLearner: Unknown default method '" + method + "'!");
   }
   log(
       "Default value is '"
           + (label.isNominal()
               ? label.getMapping().mapIndex((int) defaultValue)
               : defaultValue + "")
           + "'.");
   return new DefaultModel(exampleSet, defaultValue, confidences);
 }
  /** Copies the example's attribute values into a fresh array, in attribute iteration order. */
  private double[] getExampleValues(Example example) {
    Attributes attributes = example.getAttributes();
    double[] values = new double[attributes.size()];
    int position = 0;
    for (Attribute attribute : attributes) {
      values[position++] = example.getValue(attribute);
    }
    return values;
  }
  /**
   * Estimates the coefficient variances as the (absolute) diagonal of the inverse Hessian of
   * the log likelihood — the asymptotic variance-covariance matrix. Returns an array of NaN
   * entries when the Hessian is singular and cannot be inverted.
   *
   * @return per-coefficient variance estimates, aligned with {@code getBestValuesEver()}
   */
  private double[] estimateVariance() {
    double[] beta = getBestValuesEver();

    Matrix hessian = new Matrix(beta.length, beta.length);
    for (Example example : exampleSet) {
      // Collect attribute values and compute the linear predictor eta = beta . x (+ intercept).
      double[] values = new double[beta.length];
      double eta = 0.0d;
      int j = 0;
      for (Attribute attribute : example.getAttributes()) {
        double value = example.getValue(attribute);
        values[j] = value;
        eta += beta[j] * value;
        j++;
      }
      if (addIntercept) {
        values[beta.length - 1] = 1.0d;
        eta += beta[beta.length - 1];
      }
      // Numerically stable logistic function: exp(eta) / (1 + exp(eta)) evaluates to
      // NaN (Infinity / Infinity) for large eta; 1 / (1 + exp(-eta)) is equivalent and safe.
      double pi = 1.0d / (1.0d + Math.exp(-eta));

      double weightValue = 1.0d;
      if (weight != null) {
        weightValue = example.getValue(weight);
      }
      for (int x = 0; x < beta.length; x++) {
        for (int y = 0; y < beta.length; y++) {
          // sum is second derivative of log likelihood function
          double h = hessian.get(x, y) - values[x] * values[y] * weightValue * pi * (1 - pi);
          hessian.set(x, y, h);
        }
      }
    }

    double[] variance = new double[beta.length];
    Matrix varianceCovarianceMatrix = null;
    try {
      // asymptotic variance-covariance matrix is inverse of hessian matrix
      varianceCovarianceMatrix = hessian.inverse();
    } catch (Exception e) {
      logging.logWarning("could not determine variance-covariance matrix, hessian is singular");
      for (int j = 0; j < beta.length; j++) {
        variance[j] = Double.NaN;
      }
      return variance;
    }
    for (int j = 0; j < beta.length; j++) {
      // get diagonal elements
      variance[j] = Math.abs(varianceCovarianceMatrix.get(j, j));
    }

    return variance;
  }
 /**
  * Builds the classification matrix by applying the model to every SOM grid node's weight
  * vector and recording the predicted label per (x, y) cell, then delegates to the parent.
  */
 @Override
 protected void createMatrices() {
   // Clone the attributes so the temporary table does not share state with exampleSet.
   List<Attribute> attributes = new ArrayList<Attribute>(exampleSet.getAttributes().size());
   for (Attribute attribute : exampleSet.getAttributes()) {
     attributes.add((Attribute) attribute.clone());
   }

   // One data row per SOM grid node, holding that node's weight vector.
   MemoryExampleTable table = new MemoryExampleTable(attributes);
   for (int x = 0; x < dimensions[0]; x++) {
     for (int y = 0; y < dimensions[1]; y++) {
       table.addDataRow(new DoubleArrayDataRow(net.getNodeWeights(new int[] {x, y})));
     }
   }

   ExampleSet set = table.createExampleSet();
   this.classificationMatrix = new double[dimensions[0]][dimensions[1]];
   try {
     set = model.apply(set);
     // Rows were added in (x, y) order above, so read predictions back in the same order.
     Iterator<Example> exampleIterator = set.iterator();
     for (int x = 0; x < dimensions[0]; x++) {
       for (int y = 0; y < dimensions[1]; y++) {
         Example example = exampleIterator.next();
         classificationMatrix[x][y] =
             example.getValue(example.getAttributes().getPredictedLabel());
       }
     }
   } catch (OperatorException e) {
     // NOTE(review): the i18n resource key and the exception message are concatenated with no
     // separator — confirm whether the message should be a log parameter instead.
     LogService.getRoot()
         .log(
             Level.WARNING,
             "com.rapidminer.operator.visualization.SOMModelPlotter.using_model_for_prediction_error"
                 + e.getMessage());
   }
   super.createMatrices();
 }
 /** Checks whether the example's value for the configured attribute is at most the threshold. */
 public boolean test(Example example) {
   Attribute attribute = example.getAttributes().get(getAttributeName());
   return example.getValue(attribute) <= value;
 }
 /** Returns true if any attribute value of the given example is NaN (i.e. missing). */
 private boolean containsNAN(Example e) {
   for (Attribute attribute : e.getAttributes()) {
     if (Double.isNaN(e.getValue(attribute))) {
       return true;
     }
   }
   return false;
 }
  /**
   * Computes goodness-of-fit statistics for the fitted logistic regression model: log
   * likelihoods, model chi-squared, Pearson goodness of fit, Cox&amp;Snell / Nagelkerke /
   * McFadden R-squared, and the AIC / BIC information criteria (AIC is the main criterion).
   *
   * @return a performance vector holding all criteria above
   */
  public PerformanceVector getPerformance() {
    double[] beta = getBestValuesEver();
    // With an intercept, the last beta entry is not a slope coefficient.
    double numberOfSlopes = addIntercept ? beta.length - 1 : beta.length;
    double logLikelihood = getBestFitnessEver();
    double restrictedLogLikelihood = 0.0d;
    double minusTwoLogLikelihood = 0.0d;
    double modelChiSquared = 0.0d;
    double goodnessOfFit = 0.0d;
    double coxSnellRSquared = 0.0d;
    double nagelkerkeRSquared = 0.0d;
    double mcfaddenRSquared = 0.0d;
    double AIC = 0.0d;
    double BIC = 0.0d;

    double weightSum = 0.0d;
    double positiveSum = 0.0d;
    for (Example example : exampleSet) {
      // Linear predictor eta = beta . x (+ intercept).
      double eta = 0.0d;
      int i = 0;
      for (Attribute attribute : example.getAttributes()) {
        double value = example.getValue(attribute);
        eta += beta[i] * value;
        i++;
      }
      if (addIntercept) {
        eta += beta[beta.length - 1];
      }
      // Numerically stable logistic function: exp(eta) / (1 + exp(eta)) evaluates to
      // NaN (Infinity / Infinity) for large eta; 1 / (1 + exp(-eta)) is equivalent and safe.
      double pi = 1.0d / (1.0d + Math.exp(-eta));

      // Pearson residual contribution: (y - pi)^2 / (pi * (1 - pi)).
      double classValue = example.getValue(label);
      double currentFit = (classValue - pi) * (classValue - pi) / (pi * (1 - pi));
      double weightValue = 1.0d;
      if (weight != null) {
        weightValue = example.getValue(weight);
      }
      weightSum += weightValue;
      positiveSum += weightValue * classValue;
      goodnessOfFit += weightValue * currentFit;
    }
    // Null-model likelihood: intercept-only model predicts the positive rate pi0;
    // without an intercept the null model predicts 0.5 for every example.
    double pi0 = positiveSum / weightSum;
    if (addIntercept) {
      restrictedLogLikelihood = weightSum * (pi0 * Math.log(pi0) + (1 - pi0) * Math.log(1 - pi0));
    } else {
      restrictedLogLikelihood = weightSum * Math.log(0.5);
    }
    minusTwoLogLikelihood = -2 * logLikelihood;
    modelChiSquared = 2 * (logLikelihood - restrictedLogLikelihood);
    coxSnellRSquared =
        1 - Math.pow(Math.exp(restrictedLogLikelihood) / Math.exp(logLikelihood), 2 / weightSum);
    nagelkerkeRSquared =
        coxSnellRSquared / (1 - Math.pow(Math.exp(restrictedLogLikelihood), 2 / weightSum));
    mcfaddenRSquared = 1 - logLikelihood / restrictedLogLikelihood;
    // Information criteria penalize the slope count plus the intercept.
    AIC = -2 * logLikelihood + 2 * (numberOfSlopes + 1);
    BIC = -2 * logLikelihood + Math.log(weightSum) * (numberOfSlopes + 1);

    PerformanceVector estimatedPerformance = new PerformanceVector();
    estimatedPerformance.addCriterion(
        new EstimatedPerformance("log_likelihood", logLikelihood, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance(
            "restricted_log_likelihood", restrictedLogLikelihood, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance(
            "-2_log_likelihood", minusTwoLogLikelihood, exampleSet.size(), true));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance("model_chi_squared", modelChiSquared, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance("goodness_of_fit", goodnessOfFit, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance(
            "cox_snell_r_squared", coxSnellRSquared, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance(
            "nagelkerke_r_squared", nagelkerkeRSquared, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance("mcfadden_r_squared", mcfaddenRSquared, exampleSet.size(), false));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance("AIC", AIC, exampleSet.size(), true));
    estimatedPerformance.addCriterion(
        new EstimatedPerformance("BIC", BIC, exampleSet.size(), true));
    estimatedPerformance.setMainCriterionName("AIC");
    return estimatedPerformance;
  }