/**
   * Computes the (optionally weighted) log-likelihood of a logistic regression model whose
   * coefficients are the values of the given individual.
   *
   * <p>The coefficients are matched to the example's attributes in iteration order; when {@code
   * addIntercept} is set, the last coefficient is used as intercept. The per-example terms {@code
   * log(pi)} and {@code log(1 - pi)} are evaluated in log space, so a large {@code |eta|} neither
   * overflows {@code exp} nor produces {@code 0 * log(0) = NaN}.
   *
   * @param individual the individual holding the coefficient vector
   * @return a performance vector with the single criterion {@code log_reg_fitness}
   */
  @Override
  public PerformanceVector evaluateIndividual(Individual individual) {
    double[] beta = individual.getValues();

    double fitness = 0.0d;
    for (Example example : exampleSet) {
      // linear predictor eta = beta * x (+ intercept)
      double eta = 0.0d;
      int i = 0;
      for (Attribute attribute : example.getAttributes()) {
        eta += beta[i] * example.getValue(attribute);
        i++;
      }
      if (addIntercept) {
        eta += beta[beta.length - 1];
      }

      // With pi = exp(eta) / (1 + exp(eta)):
      //   log(pi)     = min(eta, 0)  - log(1 + exp(-|eta|))
      //   log(1 - pi) = min(-eta, 0) - log(1 + exp(-|eta|))
      // exp is only ever called with a non-positive argument, so it cannot overflow, and both
      // logarithms stay finite for every finite eta.
      double softplus = Math.log1p(Math.exp(-Math.abs(eta)));
      double logPi = Math.min(eta, 0.0d) - softplus;
      double logOneMinusPi = Math.min(-eta, 0.0d) - softplus;

      double classValue = example.getValue(label);
      double currentFitness = classValue * logPi + (1 - classValue) * logOneMinusPi;
      double weightValue = 1.0d;
      if (weight != null) {
        weightValue = example.getValue(weight);
      }
      fitness += weightValue * currentFitness;
    }

    PerformanceVector performanceVector = new PerformanceVector();
    performanceVector.addCriterion(
        new EstimatedPerformance("log_reg_fitness", fitness, exampleSet.size(), false));
    return performanceVector;
  }
  /**
   * Builds the sparse SVM example representation from a RapidMiner example set.
   *
   * <p>Delegates to {@code this(exampleSet.size(), 0.0d)} for the basic initialization. For every
   * example only the attribute values that are not the attribute's default are stored, together
   * with their attribute position. Values with an entry in the given mean/variance map are
   * standardized (or set to zero if the variance is zero). Labels and ids are copied if the
   * corresponding attributes are available.
   *
   * @param exampleSet the source examples
   * @param labelAttribute the label attribute, or {@code null} if no label should be read
   * @param meanVariances per attribute position, the mean and variance used for standardization
   */
  public SVMExamples(
      com.rapidminer.example.ExampleSet exampleSet,
      Attribute labelAttribute,
      Map<Integer, MeanVariance> meanVariances) {
    this(exampleSet.size(), 0.0d);
    this.meanVarianceMap = meanVariances;

    Attribute idAttribute = exampleSet.getAttributes().getId();
    int row = 0;
    for (com.rapidminer.example.Example current : exampleSet) {
      // collect the non-default values of this example, keyed by attribute position
      Map<Integer, Double> sparseValues = new LinkedHashMap<Integer, Double>();
      int position = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        double rawValue = current.getValue(attribute);
        if (!com.rapidminer.example.Tools.isDefault(attribute.getDefault(), rawValue)) {
          sparseValues.put(position, rawValue);
        }
        position++;
        // dim tracks the largest number of attribute positions seen so far
        if (position > dim) {
          dim = position;
        }
      }

      atts[row] = new double[sparseValues.size()];
      index[row] = new int[sparseValues.size()];
      int column = 0;
      for (Map.Entry<Integer, Double> entry : sparseValues.entrySet()) {
        Integer attributeIndex = entry.getKey();
        index[row][column] = attributeIndex.intValue();

        double scaled = entry.getValue().doubleValue();
        MeanVariance meanVariance = meanVarianceMap.get(attributeIndex);
        if (meanVariance != null) {
          // constant attributes carry no information and are mapped to zero
          scaled =
              (meanVariance.getVariance() == 0.0d)
                  ? 0.0d
                  : (scaled - meanVariance.getMean()) / Math.sqrt(meanVariance.getVariance());
        }
        atts[row][column] = scaled;
        column++;
      }

      if (labelAttribute != null) {
        double label = current.getValue(labelAttribute);
        if (labelAttribute.isNominal()) {
          // nominal labels are encoded as +1 (positive class) / -1 (everything else)
          boolean positive = label == labelAttribute.getMapping().getPositiveIndex();
          ys[row] = positive ? 1 : -1;
        } else {
          ys[row] = label;
        }
      }
      if (idAttribute != null) {
        ids[row] = current.getValueAsString(idAttribute);
      }
      row++;
    }
  }
  /**
   * Scans the examples in increasing order of the given attribute and returns the best threshold
   * split according to the online benefit counting of {@code criterion}.
   *
   * <p>A candidate threshold is evaluated while no label has been recorded yet, and afterwards
   * whenever both the label differs from the label recorded at the previous candidate and the
   * attribute value differs from the previous example's value. The candidate is the midpoint
   * between the previous and the current attribute value.
   *
   * @param inputSet the examples to split; a clone of it is sorted, the input itself is not
   * @param attribute the attribute used for sorting and for the split candidates
   * @param labelName if non-null, benefits are requested for the label index this name maps to
   * @return a {@link Split} holding the best split value (NaN if no candidate qualified), the
   *     pair {@code {bestBenefit, bestTotalWeight}} and the split type
   */
  public Split getBestSplit(ExampleSet inputSet, Attribute attribute, String labelName) {
    SortedExampleSet exampleSet =
        new SortedExampleSet((ExampleSet) inputSet.clone(), attribute, SortedExampleSet.INCREASING);

    Attribute labelAttribute = exampleSet.getAttributes().getLabel();
    // NOTE(review): mapString is called even when labelName is null (the null case is only
    // distinguished further down) — confirm that mapString tolerates null.
    int labelIndex = labelAttribute.getMapping().mapString(labelName);

    // NaN marks "nothing seen yet" for the label, the split value and the previous value
    double oldLabel = Double.NaN;
    double bestSplit = Double.NaN;
    double lastValue = Double.NaN;
    double bestBenefit = Double.NEGATIVE_INFINITY;
    double bestTotalWeight = 0;
    int bestSplitType = Split.LESS_SPLIT;

    // initiating online counting of benefit: only 2 Datascans needed then
    criterion.reinitOnlineCounting(exampleSet);
    for (Example e : exampleSet) {
      double currentValue = e.getValue(attribute);
      double label = e.getValue(labelAttribute);
      // && binds tighter than ||: evaluate if no label was recorded yet, or if both the label
      // and the attribute value changed
      if ((Double.isNaN(oldLabel)) || (oldLabel != label) && (lastValue != currentValue)) {
        // lastValue is still NaN for the very first example, so that candidate is NaN
        double splitValue = (lastValue + currentValue) / 2.0d;

        double[] benefits;
        if (labelName == null) {
          benefits = criterion.getOnlineBenefit(e);
        } else {
          benefits = criterion.getOnlineBenefit(e, labelIndex);
        }
        // online method returns both possible relations in one array(greater / smaller) in one
        // array
        // benefits[0] / benefits[1] are used as benefit and total weight of the LESS_SPLIT
        // relation; a candidate wins on higher benefit, ties are broken by higher total weight
        if ((benefits[0] > minValue)
            && (benefits[0] > 0)
            && (benefits[1] > 0)
            && ((benefits[0] > bestBenefit)
                || ((benefits[0] == bestBenefit) && (benefits[1] > bestTotalWeight)))) {
          bestBenefit = benefits[0];
          bestSplit = splitValue;
          bestTotalWeight = benefits[1];
          bestSplitType = Split.LESS_SPLIT;
        }
        // benefits[2] / benefits[3] are the same pair for the GREATER_SPLIT relation
        if ((benefits[2] > minValue)
            && (benefits[2] > 0)
            && (benefits[3] > 0)
            && ((benefits[2] > bestBenefit)
                || ((benefits[2] == bestBenefit) && (benefits[3] > bestTotalWeight)))) {
          bestBenefit = benefits[2];
          bestSplit = splitValue;
          bestTotalWeight = benefits[3];
          bestSplitType = Split.GREATER_SPLIT;
        }
        // the label is only recorded when a candidate was evaluated
        oldLabel = label;
      }
      lastValue = currentValue;
      // move the current example into the online counts before looking at the next one
      criterion.update(e);
    }
    return new Split(bestSplit, new double[] {bestBenefit, bestTotalWeight}, bestSplitType);
  }
  /**
   * Trains a {@code _slopeOne} recommender on the incoming example set.
   *
   * <p>The example set must provide the special roles "user identification" and "item
   * identification" as well as a label (the rating); otherwise a {@link UserError} with code 105
   * is thrown. The unchanged example set and the trained recommender are delivered on the two
   * output ports.
   */
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData();

    IEntityMapping userMapping = new EntityMapping();
    IEntityMapping itemMapping = new EntityMapping();
    IRatings trainingData = new Ratings();

    // all three roles are mandatory
    if (exampleSet.getAttributes().getSpecial("user identification") == null
        || exampleSet.getAttributes().getSpecial("item identification") == null
        || exampleSet.getAttributes().getLabel() == null) {
      throw new UserError(this, 105);
    }

    Attributes roles = exampleSet.getAttributes();
    Attribute userAttribute = roles.getRole("user identification").getAttribute();
    Attribute itemAttribute = roles.getRole("item identification").getAttribute();
    Attribute ratingAttribute = roles.getLabel();

    // translate external ids to internal ids and collect the ratings
    for (Example example : exampleSet) {
      int uid = userMapping.ToInternalID((int) example.getValue(userAttribute));
      int iid = itemMapping.ToInternalID((int) example.getValue(itemAttribute));
      trainingData.Add(uid, iid, example.getValue(ratingAttribute));
    }

    _slopeOne recommender = new _slopeOne();
    recommender.user_mapping = userMapping;
    recommender.item_mapping = itemMapping;
    recommender.SetMinRating(getParameterAsInt("Min Rating"));
    // the maximum rating is given relative to the minimum rating
    recommender.SetMaxRating(recommender.GetMinRating() + getParameterAsInt("Range"));
    recommender.SetRatings(trainingData);
    recommender.Train();

    exampleSetOutput.deliver(exampleSet);
    exampleSetOutput1.deliver(recommender);
  }
 /**
  * Adds the given example to the online counts: its weight (or 1 if there is no weight attribute)
  * is added both to the counter of its label and to the overall weight.
  *
  * @param example the example to account for
  */
 public void update(Example example) {
   int labelIndex = (int) example.getValue(labelAttribute);
   double increment = (weightAttribute != null) ? example.getValue(weightAttribute) : 1d;
   labelWeights[labelIndex] += increment;
   weight += increment;
 }
  /**
   * Collects all examples with positive weight in a linear-list data collection and wraps it in
   * a {@link LocalPolynomialRegressionModel}.
   *
   * <p>If robust estimation is switched on, the example weights are computed first by a {@link
   * LocalPolynomialExampleWeightingOperator} on a clone of the example set, and weights are used
   * regardless of the "use example weights" parameter.
   *
   * @param exampleSet the training examples
   * @return the local polynomial regression model
   * @throws OperatorException if the weighting operator cannot be created (user error 904) or a
   *     called component fails
   */
  @Override
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    DistanceMeasure measure = DistanceMeasures.createMeasure(this);
    measure.init(exampleSet);
    GeometricDataCollection<RegressionData> data = new LinearList<RegressionData>(measure);

    boolean useWeights = getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS);
    if (getParameterAsBoolean(PARAMETER_USE_ROBUST_ESTIMATION)) {
      // robust estimation always works on the weights it computes itself
      useWeights = true;
      try {
        LocalPolynomialExampleWeightingOperator weightingOperator =
            OperatorService.createOperator(LocalPolynomialExampleWeightingOperator.class);
        exampleSet = weightingOperator.doWork((ExampleSet) exampleSet.clone(), this);
      } catch (OperatorCreationException e) {
        throw new UserError(this, 904, "LocalPolynomialExampleWeighting", e.getMessage());
      }
    }

    Attributes attributes = exampleSet.getAttributes();
    Attribute label = attributes.getLabel();
    Attribute weightAttribute = attributes.getWeight();
    boolean readWeights = weightAttribute != null && useWeights;
    for (Example example : exampleSet) {
      double labelValue = example.getValue(label);
      double exampleWeight = readWeights ? example.getValue(weightAttribute) : 1d;

      // examples without influence are not stored at all
      if (!(exampleWeight > 0d)) {
        continue;
      }

      double[] point = new double[attributes.size()];
      int position = 0;
      for (Attribute attribute : attributes) {
        point[position++] = example.getValue(attribute);
      }
      data.add(point, new RegressionData(point, labelValue, exampleWeight));
    }

    return new LocalPolynomialRegressionModel(
        exampleSet,
        data,
        Neighborhoods.createNeighborhood(this),
        SmoothingKernels.createKernel(this),
        getParameterAsInt(PARAMETER_DEGREE),
        getParameterAsDouble(PARAMETER_RIDGE));
  }
  /**
   * Estimates the variance of each coefficient of the best logistic regression model found so
   * far.
   *
   * <p>The Hessian of the (weighted) log-likelihood is accumulated over all examples and
   * inverted; the absolute values of the diagonal of the inverse are returned. The logistic
   * function is evaluated as {@code 1 / (1 + exp(-eta))}, which saturates to 0 or 1 for a large
   * {@code |eta|} instead of overflowing to NaN and poisoning the whole Hessian.
   *
   * @return one variance per coefficient, or an array of NaN if the Hessian cannot be inverted
   */
  private double[] estimateVariance() {
    double[] beta = getBestValuesEver();

    Matrix hessian = new Matrix(beta.length, beta.length);
    for (Example example : exampleSet) {
      // design vector of this example (with a trailing 1 for the intercept) and its predictor
      double[] values = new double[beta.length];
      double eta = 0.0d;
      int j = 0;
      for (Attribute attribute : example.getAttributes()) {
        double value = example.getValue(attribute);
        values[j] = value;
        eta += beta[j] * value;
        j++;
      }
      if (addIntercept) {
        values[beta.length - 1] = 1.0d;
        eta += beta[beta.length - 1];
      }
      // overflow-safe logistic function: exp(-eta) -> 0 gives 1, exp(-eta) -> infinity gives 0
      double pi = 1.0d / (1.0d + Math.exp(-eta));

      double weightValue = 1.0d;
      if (weight != null) {
        weightValue = example.getValue(weight);
      }
      for (int x = 0; x < beta.length; x++) {
        for (int y = 0; y < beta.length; y++) {
          // sum is second derivative of log likelihood function
          double h = hessian.get(x, y) - values[x] * values[y] * weightValue * pi * (1 - pi);
          hessian.set(x, y, h);
        }
      }
    }

    double[] variance = new double[beta.length];
    Matrix varianceCovarianceMatrix = null;
    try {
      // asymptotic variance-covariance matrix is inverse of hessian matrix
      varianceCovarianceMatrix = hessian.inverse();
    } catch (Exception e) {
      logging.logWarning("could not determine variance-covariance matrix, hessian is singular");
      java.util.Arrays.fill(variance, Double.NaN);
      return variance;
    }
    for (int j = 0; j < beta.length; j++) {
      // get diagonal elements
      variance[j] = Math.abs(varianceCovarianceMatrix.get(j, j));
    }

    return variance;
  }
  /**
   * Converts the example set into a neural training data set with min-max scaled inputs.
   *
   * <p>Minimum and maximum of every attribute and of the label are recalculated and remembered
   * in {@code attributeMin}, {@code attributeMax}, {@code labelMin} and {@code labelMax}. An
   * attribute whose minimum equals its maximum is only shifted by the minimum. Nominal labels
   * are copied unscaled, other labels are scaled like the attributes.
   *
   * @param exampleSet the examples to convert
   * @return the data set holding one input row and one single-valued output row per example
   */
  private NeuralDataSet getTraining(ExampleSet exampleSet) {
    int attributeCount = exampleSet.getAttributes().size();
    double[][] inputs = new double[exampleSet.size()][attributeCount];
    double[][] targets = new double[exampleSet.size()][1];
    Attribute label = exampleSet.getAttributes().getLabel();

    // remember the value ranges; they are needed again to scale data at application time
    this.attributeMin = new double[exampleSet.getAttributes().size()];
    this.attributeMax = new double[attributeMin.length];
    exampleSet.recalculateAllAttributeStatistics();
    int column = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
      this.attributeMin[column] = exampleSet.getStatistics(attribute, Statistics.MINIMUM);
      this.attributeMax[column] = exampleSet.getStatistics(attribute, Statistics.MAXIMUM);
      column++;
    }
    this.labelMin = exampleSet.getStatistics(label, Statistics.MINIMUM);
    this.labelMax = exampleSet.getStatistics(label, Statistics.MAXIMUM);

    int row = 0;
    for (Example example : exampleSet) {
      column = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        double shifted = example.getValue(attribute) - attributeMin[column];
        // constant attributes cannot be divided by their (zero) range
        inputs[row][column] =
            (attributeMin[column] != attributeMax[column])
                ? shifted / (attributeMax[column] - attributeMin[column])
                : shifted;
        column++;
      }

      if (label.isNominal()) {
        targets[row][0] = example.getValue(label);
      } else {
        double shiftedLabel = example.getValue(label) - labelMin;
        targets[row][0] = (labelMax != labelMin) ? shiftedLabel / (labelMax - labelMin) : shiftedLabel;
      }
      row++;
    }

    return new BasicNeuralDataSet(inputs, targets);
  }
  /**
   * Trains a hyperplane model with perceptron-style updates.
   *
   * <p>Starting from zero coefficients and zero intercept, every misclassified example moves the
   * intercept and the coefficients into the direction of its class, scaled by the learning rate
   * of the current round.
   *
   * @param exampleSet the training examples with a binominal label
   * @return the trained hyperplane model
   */
  @Override
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    Kernel kernel = getKernel();
    kernel.init(exampleSet);

    double initLearnRate = getParameterAsDouble(PARAMETER_LEARNING_RATE);
    NominalMapping labelMapping = exampleSet.getAttributes().getLabel().getMapping();
    String classNeg = labelMapping.getNegativeString();
    String classPos = labelMapping.getPositiveString();
    double classValueNeg = labelMapping.getNegativeIndex();

    HyperplaneModel model = new HyperplaneModel(exampleSet, classNeg, classPos, kernel);
    model.init(new double[exampleSet.getAttributes().size()], 0);

    // NOTE(review): the inclusive bound makes this run (rounds + 1) passes — confirm intended
    for (int round = 0; round <= getParameterAsInt(PARAMETER_ROUNDS); round++) {
      double learnRate = getLearnRate(round, getParameterAsInt(PARAMETER_ROUNDS), initLearnRate);
      Attributes attributes = exampleSet.getAttributes();
      for (Example example : exampleSet) {
        double prediction = model.predict(example);
        if (prediction == example.getLabel()) {
          continue; // correctly classified, nothing to adapt
        }
        double step = learnRate * ((example.getLabel() == classValueNeg) ? -1 : 1);
        model.setIntercept(model.getIntercept() + step);
        // the coefficient array is adapted in place
        double[] coefficients = model.getCoefficients();
        int position = 0;
        for (Attribute attribute : attributes) {
          coefficients[position] += step * example.getValue(attribute);
          position++;
        }
      }
    }
    return model;
  }
// Example #10
// 0
  /**
   * Creates a date-time attribute from a numerical attribute by adding a time offset to every
   * value.
   *
   * <p>Missing values (NaN) stay missing. Depending on the "keep old attribute" parameter the
   * new attribute either replaces the old one (taking over its name and special role name) or is
   * added under the old name with the suffix {@code _AS_DATE}.
   *
   * @param exampleSet the example set to change
   * @return the same example set with the additional or replaced attribute
   * @throws OperatorException with user error 111 if the named attribute does not exist
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    String attributeName = getParameterAsString(PARAMETER_ATTRIBUTE_NAME);
    Long offset = getParameterAsLong(PARMETER_TIME_OFFSET);

    Attribute sourceAttribute = exampleSet.getAttributes().get(attributeName);
    if (sourceAttribute == null) {
      throw new UserError(this, 111, attributeName);
    }

    Attribute dateAttribute = AttributeFactory.createAttribute(Ontology.DATE_TIME);
    exampleSet.getExampleTable().addAttribute(dateAttribute);
    exampleSet.getAttributes().addRegular(dateAttribute);

    for (Example example : exampleSet) {
      double timestamp = example.getValue(sourceAttribute);
      // NaN marks a missing value and is carried over without applying the offset
      example.setValue(dateAttribute, Double.isNaN(timestamp) ? timestamp : timestamp + offset);
    }

    if (getParameterAsBoolean(PARAMETER_KEEP_OLD_ATTRIBUTE)) {
      dateAttribute.setName(attributeName + "_AS_DATE");
    } else {
      // the new attribute takes the place of the old one, including its role
      AttributeRole oldRole = exampleSet.getAttributes().getRole(sourceAttribute);
      exampleSet.getAttributes().remove(sourceAttribute);
      dateAttribute.setName(attributeName);
      exampleSet.getAttributes().setSpecialAttribute(dateAttribute, oldRole.getSpecialName());
    }
    return exampleSet;
  }
  /**
   * Replaces every numerical, non-integer attribute by a new integer attribute of the same name.
   *
   * <p>Values are either rounded or truncated, depending on the "round" parameter; missing
   * values (NaN) stay missing. The replaced attributes are removed and the new ones are added as
   * regular attributes after the scan.
   *
   * @param exampleSet the example set to change
   * @return the same example set with the converted attributes
   */
  @Override
  public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
    boolean round = getParameterAsBoolean(PARAMETER_ROUND);

    List<Attribute> integerAttributes = new LinkedList<Attribute>();
    Iterator<Attribute> attributeIterator = exampleSet.getAttributes().iterator();
    while (attributeIterator.hasNext()) {
      Attribute source = attributeIterator.next();
      // only real-valued attributes need a conversion
      if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(source.getValueType(), Ontology.NUMERICAL)
          || Ontology.ATTRIBUTE_VALUE_TYPE.isA(source.getValueType(), Ontology.INTEGER)) {
        continue;
      }

      Attribute target = AttributeFactory.createAttribute(source.getName(), Ontology.INTEGER);
      integerAttributes.add(target);
      exampleSet.getExampleTable().addAttribute(target);
      for (Example example : exampleSet) {
        double originalValue = example.getValue(source);
        double converted;
        if (Double.isNaN(originalValue)) {
          converted = Double.NaN;
        } else {
          converted = round ? Math.round(originalValue) : (long) originalValue;
        }
        example.setValue(target, converted);
      }
      // removal goes through the iterator, so the running iteration stays valid
      attributeIterator.remove();
    }

    // the new attributes are registered only after the iteration has finished
    for (Attribute integerAttribute : integerAttributes) {
      exampleSet.getAttributes().addRegular(integerAttribute);
    }

    return exampleSet;
  }
  /**
   * Computes mean and (population) variance of every attribute of the given example set.
   *
   * <p>The variance is derived as {@code E[x^2] - E[x]^2}. Due to rounding this difference can
   * become slightly negative for (almost) constant attributes; such values are clamped to zero so
   * that a square root taken from the variance later on cannot turn into NaN.
   *
   * @param exampleSet the examples to analyze
   * @return a map from attribute position (in iteration order) to its mean and variance
   */
  private static Map<Integer, MeanVariance> createMeanVariances(
      com.rapidminer.example.ExampleSet exampleSet) {
    double[] sum = new double[exampleSet.getAttributes().size()];
    double[] squaredSum = new double[sum.length];

    for (com.rapidminer.example.Example example : exampleSet) {
      int a = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        double value = example.getValue(attribute);
        sum[a] += value;
        squaredSum[a] += value * value;
        a++;
      }
    }

    Map<Integer, MeanVariance> meanVariances = new HashMap<Integer, MeanVariance>();
    int numberOfExamples = exampleSet.size();
    for (int a = 0; a < sum.length; a++) {
      double mean = sum[a] / numberOfExamples;
      double meanOfSquares = squaredSum[a] / numberOfExamples;
      // a variance is never negative; Math.max keeps NaN (e.g. for an empty example set) as is
      double variance = Math.max(0.0d, meanOfSquares - (mean * mean));
      meanVariances.put(a, new MeanVariance(mean, variance));
    }

    return meanVariances;
  }
 /**
  * Computes the prediction for the given example from the kernel value of its attribute vector
  * and the model coefficients plus the intercept.
  *
  * <p>For a nominal label the index of the negative class is returned for a negative distance and
  * the index of the positive class otherwise; if the example has a predicted label attribute,
  * logistic confidences for both classes are set as well. For other labels the distance itself
  * is returned.
  *
  * @param example the example to predict
  * @return the predicted class index or the raw distance
  */
 @Override
 public double predict(Example example) throws OperatorException {
   // copy the attribute values into a plain vector for the kernel
   double[] values = new double[example.getAttributes().size()];
   int position = 0;
   for (Attribute currentAttribute : example.getAttributes()) {
     values[position] = example.getValue(currentAttribute);
     position++;
   }
   double distance = intercept + kernel.calculateDistance(values, coefficients);

   if (!getLabel().isNominal()) {
     return distance;
   }

   int positiveMapping = getLabel().getMapping().mapString(classPositive);
   int negativeMapping = getLabel().getMapping().mapString(classNegative);
   // confidences can only be stored while the model is applied
   if (example.getAttributes().getPredictedLabel() != null) {
     example.setConfidence(classPositive, 1.0d / (1.0d + java.lang.Math.exp(-distance)));
     example.setConfidence(classNegative, 1.0d / (1.0d + java.lang.Math.exp(distance)));
   }
   return (distance < 0) ? negativeMapping : positiveMapping;
 }
 /**
  * Computes Kendall's tau-b rank correlation statistic, ignoring examples containing missing
  * values, with approximate comparisons.
  *
  * <p>All pairs of examples are compared. The pair counters are {@code long} and the denominator
  * is computed in floating point, because the number of pairs grows quadratically and both the
  * counters and their product exceed the {@code int} range already for moderately sized example
  * sets.
  *
  * @param eSet the example set
  * @param a the first attribute to correlate
  * @param b the second attribute to correlate
  * @param fuzz values within +/- fuzz may be considered tied
  * @return Kendall's tau-b rank correlation, or 0 if the denominator is zero
  * @throws OperatorException
  */
 public static double tau_b(ExampleSet eSet, Attribute a, Attribute b, double fuzz)
     throws OperatorException {
   ExampleSet e = extract(eSet, a, b); // reduced example set
   FuzzyComp fc = new FuzzyComp(fuzz);
   long c = 0; // concordant pairs
   long d = 0; // discordant pairs
   long ta = 0; // pairs tied on a (only)
   long tb = 0; // pairs tied on b (only)
   long tc = 0; // pairs tied on both a and b
   int n = 0; // number of times iterator i is bumped
   Iterator<Example> i = e.iterator();
   while (i.hasNext()) {
     // iterate through all possible pairs
     Example z1 = i.next();
     n++;
     double x = z1.getValue(a);
     double y = z1.getValue(b);
     if (b.isNominal() && a != null) {
       // express the nominal value of b as an index of a's mapping
       String yString = b.getMapping().mapIndex((int) y);
       y = a.getMapping().getIndex(yString);
     }
     Iterator<Example> j = e.iterator();
     for (int k = 0; k < n; k++) {
       j.next(); // increment j to match i
     }
     while (j.hasNext()) {
       // move on to subsequent examples
       Example z2 = j.next();
       double xx = z2.getValue(a);
       double yy = z2.getValue(b);
       if (b.isNominal() && a != null) {
         String yyString = b.getMapping().mapIndex((int) yy);
         yy = a.getMapping().getIndex(yyString);
       }
       // a comparison result of 0 is treated as a tie
       int xc = fc.compare(x, xx);
       int yc = fc.compare(y, yy);
       if (xc == 0) {
         if (yc == 0) {
           tc++; // tied on both attributes
         } else {
           ta++; // tied only on a
         }
       } else if (yc == 0) {
         tb++; // tied only on b
       } else if (xc == yc) {
         c++; // concordant pair
       } else {
         d++; // discordant pair
       }
     }
   }
   double num = c - d;
   // convert to double before multiplying so that the product cannot overflow
   double f1 = c + d + ta;
   double f2 = c + d + tb;
   double den = Math.sqrt(f1 * f2);
   if (den != 0) {
     return num / den;
   }
   return 0;
 }
 /**
  * Accepts exactly those examples whose label value is missing (NaN).
  *
  * @param example the example to check
  * @return {@code true} if the label value of the example is NaN
  */
 @Override
 public boolean conditionOk(Example example) {
   return Double.isNaN(example.getValue(example.getAttributes().getLabel()));
 }
  /**
   * Trains the {@code Random} recommender on positive-only feedback read from the incoming
   * example set.
   *
   * <p>The example set must provide the special roles "user identification" and "item
   * identification"; otherwise a {@link UserError} with code 105 is thrown. The unchanged
   * example set and the trained recommender are delivered on the two output ports.
   */
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData();

    IPosOnlyFeedback feedback = new PosOnlyFeedback();
    IEntityMapping userMapping = new EntityMapping();
    IEntityMapping itemMapping = new EntityMapping();

    // both identification roles are mandatory
    if (exampleSet.getAttributes().getSpecial("user identification") == null
        || exampleSet.getAttributes().getSpecial("item identification") == null) {
      throw new UserError(this, 105);
    }

    Attributes roles = exampleSet.getAttributes();
    Attribute userAttribute = roles.getRole("user identification").getAttribute();
    Attribute itemAttribute = roles.getRole("item identification").getAttribute();

    for (Example example : exampleSet) {
      int uid = (int) example.getValue(userAttribute);
      int iid = (int) example.getValue(itemAttribute);
      feedback.Add(userMapping.ToInternalID(uid), itemMapping.ToInternalID(iid));
      // allows the process to be stopped while the data is read
      checkForStop();
    }

    System.out.println(feedback.GetMaxItemID() + " " + feedback.GetMaxUserID());

    Random recommender = new Random();
    recommender.SetFeedback(feedback);
    recommender.user_mapping = userMapping;
    recommender.item_mapping = itemMapping;
    recommender.Train();

    exampleSetOutput.deliver(exampleSet);
    exampleSetOutput1.deliver(recommender);
  }
 /**
  * Computes Kendall's tau-b rank correlation statistic with exact comparisons; examples with
  * missing values are removed by {@code extract} beforehand.
  *
  * <p>Every example is compared with all examples that follow it in iteration order, and each
  * pair is classified as concordant, discordant or tied.
  *
  * @param eSet the example set
  * @param a the first attribute to correlate
  * @param b the second attribute to correlate
  * @return Kendall's tau-b rank correlation, or 0 if the denominator is zero
  * @throws OperatorException
  */
 public static double tau_b(ExampleSet eSet, Attribute a, Attribute b) throws OperatorException {
   ExampleSet reduced = extract(eSet, a, b);
   long concordant = 0;
   long discordant = 0;
   long tiedOnlyOnA = 0;
   long tiedOnlyOnB = 0;
   long tiedOnBoth = 0;
   int visited = 0; // number of examples the outer iterator has consumed
   Iterator<Example> outer = reduced.iterator();
   while (outer.hasNext()) {
     Example first = outer.next();
     visited++;
     double x = first.getValue(a);
     double y = first.getValue(b);
     if (b.isNominal() && a != null) {
       // express the nominal value of b as an index of a's mapping
       String yString = b.getMapping().mapIndex((int) y);
       y = a.getMapping().getIndex(yString);
     }

     // position the inner iterator directly behind the outer one
     Iterator<Example> inner = reduced.iterator();
     for (int skipped = 0; skipped < visited; skipped++) {
       inner.next();
     }
     while (inner.hasNext()) {
       Example second = inner.next();
       double xx = second.getValue(a);
       double yy = second.getValue(b);
       if (b.isNominal() && a != null) {
         String yyString = b.getMapping().mapIndex((int) yy);
         yy = a.getMapping().getIndex(yyString);
       }

       boolean sameA = x == xx;
       boolean sameB = y == yy;
       if (sameA && sameB) {
         tiedOnBoth++;
       } else if (sameA) {
         tiedOnlyOnA++;
       } else if (sameB) {
         tiedOnlyOnB++;
       } else if ((x > xx && y > yy) || (x < xx && y < yy)) {
         concordant++;
       } else {
         discordant++;
       }
     }
   }

   double numerator = concordant - discordant;
   double pairsNotTiedOnB = concordant + discordant + tiedOnlyOnA;
   double pairsNotTiedOnA = concordant + discordant + tiedOnlyOnB;
   double denominator = Math.sqrt(pairsNotTiedOnB * pairsNotTiedOnA);
   return (denominator != 0) ? numerator / denominator : 0;
 }
 /**
  * Learns a model that always predicts the same default value.
  *
  * <p>The value is the median, average or mode of the label, a constant parameter value, or is
  * taken from an attribute, depending on the method parameter. Median and average are replaced by
  * the mode for nominal labels, the mode is replaced by the average for other labels; a warning
  * is logged in both cases. For the mode the relative frequency of every label value is passed on
  * as confidence.
  *
  * @param exampleSet the training examples
  * @return the default model, or an attribute default model for the attribute method
  * @throws OperatorException if the method is unknown
  */
 public Model learn(ExampleSet exampleSet) throws OperatorException {
   double value = 0.0;
   double[] confidences = null;
   int method = getParameterAsInt(PARAMETER_METHOD);
   Attribute label = exampleSet.getAttributes().getLabel();

   // fall back to a method that fits the type of the label
   boolean nominalLabel = label.isNominal();
   if (nominalLabel && (method == MEDIAN || method == AVERAGE)) {
     logWarning(
         "Cannot use method '" + METHODS[method] + "' for nominal labels: changing to 'mode'!");
     method = MODE;
   } else if (!nominalLabel && method == MODE) {
     logWarning(
         "Cannot use method '"
             + METHODS[method]
             + "' for numerical labels: changing to 'average'!");
     method = AVERAGE;
   }

   switch (method) {
     case MEDIAN:
       {
         double[] sortedLabels = new double[exampleSet.size()];
         int filled = 0;
         for (Example example : exampleSet) {
           sortedLabels[filled++] = example.getValue(example.getAttributes().getLabel());
         }
         java.util.Arrays.sort(sortedLabels);
         value = sortedLabels[exampleSet.size() / 2];
         break;
       }
     case AVERAGE:
       {
         exampleSet.recalculateAttributeStatistics(label);
         value = exampleSet.getStatistics(label, Statistics.AVERAGE);
         break;
       }
     case MODE:
       {
         exampleSet.recalculateAttributeStatistics(label);
         value = exampleSet.getStatistics(label, Statistics.MODE);
         // the confidence of a label value is its relative frequency
         confidences = new double[label.getMapping().size()];
         for (int labelIndex = 0; labelIndex < confidences.length; labelIndex++) {
           String labelValue = label.getMapping().mapIndex(labelIndex);
           confidences[labelIndex] =
               exampleSet.getStatistics(label, Statistics.COUNT, labelValue) / exampleSet.size();
         }
         break;
       }
     case CONSTANT:
       {
         value = getParameterAsDouble(PARAMETER_CONSTANT);
         break;
       }
     case ATTRIBUTE:
       return new AttributeDefaultModel(
           exampleSet, getParameterAsString(PARAMETER_ATTRIBUTE_NAME));
     default:
       // cannot happen
       throw new OperatorException("DefaultLearner: Unknown default method '" + method + "'!");
   }

   log(
       "Default value is '"
           + (label.isNominal() ? label.getMapping().mapIndex((int) value) : value + "")
           + "'.");
   return new DefaultModel(exampleSet, value, confidences);
 }
 /**
  * Returns the values of the given attributes for the example, each reduced by the mean at the
  * same position.
  *
  * @param example the example to read
  * @param attributes the attributes to read, in iteration order
  * @param means one mean per attribute, in the same order
  * @return the centered values
  */
 private double[] getAttributeValues(Example example, Attributes attributes, double[] means) {
   final double[] centered = new double[attributes.size()];
   int position = 0;
   for (Attribute attribute : attributes) {
     double raw = example.getValue(attribute);
     centered[position] = raw - means[position];
     position++;
   }
   return centered;
 }
 /**
  * Copies the values of the given attributes for the example into a new array.
  *
  * @param example the example to read
  * @param attributes the attributes to read, in iteration order
  * @return the attribute values in iteration order
  */
 private double[] getAsDoubleArray(Example example, Attributes attributes) {
   final double[] copy = new double[attributes.size()];
   int position = 0;
   for (Attribute attribute : attributes) {
     copy[position++] = example.getValue(attribute);
   }
   return copy;
 }
  /**
   * Computes the set difference "minuend minus subtrahend" based on the id attributes.
   *
   * <p>The given example set is the minuend, the subtrahend is read from {@code
   * subtrahendInput}. An example of the minuend is kept if the subtrahend contains no example
   * with the same id; nominal ids are compared by their string value.
   *
   * @param exampleSet the minuend
   * @return a mapped view on the minuend containing only the kept examples
   * @throws OperatorException with user error 129 if one of the sets has no id attribute, or
   *     user error 120 if the value types of the two id attributes differ
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // recall: difference = minuend - subtrahend
    ExampleSet subtrahendSet = subtrahendInput.getData(ExampleSet.class);
    ExampleSet minuendSet = exampleSet;

    subtrahendSet.remapIds();
    minuendSet.remapIds();

    Attribute minuendId = minuendSet.getAttributes().getId();
    Attribute subtrahendId = subtrahendSet.getAttributes().getId();

    // sanity checks
    if ((minuendId == null) || (subtrahendId == null)) {
      throw new UserError(this, 129);
    }
    if (minuendId.getValueType() != subtrahendId.getValueType()) {
      throw new UserError(
          this,
          120,
          new Object[] {
            subtrahendId.getName(),
            Ontology.VALUE_TYPE_NAMES[subtrahendId.getValueType()],
            Ontology.VALUE_TYPE_NAMES[minuendId.getValueType()]
          });
    }

    // positions of all minuend examples without a counterpart in the subtrahend
    List<Integer> indices = new LinkedList<>();
    {
      int i = 0;
      for (Example example : minuendSet) {
        double id = example.getValue(minuendId);
        Example subtrahendExample = null;
        if (minuendId.isNominal()) {
          // the two id attributes may use different mappings, so go through the string value
          subtrahendExample =
              subtrahendSet.getExampleFromId(
                  subtrahendId.getMapping().getIndex(minuendId.getMapping().mapIndex((int) id)));
        } else {
          subtrahendExample = subtrahendSet.getExampleFromId(id);
        }
        if (subtrahendExample == null) {
          indices.add(i);
        }
        i++;
      }
    }

    // iterate instead of calling get(i): positional access on a linked list is linear, which
    // made this copy quadratic in the number of kept examples
    int[] indexArray = new int[indices.size()];
    int position = 0;
    for (int index : indices) {
      indexArray[position++] = index;
    }

    ExampleSet minusSet = new MappedExampleSet(minuendSet, indexArray);
    return minusSet;
  }
  /**
   * Copies the values of all attributes returned by {@code example.getAttributes()} into a new
   * array.
   *
   * @param example the example to read
   * @return the attribute values in iteration order
   */
  private double[] getExampleValues(Example example) {
    Attributes attributes = example.getAttributes();
    final double[] copy = new double[attributes.size()];
    int position = 0;
    for (Attribute attribute : attributes) {
      copy[position++] = example.getValue(attribute);
    }
    return copy;
  }
 /**
  * Writes the transformed values into the new attributes: for every example and every numerical
  * old attribute, the result of {@code computeValue} for the old value is stored in the new
  * attribute at the same array position. Old attributes that are not numerical are skipped.
  *
  * @param exampleSet the examples to process
  * @param oldAttributes the attributes to read from
  * @param newAttributes the attributes to write to, parallel to {@code oldAttributes}
  */
 protected void applyOnData(
     ExampleSet exampleSet, Attribute[] oldAttributes, Attribute[] newAttributes) {
   for (Example example : exampleSet) {
     for (int position = 0; position < oldAttributes.length; position++) {
       Attribute source = oldAttributes[position];
       if (!source.isNumerical()) {
         continue;
       }
       double transformed = computeValue(source, example.getValue(source));
       example.setValue(newAttributes[position], transformed);
     }
   }
 }
 /**
  * Adds the given weight to the frequency of the example's value of the source attribute.
  * Missing values (NaN) are ignored. The frequency is kept in the {@code frequencies} array
  * (indexed by the value) if that array exists, and in {@code frequenciesMap} otherwise.
  *
  * @param example the example to count
  * @param weight the weight to add for this example
  */
 @Override
 public void count(Example example, double weight) {
   double value = example.getValue(sourceAttribute);
   if (Double.isNaN(value)) {
     return;
   }
   if (frequencies != null) {
     frequencies[(int) value] += weight;
     return;
   }
   Double previous = frequenciesMap.get(value);
   if (previous == null) {
     frequenciesMap.put(value, weight);
   } else {
     frequenciesMap.put(value, previous + weight);
   }
 }
 /**
  * Replaces the value of every numerical attribute by its absolute value, in place.
  *
  * @param exampleSet the example set to change
  * @return the same example set
  */
 @Override
 public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
   for (Example example : exampleSet) {
     for (Attribute attribute : exampleSet.getAttributes()) {
       if (!attribute.isNumerical()) {
         continue;
       }
       example.setValue(attribute, Math.abs(example.getValue(attribute)));
     }
   }
   return exampleSet;
 }
 /**
  * Prepares the online counting for the given example set: the total weight per label value and
  * the overall total weight are computed in one scan, and the running counters that are filled
  * by later updates are reset to zero.
  *
  * @param exampleSet the examples the following online counting refers to
  */
 public void reinitOnlineCounting(ExampleSet exampleSet) {
   labelAttribute = exampleSet.getAttributes().getLabel();
   weightAttribute = exampleSet.getAttributes().getWeight();
   totalLabelWeights = new double[labelAttribute.getMapping().size()];
   totalWeight = 0d;

   // every example counts with its weight, or with 1 if there is no weight attribute
   boolean weighted = exampleSet.getAttributes().getWeight() != null;
   for (Example example : exampleSet) {
     double increment = weighted ? example.getWeight() : 1d;
     totalLabelWeights[(int) example.getValue(labelAttribute)] += increment;
   }
   for (double labelTotal : totalLabelWeights) {
     totalWeight += labelTotal;
   }

   // resetting online counter for subtraction
   labelWeights = new double[labelAttribute.getMapping().size()];
   weight = 0;
 }
  /**
   * Splits the example set into two partitions by comparing an attribute with a threshold.
   *
   * @param exampleSet the examples to split
   * @param attribute the attribute whose value is compared
   * @param value the threshold; examples with a value less than or equal to it go into
   *     partition 0, all other examples into partition 1
   * @return the splitted example set with the two partitions
   */
  public static SplittedExampleSet splitByAttribute(
      ExampleSet exampleSet, Attribute attribute, double value) {
    int[] partitionOfExample = new int[exampleSet.size()];
    int position = 0;
    for (Example example : exampleSet) {
      boolean belowOrEqual = example.getValue(attribute) <= value;
      partitionOfExample[position] = belowOrEqual ? 0 : 1;
      position++;
    }
    return new SplittedExampleSet(exampleSet, new Partition(partitionOfExample, 2));
  }
 /**
  * Initializes the divergence and checks that it is applicable: the example set must have only
  * numerical attributes, exactly one attribute, and all of its values must be greater than zero.
  *
  * @param exampleSet the example set the divergence will be used on
  * @throws OperatorException if one of the checks fails
  */
 @Override
 public void init(ExampleSet exampleSet) throws OperatorException {
   super.init(exampleSet);
   Tools.onlyNumericalAttributes(exampleSet, "value based similarities");

   Attributes attributes = exampleSet.getAttributes();
   if (attributes.size() != 1) {
     throw new OperatorException(
         "The bregman divergence you've choosen is not applicable for the dataset! Proceeding with the 'Squared Euclidean distance' bregman divergence.");
   }
   for (Example example : exampleSet) {
     for (Attribute attribute : attributes) {
       boolean positive = !(example.getValue(attribute) <= 0);
       if (!positive) {
         throw new OperatorException(
             "The bregman divergence you've choosen is not applicable for the dataset! Proceeding with the 'Squared Euclidean distance' bregman divergence.");
       }
     }
   }
 }
// Example #29
// 0
 /**
  * Subtracts the vector {@code y} component-wise from the attribute values of the example.
  *
  * @param x the example providing the minuend values, in attribute iteration order
  * @param y the subtrahend; must have as many entries as the example has attributes
  * @return a new array holding the differences
  * @throws RuntimeException if the number of attributes differs from the length of {@code y}
  */
 public double[] vectorSubtraction(Example x, double[] y) {
   if (x.getAttributes().size() != y.length) {
     throw new RuntimeException(
         "Cannot substract vectors: incompatible numbers of attributes ("
             + x.getAttributes().size()
             + " != "
             + y.length
             + ")!");
   }

   final double[] difference = new double[x.getAttributes().size()];
   int position = 0;
   for (Attribute attribute : x.getAttributes()) {
     difference[position] = x.getValue(attribute) - y[position];
     position++;
   }
   return difference;
 }
  /**
   * Adds the weight of the given model to the estimate of the class each example was predicted
   * as.
   *
   * <p>A NaN estimate is logged and treated as 0; an infinite estimate is left unchanged.
   *
   * @param exampleSet the examples carrying the current predictions
   * @param modelNr the number of the model whose weight is added
   * @param specialAttributes the estimate attributes, indexed by predicted class
   */
  private void updateEstimates(ExampleSet exampleSet, int modelNr, Attribute[] specialAttributes) {
    for (Example example : exampleSet) {
      int predicted = (int) example.getPredictedLabel();

      double estimate = example.getValue(specialAttributes[predicted]);
      if (Double.isNaN(estimate)) {
        logWarning("Found NaN confidence as intermediate prediction.");
        estimate = 0;
      }
      if (Double.isInfinite(estimate)) {
        continue;
      }
      example.setValue(specialAttributes[predicted], estimate + this.getWeightForModel(modelNr));
    }
  }