@Override
  public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
    boolean round = getParameterAsBoolean(PARAMETER_ROUND);

    List<Attribute> newAttributes = new LinkedList<Attribute>();
    Iterator<Attribute> a = exampleSet.getAttributes().iterator();
    while (a.hasNext()) {
      Attribute attribute = a.next();
      if ((Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.NUMERICAL))
          && (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.INTEGER))) {
        Attribute newAttribute =
            AttributeFactory.createAttribute(attribute.getName(), Ontology.INTEGER);
        newAttributes.add(newAttribute);
        exampleSet.getExampleTable().addAttribute(newAttribute);
        for (Example example : exampleSet) {
          double originalValue = example.getValue(attribute);
          if (Double.isNaN(originalValue)) {
            example.setValue(newAttribute, Double.NaN);
          } else {
            long newValue = round ? Math.round(originalValue) : (long) originalValue;
            example.setValue(newAttribute, newValue);
          }
        }
        a.remove();
      }
    }

    for (Attribute attribute : newAttributes) exampleSet.getAttributes().addRegular(attribute);

    return exampleSet;
  }
Example #2
0
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    String attributeName = getParameterAsString(PARAMETER_ATTRIBUTE_NAME);
    Long offset = getParameterAsLong(PARMETER_TIME_OFFSET);

    Attribute numericalAttribute = exampleSet.getAttributes().get(attributeName);
    if (numericalAttribute == null) {
      throw new UserError(this, 111, attributeName);
    }

    Attribute newAttribute = AttributeFactory.createAttribute(Ontology.DATE_TIME);
    exampleSet.getExampleTable().addAttribute(newAttribute);
    exampleSet.getAttributes().addRegular(newAttribute);

    for (Example example : exampleSet) {
      double value = example.getValue(numericalAttribute);
      if (Double.isNaN(value)) {
        example.setValue(newAttribute, value);
      } else {
        value += offset;
        example.setValue(newAttribute, value);
      }
    }

    if (!getParameterAsBoolean(PARAMETER_KEEP_OLD_ATTRIBUTE)) {
      AttributeRole oldRole = exampleSet.getAttributes().getRole(numericalAttribute);
      exampleSet.getAttributes().remove(numericalAttribute);
      newAttribute.setName(attributeName);
      exampleSet.getAttributes().setSpecialAttribute(newAttribute, oldRole.getSpecialName());
    } else {
      newAttribute.setName(attributeName + "_AS_DATE");
    }
    return exampleSet;
  }
  @Override
  public PerformanceVector evaluateIndividual(Individual individual) {
    double[] beta = individual.getValues();

    double fitness = 0.0d;
    for (Example example : exampleSet) {
      double eta = 0.0d;
      int i = 0;
      for (Attribute attribute : example.getAttributes()) {
        double value = example.getValue(attribute);
        eta += beta[i] * value;
        i++;
      }
      if (addIntercept) {
        eta += beta[beta.length - 1];
      }
      double pi = Math.exp(eta) / (1 + Math.exp(eta));

      double classValue = example.getValue(label);
      double currentFitness = classValue * Math.log(pi) + (1 - classValue) * Math.log(1 - pi);
      double weightValue = 1.0d;
      if (weight != null) weightValue = example.getValue(weight);
      fitness += weightValue * currentFitness;
    }

    PerformanceVector performanceVector = new PerformanceVector();
    performanceVector.addCriterion(
        new EstimatedPerformance("log_reg_fitness", fitness, exampleSet.size(), false));
    return performanceVector;
  }
  private static Map<Integer, MeanVariance> createMeanVariances(
      com.rapidminer.example.ExampleSet exampleSet) {
    double[] sum = new double[exampleSet.getAttributes().size()];
    double[] squaredSum = new double[sum.length];

    Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
    while (reader.hasNext()) {
      com.rapidminer.example.Example example = reader.next();
      int a = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        double value = example.getValue(attribute);
        sum[a] += value;
        squaredSum[a] += value * value;
        a++;
      }
    }

    Map<Integer, MeanVariance> meanVariances = new HashMap<Integer, MeanVariance>();
    for (int a = 0; a < sum.length; a++) {
      sum[a] /= exampleSet.size();
      squaredSum[a] /= exampleSet.size();
      meanVariances.put(a, new MeanVariance(sum[a], squaredSum[a] - (sum[a] * sum[a])));
    }

    return meanVariances;
  }
  @Override
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    Kernel kernel = getKernel();
    kernel.init(exampleSet);

    double initLearnRate = getParameterAsDouble(PARAMETER_LEARNING_RATE);
    NominalMapping labelMapping = exampleSet.getAttributes().getLabel().getMapping();
    String classNeg = labelMapping.getNegativeString();
    String classPos = labelMapping.getPositiveString();
    double classValueNeg = labelMapping.getNegativeIndex();
    int numberOfAttributes = exampleSet.getAttributes().size();
    HyperplaneModel model = new HyperplaneModel(exampleSet, classNeg, classPos, kernel);
    model.init(new double[numberOfAttributes], 0);
    for (int round = 0; round <= getParameterAsInt(PARAMETER_ROUNDS); round++) {
      double learnRate = getLearnRate(round, getParameterAsInt(PARAMETER_ROUNDS), initLearnRate);
      Attributes attributes = exampleSet.getAttributes();
      for (Example example : exampleSet) {
        double prediction = model.predict(example);
        if (prediction != example.getLabel()) {
          double direction = (example.getLabel() == classValueNeg) ? -1 : 1;
          // adapting intercept
          model.setIntercept(model.getIntercept() + learnRate * direction);
          // adapting coefficients
          double coefficients[] = model.getCoefficients();
          int i = 0;
          for (Attribute attribute : attributes) {
            coefficients[i] += learnRate * direction * example.getValue(attribute);
            i++;
          }
        }
      }
    }
    return model;
  }
  @Override
  public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    // checking and creating ids if necessary
    Tools.checkAndCreateIds(exampleSet);

    // generating assignment
    RandomGenerator random = RandomGenerator.getRandomGenerator(this);
    int clusterAssignments[] = new int[exampleSet.size()];
    int k = getParameterAsInt(PARAMETER_NUMBER_OF_CLUSTERS);
    for (int i = 0; i < exampleSet.size(); i++) {
      clusterAssignments[i] = random.nextInt(k);
    }

    ClusterModel model =
        new ClusterModel(
            exampleSet,
            k,
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL),
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED));
    model.setClusterAssignments(clusterAssignments, exampleSet);

    // generating cluster attribute
    if (addsClusterAttribute()) {
      Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
      exampleSet.getExampleTable().addAttribute(cluster);
      exampleSet.getAttributes().setCluster(cluster);
      int i = 0;
      for (Example example : exampleSet) {
        example.setValue(cluster, "cluster_" + clusterAssignments[i]);
        i++;
      }
    }
    return model;
  }
  /**
   * Creates a {@link HistogramDataset} for this {@link Attribute}.
   *
   * @param exampleSet
   * @return
   */
  private HistogramDataset createHistogramDataset(final ExampleSet exampleSet) {
    HistogramDataset dataset = new HistogramDataset();

    double[] array = new double[exampleSet.size()];
    int count = 0;

    for (Example example : exampleSet) {
      double value = example.getDataRow().get(getAttribute());
      // don't use missing values because otherwise JFreeChart tries to plot them too which
      // can lead to false histograms
      if (!Double.isNaN(value)) {
        array[count++] = value;
      }
    }

    // add points to data set (if any)
    if (count > 0) {
      // truncate array if necessary
      if (count < array.length) {
        array = Arrays.copyOf(array, count);
      }
      dataset.addSeries(
          getAttribute().getName(), array, Math.min(array.length, MAX_BINS_HISTOGRAM));
    }

    return dataset;
  }
  /** Creates a new evolutionary SVM optimization. */
  public ClassificationEvoOptimization(
      ExampleSet exampleSet, // training data
      Kernel kernel,
      double c, // SVM paras
      int initType, // start population creation type para
      int maxIterations,
      int generationsWithoutImprovement,
      int popSize, // GA paras
      int selectionType,
      double tournamentFraction,
      boolean keepBest, // selection paras
      int mutationType, // type of mutation
      double crossoverProb,
      boolean showConvergencePlot,
      boolean showPopulationPlot,
      ExampleSet holdOutSet,
      RandomGenerator random,
      LoggingHandler logging,
      Operator executingOperator) {
    super(
        EvoSVM.createBoundArray(0.0d, exampleSet.size()),
        EvoSVM.determineMax(c, kernel, exampleSet, selectionType, exampleSet.size()),
        popSize,
        exampleSet.size(),
        initType,
        maxIterations,
        generationsWithoutImprovement,
        selectionType,
        tournamentFraction,
        keepBest,
        mutationType,
        Double.NaN,
        crossoverProb,
        showConvergencePlot,
        showPopulationPlot,
        random,
        logging,
        executingOperator);
    this.exampleSet = exampleSet;
    this.holdOutSet = holdOutSet;
    this.populationSize = popSize;

    this.kernel = kernel;
    this.c = getMax(0);

    // label values
    this.ys = new double[exampleSet.size()];
    Iterator<Example> reader = exampleSet.iterator();
    int index = 0;
    Attribute label = exampleSet.getAttributes().getLabel();
    while (reader.hasNext()) {
      Example example = reader.next();
      ys[index++] = example.getLabel() == label.getMapping().getPositiveIndex() ? 1.0d : -1.0d;
    }

    // optimization function
    this.optimizationFunction =
        new ClassificationOptimizationFunction(selectionType == NON_DOMINATED_SORTING_SELECTION);
  }
  /**
   * Creates a fresh example set of the given size from the RapidMiner example reader. The alpha
   * values and b are zero, the label will be set if it is known.
   */
  public SVMExamples(
      com.rapidminer.example.ExampleSet exampleSet,
      Attribute labelAttribute,
      Map<Integer, MeanVariance> meanVariances) {
    this(exampleSet.size(), 0.0d);
    this.meanVarianceMap = meanVariances;

    Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
    Attribute idAttribute = exampleSet.getAttributes().getId();
    int exampleCounter = 0;
    while (reader.hasNext()) {
      com.rapidminer.example.Example current = reader.next();
      Map<Integer, Double> attributeMap = new LinkedHashMap<Integer, Double>();
      int a = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        double value = current.getValue(attribute);
        if (!com.rapidminer.example.Tools.isDefault(attribute.getDefault(), value)) {
          attributeMap.put(a, value);
        }
        if ((a + 1) > dim) {
          dim = (a + 1);
        }
        a++;
      }
      atts[exampleCounter] = new double[attributeMap.size()];
      index[exampleCounter] = new int[attributeMap.size()];
      Iterator<Map.Entry<Integer, Double>> i = attributeMap.entrySet().iterator();
      int attributeCounter = 0;
      while (i.hasNext()) {
        Map.Entry<Integer, Double> e = i.next();
        Integer indexValue = e.getKey();
        Double attributeValue = e.getValue();
        index[exampleCounter][attributeCounter] = indexValue.intValue();
        double value = attributeValue.doubleValue();
        MeanVariance meanVariance = meanVarianceMap.get(indexValue);
        if (meanVariance != null) {
          if (meanVariance.getVariance() == 0.0d) {
            value = 0.0d;
          } else {
            value = (value - meanVariance.getMean()) / Math.sqrt(meanVariance.getVariance());
          }
        }
        atts[exampleCounter][attributeCounter] = value;
        attributeCounter++;
      }
      if (labelAttribute != null) {
        double label = current.getValue(labelAttribute);
        if (labelAttribute.isNominal()) {
          ys[exampleCounter] = (label == labelAttribute.getMapping().getPositiveIndex() ? 1 : -1);
        } else {
          ys[exampleCounter] = label;
        }
      }
      if (idAttribute != null) {
        ids[exampleCounter] = current.getValueAsString(idAttribute);
      }
      exampleCounter++;
    }
  }
 /** Returns true if the label was not defined. */
 @Override
 public boolean conditionOk(Example example) {
   if (Double.isNaN(example.getValue(example.getAttributes().getLabel()))) {
     return true;
   } else {
     return false;
   }
 }
 public Model learn(ExampleSet exampleSet) throws OperatorException {
   double value = 0.0;
   double[] confidences = null;
   int method = getParameterAsInt(PARAMETER_METHOD);
   Attribute label = exampleSet.getAttributes().getLabel();
   if ((label.isNominal()) && ((method == MEDIAN) || (method == AVERAGE))) {
     logWarning(
         "Cannot use method '" + METHODS[method] + "' for nominal labels: changing to 'mode'!");
     method = MODE;
   } else if ((!label.isNominal()) && (method == MODE)) {
     logWarning(
         "Cannot use method '"
             + METHODS[method]
             + "' for numerical labels: changing to 'average'!");
     method = AVERAGE;
   }
   switch (method) {
     case MEDIAN:
       double[] labels = new double[exampleSet.size()];
       Iterator<Example> r = exampleSet.iterator();
       int counter = 0;
       while (r.hasNext()) {
         Example example = r.next();
         labels[counter++] = example.getValue(example.getAttributes().getLabel());
       }
       java.util.Arrays.sort(labels);
       value = labels[exampleSet.size() / 2];
       break;
     case AVERAGE:
       exampleSet.recalculateAttributeStatistics(label);
       value = exampleSet.getStatistics(label, Statistics.AVERAGE);
       break;
     case MODE:
       exampleSet.recalculateAttributeStatistics(label);
       value = exampleSet.getStatistics(label, Statistics.MODE);
       confidences = new double[label.getMapping().size()];
       for (int i = 0; i < confidences.length; i++) {
         confidences[i] =
             exampleSet.getStatistics(label, Statistics.COUNT, label.getMapping().mapIndex(i))
                 / exampleSet.size();
       }
       break;
     case CONSTANT:
       value = getParameterAsDouble(PARAMETER_CONSTANT);
       break;
     case ATTRIBUTE:
       return new AttributeDefaultModel(
           exampleSet, getParameterAsString(PARAMETER_ATTRIBUTE_NAME));
     default:
       // cannot happen
       throw new OperatorException("DefaultLearner: Unknown default method '" + method + "'!");
   }
   log(
       "Default value is '"
           + (label.isNominal() ? label.getMapping().mapIndex((int) value) : value + "")
           + "'.");
   return new DefaultModel(exampleSet, value, confidences);
 }
  /**
   * Iterates over all models and returns the class with maximum likelihood.
   *
   * @param origExampleSet the set of examples to be classified
   */
  @Override
  public ExampleSet performPrediction(ExampleSet origExampleSet, Attribute predictedLabel)
      throws OperatorException {
    final String attributePrefix = "AdaBoostModelPrediction";
    final int numLabels = predictedLabel.getMapping().size();
    final Attribute[] specialAttributes = new Attribute[numLabels];
    OperatorProgress progress = null;
    if (getShowProgress() && getOperator() != null && getOperator().getProgress() != null) {
      progress = getOperator().getProgress();
      progress.setTotal(100);
    }
    for (int i = 0; i < numLabels; i++) {
      specialAttributes[i] =
          com.rapidminer.example.Tools.createSpecialAttribute(
              origExampleSet, attributePrefix + i, Ontology.NUMERICAL);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (i + 1) / numLabels));
      }
    }

    Iterator<Example> reader = origExampleSet.iterator();
    int progressCounter = 0;
    while (reader.hasNext()) {
      Example example = reader.next();
      for (int i = 0; i < specialAttributes.length; i++) {
        example.setValue(specialAttributes[i], 0);
      }
      if (progress != null && ++progressCounter % OPERATOR_PROGRESS_STEPS == 0) {
        progress.setCompleted((int) (25.0 * progressCounter / origExampleSet.size()) + 25);
      }
    }

    reader = origExampleSet.iterator();
    for (int modelNr = 0; modelNr < this.getNumberOfModels(); modelNr++) {
      Model model = this.getModel(modelNr);
      ExampleSet exampleSet = (ExampleSet) origExampleSet.clone();
      exampleSet = model.apply(exampleSet);
      this.updateEstimates(exampleSet, modelNr, specialAttributes);
      PredictionModel.removePredictedLabel(exampleSet);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (modelNr + 1) / this.getNumberOfModels()) + 50);
      }
    }

    // Turn prediction weights into confidences and a crisp predcition:
    this.evaluateSpecialAttributes(origExampleSet, specialAttributes);

    // Clean up attributes:
    for (int i = 0; i < numLabels; i++) {
      origExampleSet.getAttributes().remove(specialAttributes[i]);
      origExampleSet.getExampleTable().removeAttribute(specialAttributes[i]);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (i + 1) / numLabels) + 75);
      }
    }

    return origExampleSet;
  }
  public Split getBestSplit(ExampleSet inputSet, Attribute attribute, String labelName) {
    SortedExampleSet exampleSet =
        new SortedExampleSet((ExampleSet) inputSet.clone(), attribute, SortedExampleSet.INCREASING);

    Attribute labelAttribute = exampleSet.getAttributes().getLabel();
    int labelIndex = labelAttribute.getMapping().mapString(labelName);

    double oldLabel = Double.NaN;
    double bestSplit = Double.NaN;
    double lastValue = Double.NaN;
    double bestBenefit = Double.NEGATIVE_INFINITY;
    double bestTotalWeight = 0;
    int bestSplitType = Split.LESS_SPLIT;

    // initiating online counting of benefit: only 2 Datascans needed then
    criterion.reinitOnlineCounting(exampleSet);
    for (Example e : exampleSet) {
      double currentValue = e.getValue(attribute);
      double label = e.getValue(labelAttribute);
      if ((Double.isNaN(oldLabel)) || (oldLabel != label) && (lastValue != currentValue)) {
        double splitValue = (lastValue + currentValue) / 2.0d;

        double[] benefits;
        if (labelName == null) {
          benefits = criterion.getOnlineBenefit(e);
        } else {
          benefits = criterion.getOnlineBenefit(e, labelIndex);
        }
        // online method returns both possible relations in one array(greater / smaller) in one
        // array
        if ((benefits[0] > minValue)
            && (benefits[0] > 0)
            && (benefits[1] > 0)
            && ((benefits[0] > bestBenefit)
                || ((benefits[0] == bestBenefit) && (benefits[1] > bestTotalWeight)))) {
          bestBenefit = benefits[0];
          bestSplit = splitValue;
          bestTotalWeight = benefits[1];
          bestSplitType = Split.LESS_SPLIT;
        }
        if ((benefits[2] > minValue)
            && (benefits[2] > 0)
            && (benefits[3] > 0)
            && ((benefits[2] > bestBenefit)
                || ((benefits[2] == bestBenefit) && (benefits[3] > bestTotalWeight)))) {
          bestBenefit = benefits[2];
          bestSplit = splitValue;
          bestTotalWeight = benefits[3];
          bestSplitType = Split.GREATER_SPLIT;
        }
        oldLabel = label;
      }
      lastValue = currentValue;
      criterion.update(e);
    }
    return new Split(bestSplit, new double[] {bestBenefit, bestTotalWeight}, bestSplitType);
  }
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // recall: difference = minuend - subtrahend
    // but the subtrahend is last on the ioobjects stack, so pop first
    ExampleSet subtrahendSet = subtrahendInput.getData(ExampleSet.class);
    ExampleSet minuendSet = exampleSet;

    subtrahendSet.remapIds();
    minuendSet.remapIds();

    Attribute minuendId = minuendSet.getAttributes().getId();
    Attribute subtrahendId = subtrahendSet.getAttributes().getId();

    // sanity checks
    if ((minuendId == null) || (subtrahendId == null)) {
      throw new UserError(this, 129);
    }
    if (minuendId.getValueType() != subtrahendId.getValueType()) {
      throw new UserError(
          this,
          120,
          new Object[] {
            subtrahendId.getName(),
            Ontology.VALUE_TYPE_NAMES[subtrahendId.getValueType()],
            Ontology.VALUE_TYPE_NAMES[minuendId.getValueType()]
          });
    }

    List<Integer> indices = new LinkedList<>();
    {
      int i = 0;
      for (Example example : minuendSet) {
        double id = example.getValue(minuendId);
        Example subtrahendExample = null;
        if (minuendId.isNominal()) {
          subtrahendExample =
              subtrahendSet.getExampleFromId(
                  subtrahendId.getMapping().getIndex(minuendId.getMapping().mapIndex((int) id)));
        } else {
          subtrahendExample = subtrahendSet.getExampleFromId(id);
        }
        if (subtrahendExample == null) {
          indices.add(i);
        }
        i++;
      }
    }

    int[] indexArray = new int[indices.size()];
    for (int i = 0; i < indices.size(); i++) {
      indexArray[i] = indices.get(i);
    }

    ExampleSet minusSet = new MappedExampleSet(minuendSet, indexArray);
    return minusSet;
  }
  @Override
  public void doWork() throws OperatorException {

    ExampleSet exampleSet = exampleSetInput.getData();

    IEntityMapping user_mapping = new EntityMapping();
    IEntityMapping item_mapping = new EntityMapping();
    IRatings training_data = new Ratings();

    if (exampleSet.getAttributes().getSpecial("user identification") == null) {
      throw new UserError(this, 105);
    }

    if (exampleSet.getAttributes().getSpecial("item identification") == null) {
      throw new UserError(this, 105);
    }

    if (exampleSet.getAttributes().getLabel() == null) {
      throw new UserError(this, 105);
    }

    Attributes Att = exampleSet.getAttributes();
    AttributeRole ur = Att.getRole("user identification");
    Attribute u = ur.getAttribute();
    AttributeRole ir = Att.getRole("item identification");
    Attribute i = ir.getAttribute();
    Attribute ui = Att.getLabel();

    for (Example example : exampleSet) {

      double j = example.getValue(u);
      int uid = user_mapping.ToInternalID((int) j);

      j = example.getValue(i);
      int iid = item_mapping.ToInternalID((int) j);

      double r = example.getValue(ui);
      training_data.Add(uid, iid, r);
    }

    _slopeOne recommendAlg = new _slopeOne();

    recommendAlg.user_mapping = user_mapping;
    recommendAlg.item_mapping = item_mapping;
    recommendAlg.SetMinRating(getParameterAsInt("Min Rating"));
    recommendAlg.SetMaxRating(recommendAlg.GetMinRating() + getParameterAsInt("Range"));

    recommendAlg.SetRatings(training_data);

    recommendAlg.Train();

    exampleSetOutput.deliver(exampleSet);

    exampleSetOutput1.deliver(recommendAlg);
  }
 /** returns the accuracy of the predictions for the given example set */
 private double evaluatePredictions(ExampleSet exampleSet) {
   Iterator<Example> reader = exampleSet.iterator();
   int count = 0;
   int correct = 0;
   while (reader.hasNext()) {
     count++;
     Example example = reader.next();
     if (example.getLabel() == example.getPredictedLabel()) correct++;
   }
   return ((double) correct) / count;
 }
  private double[] getExampleValues(Example example) {
    Attributes attributes = example.getAttributes();
    double[] attributeValues = new double[attributes.size()];

    int i = 0;
    for (Attribute attribute : attributes) {
      attributeValues[i] = example.getValue(attribute);
      i++;
    }
    return attributeValues;
  }
 public void update(Example example) {
   int labelIndex = (int) example.getValue(labelAttribute);
   if (weightAttribute != null) {
     double currentWeight = example.getValue(weightAttribute);
     labelWeights[labelIndex] += currentWeight;
     weight += currentWeight;
   } else {
     labelWeights[labelIndex] += 1d;
     weight += 1d;
   }
 }
 /**
  * This method must be implemented by the subclasses. Subclasses have to iterate over the
  * exampleset and on each example iterate over the oldAttribute array and set the new values on
  * the corresponding new attribute
  */
 protected void applyOnData(
     ExampleSet exampleSet, Attribute[] oldAttributes, Attribute[] newAttributes) {
   // copying data
   for (Example example : exampleSet) {
     for (int i = 0; i < oldAttributes.length; i++) {
       if (oldAttributes[i].isNumerical())
         example.setValue(
             newAttributes[i], computeValue(oldAttributes[i], example.getValue(oldAttributes[i])));
     }
   }
 }
  public Graph<String, String> createGraph() {
    graph = new UndirectedSparseGraph<String, String>();

    Attribute id = exampleSet.getAttributes().getId();
    if (id != null) {
      for (Example example : exampleSet) {
        graph.addVertex(example.getValueAsString(id));
      }
      addEdges();
    }
    return graph;
  }
  @Override
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    DistanceMeasure measure = DistanceMeasures.createMeasure(this);
    measure.init(exampleSet);
    GeometricDataCollection<RegressionData> data = new LinearList<RegressionData>(measure);

    // check if weights should be used
    boolean useWeights = getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS);
    // check if robust estimate should be performed: Then calculate weights and use it anyway
    if (getParameterAsBoolean(PARAMETER_USE_ROBUST_ESTIMATION)) {
      useWeights = true;
      LocalPolynomialExampleWeightingOperator weightingOperator;
      try {
        weightingOperator =
            OperatorService.createOperator(LocalPolynomialExampleWeightingOperator.class);
        exampleSet = weightingOperator.doWork((ExampleSet) exampleSet.clone(), this);
      } catch (OperatorCreationException e) {
        throw new UserError(this, 904, "LocalPolynomialExampleWeighting", e.getMessage());
      }
    }

    Attributes attributes = exampleSet.getAttributes();
    Attribute label = attributes.getLabel();
    Attribute weightAttribute = attributes.getWeight();
    for (Example example : exampleSet) {
      double[] values = new double[attributes.size()];
      double labelValue = example.getValue(label);
      double weight = 1d;
      if (weightAttribute != null && useWeights) {
        weight = example.getValue(weightAttribute);
      }

      // filter out examples without influence
      if (weight > 0d) {
        // copying example values
        int i = 0;
        for (Attribute attribute : attributes) {
          values[i] = example.getValue(attribute);
          i++;
        }

        // inserting into geometric data collection
        data.add(values, new RegressionData(values, labelValue, weight));
      }
    }
    return new LocalPolynomialRegressionModel(
        exampleSet,
        data,
        Neighborhoods.createNeighborhood(this),
        SmoothingKernels.createKernel(this),
        getParameterAsInt(PARAMETER_DEGREE),
        getParameterAsDouble(PARAMETER_RIDGE));
  }
 @Override
 public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
   for (Example example : exampleSet) {
     for (Attribute attribute : exampleSet.getAttributes()) {
       if (attribute.isNumerical()) {
         double value = example.getValue(attribute);
         value = Math.abs(value);
         example.setValue(attribute, value);
       }
     }
   }
   return exampleSet;
 }
 public void apply(Example example) {
   if (applicable(example)) {
     double weight = 1.0d;
     if (example.getAttributes().getWeight() != null) {
       weight = example.getWeight();
     }
     coveredWeight += weight;
     if (example.getLabel()
         == example.getAttributes().getLabel().getMapping().getPositiveIndex()) {
       positiveWeight += weight;
     }
   }
 }
  /**
   * Computes the weighted class priors of the boolean target attribute and shifts weights so that
   * the priors are equal afterwards.
   */
  private void rescalePriors(ExampleSet exampleSet, double[] classPriors) {
    // The weights of class i are calculated as
    // (1 / #classes) / (#rel_freq_class_i)
    double[] weights = new double[2];
    for (int i = 0; i < weights.length; i++) {
      weights[i] = 1.0d / (weights.length * (classPriors[i]));
    }

    Iterator<Example> exRead = exampleSet.iterator();
    while (exRead.hasNext()) {
      Example example = exRead.next();
      example.setWeight(weights[(int) (example.getLabel())]);
    }
  }
 /**
  * Similar to prepareBatch, but for extended batches.
  *
  * @param extendedBatch containing the extended batch
  * @return the class priors of the batch
  */
 private double[] prepareExtendedBatch(ExampleSet extendedBatch) {
   int[] classCount = new int[2];
   Iterator<Example> reader = extendedBatch.iterator();
   while (reader.hasNext()) {
     Example example = reader.next();
     example.setWeight(1);
     classCount[(int) example.getLabel()]++;
   }
   double[] classPriors = new double[2];
   int sum = classCount[0] + classCount[1];
   classPriors[0] = ((double) classCount[0]) / sum;
   classPriors[1] = ((double) classCount[1]) / sum;
   return classPriors;
 }
  public LinkedList<String> getAllCategories(Attribute attribute) {
    LinkedList<String> allCategoryList = new LinkedList<String>();

    Iterator<Example> reader = this.iterator();

    while (reader.hasNext()) {
      Example example = reader.next();
      String currentValue = example.getValueAsString(attribute);
      if (!inList(currentValue, allCategoryList)) allCategoryList.add(currentValue);
    }

    // return new SplittedExampleSet(exampleSet, partition);
    return allCategoryList;
  }
  private double[] estimateVariance() {
    double[] beta = getBestValuesEver();

    Matrix hessian = new Matrix(beta.length, beta.length);
    for (Example example : exampleSet) {
      double[] values = new double[beta.length];
      double eta = 0.0d;
      int j = 0;
      for (Attribute attribute : example.getAttributes()) {
        double value = example.getValue(attribute);
        values[j] = value;
        eta += beta[j] * value;
        j++;
      }
      if (addIntercept) {
        values[beta.length - 1] = 1.0d;
        eta += beta[beta.length - 1];
      }
      double pi = Math.exp(eta) / (1 + Math.exp(eta));

      double weightValue = 1.0d;
      if (weight != null) weightValue = example.getValue(weight);
      for (int x = 0; x < beta.length; x++) {
        for (int y = 0; y < beta.length; y++) {
          // sum is second derivative of log likelihood function
          double h = hessian.get(x, y) - values[x] * values[y] * weightValue * pi * (1 - pi);
          hessian.set(x, y, h);
        }
      }
    }

    double[] variance = new double[beta.length];
    Matrix varianceCovarianceMatrix = null;
    try {
      // asymptotic variance-covariance matrix is inverse of hessian matrix
      varianceCovarianceMatrix = hessian.inverse();
    } catch (Exception e) {
      logging.logWarning("could not determine variance-covariance matrix, hessian is singular");
      for (int j = 0; j < beta.length; j++) {
        variance[j] = Double.NaN;
      }
      return variance;
    }
    for (int j = 0; j < beta.length; j++) {
      // get diagonal elements
      variance[j] = Math.abs(varianceCovarianceMatrix.get(j, j));
    }

    return variance;
  }
  public static SplittedExampleSet splitByAttribute(
      ExampleSet exampleSet, Attribute attribute, double value) {
    int[] elements = new int[exampleSet.size()];
    Iterator<Example> reader = exampleSet.iterator();
    int i = 0;

    while (reader.hasNext()) {
      Example example = reader.next();
      double currentValue = example.getValue(attribute);
      if (currentValue <= value) elements[i++] = 0;
      else elements[i++] = 1;
    }
    Partition partition = new Partition(elements, 2);
    return new SplittedExampleSet(exampleSet, partition);
  }
 /**
  * Computes Kendall's tau-b rank correlation statistic, ignoring examples containing missing
  * values, with approximate comparisons.
  *
  * @param eSet the example set
  * @param a the first attribute to correlate
  * @param b the second attribute to correlate
  * @param fuzz values within +/- fuzz may be considered tied
  * @return Kendall's tau-b rank correlation
  * @throws OperatorException
  */
 public static double tau_b(ExampleSet eSet, Attribute a, Attribute b, double fuzz)
     throws OperatorException {
   ExampleSet e = extract(eSet, a, b); // reduced example set
   FuzzyComp fc = new FuzzyComp(fuzz);
   int c = 0; // concordant pairs
   int d = 0; // discordant pairs
   int ta = 0; // pairs tied on a (only)
   int tb = 0; // pairs tied on b (only)
   int tc = 0; // pairs tied on both a and b
   int n = 0; // number of times iterator i is bumped
   Iterator<Example> i = e.iterator();
   while (i.hasNext()) {
     // iterate through all possible pairs
     Example z1 = i.next();
     n++;
     double x = z1.getValue(a);
     double y = z1.getValue(b);
     if (b.isNominal() && a != null) {
       String yString = b.getMapping().mapIndex((int) y);
       y = a.getMapping().getIndex(yString);
     }
     Iterator<Example> j = e.iterator();
     for (int k = 0; k < n; k++) j.next(); // increment j to match i
     while (j.hasNext()) {
       // move on to subsequent examples
       Example z2 = j.next();
       double xx = z2.getValue(a);
       double yy = z2.getValue(b);
       if (b.isNominal() && a != null) {
         String yyString = b.getMapping().mapIndex((int) yy);
         yy = a.getMapping().getIndex(yyString);
       }
       int xc = fc.compare(x, xx);
       int yc = fc.compare(y, yy);
       if (xc == 0) {
         if (yc == 0) tc++; // tied on both attributes
         else ta++; // tied only on a
       } else if (yc == 0) tb++; // tied only on b
       else if (xc == yc) c++; // concordant pair
       else d++; // discordant pair
     }
   }
   double num = c - d;
   double den = Math.sqrt((c + d + ta) * (c + d + tb));
   if (den != 0) return num / den;
   else return 0;
 }
Example #30
0
 public double[] vectorSubtraction(Example x, double[] y) {
   if (x.getAttributes().size() != y.length) {
     throw new RuntimeException(
         "Cannot substract vectors: incompatible numbers of attributes ("
             + x.getAttributes().size()
             + " != "
             + y.length
             + ")!");
   }
   double[] result = new double[x.getAttributes().size()];
   int i = 0;
   for (Attribute att : x.getAttributes()) {
     result[i] = x.getValue(att) - y[i];
     i++;
   }
   return result;
 }