コード例 #1
0
  /**
   * Lets every base model vote on each example and turns the accumulated votes into a crisp
   * prediction plus confidences.
   *
   * <p>For every label value a temporary numerical special attribute is created on {@code
   * origExampleSet} and initialized with 0. Each model is applied to a clone of the example set
   * and the result is handed to {@code updateEstimates} together with the temporary attributes.
   * Afterwards {@code evaluateSpecialAttributes} derives prediction and confidences, and the
   * temporary attributes are removed again from both the attribute set and the example table.
   *
   * <p>If progress reporting is active, the four phases (attribute creation, initialization, model
   * application, clean up) each contribute 25 of the 100 progress steps.
   *
   * @param origExampleSet the set of examples to be classified
   * @param predictedLabel the predicted label attribute; the size of its mapping determines how
   *     many temporary attributes are created
   * @return {@code origExampleSet} itself
   * @throws OperatorException if it is raised while one of the models is applied
   */
  @Override
  public ExampleSet performPrediction(ExampleSet origExampleSet, Attribute predictedLabel)
      throws OperatorException {
    final String attributePrefix = "AdaBoostModelPrediction";
    final int numLabels = predictedLabel.getMapping().size();
    final Attribute[] specialAttributes = new Attribute[numLabels];

    OperatorProgress progress = null;
    if (getShowProgress() && getOperator() != null && getOperator().getProgress() != null) {
      progress = getOperator().getProgress();
      progress.setTotal(100);
    }

    // Phase 1 (0-25): one temporary attribute per class.
    for (int i = 0; i < numLabels; i++) {
      specialAttributes[i] =
          com.rapidminer.example.Tools.createSpecialAttribute(
              origExampleSet, attributePrefix + i, Ontology.NUMERICAL);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (i + 1) / numLabels));
      }
    }

    // Phase 2 (25-50): start every accumulator at zero.
    int progressCounter = 0;
    for (Example example : origExampleSet) {
      for (Attribute specialAttribute : specialAttributes) {
        example.setValue(specialAttribute, 0);
      }
      if (progress != null && ++progressCounter % OPERATOR_PROGRESS_STEPS == 0) {
        progress.setCompleted((int) (25.0 * progressCounter / origExampleSet.size()) + 25);
      }
    }

    // Phase 3 (50-75): apply each model to a clone so the predicted label it creates can be
    // dropped again without touching origExampleSet's own attribute set.
    final int numberOfModels = this.getNumberOfModels();
    for (int modelNr = 0; modelNr < numberOfModels; modelNr++) {
      Model model = this.getModel(modelNr);
      ExampleSet exampleSet = (ExampleSet) origExampleSet.clone();
      exampleSet = model.apply(exampleSet);
      this.updateEstimates(exampleSet, modelNr, specialAttributes);
      PredictionModel.removePredictedLabel(exampleSet);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (modelNr + 1) / numberOfModels) + 50);
      }
    }

    // Turn prediction weights into confidences and a crisp prediction:
    this.evaluateSpecialAttributes(origExampleSet, specialAttributes);

    // Phase 4 (75-100): clean up the temporary attributes.
    for (int i = 0; i < numLabels; i++) {
      origExampleSet.getAttributes().remove(specialAttributes[i]);
      origExampleSet.getExampleTable().removeAttribute(specialAttributes[i]);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (i + 1) / numLabels) + 75);
      }
    }

    return origExampleSet;
  }
コード例 #2
0
ファイル: StreamProcess.java プロジェクト: amuraru/streams
  /**
   * Resets all directly nested stream operators and then feeds the examples of the input example
   * set to the first subprocess, one example per subprocess execution.
   *
   * @see com.rapidminer.operator.OperatorChain#doWork()
   */
  @Override
  public void doWork() throws OperatorException {
    List<Operator> children = this.getImmediateChildren();
    log.info("This StreamProcess has {} nested operators", children.size());
    for (Operator child : children) {
      log.info("  op: {}", child);
      if (!(child instanceof DataStreamOperator)) {
        continue;
      }
      log.info("Resetting stream-operator {}", child);
      ((DataStreamOperator) child).reset();
    }

    log.info("Starting some work in doWork()");
    ExampleSet examples = input.getData(ExampleSet.class);
    log.info("input is an example set with {} examples", examples.size());

    int position = 0;
    for (Example example : examples) {
      log.info("Processing example {}", position);
      // wrap the example, hand it to the stream and run the subprocess once for it
      DataObject wrapped = StreamUtils.wrap(example);
      log.info("Wrapped data-object is: {}", wrapped);
      dataStream.deliver(wrapped);
      getSubprocess(0).execute();
      inApplyLoop();
      position++;
    }

    // Note: super.doWork() is not called; the subprocess is executed per example above.
    log.info("doWork() is finished.");
  }
コード例 #3
0
  /**
   * Computes mean and variance for every attribute obtained by iterating {@code
   * exampleSet.getAttributes()}.
   *
   * <p>The variance is calculated as {@code E[x^2] - E[x]^2}. Floating point cancellation can make
   * this difference slightly negative (for instance for a constant attribute), which would turn
   * into NaN as soon as a caller takes the square root. The result is therefore clamped to zero.
   * For an empty example set the division by zero still yields NaN for mean and variance, exactly
   * as before.
   *
   * @param exampleSet the example set to summarize
   * @return a map from the position of an attribute in the iteration order to its mean and
   *     variance
   */
  private static Map<Integer, MeanVariance> createMeanVariances(
      com.rapidminer.example.ExampleSet exampleSet) {
    final int attributeCount = exampleSet.getAttributes().size();
    double[] sum = new double[attributeCount];
    double[] squaredSum = new double[attributeCount];

    // single pass: accumulate sum and sum of squares per attribute position
    for (com.rapidminer.example.Example example : exampleSet) {
      int a = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        double value = example.getValue(attribute);
        sum[a] += value;
        squaredSum[a] += value * value;
        a++;
      }
    }

    final int exampleCount = exampleSet.size();
    Map<Integer, MeanVariance> meanVariances = new HashMap<Integer, MeanVariance>();
    for (int a = 0; a < attributeCount; a++) {
      double mean = sum[a] / exampleCount;
      double meanOfSquares = squaredSum[a] / exampleCount;
      // Math.max keeps NaN (empty example set) but removes tiny negative rounding residue
      double variance = Math.max(0.0d, meanOfSquares - (mean * mean));
      meanVariances.put(a, new MeanVariance(mean, variance));
    }

    return meanVariances;
  }
コード例 #4
0
 /**
  * Computes Kendall's tau-b rank correlation statistic, ignoring examples containing missing
  * values, with approximate comparisons.
  *
  * <p>All pairs of examples of the reduced example set returned by {@code extract} are compared.
  * If {@code b} is nominal, each of its values is translated into the index that the same string
  * has in the mapping of {@code a} before it is compared.
  *
  * <p>The pair counters are {@code long} and the denominator is multiplied in {@code double}
  * precision: the number of pairs grows quadratically with the number of examples, so {@code int}
  * counters and an {@code int} product overflow for larger example sets.
  *
  * @param eSet the example set
  * @param a the first attribute to correlate
  * @param b the second attribute to correlate
  * @param fuzz values within +/- fuzz may be considered tied
  * @return Kendall's tau-b rank correlation, or 0 if the denominator is 0
  * @throws OperatorException
  */
 public static double tau_b(ExampleSet eSet, Attribute a, Attribute b, double fuzz)
     throws OperatorException {
   ExampleSet e = extract(eSet, a, b); // reduced example set
   FuzzyComp fc = new FuzzyComp(fuzz);
   long c = 0; // concordant pairs
   long d = 0; // discordant pairs
   long ta = 0; // pairs tied on a (only)
   long tb = 0; // pairs tied on b (only)
   long tc = 0; // pairs tied on both a and b (counted, but not part of the statistic)
   int n = 0; // number of times iterator i is bumped
   Iterator<Example> i = e.iterator();
   while (i.hasNext()) {
     // iterate through all possible pairs
     Example z1 = i.next();
     n++;
     double x = z1.getValue(a);
     double y = z1.getValue(b);
     if (b.isNominal() && a != null) {
       // express the nominal value of b as an index of a's mapping
       String yString = b.getMapping().mapIndex((int) y);
       y = a.getMapping().getIndex(yString);
     }
     Iterator<Example> j = e.iterator();
     for (int k = 0; k < n; k++) {
       j.next(); // increment j to match i
     }
     while (j.hasNext()) {
       // move on to subsequent examples
       Example z2 = j.next();
       double xx = z2.getValue(a);
       double yy = z2.getValue(b);
       if (b.isNominal() && a != null) {
         String yyString = b.getMapping().mapIndex((int) yy);
         yy = a.getMapping().getIndex(yyString);
       }
       int xc = fc.compare(x, xx);
       int yc = fc.compare(y, yy);
       if (xc == 0) {
         if (yc == 0) {
           tc++; // tied on both attributes
         } else {
           ta++; // tied only on a
         }
       } else if (yc == 0) {
         tb++; // tied only on b
       } else if (xc == yc) {
         c++; // concordant pair
       } else {
         d++; // discordant pair
       }
     }
   }
   double num = c - d;
   // widen to double before multiplying so the product cannot overflow
   double f1 = c + d + ta;
   double f2 = c + d + tb;
   double den = Math.sqrt(f1 * f2);
   if (den != 0) {
     return num / den;
   }
   return 0;
 }
コード例 #5
0
  /**
   * Creates a new evolutionary SVM optimization for a classification task.
   *
   * <p>The individuals have one gene per training example; the lower bounds are created from 0 and
   * the upper bounds are determined by {@code EvoSVM.determineMax}. Besides forwarding the settings
   * of the evolutionary algorithm to the super class, the constructor stores the data sets and the
   * kernel and encodes the label of each training example as +1 (positive class) or -1.
   */
  public ClassificationEvoOptimization(
      ExampleSet exampleSet, // training data
      Kernel kernel,
      double c, // SVM paras
      int initType, // start population creation type para
      int maxIterations,
      int generationsWithoutImprovement,
      int popSize, // GA paras
      int selectionType,
      double tournamentFraction,
      boolean keepBest, // selection paras
      int mutationType, // type of mutation
      double crossoverProb,
      boolean showConvergencePlot,
      boolean showPopulationPlot,
      ExampleSet holdOutSet,
      RandomGenerator random,
      LoggingHandler logging,
      Operator executingOperator) {
    super(
        EvoSVM.createBoundArray(0.0d, exampleSet.size()),
        EvoSVM.determineMax(c, kernel, exampleSet, selectionType, exampleSet.size()),
        popSize,
        exampleSet.size(),
        initType,
        maxIterations,
        generationsWithoutImprovement,
        selectionType,
        tournamentFraction,
        keepBest,
        mutationType,
        Double.NaN,
        crossoverProb,
        showConvergencePlot,
        showPopulationPlot,
        random,
        logging,
        executingOperator);

    // data and kernel
    this.exampleSet = exampleSet;
    this.holdOutSet = holdOutSet;
    this.kernel = kernel;

    // GA / SVM settings; c is taken from the upper bound computed for the super class
    this.populationSize = popSize;
    this.c = getMax(0);

    // label values: +1 for the positive class, -1 for everything else
    this.ys = new double[exampleSet.size()];
    Attribute label = exampleSet.getAttributes().getLabel();
    int position = 0;
    for (Example example : exampleSet) {
      boolean positive = example.getLabel() == label.getMapping().getPositiveIndex();
      ys[position] = positive ? 1.0d : -1.0d;
      position++;
    }

    // optimization function
    boolean multiObjective = selectionType == NON_DOMINATED_SORTING_SELECTION;
    this.optimizationFunction = new ClassificationOptimizationFunction(multiObjective);
  }
コード例 #6
0
  /**
   * Creates a fresh set of SVM examples from a RapidMiner example set. The alpha values and b are
   * zero, the label will be set if a label attribute is given.
   *
   * <p>Only attribute values that differ from the attribute's default are stored (sparse
   * representation: one array of attribute positions and one array of values per example). If the
   * given map contains a mean/variance entry for an attribute position, the stored value is
   * standardized with it; a variance of exactly zero maps the value to 0.
   *
   * @param exampleSet the source examples
   * @param labelAttribute the label to copy, or {@code null} to leave the labels untouched; for a
   *     nominal label the positive class becomes 1 and everything else -1
   * @param meanVariances mean and variance per attribute position, used for standardization
   */
  public SVMExamples(
      com.rapidminer.example.ExampleSet exampleSet,
      Attribute labelAttribute,
      Map<Integer, MeanVariance> meanVariances) {
    this(exampleSet.size(), 0.0d);
    this.meanVarianceMap = meanVariances;

    Attribute idAttribute = exampleSet.getAttributes().getId();
    int row = 0;
    for (com.rapidminer.example.Example current : exampleSet) {
      // collect the non-default values of this example, keyed by attribute position
      Map<Integer, Double> sparseValues = new LinkedHashMap<Integer, Double>();
      int position = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        double raw = current.getValue(attribute);
        if (!com.rapidminer.example.Tools.isDefault(attribute.getDefault(), raw)) {
          sparseValues.put(position, raw);
        }
        position++;
        // dim tracks the largest number of attributes seen so far
        if (position > dim) {
          dim = position;
        }
      }

      // copy the sparse values into the index / value arrays, standardizing on the way
      int nonDefaultCount = sparseValues.size();
      atts[row] = new double[nonDefaultCount];
      index[row] = new int[nonDefaultCount];
      int slot = 0;
      for (Map.Entry<Integer, Double> entry : sparseValues.entrySet()) {
        Integer attributeIndex = entry.getKey();
        double value = entry.getValue().doubleValue();
        index[row][slot] = attributeIndex.intValue();
        MeanVariance meanVariance = meanVarianceMap.get(attributeIndex);
        if (meanVariance != null) {
          if (meanVariance.getVariance() != 0.0d) {
            value = (value - meanVariance.getMean()) / Math.sqrt(meanVariance.getVariance());
          } else {
            value = 0.0d;
          }
        }
        atts[row][slot] = value;
        slot++;
      }

      if (labelAttribute != null) {
        double label = current.getValue(labelAttribute);
        if (!labelAttribute.isNominal()) {
          ys[row] = label;
        } else {
          ys[row] = (label == labelAttribute.getMapping().getPositiveIndex() ? 1 : -1);
        }
      }
      if (idAttribute != null) {
        ids[row] = current.getValueAsString(idAttribute);
      }
      row++;
    }
  }
コード例 #7
0
 /**
  * Computes Kendall's tau-b rank correlation statistic with exact comparisons, ignoring examples
  * containing missing values.
  *
  * <p>Every pair of examples of the reduced example set returned by {@code extract} is classified
  * as concordant, discordant, tied on {@code a}, tied on {@code b} or tied on both. If {@code b}
  * is nominal, its values are first translated into the index the same string has in the mapping
  * of {@code a}.
  *
  * @param eSet the example set
  * @param a the first attribute to correlate
  * @param b the second attribute to correlate
  * @return Kendall's tau-b rank correlation, or 0 if the denominator is 0
  * @throws OperatorException
  */
 public static double tau_b(ExampleSet eSet, Attribute a, Attribute b) throws OperatorException {
   ExampleSet reduced = extract(eSet, a, b);
   long concordant = 0;
   long discordant = 0;
   long tiedOnA = 0; // tied on a only
   long tiedOnB = 0; // tied on b only
   long tiedOnBoth = 0; // counted, but not part of the statistic
   int consumed = 0; // how many examples the outer iterator has delivered so far

   for (Iterator<Example> outer = reduced.iterator(); outer.hasNext(); ) {
     Example first = outer.next();
     consumed++;
     double x = first.getValue(a);
     double y = first.getValue(b);
     if (b.isNominal() && a != null) {
       y = a.getMapping().getIndex(b.getMapping().mapIndex((int) y));
     }

     // position a second iterator right behind the current example
     Iterator<Example> inner = reduced.iterator();
     for (int skipped = 0; skipped < consumed; skipped++) {
       inner.next();
     }

     // compare the current example with all subsequent ones
     while (inner.hasNext()) {
       Example second = inner.next();
       double xx = second.getValue(a);
       double yy = second.getValue(b);
       if (b.isNominal() && a != null) {
         yy = a.getMapping().getIndex(b.getMapping().mapIndex((int) yy));
       }

       boolean sameX = x == xx;
       boolean sameY = y == yy;
       if (sameX && sameY) {
         tiedOnBoth++;
       } else if (sameX) {
         tiedOnA++;
       } else if (sameY) {
         tiedOnB++;
       } else if ((x > xx && y > yy) || (x < xx && y < yy)) {
         concordant++;
       } else {
         discordant++;
       }
     }
   }

   double numerator = concordant - discordant;
   double withTiesOnA = concordant + discordant + tiedOnA;
   double withTiesOnB = concordant + discordant + tiedOnB;
   double denominator = Math.sqrt(withTiesOnA * withTiesOnB);
   return (denominator != 0) ? numerator / denominator : 0;
 }
コード例 #8
0
 /**
  * Learns a model that predicts one default value for all examples.
  *
  * <p>The value is determined by the configured method: the median, average or mode of the label,
  * a user defined constant, or the value of another attribute. Median and average are replaced by
  * mode for nominal labels, and mode is replaced by average for non-nominal labels; a warning is
  * logged in both cases. For the mode the relative frequency of each label value is passed on as
  * its confidence.
  *
  * @param exampleSet the training examples
  * @return the default model, or an attribute based default model for the attribute method
  * @throws OperatorException if the method is unknown
  */
 public Model learn(ExampleSet exampleSet) throws OperatorException {
   int method = getParameterAsInt(PARAMETER_METHOD);
   Attribute label = exampleSet.getAttributes().getLabel();

   // adapt methods that do not fit the label type
   if (label.isNominal() && (method == MEDIAN || method == AVERAGE)) {
     logWarning(
         "Cannot use method '" + METHODS[method] + "' for nominal labels: changing to 'mode'!");
     method = MODE;
   } else if (!label.isNominal() && method == MODE) {
     logWarning(
         "Cannot use method '"
             + METHODS[method]
             + "' for numerical labels: changing to 'average'!");
     method = AVERAGE;
   }

   double value = 0.0;
   double[] confidences = null;
   if (method == MEDIAN) {
     // sort all label values and pick the element in the middle
     double[] labels = new double[exampleSet.size()];
     int counter = 0;
     for (Example example : exampleSet) {
       labels[counter++] = example.getValue(example.getAttributes().getLabel());
     }
     java.util.Arrays.sort(labels);
     value = labels[exampleSet.size() / 2];
   } else if (method == AVERAGE) {
     exampleSet.recalculateAttributeStatistics(label);
     value = exampleSet.getStatistics(label, Statistics.AVERAGE);
   } else if (method == MODE) {
     exampleSet.recalculateAttributeStatistics(label);
     value = exampleSet.getStatistics(label, Statistics.MODE);
     // confidence of a label value = its relative frequency
     confidences = new double[label.getMapping().size()];
     for (int i = 0; i < confidences.length; i++) {
       String labelValue = label.getMapping().mapIndex(i);
       confidences[i] =
           exampleSet.getStatistics(label, Statistics.COUNT, labelValue) / exampleSet.size();
     }
   } else if (method == CONSTANT) {
     value = getParameterAsDouble(PARAMETER_CONSTANT);
   } else if (method == ATTRIBUTE) {
     return new AttributeDefaultModel(
         exampleSet, getParameterAsString(PARAMETER_ATTRIBUTE_NAME));
   } else {
     // cannot happen
     throw new OperatorException("DefaultLearner: Unknown default method '" + method + "'!");
   }

   String shownValue =
       label.isNominal() ? label.getMapping().mapIndex((int) value) : String.valueOf(value);
   log("Default value is '" + shownValue + "'.");
   return new DefaultModel(exampleSet, value, confidences);
 }
コード例 #9
0
 /**
  * Returns the accuracy of the predictions for the given example set, i.e. the fraction of
  * examples whose label equals the predicted label. The examples are not weighted. For an empty
  * example set the result is NaN (0.0 / 0).
  */
 private double evaluatePredictions(ExampleSet exampleSet) {
   int total = 0;
   int hits = 0;
   for (Example example : exampleSet) {
     total++;
     if (example.getLabel() == example.getPredictedLabel()) {
       hits++;
     }
   }
   return ((double) hits) / total;
 }
コード例 #10
0
 /**
  * Undoes the weight changes: if weights were backed up in {@code oldWeights} they are written
  * back example by example, otherwise the weight attribute is removed from the attribute set and
  * from the example table.
  */
 private void restoreOldWeights(ExampleSet exampleSet) {
   if (this.oldWeights == null) {
     // no backup available: delete the weights attribute
     Attribute weight = exampleSet.getAttributes().getWeight();
     exampleSet.getAttributes().remove(weight);
     exampleSet.getExampleTable().removeAttribute(weight);
     return;
   }

   // write the backed up weights back, stopping at whichever runs out first
   Iterator<Example> reader = exampleSet.iterator();
   for (int i = 0; reader.hasNext() && i < this.oldWeights.length; i++) {
     reader.next().setWeight(this.oldWeights[i]);
   }
 }
コード例 #11
0
  /**
   * Assigns each example a weight that depends only on its class, chosen such that both classes
   * of the boolean target attribute carry the same total weight afterwards.
   *
   * @param exampleSet the examples to reweight; the label value is used as class index (0 or 1)
   * @param classPriors the relative frequency of class 0 and class 1
   */
  private void rescalePriors(ExampleSet exampleSet, double[] classPriors) {
    // weight of class i = (1 / #classes) / (relative frequency of class i)
    final int numClasses = 2;
    double[] classWeights = new double[numClasses];
    for (int classIndex = 0; classIndex < numClasses; classIndex++) {
      classWeights[classIndex] = 1.0d / (numClasses * classPriors[classIndex]);
    }

    for (Example example : exampleSet) {
      int classIndex = (int) example.getLabel();
      example.setWeight(classWeights[classIndex]);
    }
  }
コード例 #12
0
 /**
  * Similar to prepareBatch, but for extended batches: resets the weight of every example to 1
  * and counts how often each of the two classes occurs.
  *
  * @param extendedBatch containing the extended batch
  * @return the class priors of the batch, i.e. the relative frequency of class 0 and class 1
  */
 private double[] prepareExtendedBatch(ExampleSet extendedBatch) {
   int[] classCount = new int[2];
   for (Example example : extendedBatch) {
     example.setWeight(1);
     int classIndex = (int) example.getLabel();
     classCount[classIndex]++;
   }

   int total = classCount[0] + classCount[1];
   return new double[] {((double) classCount[0]) / total, ((double) classCount[1]) / total};
 }
コード例 #13
0
  /**
   * Splits the example set into two partitions by comparing an attribute with a threshold.
   *
   * @param exampleSet the examples to split
   * @param attribute the attribute to compare
   * @param value the threshold; examples with an attribute value less than or equal to it go into
   *     partition 0, all others into partition 1
   * @return the example set together with the two-way partition
   */
  public static SplittedExampleSet splitByAttribute(
      ExampleSet exampleSet, Attribute attribute, double value) {
    int[] partitionOf = new int[exampleSet.size()];
    int position = 0;
    for (Example example : exampleSet) {
      partitionOf[position++] = (example.getValue(attribute) <= value) ? 0 : 1;
    }
    return new SplittedExampleSet(exampleSet, new Partition(partitionOf, 2));
  }
コード例 #14
0
  /**
   * Adds the weight of the given model to the accumulator attribute of the class the model
   * predicted, for every example.
   *
   * <p>A NaN accumulator is reported with a warning and treated as 0; an infinite accumulator is
   * left untouched.
   *
   * @param exampleSet examples carrying the prediction of the model
   * @param modelNr the number of the model whose weight is added
   * @param specialAttributes one accumulator attribute per class, indexed by the predicted label
   */
  private void updateEstimates(ExampleSet exampleSet, int modelNr, Attribute[] specialAttributes) {
    for (Example example : exampleSet) {
      Attribute accumulator = specialAttributes[(int) example.getPredictedLabel()];

      double accumulated = example.getValue(accumulator);
      if (Double.isNaN(accumulated)) {
        logWarning("Found NaN confidence as intermediate prediction.");
        accumulated = 0;
      }

      if (Double.isInfinite(accumulated)) {
        continue;
      }
      example.setValue(accumulator, accumulated + this.getWeightForModel(modelNr));
    }
  }
コード例 #15
0
  /**
   * Makes sure the example set has a weight attribute in which all weights are 1.
   *
   * <p>If there is no weight attribute yet, one is created and {@code oldWeights} is set to
   * {@code null}. Otherwise the current weights are backed up in {@code oldWeights} before they
   * are overwritten with 1.
   */
  protected void prepareWeights(ExampleSet exampleSet) {
    if (exampleSet.getAttributes().getWeight() == null) {
      this.oldWeights = null;
      com.rapidminer.example.Tools.createWeightAttribute(exampleSet);
      return;
    }

    // Back up old weights
    this.oldWeights = new double[exampleSet.size()];
    Iterator<Example> reader = exampleSet.iterator();
    int position = 0;
    while (reader.hasNext() && position < oldWeights.length) {
      Example example = reader.next();
      if (example != null) {
        this.oldWeights[position] = example.getWeight();
        example.setWeight(1);
      }
      position++;
    }
  }
コード例 #16
0
  /**
   * Turns the accumulated per-class values into a crisp prediction and confidences.
   *
   * <p>For every example the class with the largest accumulated value becomes the predicted label.
   * The confidences are {@code exp(value - bestValue)} of each class, normalized to sum up to 1.
   * If that sum is infinite or NaN, the predicted class receives confidence 1 and all other
   * classes 0. In both cases the confidences are written to the example; previously the fallback
   * values were computed but never stored.
   *
   * @param exampleSet the examples to label; must provide a predicted label attribute
   * @param specialAttributes one accumulator attribute per class
   */
  private void evaluateSpecialAttributes(ExampleSet exampleSet, Attribute[] specialAttributes) {
    Attribute predictedLabel = exampleSet.getAttributes().getPredictedLabel();
    for (Example example : exampleSet) {
      // find the class with the highest accumulated value
      double[] confidences = new double[specialAttributes.length];
      double bestConf = -1;
      int bestLabel = 0;
      for (int n = 0; n < confidences.length; n++) {
        confidences[n] = example.getValue(specialAttributes[n]);
        if (confidences[n] > bestConf) {
          bestConf = confidences[n];
          bestLabel = n;
        }
      }

      example.setValue(
          predictedLabel,
          predictedLabel.getMapping().mapString(this.getLabel().getMapping().mapIndex(bestLabel)));

      // subtracting the best value keeps the exponent <= 0 for finite inputs
      double sum = 0;
      for (int n = 0; n < confidences.length; n++) {
        confidences[n] = Math.exp(confidences[n] - bestConf);
        // remember for normalization:
        sum += confidences[n];
      }

      if (Double.isInfinite(sum) || Double.isNaN(sum)) {
        // normalization is impossible: put all confidence on the predicted class
        int best = (int) example.getPredictedLabel();
        java.util.Arrays.fill(confidences, 0.0d);
        confidences[best] = 1;
      } else {
        for (int k = 0; k < confidences.length; k++) {
          confidences[k] /= sum;
        }
      }

      // Set confidence values for all classes, for the regular and the fallback case alike:
      for (int k = 0; k < confidences.length; k++) {
        example.setConfidence(predictedLabel.getMapping().mapIndex(k), confidences[k]);
      }
    }
  }
コード例 #17
0
  /**
   * Rebuilds the ensemble with a freshly estimated contingency matrix for its latest model.
   *
   * <p>All models but the last are applied in order and the examples are reweighted with their
   * stored contingency matrices. Then the last model is applied, every weight outside the hold out
   * set is set to 0, and the weighted performance on what remains provides the new contingency
   * matrix of the last model.
   *
   * @param ensemble the ensemble whose last model is re-evaluated
   * @param exampleSet the examples the models are applied to
   * @param holdOutSet the examples (elements are cast to {@code Example}) that keep their weight
   * @return a new ensemble with the same models and priors
   * @throws OperatorException if it is raised while a model is applied
   */
  private BayBoostModel retrainLastWeight(
      BayBoostModel ensemble, ExampleSet exampleSet, Vector holdOutSet) throws OperatorException {
    this.prepareExtendedBatch(exampleSet); // method fits by chance

    final int lastIndex = ensemble.getNumberOfModels() - 1;
    Vector<BayBoostBaseModelInfo> modelInfo = new Vector<BayBoostBaseModelInfo>();
    double[] priors = ensemble.getPriors();

    // replay all earlier models including their reweighting
    for (int i = 0; i < lastIndex; i++) {
      Model model = ensemble.getModel(i);
      ContingencyMatrix cm = ensemble.getContingencyMatrix(i);
      modelInfo.add(new BayBoostBaseModelInfo(model, cm));
      exampleSet = model.apply(exampleSet);
      WeightedPerformanceMeasures.reweightExamples(exampleSet, cm, false);
    }
    Model latestModel = ensemble.getModel(lastIndex);
    exampleSet = latestModel.apply(exampleSet);

    // quite ugly: remember the hold out weights, zero all weights, then restore the remembered
    // ones so that only the hold out examples keep a weight
    final int holdOutSize = holdOutSet.size();
    double[] savedWeights = new double[holdOutSize];
    for (int i = 0; i < holdOutSize; i++) {
      savedWeights[i] = ((Example) holdOutSet.get(i)).getWeight();
    }
    for (Example example : exampleSet) {
      example.setWeight(0);
    }
    for (int i = 0; i < holdOutSize; i++) {
      ((Example) holdOutSet.get(i)).setWeight(savedWeights[i]);
    }

    WeightedPerformanceMeasures wp = new WeightedPerformanceMeasures(exampleSet);
    modelInfo.add(new BayBoostBaseModelInfo(latestModel, wp.getContingencyMatrix()));

    return new BayBoostModel(exampleSet, modelInfo, priors);
  }
コード例 #18
0
 /**
  * Calculates ranks for an attribute.
  *
  * <p>Ranks are returned as double precision values, with 1 as the rank of the smallest value.
  * Values within +/- fuzz of each other may be considered tied. Tied values share the average of
  * the ranks they cover. Missing values receive rank NaN.
  *
  * <p>Note that application of the "fuzz" factor is dependent on the order of the observations in
  * the example set. For instance, if the first three values encountered are x, x+fuzz and
  * x+2*fuzz, the first two will be considered tied but the third will not, since x+2*fuzz is not
  * within +/- fuzz of x.
  *
  * @param eSet the example set
  * @param att the attribute to rank
  * @param mappingAtt if not {@code null} and {@code att} is nominal, each value of {@code att} is
  *     ranked by the index its string has in the mapping of this attribute
  * @param fuzz values within +/- fuzz may be considered tied; 0 selects exact comparison
  * @return a double precision array of ranks, one entry per example
  */
 public static double[] rank(ExampleSet eSet, Attribute att, Attribute mappingAtt, double fuzz) {
   // sorted value -> indices of the examples holding that value
   TreeMap<Double, ArrayList<Integer>> buckets =
       (fuzz == 0.0)
           ? new TreeMap<Double, ArrayList<Integer>>()
           : new TreeMap<Double, ArrayList<Integer>>(new FuzzyComp(fuzz));

   double[] rank = new double[eSet.size()];
   int position = 0; // example index
   for (Example example : eSet) {
     double x = example.getValue(att);
     if (att.isNominal() && mappingAtt != null) {
       x = mappingAtt.getMapping().getIndex(att.getMapping().mapIndex((int) x));
     }

     if (Double.isNaN(x)) {
       // missing value: no rank
       rank[position++] = Double.NaN;
       continue;
     }

     ArrayList<Integer> bucket = buckets.get(x);
     if (bucket == null) {
       // new key -- create a new entry in the map
       bucket = new ArrayList<Integer>();
       buckets.put(x, bucket);
     }
     bucket.add(position++);
   }

   // convert the map to ranks: a bucket of size s following r smaller values covers the ranks
   // r+1 .. r+s, whose average is r + (1 + s) / 2
   double ranked = 0;
   for (double key : buckets.keySet()) {
     ArrayList<Integer> members = buckets.get(key);
     double shared = ranked + (1.0 + members.size()) / 2.0;
     for (int member : members) {
       rank[member] = shared;
     }
     ranked += members.size();
   }
   return rank;
 }
コード例 #19
0
ファイル: IdTagging.java プロジェクト: ntj/ComplexRapidMiner
  /**
   * Adds a new id attribute to the input example set and numbers the examples starting with 1.
   * Depending on the parameter the ids are integers or nominal values of the form {@code id_N}.
   * Afterwards an example visualizer is created and applied if possible.
   *
   * @return an array containing only the example set with the new ids
   * @throws OperatorException if it is raised by the stop check or the visualizer
   */
  public IOObject[] apply() throws OperatorException {
    ExampleSet eSet = getInput(ExampleSet.class);

    // only warning, removing is done by createSpecialAttribute(...)
    if (eSet.getAttributes().getId() != null) {
      logWarning("Overwriting old id attribute!");
    }

    // create new id attribute
    boolean nominalIds = getParameterAsBoolean(PARAMETER_CREATE_NOMINAL_IDS);
    Attribute idAttribute =
        Tools.createSpecialAttribute(
            eSet, Attributes.ID_NAME, nominalIds ? Ontology.NOMINAL : Ontology.INTEGER);

    // set IDs
    int currentId = 1;
    for (Example example : eSet) {
      if (nominalIds) {
        example.setValue(idAttribute, idAttribute.getMapping().mapString("id_" + currentId));
      } else {
        example.setValue(idAttribute, currentId);
      }
      currentId++;
      checkForStop();
    }

    // initialize example visualizer; a failure to create it is only logged
    Operator visualizer = null;
    try {
      visualizer = OperatorService.createOperator(ExampleVisualizationOperator.class);
    } catch (OperatorCreationException e) {
      logNote("Cannot initialize example visualizer, skipping...");
    }
    if (visualizer != null) {
      visualizer.apply(new IOContainer(eSet));
    }

    return new IOObject[] {eSet};
  }
コード例 #20
0
  /**
   * Applies the wrapped model and rescales its confidences with the sigmoid defined by the
   * parameters A and B: {@code 1 / (1 + exp(A * logOdds + B))}, where the log odds are derived
   * from the original confidence of the positive class. The class with the higher rescaled
   * confidence becomes the prediction; the positive class wins ties.
   *
   * @param exampleSet the examples to classify
   * @param predictedLabel the attribute receiving the crisp prediction
   * @return the example set returned by the wrapped model, with rescaled confidences
   * @throws OperatorException if it is raised while the wrapped model is applied
   */
  @Override
  public ExampleSet performPrediction(ExampleSet exampleSet, Attribute predictedLabel)
      throws OperatorException {
    Attribute label = this.getLabel();
    final int posLabel = label.getMapping().getPositiveIndex();
    final int negLabel = label.getMapping().getNegativeIndex();
    final String posLabelS = label.getMapping().mapIndex(posLabel);
    final String negLabelS = label.getMapping().mapIndex(negLabel);

    ExampleSet scored = model.apply(exampleSet);
    for (Example example : scored) {
      double logOdds = PlattScaling.getLogOddsPosConfidence(example.getConfidence(posLabelS));
      double exponent = logOdds * parameters.getA() + parameters.getB();
      double scaledPos = 1.0d / (1.0d + Math.exp(exponent));
      double scaledNeg = 1.0d - scaledPos;

      int crisp = (scaledPos >= scaledNeg) ? posLabel : negLabel;
      example.setValue(predictedLabel, crisp);
      example.setConfidence(posLabelS, scaledPos);
      example.setConfidence(negLabelS, scaledNeg);
    }
    return scored;
  }
コード例 #21
0
 /**
  * Fills the classification matrix with the label the model predicts for the weight vector of
  * each node of the net, and then lets the super class create its matrices.
  *
  * <p>The node weights are put into a temporary example table (one row per node, rows ordered by
  * x and then y) that uses clones of the attributes of the example set. If applying the model
  * fails, a warning is logged and the matrix entries not filled so far remain 0.
  */
 @Override
 protected void createMatrices() {
   // the temporary table uses copies of the attributes of the original data
   List<Attribute> attributes = new ArrayList<Attribute>(exampleSet.getAttributes().size());
   for (Attribute attribute : exampleSet.getAttributes()) {
     attributes.add((Attribute) attribute.clone());
   }

   final int width = dimensions[0];
   final int height = dimensions[1];

   // one data row per node
   MemoryExampleTable table = new MemoryExampleTable(attributes);
   for (int x = 0; x < width; x++) {
     for (int y = 0; y < height; y++) {
       table.addDataRow(new DoubleArrayDataRow(net.getNodeWeights(new int[] {x, y})));
     }
   }

   this.classificationMatrix = new double[width][height];
   try {
     ExampleSet predicted = model.apply(table.createExampleSet());
     // the examples come back in insertion order, so walk the grid in the same order
     Iterator<Example> exampleIterator = predicted.iterator();
     for (int x = 0; x < width; x++) {
       for (int y = 0; y < height; y++) {
         Example example = exampleIterator.next();
         Attribute predictedLabel = example.getAttributes().getPredictedLabel();
         classificationMatrix[x][y] = example.getValue(predictedLabel);
       }
     }
   } catch (OperatorException e) {
     LogService.getRoot()
         .log(
             Level.WARNING,
             "com.rapidminer.operator.visualization.SOMModelPlotter.using_model_for_prediction_error"
                 + e.getMessage());
   }
   super.createMatrices();
 }
コード例 #22
0
  /**
   * Constructs a <code>Model</code> repeatedly running a weak learner, reweighting the training
   * example set accordingly, and combining the hypothesis using the available weighted performance
   * values.
   */
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    this.runVector = new RunVector();
    BayBoostModel ensembleNewBatch = null;
    BayBoostModel ensembleExtBatch = null;
    final Vector<BayBoostBaseModelInfo> modelInfo = new Vector<BayBoostBaseModelInfo>(); // for
    // models
    // and
    // their
    // probability
    // estimates
    Vector<BayBoostBaseModelInfo> modelInfo2 = new Vector<BayBoostBaseModelInfo>();
    this.currentIteration = 0;
    int firstOpenBatch = 1;

    // prepare the stream control attribute
    final Attribute streamControlAttribute;
    {
      Attribute attr = null;
      if ((attr = exampleSet.getAttributes().get(STREAM_CONTROL_ATTRIB_NAME)) == null)
        streamControlAttribute =
            com.rapidminer.example.Tools.createSpecialAttribute(
                exampleSet, STREAM_CONTROL_ATTRIB_NAME, Ontology.INTEGER);
      else {
        streamControlAttribute = attr;
        logWarning(
            "Attribute with the (reserved) name of the stream control attribute exists. It is probably an old version created by this operator. Trying to recycle it... ");
        // Resetting the stream control attribute values by overwriting
        // them with 0 avoids (unlikely)
        // problems in case the same ExampleSet is passed to this
        // operator over and over again:
        Iterator<Example> e = exampleSet.iterator();
        while (e.hasNext()) {
          e.next().setValue(streamControlAttribute, 0);
        }
      }
    }

    // and the weight attribute
    if (exampleSet.getAttributes().getWeight() == null) {
      this.prepareWeights(exampleSet);
    }

    boolean estimateFavoursExtBatch = true;
    // *** The main loop, one iteration per batch: ***
    Iterator<Example> reader = exampleSet.iterator();
    while (reader.hasNext()) {
      // increment batch number, collect batch and evaluate performance of
      // current model on batch
      double[] classPriors =
          this.prepareBatch(++this.currentIteration, reader, streamControlAttribute);

      ConditionedExampleSet trainingSet =
          new ConditionedExampleSet(
              exampleSet, new BatchFilterCondition(streamControlAttribute, this.currentIteration));

      final EstimatedPerformance estPerf;

      // Step 1: apply the ensemble model to the current batch (prediction
      // phase), evaluate and store result
      if (ensembleExtBatch != null) {
        // apply extended batch model first:
        trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet);
        this.performance = evaluatePredictions(trainingSet); // unweighted
        // performance;

        // then apply new batch model:
        trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
        double newBatchPerformance = evaluatePredictions(trainingSet);

        // heuristic: use extended batch model for predicting
        // unclassified instances
        if (estimateFavoursExtBatch == true)
          estPerf =
              new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false);
        else
          estPerf =
              new EstimatedPerformance("accuracy", newBatchPerformance, trainingSet.size(), false);

        // final double[] ensembleWeights;

        // continue with the better model:
        if (newBatchPerformance > this.performance) {
          this.performance = newBatchPerformance;
          firstOpenBatch = Math.max(1, this.currentIteration - 1);
          // ensembleWeights = ensembleNewBatch.getModelWeights();
        } else {
          modelInfo.clear();
          modelInfo.addAll(modelInfo2);
          // ensembleWeights = ensembleExtBatch.getModelWeights();
        }

      } else if (ensembleNewBatch != null) {
        trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
        this.performance = evaluatePredictions(trainingSet);
        firstOpenBatch = Math.max(1, this.currentIteration - 1);
        estPerf = new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false);
      } else estPerf = null; // no model ==> no prediction performance

      if (estPerf != null) {
        PerformanceVector perf = new PerformanceVector();
        perf.addAveragable(estPerf);
        this.runVector.addVector(perf);
      }

      // *** retraining phase ***
      // Step 2: First reconstruct the initial weighting, if necessary.
      if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS) == true) {
        this.rescalePriors(trainingSet, classPriors);
      }

      estimateFavoursExtBatch = true;
      // Step 3: Find better weights for existing models and continue
      // training
      if (modelInfo.size() > 0) {

        modelInfo2 = new Vector<BayBoostBaseModelInfo>();
        for (BayBoostBaseModelInfo bbbmi : modelInfo) {
          modelInfo2.add(bbbmi); // BayBoostBaseModelInfo objects
          // cannot be changed, no deep copy
          // required
        }

        // separate hold out set
        final double holdOutRatio = this.getParameterAsDouble(PARAMETER_FRACTION_HOLD_OUT_SET);
        Vector<Example> holdOutExamples = new Vector<Example>();
        if (holdOutRatio > 0) {
          RandomGenerator random = RandomGenerator.getRandomGenerator(this);
          Iterator<Example> randBatchReader = trainingSet.iterator();
          while (randBatchReader.hasNext()) {
            Example example = randBatchReader.next();
            if (random.nextDoubleInRange(0, 1) <= holdOutRatio) {
              example.setValue(streamControlAttribute, 0);
              holdOutExamples.add(example);
            }
          }
          // TODO: create new example set
          // trainingSet.updateCondition();
        }

        // model 1: train one more base classifier
        boolean trainingExamplesLeft = this.adjustBaseModelWeights(trainingSet, modelInfo);
        if (trainingExamplesLeft) {
          // "trainingExamplesLeft" needs to be checked to avoid
          // exceptions.
          // Anyway, learning does not make sense, otherwise.
          if (!this.trainAdditionalModel(trainingSet, modelInfo)) {}
        }
        ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors);

        // model 2: remove last classifier, extend batch, train on
        // extended batch
        ExampleSet extendedBatch = // because of the ">=" condition it
            // is sufficient to remember the
            // opening batch
            new ConditionedExampleSet(
                exampleSet, new BatchFilterCondition(streamControlAttribute, firstOpenBatch));
        classPriors = this.prepareExtendedBatch(extendedBatch);
        if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS) == true) {
          this.rescalePriors(extendedBatch, classPriors);
        }
        modelInfo2.remove(modelInfo2.size() - 1);
        trainingExamplesLeft = this.adjustBaseModelWeights(extendedBatch, modelInfo2);
        // If no training examples are left: no need and chance to
        // continue training.
        if (trainingExamplesLeft == false) {
          ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors);
        } else {
          boolean success = this.trainAdditionalModel(extendedBatch, modelInfo2);
          if (success) {
            ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors);
          } else {
            ensembleExtBatch = null;
            estimateFavoursExtBatch = false;
          }
        }

        if (holdOutRatio > 0) {
          Iterator hoEit = holdOutExamples.iterator();
          while (hoEit.hasNext()) {
            ((Example) hoEit.next()).setValue(streamControlAttribute, this.currentIteration);
          }
          // TODO: create new example set
          // trainingSet.updateCondition();

          if (ensembleExtBatch != null) {
            trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
            hoEit = holdOutExamples.iterator();
            int errors = 0;
            while (hoEit.hasNext()) {
              Example example = (Example) hoEit.next();
              if (example.getPredictedLabel() != example.getLabel()) errors++;
            }
            double newBatchErr = ((double) errors) / holdOutExamples.size();

            trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet);
            hoEit = holdOutExamples.iterator();
            errors = 0;
            while (hoEit.hasNext()) {
              Example example = (Example) hoEit.next();
              if (example.getPredictedLabel() != example.getLabel()) errors++;
            }
            double extBatchErr = ((double) errors) / holdOutExamples.size();

            estimateFavoursExtBatch = (extBatchErr <= newBatchErr);

            if (estimateFavoursExtBatch) {
              ensembleExtBatch =
                  this.retrainLastWeight(ensembleExtBatch, trainingSet, holdOutExamples);
            } else
              ensembleNewBatch =
                  this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples);
          } else
            ensembleNewBatch =
                this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples);
        }
      } else {
        this.trainAdditionalModel(trainingSet, modelInfo);
        ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors);
        ensembleExtBatch = null;
        estimateFavoursExtBatch = false;
      }
    }
    this.restoreOldWeights(exampleSet);
    return (ensembleExtBatch == null ? ensembleNewBatch : ensembleExtBatch);
  }
コード例 #23
0
 /**
  * Creates an iterator over all examples.
  *
  * @return a new {@link AttributesExampleReader} built from the parent's iterator and this object
  */
 public Iterator<Example> iterator() {
   final Iterator<Example> parentExamples = parent.iterator();
   return new AttributesExampleReader(parentExamples, this);
 }
コード例 #24
0
  /**
   * Returns a model containing all support vectors, i.e. the examples with non-zero alphas.
   *
   * <p>The offset handed to the model is the mean of {@code ys[i] - sum[i]} over a selected subset
   * of the indices, where {@code sum[i]} is the kernel sum of example {@code i} against the
   * collected support vectors. The subset is chosen by three successively weaker criteria; the
   * first one that selects at least one index wins.
   *
   * @param alphas one alpha value per example, indexed in the iteration order of the example set
   * @return the model built from the example set, the support vectors, the kernel and the offset
   */
  private EvoSVMModel getModel(double[] alphas) {
    final int dimension = exampleSet.getAttributes().size();

    // Pass 1: every example with a non-zero alpha becomes a support vector.
    final List<SupportVector> supportVectors = new ArrayList<SupportVector>();
    int position = 0;
    for (Iterator<Example> it = exampleSet.iterator(); it.hasNext(); position++) {
      final double alpha = alphas[position];
      final Example example = it.next();
      if (alpha == 0.0d) {
        continue;
      }
      final double[] values = new double[dimension];
      int column = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        values[column] = example.getValue(attribute);
        column++;
      }
      supportVectors.add(new SupportVector(values, ys[position], alpha));
    }

    // Pass 2: kernel sum of each example against the support vectors.
    final double[] sum = new double[exampleSet.size()];
    position = 0;
    for (Iterator<Example> it = exampleSet.iterator(); it.hasNext(); position++) {
      final Example example = it.next();
      final double[] values = new double[dimension];
      int column = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        values[column] = example.getValue(attribute);
        column++;
      }
      sum[position] = kernel.getSum(supportVectors, values);
    }

    // Offset b (from Stefan's mySVM code). First choice: indices whose signed alpha lies strictly
    // inside (IS_ZERO, c - IS_ZERO) or strictly inside (-c + IS_ZERO, -IS_ZERO).
    double offsetSum = 0.0d;
    int contributors = 0;
    for (int i = 0; i < alphas.length; i++) {
      final double signedAlpha = ys[i] * alphas[i];
      final boolean insidePositiveRange = (signedAlpha - c < -IS_ZERO) && (signedAlpha > IS_ZERO);
      final boolean insideNegativeRange = (signedAlpha + c > IS_ZERO) && (signedAlpha < -IS_ZERO);
      if (insidePositiveRange || insideNegativeRange) {
        offsetSum += ys[i] - sum[i];
        contributors++;
      }
    }

    // Unlikely: nothing matched, so fall back to the indices whose signed alpha is within
    // (-IS_ZERO, IS_ZERO). The running sum is still 0 here because no term was added above.
    if (contributors == 0) {
      for (int i = 0; i < alphas.length; i++) {
        final double signedAlpha = ys[i] * alphas[i];
        if ((signedAlpha < IS_ZERO) && (signedAlpha > -IS_ZERO)) {
          offsetSum += ys[i] - sum[i];
          contributors++;
        }
      }
    }

    // Even unlikelier: still nothing matched, so average over every index.
    if (contributors == 0) {
      for (int i = 0; i < alphas.length; i++) {
        offsetSum += ys[i] - sum[i];
        contributors++;
      }
    }

    final double offset = offsetSum / contributors;
    return new EvoSVMModel(exampleSet, supportVectors, kernel, offset);
  }