/**
 * Computes the mean and variance of every attribute obtained by iterating
 * {@code exampleSet.getAttributes()}.
 *
 * <p>The result maps the zero-based position of each attribute (in that iteration order) to its
 * {@link MeanVariance}. The variance is derived as {@code E[x^2] - E[x]^2}. Because this
 * difference can come out slightly negative through floating-point cancellation, it is clamped
 * at zero so that callers can safely take its square root. For an empty example set the division
 * by zero yields NaN for both values.
 *
 * @param exampleSet the examples to summarize
 * @return map from attribute position to the attribute's mean and variance
 */
private static Map<Integer, MeanVariance> createMeanVariances(
      com.rapidminer.example.ExampleSet exampleSet) {
    double[] sum = new double[exampleSet.getAttributes().size()];
    double[] squaredSum = new double[sum.length];

    // accumulate sum(x) and sum(x^2) per attribute position
    Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
    while (reader.hasNext()) {
      com.rapidminer.example.Example example = reader.next();
      int a = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        double value = example.getValue(attribute);
        sum[a] += value;
        squaredSum[a] += value * value;
        a++;
      }
    }

    int numberOfExamples = exampleSet.size();
    Map<Integer, MeanVariance> meanVariances = new HashMap<Integer, MeanVariance>();
    for (int a = 0; a < sum.length; a++) {
      double mean = sum[a] / numberOfExamples;
      double meanOfSquares = squaredSum[a] / numberOfExamples;
      // rounding can push the difference marginally below zero; a variance is never negative
      double variance = Math.max(0.0d, meanOfSquares - (mean * mean));
      meanVariances.put(a, new MeanVariance(mean, variance));
    }

    return meanVariances;
  }
  /**
   * Builds a cluster model in which each example receives a cluster index drawn with
   * {@code random.nextInt(k)}, where {@code k} is the configured number of clusters.
   *
   * <p>If {@code addsClusterAttribute()} is true, a nominal attribute named {@code "cluster"} is
   * added to the example table, registered as the cluster attribute and filled with
   * {@code "cluster_<index>"} for every example.
   *
   * @param exampleSet the examples to cluster
   * @return the cluster model holding the random assignments
   * @throws OperatorException declared by the signature; propagated from the calls made here
   */
  @Override
  public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    // ids are checked (and created when absent) before any assignment is recorded
    Tools.checkAndCreateIds(exampleSet);

    // draw one cluster index per example
    RandomGenerator random = RandomGenerator.getRandomGenerator(this);
    int[] assignments = new int[exampleSet.size()];
    int k = getParameterAsInt(PARAMETER_NUMBER_OF_CLUSTERS);
    for (int position = 0; position < assignments.length; position++) {
      assignments[position] = random.nextInt(k);
    }

    boolean addAsLabel = getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL);
    boolean removeUnlabeled = getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED);
    ClusterModel model = new ClusterModel(exampleSet, k, addAsLabel, removeUnlabeled);
    model.setClusterAssignments(assignments, exampleSet);

    if (!addsClusterAttribute()) {
      return model;
    }

    // materialize the assignment as a nominal cluster attribute
    Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
    exampleSet.getExampleTable().addAttribute(cluster);
    exampleSet.getAttributes().setCluster(cluster);
    int position = 0;
    for (Example example : exampleSet) {
      example.setValue(cluster, "cluster_" + assignments[position++]);
    }
    return model;
  }
  /**
   * Creates a new evolutionary SVM optimization.
   *
   * <p>Most arguments are handed straight to the superclass constructor. In addition, the
   * constructor stores the training and hold-out sets, the kernel and the population size, takes
   * {@code c} from {@code getMax(0)}, and encodes each example's label as {@code +1} when it
   * equals the positive index of the label mapping and {@code -1} otherwise.
   *
   * @param exampleSet training data
   * @param kernel the kernel to use
   * @param c SVM parameter, passed to {@code EvoSVM.determineMax}
   * @param initType start population creation type
   * @param maxIterations passed to the superclass
   * @param generationsWithoutImprovement passed to the superclass
   * @param popSize population size (GA parameter)
   * @param selectionType selection type; also decides whether the optimization function is
   *     created for non-dominated sorting selection
   * @param tournamentFraction selection parameter, passed to the superclass
   * @param keepBest selection parameter, passed to the superclass
   * @param mutationType type of mutation
   * @param crossoverProb passed to the superclass
   * @param showConvergencePlot passed to the superclass
   * @param showPopulationPlot passed to the superclass
   * @param holdOutSet stored in the {@code holdOutSet} field
   * @param random passed to the superclass
   * @param logging passed to the superclass
   * @param executingOperator passed to the superclass
   */
  public ClassificationEvoOptimization(
      ExampleSet exampleSet,
      Kernel kernel,
      double c,
      int initType,
      int maxIterations,
      int generationsWithoutImprovement,
      int popSize,
      int selectionType,
      double tournamentFraction,
      boolean keepBest,
      int mutationType,
      double crossoverProb,
      boolean showConvergencePlot,
      boolean showPopulationPlot,
      ExampleSet holdOutSet,
      RandomGenerator random,
      LoggingHandler logging,
      Operator executingOperator) {
    super(
        EvoSVM.createBoundArray(0.0d, exampleSet.size()),
        EvoSVM.determineMax(c, kernel, exampleSet, selectionType, exampleSet.size()),
        popSize,
        exampleSet.size(),
        initType,
        maxIterations,
        generationsWithoutImprovement,
        selectionType,
        tournamentFraction,
        keepBest,
        mutationType,
        Double.NaN,
        crossoverProb,
        showConvergencePlot,
        showPopulationPlot,
        random,
        logging,
        executingOperator);

    // plain state
    this.exampleSet = exampleSet;
    this.holdOutSet = holdOutSet;
    this.populationSize = popSize;
    this.kernel = kernel;
    this.c = getMax(0);

    // encode labels as +1 (positive class) / -1 (everything else)
    this.ys = new double[exampleSet.size()];
    Attribute label = exampleSet.getAttributes().getLabel();
    int position = 0;
    for (Example example : exampleSet) {
      boolean positive = example.getLabel() == label.getMapping().getPositiveIndex();
      this.ys[position++] = positive ? 1.0d : -1.0d;
    }

    // optimization function
    boolean nonDominatedSorting = (selectionType == NON_DOMINATED_SORTING_SELECTION);
    this.optimizationFunction = new ClassificationOptimizationFunction(nonDominatedSorting);
  }
 /**
  * Learns a model that always predicts one default value for the label.
  *
  * <p>The value is chosen by the configured method: the element at index {@code size / 2} of the
  * sorted label values (median), the label's average, its mode (together with per-class relative
  * frequencies as confidences), or a user-supplied constant. The attribute method returns an
  * {@link AttributeDefaultModel} instead. Median and average are replaced by mode for nominal
  * labels, and mode by average for non-nominal labels; a warning is logged in both cases.
  *
  * @param exampleSet the training examples
  * @return the default model
  * @throws OperatorException if the method parameter has an unknown value
  */
 public Model learn(ExampleSet exampleSet) throws OperatorException {
   double value = 0.0;
   double[] confidences = null;
   int method = getParameterAsInt(PARAMETER_METHOD);
   Attribute label = exampleSet.getAttributes().getLabel();
   boolean nominalLabel = label.isNominal();

   // replace methods that do not fit the label type
   if (nominalLabel && (method == MEDIAN || method == AVERAGE)) {
     logWarning(
         "Cannot use method '" + METHODS[method] + "' for nominal labels: changing to 'mode'!");
     method = MODE;
   } else if (!nominalLabel && method == MODE) {
     logWarning(
         "Cannot use method '"
             + METHODS[method]
             + "' for numerical labels: changing to 'average'!");
     method = AVERAGE;
   }

   switch (method) {
     case MEDIAN:
       double[] labels = new double[exampleSet.size()];
       int filled = 0;
       for (Example example : exampleSet) {
         labels[filled++] = example.getValue(example.getAttributes().getLabel());
       }
       java.util.Arrays.sort(labels);
       value = labels[exampleSet.size() / 2];
       break;
     case AVERAGE:
       exampleSet.recalculateAttributeStatistics(label);
       value = exampleSet.getStatistics(label, Statistics.AVERAGE);
       break;
     case MODE:
       exampleSet.recalculateAttributeStatistics(label);
       value = exampleSet.getStatistics(label, Statistics.MODE);
       // relative frequency of every label value serves as its confidence
       confidences = new double[label.getMapping().size()];
       for (int classIndex = 0; classIndex < confidences.length; classIndex++) {
         String className = label.getMapping().mapIndex(classIndex);
         confidences[classIndex] =
             exampleSet.getStatistics(label, Statistics.COUNT, className) / exampleSet.size();
       }
       break;
     case CONSTANT:
       value = getParameterAsDouble(PARAMETER_CONSTANT);
       break;
     case ATTRIBUTE:
       return new AttributeDefaultModel(
           exampleSet, getParameterAsString(PARAMETER_ATTRIBUTE_NAME));
     default:
       // cannot happen
       throw new OperatorException("DefaultLearner: Unknown default method '" + method + "'!");
   }

   String shownValue = nominalLabel ? label.getMapping().mapIndex((int) value) : value + "";
   log("Default value is '" + shownValue + "'.");
   return new DefaultModel(exampleSet, value, confidences);
 }
 /**
  * Reports whether an example is available at the current position.
  *
  * <p>When the {@code nextInvoked} flag is set (presumably by {@code next()} - confirm against
  * the rest of the class), the flag is cleared, the index is advanced by one and, if the new
  * index is still inside the parent, the example at that index is cached in
  * {@code currentExample}. Otherwise the position is left untouched.
  *
  * @return {@code true} if the (possibly advanced) index is smaller than the parent's size
  */
 public boolean hasNext() {
   if (!this.nextInvoked) {
     // nothing consumed since the last check: just report the current position
     return (this.currentIndex < parent.size());
   }

   this.nextInvoked = false;
   this.currentIndex++;
   boolean available = this.currentIndex < parent.size();
   if (available) {
     this.currentExample = this.parent.getExample(this.currentIndex);
   }
   return available;
 }
  /**
   * Builds a rule model for a numerical attribute by repeatedly splitting on it.
   *
   * <p>In every round the best split point for {@code attribute} is determined on a clone of the
   * remaining examples. The subset with values less than or equal to the split point yields a
   * rule that predicts the subset's most frequent label; the examples covered by that rule are
   * then removed. The loop ends when no examples remain, when a round removes nothing, or when
   * the split point is NaN. Remaining examples are covered by a default rule predicting their
   * most frequent label.
   *
   * @param trainingSet the examples to learn from
   * @param attribute the numerical attribute to split on
   * @return the resulting rule model
   */
  private RuleModel createNumericalRuleModel(ExampleSet trainingSet, Attribute attribute) {
    RuleModel model = new RuleModel(trainingSet);

    // keep splitting as long as each round still removes examples
    int previousSize = -1;
    while ((trainingSet.size() > 0) && (trainingSet.size() != previousSize)) {
      ExampleSet workingCopy = (ExampleSet) trainingSet.clone();
      Split bestSplit = splitter.getBestSplit(workingCopy, attribute, null);
      double splitPoint = bestSplit.getSplitPoint();
      if (Double.isNaN(splitPoint)) {
        break;
      }

      SplittedExampleSet splittedSet =
          SplittedExampleSet.splitByAttribute(workingCopy, attribute, splitPoint);
      Attribute label = splittedSet.getAttributes().getLabel();
      splittedSet.selectSingleSubset(0);
      SplitCondition condition = new LessEqualsSplitCondition(attribute, splitPoint);

      // the rule predicts the most frequent label of the selected subset
      splittedSet.recalculateAttributeStatistics(label);
      int modeIndex = (int) splittedSet.getStatistics(label, Statistics.MODE);
      Rule rule = new Rule(label.getMapping().mapIndex(modeIndex), condition);

      int[] frequencies = new int[label.getMapping().size()];
      int slot = 0;
      for (String labelValue : label.getMapping().getValues()) {
        frequencies[slot++] =
            (int) splittedSet.getStatistics(label, Statistics.COUNT, labelValue);
      }
      rule.setFrequencies(frequencies);
      model.addRule(rule);

      previousSize = trainingSet.size();
      trainingSet = rule.removeCovered(trainingSet);
    }

    // add default rule if some examples were not yet covered
    if (trainingSet.size() > 0) {
      Attribute label = trainingSet.getAttributes().getLabel();
      trainingSet.recalculateAttributeStatistics(label);
      int modeIndex = (int) trainingSet.getStatistics(label, Statistics.MODE);
      Rule defaultRule = new Rule(label.getMapping().mapIndex(modeIndex));

      int[] frequencies = new int[label.getMapping().size()];
      int slot = 0;
      for (String labelValue : label.getMapping().getValues()) {
        frequencies[slot++] =
            (int) (trainingSet.getStatistics(label, Statistics.COUNT, labelValue));
      }
      defaultRule.setFrequencies(frequencies);
      model.addRule(defaultRule);
    }

    return model;
  }
  /**
   * Converts the example set into a {@link BasicNeuralDataSet}.
   *
   * <p>Attribute values are rescaled with the attribute's minimum and maximum as
   * {@code (value - min) / (max - min)}; when minimum and maximum are equal only the minimum is
   * subtracted. Numerical labels are rescaled the same way with the label's minimum and maximum,
   * while nominal labels are copied unchanged. The minima and maxima are stored in the
   * {@code attributeMin}, {@code attributeMax}, {@code labelMin} and {@code labelMax} fields.
   *
   * @param exampleSet the examples to convert
   * @return the data set holding one input row and one single-element label row per example
   */
  private NeuralDataSet getTraining(ExampleSet exampleSet) {
    int exampleCount = exampleSet.size();
    int attributeCount = exampleSet.getAttributes().size();
    double[][] data = new double[exampleCount][attributeCount];
    double[][] labels = new double[exampleCount][1];
    Attribute label = exampleSet.getAttributes().getLabel();

    // remember the value ranges used for scaling
    this.attributeMin = new double[attributeCount];
    this.attributeMax = new double[attributeCount];
    exampleSet.recalculateAllAttributeStatistics();
    int column = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
      this.attributeMin[column] = exampleSet.getStatistics(attribute, Statistics.MINIMUM);
      this.attributeMax[column] = exampleSet.getStatistics(attribute, Statistics.MAXIMUM);
      column++;
    }
    this.labelMin = exampleSet.getStatistics(label, Statistics.MINIMUM);
    this.labelMax = exampleSet.getStatistics(label, Statistics.MAXIMUM);

    int row = 0;
    for (Example example : exampleSet) {
      // attributes
      column = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        double shifted = example.getValue(attribute) - attributeMin[column];
        data[row][column] =
            (attributeMin[column] != attributeMax[column])
                ? shifted / (attributeMax[column] - attributeMin[column])
                : shifted;
        column++;
      }

      // label
      if (label.isNominal()) {
        labels[row][0] = example.getValue(label);
      } else {
        double shiftedLabel = example.getValue(label) - labelMin;
        labels[row][0] = (labelMax != labelMin) ? shiftedLabel / (labelMax - labelMin) : shiftedLabel;
      }

      row++;
    }

    return new BasicNeuralDataSet(data, labels);
  }
Exemple #8
0
  /**
   * Resets every nested {@link DataStreamOperator}, then feeds the input example set through the
   * first subprocess one example at a time: each example is wrapped into a {@link DataObject},
   * delivered on {@code dataStream}, and the subprocess is executed once for it.
   *
   * @see com.rapidminer.operator.OperatorChain#doWork()
   */
  @Override
  public void doWork() throws OperatorException {

    List<Operator> nested = this.getImmediateChildren();
    log.info("This StreamProcess has {} nested operators", nested.size());
    for (Operator child : nested) {
      log.info("  op: {}", child);
      if (!(child instanceof DataStreamOperator)) {
        continue;
      }
      log.info("Resetting stream-operator {}", child);
      ((DataStreamOperator) child).reset();
    }

    log.info("Starting some work in doWork()");
    ExampleSet exampleSet = input.getData(ExampleSet.class);
    log.info("input is an example set with {} examples", exampleSet.size());

    int processed = 0;
    for (Example example : exampleSet) {
      log.info("Processing example {}", processed);
      DataObject datum = StreamUtils.wrap(example);
      log.info("Wrapped data-object is: {}", datum);
      dataStream.deliver(datum);
      getSubprocess(0).execute();
      inApplyLoop();
      processed++;
    }

    // super.doWork() is not called; the subprocess is executed per example above
    log.info("doWork() is finished.");
  }
  /**
   * Weights every attribute by the absolute value of its correlation with the label attribute.
   *
   * <p>Progress is reported only once the accumulated number of examples handled since the last
   * report exceeds {@code PROGRESS_UPDATE_STEPS}.
   *
   * @param exampleSet the examples to compute the correlations on
   * @return the attribute weights, keyed by attribute name
   * @throws OperatorException declared by the signature; propagated from the calls made here
   */
  @Override
  public AttributeWeights calculateWeights(ExampleSet exampleSet) throws OperatorException {
    Attributes attributes = exampleSet.getAttributes();
    Attribute labelAttribute = attributes.getLabel();
    boolean useSquaredCorrelation = getParameterAsBoolean(PARAMETER_SQUARED_CORRELATION);

    AttributeWeights weights = new AttributeWeights(exampleSet);
    getProgress().setTotal(attributes.size());

    int examplesPerAttribute = exampleSet.size();
    int finishedAttributes = 0;
    int examplesSinceReport = 0;
    for (Attribute attribute : attributes) {
      double correlation =
          MathFunctions.correlation(exampleSet, labelAttribute, attribute, useSquaredCorrelation);
      weights.setWeight(attribute.getName(), Math.abs(correlation));

      finishedAttributes++;
      examplesSinceReport += examplesPerAttribute;
      if (examplesSinceReport <= PROGRESS_UPDATE_STEPS) {
        continue;
      }
      examplesSinceReport = 0;
      getProgress().setCompleted(finishedAttributes);
    }

    return weights;
  }
  /**
   * Executes the subprocess once per example of the input example set.
   *
   * <p>Before every run the one-based iteration number is stored in the macro named by the
   * iteration-macro parameter. The subprocess receives the example set itself when the inner sink
   * is connected (whose result then replaces the example set for the next run) and a clone
   * otherwise. After the loop the macro is removed and the example set is delivered to the
   * output.
   */
  @Override
  public void doWork() throws OperatorException {
    outExtender.reset();
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);
    String iterationMacroName = getParameterAsString(PARAMETER_ITERATION_MACRO);
    boolean innerSinkIsConnected = exampleSetInnerSink.isConnected();

    // the size is re-read every round because the example set may be replaced inside the loop
    iteration = 1;
    while (iteration <= exampleSet.size()) {
      getProcess().getMacroHandler().addMacro(iterationMacroName, String.valueOf(iteration));

      // pass on the result of the previous round if connected, a clone otherwise
      ExampleSet delivered;
      if (innerSinkIsConnected) {
        delivered = exampleSet;
      } else {
        delivered = (ExampleSet) exampleSet.clone();
      }
      exampleSetInnerSource.deliver(delivered);
      getSubprocess(0).execute();
      inApplyLoop();

      if (innerSinkIsConnected) {
        exampleSet = exampleSetInnerSink.getData(ExampleSet.class);
      }

      outExtender.collect();
      iteration++;
    }

    getProcess().getMacroHandler().removeMacro(iterationMacroName);
    exampleSetOutput.deliver(exampleSet);
  }
  /**
   * Creates a {@link HistogramDataset} for the attribute returned by {@code getAttribute()}.
   *
   * <p>Missing (NaN) values are left out. If at least one value remains, a single series named
   * after the attribute is added, using at most {@code MAX_BINS_HISTOGRAM} bins.
   *
   * @param exampleSet the examples whose values are read
   * @return the data set; it contains no series when every value is missing
   */
  private HistogramDataset createHistogramDataset(final ExampleSet exampleSet) {
    HistogramDataset dataset = new HistogramDataset();

    double[] values = new double[exampleSet.size()];
    int kept = 0;
    for (Example example : exampleSet) {
      double value = example.getDataRow().get(getAttribute());
      // don't use missing values because otherwise JFreeChart tries to plot them too which
      // can lead to false histograms
      if (Double.isNaN(value)) {
        continue;
      }
      values[kept++] = value;
    }

    // nothing to plot
    if (kept == 0) {
      return dataset;
    }

    // drop the unused tail left by skipped missing values
    if (kept < values.length) {
      values = Arrays.copyOf(values, kept);
    }
    int bins = Math.min(values.length, MAX_BINS_HISTOGRAM);
    dataset.addSeries(getAttribute().getName(), values, bins);
    return dataset;
  }
  /**
   * Evaluates an individual as the (optionally weighted) log-likelihood of a logistic regression
   * whose coefficients are the individual's values.
   *
   * <p>For every example the linear predictor {@code eta} is the sum of coefficient times
   * attribute value, plus the last coefficient as intercept when {@code addIntercept} is set. The
   * per-example contribution {@code y * log(pi) + (1 - y) * log(1 - pi)} with
   * {@code pi = e^eta / (1 + e^eta)} is evaluated in the algebraically identical form
   * {@code y * eta - log(1 + e^eta)}. This form stays finite for large {@code |eta|}, whereas
   * the direct form turns into NaN once {@code e^eta} overflows or {@code pi} rounds to 0 or 1.
   *
   * @param individual the individual holding the coefficients
   * @return a performance vector with the single criterion {@code "log_reg_fitness"}
   */
  @Override
  public PerformanceVector evaluateIndividual(Individual individual) {
    double[] beta = individual.getValues();

    double fitness = 0.0d;
    for (Example example : exampleSet) {
      // linear predictor
      double eta = 0.0d;
      int i = 0;
      for (Attribute attribute : example.getAttributes()) {
        eta += beta[i] * example.getValue(attribute);
        i++;
      }
      if (addIntercept) {
        eta += beta[beta.length - 1];
      }

      // log(1 + e^eta) without overflow: max(eta, 0) + log1p(e^-|eta|)
      double logOnePlusExp = Math.max(eta, 0.0d) + Math.log1p(Math.exp(-Math.abs(eta)));

      double classValue = example.getValue(label);
      double currentFitness = classValue * eta - logOnePlusExp;
      double weightValue = 1.0d;
      if (weight != null) {
        weightValue = example.getValue(weight);
      }
      fitness += weightValue * currentFitness;
    }

    PerformanceVector performanceVector = new PerformanceVector();
    performanceVector.addCriterion(
        new EstimatedPerformance("log_reg_fitness", fitness, exampleSet.size(), false));
    return performanceVector;
  }
  /**
   * Creates a fresh example set of the given size from the RapidMiner example reader. The alpha
   * values and b are zero, the label will be set if it is known.
   *
   * <p>Only values that are not the attribute's default are stored, together with their attribute
   * position. If {@code meanVariances} holds an entry for a position, the stored value is
   * standardized as {@code (value - mean) / sqrt(variance)}. A variance that is zero or negative
   * (the latter can only be a rounding artifact) yields {@code 0} instead of a division by zero or
   * a NaN from the square root. Nominal labels become {@code +1} for the positive index of the
   * label mapping and {@code -1} otherwise; other labels are copied.
   *
   * @param exampleSet the RapidMiner examples to convert
   * @param labelAttribute the label attribute, or {@code null} if labels should not be read
   * @param meanVariances mean and variance per attribute position, used for standardization
   */
  public SVMExamples(
      com.rapidminer.example.ExampleSet exampleSet,
      Attribute labelAttribute,
      Map<Integer, MeanVariance> meanVariances) {
    this(exampleSet.size(), 0.0d);
    this.meanVarianceMap = meanVariances;

    Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
    Attribute idAttribute = exampleSet.getAttributes().getId();
    int exampleCounter = 0;
    while (reader.hasNext()) {
      com.rapidminer.example.Example current = reader.next();

      // collect the non-default values of this example, keyed by attribute position
      Map<Integer, Double> attributeMap = new LinkedHashMap<Integer, Double>();
      int a = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        double value = current.getValue(attribute);
        if (!com.rapidminer.example.Tools.isDefault(attribute.getDefault(), value)) {
          attributeMap.put(a, value);
        }
        a++;
        // dim tracks the largest number of attribute positions seen so far
        if (a > dim) {
          dim = a;
        }
      }

      atts[exampleCounter] = new double[attributeMap.size()];
      index[exampleCounter] = new int[attributeMap.size()];
      int attributeCounter = 0;
      for (Map.Entry<Integer, Double> entry : attributeMap.entrySet()) {
        Integer indexValue = entry.getKey();
        double value = entry.getValue().doubleValue();
        index[exampleCounter][attributeCounter] = indexValue.intValue();

        MeanVariance meanVariance = meanVarianceMap.get(indexValue);
        if (meanVariance != null) {
          double variance = meanVariance.getVariance();
          if (variance <= 0.0d) {
            // constant attribute (or negative rounding residue): nothing to scale by
            value = 0.0d;
          } else {
            value = (value - meanVariance.getMean()) / Math.sqrt(variance);
          }
        }
        atts[exampleCounter][attributeCounter] = value;
        attributeCounter++;
      }

      if (labelAttribute != null) {
        double label = current.getValue(labelAttribute);
        if (labelAttribute.isNominal()) {
          ys[exampleCounter] = (label == labelAttribute.getMapping().getPositiveIndex() ? 1 : -1);
        } else {
          ys[exampleCounter] = label;
        }
      }
      if (idAttribute != null) {
        ids[exampleCounter] = current.getValueAsString(idAttribute);
      }
      exampleCounter++;
    }
  }
  /**
   * Iterates over all models and returns the class with maximum likelihood.
   *
   * <p>One temporary numerical special attribute per label value is created and initialized to
   * zero for every example. Each model is then applied to a clone of the example set and its
   * result is folded into these attributes by {@code updateEstimates}. Finally
   * {@code evaluateSpecialAttributes} turns the accumulated values into confidences and a crisp
   * prediction, and the temporary attributes are removed again. If progress reporting is
   * enabled, the four phases each account for a quarter of the reported progress.
   *
   * @param origExampleSet the set of examples to be classified
   * @param predictedLabel the predicted label attribute; the size of its mapping determines the
   *     number of temporary attributes
   * @return {@code origExampleSet}, after the predictions have been written
   * @throws OperatorException if applying one of the models fails
   */
  @Override
  public ExampleSet performPrediction(ExampleSet origExampleSet, Attribute predictedLabel)
      throws OperatorException {
    final String attributePrefix = "AdaBoostModelPrediction";
    final int numLabels = predictedLabel.getMapping().size();
    final Attribute[] specialAttributes = new Attribute[numLabels];
    OperatorProgress progress = null;
    if (getShowProgress() && getOperator() != null && getOperator().getProgress() != null) {
      progress = getOperator().getProgress();
      progress.setTotal(100);
    }

    // phase 1: create one accumulator attribute per label value
    for (int i = 0; i < numLabels; i++) {
      specialAttributes[i] =
          com.rapidminer.example.Tools.createSpecialAttribute(
              origExampleSet, attributePrefix + i, Ontology.NUMERICAL);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (i + 1) / numLabels));
      }
    }

    // phase 2: reset all accumulators to zero
    int progressCounter = 0;
    for (Example example : origExampleSet) {
      for (Attribute specialAttribute : specialAttributes) {
        example.setValue(specialAttribute, 0);
      }
      if (progress != null && ++progressCounter % OPERATOR_PROGRESS_STEPS == 0) {
        progress.setCompleted((int) (25.0 * progressCounter / origExampleSet.size()) + 25);
      }
    }

    // phase 3: apply every model to a clone and accumulate its estimates
    for (int modelNr = 0; modelNr < this.getNumberOfModels(); modelNr++) {
      Model model = this.getModel(modelNr);
      ExampleSet exampleSet = (ExampleSet) origExampleSet.clone();
      exampleSet = model.apply(exampleSet);
      this.updateEstimates(exampleSet, modelNr, specialAttributes);
      PredictionModel.removePredictedLabel(exampleSet);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (modelNr + 1) / this.getNumberOfModels()) + 50);
      }
    }

    // Turn prediction weights into confidences and a crisp prediction:
    this.evaluateSpecialAttributes(origExampleSet, specialAttributes);

    // phase 4: clean up the temporary attributes
    for (int i = 0; i < numLabels; i++) {
      origExampleSet.getAttributes().remove(specialAttributes[i]);
      origExampleSet.getExampleTable().removeAttribute(specialAttributes[i]);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (i + 1) / numLabels) + 75);
      }
    }

    return origExampleSet;
  }
 /**
  * Creates an example set that is split into n subsets with the given sampling type.
  *
  * <p>Delegates to the (example set, partition) constructor with a {@code Partition} built from
  * {@code splitRatios}, the size of the example set, and the builder returned by
  * {@code createPartitionBuilder(exampleSet, samplingType, seed)}.
  *
  * @param exampleSet the example set to split
  * @param splitRatios the ratios handed to the partition; presumably one entry per subset -
  *     confirm against {@code Partition}
  * @param samplingType the sampling type used to select the partition builder
  * @param seed passed to {@code createPartitionBuilder}; presumably the random seed of the
  *     sampling - confirm
  */
 public SplittedExampleSet(
     ExampleSet exampleSet, double[] splitRatios, int samplingType, int seed) {
   this(
       exampleSet,
       new Partition(
           splitRatios,
           exampleSet.size(),
           createPartitionBuilder(exampleSet, samplingType, seed)));
 }
 /**
  * Creates an example set that is split into <i>numberOfSubsets</i> parts with the given
  * sampling type.
  *
  * <p>Delegates to the (example set, partition) constructor with a {@code Partition} built from
  * {@code numberOfSubsets}, the size of the example set, and the builder returned by
  * {@code createPartitionBuilder(exampleSet, samplingType, seed)}.
  *
  * @param exampleSet the example set to split
  * @param numberOfSubsets the number of parts handed to the partition
  * @param samplingType the sampling type used to select the partition builder
  * @param seed passed to {@code createPartitionBuilder}; presumably the random seed of the
  *     sampling - confirm
  */
 public SplittedExampleSet(
     ExampleSet exampleSet, int numberOfSubsets, int samplingType, int seed) {
   this(
       exampleSet,
       new Partition(
           numberOfSubsets,
           exampleSet.size(),
           createPartitionBuilder(exampleSet, samplingType, seed)));
 }
Exemple #17
0
  /**
   * Sets one or more attribute values of a single example selected by index.
   *
   * <p>The index parameter is one-based and must not be zero. When counting backwards is enabled,
   * index {@code 1} addresses the last example. An index that resolves to a position outside the
   * example set - below zero or at/after its size - is rejected with user error 110 instead of
   * being passed on to {@code getExample}.
   *
   * @param exampleSet the example set to modify
   * @return the same example set
   * @throws OperatorException a {@link UserError} 207 for index zero, a {@link UserError} 110 for
   *     an index outside the example set, or whatever {@code setData} raises
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    Attributes attributes = exampleSet.getAttributes();

    // searching example by index
    int exampleIndex = getParameterAsInt(PARAMETER_EXAMPLE_INDEX);
    if (exampleIndex == 0) {
      throw new UserError(
          this,
          207,
          new Object[] {
            "0", PARAMETER_EXAMPLE_INDEX, "only positive or negative indices are allowed"
          });
    }
    if (getParameterAsBoolean(PARAMETER_COUNT_BACKWARDS)) {
      exampleIndex = exampleSet.size() - exampleIndex;
    } else {
      exampleIndex--;
    }
    // both ends must be checked: negative input or a too large backwards index ends up below 0
    if (exampleIndex < 0 || exampleIndex >= exampleSet.size()) {
      throw new UserError(this, 110, exampleIndex);
    }
    Example example = exampleSet.getExample(exampleIndex);

    // now set single value of first parameter
    if (isParameterSet(PARAMETER_ATTRIBUTE_NAME) && isParameterSet(PARAMETER_VALUE)) {
      String attributeName = getParameter(PARAMETER_ATTRIBUTE_NAME);
      String value = getParameterAsString(PARAMETER_VALUE);

      setData(example, attributeName, value, attributes);
    }

    // now set each defined additional value.
    List<String[]> list = getParameterList(PARAMETER_ADDITIONAL_VALUES);
    for (String[] pair : list) {
      setData(example, pair[0], pair[1], attributes);
    }

    return exampleSet;
  }
  /**
   * Trains a kernel PCA model on the input example set.
   *
   * <p>The attribute averages are computed, every example is turned into a value array by
   * {@code getAttributeValues} (which receives those averages), the full kernel matrix over all
   * pairs of examples is filled, and its eigenvalue decomposition is handed to a
   * {@link KernelPCAModel}. The transformed example set is only produced when the corresponding
   * output port is connected.
   */
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    // only use numeric attributes
    Tools.onlyNumericalAttributes(exampleSet, "KernelPCA");
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this);

    Attributes attributes = exampleSet.getAttributes();
    int numberOfExamples = exampleSet.size();

    // attribute means, handed to getAttributeValues and to the model
    exampleSet.recalculateAllAttributeStatistics();
    double[] means = new double[exampleSet.getAttributes().size()];
    int column = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
      means[column++] = exampleSet.getStatistics(attribute, Statistics.AVERAGE);
    }

    // kernel
    Kernel kernel = Kernel.createKernel(this);

    // one value array per example
    ArrayList<double[]> exampleValues = new ArrayList<double[]>(numberOfExamples);
    for (Example example : exampleSet) {
      exampleValues.add(getAttributeValues(example, attributes, means));
    }

    // kernel matrix over all pairs of examples
    Matrix kernelMatrix = new Matrix(numberOfExamples, numberOfExamples);
    for (int row = 0; row < numberOfExamples; row++) {
      double[] rowValues = exampleValues.get(row);
      for (int col = 0; col < numberOfExamples; col++) {
        kernelMatrix.set(row, col, kernel.calculateDistance(rowValues, exampleValues.get(col)));
      }
    }

    // eigenvectors of the kernel matrix define the model
    EigenvalueDecomposition eig = kernelMatrix.eig();
    Model model = new KernelPCAModel(exampleSet, means, eig.getV(), exampleValues, kernel);

    if (exampleSetOutput.isConnected()) {
      exampleSetOutput.deliver(model.apply(exampleSet));
    }
    originalOutput.deliver(exampleSet);
    modelOutput.deliver(model);
  }
  /**
   * Creates a bootstrapped sample of the given example set.
   *
   * <p>The sample size defaults to the size of the input and is replaced by the absolute size
   * parameter or by the rounded product of input size and ratio parameter, depending on the
   * sample parameter. A weighted bootstrapping mapping is used when weights are requested and a
   * weight attribute exists. Above compatibility level {@code VERSION_6_4_0} the mapped example
   * set is materialized in memory with the data row type of the first row of the input table.
   *
   * @param exampleSet the example set to sample from
   * @return the sampled example set
   * @throws OperatorException a {@link UserError} 117 if the input contains no examples
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    int size = exampleSet.size();

    // cannot bootstrap without any examples
    if (size < 1) {
      throw new UserError(this, 117);
    }

    RandomGenerator random = RandomGenerator.getRandomGenerator(this);
    int sampleMode = getParameterAsInt(PARAMETER_SAMPLE);
    if (sampleMode == SAMPLE_ABSOLUTE) {
      size = getParameterAsInt(PARAMETER_SAMPLE_SIZE);
    } else if (sampleMode == SAMPLE_RELATIVE) {
      size = (int) Math.round(exampleSet.size() * getParameterAsDouble(PARAMETER_SAMPLE_RATIO));
    }

    boolean weighted =
        getParameterAsBoolean(PARAMETER_USE_WEIGHTS)
            && exampleSet.getAttributes().getWeight() != null;
    int[] mapping =
        weighted
            ? MappedExampleSet.createWeightedBootstrappingMapping(exampleSet, size, random)
            : MappedExampleSet.createBootstrappingMapping(exampleSet, size, random);

    // create and materialize example set
    ExampleSet mappedExampleSet = new MappedExampleSet(exampleSet, mapping, true);
    if (!getCompatibilityLevel().isAbove(VERSION_6_4_0)) {
      return mappedExampleSet;
    }
    int type =
        (exampleSet.size() > 0)
            ? exampleSet.getExampleTable().getDataRow(0).getType()
            : DataRowFactory.TYPE_DOUBLE_ARRAY;
    return MaterializeDataInMemory.materializeExampleSet(mappedExampleSet, type);
  }
Exemple #20
0
  /**
   * Reads the value of one attribute from one example and stores it in {@code currentValue}.
   *
   * <p>The index parameter is one-based; negative values count from the end ({@code -1} is the
   * last example) and zero is rejected. An index that resolves to a position outside the example
   * set - below zero or at/after its size - is rejected with user error 110 instead of being
   * passed on to {@code getExample}. Nominal and date-time attributes are stored as string with
   * {@code isNominal} set to true; all others are stored as {@code Double}.
   *
   * @throws OperatorException a {@link UserError} 111 if the attribute does not exist, 207 for
   *     index zero, or 110 for an index outside the example set
   */
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    Attribute attribute =
        exampleSet.getAttributes().get(getParameterAsString(PARAMETER_ATTRIBUTE_NAME));
    if (attribute == null) {
      throw new UserError(this, 111, getParameterAsString(PARAMETER_ATTRIBUTE_NAME));
    }

    int index = getParameterAsInt(PARAMETER_EXAMPLE_INDEX);
    if (index == 0) {
      throw new UserError(
          this, 207, "0", PARAMETER_EXAMPLE_INDEX, "only positive or negative indices are allowed");
    }

    if (index < 0) {
      index = exampleSet.size() + index;
    } else {
      index--;
    }

    // a negative index whose magnitude exceeds the size is still negative at this point
    if (index < 0 || index >= exampleSet.size()) {
      throw new UserError(this, 110, index);
    }

    Example example = exampleSet.getExample(index);
    if (attribute.isNominal()
        || Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
      currentValue = example.getValueAsString(attribute);
      isNominal = true;
    } else {
      currentValue = Double.valueOf(example.getValue(attribute));
      isNominal = false;
    }

    exampleSetOutput.deliver(exampleSet);
  }
  /**
   * Splits the example set into two subsets by comparing an attribute against a threshold.
   *
   * <p>Examples whose value is less than or equal to {@code value} are assigned to subset 0, all
   * others (including values for which the comparison is false, such as NaN) to subset 1.
   *
   * @param exampleSet the example set to split
   * @param attribute the attribute whose value is compared
   * @param value the threshold
   * @return the example set partitioned into two subsets
   */
  public static SplittedExampleSet splitByAttribute(
      ExampleSet exampleSet, Attribute attribute, double value) {
    int[] elements = new int[exampleSet.size()];
    int position = 0;
    for (Example example : exampleSet) {
      elements[position++] = (example.getValue(attribute) <= value) ? 0 : 1;
    }
    return new SplittedExampleSet(exampleSet, new Partition(elements, 2));
  }
  /**
   * Prepares the example weights before they are modified.
   *
   * <p>If the example set has no weight attribute, one is created and {@code oldWeights} is set
   * to {@code null}. Otherwise the current weight of every example is saved in
   * {@code oldWeights} and then replaced by 1.
   *
   * @param exampleSet the example set whose weights are prepared
   */
  protected void prepareWeights(ExampleSet exampleSet) {
    Attribute weightAttr = exampleSet.getAttributes().getWeight();
    if (weightAttr == null) {
      this.oldWeights = null;
      com.rapidminer.example.Tools.createWeightAttribute(exampleSet);
      return;
    }

    // back up the old weights, then reset every weight to 1
    this.oldWeights = new double[exampleSet.size()];
    Iterator<Example> reader = exampleSet.iterator();
    int position = 0;
    while (reader.hasNext() && position < oldWeights.length) {
      Example example = reader.next();
      if (example != null) {
        this.oldWeights[position] = example.getWeight();
        example.setWeight(1);
      }
      position++;
    }
  }
  /**
   * Splits the example set into one subset per distinct value of the given attribute.
   *
   * <p>The attribute value is truncated to an {@code int}; subsets are numbered in the order in
   * which their value is first encountered.
   *
   * @param exampleSet the example set to split
   * @param attribute the attribute whose values define the subsets
   * @return the example set partitioned by attribute value
   */
  public static SplittedExampleSet splitByAttribute(ExampleSet exampleSet, Attribute attribute) {
    int[] elements = new int[exampleSet.size()];
    Map<Integer, Integer> subsetByValue = new HashMap<Integer, Integer>();
    int position = 0;
    for (Example example : exampleSet) {
      int value = (int) example.getValue(attribute);
      Integer subset = subsetByValue.get(value);
      if (subset == null) {
        // first occurrence: the next free subset number equals the number of values seen so far
        subset = subsetByValue.size();
        subsetByValue.put(value, subset);
      }
      elements[position++] = subset;
    }

    Partition partition = new Partition(elements, subsetByValue.size());
    return new SplittedExampleSet(exampleSet, partition);
  }
 /**
  * Calculates ranks for an attribute.
  *
  * <p>Ranks are returned as double precision values, with 1 as the rank of the smallest value.
  * Values within +/- fuzz of each other may be considered tied. Tied values receive identical
  * ranks (the average of the positions they occupy). Missing values receive rank NaN.
  *
  * <p>Note that application of the "fuzz" factor is dependent on the order of the observations in
  * the example set. For instance, if the first three values encountered are x, x+fuzz and
  * x+2*fuzz, the first two will be considered tied but the third will not, since x+2*fuzz is not
  * within +/- fuzz of x.
  *
  * <p>If {@code att} is nominal and {@code mappingAtt} is given, each non-missing value is
  * translated to the index its nominal string has in the mapping of {@code mappingAtt} before it
  * is ranked. Missing values are never translated, so they keep rank NaN.
  *
  * @param eSet the example set
  * @param att the attribute to rank
  * @param mappingAtt attribute whose nominal mapping supplies the indices used for ranking a
  *     nominal {@code att}; may be {@code null} to rank the raw values
  * @param fuzz values within +/- fuzz may be considered tied
  * @return a double precision array of ranks
  */
 public static double[] rank(ExampleSet eSet, Attribute att, Attribute mappingAtt, double fuzz) {
   TreeMap<Double, ArrayList<Integer>> map;
   if (fuzz == 0.0) {
     map = new TreeMap<Double, ArrayList<Integer>>();
   } else {
     map = new TreeMap<Double, ArrayList<Integer>>(new FuzzyComp(fuzz));
   }

   double[] rank = new double[eSet.size()];
   int i = 0; // example index
   for (Example e : eSet) {
     double x = e.getValue(att);
     // Translate present values only: casting NaN to int yields 0, which would silently turn a
     // missing value into the first nominal value and give it a rank.
     if (!Double.isNaN(x) && att.isNominal() && mappingAtt != null) {
       String xString = att.getMapping().mapIndex((int) x);
       x = mappingAtt.getMapping().getIndex(xString);
     }

     if (Double.isNaN(x)) {
       // missing value: no rank
       rank[i] = Double.NaN;
     } else {
       // group the example index under its (possibly fuzzy-tied) value
       ArrayList<Integer> indices = map.get(x);
       if (indices == null) {
         indices = new ArrayList<Integer>();
         map.put(x, indices);
       }
       indices.add(i);
     }
     i++;
   }

   // convert the map to ranks: every group gets the average of the positions it spans
   double r = 0;
   for (ArrayList<Integer> y : map.values()) {
     double v = r + (1.0 + y.size()) / 2.0;
     for (int j : y) {
       rank[j] = v;
     }
     r += y.size();
   }
   return rank;
 }
  /**
   * Starts from a random cluster assignment and repeatedly moves every example to the cluster
   * with the smallest kernel-based distance, until the assignment no longer changes or the
   * maximal number of optimization steps is reached.
   *
   * @param exampleSet the examples to cluster; ids are created if necessary and, if this operator
   *     adds a cluster attribute, that attribute is added to the set as well
   * @return the cluster model holding the final assignment
   * @throws OperatorException if a check fails, e.g. a UserError with code 142 when the example
   *     set holds fewer examples than the requested number of clusters
   */
  @Override
  public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    final int numberOfClusters = getParameterAsInt(PARAMETER_K);
    final int maxSteps = getParameterAsInt(PARAMETER_MAX_OPTIMIZATION_STEPS);
    final boolean weighted = getParameterAsBoolean(PARAMETER_USE_WEIGHTS);
    final Kernel kernel = Kernel.createKernel(this);

    // one progress unit per optimization step
    getProgress().setTotal(maxSteps);

    // preconditions: ids present, no missing values, at least as many examples as clusters
    Tools.checkAndCreateIds(exampleSet);
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);
    if (exampleSet.size() < numberOfClusters) {
      throw new UserError(this, 142, numberOfClusters);
    }

    final Attributes attributes = exampleSet.getAttributes();
    // names of the regular attributes (collected here, not read again within this method)
    ArrayList<String> attributeNames = new ArrayList<String>(attributes.size());
    for (Attribute attribute : attributes) {
      attributeNames.add(attribute.getName());
    }
    final Attribute weightAttribute = attributes.getWeight();

    RandomGenerator generator = RandomGenerator.getRandomGenerator(this);

    ClusterModel model =
        new ClusterModel(
            exampleSet,
            numberOfClusters,
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL),
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED));

    // random initial assignment
    int[] assignments = new int[exampleSet.size()];
    for (int index = 0; index < assignments.length; index++) {
      assignments[index] = generator.nextIntInRange(0, numberOfClusters);
    }

    boolean stable = false;
    int step = 0;
    while (step < maxSteps && !stable) {
      // per cluster: summed example weight and weighted kernel sum over all member pairs
      double[] clusterWeights = new double[numberOfClusters];
      double[] clusterSelfKernel = new double[numberOfClusters];
      int row = 0;
      for (Example rowExample : exampleSet) {
        final int rowCluster = assignments[row];
        final double rowWeight = weighted ? rowExample.getValue(weightAttribute) : 1d;
        final double[] rowValues = getAsDoubleArray(rowExample, attributes);
        clusterWeights[rowCluster] += rowWeight;

        int other = 0;
        for (Example otherExample : exampleSet) {
          if (assignments[other] == rowCluster) {
            final double otherWeight = weighted ? otherExample.getValue(weightAttribute) : 1d;
            final double kernelValue =
                kernel.calculateDistance(rowValues, getAsDoubleArray(otherExample, attributes));
            clusterSelfKernel[rowCluster] += rowWeight * otherWeight * kernelValue;
          }
          other++;
        }
        row++;
      }
      for (int cluster = 0; cluster < numberOfClusters; cluster++) {
        clusterSelfKernel[cluster] /= clusterWeights[cluster] * clusterWeights[cluster];
      }

      // move every example to the cluster with the smallest distance
      int[] reassigned = new int[exampleSet.size()];
      int target = 0;
      for (Example example : exampleSet) {
        final double[] exampleValues = getAsDoubleArray(example, attributes);
        final double selfKernel = kernel.calculateDistance(exampleValues, exampleValues);

        int bestCluster = 0;
        double bestDistance = Double.POSITIVE_INFINITY;
        for (int cluster = 0; cluster < numberOfClusters; cluster++) {
          // weighted kernel sum between the example and all current members of the cluster
          double crossKernel = 0;
          int member = 0;
          for (Example memberExample : exampleSet) {
            if (assignments[member] == cluster) {
              final double memberWeight =
                  weighted ? memberExample.getValue(weightAttribute) : 1d;
              crossKernel +=
                  memberWeight
                      * kernel.calculateDistance(
                          getAsDoubleArray(memberExample, attributes), exampleValues);
            }
            member++;
          }
          final double distance =
              ((crossKernel * (-2d / clusterWeights[cluster])) + selfKernel)
                  + clusterSelfKernel[cluster];
          if (distance < bestDistance) {
            bestDistance = distance;
            bestCluster = cluster;
          }
        }
        reassigned[target] = bestCluster;
        target++;
      }

      // the optimization has converged once no example changed its cluster
      stable = true;
      for (int index = 0; index < exampleSet.size(); index++) {
        if (reassigned[index] != assignments[index]) {
          stable = false;
          break;
        }
      }
      assignments = reassigned;

      getProgress().step();
      step++;
    }

    // store the final clustering in the model
    model.setClusterAssignments(assignments, exampleSet);

    getProgress().complete();

    if (addsClusterAttribute()) {
      Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
      exampleSet.getExampleTable().addAttribute(cluster);
      exampleSet.getAttributes().setCluster(cluster);
      int position = 0;
      for (Example example : exampleSet) {
        example.setValue(cluster, "cluster_" + assignments[position]);
        position++;
      }
    }
    return model;
  }
 /**
  * Returns the size reported by {@code parent}; this object keeps no size of its own.
  *
  * @return the value of {@code parent.size()}
  */
 public int size() {
   return parent.size();
 }
  /**
   * Builds a panel with a row of radio buttons at the top and one exchangeable view in the center.
   * The buttons switch between a table view, a graph view, a histogram of sampled similarity
   * values and a k-distance view; the table view is shown initially.
   *
   * @param sim provides the distance measure used by all views
   * @param exampleSet the examples whose pairwise similarities are visualized
   */
  public SimilarityVisualization(SimilarityMeasureObject sim, ExampleSet exampleSet) {
    super();
    setLayout(new BorderLayout());

    DistanceMeasure distanceMeasure = sim.getDistanceMeasure();
    ButtonGroup viewSelection = new ButtonGroup();
    JPanel buttonBar = new JPanel(new FlowLayout(FlowLayout.LEFT));

    // Every listener below replaces the component at index 1, i.e. the center view that is added
    // after the button bar at the end of this constructor.

    // similarity table
    final JComponent tableView = new SimilarityTable(distanceMeasure, exampleSet);
    final JRadioButton tableButton = new JRadioButton("Table View", true);
    tableButton.addActionListener(
        new ActionListener() {
          public void actionPerformed(ActionEvent e) {
            if (!tableButton.isSelected()) {
              return;
            }
            remove(1);
            add(tableView, BorderLayout.CENTER);
            repaint();
          }
        });
    viewSelection.add(tableButton);
    buttonBar.add(tableButton);

    // graph view
    final JComponent graphView =
        new GraphViewer<String, String>(new SimilarityGraphCreator(distanceMeasure, exampleSet));
    final JRadioButton graphButton = new JRadioButton("Graph View", false);
    graphButton.addActionListener(
        new ActionListener() {
          public void actionPerformed(ActionEvent e) {
            if (!graphButton.isSelected()) {
              return;
            }
            remove(1);
            add(graphView, BorderLayout.CENTER);
            repaint();
          }
        });
    viewSelection.add(graphButton);
    buttonBar.add(graphButton);

    // histogram view: similarities of randomly sampled pairs of different examples
    DataTable histogramData = new SimpleDataTable("Histogram", new String[] {"Histogram"});
    double sampleRatio = Math.min(1.0d, 500.0d / exampleSet.size());

    Random random = new Random();
    int row = 0;
    for (Example example : exampleSet) {
      int column = 0;
      for (Example compExample : exampleSet) {
        // the random number is only drawn for pairs of different examples
        boolean sampled = row != column && random.nextDouble() < sampleRatio;
        if (sampled) {
          double simValue = distanceMeasure.calculateSimilarity(example, compExample);
          histogramData.add(new SimpleDataTableRow(new double[] {simValue}));
        }
        column++;
      }
      row++;
    }

    final PlotterConfigurationModel settings =
        new PlotterConfigurationModel(PlotterConfigurationModel.HISTOGRAM_PLOT, histogramData);
    settings.enablePlotColumn(0);
    settings.setParameterAsInt(HistogramChart.PARAMETER_NUMBER_OF_BINS, 100);

    final JRadioButton histogramButton = new JRadioButton("Histogram View", false);
    histogramButton.addActionListener(
        new ActionListener() {
          public void actionPerformed(ActionEvent e) {
            if (!histogramButton.isSelected()) {
              return;
            }
            remove(1);
            // the plotter component is fetched from the settings each time the view is selected
            add(settings.getPlotter().getPlotter(), BorderLayout.CENTER);
            repaint();
          }
        });
    viewSelection.add(histogramButton);
    buttonBar.add(histogramButton);

    // K distance view
    final SimilarityKDistanceVisualization kDistancePlotter =
        new SimilarityKDistanceVisualization(distanceMeasure, exampleSet);
    final JRadioButton kdistanceButton = new JRadioButton("k-Distance View", false);
    kdistanceButton.addActionListener(
        new ActionListener() {
          public void actionPerformed(ActionEvent e) {
            if (!kdistanceButton.isSelected()) {
              return;
            }
            remove(1);
            add(kDistancePlotter, BorderLayout.CENTER);
            repaint();
          }
        });
    viewSelection.add(kdistanceButton);
    buttonBar.add(kdistanceButton);

    // button bar becomes component 0, the initial table view component 1
    add(buttonBar, BorderLayout.NORTH);
    add(tableView, BorderLayout.CENTER);
  }
  /**
   * Re-guesses the value type of every selected nominal or numerical attribute from the string
   * representation of its values and replaces the attribute by a new one of the guessed type.
   *
   * <p>A value is made parsable by removing the grouping character (if the parameter is set) and
   * replacing the decimal point character by '.'. An attribute becomes integer if all inspected
   * values are whole numbers, real if at least one is fractional, and nominal as soon as one value
   * cannot be parsed as a number. An attribute without any inspectable value (only missing
   * values) keeps its current value type. Selected attributes that are neither nominal nor
   * numerical are left untouched, as is an example set without examples.
   *
   * @param exampleSet the example set to convert; it is modified in place
   * @return the given example set
   * @throws OperatorException if a parameter or the attribute subset cannot be determined
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // init
    char decimalPointCharacter = getParameterAsString(PARAMETER_DECIMAL_POINT_CHARACTER).charAt(0);
    Character groupingCharacter = null;
    if (isParameterSet(PARAMETER_NUMBER_GROUPING_CHARACTER)) {
      groupingCharacter = getParameterAsString(PARAMETER_NUMBER_GROUPING_CHARACTER).charAt(0);
    }

    Set<Attribute> attributeSet = attributeSelector.getAttributeSubset(exampleSet, false);

    // without examples nothing can be guessed and nothing is converted
    if (exampleSet.size() <= 0) {
      return exampleSet;
    }

    // Only nominal and numerical attributes take part. Collecting them up front keeps the type
    // arrays below aligned with the attributes they describe.
    List<Attribute> candidates = new LinkedList<Attribute>();
    for (Attribute attribute : attributeSet) {
      if (attribute.isNominal() || attribute.isNumerical()) {
        candidates.add(attribute);
      }
    }

    int[] currentValueTypes = new int[candidates.size()];
    int index = 0;
    for (Attribute attribute : candidates) {
      currentValueTypes[index++] = attribute.getValueType();
    }

    // guessing
    int[] guessedValueTypes = new int[currentValueTypes.length];
    boolean[] typeGuessed = new boolean[currentValueTypes.length];
    int nominalCounter = 0; // number of attributes already known to be nominal
    for (Example example : exampleSet) {
      index = 0;
      for (Attribute attribute : candidates) {
        double originalValue = example.getValue(attribute);
        // nominal is final: once a value failed to parse the attribute is not inspected again
        if (!Double.isNaN(originalValue) && guessedValueTypes[index] != Ontology.NOMINAL) {
          try {
            String valueString = example.getValueAsString(attribute);
            if (!Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) {
              double value =
                  Double.parseDouble(
                      normalizeNumberString(valueString, groupingCharacter, decimalPointCharacter));
              // real is never downgraded to integer again
              if (guessedValueTypes[index] != Ontology.REAL) {
                if (Tools.isEqual(Math.round(value), value)) {
                  guessedValueTypes[index] = Ontology.INTEGER;
                } else {
                  guessedValueTypes[index] = Ontology.REAL;
                }
                typeGuessed[index] = true;
              }
            }
          } catch (NumberFormatException e) {
            // not a number: the attribute has to be nominal
            guessedValueTypes[index] = Ontology.NOMINAL;
            typeGuessed[index] = true;
            nominalCounter++;
          }
        }
        index++;
      }
      // all attributes are nominal already, further examples cannot change anything
      if (nominalCounter >= guessedValueTypes.length) {
        break;
      }
    }

    // Attributes without any inspectable value keep their current type; otherwise the factory
    // below would be asked to create an attribute for the unset default type.
    for (int i = 0; i < guessedValueTypes.length; i++) {
      if (!typeGuessed[i]) {
        guessedValueTypes[i] = currentValueTypes[i];
      }
    }

    // new attributes
    List<AttributeRole> newAttributes = new LinkedList<AttributeRole>();
    index = 0;
    for (Attribute attribute : candidates) {
      AttributeRole role = exampleSet.getAttributes().getRole(attribute);

      Attribute newAttribute = AttributeFactory.createAttribute(guessedValueTypes[index]);
      exampleSet.getExampleTable().addAttribute(newAttribute);
      AttributeRole newRole = new AttributeRole(newAttribute);
      newRole.setSpecial(role.getSpecialName());
      newAttributes.add(newRole);

      boolean numericalTarget =
          Ontology.ATTRIBUTE_VALUE_TYPE.isA(guessedValueTypes[index], Ontology.NUMERICAL);

      // copy data
      for (Example e : exampleSet) {
        double oldValue = e.getValue(attribute);
        if (Double.isNaN(oldValue)) {
          e.setValue(newAttribute, Double.NaN);
          continue;
        }
        String valueString = e.getValueAsString(attribute);
        if (!numericalTarget) {
          e.setValue(newAttribute, newAttribute.getMapping().mapString(valueString));
        } else if (Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) {
          e.setValue(newAttribute, Double.NaN);
        } else {
          e.setValue(
              newAttribute,
              Double.parseDouble(
                  normalizeNumberString(valueString, groupingCharacter, decimalPointCharacter)));
        }
      }

      // Delete the old attribute right away and give its name to the new one, so that only one
      // additional column exists at any time.
      exampleSet.getExampleTable().removeAttribute(attribute);
      exampleSet.getAttributes().remove(role);
      newAttribute.setName(attribute.getName());

      index++;
    }

    // register the new attributes with the role of the attribute they replace
    for (AttributeRole role : newAttributes) {
      if (role.isSpecial()) {
        exampleSet.getAttributes().setSpecialAttribute(role.getAttribute(), role.getSpecialName());
      } else {
        exampleSet.getAttributes().addRegular(role.getAttribute());
      }
    }

    return exampleSet;
  }

  /** Removes the grouping character (if any) and replaces the decimal point character by '.'. */
  private static String normalizeNumberString(
      String valueString, Character groupingCharacter, char decimalPointCharacter) {
    String normalized = valueString;
    if (groupingCharacter != null) {
      normalized = normalized.replace(groupingCharacter.toString(), "");
    }
    return normalized.replace(decimalPointCharacter, '.');
  }
  /**
   * Executes the inner subprocess once per subgroup of the input example set.
   *
   * <p>Every entry of the attribute parameter list pairs a regular expression with a value
   * option. Each nominal attribute whose name matches an expression defines subgroups, one per
   * nominal value that occurs in the data (for the "above p" option only values whose relative
   * frequency is at least p). If requested, the subprocess is additionally executed once on the
   * complete example set. The iteration macro, if defined, is set to "ALL" or to
   * "attribute=value" (blanks replaced by underscores) for each execution and is reset at the end.
   *
   * @throws OperatorException if the input is missing, a regular expression is invalid (user
   *     error 206), a condition cannot be created (user error 904) or the subprocess fails
   */
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);
    exampleSet.recalculateAllAttributeStatistics();

    List<String[]> attributeValueOptions = getParameterList(PARAMETER_ATTRIBUTES);

    // category type of the value option column; the same for every entry of the list
    ParameterTypeCategory valueOptionType =
        (ParameterTypeCategory)
            ((ParameterTypeList) getParameterType(PARAMETER_ATTRIBUTES)).getValueType();

    // Maps each matching attribute to its value option. Insertion order follows the parameter
    // list; if several expressions match an attribute, the last one determines the option.
    LinkedHashMap<Attribute, Integer> attributeValueOptionsMap =
        new LinkedHashMap<Attribute, Integer>();
    for (String[] pair : attributeValueOptions) {
      String regex = pair[0];
      Pattern pattern;
      try {
        pattern = Pattern.compile(regex);
      } catch (PatternSyntaxException e) {
        throw new UserError(this, 206, regex, e.getMessage());
      }
      int valueOption = valueOptionType.getIndex(pair[1]);

      Iterator<Attribute> a = exampleSet.getAttributes().allAttributes();
      while (a.hasNext()) {
        Attribute attribute = a.next();
        Matcher matcher = pattern.matcher(attribute.getName());
        if (matcher.matches()) {
          attributeValueOptionsMap.put(attribute, valueOption);
        }
      }
    }

    double p = getParameterAsDouble(PARAMETER_P);
    boolean filterAttribute = getParameterAsBoolean(PARAMETER_FILTER_ATTRIBUTE);
    String iterationMacro = getParameterAsString(PARAMETER_ITERATION_MACRO);

    // applying on complete set
    if (getParameterAsBoolean(PARAMETER_APPLY_ON_COMPLETE_SET)) {
      if (iterationMacro != null) {
        getProcess().getMacroHandler().addMacro(iterationMacro, "ALL");
      }

      innerExampleSetSource.deliver(exampleSet);
      getSubprocess(0).execute();
    }

    // applying on subgroups defined by attributes
    for (Entry<Attribute, Integer> attributeEntry : attributeValueOptionsMap.entrySet()) {
      Attribute attribute = attributeEntry.getKey();
      if (!attribute.isNominal()) {
        continue;
      }

      List<String> values;
      switch (attributeEntry.getValue()) {
        case VALUE_OPTION_ABOVE_P:
          // only values whose relative frequency reaches p
          values = new Vector<String>();
          for (String value : attribute.getMapping().getValues()) {
            if (exampleSet.getStatistics(attribute, Statistics.COUNT, value) / exampleSet.size()
                >= p) {
              values.add(value);
            }
          }
          break;
        case VALUE_OPTION_ALL:
        default:
          values = attribute.getMapping().getValues();
          break;
      }

      for (String value : values) {
        // values that do not occur in the data define no subgroup
        if (exampleSet.getStatistics(attribute, Statistics.COUNT, value) > 0) {
          String className = "attribute_value_filter";
          String parameter = attribute.getName() + "=" + value;
          log("Creating condition '" + className + "' with parameter '" + parameter + "'");
          Condition condition = null;
          try {
            condition = ConditionedExampleSet.createCondition(className, exampleSet, parameter);
          } catch (ConditionCreationException e) {
            throw new UserError(this, 904, className, e.getMessage());
          }
          ExampleSet subgroupSet = new ConditionedExampleSet(exampleSet, condition, false);
          if (filterAttribute) {
            subgroupSet.getAttributes().remove(attribute);
          }
          if (iterationMacro != null) {
            getProcess().getMacroHandler().addMacro(iterationMacro, parameter.replace(' ', '_'));
          }

          // applying subprocess
          innerExampleSetSource.deliver(subgroupSet);
          getSubprocess(0).execute();

          if (filterAttribute) {
            subgroupSet.getAttributes().addRegular(attribute);
          }
        }
        inApplyLoop();
      }
    }

    // reset the macro after the last iteration
    if (iterationMacro != null) {
      getProcess().getMacroHandler().addMacro(iterationMacro, null);
    }
  }
  /**
   * Transposes the example set: every attribute except the id attribute becomes an example and
   * every example becomes an attribute.
   *
   * <p>The result has a nominal id attribute holding the original attribute names. The new
   * attributes are named after the id values of the examples, or "att_1", "att_2", ... if the
   * input has no id attribute. All new attributes are real unless at least one non-id attribute
   * is nominal, in which case all of them are nominal.
   *
   * @param exampleSet the example set to transpose
   * @return a new example set backed by a new memory table, carrying the input's annotations
   * @throws OperatorException declared by the overridden method
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // one nominal non-id attribute is enough to make every transposed column nominal
    int valueType = Ontology.REAL;
    Iterator<AttributeRole> roles = exampleSet.getAttributes().allAttributeRoles();
    while (roles.hasNext()) {
      AttributeRole role = roles.next();
      boolean isIdRole = role.isSpecial() && role.getSpecialName().equals(Attributes.ID_NAME);
      if (!isIdRole && role.getAttribute().isNominal()) {
        valueType = Ontology.NOMINAL;
        break;
      }
    }

    // column 0 is the new id attribute, column n belongs to the n-th example
    List<Attribute> newAttributes = new ArrayList<Attribute>(exampleSet.size());
    Attribute newIdAttribute =
        AttributeFactory.createAttribute(Attributes.ID_NAME, Ontology.NOMINAL);
    newAttributes.add(newIdAttribute);

    Attribute oldIdAttribute = exampleSet.getAttributes().getId();
    if (oldIdAttribute == null) {
      // no ids available: number the columns
      int exampleCount = exampleSet.size();
      for (int number = 1; number <= exampleCount; number++) {
        newAttributes.add(AttributeFactory.createAttribute("att_" + number, valueType));
      }
    } else {
      for (Example e : exampleSet) {
        double idValue = e.getValue(oldIdAttribute);
        Attribute column;
        if (!oldIdAttribute.isNominal()) {
          column = AttributeFactory.createAttribute("att_" + idValue, valueType);
        } else if (Double.isNaN(idValue)) {
          // missing nominal id: let the factory choose the name
          column = AttributeFactory.createAttribute(valueType);
        } else {
          String idName = oldIdAttribute.getMapping().mapIndex((int) idValue);
          column = AttributeFactory.createAttribute(idName, valueType);
        }
        newAttributes.add(column);
      }
    }

    // one data row per non-id attribute of the input
    MemoryExampleTable table = new MemoryExampleTable(newAttributes);
    roles = exampleSet.getAttributes().allAttributeRoles();
    while (roles.hasNext()) {
      AttributeRole role = roles.next();
      if (role.isSpecial() && role.getSpecialName().equals(Attributes.ID_NAME)) {
        continue;
      }
      Attribute attribute = role.getAttribute();
      double[] data = new double[exampleSet.size() + 1];
      data[0] = newIdAttribute.getMapping().mapString(attribute.getName());
      int column = 1;
      for (Example e : exampleSet) {
        double value = e.getValue(attribute);
        double cell = value;
        Attribute target = newAttributes.get(column);
        if (target.isNominal() && !Double.isNaN(value)) {
          // nominal target: store the mapping index of the value's string representation
          String text =
              attribute.isNominal()
                  ? attribute.getMapping().mapIndex((int) value)
                  : Double.toString(value);
          cell = target.getMapping().mapString(text);
        }
        data[column] = cell;
        column++;
      }
      table.addDataRow(new DoubleArrayDataRow(data));
    }

    // create and deliver example set
    ExampleSet result = table.createExampleSet(null, null, newIdAttribute);
    result.getAnnotations().addAll(exampleSet.getAnnotations());
    return result;
  }