/**
 * Reads the input example set, runs {@code apply(...)} on a working copy of it and delivers both
 * the unchanged input and the result of {@code apply(...)} to the respective output ports.
 *
 * <p>A materialized copy is only created when the original output port is connected and
 * {@code writesIntoExistingData()} reports true; otherwise a clone of the input is used.
 *
 * @throws OperatorException if fetching the input or applying the operator fails
 */
@Override
  public final void doWork() throws OperatorException {
    final ExampleSet original = exampleSetInput.getData(ExampleSet.class);

    ExampleSet workingSet = null;
    if (originalOutput.isConnected() && writesIntoExistingData()) {
      // Default row type; replaced by the type of the first data row for in-memory tables.
      int rowType = DataRowFactory.TYPE_DOUBLE_ARRAY;
      if (original.getExampleTable() instanceof MemoryExampleTable) {
        DataRowReader rows = original.getExampleTable().getDataRowReader();
        if (rows.hasNext()) {
          rowType = rows.next().getType();
        }
      }
      // Negative row types are treated as "cannot be copied"; fall through to the clone below.
      if (rowType >= 0) {
        workingSet = MaterializeDataInMemory.materializeExampleSet(original, rowType);
      }
    }

    if (workingSet == null) {
      workingSet = (ExampleSet) original.clone();
    }

    // Apply on the working set, then hand out the untouched input and the processed result.
    final ExampleSet processed = apply(workingSet);
    originalOutput.deliver(original);
    exampleSetOutput.deliver(processed);
  }
Exemple #2
0
  /**
   * Creates a {@link Ontology#DATE_TIME} attribute from the numerical attribute selected by
   * {@code PARAMETER_ATTRIBUTE_NAME}. Every non-missing value is shifted by the offset configured
   * via {@code PARMETER_TIME_OFFSET}; missing values (NaN) stay missing.
   *
   * <p>Unless {@code PARAMETER_KEEP_OLD_ATTRIBUTE} is set, the source attribute is removed and the
   * new attribute takes over its name and special role name. Otherwise the new attribute is named
   * {@code <attributeName>_AS_DATE}.
   *
   * @param exampleSet the example set to modify in place
   * @return the same example set instance, containing the new date attribute
   * @throws OperatorException a {@link UserError} with code 111 if no attribute with the configured
   *     name exists, or any error raised while reading the parameters
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    String attributeName = getParameterAsString(PARAMETER_ATTRIBUTE_NAME);
    // Primitive long: the value is added to every example, so avoid unboxing it in the loop.
    long offset = getParameterAsLong(PARMETER_TIME_OFFSET);

    Attribute numericalAttribute = exampleSet.getAttributes().get(attributeName);
    if (numericalAttribute == null) {
      throw new UserError(this, 111, attributeName);
    }

    Attribute newAttribute = AttributeFactory.createAttribute(Ontology.DATE_TIME);
    exampleSet.getExampleTable().addAttribute(newAttribute);
    exampleSet.getAttributes().addRegular(newAttribute);

    for (Example example : exampleSet) {
      double value = example.getValue(numericalAttribute);
      // NaN marks a missing value and is copied unchanged.
      example.setValue(newAttribute, Double.isNaN(value) ? value : value + offset);
    }

    if (!getParameterAsBoolean(PARAMETER_KEEP_OLD_ATTRIBUTE)) {
      // Replace the source attribute: same name, same special role name.
      AttributeRole oldRole = exampleSet.getAttributes().getRole(numericalAttribute);
      exampleSet.getAttributes().remove(numericalAttribute);
      newAttribute.setName(attributeName);
      exampleSet.getAttributes().setSpecialAttribute(newAttribute, oldRole.getSpecialName());
    } else {
      newAttribute.setName(attributeName + "_AS_DATE");
    }
    return exampleSet;
  }
  /**
   * Replaces every numerical, non-integer attribute of the example set by a new
   * {@link Ontology#INTEGER} attribute of the same name. Values are either rounded or cut off by a
   * cast to {@code long}, depending on {@code PARAMETER_ROUND}; missing values stay missing.
   *
   * @param exampleSet the example set to convert in place
   * @return the same example set instance
   * @throws OperatorException if a parameter cannot be read
   */
  @Override
  public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
    final boolean roundValues = getParameterAsBoolean(PARAMETER_ROUND);
    final List<Attribute> integerAttributes = new LinkedList<Attribute>();

    for (Iterator<Attribute> it = exampleSet.getAttributes().iterator(); it.hasNext(); ) {
      Attribute source = it.next();

      // Only numerical attributes that are not already integers are converted.
      if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(source.getValueType(), Ontology.NUMERICAL)) {
        continue;
      }
      if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(source.getValueType(), Ontology.INTEGER)) {
        continue;
      }

      Attribute target = AttributeFactory.createAttribute(source.getName(), Ontology.INTEGER);
      integerAttributes.add(target);
      exampleSet.getExampleTable().addAttribute(target);

      for (Example example : exampleSet) {
        double raw = example.getValue(source);
        if (Double.isNaN(raw)) {
          example.setValue(target, Double.NaN);
          continue;
        }
        long converted = roundValues ? Math.round(raw) : (long) raw;
        example.setValue(target, converted);
      }

      // Drop the source attribute through the iterator to keep the iteration valid.
      it.remove();
    }

    // The replacements are registered only after the iteration over the attributes has finished.
    for (Attribute converted : integerAttributes) {
      exampleSet.getAttributes().addRegular(converted);
    }

    return exampleSet;
  }
  /**
   * Builds a cluster model by assigning every example to one of
   * {@code PARAMETER_NUMBER_OF_CLUSTERS} clusters drawn from the operator's random generator.
   * If {@code addsClusterAttribute()} is true, a nominal attribute named "cluster" holding
   * "cluster_&lt;index&gt;" is added to the example set as well.
   *
   * @param exampleSet the example set to cluster
   * @return the cluster model holding the random assignments
   * @throws OperatorException if a parameter cannot be read or id creation fails
   */
  @Override
  public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    // checking and creating ids if necessary
    Tools.checkAndCreateIds(exampleSet);

    // draw one random cluster index per example
    RandomGenerator rng = RandomGenerator.getRandomGenerator(this);
    int[] assignments = new int[exampleSet.size()];
    int numberOfClusters = getParameterAsInt(PARAMETER_NUMBER_OF_CLUSTERS);
    for (int row = 0; row < exampleSet.size(); row++) {
      assignments[row] = rng.nextInt(numberOfClusters);
    }

    boolean addAsLabel = getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL);
    boolean removeUnlabeled = getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED);
    ClusterModel clusterModel =
        new ClusterModel(exampleSet, numberOfClusters, addAsLabel, removeUnlabeled);
    clusterModel.setClusterAssignments(assignments, exampleSet);

    // optionally write the assignments into a new cluster attribute
    if (addsClusterAttribute()) {
      Attribute clusterAttribute = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
      exampleSet.getExampleTable().addAttribute(clusterAttribute);
      exampleSet.getAttributes().setCluster(clusterAttribute);
      int row = 0;
      for (Example example : exampleSet) {
        example.setValue(clusterAttribute, "cluster_" + assignments[row++]);
      }
    }
    return clusterModel;
  }
  /**
   * Iterates over all models and returns the class with maximum likelihood.
   *
   * <p>One temporary numerical special attribute per label value is created and initialised with
   * 0. Each model is then applied to a clone of the example set and its result is folded into the
   * temporary attributes via {@code updateEstimates(...)}. Finally
   * {@code evaluateSpecialAttributes(...)} turns the accumulated weights into confidences and a
   * crisp prediction, and the temporary attributes are removed again. Progress is reported in four
   * phases of 25% each when a progress object is available.
   *
   * @param origExampleSet the set of examples to be classified
   * @param predictedLabel the predicted label attribute; the size of its nominal mapping
   *     determines how many temporary attributes are created
   * @return {@code origExampleSet}, the instance that was passed in
   * @throws OperatorException if applying one of the models fails
   */
  @Override
  public ExampleSet performPrediction(ExampleSet origExampleSet, Attribute predictedLabel)
      throws OperatorException {
    final String attributePrefix = "AdaBoostModelPrediction";
    final int numLabels = predictedLabel.getMapping().size();
    final Attribute[] specialAttributes = new Attribute[numLabels];
    OperatorProgress progress = null;
    if (getShowProgress() && getOperator() != null && getOperator().getProgress() != null) {
      progress = getOperator().getProgress();
      progress.setTotal(100);
    }

    // Phase 1 (0-25%): create one temporary attribute per label value.
    for (int i = 0; i < numLabels; i++) {
      specialAttributes[i] =
          com.rapidminer.example.Tools.createSpecialAttribute(
              origExampleSet, attributePrefix + i, Ontology.NUMERICAL);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (i + 1) / numLabels));
      }
    }

    // Phase 2 (25-50%): initialise all temporary attributes with 0.
    int progressCounter = 0;
    for (Example example : origExampleSet) {
      for (Attribute specialAttribute : specialAttributes) {
        example.setValue(specialAttribute, 0);
      }
      if (progress != null && ++progressCounter % OPERATOR_PROGRESS_STEPS == 0) {
        progress.setCompleted((int) (25.0 * progressCounter / origExampleSet.size()) + 25);
      }
    }

    // Phase 3 (50-75%): apply every model to a clone and accumulate its estimates.
    final int numberOfModels = this.getNumberOfModels();
    for (int modelNr = 0; modelNr < numberOfModels; modelNr++) {
      Model model = this.getModel(modelNr);
      ExampleSet exampleSet = (ExampleSet) origExampleSet.clone();
      exampleSet = model.apply(exampleSet);
      this.updateEstimates(exampleSet, modelNr, specialAttributes);
      PredictionModel.removePredictedLabel(exampleSet);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (modelNr + 1) / numberOfModels) + 50);
      }
    }

    // Turn prediction weights into confidences and a crisp prediction:
    this.evaluateSpecialAttributes(origExampleSet, specialAttributes);

    // Phase 4 (75-100%): clean up the temporary attributes.
    for (int i = 0; i < numLabels; i++) {
      origExampleSet.getAttributes().remove(specialAttributes[i]);
      origExampleSet.getExampleTable().removeAttribute(specialAttributes[i]);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (i + 1) / numLabels) + 75);
      }
    }

    return origExampleSet;
  }
 /**
  * Helper method replacing <code>Model.createPredictedLabel(ExampleSet)</code> in order to lower
  * memory consumption. It only drops an already existing predicted label from both the attribute
  * set and the example table.
  *
  * @param exampleSet the example set whose predicted label should be removed
  * @param model not used by this method
  */
 private static void createOrReplacePredictedLabelFor(ExampleSet exampleSet, Model model) {
   Attribute stalePrediction = exampleSet.getAttributes().getPredictedLabel();
   if (stalePrediction == null) {
     return; // no old predicted label to remove
   }
   exampleSet.getAttributes().remove(stalePrediction);
   exampleSet.getExampleTable().removeAttribute(stalePrediction);
   // A new predicted label is intentionally not created here: label creation is done by
   // model.apply(...).
 }
 /**
  * Restores the weights of the given example set. If {@code oldWeights} is set, its values are
  * written back to the examples in iteration order (stopping at whichever ends first, the
  * examples or the array). Otherwise the weight attribute is removed from the attribute set and
  * the example table.
  *
  * @param exampleSet the example set whose weights should be restored
  */
 private void restoreOldWeights(ExampleSet exampleSet) {
   if (this.oldWeights != null) { // need to reset weights
     Iterator<Example> reader = exampleSet.iterator();
     int i = 0;
     while (reader.hasNext() && i < this.oldWeights.length) {
       reader.next().setWeight(this.oldWeights[i++]);
     }
   } else { // need to delete the weights attribute
     Attribute weight = exampleSet.getAttributes().getWeight();
     // Nothing to delete if the example set carries no weight attribute; passing null on to the
     // removal calls would fail.
     if (weight != null) {
       exampleSet.getAttributes().remove(weight);
       exampleSet.getExampleTable().removeAttribute(weight);
     }
   }
 }
  /**
   * Draws a bootstrap sample from the given example set. The sample size defaults to the size of
   * the input and is overridden by {@code PARAMETER_SAMPLE_SIZE} (absolute mode) or by the input
   * size times {@code PARAMETER_SAMPLE_RATIO} (relative mode). Weighted sampling is used when
   * {@code PARAMETER_USE_WEIGHTS} is set and a weight attribute exists.
   *
   * @param exampleSet the example set to sample from
   * @return the mapped example set; materialized in memory when the compatibility level is above
   *     {@code VERSION_6_4_0}
   * @throws OperatorException a {@link UserError} with code 117 if the input contains no examples
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    int sampleSize = exampleSet.size();

    // cannot bootstrap without any examples
    if (sampleSize < 1) {
      throw new UserError(this, 117);
    }

    RandomGenerator rng = RandomGenerator.getRandomGenerator(this);
    int sampleMode = getParameterAsInt(PARAMETER_SAMPLE);
    if (sampleMode == SAMPLE_ABSOLUTE) {
      sampleSize = getParameterAsInt(PARAMETER_SAMPLE_SIZE);
    } else if (sampleMode == SAMPLE_RELATIVE) {
      sampleSize =
          (int) Math.round(exampleSet.size() * getParameterAsDouble(PARAMETER_SAMPLE_RATIO));
    }

    boolean weighted =
        getParameterAsBoolean(PARAMETER_USE_WEIGHTS)
            && exampleSet.getAttributes().getWeight() != null;
    int[] mapping =
        weighted
            ? MappedExampleSet.createWeightedBootstrappingMapping(exampleSet, sampleSize, rng)
            : MappedExampleSet.createBootstrappingMapping(exampleSet, sampleSize, rng);

    // create the mapped view; older compatibility levels return it without materializing
    ExampleSet bootstrapped = new MappedExampleSet(exampleSet, mapping, true);
    if (!getCompatibilityLevel().isAbove(VERSION_6_4_0)) {
      return bootstrapped;
    }

    // materialize using the row type of the first data row of the source table
    int rowType =
        exampleSet.size() > 0
            ? exampleSet.getExampleTable().getDataRow(0).getType()
            : DataRowFactory.TYPE_DOUBLE_ARRAY;
    return MaterializeDataInMemory.materializeExampleSet(bootstrapped, rowType);
  }
  /**
   * Prepares the attributes for {@code applyOnData(ExampleSet, Attribute[], Attribute[])}: every
   * numerical attribute whose value type is not {@link Ontology#REAL} gets a fresh real-valued
   * counterpart, all other attributes are reused as they are. After the delegate call the old
   * attributes are removed and the (possibly new) attributes are re-added in the original order
   * under the original names.
   *
   * @param exampleSet the example set to transform in place
   * @return the same example set instance
   * @throws OperatorException if the delegate call fails
   */
  @Override
  public ExampleSet applyOnData(ExampleSet exampleSet) throws OperatorException {
    Attributes attributes = exampleSet.getAttributes();

    // snapshot of the current attributes in iteration order
    Attribute[] sources = new Attribute[attributes.size()];
    int position = 0;
    for (Attribute attribute : attributes) {
      sources[position++] = attribute;
    }

    // target attributes: the source itself, or a new real attribute if the source is numerical
    // but not real
    Attribute[] targets = new Attribute[attributes.size()];
    for (int idx = 0; idx < targets.length; idx++) {
      Attribute source = sources[idx];
      targets[idx] = source;
      boolean needsRealCopy =
          source.isNumerical()
              && !Ontology.ATTRIBUTE_VALUE_TYPE.isA(source.getValueType(), Ontology.REAL);
      if (needsRealCopy) {
        targets[idx] = AttributeFactory.createAttribute(Ontology.REAL);
        exampleSet.getExampleTable().addAttribute(targets[idx]);
        attributes.addRegular(targets[idx]);
      }
    }

    // applying on data
    applyOnData(exampleSet, sources, targets);

    // swap old for new and restore the original names and order
    for (int idx = 0; idx < sources.length; idx++) {
      Attribute source = sources[idx];
      Attribute target = targets[idx];
      attributes.remove(source);
      if (source != target) {
        // a freshly created attribute is removed first so it can be re-added in the right position
        attributes.remove(target);
      }
      attributes.addRegular(target);
      target.setName(source.getName());
    }

    return exampleSet;
  }
  /**
   * Clusters the example set into {@code PARAMETER_K} clusters using kernel evaluations between
   * examples. Starting from a random assignment, each optimization step recomputes per-cluster
   * weight sums and kernel correction terms, reassigns every example to the cluster with the
   * smallest resulting distance value, and stops once the assignment no longer changes or
   * {@code PARAMETER_MAX_OPTIMIZATION_STEPS} steps have been executed.
   *
   * <p>If {@code addsClusterAttribute()} is true, a nominal attribute named "cluster" holding
   * "cluster_&lt;index&gt;" is added to the example set.
   *
   * @param exampleSet the example set to cluster
   * @return the cluster model holding the final assignments
   * @throws OperatorException a {@link UserError} with code 142 if the example set has fewer than
   *     k examples, or any error raised by the checks and parameter lookups
   */
  @Override
  public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    int k = getParameterAsInt(PARAMETER_K);
    int maxOptimizationSteps = getParameterAsInt(PARAMETER_MAX_OPTIMIZATION_STEPS);
    boolean useExampleWeights = getParameterAsBoolean(PARAMETER_USE_WEIGHTS);
    Kernel kernel = Kernel.createKernel(this);

    // init operator progress
    getProgress().setTotal(maxOptimizationSteps);

    // checking and creating ids if necessary
    Tools.checkAndCreateIds(exampleSet);

    // additional checks
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);

    if (exampleSet.size() < k) {
      throw new UserError(this, 142, k);
    }

    // extracting attribute names
    // NOTE(review): attributeNames is filled here but not read again within this method.
    Attributes attributes = exampleSet.getAttributes();
    ArrayList<String> attributeNames = new ArrayList<String>(attributes.size());
    for (Attribute attribute : attributes) {
      attributeNames.add(attribute.getName());
    }
    // NOTE(review): may be null if the example set has no weight attribute; it is dereferenced
    // below whenever useExampleWeights is true -- confirm callers guarantee a weight attribute.
    Attribute weightAttribute = attributes.getWeight();

    RandomGenerator generator = RandomGenerator.getRandomGenerator(this);

    ClusterModel model =
        new ClusterModel(
            exampleSet,
            k,
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL),
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED));
    // init centroids
    // (there are no explicit centroids: the clustering state is the assignment array, indexed by
    // the position of the example in iteration order)
    int[] clusterAssignments = new int[exampleSet.size()];

    for (int i = 0; i < exampleSet.size(); i++) {
      // presumably the upper bound k is exclusive -- TODO confirm against RandomGenerator
      clusterAssignments[i] = generator.nextIntInRange(0, k);
    }

    // run optimization steps
    boolean stable = false;
    for (int step = 0; step < maxOptimizationSteps && !stable; step++) {
      // calculating cluster kernel properties
      // clusterWeights[c]: sum of the weights of all examples currently assigned to c
      // clusterKernelCorrection[c]: weighted sum of kernel values over all pairs inside c,
      // normalised below by the squared cluster weight
      double[] clusterWeights = new double[k];
      double[] clusterKernelCorrection = new double[k];
      int i = 0;
      for (Example firstExample : exampleSet) {
        double firstExampleWeight = useExampleWeights ? firstExample.getValue(weightAttribute) : 1d;
        double[] firstExampleValues = getAsDoubleArray(firstExample, attributes);
        clusterWeights[clusterAssignments[i]] += firstExampleWeight;
        int j = 0;
        // pairwise pass over all examples: quadratic in the number of examples per step
        for (Example secondExample : exampleSet) {
          if (clusterAssignments[i] == clusterAssignments[j]) {
            double secondExampleWeight =
                useExampleWeights ? secondExample.getValue(weightAttribute) : 1d;
            clusterKernelCorrection[clusterAssignments[i]] +=
                firstExampleWeight
                    * secondExampleWeight
                    * kernel.calculateDistance(
                        firstExampleValues, getAsDoubleArray(secondExample, attributes));
          }
          j++;
        }
        i++;
      }
      // NOTE(review): a cluster without any assigned example has weight 0, so this division
      // yields NaN for that cluster.
      for (int z = 0; z < k; z++) {
        clusterKernelCorrection[z] /= clusterWeights[z] * clusterWeights[z];
      }

      // assign examples to new centroids
      int[] newClusterAssignments = new int[exampleSet.size()];
      i = 0;
      for (Example example : exampleSet) {
        double[] exampleValues = getAsDoubleArray(example, attributes);
        // kernel value of the example with itself; identical for every candidate cluster
        double exampleKernelValue = kernel.calculateDistance(exampleValues, exampleValues);
        double nearestDistance = Double.POSITIVE_INFINITY;
        int nearestIndex = 0;
        for (int clusterIndex = 0; clusterIndex < k; clusterIndex++) {
          double distance = 0;
          // iterating over all examples in cluster to get kernel distance
          int j = 0;
          for (Example clusterExample : exampleSet) {
            if (clusterAssignments[j] == clusterIndex) {
              distance +=
                  (useExampleWeights ? clusterExample.getValue(weightAttribute) : 1d)
                      * kernel.calculateDistance(
                          getAsDoubleArray(clusterExample, attributes), exampleValues);
            }
            j++;
          }
          // distance = K(x,x) - 2 * weightedSum / clusterWeight + clusterKernelCorrection
          distance *= -2d / clusterWeights[clusterIndex];
          // copy in outer loop
          distance += exampleKernelValue;
          distance += clusterKernelCorrection[clusterIndex];
          // a NaN distance never compares smaller, so such a cluster is never selected here
          if (distance < nearestDistance) {
            nearestDistance = distance;
            nearestIndex = clusterIndex;
          }
        }
        newClusterAssignments[i] = nearestIndex;
        i++;
      }

      // finishing assignment
      // the clustering is stable when no example changed its cluster in this step
      stable = true;
      for (int j = 0; j < exampleSet.size() && stable; j++) {
        stable &= newClusterAssignments[j] == clusterAssignments[j];
      }
      clusterAssignments = newClusterAssignments;

      // trigger operator progress
      getProgress().step();
    }

    // setting last clustering into model
    model.setClusterAssignments(clusterAssignments, exampleSet);

    getProgress().complete();

    // optionally write the final assignments into a new nominal "cluster" attribute
    if (addsClusterAttribute()) {
      Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
      exampleSet.getExampleTable().addAttribute(cluster);
      exampleSet.getAttributes().setCluster(cluster);
      int i = 0;
      for (Example example : exampleSet) {
        example.setValue(cluster, "cluster_" + clusterAssignments[i]);
        i++;
      }
    }
    return model;
  }
  /**
   * Parses the provided expression and iterates over the {@link ExampleSet}, interprets attributes
   * as variables, evaluates the function and creates a new attribute with the given name that takes
   * the expression's value. The type of the attribute depends on the expression type and is {@link
   * Ontology#NOMINAL} for strings, {@link Ontology#INTEGER} for integers, {@link Ontology#REAL} for
   * reals, {@link Ontology#DATE_TIME} for Dates, and {@link Ontology#BINOMINAL} with values
   * &quot;true&quot; and &quot;false&quot; for booleans. If the executing operator is defined,
   * there will be a check for stop before the calculation of each example.
   *
   * <p>If an attribute with the given name already exists, the new attribute is first built under
   * a unique temporary name; afterwards the existing attribute is removed and the new one takes
   * over its name and, if it was special, its special role.
   *
   * @param exampleSet the example set to which the generated attribute is added
   * @param name the new attribute name
   * @param expression the expression used to generate attribute values
   * @param parser the expression parser used to parse the expression argument
   * @param resolver the example resolver which is used by the parser to resolve example values
   * @param executingOperator the operator calling this method. <code>null</code> is allowed. If not
   *     null the operator will be used to check for stop
   * @return the newly created attribute, already added to the example set
   * @throws ProcessStoppedException in case the process was stopped by the user
   * @throws ExpressionException in case parsing the expression fails
   */
  public static Attribute addAttribute(
      ExampleSet exampleSet,
      String name,
      String expression,
      ExpressionParser parser,
      ExampleResolver resolver,
      Operator executingOperator)
      throws ProcessStoppedException, ExpressionException {

    // parse the expression
    Expression parsedExpression = parser.parse(expression);

    // if != null this needs to be overridden
    Attribute existingAttribute = exampleSet.getAttributes().get(name);

    // Name under which the new attribute is created. It differs from 'name' only while an
    // existing attribute of that name is still part of the example set.
    String creationName = name;
    if (existingAttribute != null) {
      // If an existing attribute will be overridden, first a unique temporary name has to be
      // generated by appending a random string to the attribute's name until it's a unique
      // attribute name. After the new attribute is built, its name is set to 'name' at the end
      // of this method.
      StringBuilder appendix = new StringBuilder();
      do {
        appendix.append(RandomGenerator.getGlobalRandomGenerator().nextString(5));
      } while (exampleSet.getAttributes().get(name + appendix) != null);
      creationName = name + appendix;
    }

    ExpressionType resultType = parsedExpression.getExpressionType();
    int ontology = resultType.getAttributeType();
    Attribute newAttribute;
    if (ontology == Ontology.BINOMINAL) {
      newAttribute = AttributeFactory.createAttribute(creationName, Ontology.BINOMINAL);
      // fix the mapping order: "false" is mapped first, "true" second
      newAttribute.getMapping().mapString("false");
      newAttribute.getMapping().mapString("true");
    } else {
      newAttribute = AttributeFactory.createAttribute(creationName, ontology);
    }

    // set construction description
    newAttribute.setConstruction(expression);

    // add new attribute to table and example set
    exampleSet.getExampleTable().addAttribute(newAttribute);
    exampleSet.getAttributes().addRegular(newAttribute);

    // create attribute of correct type and all values
    for (Example example : exampleSet) {
      if (executingOperator != null) {
        executingOperator.checkForStop();
      }

      // bind example to resolver
      resolver.bind(example);

      // calculate result
      try {
        switch (resultType) {
          case DOUBLE:
          case INTEGER:
            example.setValue(newAttribute, parsedExpression.evaluateNumerical());
            break;
          case DATE:
            // dates are stored as milliseconds; a null date becomes a missing value
            Date date = parsedExpression.evaluateDate();
            example.setValue(newAttribute, date == null ? Double.NaN : date.getTime());
            break;
          default:
            example.setValue(newAttribute, parsedExpression.evaluateNominal());
            break;
        }
      } finally {
        // avoid memory leaks
        resolver.unbind();
      }
    }

    // remove existing attribute (if necessary)
    if (existingAttribute != null) {
      AttributeRole oldRole = exampleSet.getAttributes().getRole(existingAttribute);
      exampleSet.getAttributes().remove(existingAttribute);
      newAttribute.setName(name);
      // restore role from old attribute to new attribute
      if (oldRole.isSpecial()) {
        exampleSet.getAttributes().setSpecialAttribute(newAttribute, oldRole.getSpecialName());
      }
    }

    // update example resolver after meta data change
    resolver.addAttributeMetaData(
        new AttributeMetaData(exampleSet.getAttributes().getRole(newAttribute), exampleSet, true));

    return newAttribute;
  }
  /**
   * Replaces the predicted label by ranked predictions. For each of the top k confidences
   * (k = minimum of the number of label values and {@code PARAMETER_NUMBER_OF_RANKS}) a prediction
   * attribute and a confidence attribute with suffix "_&lt;rank&gt;" are created and filled per
   * example, highest confidence first. The old predicted label is removed; the old confidence
   * attributes are removed only if {@code PARAMETER_REMOVE_OLD_PREDICTIONS} is set.
   *
   * @param exampleSet the example set carrying a predicted label and its confidences
   * @return the same example set instance
   * @throws OperatorException a {@link UserError} with code 107 if there is no predicted label, or
   *     with code 154 if a confidence attribute for one of the label values is missing
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // searching confidence attributes
    Attributes attributes = exampleSet.getAttributes();
    Attribute predictedLabel = attributes.getPredictedLabel();
    if (predictedLabel == null) {
      throw new UserError(this, 107);
    }

    NominalMapping mapping = predictedLabel.getMapping();
    int numberOfLabels = mapping.size();
    Attribute[] confidences = new Attribute[numberOfLabels];
    String[] labelNames = new String[numberOfLabels];
    int labelIndex = 0;
    for (String value : mapping.getValues()) {
      labelNames[labelIndex] = value;
      confidences[labelIndex] = attributes.getConfidence(value);
      if (confidences[labelIndex] == null) {
        throw new UserError(this, 154, value);
      }
      labelIndex++;
    }

    // generating new prediction attributes
    int ranks = Math.min(numberOfLabels, getParameterAsInt(PARAMETER_NUMBER_OF_RANKS));
    Attribute[] rankedPredictions = new Attribute[ranks];
    Attribute[] rankedConfidences = new Attribute[ranks];
    for (int rank = 0; rank < ranks; rank++) {
      Attribute prediction = AttributeFactory.createAttribute(predictedLabel.getValueType());
      prediction.setName(predictedLabel.getName() + "_" + (rank + 1));
      prediction.setMapping((NominalMapping) predictedLabel.getMapping().clone());
      rankedPredictions[rank] = prediction;

      Attribute confidence = AttributeFactory.createAttribute(Ontology.REAL);
      confidence.setName(Attributes.CONFIDENCE_NAME + "_" + (rank + 1));
      rankedConfidences[rank] = confidence;

      attributes.addRegular(prediction);
      attributes.addRegular(confidence);
      attributes.setSpecialAttribute(prediction, Attributes.PREDICTION_NAME + "_" + (rank + 1));
      attributes.setSpecialAttribute(confidence, Attributes.CONFIDENCE_NAME + "_" + (rank + 1));
    }
    exampleSet.getExampleTable().addAttributes(Arrays.asList(rankedConfidences));
    exampleSet.getExampleTable().addAttributes(Arrays.asList(rankedPredictions));

    // now setting values
    for (Example example : exampleSet) {
      ArrayList<Tupel<Double, Integer>> sortedConfidences =
          new ArrayList<Tupel<Double, Integer>>(numberOfLabels);
      for (int label = 0; label < numberOfLabels; label++) {
        sortedConfidences.add(
            new Tupel<Double, Integer>(example.getValue(confidences[label]), label));
      }
      Collections.sort(sortedConfidences);

      // the sorted list is read from its end, so rank 0 takes the last element
      for (int rank = 0; rank < ranks; rank++) {
        Tupel<Double, Integer> entry = sortedConfidences.get(numberOfLabels - rank - 1);
        // Can use index since mapping has been cloned from above
        example.setValue(rankedPredictions[rank], entry.getSecond());
        example.setValue(rankedConfidences[rank], entry.getFirst());
      }
    }

    // deleting old prediction / confidences
    attributes.remove(predictedLabel);
    if (getParameterAsBoolean(PARAMETER_REMOVE_OLD_PREDICTIONS)) {
      for (Attribute oldConfidence : confidences) {
        attributes.remove(oldConfidence);
      }
    }

    return exampleSet;
  }
  /**
   * Guesses the value type (integer, real or nominal) of the selected nominal and numerical
   * attributes by trying to parse their string representation as numbers, then replaces each of
   * those attributes by a new attribute of the guessed type carrying the converted values.
   *
   * <p>The decimal point character and the optional grouping character are taken from
   * {@code PARAMETER_DECIMAL_POINT_CHARACTER} and {@code PARAMETER_NUMBER_GROUPING_CHARACTER}.
   * Attributes that are neither nominal nor numerical are left untouched. The replacement only
   * happens if the example set contains at least one example.
   *
   * @param exampleSet the example set to convert in place
   * @return the same example set instance
   * @throws OperatorException if a parameter cannot be read or the attribute subset cannot be
   *     determined
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // init
    // NOTE(review): charAt(0) throws for an empty parameter string -- confirm the parameter
    // definition rules that out.
    char decimalPointCharacter = getParameterAsString(PARAMETER_DECIMAL_POINT_CHARACTER).charAt(0);
    Character groupingCharacter = null;
    if (isParameterSet(PARAMETER_NUMBER_GROUPING_CHARACTER)) {
      groupingCharacter = getParameterAsString(PARAMETER_NUMBER_GROUPING_CHARACTER).charAt(0);
    }

    Set<Attribute> attributeSet = attributeSelector.getAttributeSubset(exampleSet, false);
    int size = attributeSet.size();

    // NOTE(review): these original value types are replaced by the guessed types below before
    // they are ever read; for an empty example set they are not read at all.
    int[] valueTypes = new int[size];

    int index = 0;
    for (Attribute attribute : attributeSet) {
      valueTypes[index++] = attribute.getValueType();
    }

    // guessing
    // guessedValueTypes starts out as all zeros; a slot keeps 0 if no parsable, non-missing value
    // is seen for it. NOTE(review): confirm AttributeFactory accepts value type 0 in that case.
    int[] guessedValueTypes = new int[valueTypes.length];
    // counts the slots that have been decided to be nominal (each slot is counted at most once,
    // because nominal slots are skipped by the check below)
    int checkedCounter = 0;
    for (Example example : exampleSet) {
      index = 0;
      for (Attribute attribute : attributeSet) {
        // skipped attributes do not advance 'index': slots are numbered over the nominal and
        // numerical attributes only (the replacement loop further down counts the same way)
        if (!attribute.isNominal() && !attribute.isNumerical()) {
          continue;
        }

        double originalValue = example.getValue(attribute);
        if (!Double.isNaN(originalValue)) {
          if (guessedValueTypes[index] != Ontology.NOMINAL) {
            try {
              String valueString = example.getValueAsString(attribute);
              if (!Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) {
                // normalise the number format before parsing
                if (groupingCharacter != null) {
                  valueString = valueString.replace(groupingCharacter.toString(), "");
                }
                valueString = valueString.replace(decimalPointCharacter, '.');
                double value = Double.parseDouble(valueString);
                // once a slot is real it stays real; otherwise integer until a fraction shows up
                if (guessedValueTypes[index] != Ontology.REAL) {
                  if (Tools.isEqual(Math.round(value), value)) {
                    guessedValueTypes[index] = Ontology.INTEGER;
                  } else {
                    guessedValueTypes[index] = Ontology.REAL;
                  }
                }
              }
            } catch (NumberFormatException e) {
              // a single unparsable value makes the whole attribute nominal
              guessedValueTypes[index] = Ontology.NOMINAL;
              checkedCounter++;
            }
          }
        }
        index++;
      }
      // stop scanning early once every slot has been decided to be nominal
      if (checkedCounter >= guessedValueTypes.length) {
        break;
      }
    }

    // the example set contains at least one example and the guessing was performed
    if (exampleSet.size() > 0) {
      valueTypes = guessedValueTypes;

      // new attributes
      List<AttributeRole> newAttributes = new LinkedList<AttributeRole>();
      index = 0;
      for (Attribute attribute : attributeSet) {
        if (!attribute.isNominal() && !attribute.isNumerical()) {
          continue;
        }

        AttributeRole role = exampleSet.getAttributes().getRole(attribute);

        // the replacement attribute takes over the special name of the old role
        Attribute newAttribute = AttributeFactory.createAttribute(valueTypes[index]);
        exampleSet.getExampleTable().addAttribute(newAttribute);
        AttributeRole newRole = new AttributeRole(newAttribute);
        newRole.setSpecial(role.getSpecialName());
        newAttributes.add(newRole);

        // copy data
        for (Example e : exampleSet) {
          double oldValue = e.getValue(attribute);
          if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueTypes[index], Ontology.NUMERICAL)) {
            // numerical target: parse the normalised string, missing stays missing
            if (!Double.isNaN(oldValue)) {
              String valueString = e.getValueAsString(attribute);
              if (Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) {
                e.setValue(newAttribute, Double.NaN);
              } else {
                if (groupingCharacter != null) {
                  valueString = valueString.replace(groupingCharacter.toString(), "");
                }
                valueString = valueString.replace(decimalPointCharacter, '.');
                e.setValue(newAttribute, Double.parseDouble(valueString));
              }
            } else {
              e.setValue(newAttribute, Double.NaN);
            }
          } else {
            // non-numerical target: map the string representation into the new mapping
            if (!Double.isNaN(oldValue)) {
              String value = e.getValueAsString(attribute);
              e.setValue(newAttribute, newAttribute.getMapping().mapString(value));
            } else {
              e.setValue(newAttribute, Double.NaN);
            }
          }
        }

        // delete attribute and rename the new attribute (due to deletion and data scans: no
        // more memory used :-)
        exampleSet.getExampleTable().removeAttribute(attribute);
        exampleSet.getAttributes().remove(role);
        newAttribute.setName(attribute.getName());

        index++;
      }

      // register the replacements, restoring special roles where the old attribute had one
      for (AttributeRole role : newAttributes) {
        if (role.isSpecial()) {
          exampleSet
              .getAttributes()
              .setSpecialAttribute(role.getAttribute(), role.getSpecialName());
        } else {
          exampleSet.getAttributes().addRegular(role.getAttribute());
        }
      }
    }

    return exampleSet;
  }
  /**
   * Applies the singular value decomposition stored in this model to a clone of the given example
   * set. One real-valued attribute per used component is added (named "d&lt;i&gt;" with legacy
   * names, "svd_&lt;i+1&gt;" otherwise) and filled with the sum of the input attribute values
   * weighted by the corresponding column of the V matrix divided by the singular value.
   *
   * <p>The number of used components is either the manually configured number or the smallest
   * number whose cumulative singular value proportion reaches the threshold. If nothing is
   * defined, the threshold is never reached, or the number exceeds
   * {@code getNumberOfComponents()}, all components are kept.
   *
   * @param inputExampleSet the example set to transform; it is cloned, not modified
   * @return the transformed clone
   * @throws OperatorException a {@link UserError} with code 133 if the number of attributes does
   *     not match the number of attribute names stored in the model
   */
  @Override
  public ExampleSet apply(ExampleSet inputExampleSet) throws OperatorException {
    ExampleSet exampleSet = (ExampleSet) inputExampleSet.clone();
    Attributes attributes = exampleSet.getAttributes();
    if (attributeNames.length != attributes.size()) {
      throw new UserError(null, 133, numberOfComponents, attributes.size());
    }

    // remember attributes that have been removed during training. These will be removed lateron
    Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()];
    int d = 0;
    for (Attribute oldAttribute : getTrainingHeader().getAttributes()) {
      inputAttributes[d] = attributes.get(oldAttribute.getName());
      d++;
    }

    // determining number of used components; -1 means "keep all"
    int numberOfUsedComponents = -1;
    if (manualNumber) {
      numberOfUsedComponents = numberOfComponents;
    } else if (proportionThreshold != 0.0d) {
      // Index of the first component whose cumulative proportion reaches the threshold. The
      // scan is bounded by the array length: if the threshold is never reached (e.g. because of
      // rounding in the last cumulative value) all components are kept instead of running past
      // the end of the array.
      int firstReaching = 0;
      while (firstReaching < cumulativeSingularValueProportion.length
          && cumulativeSingularValueProportion[firstReaching] < proportionThreshold) {
        firstReaching++;
      }
      if (firstReaching < cumulativeSingularValueProportion.length) {
        numberOfUsedComponents = firstReaching + 1;
      }
    }
    // if nothing defined or number exceeds maximal number of possible components
    if (numberOfUsedComponents == -1 || numberOfUsedComponents > getNumberOfComponents()) {
      // keep all components
      numberOfUsedComponents = getNumberOfComponents();
    }

    // retrieve factors inside singularValueVectors
    double[][] singularValueFactors = new double[numberOfUsedComponents][attributeNames.length];
    double[][] vMatrixData = vMatrix.getArray();
    for (int i = 0; i < numberOfUsedComponents; i++) {
      double invertedSingularValue = 1d / singularValues[i];
      for (int j = 0; j < attributeNames.length; j++) {
        singularValueFactors[i][j] = vMatrixData[j][i] * invertedSingularValue;
      }
    }

    // now build new attributes
    Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents];
    for (int i = 0; i < numberOfUsedComponents; i++) {
      if (useLegacyNames) {
        derivedAttributes[i] = AttributeFactory.createAttribute("d" + i, Ontology.REAL);
      } else {
        derivedAttributes[i] = AttributeFactory.createAttribute("svd_" + (i + 1), Ontology.REAL);
      }
      exampleSet.getExampleTable().addAttribute(derivedAttributes[i]);
      attributes.addRegular(derivedAttributes[i]);
    }

    // now iterate through all examples and derive value of new features
    double[] derivedValues = new double[numberOfUsedComponents];
    for (Example example : exampleSet) {
      // calculate values of new attributes with single scan over attributes
      d = 0;
      for (Attribute attribute : inputAttributes) {
        double attributeValue = example.getValue(attribute);
        for (int i = 0; i < numberOfUsedComponents; i++) {
          derivedValues[i] += singularValueFactors[i][d] * attributeValue;
        }
        d++;
      }

      // set values
      for (int i = 0; i < numberOfUsedComponents; i++) {
        example.setValue(derivedAttributes[i], derivedValues[i]);
      }

      // reset the accumulator for the next example
      Arrays.fill(derivedValues, 0);
    }

    // now remove attributes if needed
    if (!keepAttributes) {
      for (Attribute attribute : inputAttributes) {
        attributes.remove(attribute);
      }
    }

    return exampleSet;
  }
 /**
  * Returns the example table obtained from {@code parent}.
  *
  * @return the result of {@code parent.getExampleTable()}
  */
 public ExampleTable getExampleTable() {
   final ExampleTable parentTable = parent.getExampleTable();
   return parentTable;
 }
Exemple #16
0
  /**
   * Applies this model to the given example set by adding one derived attribute per used
   * component ({@code pc_1}, {@code pc_2}, ...) and filling it with the mean-centered input values
   * weighted by the corresponding eigenvector.
   *
   * <p>The number of used components is either the manually configured {@code numberOfComponents}
   * or is derived from {@code cumulativeVariance} and {@code varianceThreshold}. A threshold of
   * {@code 0.0} (or a manual value of {@code -1}) keeps all components. The count is never allowed
   * to exceed the number of stored eigenvectors.
   *
   * <p>If {@code keepAttributes} is {@code false}, the input attributes are removed from the
   * attribute set afterwards.
   *
   * @param exampleSet the example set to transform; it is modified in place
   * @return the same example set instance, extended by the derived attributes
   * @throws OperatorException if the number of attributes differs from the number of attribute
   *     names stored in the model, or if an attribute of the training header is missing
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    exampleSet.recalculateAllAttributeStatistics();

    Attributes attributes = exampleSet.getAttributes();
    if (attributeNames.length != attributes.size()) {
      // report the count that was actually compared, not the configured number of components
      throw new UserError(null, 133, attributeNames.length, attributes.size());
    }

    // Resolve the training attributes in the given example set. This is done before the example
    // table is touched so that a missing attribute fails early instead of surfacing as a
    // NullPointerException while the examples are being processed.
    Attribute[] inputAttributes = new Attribute[getTrainingHeader().getAttributes().size()];
    int position = 0;
    for (Attribute trainingAttribute : getTrainingHeader().getAttributes()) {
      Attribute matching = attributes.get(trainingAttribute.getName());
      if (matching == null) {
        throw new UserError(null, 111, trainingAttribute.getName());
      }
      inputAttributes[position] = matching;
      position++;
    }

    // determining number of used components (-1 means: keep all)
    int numberOfUsedComponents = -1;
    if (manualNumber) {
      numberOfUsedComponents = numberOfComponents;
    } else if (varianceThreshold != 0.0d) {
      numberOfUsedComponents = 0;
      // Stop at the last entry: the threshold may never be reached (e.g. rounding in the
      // cumulative sum), which would otherwise run past the end of the array.
      int lastIndex = cumulativeVariance.length - 1;
      while (numberOfUsedComponents < lastIndex
          && cumulativeVariance[numberOfUsedComponents] < varianceThreshold) {
        numberOfUsedComponents++;
      }
      numberOfUsedComponents++;
      // NOTE(review): when every component is needed the count is reduced by one; this is kept
      // from the original implementation - confirm that it is intended.
      if (numberOfUsedComponents == eigenVectors.size()) {
        numberOfUsedComponents--;
      }
    }
    if (numberOfUsedComponents == -1) {
      // keep all components
      numberOfUsedComponents = attributes.size();
    }
    // never request more components than eigenvectors are available
    numberOfUsedComponents = Math.min(numberOfUsedComponents, eigenVectors.size());

    // retrieve factors inside eigenVectors; the rows are references to the stored eigenvectors
    double[][] eigenValueFactors = new double[numberOfUsedComponents][];
    for (int i = 0; i < numberOfUsedComponents; i++) {
      eigenValueFactors[i] = this.eigenVectors.get(i).getEigenvector();
    }

    // now build new attributes
    Attribute[] derivedAttributes = new Attribute[numberOfUsedComponents];
    for (int i = 0; i < numberOfUsedComponents; i++) {
      derivedAttributes[i] = AttributeFactory.createAttribute("pc_" + (i + 1), Ontology.REAL);
      exampleSet.getExampleTable().addAttribute(derivedAttributes[i]);
      attributes.addRegular(derivedAttributes[i]);
    }

    // now iterate through all examples and derive the values of the new features
    double[] derivedValues = new double[numberOfUsedComponents];
    for (Example example : exampleSet) {
      // start every example with a clean accumulator
      Arrays.fill(derivedValues, 0);

      // calculate values of new attributes with a single scan over the input attributes
      for (int a = 0; a < inputAttributes.length; a++) {
        double centeredValue = example.getValue(inputAttributes[a]) - means[a];
        for (int i = 0; i < numberOfUsedComponents; i++) {
          derivedValues[i] += eigenValueFactors[i][a] * centeredValue;
        }
      }

      // set values
      for (int i = 0; i < numberOfUsedComponents; i++) {
        example.setValue(derivedAttributes[i], derivedValues[i]);
      }
    }

    // now remove attributes if needed
    if (!keepAttributes) {
      for (Attribute attribute : inputAttributes) {
        attributes.remove(attribute);
      }
    }

    return exampleSet;
  }
  /**
   * Converts a windowed example set into modelling data.
   *
   * <p>Regular attributes are expected to be named {@code <base>-<index>}. Attributes whose name
   * does not end in a numeric {@code -<index>} suffix are left untouched. The method
   *
   * <ul>
   *   <li>collects the base name of every attribute ending in {@code -0} and picks the one whose
   *       name starts with the label name stem as label,
   *   <li>removes all window attributes with an index below the horizon,
   *   <li>sets the label, and
   *   <li>if the relative transformation is enabled and the label is numerical, adds a special
   *       attribute {@code base_value} and subtracts, per example, the value at index
   *       {@code horizon} of each base from all its remaining window values and from the label.
   * </ul>
   *
   * @return an array containing only the transformed example set
   * @throws OperatorException if the relative transformation is requested but the label or one of
   *     the required window attributes cannot be found
   */
  public IOObject[] apply() throws OperatorException {
    ExampleSet exampleSet = getInput(ExampleSet.class);

    String labelName = getParameterAsString(PARAMETER_LABEL_NAME_STEM);
    int horizon = getParameterAsInt(PARAMETER_HORIZON);

    // TODO: check if window width is large enough

    // collect base names and attributes to remove, find label
    Attribute labelAttribute = null;
    List<String> baseNames = new LinkedList<String>();
    List<Attribute> toRemove = new LinkedList<Attribute>();
    int windowWidth = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
      String name = attribute.getName();
      int index = parseWindowIndex(name);
      if (index < 0) {
        // not a window attribute (no numeric "-<index>" suffix): nothing to do for it
        continue;
      }

      if (name.endsWith("-0")) {
        baseNames.add(name.substring(0, name.lastIndexOf('-')));
        if (name.startsWith(labelName)) {
          labelAttribute = attribute;
        }
      }

      windowWidth = Math.max(windowWidth, index);

      if (index < horizon) {
        toRemove.add(attribute);
      }
    }

    // remove horizon attributes
    for (Attribute attribute : toRemove) {
      exampleSet.getAttributes().remove(attribute);
    }

    // set label
    exampleSet.getAttributes().setLabel(labelAttribute);

    // transform all values relative to last known label attribute value and create base value
    // column
    if (getParameterAsBoolean(PARAMETER_RELATIVE_TRANSFORMATION)) {
      if (labelAttribute == null) {
        // the transformation needs the label; fail with a user error instead of a
        // NullPointerException
        throw new UserError(this, 111, labelName + "-0");
      }
      if (labelAttribute.isNumerical()) {
        Attribute baseValueAttribute =
            AttributeFactory.createAttribute("base_value", Ontology.REAL);
        exampleSet.getExampleTable().addAttribute(baseValueAttribute);
        exampleSet.getAttributes().setSpecialAttribute(baseValueAttribute, "base_value");

        // The attribute lookups do not depend on the example, so resolve them once up front
        // instead of once per example.
        Attribute lastKnownLabelAttribute =
            findWindowAttribute(exampleSet, labelName + "-" + horizon);
        int baseCount = baseNames.size();
        Attribute[] lastKnownBaseAttributes = new Attribute[baseCount];
        Attribute[][] windowAttributes = new Attribute[baseCount][];
        int baseIndex = 0;
        for (String baseName : baseNames) {
          lastKnownBaseAttributes[baseIndex] =
              findWindowAttribute(exampleSet, baseName + "-" + horizon);
          Attribute[] window = new Attribute[Math.max(0, windowWidth - horizon + 1)];
          for (int w = horizon; w <= windowWidth; w++) {
            window[w - horizon] = findWindowAttribute(exampleSet, baseName + "-" + w);
          }
          windowAttributes[baseIndex] = window;
          baseIndex++;
        }

        for (Example example : exampleSet) {
          // handle label: remember the last known value as base value
          double baseLabelValue = example.getValue(lastKnownLabelAttribute);
          example.setValue(baseValueAttribute, baseLabelValue);

          for (int b = 0; b < baseCount; b++) {
            // read the base value first, the attribute itself is overwritten in the loop below
            double baseAttributeValue = example.getValue(lastKnownBaseAttributes[b]);
            for (Attribute currentAttribute : windowAttributes[b]) {
              double currentValue = example.getValue(currentAttribute);
              example.setValue(currentAttribute, currentValue - baseAttributeValue);
            }
          }

          example.setValue(labelAttribute, example.getValue(labelAttribute) - baseLabelValue);
        }
      }
    }

    return new IOObject[] {exampleSet};
  }

  /**
   * Extracts the window index from an attribute name of the form {@code <base>-<index>}.
   *
   * @param name the attribute name
   * @return the parsed index, or {@code -1} if the name has no numeric suffix after its last dash
   */
  private static int parseWindowIndex(String name) {
    int dash = name.lastIndexOf('-');
    if (dash < 0) {
      return -1;
    }
    try {
      return Integer.parseInt(name.substring(dash + 1));
    } catch (NumberFormatException ignored) {
      // suffix is not a number, so this is not a window attribute
      return -1;
    }
  }

  /**
   * Looks up an attribute by name and fails with a user error if it does not exist.
   *
   * @param exampleSet the example set whose attributes are searched
   * @param name the attribute name to look up
   * @return the attribute with the given name
   * @throws UserError if the example set contains no attribute with this name
   */
  private Attribute findWindowAttribute(ExampleSet exampleSet, String name) throws UserError {
    Attribute attribute = exampleSet.getAttributes().get(name);
    if (attribute == null) {
      throw new UserError(this, 111, name);
    }
    return attribute;
  }