/**
   * Replaces every numerical, non-integer attribute of the given example set by a new integer
   * attribute of the same name.
   *
   * <p>For each affected attribute a new {@code Ontology.INTEGER} attribute is added to the
   * example table and filled example by example: NaN values stay NaN, all other values are
   * either rounded with {@link Math#round(double)} or truncated by a cast to {@code long},
   * depending on the {@code PARAMETER_ROUND} parameter. The source attribute is removed from the
   * attribute set, and the new attributes are added as regular attributes afterwards.
   *
   * @param exampleSet the example set to convert; it is modified and returned
   * @return the same example set instance
   * @throws OperatorException declared by the overridden method
   */
  @Override
  public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
    final boolean useRounding = getParameterAsBoolean(PARAMETER_ROUND);
    final List<Attribute> integerAttributes = new LinkedList<Attribute>();

    for (Iterator<Attribute> iterator = exampleSet.getAttributes().iterator();
        iterator.hasNext(); ) {
      Attribute source = iterator.next();
      int sourceType = source.getValueType();
      boolean isNonIntegerNumerical =
          Ontology.ATTRIBUTE_VALUE_TYPE.isA(sourceType, Ontology.NUMERICAL)
              && !Ontology.ATTRIBUTE_VALUE_TYPE.isA(sourceType, Ontology.INTEGER);
      if (!isNonIntegerNumerical) {
        continue;
      }

      Attribute target = AttributeFactory.createAttribute(source.getName(), Ontology.INTEGER);
      integerAttributes.add(target);
      exampleSet.getExampleTable().addAttribute(target);

      for (Example row : exampleSet) {
        double sourceValue = row.getValue(source);
        if (Double.isNaN(sourceValue)) {
          // NaN is carried over unchanged
          row.setValue(target, Double.NaN);
          continue;
        }
        long converted;
        if (useRounding) {
          converted = Math.round(sourceValue);
        } else {
          converted = (long) sourceValue;
        }
        row.setValue(target, converted);
      }

      // drop the source attribute through the iterator so the running iteration stays valid
      iterator.remove();
    }

    // the replacements are registered only after the iteration over the attributes has finished
    for (Attribute replacement : integerAttributes) {
      exampleSet.getAttributes().addRegular(replacement);
    }

    return exampleSet;
  }
    /**
     * Returns the display value of the cell at the given position.
     *
     * <p>Column 0 is always rendered through {@code toString()}. For any other column that lies
     * beyond the stored row data an empty string is returned. Otherwise the value type of
     * attribute column {@code column - 1} decides the rendering: cells of a date/time type that
     * actually hold a {@link Date} are formatted with {@code Tools.formatDateTime}, everything
     * else falls back to {@code toString()}.
     *
     * @param row index into {@code data}
     * @param column table column; attribute columns are shifted by one relative to the reader
     * @return the text to display for the cell
     */
    @Override
    public Object getValueAt(int row, int column) {
      Object[] values = data.get(row);

      if (column == 0) {
        return values[column].toString();
      }

      if (column >= values.length) {
        return "";
      }

      Object cell = values[column];
      int attributeType = reader.getAttributeColumn(column - 1).getValueType();

      // The DATE_TIME check used to be written twice in a row; the duplicate is removed.
      // NOTE(review): the duplicate was presumably meant to test Ontology.DATE - confirm that
      // DATE is covered by the DATE_TIME check in the ontology hierarchy.
      boolean isDateLikeType =
          Ontology.ATTRIBUTE_VALUE_TYPE.isA(attributeType, Ontology.DATE_TIME)
              || Ontology.ATTRIBUTE_VALUE_TYPE.isA(attributeType, Ontology.TIME);

      // Test the runtime type instead of catching ClassCastException: cells whose content is
      // not a Date simply fall through to the default rendering.
      if (isDateLikeType && cell instanceof Date) {
        return Tools.formatDateTime((Date) cell);
      }

      // default value
      return cell.toString();
    }
 /**
  * Maps a RapidMiner ontology value type onto the corresponding {@code ValueType}.
  *
  * <p>The checks are made in the order numerical, nominal, date-time; the first match wins.
  *
  * @param rmValueType value type id as understood by {@code Ontology.ATTRIBUTE_VALUE_TYPE}
  * @return {@code NUMERICAL}, {@code NOMINAL} or {@code DATE_TIME} for a matching type, otherwise
  *     {@code INVALID}
  */
 public static ValueType convertFromRapidMinerOntology(int rmValueType) {
   if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(rmValueType, Ontology.NUMERICAL)) {
     return NUMERICAL;
   }
   if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(rmValueType, Ontology.NOMINAL)) {
     return NOMINAL;
   }
   if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(rmValueType, Ontology.DATE_TIME)) {
     return DATE_TIME;
   }
   // none of the supported families matched
   return INVALID;
 }
 /**
  * Tells whether the given value type is one of, or a subtype of, the types listed in {@code
  * allowedValueTypes}.
  *
  * @param valueType the value type id to test
  * @return {@code true} if at least one allowed type matches, {@code false} otherwise (also when
  *     no allowed types are configured)
  */
 private boolean isOfAllowedType(int valueType) {
   boolean matched = false;
   for (int candidate : allowedValueTypes) {
     // every candidate is tested; a hit is remembered rather than returned immediately
     if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueType, candidate)) {
       matched = true;
     }
   }
   return matched;
 }
 /**
  * Applies an edit made in the table to the underlying reader configuration.
  *
  * <p>Column 0 only reacts to the selection row, in which case the new selection state is applied
  * to all attribute columns. For every other column the edit is forwarded to attribute column
  * {@code column - 1}; the edited row decides whether the name, the value type, the selection
  * state or the role is changed. Assigning a role other than {@code AttributeColumn.REGULAR}
  * first resets every column currently holding that role to regular.
  *
  * @param value the new cell value; cast to {@code String} or {@code Boolean} depending on the row
  * @param row the edited row, compared against the row constants of this model
  * @param column the edited table column
  */
 @Override
 public void setValueAt(Object value, int row, int column) {
   if (column == 0) { // RowNo.
     if (row != IS_SELECTED_ROW) {
       return;
     }
     for (AttributeColumn each : reader.getAllAttributeColumns()) {
       each.activateColumn((Boolean) value);
     }
     repaint();
     return;
   }

   // attribute columns are shifted by one because of the leading column 0
   final int attributeIndex = column - 1;

   if (row == ATTRIBUTE_NAME_ROW) {
     reader.setAttributeNamesDefinedByUser(true);
     reader.getAttributeColumn(attributeIndex).setName((String) value);
   }

   if (row == VALUE_TYPE_ROW) {
     int currentType = reader.getAttributeColumn(attributeIndex).getValueType();
     int requestedType = Ontology.ATTRIBUTE_VALUE_TYPE.mapName(value.toString());
     // update only if it is not the same value
     if (currentType != requestedType) {
       reader.getAttributeColumn(attributeIndex).setValueType(requestedType);
     }
   }

   if (row == IS_SELECTED_ROW) {
     reader.getAttributeColumn(attributeIndex).activateColumn((Boolean) value);
   }

   if (row == ROLE_ROW) {
     String role = (String) value;
     boolean regularRole = role.equals(AttributeColumn.REGULAR);
     if (!regularRole) {
       // take the role away from every column that currently holds it
       for (AttributeColumn other : reader.getAllAttributeColumns()) {
         if (other.getRole().equals(role)) {
           other.setRole(AttributeColumn.REGULAR);
         }
       }
     }
     reader.getAttributeColumn(attributeIndex).setRole(role);
     if (!regularRole) {
       // other cells may have changed as well
       fireTableDataChanged();
     }
   }

   repaint();
 }
  /**
   * Adjusts the meta data to what the data transformation produces: every numerical, non-integer
   * attribute becomes an integer attribute whose value range bounds are rounded or truncated
   * according to the {@code PARAMETER_ROUND} parameter.
   *
   * <p>Attributes that are not converted (non-numerical or already integer) are left untouched.
   * Previously their value range was rewritten and marked as {@code SetRelation.EQUAL} as well,
   * although the data transformation never changes those attributes.
   *
   * @param emd the meta data to adapt; it is modified and returned
   * @return the same meta data instance
   */
  @Override
  public ExampleSetMetaData applyOnFilteredMetaData(ExampleSetMetaData emd) {
    boolean round = getParameterAsBoolean(PARAMETER_ROUND);

    for (AttributeMetaData amd : emd.getAllAttributes()) {
      boolean isConverted =
          Ontology.ATTRIBUTE_VALUE_TYPE.isA(amd.getValueType(), Ontology.NUMERICAL)
              && !Ontology.ATTRIBUTE_VALUE_TYPE.isA(amd.getValueType(), Ontology.INTEGER);
      if (!isConverted) {
        // same selection criterion as the data transformation: nothing to do here
        continue;
      }

      amd.setType(Ontology.INTEGER);

      if (round) {
        amd.setValueRange(
            new Range(
                Math.round(amd.getValueRange().getLower()),
                Math.round(amd.getValueRange().getUpper())),
            SetRelation.EQUAL);
      } else {
        // a cast to long truncates towards zero, matching the non-rounding data conversion
        amd.setValueRange(
            new Range((long) amd.getValueRange().getLower(), (long) amd.getValueRange().getUpper()),
            SetRelation.EQUAL);
      }
    }
    return emd;
  }
 /**
  * Builds a vector with one representative statistic per regular attribute of the example set.
  *
  * <p>All attribute statistics are recalculated first. Per attribute the entry is the minimum
  * for date-time attributes, the mode for nominal attributes and the average otherwise.
  *
  * @param exampleSet the example set whose statistics are recalculated and read
  * @return an array with one entry per attribute, in iteration order of the attributes
  */
 private double[] getMeanVector(ExampleSet exampleSet) {
   exampleSet.recalculateAllAttributeStatistics();

   Attributes attributes = exampleSet.getAttributes();
   double[] result = new double[attributes.size()];
   int position = 0;
   for (Attribute current : attributes) {
     double representative;
     if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(current.getValueType(), Ontology.DATE_TIME)) {
       representative = exampleSet.getStatistics(current, Statistics.MINIMUM);
     } else if (current.isNominal()) {
       representative = exampleSet.getStatistics(current, Statistics.MODE);
     } else {
       representative = exampleSet.getStatistics(current, Statistics.AVERAGE);
     }
     result[position] = representative;
     position++;
   }
   return result;
 }
// Example #8
0
  /**
   * Reads the value of one attribute of one example and stores it in {@code currentValue}; the
   * example set itself is passed through unchanged.
   *
   * <p>The example is selected by {@code PARAMETER_EXAMPLE_INDEX}: positive values count from the
   * first example (1 is the first one), negative values count from the end (-1 is the last one),
   * 0 is rejected. Nominal and date-time attributes are stored as strings and flagged through
   * {@code isNominal}; all other attributes are stored as {@code Double}.
   *
   * @throws UserError with code 111 if the attribute does not exist, with code 207 if the index
   *     is 0, and with code 110 if the index points outside of the example set in either
   *     direction
   * @throws OperatorException declared by the overridden method
   */
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    Attribute attribute =
        exampleSet.getAttributes().get(getParameterAsString(PARAMETER_ATTRIBUTE_NAME));
    if (attribute == null) {
      throw new UserError(this, 111, getParameterAsString(PARAMETER_ATTRIBUTE_NAME));
    }

    int index = getParameterAsInt(PARAMETER_EXAMPLE_INDEX);
    if (index == 0) {
      throw new UserError(
          this, 207, "0", PARAMETER_EXAMPLE_INDEX, "only positive or negative indices are allowed");
    }

    // translate the 1-based (or end-relative) parameter into a 0-based position
    if (index < 0) {
      index = exampleSet.size() + index;
    } else {
      index--;
    }

    // A negative parameter whose magnitude exceeds the number of examples results in a negative
    // position, so the lower bound has to be checked as well as the upper one.
    if (index < 0 || index >= exampleSet.size()) {
      throw new UserError(this, 110, index);
    }

    Example example = exampleSet.getExample(index);
    if (attribute.isNominal()
        || Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
      currentValue = example.getValueAsString(attribute);
      isNominal = true;
    } else {
      currentValue = Double.valueOf(example.getValue(attribute));
      isNominal = false;
    }

    exampleSetOutput.deliver(exampleSet);
  }
  /**
   * Prepares real-valued target attributes, delegates the actual work to {@code
   * applyOnData(exampleSet, oldAttributes, newAttributes)} and finally swaps the old attributes
   * for the new ones.
   *
   * <p>For every numerical attribute whose value type is not {@code Ontology.REAL} a new real
   * attribute is created, added to the example table and registered as regular attribute; all
   * other attributes act as their own target. Afterwards each old attribute is removed, its
   * target is (re-)added as regular attribute so that the original order is kept, and the target
   * takes over the old attribute's name.
   *
   * @param exampleSet the example set to transform; it is modified and returned
   * @return the same example set instance
   * @throws OperatorException declared by the overridden method
   */
  @Override
  public ExampleSet applyOnData(ExampleSet exampleSet) throws OperatorException {
    Attributes attributes = exampleSet.getAttributes();

    // snapshot of the current attributes, taken before any attribute is added
    Attribute[] oldAttributes = new Attribute[attributes.size()];
    int position = 0;
    for (Attribute current : attributes) {
      oldAttributes[position] = current;
      position++;
    }

    // targets: a fresh real attribute where the old type was numerical but not real
    Attribute[] newAttributes = new Attribute[attributes.size()];
    for (int k = 0; k < newAttributes.length; k++) {
      Attribute original = oldAttributes[k];
      newAttributes[k] = original;
      boolean needsRealTarget =
          original.isNumerical()
              && !Ontology.ATTRIBUTE_VALUE_TYPE.isA(original.getValueType(), Ontology.REAL);
      if (needsRealTarget) {
        Attribute realTarget = AttributeFactory.createAttribute(Ontology.REAL);
        newAttributes[k] = realTarget;
        exampleSet.getExampleTable().addAttribute(realTarget);
        attributes.addRegular(realTarget);
      }
    }

    // applying on data
    applyOnData(exampleSet, oldAttributes, newAttributes);

    // replace old attributes by their targets, keeping order and names
    for (int k = 0; k < oldAttributes.length; k++) {
      Attribute original = oldAttributes[k];
      Attribute target = newAttributes[k];
      attributes.remove(original);
      if (original != target) {
        // a new target was appended earlier; remove it so that it is re-added in the right place
        attributes.remove(target);
      }
      attributes.addRegular(target);
      target.setName(original.getName());
    }

    return exampleSet;
  }
  /**
   * Verifies that this sparse matrix attribute wraps a suitable relational attribute of the
   * given example table.
   *
   * <p>After the checks of the super class, the table is searched for the attribute whose table
   * index equals the first entry of {@code getAttributeIndexes()}. That attribute has to be a
   * {@code RelationalAttribute} with exactly two inner attributes, the first of which must be
   * numerical.
   *
   * @param et the example table to check against
   * @throws IllegalArgumentException if no matching attribute exists in the table, if it is not
   *     relational, or if its inner attributes violate the constraints above
   */
  public void checkConstraints(ExampleTable et) {

    super.checkConstraints(et);

    RelationalAttribute relA = null;
    for (int i = 0; i < et.getNumberOfAttributes(); i++) {
      // NOTE(review): example tables reportedly return null for unused columns - the null guard
      // is harmless if that cannot happen here; confirm against ExampleTable.getAttribute.
      if (et.getAttribute(i) == null
          || et.getAttribute(i).getTableIndex() != this.getAttributeIndexes()[0]) {
        continue;
      }
      if (!(et.getAttribute(i) instanceof RelationalAttribute)) {
        // previously surfaced as a ClassCastException without any context
        throw new IllegalArgumentException(
            "sparse matrix attribute " + this.getName() + " must wrap a relational attribute");
      }
      relA = (RelationalAttribute) et.getAttribute(i);
      break;
    }

    if (relA == null) {
      // previously surfaced as a NullPointerException on the first access below
      throw new IllegalArgumentException(
          "sparse matrix attribute "
              + this.getName()
              + " refers to a relational attribute that is not part of the example table");
    }

    // sparse matrix inner relational attributes
    if (relA.getInnerAttributeCount() != 2) {
      throw new IllegalArgumentException(
          "sparse matrix attribute "
              + this.getName()
              + " must wrap relational attribute with exactly two inner attributes");
    } else if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(
        relA.getInnerAttributeAt(0).getValueType(), Ontology.NUMERICAL)) {
      throw new IllegalArgumentException(
          "sparse matrix attribute "
              + this.getName()
              + " must wrap relational attribute which inner first attribute serves as key and therefore must be numerical");
    }
  }
 /**
  * Recursively walks the aggregation tree, one group attribute per level, and hands every leaf
  * over to {@code parseLeaf}.
  *
  * <p>At each level the value of the current group attribute is written into {@code
  * dataOfUpperLevels[groupLevel]}. For nominal attributes the value's string form is mapped
  * through the mapping of {@code newAttributes[groupLevel]}, and either all values of the
  * attribute's mapping or only the values present in the node are visited, depending on {@code
  * isCountingAllCombinations}. For numerical and date-time attributes the node's values are cast
  * to {@code Double} and used directly.
  *
  * @param node the tree node belonging to the current level
  * @param groupAttributes the attributes defining the groups, one per level
  * @param dataOfUpperLevels buffer holding the group values chosen so far; written at {@code
  *     groupLevel}
  * @param groupLevel index of the current level
  * @param allGroupCombinations passed through to {@code parseLeaf}
  * @param allAggregators passed through to {@code parseLeaf}
  * @param factory passed through to {@code parseLeaf}
  * @param newAttributes result attributes; the entry at {@code groupLevel} supplies the nominal
  *     mapping
  * @param isCountingAllCombinations whether all nominal values are visited instead of only those
  *     present in the node
  * @param aggregationFunctions passed through to {@code parseLeaf}
  * @throws UserError if the current group attribute is neither nominal, numerical nor date-time
  */
 private void parseTree(
     AggregationTreeNode node,
     Attribute[] groupAttributes,
     double[] dataOfUpperLevels,
     int groupLevel,
     List<double[]> allGroupCombinations,
     List<List<Aggregator>> allAggregators,
     DataRowFactory factory,
     Attribute[] newAttributes,
     boolean isCountingAllCombinations,
     List<AggregationFunction> aggregationFunctions)
     throws UserError {
   final Attribute groupingAttribute = groupAttributes[groupLevel];
   final boolean nominalGrouping = groupingAttribute.isNominal();

   // reject unsupported value types up front
   if (!nominalGrouping
       && !groupingAttribute.isNumerical()
       && !Ontology.ATTRIBUTE_VALUE_TYPE.isA(
           groupingAttribute.getValueType(), Ontology.DATE_TIME)) {
     throw new UserError(
         this,
         "aggregation_operator.unsupported_value_type",
         groupingAttribute.getName(),
         Ontology.ATTRIBUTE_VALUE_TYPE.getNames()[groupingAttribute.getValueType()]);
   }

   // whether more group defining attributes follow below this level
   final int nextLevel = groupLevel + 1;
   final boolean hasDeeperLevel = nextLevel < groupAttributes.length;

   if (nominalGrouping) {
     final Collection<? extends Object> candidates;
     if (isCountingAllCombinations) {
       candidates = groupingAttribute.getMapping().getValues();
     } else {
       candidates = node.getValues();
     }

     for (Object candidate : candidates) {
       dataOfUpperLevels[groupLevel] =
           newAttributes[groupLevel].getMapping().mapString(candidate.toString());
       if (hasDeeperLevel) {
         parseTree(
             node.getOrCreateChild(candidate),
             groupAttributes,
             dataOfUpperLevels,
             nextLevel,
             allGroupCombinations,
             allAggregators,
             factory,
             newAttributes,
             isCountingAllCombinations,
             aggregationFunctions);
       } else {
         // last level reached: insert values from aggregation functions
         parseLeaf(
             node.getLeaf(candidate),
             dataOfUpperLevels,
             allGroupCombinations,
             allAggregators,
             factory,
             newAttributes,
             aggregationFunctions);
       }
     }
     return;
   }

   // numerical or date-time grouping attribute
   for (Object candidate : node.getValues()) {
     dataOfUpperLevels[groupLevel] = (Double) candidate;
     if (hasDeeperLevel) {
       parseTree(
           node.getOrCreateChild(candidate),
           groupAttributes,
           dataOfUpperLevels,
           nextLevel,
           allGroupCombinations,
           allAggregators,
           factory,
           newAttributes,
           isCountingAllCombinations,
           aggregationFunctions);
     } else {
       // last level reached: insert values from aggregation functions
       parseLeaf(
           node.getLeaf(candidate),
           dataOfUpperLevels,
           allGroupCombinations,
           allAggregators,
           factory,
           newAttributes,
           aggregationFunctions);
     }
   }
 }
  /**
   * Guesses the value type of the selected nominal and numerical attributes from their values
   * and replaces each of them by a new attribute of the guessed type.
   *
   * <p>Guessing works on the string representation of the values: after removing the optional
   * grouping character and replacing the decimal point character by {@code '.'}, a value that
   * cannot be parsed as a double makes the attribute nominal; otherwise the attribute becomes
   * integer as long as all values are whole numbers and real as soon as one is not. Missing
   * values are ignored while guessing. Attributes that are neither nominal nor numerical are
   * left untouched.
   *
   * <p>An attribute for which no value could be inspected (for example because all of its values
   * are missing) keeps its own value type. Previously the never-assigned guess 0 was passed on
   * to the attribute factory in that case.
   *
   * <p>Nothing is changed if the example set is empty.
   *
   * @param exampleSet the example set to convert; it is modified and returned
   * @return the same example set instance
   * @throws OperatorException declared by the overridden method
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // init
    char decimalPointCharacter = getParameterAsString(PARAMETER_DECIMAL_POINT_CHARACTER).charAt(0);
    Character groupingCharacter = null;
    if (isParameterSet(PARAMETER_NUMBER_GROUPING_CHARACTER)) {
      groupingCharacter = getParameterAsString(PARAMETER_NUMBER_GROUPING_CHARACTER).charAt(0);
    }

    Set<Attribute> attributeSet = attributeSelector.getAttributeSubset(exampleSet, false);

    // guessing; a slot stays 0 until a value of the corresponding attribute was inspected.
    // Slots are indexed over the nominal/numerical attributes only (other attributes are
    // skipped without advancing the index), here and in the conversion loop below.
    int[] guessedValueTypes = new int[attributeSet.size()];
    int checkedCounter = 0;
    for (Example example : exampleSet) {
      int index = 0;
      for (Attribute attribute : attributeSet) {
        if (!attribute.isNominal() && !attribute.isNumerical()) {
          continue;
        }

        double originalValue = example.getValue(attribute);
        if (!Double.isNaN(originalValue) && guessedValueTypes[index] != Ontology.NOMINAL) {
          try {
            String valueString = example.getValueAsString(attribute);
            if (!Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) {
              double value =
                  Double.parseDouble(
                      normalizeNumberString(valueString, groupingCharacter, decimalPointCharacter));
              // once real, always real; otherwise decide between integer and real
              if (guessedValueTypes[index] != Ontology.REAL) {
                if (Tools.isEqual(Math.round(value), value)) {
                  guessedValueTypes[index] = Ontology.INTEGER;
                } else {
                  guessedValueTypes[index] = Ontology.REAL;
                }
              }
            }
          } catch (NumberFormatException e) {
            // not a number: the attribute is nominal and needs no further inspection
            guessedValueTypes[index] = Ontology.NOMINAL;
            checkedCounter++;
          }
        }
        index++;
      }
      // stop early once every slot has been decided as nominal
      if (checkedCounter >= guessedValueTypes.length) {
        break;
      }
    }

    // the example set contains at least one example and the guessing was performed
    if (exampleSet.size() > 0) {
      // new attributes
      List<AttributeRole> newAttributes = new LinkedList<AttributeRole>();
      int index = 0;
      for (Attribute attribute : attributeSet) {
        if (!attribute.isNominal() && !attribute.isNumerical()) {
          continue;
        }

        int targetType = guessedValueTypes[index];
        if (targetType == 0) {
          // nothing could be guessed for this attribute: keep its current value type
          targetType = attribute.getValueType();
        }
        boolean numericalTarget =
            Ontology.ATTRIBUTE_VALUE_TYPE.isA(targetType, Ontology.NUMERICAL);

        AttributeRole role = exampleSet.getAttributes().getRole(attribute);

        Attribute newAttribute = AttributeFactory.createAttribute(targetType);
        exampleSet.getExampleTable().addAttribute(newAttribute);
        AttributeRole newRole = new AttributeRole(newAttribute);
        newRole.setSpecial(role.getSpecialName());
        newAttributes.add(newRole);

        // copy data
        for (Example e : exampleSet) {
          double oldValue = e.getValue(attribute);
          if (Double.isNaN(oldValue)) {
            e.setValue(newAttribute, Double.NaN);
            continue;
          }
          String valueString = e.getValueAsString(attribute);
          if (!numericalTarget) {
            e.setValue(newAttribute, newAttribute.getMapping().mapString(valueString));
          } else if (Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) {
            e.setValue(newAttribute, Double.NaN);
          } else {
            e.setValue(
                newAttribute,
                Double.parseDouble(
                    normalizeNumberString(valueString, groupingCharacter, decimalPointCharacter)));
          }
        }

        // delete attribute and rename the new attribute (due to deletion and data scans: no
        // more memory used :-)
        exampleSet.getExampleTable().removeAttribute(attribute);
        exampleSet.getAttributes().remove(role);
        newAttribute.setName(attribute.getName());

        index++;
      }

      // register the new attributes with the role of the attribute they replace
      for (AttributeRole role : newAttributes) {
        if (role.isSpecial()) {
          exampleSet
              .getAttributes()
              .setSpecialAttribute(role.getAttribute(), role.getSpecialName());
        } else {
          exampleSet.getAttributes().addRegular(role.getAttribute());
        }
      }
    }

    return exampleSet;
  }

  /** Removes the grouping character (if any) and turns the decimal point character into '.'. */
  private static String normalizeNumberString(
      String valueString, Character groupingCharacter, char decimalPointCharacter) {
    String normalized = valueString;
    if (groupingCharacter != null) {
      normalized = normalized.replace(groupingCharacter.toString(), "");
    }
    return normalized.replace(decimalPointCharacter, '.');
  }
 /**
  * Accepts exactly those value types that the ontology classifies as numerical.
  *
  * @param valueType the value type id to test
  * @return {@code true} if {@code valueType} is numerical or a subtype of it
  */
 @Override
 public boolean supportsValueType(int valueType) {
   final boolean numerical = Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueType, Ontology.NUMERICAL);
   return numerical;
 }
// Example #14
0
  /**
   * Writes the example set as character separated values, one example per line.
   *
   * <p>All attributes delivered by {@code allAttributes()} are written in iteration order.
   * Missing values (NaN) produce an empty field. Nominal values are written as strings,
   * date-time values either formatted with the default {@link DateFormat} or as plain number,
   * and all other values as numbers.
   *
   * <p>The produced output is the same as before; the date format is now created once per call
   * instead of once per date cell, quotes are replaced without going through the regular
   * expression engine, and each value is read only once without boxing.
   *
   * @param exampleSet the data to write
   * @param out the writer receiving the output; it is neither flushed nor closed here
   * @param colSeparator the string printed between two fields
   * @param quoteNomValues whether nominal values and attribute names are enclosed in double
   *     quotes, with contained double quotes replaced by single quotes
   * @param writeAttribNames whether a header line with the attribute names is written first
   * @param formatDate whether date-time values are formatted as dates instead of numbers
   */
  public static void writeCSV(
      ExampleSet exampleSet,
      PrintWriter out,
      String colSeparator,
      boolean quoteNomValues,
      boolean writeAttribNames,
      boolean formatDate) {
    // write column names
    if (writeAttribNames) {
      Iterator<Attribute> a = exampleSet.getAttributes().allAttributes();
      boolean first = true;
      while (a.hasNext()) {
        if (!first) {
          out.print(colSeparator);
        }
        String name = a.next().getName();
        out.print(quoteNomValues ? quoteForCsv(name) : name);
        first = false;
      }
      out.println();
    }

    // created on first use so that data without formatted dates never instantiates it
    DateFormat dateFormat = null;

    // write data
    for (Example example : exampleSet) {
      Iterator<Attribute> a = exampleSet.getAttributes().allAttributes();
      boolean first = true;
      while (a.hasNext()) {
        Attribute attribute = a.next();
        if (!first) {
          out.print(colSeparator);
        }
        first = false;

        double value = example.getValue(attribute);
        if (Double.isNaN(value)) {
          // missing value: leave the field empty
          continue;
        }

        if (attribute.isNominal()) {
          String stringValue = example.getValueAsString(attribute);
          out.print(quoteNomValues ? quoteForCsv(stringValue) : stringValue);
        } else if (formatDate
            && Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
          if (dateFormat == null) {
            dateFormat = DateFormat.getInstance();
          }
          out.print(dateFormat.format(new Date((long) value)));
        } else {
          out.print(value);
        }
      }
      out.println();
    }
  }

  /** Encloses the text in double quotes after replacing contained double quotes by single ones. */
  private static String quoteForCsv(String text) {
    return "\"" + text.replace('"', '\'') + "\"";
  }