/**
 * Stores a clone of the given example set as parent and changes every role delivered by the
 * clone's special attribute iterator to a regular role.
 *
 * @param exampleSet the example set to clone; the clone is kept as {@code parent}
 */
public NonSpecialAttributesExampleSet(ExampleSet exampleSet) {
   this.parent = (ExampleSet) exampleSet.clone();
   for (Iterator<AttributeRole> specialRoles = this.parent.getAttributes().specialAttributes();
       specialRoles.hasNext(); ) {
     AttributeRole role = specialRoles.next();
     // only roles that still report themselves as special are converted
     if (!role.isSpecial()) {
       continue;
     }
     role.changeToRegular();
   }
 }
 /**
  * Decides whether a feature is switched off, based on its name and, for special attributes, its
  * special name. A feature is switched off if its name or its special name matches the skip
  * pattern, unless its name or its special name also matches the exception pattern. If no
  * exception pattern is set, every feature matching the skip pattern is switched off.
  *
  * @param attributeRole Feature to check.
  * @return TRUE if this feature should <b>not</b> be active in the output example set of this
  *     operator. FALSE otherwise.
  */
 @Override
 public boolean switchOffFeature(AttributeRole attributeRole) throws OperatorException {
   String attributeName = attributeRole.getAttribute().getName();
   boolean special = attributeRole.isSpecial();
   String specialName = special ? attributeRole.getSpecialName() : null;
   boolean hasExceptionPattern = exceptionPattern != null;

   // matchers on the special name only exist for special attributes
   Matcher skipByName = skipPattern.matcher(attributeName);
   Matcher skipBySpecialName = special ? skipPattern.matcher(specialName) : null;
   Matcher keepByName = hasExceptionPattern ? exceptionPattern.matcher(attributeName) : null;
   Matcher keepBySpecialName =
       (special && hasExceptionPattern) ? exceptionPattern.matcher(specialName) : null;

   boolean skipRequested =
       skipByName.matches() || (skipBySpecialName != null && skipBySpecialName.matches());
   if (!skipRequested) {
     return false;
   }
   // an exception match on either name keeps the feature switched on
   if (keepByName != null && keepByName.matches()) {
     return false;
   }
   return keepBySpecialName == null || !keepBySpecialName.matches();
 }
// Example no. 3
// 0
  /**
   * Filters the features of the input example set. Every attribute role is passed to <code>
   * switchOffFeature()</code>; if that returns TRUE the role is removed from the example set, so
   * the feature is not available to the following operators in the chain. If it returns FALSE the
   * feature keeps its previous status. Special attributes are only checked if the parameter for
   * filtering special features is enabled.
   *
   * @return An array of IOObjects, with the output example set as the only member.
   */
  public IOObject[] apply() throws OperatorException {
    ExampleSet eSet = getInput(ExampleSet.class);
    log(eSet.getAttributes().size() + " features before filtering.");

    Iterator<AttributeRole> roles = eSet.getAttributes().allAttributeRoles();
    boolean filterSpecial = getParameterAsBoolean(PARAMETER_FILTER_SPECIAL_FEATURES);
    while (roles.hasNext()) {
      AttributeRole candidate = roles.next();
      // special roles are left untouched (and no stop check is done) unless filtering is enabled
      if (!candidate.isSpecial() || filterSpecial) {
        if (switchOffFeature(candidate)) {
          roles.remove();
        }
        checkForStop();
      }
    }

    log(eSet.getAttributes().size() + " features left after filtering.");
    return new IOObject[] {eSet};
  }
  /**
   * Applies the sorted and unmatched attribute lists to the provided {@link Attributes}. All
   * unmatched attributes are removed first. Afterwards each {@link Attribute} of the sorted list
   * is removed and immediately re-added with its previous role, in list order.
   *
   * @param sortedAttributeList attributes that are removed and re-added one by one, in the order
   *     of this list.
   * @param unmachtedAttributes attributes that should be removed. May be <code>null</code> if no
   *     attributes should be removed.
   * @param attributes the attributes container that is modified in place.
   */
  private void applySortedAttributes(
      List<Attribute> sortedAttributeList,
      List<Attribute> unmachtedAttributes,
      Attributes attributes) {
    if (unmachtedAttributes != null) {
      for (Attribute toDrop : unmachtedAttributes) {
        attributes.remove(toDrop);
      }
    }

    for (Attribute current : sortedAttributeList) {
      // fetch the role before removal so it can be restored when re-adding
      AttributeRole previousRole = attributes.getRole(current);
      attributes.remove(current);

      if (!previousRole.isSpecial()) {
        attributes.addRegular(current);
        continue;
      }
      attributes.setSpecialAttribute(current, previousRole.getSpecialName());
    }
  }
 /**
  * Builds the attributes of this view from the attributes of the given parent. Special roles are
  * taken over as they are. Every attribute delivered by iterating the parent's attributes is
  * added as regular attribute; numerical ones with an entry in {@code attributeTransformations}
  * are wrapped into a {@link ViewAttribute} that keeps the original name.
  *
  * @param viewParent the example set whose attributes are the basis of the view
  * @return the newly created attributes
  */
 @Override
 public Attributes getTargetAttributes(ExampleSet viewParent) {
   SimpleAttributes attributes = new SimpleAttributes();

   // take over all special roles
   for (Iterator<AttributeRole> roles = viewParent.getAttributes().allAttributeRoles();
       roles.hasNext(); ) {
     AttributeRole candidate = roles.next();
     if (candidate.isSpecial()) {
       attributes.add(candidate);
     }
   }

   // add the remaining attributes as regular ones
   for (Attribute attribute : viewParent.getAttributes()) {
     boolean transformed =
         attribute.isNumerical() && attributeTransformations.containsKey(attribute.getName());
     if (transformed) {
       // the view attribute reuses the old name, which is the key into the transformations
       attributes.addRegular(
           new ViewAttribute(this, attribute, attribute.getName(), Ontology.NUMERICAL, null));
     } else {
       attributes.addRegular(attribute);
     }
   }
   return attributes;
 }
  /**
   * Parses the provided expression, evaluates it for every example of the {@link ExampleSet} and
   * stores the results in a new regular attribute with the given name. The value type of the new
   * attribute is the attribute type reported by the expression type; a {@link Ontology#BINOMINAL}
   * attribute gets the mapping values &quot;false&quot; and &quot;true&quot;. If an attribute
   * with the given name already exists, the new attribute is built under a temporary unique name,
   * then replaces the existing attribute in the example set's attributes and takes over its name
   * and, if it was special, its special role. If the executing operator is defined, there is a
   * check for stop before the calculation of each example.
   *
   * @param exampleSet the example set to which the generated attribute is added
   * @param name the new attribute name
   * @param expression the expression used to generate attribute values
   * @param parser the expression parser used to parse the expression argument
   * @param resolver the example resolver which is bound to each example during evaluation and
   *     receives the meta data of the new attribute afterwards
   * @param executingOperator the operator calling this method. <code>null</code> is allowed. If not
   *     null the operator will be used to check for stop
   * @return the newly created attribute
   * @throws ProcessStoppedException in case the process was stopped by the user
   * @throws ExpressionException in case parsing the expression fails
   */
  public static Attribute addAttribute(
      ExampleSet exampleSet,
      String name,
      String expression,
      ExpressionParser parser,
      ExampleResolver resolver,
      Operator executingOperator)
      throws ProcessStoppedException, ExpressionException {

    Expression parsedExpression = parser.parse(expression);

    // a non-null existing attribute will be replaced at the end of this method
    Attribute existingAttribute = exampleSet.getAttributes().get(name);
    final String targetName = name;
    if (existingAttribute != null) {
      // Build the new attribute under a temporary name first: random suffixes are appended to
      // the requested name until no attribute with that name exists. The final name is restored
      // once the old attribute has been removed.
      StringBuilder suffix = new StringBuilder();
      do {
        suffix.append(RandomGenerator.getGlobalRandomGenerator().nextString(5));
      } while (exampleSet.getAttributes().get(name + suffix) != null);
      name = name + suffix;
    }

    ExpressionType resultType = parsedExpression.getExpressionType();
    int ontology = resultType.getAttributeType();
    Attribute newAttribute = AttributeFactory.createAttribute(name, ontology);
    if (ontology == Ontology.BINOMINAL) {
      // fixed mapping order: "false" is mapped before "true"
      newAttribute.getMapping().mapString("false");
      newAttribute.getMapping().mapString("true");
    }

    // remember the expression as construction description
    newAttribute.setConstruction(expression);

    // register the attribute in the table and in the example set before writing any values
    exampleSet.getExampleTable().addAttribute(newAttribute);
    exampleSet.getAttributes().addRegular(newAttribute);

    boolean numericalResult =
        resultType == ExpressionType.DOUBLE || resultType == ExpressionType.INTEGER;
    boolean dateResult = resultType == ExpressionType.DATE;

    for (Example example : exampleSet) {
      if (executingOperator != null) {
        executingOperator.checkForStop();
      }

      resolver.bind(example);
      try {
        if (numericalResult) {
          example.setValue(newAttribute, parsedExpression.evaluateNumerical());
        } else if (dateResult) {
          Date evaluated = parsedExpression.evaluateDate();
          // a missing date is stored as NaN, otherwise as its time in milliseconds
          example.setValue(newAttribute, evaluated == null ? Double.NaN : evaluated.getTime());
        } else {
          example.setValue(newAttribute, parsedExpression.evaluateNominal());
        }
      } finally {
        // always release the example again to avoid memory leaks
        resolver.unbind();
      }
    }

    if (existingAttribute != null) {
      // swap the old attribute for the new one and hand over name and special role
      AttributeRole oldRole = exampleSet.getAttributes().getRole(existingAttribute);
      exampleSet.getAttributes().remove(existingAttribute);
      newAttribute.setName(targetName);
      if (oldRole.isSpecial()) {
        exampleSet.getAttributes().setSpecialAttribute(newAttribute, oldRole.getSpecialName());
      }
    }

    // make the new attribute known to the resolver
    resolver.addAttributeMetaData(
        new AttributeMetaData(exampleSet.getAttributes().getRole(newAttribute), exampleSet, true));

    return newAttribute;
  }
  /**
   * Transposes the given example set. Every example becomes an attribute and every attribute
   * role that is not the id role becomes one data row. A new nominal id attribute holds the names
   * of the former attributes. The new attributes are nominal if at least one non-id attribute is
   * nominal, otherwise real. They are named after the old id values if an id attribute exists,
   * otherwise "att_1", "att_2", and so on.
   *
   * @param exampleSet the example set to transpose
   * @return a new example set backed by a new memory table, carrying the annotations of the input
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // one nominal non-id attribute is enough to make all new attributes nominal
    int valueType = Ontology.REAL;
    for (Iterator<AttributeRole> roles = exampleSet.getAttributes().allAttributeRoles();
        roles.hasNext(); ) {
      AttributeRole role = roles.next();
      boolean idRole = role.isSpecial() && role.getSpecialName().equals(Attributes.ID_NAME);
      if (!idRole && role.getAttribute().isNominal()) {
        valueType = Ontology.NOMINAL;
        break;
      }
    }

    // new attributes: the id attribute first, then one attribute per example
    List<Attribute> newAttributes = new ArrayList<Attribute>(exampleSet.size());
    Attribute newIdAttribute =
        AttributeFactory.createAttribute(Attributes.ID_NAME, Ontology.NOMINAL);
    newAttributes.add(newIdAttribute);

    Attribute oldIdAttribute = exampleSet.getAttributes().getId();
    if (oldIdAttribute == null) {
      // no id available: number the attributes starting with 1
      for (int number = 1; number <= exampleSet.size(); number++) {
        newAttributes.add(AttributeFactory.createAttribute("att_" + number, valueType));
      }
    } else {
      for (Example example : exampleSet) {
        double idValue = example.getValue(oldIdAttribute);
        Attribute created;
        if (!oldIdAttribute.isNominal()) {
          created = AttributeFactory.createAttribute("att_" + idValue, valueType);
        } else if (Double.isNaN(idValue)) {
          // missing nominal id: let the factory choose a name
          created = AttributeFactory.createAttribute(valueType);
        } else {
          String idLabel = oldIdAttribute.getMapping().mapIndex((int) idValue);
          created = AttributeFactory.createAttribute(idLabel, valueType);
        }
        newAttributes.add(created);
      }
    }

    // one data row per non-id attribute; cell 0 holds the former attribute name
    MemoryExampleTable table = new MemoryExampleTable(newAttributes);
    for (Iterator<AttributeRole> roles = exampleSet.getAttributes().allAttributeRoles();
        roles.hasNext(); ) {
      AttributeRole role = roles.next();
      if (role.isSpecial() && role.getSpecialName().equals(Attributes.ID_NAME)) {
        continue;
      }
      Attribute attribute = role.getAttribute();
      double[] row = new double[exampleSet.size() + 1];
      row[0] = newIdAttribute.getMapping().mapString(attribute.getName());
      int column = 0;
      for (Example example : exampleSet) {
        column++;
        double value = example.getValue(attribute);
        Attribute target = newAttributes.get(column);
        if (target.isNominal() && !Double.isNaN(value)) {
          // nominal targets store the index of the value's string form in their own mapping
          String label =
              attribute.isNominal() ? attribute.getMapping().mapIndex((int) value) : value + "";
          row[column] = target.getMapping().mapString(label);
        } else {
          row[column] = value;
        }
      }
      table.addDataRow(new DoubleArrayDataRow(row));
    }

    // create and deliver example set
    ExampleSet result = table.createExampleSet(null, null, newIdAttribute);
    result.getAnnotations().addAll(exampleSet.getAnnotations());
    return result;
  }
  /**
   * Guesses a new value type for each selected nominal or numerical attribute from the string
   * representation of its values and replaces the attribute by a new one of the guessed type.
   *
   * <p>A value counts as a number if, after removing the grouping character (if set) and
   * replacing the decimal point character by '.', it can be parsed by {@link
   * Double#parseDouble(String)}. An attribute is guessed as integer while all parsed values are
   * whole numbers, as real once one parsed value is not, and as nominal as soon as one value
   * cannot be parsed. Nothing is replaced if the example set contains no examples.
   *
   * @param exampleSet the example set to modify in place
   * @return the same example set instance that was passed in
   * @throws OperatorException declared by the overridden method
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // init: only the first character of each parameter value is used
    char decimalPointCharacter = getParameterAsString(PARAMETER_DECIMAL_POINT_CHARACTER).charAt(0);
    Character groupingCharacter = null;
    if (isParameterSet(PARAMETER_NUMBER_GROUPING_CHARACTER)) {
      groupingCharacter = getParameterAsString(PARAMETER_NUMBER_GROUPING_CHARACTER).charAt(0);
    }

    Set<Attribute> attributeSet = attributeSelector.getAttributeSubset(exampleSet, false);
    int size = attributeSet.size();

    // current value types of the selected attributes; replaced by the guessed types below
    // whenever the example set is not empty
    int[] valueTypes = new int[size];

    int index = 0;
    for (Attribute attribute : attributeSet) {
      valueTypes[index++] = attribute.getValueType();
    }

    // guessing
    // NOTE(review): slots that are never assigned keep Java's default 0 (e.g. attributes with
    // only missing values) - confirm which value type 0 denotes in Ontology.
    int[] guessedValueTypes = new int[valueTypes.length];
    // number of attributes already decided to be nominal
    int checkedCounter = 0;
    for (Example example : exampleSet) {
      index = 0;
      for (Attribute attribute : attributeSet) {
        // attributes that are neither nominal nor numerical are skipped WITHOUT advancing index,
        // so index counts only the handled attributes (the replacement loop below does the same)
        if (!attribute.isNominal() && !attribute.isNumerical()) {
          continue;
        }

        double originalValue = example.getValue(attribute);
        if (!Double.isNaN(originalValue)) {
          // once an attribute is guessed as nominal it is not examined again
          if (guessedValueTypes[index] != Ontology.NOMINAL) {
            try {
              String valueString = example.getValueAsString(attribute);
              if (!Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) {
                if (groupingCharacter != null) {
                  valueString = valueString.replace(groupingCharacter.toString(), "");
                }
                valueString = valueString.replace(decimalPointCharacter, '.');
                double value = Double.parseDouble(valueString);
                // a guess of REAL is never changed back to INTEGER
                if (guessedValueTypes[index] != Ontology.REAL) {
                  if (Tools.isEqual(Math.round(value), value)) {
                    guessedValueTypes[index] = Ontology.INTEGER;
                  } else {
                    guessedValueTypes[index] = Ontology.REAL;
                  }
                }
              }
            } catch (NumberFormatException e) {
              // value is not a number: the attribute has to be nominal
              guessedValueTypes[index] = Ontology.NOMINAL;
              checkedCounter++;
            }
          }
        }
        index++;
      }
      // stop scanning examples once as many attributes are nominal as there are slots
      if (checkedCounter >= guessedValueTypes.length) {
        break;
      }
    }

    // the example set contains at least one example and the guessing was performed
    if (exampleSet.size() > 0) {
      valueTypes = guessedValueTypes;

      // new attributes
      List<AttributeRole> newAttributes = new LinkedList<AttributeRole>();
      index = 0;
      for (Attribute attribute : attributeSet) {
        // same skip rule as in the guessing loop keeps index aligned with guessedValueTypes
        if (!attribute.isNominal() && !attribute.isNumerical()) {
          continue;
        }

        AttributeRole role = exampleSet.getAttributes().getRole(attribute);

        // the new attribute is added to the table only; its role is registered in the example
        // set's attributes after all attributes have been processed
        Attribute newAttribute = AttributeFactory.createAttribute(valueTypes[index]);
        exampleSet.getExampleTable().addAttribute(newAttribute);
        AttributeRole newRole = new AttributeRole(newAttribute);
        newRole.setSpecial(role.getSpecialName());
        newAttributes.add(newRole);

        // copy data
        for (Example e : exampleSet) {
          double oldValue = e.getValue(attribute);
          if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueTypes[index], Ontology.NUMERICAL)) {
            // numerical target: parse the string form exactly as in the guessing step
            if (!Double.isNaN(oldValue)) {
              String valueString = e.getValueAsString(attribute);
              if (Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) {
                e.setValue(newAttribute, Double.NaN);
              } else {
                if (groupingCharacter != null) {
                  valueString = valueString.replace(groupingCharacter.toString(), "");
                }
                valueString = valueString.replace(decimalPointCharacter, '.');
                e.setValue(newAttribute, Double.parseDouble(valueString));
              }
            } else {
              e.setValue(newAttribute, Double.NaN);
            }
          } else {
            // non-numerical target: store the index of the string form in the new mapping
            if (!Double.isNaN(oldValue)) {
              String value = e.getValueAsString(attribute);
              e.setValue(newAttribute, newAttribute.getMapping().mapString(value));
            } else {
              e.setValue(newAttribute, Double.NaN);
            }
          }
        }

        // remove the old attribute from the table and from the example set's attributes, then
        // give the new attribute the old name
        exampleSet.getExampleTable().removeAttribute(attribute);
        exampleSet.getAttributes().remove(role);
        newAttribute.setName(attribute.getName());

        index++;
      }

      // register the new attributes with the special name copied from the replaced attribute
      for (AttributeRole role : newAttributes) {
        if (role.isSpecial()) {
          exampleSet
              .getAttributes()
              .setSpecialAttribute(role.getAttribute(), role.getSpecialName());
        } else {
          exampleSet.getAttributes().addRegular(role.getAttribute());
        }
      }
    }

    return exampleSet;
  }