/**
 * Creates this example set from a clone of the given one and converts every special attribute
 * role found on the clone's attributes into a regular role.
 *
 * @param exampleSet the example set to clone; the clone is stored as the parent of this set
 */
public NonSpecialAttributesExampleSet(ExampleSet exampleSet) {
  this.parent = (ExampleSet) exampleSet.clone();

  for (Iterator<AttributeRole> specialRoles = this.parent.getAttributes().specialAttributes();
      specialRoles.hasNext(); ) {
    AttributeRole role = specialRoles.next();
    if (!role.isSpecial()) {
      // nothing to demote for this role
      continue;
    }
    role.changeToRegular();
  }
}
/**
 * Decides whether a feature is switched off, based on its name and, for special attributes, also
 * on its special name.
 *
 * <p>A feature is switched off when the skip pattern fully matches its name or its special name,
 * unless an exception pattern is set and fully matches the name or the special name. When no
 * exception pattern is set, the skip pattern alone decides.
 *
 * @param attributeRole Feature to check.
 * @return TRUE if this feature should <b>not</b> be active in the output example set of this
 *     operator. FALSE otherwise.
 * @throws OperatorException declared by the method signature; this implementation contains no
 *     throw statement of its own
 */
@Override
public boolean switchOffFeature(AttributeRole attributeRole) throws OperatorException {
  String attributeName = attributeRole.getAttribute().getName();
  boolean special = attributeRole.isSpecial();

  // Step 1: the skip pattern has to match either the plain name or the special name.
  boolean skipRequested =
      skipPattern.matcher(attributeName).matches()
          || (special && skipPattern.matcher(attributeRole.getSpecialName()).matches());
  if (!skipRequested) {
    return false;
  }

  // Step 2: without an exception pattern there is nothing that could veto the skip.
  if (exceptionPattern == null) {
    return true;
  }

  // Step 3: a match of the exception pattern on either name keeps the feature switched on.
  if (exceptionPattern.matcher(attributeName).matches()) {
    return false;
  }
  return !(special && exceptionPattern.matcher(attributeRole.getSpecialName()).matches());
}
/**
 * Filters the features of the input example set by asking <code>switchOffFeature()</code> for
 * every attribute role. A role for which TRUE is returned is removed from the example set's
 * attributes; all other roles are left untouched. Special attributes are only considered when
 * the parameter {@code PARAMETER_FILTER_SPECIAL_FEATURES} is true; otherwise they are skipped.
 * The number of attributes is logged before and after filtering.
 *
 * @return An array of IOObjects, with the output example set as the only member.
 * @throws OperatorException declared by the signature; the body calls {@code getInput},
 *     {@code switchOffFeature} and {@code checkForStop}
 */
public IOObject[] apply() throws OperatorException {
  ExampleSet eSet = getInput(ExampleSet.class);
  log(eSet.getAttributes().size() + " features before filtering.");

  Iterator<AttributeRole> roles = eSet.getAttributes().allAttributeRoles();
  boolean filterSpecial = getParameterAsBoolean(PARAMETER_FILTER_SPECIAL_FEATURES);

  while (roles.hasNext()) {
    AttributeRole candidate = roles.next();
    boolean eligible = !candidate.isSpecial() || filterSpecial;
    if (eligible) {
      if (switchOffFeature(candidate)) {
        // remove through the iterator so the iteration stays valid
        roles.remove();
      }
      checkForStop();
    }
  }

  log(eSet.getAttributes().size() + " features left after filtering.");
  return new IOObject[] {eSet};
}
/** * Applies the sorted and unmachted attribute list to the provided {@link Attributes}. All * unmachted attributes are removed from attributes and all {@link Attribute}s from the sorted * list are added in correct order. * * @param sortedAttributeList attributes that will be removed first and added in correct order * afterwards. * @param unmachtedAttributes attributes that should be removed. May be <code>null</code> if no * attributes should be removed. */ private void applySortedAttributes( List<Attribute> sortedAttributeList, List<Attribute> unmachtedAttributes, Attributes attributes) { if (unmachtedAttributes != null) { for (Attribute unmachted : unmachtedAttributes) { attributes.remove(unmachted); } } for (Attribute attribute : sortedAttributeList) { AttributeRole role = attributes.getRole(attribute); attributes.remove(attribute); if (role.isSpecial()) { attributes.setSpecialAttribute(attribute, role.getSpecialName()); } else { // regular attributes.addRegular(attribute); } } }
@Override public Attributes getTargetAttributes(ExampleSet viewParent) { SimpleAttributes attributes = new SimpleAttributes(); // add special attributes to new attributes Iterator<AttributeRole> roleIterator = viewParent.getAttributes().allAttributeRoles(); while (roleIterator.hasNext()) { AttributeRole role = roleIterator.next(); if (role.isSpecial()) { attributes.add(role); } } // add regular attributes for (Attribute attribute : viewParent.getAttributes()) { if (!attribute.isNumerical() || !attributeTransformations.containsKey(attribute.getName())) { attributes.addRegular(attribute); } else { // giving new attributes old name: connection to rangesMap attributes.addRegular( new ViewAttribute(this, attribute, attribute.getName(), Ontology.NUMERICAL, null)); } } return attributes; }
/** * Parses the provided expression and iterates over the {@link ExampleSet}, interprets attributes * as variables, evaluates the function and creates a new attribute with the given name that takes * the expression's value. The type of the attribute depends on the expression type and is {@link * Ontology#NOMINAL} for strings, {@link Ontology#INTEGER} for integers, {@link Ontology#REAL} for * reals, {@link Ontology#DATE_TIME} for Dates, and {@link Ontology#BINOMINAL} with values * "true" and "false" for booleans. If the executing operator is defined, * there will be a check for stop before the calculation of each example. * * @param exampleSet the example set to which the generated attribute is added * @param name the new attribute name * @param expression the expression used to generate attribute values * @param parser the expression parser used to parse the expression argument * @param resolver the example resolver which is used by the parser to resolve example values * @param executingOperator the operator calling this method. <code>null</code> is allowed. 
If not * null the operator will be used to check for stop * @throws ProcessStoppedException in case the process was stopped by the user * @throws ExpressionException in case parsing the expression fails */ public static Attribute addAttribute( ExampleSet exampleSet, String name, String expression, ExpressionParser parser, ExampleResolver resolver, Operator executingOperator) throws ProcessStoppedException, ExpressionException { // parse the expression Expression parsedExpression = parser.parse(expression); Attribute newAttribute = null; // if != null this needs to be overridden Attribute existingAttribute = exampleSet.getAttributes().get(name); StringBuffer appendix = new StringBuffer(); String targetName = name; if (existingAttribute != null) { // If an existing attribute will be overridden, first a unique temporary name has to be // generated by appending a random string to the attribute's name until it's a unique // attribute name. After the new attribute is build, it's name is set the 'targetName' // at the end of this method. 
// do { appendix.append(RandomGenerator.getGlobalRandomGenerator().nextString(5)); } while (exampleSet.getAttributes().get(name + appendix.toString()) != null); name = name + appendix.toString(); } ExpressionType resultType = parsedExpression.getExpressionType(); int ontology = resultType.getAttributeType(); if (ontology == Ontology.BINOMINAL) { newAttribute = AttributeFactory.createAttribute(name, Ontology.BINOMINAL); newAttribute.getMapping().mapString("false"); newAttribute.getMapping().mapString("true"); } else { newAttribute = AttributeFactory.createAttribute(name, ontology); } // set construction description newAttribute.setConstruction(expression); // add new attribute to table and example set exampleSet.getExampleTable().addAttribute(newAttribute); exampleSet.getAttributes().addRegular(newAttribute); // create attribute of correct type and all values for (Example example : exampleSet) { if (executingOperator != null) { executingOperator.checkForStop(); } // bind example to resolver resolver.bind(example); // calculate result try { switch (resultType) { case DOUBLE: case INTEGER: example.setValue(newAttribute, parsedExpression.evaluateNumerical()); break; case DATE: Date date = parsedExpression.evaluateDate(); example.setValue(newAttribute, date == null ? 
Double.NaN : date.getTime()); break; default: example.setValue(newAttribute, parsedExpression.evaluateNominal()); break; } } finally { // avoid memory leaks resolver.unbind(); } } // remove existing attribute (if necessary) if (existingAttribute != null) { AttributeRole oldRole = exampleSet.getAttributes().getRole(existingAttribute); exampleSet.getAttributes().remove(existingAttribute); newAttribute.setName(targetName); // restore role from old attribute to new attribute if (oldRole.isSpecial()) { exampleSet.getAttributes().setSpecialAttribute(newAttribute, oldRole.getSpecialName()); } } // update example resolver after meta data change resolver.addAttributeMetaData( new AttributeMetaData(exampleSet.getAttributes().getRole(newAttribute), exampleSet, true)); return newAttribute; }
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { // determine new value types int valueType = Ontology.REAL; Iterator<AttributeRole> a = exampleSet.getAttributes().allAttributeRoles(); while (a.hasNext()) { AttributeRole attributeRole = a.next(); if (!attributeRole.isSpecial() || !attributeRole.getSpecialName().equals(Attributes.ID_NAME)) { if (attributeRole.getAttribute().isNominal()) { valueType = Ontology.NOMINAL; break; } } } // create new attributes List<Attribute> newAttributes = new ArrayList<Attribute>(exampleSet.size()); Attribute newIdAttribute = AttributeFactory.createAttribute(Attributes.ID_NAME, Ontology.NOMINAL); newAttributes.add(newIdAttribute); Attribute oldIdAttribute = exampleSet.getAttributes().getId(); if (oldIdAttribute != null) { for (Example e : exampleSet) { double idValue = e.getValue(oldIdAttribute); String attributeName = "att_" + idValue; if (oldIdAttribute.isNominal()) { if (Double.isNaN(idValue)) { newAttributes.add(AttributeFactory.createAttribute(valueType)); } else { attributeName = oldIdAttribute.getMapping().mapIndex((int) idValue); newAttributes.add(AttributeFactory.createAttribute(attributeName, valueType)); } } else { newAttributes.add(AttributeFactory.createAttribute(attributeName, valueType)); } } } else { for (int i = 0; i < exampleSet.size(); i++) { newAttributes.add(AttributeFactory.createAttribute("att_" + (i + 1), valueType)); } } // create and fill table MemoryExampleTable table = new MemoryExampleTable(newAttributes); a = exampleSet.getAttributes().allAttributeRoles(); while (a.hasNext()) { AttributeRole attributeRole = a.next(); if (!attributeRole.isSpecial() || !attributeRole.getSpecialName().equals(Attributes.ID_NAME)) { Attribute attribute = attributeRole.getAttribute(); double[] data = new double[exampleSet.size() + 1]; data[0] = newIdAttribute.getMapping().mapString(attribute.getName()); int counter = 1; for (Example e : exampleSet) { double currentValue = 
e.getValue(attribute); data[counter] = currentValue; Attribute newAttribute = newAttributes.get(counter); if (newAttribute.isNominal()) { if (!Double.isNaN(currentValue)) { String currentValueString = currentValue + ""; if (attribute.isNominal()) currentValueString = attribute.getMapping().mapIndex((int) currentValue); data[counter] = newAttribute.getMapping().mapString(currentValueString); } } counter++; } table.addDataRow(new DoubleArrayDataRow(data)); } } // create and deliver example set ExampleSet result = table.createExampleSet(null, null, newIdAttribute); result.getAnnotations().addAll(exampleSet.getAnnotations()); return result; }
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { // init char decimalPointCharacter = getParameterAsString(PARAMETER_DECIMAL_POINT_CHARACTER).charAt(0); Character groupingCharacter = null; if (isParameterSet(PARAMETER_NUMBER_GROUPING_CHARACTER)) { groupingCharacter = getParameterAsString(PARAMETER_NUMBER_GROUPING_CHARACTER).charAt(0); } Set<Attribute> attributeSet = attributeSelector.getAttributeSubset(exampleSet, false); int size = attributeSet.size(); int[] valueTypes = new int[size]; int index = 0; for (Attribute attribute : attributeSet) { valueTypes[index++] = attribute.getValueType(); } // guessing int[] guessedValueTypes = new int[valueTypes.length]; int checkedCounter = 0; for (Example example : exampleSet) { index = 0; for (Attribute attribute : attributeSet) { if (!attribute.isNominal() && !attribute.isNumerical()) { continue; } double originalValue = example.getValue(attribute); if (!Double.isNaN(originalValue)) { if (guessedValueTypes[index] != Ontology.NOMINAL) { try { String valueString = example.getValueAsString(attribute); if (!Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) { if (groupingCharacter != null) { valueString = valueString.replace(groupingCharacter.toString(), ""); } valueString = valueString.replace(decimalPointCharacter, '.'); double value = Double.parseDouble(valueString); if (guessedValueTypes[index] != Ontology.REAL) { if (Tools.isEqual(Math.round(value), value)) { guessedValueTypes[index] = Ontology.INTEGER; } else { guessedValueTypes[index] = Ontology.REAL; } } } } catch (NumberFormatException e) { guessedValueTypes[index] = Ontology.NOMINAL; checkedCounter++; } } } index++; } if (checkedCounter >= guessedValueTypes.length) { break; } } // the example set contains at least one example and the guessing was performed if (exampleSet.size() > 0) { valueTypes = guessedValueTypes; // new attributes List<AttributeRole> newAttributes = new LinkedList<AttributeRole>(); index = 0; for (Attribute 
attribute : attributeSet) { if (!attribute.isNominal() && !attribute.isNumerical()) { continue; } AttributeRole role = exampleSet.getAttributes().getRole(attribute); Attribute newAttribute = AttributeFactory.createAttribute(valueTypes[index]); exampleSet.getExampleTable().addAttribute(newAttribute); AttributeRole newRole = new AttributeRole(newAttribute); newRole.setSpecial(role.getSpecialName()); newAttributes.add(newRole); // copy data for (Example e : exampleSet) { double oldValue = e.getValue(attribute); if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueTypes[index], Ontology.NUMERICAL)) { if (!Double.isNaN(oldValue)) { String valueString = e.getValueAsString(attribute); if (Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) { e.setValue(newAttribute, Double.NaN); } else { if (groupingCharacter != null) { valueString = valueString.replace(groupingCharacter.toString(), ""); } valueString = valueString.replace(decimalPointCharacter, '.'); e.setValue(newAttribute, Double.parseDouble(valueString)); } } else { e.setValue(newAttribute, Double.NaN); } } else { if (!Double.isNaN(oldValue)) { String value = e.getValueAsString(attribute); e.setValue(newAttribute, newAttribute.getMapping().mapString(value)); } else { e.setValue(newAttribute, Double.NaN); } } } // delete attribute and rename the new attribute (due to deletion and data scans: no // more memory used :-) exampleSet.getExampleTable().removeAttribute(attribute); exampleSet.getAttributes().remove(role); newAttribute.setName(attribute.getName()); index++; } for (AttributeRole role : newAttributes) { if (role.isSpecial()) { exampleSet .getAttributes() .setSpecialAttribute(role.getAttribute(), role.getSpecialName()); } else { exampleSet.getAttributes().addRegular(role.getAttribute()); } } } return exampleSet; }