/**
 * Replaces every numerical, non-integer attribute of the example set by a newly created
 * integer attribute carrying the same name.
 *
 * <p>Missing values (NaN) stay missing. Other values are either rounded with
 * {@link Math#round(double)} or truncated by a cast to {@code long}, depending on the
 * {@code PARAMETER_ROUND} parameter.
 *
 * @param exampleSet the example set to convert; it is modified in place
 * @return the same example set instance
 * @throws OperatorException declared by the overridden method
 */
@Override
public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
  final boolean useRounding = getParameterAsBoolean(PARAMETER_ROUND);
  final List<Attribute> integerAttributes = new LinkedList<Attribute>();

  for (Iterator<Attribute> it = exampleSet.getAttributes().iterator(); it.hasNext(); ) {
    final Attribute source = it.next();
    final int sourceType = source.getValueType();

    // Only numerical attributes that are not already integers need a conversion.
    if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(sourceType, Ontology.NUMERICAL)
        || Ontology.ATTRIBUTE_VALUE_TYPE.isA(sourceType, Ontology.INTEGER)) {
      continue;
    }

    final Attribute target = AttributeFactory.createAttribute(source.getName(), Ontology.INTEGER);
    integerAttributes.add(target);
    exampleSet.getExampleTable().addAttribute(target);

    for (Example row : exampleSet) {
      final double original = row.getValue(source);
      if (Double.isNaN(original)) {
        row.setValue(target, Double.NaN);
        continue;
      }
      final long converted;
      if (useRounding) {
        converted = Math.round(original);
      } else {
        converted = (long) original;
      }
      row.setValue(target, converted);
    }

    // Drop the source attribute through the iterator so the running iteration stays valid.
    it.remove();
  }

  // The replacements are registered only after the iteration has finished.
  for (Attribute integerAttribute : integerAttributes) {
    exampleSet.getAttributes().addRegular(integerAttribute);
  }
  return exampleSet;
}
@Override public Object getValueAt(int row, int column) { Object[] values = data.get(row); if (column == 0) { return values[column].toString(); } if (column >= values.length) { return ""; } int attributeType = reader.getAttributeColumn(column - 1).getValueType(); if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attributeType, Ontology.DATE_TIME) || Ontology.ATTRIBUTE_VALUE_TYPE.isA(attributeType, Ontology.DATE_TIME) || Ontology.ATTRIBUTE_VALUE_TYPE.isA(attributeType, Ontology.TIME)) { try { return Tools.formatDateTime((Date) values[column]); } catch (ClassCastException e) { // do nothing, just return default value } } // default value return values[column].toString(); }
/**
 * Maps an ontology value type code onto the corresponding {@code ValueType} constant.
 *
 * <p>The checks run in the order numerical, nominal, date/time; the first match wins.
 *
 * @param rmValueType the ontology value type code to translate
 * @return {@code NUMERICAL}, {@code NOMINAL} or {@code DATE_TIME} on a match, otherwise
 *     {@code INVALID}
 */
public static ValueType convertFromRapidMinerOntology(int rmValueType) {
  if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(rmValueType, Ontology.NUMERICAL)) {
    return NUMERICAL;
  }
  if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(rmValueType, Ontology.NOMINAL)) {
    return NOMINAL;
  }
  if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(rmValueType, Ontology.DATE_TIME)) {
    return DATE_TIME;
  }
  return INVALID;
}
/**
 * Checks whether the given value type is one of, or a subtype of, the entries in
 * {@code allowedValueTypes}.
 *
 * @param valueType the ontology value type code to test
 * @return {@code true} as soon as one allowed type matches, {@code false} if none does
 */
private boolean isOfAllowedType(int valueType) {
  for (int allowedType : allowedValueTypes) {
    // Stop at the first match; checking the remaining types cannot change the answer.
    if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueType, allowedType)) {
      return true;
    }
  }
  return false;
}
@Override public void setValueAt(Object value, int row, int column) { if (column == 0) { // RowNo. if (row == IS_SELECTED_ROW) { for (AttributeColumn col : reader.getAllAttributeColumns()) { col.activateColumn((Boolean) value); } repaint(); } return; } column--; if (row == ATTRIBUTE_NAME_ROW) { reader.setAttributeNamesDefinedByUser(true); reader.getAttributeColumn(column).setName((String) value); } if (row == VALUE_TYPE_ROW) { // update only if its not the same value if (reader.getAttributeColumn(column).getValueType() != Ontology.ATTRIBUTE_VALUE_TYPE.mapName(value.toString())) { reader .getAttributeColumn(column) .setValueType(Ontology.ATTRIBUTE_VALUE_TYPE.mapName(value.toString())); } } if (row == IS_SELECTED_ROW) { reader.getAttributeColumn(column).activateColumn((Boolean) value); } if (row == ROLE_ROW) { String role = (String) value; if (role.equals(AttributeColumn.REGULAR)) { reader.getAttributeColumn(column).setRole(role); } else { for (AttributeColumn attColumn : reader.getAllAttributeColumns()) { if (attColumn.getRole().equals(role)) { attColumn.setRole(AttributeColumn.REGULAR); } } reader.getAttributeColumn(column).setRole(role); fireTableDataChanged(); } } repaint(); }
/**
 * Adjusts the meta data of every numerical, non-integer attribute: its type becomes integer
 * and the bounds of its value range are rounded or truncated, depending on the
 * {@code PARAMETER_ROUND} parameter.
 *
 * <p>Attributes that are not converted keep their meta data unchanged. The previous
 * implementation rewrote the value range of every attribute with {@code SetRelation.EQUAL},
 * including attributes it did not convert.
 *
 * @param emd the meta data to adjust; it is modified in place
 * @return the same meta data instance
 */
@Override
public ExampleSetMetaData applyOnFilteredMetaData(ExampleSetMetaData emd) {
  boolean round = getParameterAsBoolean(PARAMETER_ROUND);
  for (AttributeMetaData amd : emd.getAllAttributes()) {
    int valueType = amd.getValueType();
    boolean needsConversion =
        Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueType, Ontology.NUMERICAL)
            && !Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueType, Ontology.INTEGER);
    if (!needsConversion) {
      // Not touched by the conversion, so type and value range stay as they are.
      continue;
    }

    amd.setType(Ontology.INTEGER);
    if (round) {
      amd.setValueRange(
          new Range(
              Math.round(amd.getValueRange().getLower()),
              Math.round(amd.getValueRange().getUpper())),
          SetRelation.EQUAL);
    } else {
      amd.setValueRange(
          new Range((long) amd.getValueRange().getLower(), (long) amd.getValueRange().getUpper()),
          SetRelation.EQUAL);
    }
  }
  return emd;
}
/**
 * Builds a vector with one representative statistic per attribute, in iteration order of the
 * example set's attributes.
 *
 * <p>Date/time attributes contribute their {@code MINIMUM} statistic, nominal attributes their
 * {@code MODE}, and all remaining attributes their {@code AVERAGE}. All attribute statistics
 * are recalculated first.
 *
 * @param exampleSet the example set to summarise
 * @return an array with one entry per attribute
 */
private double[] getMeanVector(ExampleSet exampleSet) {
  exampleSet.recalculateAllAttributeStatistics();

  final Attributes attributes = exampleSet.getAttributes();
  final double[] meanVector = new double[attributes.size()];

  int position = 0;
  for (Attribute attribute : attributes) {
    final double representative;
    if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
      representative = exampleSet.getStatistics(attribute, Statistics.MINIMUM);
    } else if (attribute.isNominal()) {
      representative = exampleSet.getStatistics(attribute, Statistics.MODE);
    } else {
      representative = exampleSet.getStatistics(attribute, Statistics.AVERAGE);
    }
    meanVector[position++] = representative;
  }
  return meanVector;
}
/**
 * Reads the value of one attribute from one example and stores it in {@code currentValue}.
 *
 * <p>The example index parameter is 1-based when positive and counts from the end of the
 * example set when negative; 0 is rejected. Nominal and date/time attributes are stored as
 * strings with {@code isNominal} set to {@code true}; all other attributes are stored as a
 * {@code Double} with {@code isNominal} set to {@code false}. The input example set is passed
 * on to the output port.
 *
 * @throws OperatorException as a {@code UserError} with code 111 if the attribute does not
 *     exist, code 207 if the index is 0, or code 110 if the index lies outside the example set
 */
@Override
public void doWork() throws OperatorException {
  ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

  String attributeName = getParameterAsString(PARAMETER_ATTRIBUTE_NAME);
  Attribute attribute = exampleSet.getAttributes().get(attributeName);
  if (attribute == null) {
    throw new UserError(this, 111, attributeName);
  }

  int index = getParameterAsInt(PARAMETER_EXAMPLE_INDEX);
  if (index == 0) {
    throw new UserError(
        this, 207, "0", PARAMETER_EXAMPLE_INDEX, "only positive or negative indices are allowed");
  }

  // Translate the user-facing index into a zero-based position.
  if (index < 0) {
    index = exampleSet.size() + index;
  } else {
    index--;
  }

  // A negative index with a magnitude larger than the example set size ends up below zero.
  // It must be rejected here as well instead of being handed to getExample.
  if (index < 0 || index >= exampleSet.size()) {
    throw new UserError(this, 110, index);
  }

  Example example = exampleSet.getExample(index);
  if (attribute.isNominal()
      || Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
    currentValue = example.getValueAsString(attribute);
    isNominal = true;
  } else {
    currentValue = Double.valueOf(example.getValue(attribute));
    isNominal = false;
  }

  exampleSetOutput.deliver(exampleSet);
}
@Override public ExampleSet applyOnData(ExampleSet exampleSet) throws OperatorException { Attributes attributes = exampleSet.getAttributes(); // constructing new attributes with generic names, holding old ones, if old type wasn't real Attribute[] oldAttributes = new Attribute[attributes.size()]; int i = 0; for (Attribute attribute : attributes) { oldAttributes[i] = attribute; i++; } Attribute[] newAttributes = new Attribute[attributes.size()]; for (i = 0; i < newAttributes.length; i++) { newAttributes[i] = oldAttributes[i]; if (oldAttributes[i].isNumerical()) if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(oldAttributes[i].getValueType(), Ontology.REAL)) { newAttributes[i] = AttributeFactory.createAttribute(Ontology.REAL); exampleSet.getExampleTable().addAttribute(newAttributes[i]); attributes.addRegular(newAttributes[i]); } } // applying on data applyOnData(exampleSet, oldAttributes, newAttributes); // removing old attributes and change new attributes name to old ones if needed for (i = 0; i < oldAttributes.length; i++) { attributes.remove(oldAttributes[i]); // if attribute is new, then remove for later storing in correct order if (oldAttributes[i] != newAttributes[i]) attributes.remove(newAttributes[i]); attributes.addRegular(newAttributes[i]); newAttributes[i].setName(oldAttributes[i].getName()); } return exampleSet; }
public void checkConstraints(ExampleTable et) { super.checkConstraints(et); RelationalAttribute relA = null; for (int i = 0; i < et.getNumberOfAttributes(); i++) if (et.getAttribute(i).getTableIndex() == this.getAttributeIndexes()[0]) { relA = (RelationalAttribute) et.getAttribute(i); break; } // sparse matrix inner relational attributes if (relA.getInnerAttributeCount() != 2) throw new IllegalArgumentException( "sparse matrix attribute " + this.getName() + " must wrap relational attribute with exactly two inner attributes"); else if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA( relA.getInnerAttributeAt(0).getValueType(), Ontology.NUMERICAL)) throw new IllegalArgumentException( "sparse matrix attribute " + this.getName() + " must wrap relational attribute which inner first attribute serves as key and therefore must be numerical"); }
private void parseTree( AggregationTreeNode node, Attribute[] groupAttributes, double[] dataOfUpperLevels, int groupLevel, List<double[]> allGroupCombinations, List<List<Aggregator>> allAggregators, DataRowFactory factory, Attribute[] newAttributes, boolean isCountingAllCombinations, List<AggregationFunction> aggregationFunctions) throws UserError { Attribute currentAttribute = groupAttributes[groupLevel]; if (currentAttribute.isNominal()) { Collection<? extends Object> nominalValues = null; if (isCountingAllCombinations) { nominalValues = currentAttribute.getMapping().getValues(); } else { nominalValues = node.getValues(); } for (Object nominalValue : nominalValues) { dataOfUpperLevels[groupLevel] = newAttributes[groupLevel].getMapping().mapString(nominalValue.toString()); // check if we have more group defining attributes if (groupLevel + 1 < groupAttributes.length) { parseTree( node.getOrCreateChild(nominalValue), groupAttributes, dataOfUpperLevels, groupLevel + 1, allGroupCombinations, allAggregators, factory, newAttributes, isCountingAllCombinations, aggregationFunctions); } else { // if not, insert values from aggregation functions parseLeaf( node.getLeaf(nominalValue), dataOfUpperLevels, allGroupCombinations, allAggregators, factory, newAttributes, aggregationFunctions); } } } else if (currentAttribute.isNumerical() || Ontology.ATTRIBUTE_VALUE_TYPE.isA(currentAttribute.getValueType(), Ontology.DATE_TIME)) { for (Object numericalValue : node.getValues()) { dataOfUpperLevels[groupLevel] = (Double) numericalValue; if (groupLevel + 1 < groupAttributes.length) { parseTree( node.getOrCreateChild(numericalValue), groupAttributes, dataOfUpperLevels, groupLevel + 1, allGroupCombinations, allAggregators, factory, newAttributes, isCountingAllCombinations, aggregationFunctions); } else { // if not, insert values from aggregation functions parseLeaf( node.getLeaf(numericalValue), dataOfUpperLevels, allGroupCombinations, allAggregators, factory, newAttributes, 
aggregationFunctions); } } } else { throw new UserError( this, "aggregation_operator.unsupported_value_type", currentAttribute.getName(), Ontology.ATTRIBUTE_VALUE_TYPE.getNames()[currentAttribute.getValueType()]); } }
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { // init char decimalPointCharacter = getParameterAsString(PARAMETER_DECIMAL_POINT_CHARACTER).charAt(0); Character groupingCharacter = null; if (isParameterSet(PARAMETER_NUMBER_GROUPING_CHARACTER)) { groupingCharacter = getParameterAsString(PARAMETER_NUMBER_GROUPING_CHARACTER).charAt(0); } Set<Attribute> attributeSet = attributeSelector.getAttributeSubset(exampleSet, false); int size = attributeSet.size(); int[] valueTypes = new int[size]; int index = 0; for (Attribute attribute : attributeSet) { valueTypes[index++] = attribute.getValueType(); } // guessing int[] guessedValueTypes = new int[valueTypes.length]; int checkedCounter = 0; for (Example example : exampleSet) { index = 0; for (Attribute attribute : attributeSet) { if (!attribute.isNominal() && !attribute.isNumerical()) { continue; } double originalValue = example.getValue(attribute); if (!Double.isNaN(originalValue)) { if (guessedValueTypes[index] != Ontology.NOMINAL) { try { String valueString = example.getValueAsString(attribute); if (!Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) { if (groupingCharacter != null) { valueString = valueString.replace(groupingCharacter.toString(), ""); } valueString = valueString.replace(decimalPointCharacter, '.'); double value = Double.parseDouble(valueString); if (guessedValueTypes[index] != Ontology.REAL) { if (Tools.isEqual(Math.round(value), value)) { guessedValueTypes[index] = Ontology.INTEGER; } else { guessedValueTypes[index] = Ontology.REAL; } } } } catch (NumberFormatException e) { guessedValueTypes[index] = Ontology.NOMINAL; checkedCounter++; } } } index++; } if (checkedCounter >= guessedValueTypes.length) { break; } } // the example set contains at least one example and the guessing was performed if (exampleSet.size() > 0) { valueTypes = guessedValueTypes; // new attributes List<AttributeRole> newAttributes = new LinkedList<AttributeRole>(); index = 0; for (Attribute 
attribute : attributeSet) { if (!attribute.isNominal() && !attribute.isNumerical()) { continue; } AttributeRole role = exampleSet.getAttributes().getRole(attribute); Attribute newAttribute = AttributeFactory.createAttribute(valueTypes[index]); exampleSet.getExampleTable().addAttribute(newAttribute); AttributeRole newRole = new AttributeRole(newAttribute); newRole.setSpecial(role.getSpecialName()); newAttributes.add(newRole); // copy data for (Example e : exampleSet) { double oldValue = e.getValue(attribute); if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueTypes[index], Ontology.NUMERICAL)) { if (!Double.isNaN(oldValue)) { String valueString = e.getValueAsString(attribute); if (Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) { e.setValue(newAttribute, Double.NaN); } else { if (groupingCharacter != null) { valueString = valueString.replace(groupingCharacter.toString(), ""); } valueString = valueString.replace(decimalPointCharacter, '.'); e.setValue(newAttribute, Double.parseDouble(valueString)); } } else { e.setValue(newAttribute, Double.NaN); } } else { if (!Double.isNaN(oldValue)) { String value = e.getValueAsString(attribute); e.setValue(newAttribute, newAttribute.getMapping().mapString(value)); } else { e.setValue(newAttribute, Double.NaN); } } } // delete attribute and rename the new attribute (due to deletion and data scans: no // more memory used :-) exampleSet.getExampleTable().removeAttribute(attribute); exampleSet.getAttributes().remove(role); newAttribute.setName(attribute.getName()); index++; } for (AttributeRole role : newAttributes) { if (role.isSpecial()) { exampleSet .getAttributes() .setSpecialAttribute(role.getAttribute(), role.getSpecialName()); } else { exampleSet.getAttributes().addRegular(role.getAttribute()); } } } return exampleSet; }
/**
 * Reports whether the given value type is supported. By default exactly the numerical value
 * types, as determined by the ontology, are supported.
 *
 * @param valueType the ontology value type code to check
 * @return {@code true} if the value type is numerical
 */
@Override
public boolean supportsValueType(int valueType) {
  final boolean numerical = Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueType, Ontology.NUMERICAL);
  return numerical;
}
public static void writeCSV( ExampleSet exampleSet, PrintWriter out, String colSeparator, boolean quoteNomValues, boolean writeAttribNames, boolean formatDate) { String columnSeparator = colSeparator; boolean quoteNominalValues = quoteNomValues; // write column names if (writeAttribNames) { Iterator<Attribute> a = exampleSet.getAttributes().allAttributes(); boolean first = true; while (a.hasNext()) { if (!first) out.print(columnSeparator); Attribute attribute = a.next(); String name = attribute.getName(); if (quoteNominalValues) { name = name.replaceAll("\"", "'"); name = "\"" + name + "\""; } out.print(name); first = false; } out.println(); } // write data for (Example example : exampleSet) { Iterator<Attribute> a = exampleSet.getAttributes().allAttributes(); boolean first = true; while (a.hasNext()) { Attribute attribute = a.next(); if (!first) out.print(columnSeparator); if (!Double.isNaN(example.getValue(attribute))) { if (attribute.isNominal()) { String stringValue = example.getValueAsString(attribute); if (quoteNominalValues) { stringValue = stringValue.replaceAll("\"", "'"); stringValue = "\"" + stringValue + "\""; } out.print(stringValue); } else { Double value = example.getValue(attribute); if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) { if (formatDate) { Date date = new Date(value.longValue()); String s = DateFormat.getInstance().format(date); out.print(s); } else { out.print(value); } } else { out.print(value); } } } first = false; } out.println(); } }