/** * Adds a SearchObject to the container. * * <p>Attention: As you do this, it has to be checked, if the distance between the associated * SearchObject and the object added to the container is equal to the distance of the objects * already in the container. * * <p>To achieve this, the method checks if the distance delivered as a sanity check parameter is * equal to the distance of the container; if yes, the object is added, else not. A boolean state * on the success is returned. * * <p>if the container is empty, e.g. the first object is added, no checks are necessary and hence * the distance check against the initial zero value is not performed thus not preventing the * addition of the object. * * <p>It is recommended to e.g. add an object <i>so</i> to the list of the objects of a container * associated to an object <i>soA</i> with the following process: * * <p>KdistanceContainer.addObject(so, soA.getDistance(so)); */ public boolean addObject(SearchObject so, double dist) { // first, check if the container is empty, in this case the object can be added // without additional checks: if (this.listOfObjects.size() == 0) { this.listOfObjects.add(so); this.setDistance(dist); this.setNumberOfObjects(this.listOfObjects.size()); return true; } else { // in the other case (container is not empty) if (Tools.isEqual( this.getDistance(), dist)) { // check if distance of container is equal to dist of added object this.listOfObjects.add(so); // if yes, then add it this.setDistance(dist); this.setNumberOfObjects(this.listOfObjects.size()); return true; } else { // if the distances are not equal, do not add the object and return false return false; } } }
@Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { // init char decimalPointCharacter = getParameterAsString(PARAMETER_DECIMAL_POINT_CHARACTER).charAt(0); Character groupingCharacter = null; if (isParameterSet(PARAMETER_NUMBER_GROUPING_CHARACTER)) { groupingCharacter = getParameterAsString(PARAMETER_NUMBER_GROUPING_CHARACTER).charAt(0); } Set<Attribute> attributeSet = attributeSelector.getAttributeSubset(exampleSet, false); int size = attributeSet.size(); int[] valueTypes = new int[size]; int index = 0; for (Attribute attribute : attributeSet) { valueTypes[index++] = attribute.getValueType(); } // guessing int[] guessedValueTypes = new int[valueTypes.length]; int checkedCounter = 0; for (Example example : exampleSet) { index = 0; for (Attribute attribute : attributeSet) { if (!attribute.isNominal() && !attribute.isNumerical()) { continue; } double originalValue = example.getValue(attribute); if (!Double.isNaN(originalValue)) { if (guessedValueTypes[index] != Ontology.NOMINAL) { try { String valueString = example.getValueAsString(attribute); if (!Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) { if (groupingCharacter != null) { valueString = valueString.replace(groupingCharacter.toString(), ""); } valueString = valueString.replace(decimalPointCharacter, '.'); double value = Double.parseDouble(valueString); if (guessedValueTypes[index] != Ontology.REAL) { if (Tools.isEqual(Math.round(value), value)) { guessedValueTypes[index] = Ontology.INTEGER; } else { guessedValueTypes[index] = Ontology.REAL; } } } } catch (NumberFormatException e) { guessedValueTypes[index] = Ontology.NOMINAL; checkedCounter++; } } } index++; } if (checkedCounter >= guessedValueTypes.length) { break; } } // the example set contains at least one example and the guessing was performed if (exampleSet.size() > 0) { valueTypes = guessedValueTypes; // new attributes List<AttributeRole> newAttributes = new LinkedList<AttributeRole>(); index = 0; for (Attribute attribute : attributeSet) { if (!attribute.isNominal() && !attribute.isNumerical()) { continue; } AttributeRole role = exampleSet.getAttributes().getRole(attribute); Attribute newAttribute = AttributeFactory.createAttribute(valueTypes[index]); exampleSet.getExampleTable().addAttribute(newAttribute); AttributeRole newRole = new AttributeRole(newAttribute); newRole.setSpecial(role.getSpecialName()); newAttributes.add(newRole); // copy data for (Example e : exampleSet) { double oldValue = e.getValue(attribute); if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueTypes[index], Ontology.NUMERICAL)) { if (!Double.isNaN(oldValue)) { String valueString = e.getValueAsString(attribute); if (Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) { e.setValue(newAttribute, Double.NaN); } else { if (groupingCharacter != null) { valueString = valueString.replace(groupingCharacter.toString(), ""); } valueString = valueString.replace(decimalPointCharacter, '.'); e.setValue(newAttribute, Double.parseDouble(valueString)); } } else { e.setValue(newAttribute, Double.NaN); } } else { if (!Double.isNaN(oldValue)) { String value = e.getValueAsString(attribute); e.setValue(newAttribute, newAttribute.getMapping().mapString(value)); } else { e.setValue(newAttribute, Double.NaN); } } } // delete attribute and rename the new attribute (due to deletion and data scans: no // more memory used :-) exampleSet.getExampleTable().removeAttribute(attribute); exampleSet.getAttributes().remove(role); newAttribute.setName(attribute.getName()); index++; } for (AttributeRole role : newAttributes) { if (role.isSpecial()) { exampleSet .getAttributes() .setSpecialAttribute(role.getAttribute(), role.getSpecialName()); } else { exampleSet.getAttributes().addRegular(role.getAttribute()); } } } return exampleSet; }