/**
 * Initializes the attribute indices.
 *
 * <p>The upper bound of the attribute range is set to the index of the last attribute in the
 * data; afterwards a flag is stored for every attribute telling whether its index lies inside
 * that range.
 */
protected void initializeAttributeIndices() {
  int numAtts = m_Data.numAttributes();

  m_AttributeIndices.setUpper(numAtts - 1);

  // one flag per attribute: true if the attribute index is part of the range
  m_ActiveIndices = new boolean[numAtts];
  for (int attIndex = 0; attIndex < numAtts; attIndex++) {
    m_ActiveIndices[attIndex] = m_AttributeIndices.isInRange(attIndex);
  }
}
/** * Determines the output format based on the input format and returns this. In case the output * format cannot be returned immediately, i.e., immediateOutputFormat() returns false, then this * method will be called from batchFinished(). * * @param inputFormat the input format to base the output format on * @return the output format * @throws Exception in case the determination goes wrong * @see #hasImmediateOutputFormat() * @see #batchFinished() */ protected Instances determineOutputFormat(Instances inputFormat) throws Exception { Instances data; Instances result; FastVector atts; FastVector values; HashSet hash; int i; int n; boolean isDate; Instance inst; Vector sorted; m_Cols.setUpper(inputFormat.numAttributes() - 1); data = new Instances(inputFormat); atts = new FastVector(); for (i = 0; i < data.numAttributes(); i++) { if (!m_Cols.isInRange(i) || !data.attribute(i).isNumeric()) { atts.addElement(data.attribute(i)); continue; } // date attribute? isDate = (data.attribute(i).type() == Attribute.DATE); // determine all available attribtues in dataset hash = new HashSet(); for (n = 0; n < data.numInstances(); n++) { inst = data.instance(n); if (inst.isMissing(i)) continue; if (isDate) hash.add(inst.stringValue(i)); else hash.add(new Double(inst.value(i))); } // sort values sorted = new Vector(); for (Object o : hash) sorted.add(o); Collections.sort(sorted); // create attribute from sorted values values = new FastVector(); for (Object o : sorted) { if (isDate) values.addElement(o.toString()); else values.addElement(Utils.doubleToString(((Double) o).doubleValue(), MAX_DECIMALS)); } atts.addElement(new Attribute(data.attribute(i).name(), values)); } result = new Instances(inputFormat.relationName(), atts, 0); result.setClassIndex(inputFormat.classIndex()); return result; }
/** * Sets the format of the input instances. * * @param instanceInfo an Instances object containing the input instance structure (any instances * contained in the object are ignored - only the structure is required). * @return true if the outputFormat may be collected immediately * @throws Exception if the format couldn't be set successfully */ public boolean setInputFormat(Instances instanceInfo) throws Exception { super.setInputFormat(instanceInfo); m_Insert.setUpper(instanceInfo.numAttributes()); Instances outputFormat = new Instances(instanceInfo, 0); Attribute newAttribute = null; switch (m_AttributeType) { case Attribute.NUMERIC: newAttribute = new Attribute(m_Name); break; case Attribute.NOMINAL: newAttribute = new Attribute(m_Name, m_Labels); break; case Attribute.STRING: newAttribute = new Attribute(m_Name, (FastVector) null); break; case Attribute.DATE: newAttribute = new Attribute(m_Name, m_DateFormat); break; default: throw new IllegalArgumentException("Unknown attribute type in Add"); } if ((m_Insert.getIndex() < 0) || (m_Insert.getIndex() > getInputFormat().numAttributes())) { throw new IllegalArgumentException("Index out of range"); } outputFormat.insertAttributeAt(newAttribute, m_Insert.getIndex()); setOutputFormat(outputFormat); // all attributes, except index of added attribute // (otherwise the length of the input/output indices differ) Range atts = new Range(m_Insert.getSingleIndex()); atts.setInvert(true); atts.setUpper(outputFormat.numAttributes() - 1); initOutputLocators(outputFormat, atts.getSelection()); return true; }