/** * Determines the output format based on the input format and returns this. * * @param inputFormat the input format to base the output format on * @return the output format * @throws Exception in case the determination goes wrong */ protected Instances determineOutputFormat(Instances inputFormat) throws Exception { Instances result; Attribute att; Attribute attSorted; FastVector atts; FastVector values; Vector<String> sorted; int i; int n; m_AttributeIndices.setUpper(inputFormat.numAttributes() - 1); // determine sorted indices atts = new FastVector(); m_NewOrder = new int[inputFormat.numAttributes()][]; for (i = 0; i < inputFormat.numAttributes(); i++) { att = inputFormat.attribute(i); if (!att.isNominal() || !m_AttributeIndices.isInRange(i)) { m_NewOrder[i] = new int[0]; atts.addElement(inputFormat.attribute(i).copy()); continue; } // sort labels sorted = new Vector<String>(); for (n = 0; n < att.numValues(); n++) sorted.add(att.value(n)); Collections.sort(sorted, m_Comparator); // determine new indices m_NewOrder[i] = new int[att.numValues()]; values = new FastVector(); for (n = 0; n < att.numValues(); n++) { m_NewOrder[i][n] = sorted.indexOf(att.value(n)); values.addElement(sorted.get(n)); } attSorted = new Attribute(att.name(), values); attSorted.setWeight(att.weight()); atts.addElement(attSorted); } // generate new header result = new Instances(inputFormat.relationName(), atts, 0); result.setClassIndex(inputFormat.classIndex()); return result; }
/** * Set the output format. Swapss the desired nominal attribute values in the header and calls * setOutputFormat(Instances) appropriately. */ private void setOutputFormat() { Instances newData; ArrayList<Attribute> newAtts; ArrayList<String> newVals; // Compute new attributes newAtts = new ArrayList<Attribute>(getInputFormat().numAttributes()); for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = getInputFormat().attribute(j); if (j != m_AttIndex.getIndex()) { newAtts.add((Attribute) att.copy()); } else { // Compute list of attribute values newVals = new ArrayList<String>(att.numValues()); for (int i = 0; i < att.numValues(); i++) { if (i == m_FirstIndex.getIndex()) { newVals.add(att.value(m_SecondIndex.getIndex())); } else if (i == m_SecondIndex.getIndex()) { newVals.add(att.value(m_FirstIndex.getIndex())); } else { newVals.add(att.value(i)); } } Attribute newAtt = new Attribute(att.name(), newVals); newAtt.setWeight(att.weight()); newAtts.add(newAtt); } } // Construct new header newData = new Instances(getInputFormat().relationName(), newAtts, 0); newData.setClassIndex(getInputFormat().classIndex()); setOutputFormat(newData); }
/** * Set the output format. Takes the currently defined cutpoints and m_InputFormat and calls * setOutputFormat(Instances) appropriately. */ protected void setOutputFormat() { if (m_CutPoints == null) { setOutputFormat(null); return; } ArrayList<Attribute> attributes = new ArrayList<Attribute>(getInputFormat().numAttributes()); int classIndex = getInputFormat().classIndex(); for (int i = 0, m = getInputFormat().numAttributes(); i < m; ++i) { if ((m_DiscretizeCols.isInRange(i)) && (getInputFormat().attribute(i).isNumeric())) { Set<String> cutPointsCheck = new HashSet<String>(); double[] cutPoints = m_CutPoints[i]; if (!m_MakeBinary) { ArrayList<String> attribValues; if (cutPoints == null) { attribValues = new ArrayList<String>(1); attribValues.add("'All'"); } else { attribValues = new ArrayList<String>(cutPoints.length + 1); if (m_UseBinNumbers) { for (int j = 0, n = cutPoints.length; j <= n; ++j) { attribValues.add("'B" + (j + 1) + "of" + (n + 1) + "'"); } } else { for (int j = 0, n = cutPoints.length; j <= n; ++j) { String newBinRangeString = binRangeString(cutPoints, j, m_BinRangePrecision); if (cutPointsCheck.contains(newBinRangeString)) { throw new IllegalArgumentException( "A duplicate bin range was detected. 
" + "Try increasing the bin range precision."); } attribValues.add("'" + newBinRangeString + "'"); } } } Attribute newAtt = new Attribute(getInputFormat().attribute(i).name(), attribValues); newAtt.setWeight(getInputFormat().attribute(i).weight()); attributes.add(newAtt); } else { if (cutPoints == null) { ArrayList<String> attribValues = new ArrayList<String>(1); attribValues.add("'All'"); Attribute newAtt = new Attribute(getInputFormat().attribute(i).name(), attribValues); newAtt.setWeight(getInputFormat().attribute(i).weight()); attributes.add(newAtt); } else { if (i < getInputFormat().classIndex()) { classIndex += cutPoints.length - 1; } for (int j = 0, n = cutPoints.length; j < n; ++j) { ArrayList<String> attribValues = new ArrayList<String>(2); if (m_UseBinNumbers) { attribValues.add("'B1of2'"); attribValues.add("'B2of2'"); } else { double[] binaryCutPoint = {cutPoints[j]}; String newBinRangeString1 = binRangeString(binaryCutPoint, 0, m_BinRangePrecision); String newBinRangeString2 = binRangeString(binaryCutPoint, 1, m_BinRangePrecision); if (newBinRangeString1.equals(newBinRangeString2)) { throw new IllegalArgumentException( "A duplicate bin range was detected. " + "Try increasing the bin range precision."); } attribValues.add("'" + newBinRangeString1 + "'"); attribValues.add("'" + newBinRangeString2 + "'"); } Attribute newAtt = new Attribute(getInputFormat().attribute(i).name() + "_" + (j + 1), attribValues); newAtt.setWeight(getInputFormat().attribute(i).weight()); attributes.add(newAtt); } } } } else { attributes.add((Attribute) getInputFormat().attribute(i).copy()); } } Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0); outputFormat.setClassIndex(classIndex); setOutputFormat(outputFormat); }
/**
 * Set the output format. Merges two labels of the selected nominal attribute in the header and
 * calls setOutputFormat(Instances) appropriately.
 *
 * <p>All attributes except the one selected by {@code m_AttIndex} are copied unchanged. In the
 * selected attribute the label at {@code m_FirstIndex} is replaced by the merged label
 * {@code <first>_<second>} and the label at {@code m_SecondIndex} is dropped. If either label is
 * enclosed in single quotes, the enclosing quotes are removed from the parts and the merged label
 * as a whole is enclosed in single quotes instead.
 *
 * <p>A label counts as quoted only if it has at least two characters and both starts and ends
 * with a single quote. Anything else is used verbatim, so a label that merely ends with a quote
 * keeps all of its characters and a label consisting of one quote character does not cause an
 * index error.
 */
private void setOutputFormat() {
  Instances input = getInputFormat();

  // Compute new attributes
  FastVector newAtts = new FastVector(input.numAttributes());
  for (int j = 0; j < input.numAttributes(); j++) {
    Attribute att = input.attribute(j);
    if (j != m_AttIndex.getIndex()) {
      newAtts.addElement(att.copy());
      continue;
    }

    int firstIndex = m_FirstIndex.getIndex();
    int secondIndex = m_SecondIndex.getIndex();
    String firstValue = att.value(firstIndex);
    String secondValue = att.value(secondIndex);

    boolean firstQuoted =
        firstValue.length() >= 2 && firstValue.startsWith("'") && firstValue.endsWith("'");
    boolean secondQuoted =
        secondValue.length() >= 2 && secondValue.startsWith("'") && secondValue.endsWith("'");
    boolean quoteResult = firstQuoted || secondQuoted;

    // Compute new value
    StringBuilder text = new StringBuilder();
    if (quoteResult) {
      text.append("'");
    }
    text.append(firstQuoted ? firstValue.substring(1, firstValue.length() - 1) : firstValue);
    text.append('_');
    text.append(secondQuoted ? secondValue.substring(1, secondValue.length() - 1) : secondValue);
    if (quoteResult) {
      text.append("'");
    }

    // Compute list of attribute values: merged label takes the first slot, second slot vanishes
    FastVector newVals = new FastVector(att.numValues() - 1);
    for (int i = 0; i < att.numValues(); i++) {
      if (i == firstIndex) {
        newVals.addElement(text.toString());
      } else if (i != secondIndex) {
        newVals.addElement(att.value(i));
      }
    }

    Attribute newAtt = new Attribute(att.name(), newVals);
    newAtt.setWeight(att.weight());
    newAtts.addElement(newAtt);
  }

  // Construct new header
  Instances newData = new Instances(input.relationName(), newAtts, 0);
  newData.setClassIndex(input.classIndex());
  setOutputFormat(newData);
}