Example #1
0
  /**
   * Builds the output header for the given input header.
   *
   * <p>Nominal attributes whose index lies inside m_AttributeIndices are rebuilt with their labels
   * sorted by m_Comparator; every other attribute is copied as is. As a side effect m_NewOrder is
   * (re)initialised: for a sorted attribute, entry {@code [i][n]} holds the position of the n-th
   * original label in the sorted label list; for an untouched attribute the entry is an empty
   * array.
   *
   * @param inputFormat the input format to base the output format on
   * @return the output format
   * @throws Exception in case the determination goes wrong
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    final int attCount = inputFormat.numAttributes();
    m_AttributeIndices.setUpper(attCount - 1);

    // collect the (possibly re-labelled) attributes and the per-attribute index mapping
    FastVector headerAtts = new FastVector();
    m_NewOrder = new int[attCount][];
    for (int attIdx = 0; attIdx < attCount; attIdx++) {
      Attribute current = inputFormat.attribute(attIdx);

      if (current.isNominal() && m_AttributeIndices.isInRange(attIdx)) {
        final int labelCount = current.numValues();

        // sorted copy of the labels
        Vector<String> ordered = new Vector<String>();
        for (int v = 0; v < labelCount; v++) {
          ordered.add(current.value(v));
        }
        Collections.sort(ordered, m_Comparator);

        // old label index -> new label index
        int[] mapping = new int[labelCount];
        for (int v = 0; v < labelCount; v++) {
          mapping[v] = ordered.indexOf(current.value(v));
        }
        m_NewOrder[attIdx] = mapping;

        // attribute carrying the sorted labels, same name and weight
        FastVector labels = new FastVector();
        for (int v = 0; v < labelCount; v++) {
          labels.addElement(ordered.get(v));
        }
        Attribute reordered = new Attribute(current.name(), labels);
        reordered.setWeight(current.weight());
        headerAtts.addElement(reordered);
      } else {
        // not nominal or not selected: no remapping, plain copy
        m_NewOrder[attIdx] = new int[0];
        headerAtts.addElement(current.copy());
      }
    }

    // assemble the empty header and carry over the class index
    Instances header = new Instances(inputFormat.relationName(), headerAtts, 0);
    header.setClassIndex(inputFormat.classIndex());
    return header;
  }
Example #2
0
  /**
   * Set the output format. Swaps the two selected nominal values of the attribute picked by
   * m_AttIndex in the header (all other attributes are copied unchanged) and calls
   * setOutputFormat(Instances) with the resulting empty header.
   */
  private void setOutputFormat() {
    Instances header = getInputFormat();
    int attCount = header.numAttributes();
    ArrayList<Attribute> outAtts = new ArrayList<Attribute>(attCount);

    for (int pos = 0; pos < attCount; pos++) {
      Attribute source = header.attribute(pos);

      if (pos == m_AttIndex.getIndex()) {
        // rebuild the label list with the labels at the two chosen positions exchanged
        int first = m_FirstIndex.getIndex();
        int second = m_SecondIndex.getIndex();
        int labelCount = source.numValues();
        ArrayList<String> labels = new ArrayList<String>(labelCount);
        for (int v = 0; v < labelCount; v++) {
          int from = v;
          if (v == first) {
            from = second;
          } else if (v == second) {
            from = first;
          }
          labels.add(source.value(from));
        }
        Attribute swapped = new Attribute(source.name(), labels);
        swapped.setWeight(source.weight());
        outAtts.add(swapped);
      } else {
        outAtts.add((Attribute) source.copy());
      }
    }

    // empty header with the same relation name and class index as the input
    Instances outHeader = new Instances(header.relationName(), outAtts, 0);
    outHeader.setClassIndex(header.classIndex());
    setOutputFormat(outHeader);
  }
Example #3
0
  /**
   * Set the output format. Takes the currently defined cutpoints and m_InputFormat and calls
   * setOutputFormat(Instances) appropriately.
   *
   * <p>If no cut points have been computed yet (m_CutPoints is null) the output format is reset
   * to null. Otherwise every numeric attribute inside m_DiscretizeCols is replaced as follows,
   * while all other attributes are copied unchanged:
   *
   * <ul>
   *   <li>no cut points for the attribute: a single nominal attribute with the one label 'All';
   *   <li>m_MakeBinary off: one nominal attribute with {@code cutPoints.length + 1} labels, either
   *       bin numbers ('BxofN') or range strings produced by binRangeString;
   *   <li>m_MakeBinary on: one two-valued attribute per cut point, named
   *       {@code <name>_<cut point number>}; the class index is shifted accordingly when the
   *       attribute precedes the class attribute.
   * </ul>
   *
   * New attributes inherit the weight of the attribute they replace.
   *
   * @throws IllegalArgumentException if, at the current m_BinRangePrecision, two bins of the same
   *     attribute end up with the same range label
   */
  protected void setOutputFormat() {

    if (m_CutPoints == null) {
      setOutputFormat(null);
      return;
    }
    Instances inputFormat = getInputFormat();
    int numAttributes = inputFormat.numAttributes();
    ArrayList<Attribute> attributes = new ArrayList<Attribute>(numAttributes);
    int classIndex = inputFormat.classIndex();
    for (int i = 0; i < numAttributes; ++i) {
      Attribute inputAtt = inputFormat.attribute(i);
      if (!m_DiscretizeCols.isInRange(i) || !inputAtt.isNumeric()) {
        // not selected for discretization: keep the attribute as it is
        attributes.add((Attribute) inputAtt.copy());
        continue;
      }

      double[] cutPoints = m_CutPoints[i];
      if (cutPoints == null) {
        // no split found: a single bin covering everything, in binary and non-binary mode alike
        ArrayList<String> attribValues = new ArrayList<String>(1);
        attribValues.add("'All'");
        Attribute newAtt = new Attribute(inputAtt.name(), attribValues);
        newAtt.setWeight(inputAtt.weight());
        attributes.add(newAtt);
      } else if (!m_MakeBinary) {
        int numBins = cutPoints.length + 1;
        ArrayList<String> attribValues = new ArrayList<String>(numBins);
        if (m_UseBinNumbers) {
          for (int j = 0; j < numBins; ++j) {
            attribValues.add("'B" + (j + 1) + "of" + numBins + "'");
          }
        } else {
          // Labels already emitted for this attribute. Each label has to be recorded here,
          // otherwise the duplicate check below can never trigger.
          Set<String> cutPointsCheck = new HashSet<String>();
          for (int j = 0; j < numBins; ++j) {
            String newBinRangeString = binRangeString(cutPoints, j, m_BinRangePrecision);
            // Set.add returns false when the label is already present
            if (!cutPointsCheck.add(newBinRangeString)) {
              throw new IllegalArgumentException(
                  "A duplicate bin range was detected. "
                      + "Try increasing the bin range precision.");
            }
            attribValues.add("'" + newBinRangeString + "'");
          }
        }
        Attribute newAtt = new Attribute(inputAtt.name(), attribValues);
        newAtt.setWeight(inputAtt.weight());
        attributes.add(newAtt);
      } else {
        // one input attribute becomes cutPoints.length output attributes, so a class attribute
        // located after it moves by the difference
        if (i < inputFormat.classIndex()) {
          classIndex += cutPoints.length - 1;
        }
        for (int j = 0, n = cutPoints.length; j < n; ++j) {
          ArrayList<String> attribValues = new ArrayList<String>(2);
          if (m_UseBinNumbers) {
            attribValues.add("'B1of2'");
            attribValues.add("'B2of2'");
          } else {
            // label the two sides of this single cut point
            double[] binaryCutPoint = {cutPoints[j]};
            String newBinRangeString1 = binRangeString(binaryCutPoint, 0, m_BinRangePrecision);
            String newBinRangeString2 = binRangeString(binaryCutPoint, 1, m_BinRangePrecision);
            if (newBinRangeString1.equals(newBinRangeString2)) {
              throw new IllegalArgumentException(
                  "A duplicate bin range was detected. "
                      + "Try increasing the bin range precision.");
            }
            attribValues.add("'" + newBinRangeString1 + "'");
            attribValues.add("'" + newBinRangeString2 + "'");
          }
          Attribute newAtt = new Attribute(inputAtt.name() + "_" + (j + 1), attribValues);
          newAtt.setWeight(inputAtt.weight());
          attributes.add(newAtt);
        }
      }
    }
    Instances outputFormat = new Instances(inputFormat.relationName(), attributes, 0);
    outputFormat.setClassIndex(classIndex);
    setOutputFormat(outputFormat);
  }
  /**
   * Set the output format. Replaces the two selected values of the attribute picked by m_AttIndex
   * with a single merged value in the header (all other attributes are copied unchanged) and calls
   * setOutputFormat(Instances) appropriately.
   *
   * <p>The merged label is {@code first_second}. A label that ends with a single quote has its
   * first and last character removed before joining, and if that applies to either label the
   * merged label is wrapped in single quotes. The merged label takes the position of the first
   * value; the second value is removed from the list.
   */
  private void setOutputFormat() {
    Instances header = getInputFormat();
    FastVector outAtts = new FastVector(header.numAttributes());

    for (int pos = 0; pos < header.numAttributes(); pos++) {
      Attribute source = header.attribute(pos);
      if (pos != m_AttIndex.getIndex()) {
        outAtts.addElement(source.copy());
        continue;
      }

      // build the label of the merged value
      String firstLabel = source.value(m_FirstIndex.getIndex());
      String secondLabel = source.value(m_SecondIndex.getIndex());
      boolean firstQuoted = firstLabel.endsWith("'");
      boolean secondQuoted = secondLabel.endsWith("'");
      boolean quoteMerged = firstQuoted || secondQuoted;

      StringBuilder merged = new StringBuilder();
      if (quoteMerged) {
        merged.append("'");
      }
      merged.append(
          firstQuoted ? firstLabel.substring(1, firstLabel.length() - 1) : firstLabel);
      merged.append('_');
      merged.append(
          secondQuoted ? secondLabel.substring(1, secondLabel.length() - 1) : secondLabel);
      if (quoteMerged) {
        merged.append("'");
      }

      // label list: merged label at the first position, second position dropped
      int labelCount = source.numValues();
      FastVector labels = new FastVector(labelCount - 1);
      for (int v = 0; v < labelCount; v++) {
        if (v == m_FirstIndex.getIndex()) {
          labels.addElement(merged.toString());
        } else if (v != m_SecondIndex.getIndex()) {
          labels.addElement(source.value(v));
        }
      }

      Attribute mergedAtt = new Attribute(source.name(), labels);
      mergedAtt.setWeight(source.weight());
      outAtts.addElement(mergedAtt);
    }

    // empty header with the same relation name and class index as the input
    Instances outHeader = new Instances(header.relationName(), outAtts, 0);
    outHeader.setClassIndex(header.classIndex());
    setOutputFormat(outHeader);
  }