Ejemplo n.º 1
0
  /**
   * Reads the header line and initialises {@code m_structure} from it.
   *
   * <p>Assumes the first line of the file contains the attribute names. Assumes all attributes are
   * real (Reading the full data set with getDataSet will establish the true structure).
   *
   * <p>One attribute is created per token until the end of the header line. Reading also stops at
   * end of file, so a header line that is not terminated by a newline is still accepted.
   *
   * <p>The relation name is the source file name with a trailing ".csv" extension (any letter case)
   * removed, or "stream" when no source file is set.
   *
   * @param tokenizer a <code>StreamTokenizer</code> value
   * @throws IOException if an error occurs while reading; an input with no tokens at all is
   *     reported through {@code ConverterUtils.errms} (presumably by throwing - confirm)
   *     <pre><jml>
   *    private_normal_behavior
   *      requires: tokenizer != null;
   *      modifiable: m_structure;
   *      ensures: m_structure != null;
   *  also
   *    private_exceptional_behavior
   *      requires: tokenizer == null
   *                || (* unsuccessful parse *);
   *      signals: (IOException);
   * </jml></pre>
   */
  private void readHeader(StreamTokenizer tokenizer) throws IOException {

    ArrayList<Attribute> attribNames = new ArrayList<Attribute>();
    ConverterUtils.getFirstToken(tokenizer);
    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
      ConverterUtils.errms(tokenizer, "premature end of file");
    }

    // Stop on end of file as well as end of line: once the tokenizer hits EOF it keeps
    // reporting TT_EOF, so waiting for TT_EOL alone would never terminate on a header
    // line that lacks a trailing newline.
    while (tokenizer.ttype != StreamTokenizer.TT_EOL
        && tokenizer.ttype != StreamTokenizer.TT_EOF) {
      attribNames.add(new Attribute(tokenizer.sval));
      ConverterUtils.getToken(tokenizer);
    }

    String relationName;
    if (m_sourceFile != null) {
      relationName = (m_sourceFile.getName()).replaceAll("\\.[cC][sS][vV]$", "");
    } else {
      relationName = "stream";
    }
    m_structure = new Instances(relationName, attribNames, 0);
  }
Ejemplo n.º 2
0
  /**
   * Attempts to parse a line of the data set.
   *
   * <p>Each field becomes one element of the returned list: a {@code Double} when the token parses
   * as a number, otherwise a {@code String}. An empty field (a separator, end of line or end of
   * file where a value was expected) and a token equal to {@code m_MissingValue} are both recorded
   * as the missing-value string.
   *
   * <p>NOTE(review): the loop relies on the tokenizer reporting the field separator as a token of
   * its own - confirm against the tokenizer set-up, which is not visible here.
   *
   * @param tokenizer the tokenizer
   * @return an ArrayList containing String and Double objects representing the values of the
   *     instance, or {@code null} if the end of the file had already been reached
   * @throws IOException if an error occurs while reading; a value count that differs from
   *     {@code m_structure.numAttributes()} is reported through {@code ConverterUtils.errms}
   *     (presumably by throwing - confirm)
   *     <pre><jml>
   *    private_normal_behavior
   *      requires: tokenizer != null;
   *      ensures: \result != null || (* end of file reached *);
   *  also
   *    private_exceptional_behavior
   *      requires: tokenizer == null
   *                || (* unsuccessful parse *);
   *      signals: (IOException);
   * </jml></pre>
   */
  private ArrayList<Object> getInstance(StreamTokenizer tokenizer) throws IOException {

    ArrayList<Object> current = new ArrayList<Object>();

    // Nothing left to read: tell the caller by returning null.
    ConverterUtils.getFirstToken(tokenizer);
    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
      return null;
    }
    boolean first = true;
    boolean wasSep;

    while (tokenizer.ttype != StreamTokenizer.TT_EOL && tokenizer.ttype != StreamTokenizer.TT_EOF) {

      // The very first token was already fetched by getFirstToken above.
      if (!first) {
        ConverterUtils.getToken(tokenizer);
      }

      if (tokenizer.ttype == m_FieldSeparator.charAt(0)
          || tokenizer.ttype == StreamTokenizer.TT_EOL
          || tokenizer.ttype == StreamTokenizer.TT_EOF) {
        // A separator or the end of the line/file where a value was expected means the field
        // is empty. TT_EOF must be handled here too: a last line ending in a separator with
        // no newline would otherwise reach the value branch with a null sval.
        current.add(m_MissingValue);
        wasSep = true;
      } else {
        wasSep = false;
        if (tokenizer.sval.equals(m_MissingValue)) {
          // Copy kept on purpose: code outside this method might tell this instance apart
          // from m_MissingValue itself - TODO confirm before simplifying.
          current.add(new String(m_MissingValue));
        } else {
          // try to parse as a number
          try {
            current.add(Double.valueOf(tokenizer.sval));
          } catch (NumberFormatException e) {
            // otherwise assume it is an enumerated value
            current.add(tokenizer.sval);
          }
        }
      }

      // After a value, consume the token that follows it (separator or line end); after an
      // empty field the current token is that separator/line end already.
      if (!wasSep) {
        ConverterUtils.getToken(tokenizer);
      }
      first = false;
    }

    // check number of values read
    if (current.size() != m_structure.numAttributes()) {
      ConverterUtils.errms(
          tokenizer,
          "wrong number of values. Read "
              + current.size()
              + ", expected "
              + m_structure.numAttributes());
    }

    // check for structure update; a failure here is only printed and the parsed values are
    // still returned (best-effort behaviour kept as in the original)
    try {
      checkStructure(current);
    } catch (Exception ex) {
      ex.printStackTrace();
    }

    return current;
  }