/**
 * Reads the attribute names from the first line of the file and builds the
 * dataset structure. All attributes are assumed to be real-valued at this
 * point (reading the full data set with getDataSet will establish the true
 * structure).
 *
 * @param tokenizer a <code>StreamTokenizer</code> value
 * @exception IOException if an error occurs
 * <pre><jml>
 *   private_normal_behavior
 *     requires: tokenizer != null;
 *     modifiable: m_structure;
 *     ensures: m_structure != null;
 * also
 *   private_exceptional_behavior
 *     requires: tokenizer == null
 *               || (* unsuccessful parse *);
 *     signals: (IOException);
 * </jml></pre>
 */
private void readHeader(StreamTokenizer tokenizer) throws IOException {
  ArrayList<Attribute> headerAttributes = new ArrayList<Attribute>();

  // An empty file has no header line to parse.
  ConverterUtils.getFirstToken(tokenizer);
  if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
    ConverterUtils.errms(tokenizer, "premature end of file");
  }

  // Every token up to the end of the first line is one attribute name.
  while (tokenizer.ttype != StreamTokenizer.TT_EOL) {
    headerAttributes.add(new Attribute(tokenizer.sval));
    ConverterUtils.getToken(tokenizer);
  }

  // Derive the relation name from the source file name (minus any .csv
  // suffix), or fall back to "stream" when no source file is set.
  String relationName =
      (m_sourceFile != null)
          ? (m_sourceFile.getName()).replaceAll("\\.[cC][sS][vV]$", "")
          : "stream";

  m_structure = new Instances(relationName, headerAttributes, 0);
}
/** * Attempts to parse a line of the data set. * * @param tokenizer the tokenizer * @return a ArrayList containg String and Double objects representing the values of the instance. * @exception IOException if an error occurs * <pre><jml> * private_normal_behavior * requires: tokenizer != null; * ensures: \result != null; * also * private_exceptional_behavior * requires: tokenizer == null * || (* unsucessful parse *); * signals: (IOException); * </jml></pre> */ private ArrayList<Object> getInstance(StreamTokenizer tokenizer) throws IOException { ArrayList<Object> current = new ArrayList<Object>(); // Check if end of file reached. ConverterUtils.getFirstToken(tokenizer); if (tokenizer.ttype == StreamTokenizer.TT_EOF) { return null; } boolean first = true; boolean wasSep; while (tokenizer.ttype != StreamTokenizer.TT_EOL && tokenizer.ttype != StreamTokenizer.TT_EOF) { // Get next token if (!first) { ConverterUtils.getToken(tokenizer); } if (tokenizer.ttype == m_FieldSeparator.charAt(0) || tokenizer.ttype == StreamTokenizer.TT_EOL) { current.add(m_MissingValue); wasSep = true; } else { wasSep = false; if (tokenizer.sval.equals(m_MissingValue)) { current.add(new String(m_MissingValue)); } else { // try to parse as a number try { double val = Double.valueOf(tokenizer.sval).doubleValue(); current.add(new Double(val)); } catch (NumberFormatException e) { // otherwise assume its an enumerated value current.add(new String(tokenizer.sval)); } } } if (!wasSep) { ConverterUtils.getToken(tokenizer); } first = false; } // check number of values read if (current.size() != m_structure.numAttributes()) { ConverterUtils.errms( tokenizer, "wrong number of values. Read " + current.size() + ", expected " + m_structure.numAttributes()); } // check for structure update try { checkStructure(current); } catch (Exception ex) { ex.printStackTrace(); } return current; }