@Override public Instances getDataSet() throws IOException { if (m_sourceReader == null) { throw new IOException("No source has been specified"); } if (getRetrieval() == INCREMENTAL) { throw new IOException("Cannot mix getting instances in both incremental and batch modes"); } setRetrieval(BATCH); if (m_structure == null) { getStructure(); } while (readData(true)) {; } m_dataDumper.flush(); m_dataDumper.close(); // make final structure makeStructure(); Reader sr = new BufferedReader(new FileReader(m_tempFile)); ArffReader initialArff = new ArffReader(sr, m_structure, 0, m_fieldSeparatorAndEnclosures); Instances initialInsts = initialArff.getData(); sr.close(); initialArff = null; return initialInsts; }
/** * Return the full data set. If the structure hasn't yet been determined by a call to getStructure * then method should do so before processing the rest of the data set. * * @return the structure of the data set as an empty set of Instances * @exception IOException if there is no source or parsing fails */ @Override public Instances getDataSet() throws IOException { if ((m_sourceFile == null) && (m_sourceReader == null)) { throw new IOException("No source has been specified"); } if (m_structure == null) { getStructure(); } if (m_st == null) { m_st = new StreamTokenizer(m_sourceReader); initTokenizer(m_st); } m_st.ordinaryChar(m_FieldSeparator.charAt(0)); m_cumulativeStructure = new ArrayList<Hashtable<Object, Integer>>(m_structure.numAttributes()); for (int i = 0; i < m_structure.numAttributes(); i++) { m_cumulativeStructure.add(new Hashtable<Object, Integer>()); } m_cumulativeInstances = new ArrayList<ArrayList<Object>>(); ArrayList<Object> current; while ((current = getInstance(m_st)) != null) { m_cumulativeInstances.add(current); } ArrayList<Attribute> atts = new ArrayList<Attribute>(m_structure.numAttributes()); for (int i = 0; i < m_structure.numAttributes(); i++) { String attname = m_structure.attribute(i).name(); Hashtable<Object, Integer> tempHash = m_cumulativeStructure.get(i); if (tempHash.size() == 0) { atts.add(new Attribute(attname)); } else { if (m_StringAttributes.isInRange(i)) { atts.add(new Attribute(attname, (ArrayList<String>) null)); } else { ArrayList<String> values = new ArrayList<String>(tempHash.size()); // add dummy objects in order to make the ArrayList's size == capacity for (int z = 0; z < tempHash.size(); z++) { values.add("dummy"); } Enumeration e = tempHash.keys(); while (e.hasMoreElements()) { Object ob = e.nextElement(); // if (ob instanceof Double) { int index = ((Integer) tempHash.get(ob)).intValue(); String s = ob.toString(); if (s.startsWith("'") || s.startsWith("\"")) s = s.substring(1, s.length() - 1); values.set(index, new String(s)); // } } atts.add(new Attribute(attname, values)); } } } // make the instances String relationName; if (m_sourceFile != null) relationName = (m_sourceFile.getName()).replaceAll("\\.[cC][sS][vV]$", ""); else relationName = "stream"; Instances dataSet = new Instances(relationName, atts, m_cumulativeInstances.size()); for (int i = 0; i < m_cumulativeInstances.size(); i++) { current = m_cumulativeInstances.get(i); double[] vals = new double[dataSet.numAttributes()]; for (int j = 0; j < current.size(); j++) { Object cval = current.get(j); if (cval instanceof String) { if (((String) cval).compareTo(m_MissingValue) == 0) { vals[j] = Utils.missingValue(); } else { if (dataSet.attribute(j).isString()) { vals[j] = dataSet.attribute(j).addStringValue((String) cval); } else if (dataSet.attribute(j).isNominal()) { // find correct index Hashtable<Object, Integer> lookup = m_cumulativeStructure.get(j); int index = ((Integer) lookup.get(cval)).intValue(); vals[j] = index; } else { throw new IllegalStateException( "Wrong attribute type at position " + (i + 1) + "!!!"); } } } else if (dataSet.attribute(j).isNominal()) { // find correct index Hashtable<Object, Integer> lookup = m_cumulativeStructure.get(j); int index = ((Integer) lookup.get(cval)).intValue(); vals[j] = index; } else if (dataSet.attribute(j).isString()) { vals[j] = dataSet.attribute(j).addStringValue("" + cval); } else { vals[j] = ((Double) cval).doubleValue(); } } dataSet.add(new DenseInstance(1.0, vals)); } m_structure = new Instances(dataSet, 0); setRetrieval(BATCH); m_cumulativeStructure = null; // conserve memory // close the stream m_sourceReader.close(); return dataSet; }