/**
 * Reads {@code /input.txt} from the classpath, tokenizes it with a
 * StreamTokenizer (treating '.' and '\'' as ordinary single-char tokens),
 * prints each token, and reports counts of word, number and other
 * (punctuation) tokens.
 */
public static void main(String[] args) {
  // try-with-resources closes the stream; the original leaked it.
  try (InputStream is = StreamTokenering.class.getResourceAsStream("/input.txt")) {
    StreamTokenizer in = new StreamTokenizer(new InputStreamReader(is));
    in.ordinaryChar('.');
    in.ordinaryChar('\'');
    int wordCount = 0, numCount = 0, punctionCount = 0, count = 0;
    // nextToken() returns int; the original stored it in a double needlessly.
    int token;
    while ((token = in.nextToken()) != StreamTokenizer.TT_EOF) {
      count++;
      if (token == StreamTokenizer.TT_WORD) {
        wordCount++;
      } else if (token == StreamTokenizer.TT_NUMBER) {
        numCount++;
      } else {
        punctionCount++;
      }
      System.out.println(in.toString());
    }
    System.out.println("单词总数为:" + count);
    System.out.println("单词数为:" + wordCount);
    System.out.println("数字数为:" + numCount);
    // The original printed "punctionCount++" — the post-increment was dead code
    // (printed value is identical); removed.
    System.out.println("标点符号数为:" + punctionCount);
  } catch (IOException e) {
    e.printStackTrace();
  }
}
/** @tests java.io.StreamTokenizer#ordinaryChar(int) */ public void test_ordinaryCharI() throws IOException { // SM. setTest("Ffjein 893"); st.ordinaryChar('F'); st.nextToken(); assertTrue("OrdinaryChar failed." + (char) st.ttype, st.ttype == 'F'); }
/**
 * Helper to check for selectors of individual fields like "select x from y"
 * or "{'select x * from y where id=' + z}". Returns true for such selectors,
 * false otherwise.
 *
 * @param selector the selector string to inspect; may be null
 * @return true when the first word token starts with "select" (ignoring case)
 */
protected static boolean isIndividualSelector(String selector) {
  if (selector == null) {
    return false;
  }
  StreamTokenizer tok = new StreamTokenizer(new StringReader(selector));
  // Quotes must not start string tokens here — treat them as ordinary chars.
  tok.ordinaryChar('\'');
  tok.ordinaryChar('"');
  try {
    // Decide on the first word token encountered.
    for (int tt = tok.nextToken(); tt != StreamTokenizer.TT_EOF; tt = tok.nextToken()) {
      if (tt == StreamTokenizer.TT_WORD) {
        return StringUtil.startsWithIgnoreCase(tok.sval.trim(), "select");
      }
    }
  } catch (IOException e) {
    throw new RuntimeException("Unexpected error", e);
  }
  return false;
}
/** @tests java.io.StreamTokenizer#slashSlashComments(boolean) */ public void test_slashSlashCommentsZ() throws IOException { // SM. setTest("// foo \r\n /fiji \r\n -456"); st.ordinaryChar('/'); st.slashSlashComments(true); assertEquals("Test failed.", '/', st.nextToken()); assertTrue("Test failed.", st.nextToken() == StreamTokenizer.TT_WORD); }
/**
 * Creates a CSV reader over the given input, splitting fields on the supplied
 * separator character.
 *
 * @param input the buffered source to tokenize
 * @param customizedSeparator the field separator character
 */
public CSVReader(BufferedReader input, char customizedSeparator) {
  this.separator = customizedSeparator;
  parser = new StreamTokenizer(input);
  // Start from a clean slate: make every char a word char, then carve out
  // the quote and separator characters as single-char tokens.
  parser.ordinaryChars(0, 255);
  parser.wordChars(0, 255);
  parser.ordinaryChar('\"');
  parser.ordinaryChar(customizedSeparator);
  // Need to do set EOL significance after setting ordinary and word
  // chars, and need to explicitly set \n and \r as whitespace chars
  // for EOL detection to work
  parser.eolIsSignificant(true);
  parser.whitespaceChars('\n', '\n');
  parser.whitespaceChars('\r', '\r');
  atEOF = false;
}
/**
 * Parses a textual composition expression such as "1 and (2 or 3)" into a
 * PatientSearch. Numbers are 1-based references into the search history,
 * parentheses become nested sub-lists, and recognized operator words map to
 * BooleanOperator constants.
 *
 * @param description the composition expression to parse
 * @return the parsed PatientSearch, or null on any parse or validation error
 */
public PatientSearch createCompositionFilter(String description) {
  // Accepted spellings for each boolean operator.
  Set<String> andWords = new HashSet<String>();
  Set<String> orWords = new HashSet<String>();
  Set<String> notWords = new HashSet<String>();
  andWords.add("and");
  andWords.add("intersection");
  andWords.add("*");
  orWords.add("or");
  orWords.add("union");
  orWords.add("+");
  notWords.add("not");
  notWords.add("!");
  List<Object> currentLine = new ArrayList<Object>();
  try {
    StreamTokenizer st = new StreamTokenizer(new StringReader(description));
    // Parentheses must come back as single-character tokens.
    st.ordinaryChar('(');
    st.ordinaryChar(')');
    Stack<List<Object>> stack = new Stack<List<Object>>();
    while (st.nextToken() != StreamTokenizer.TT_EOF) {
      if (st.ttype == StreamTokenizer.TT_NUMBER) {
        Integer thisInt = new Integer((int) st.nval);
        // Numbers reference prior searches, so they must be in range.
        if (thisInt < 1 || thisInt > searchHistory.size()) {
          log.error("number < 1 or > search history size");
          return null;
        }
        currentLine.add(thisInt);
      } else if (st.ttype == '(') {
        // Open a nested sub-expression; remember where to resume.
        stack.push(currentLine);
        currentLine = new ArrayList<Object>();
      } else if (st.ttype == ')') {
        // Close the sub-expression and splice it into its parent list.
        List<Object> l = stack.pop();
        l.add(currentLine);
        currentLine = l;
      } else if (st.ttype == StreamTokenizer.TT_WORD) {
        String str = st.sval.toLowerCase();
        if (andWords.contains(str)) currentLine.add(PatientSetService.BooleanOperator.AND);
        else if (orWords.contains(str)) currentLine.add(PatientSetService.BooleanOperator.OR);
        else if (notWords.contains(str)) currentLine.add(PatientSetService.BooleanOperator.NOT);
        else throw new IllegalArgumentException("Don't recognize " + st.sval);
      }
    }
  } catch (Exception ex) {
    log.error("Error in description string: " + description, ex);
    return null;
  }
  if (!testCompositionList(currentLine)) {
    log.error("Description string failed test: " + description);
    return null;
  }
  // return toPatientFilter(currentLine);
  PatientSearch ret = new PatientSearch();
  ret.setParsedComposition(currentLine);
  return ret;
}
/** @tests java.io.StreamTokenizer#slashSlashComments(boolean) */
public void test_slashSlashComments_withSSOpen_NoComment() throws IOException {
  char[] data = "// t".toCharArray();
  StreamTokenizer st = new StreamTokenizer(new CharArrayReader(data));
  st.slashSlashComments(true);
  st.ordinaryChar('/');
  // The whole input is a // comment, so tokenizing goes straight to EOF.
  assertEquals(StreamTokenizer.TT_EOF, st.nextToken());
}
/** @tests java.io.StreamTokenizer#nextToken() */
@SuppressWarnings("deprecation")
public void test_nextToken() throws IOException {
  // SM.
  // Part 1: slash-star comments are skipped; numbers, words, the ordinary
  // '/', quoted strings, and EOF are all reported with the right ttype.
  setTest(
      "\r\n/* fje fje 43.4 f \r\n f g */ 456.459 \r\n"
          + "Hello / \r\n \r\n \n \r \257 Hi \'Hello World\'");
  st.ordinaryChar('/');
  st.slashStarComments(true);
  st.nextToken();
  assertTrue("Wrong Token type1: " + (char) st.ttype, st.ttype == StreamTokenizer.TT_NUMBER);
  st.nextToken();
  assertTrue("Wrong Token type2: " + st.ttype, st.ttype == StreamTokenizer.TT_WORD);
  st.nextToken();
  assertTrue("Wrong Token type3: " + st.ttype, st.ttype == '/');
  st.nextToken();
  assertTrue("Wrong Token type4: " + st.ttype, st.ttype == StreamTokenizer.TT_WORD);
  st.nextToken();
  assertTrue("Wrong Token type5: " + st.ttype, st.ttype == StreamTokenizer.TT_WORD);
  st.nextToken();
  assertTrue("Wrong Token type6: " + st.ttype, st.ttype == '\'');
  assertTrue("Wrong Token type7: " + st.ttype, st.sval.equals("Hello World"));
  st.nextToken();
  // -1 is TT_EOF.
  assertTrue("Wrong Token type8: " + st.ttype, st.ttype == -1);
  // Part 2: with eolIsSignificant, "\n\r\r" yields three '\n' tokens
  // (a lone \r is normalized to \n), then EOF once the pipe is closed.
  final PipedInputStream pin = new PipedInputStream();
  PipedOutputStream pout = new PipedOutputStream(pin);
  pout.write("hello\n\r\r".getBytes("UTF-8"));
  StreamTokenizer s = new StreamTokenizer(pin);
  s.eolIsSignificant(true);
  assertTrue(
      "Wrong token 1,1", s.nextToken() == StreamTokenizer.TT_WORD && s.sval.equals("hello"));
  assertTrue("Wrong token 1,2", s.nextToken() == '\n');
  assertTrue("Wrong token 1,3", s.nextToken() == '\n');
  assertTrue("Wrong token 1,4", s.nextToken() == '\n');
  pout.close();
  assertTrue("Wrong token 1,5", s.nextToken() == StreamTokenizer.TT_EOF);
  // Part 3: '\n' made ordinary still reports as '\n' with EOL significant,
  // and a \r\n pair collapses to a single token.
  StreamTokenizer tokenizer = new StreamTokenizer(new Support_StringReader("\n \r\n#"));
  tokenizer.ordinaryChar('\n'); // make \n ordinary
  tokenizer.eolIsSignificant(true);
  assertTrue("Wrong token 2,1", tokenizer.nextToken() == '\n');
  assertTrue("Wrong token 2,2", tokenizer.nextToken() == '\n');
  assertEquals("Wrong token 2,3", '#', tokenizer.nextToken());
}
/**
 * This method sets the syntax of the StreamTokenizer. i.e. set the whitespace,
 * comment and delimit chars.
 *
 * Note: call order matters — later calls override earlier ranges (e.g. ';'
 * is first made a word char by the range below, then re-marked whitespace).
 */
protected void setSyntax(StreamTokenizer tk) {
  tk.resetSyntax(); // wipe defaults: every char starts out "ordinary"
  tk.eolIsSignificant(false);
  tk.slashStarComments(true);
  tk.slashSlashComments(true);
  tk.whitespaceChars(0, ' '); // controls and space are whitespace
  tk.wordChars(' ' + 1, '\u00ff'); // all printable chars are word chars...
  // ...except these structural delimiters, re-marked as ordinary tokens:
  tk.ordinaryChar('[');
  tk.ordinaryChar(']');
  tk.ordinaryChar('{');
  tk.ordinaryChar('}');
  tk.ordinaryChar('-');
  tk.ordinaryChar('>');
  tk.ordinaryChar('/');
  tk.ordinaryChar('*');
  tk.quoteChar('"');
  tk.whitespaceChars(';', ';'); // ';' is ignored like whitespace
  tk.ordinaryChar('=');
}
/** @tests java.io.StreamTokenizer#slashSlashComments(boolean) */
public void test_slashSlashComments_withSSClosed() throws IOException {
  StreamTokenizer st = new StreamTokenizer(new CharArrayReader("// t".toCharArray()));
  st.slashSlashComments(false);
  st.ordinaryChar('/');
  // With // comments disabled, each slash is its own token and "t" is a word.
  assertEquals('/', st.nextToken());
  assertEquals('/', st.nextToken());
  assertEquals(StreamTokenizer.TT_WORD, st.nextToken());
}
/**
 * Creates a StreamTokenizer configured for reading ARFF files: comma and
 * whitespace separate tokens, '%' starts a comment, both quote styles are
 * honored, braces are single-char tokens and line ends are significant.
 */
private StreamTokenizer createTokenizer(Reader in) {
  StreamTokenizer st = new StreamTokenizer(in);
  st.resetSyntax();
  st.whitespaceChars(0, ' ');
  st.wordChars(' ' + 1, '\u00FF');
  st.whitespaceChars(',', ',');
  st.commentChar('%');
  st.quoteChar('"');
  st.quoteChar('\'');
  st.ordinaryChar('{');
  st.ordinaryChar('}');
  st.eolIsSignificant(true);
  return st;
}
/** @tests java.io.StreamTokenizer#commentChar(int) */
public void test_commentCharI() throws IOException {
  setTest("*comment \n / 8 'h' ");
  st.ordinaryChar('/');
  st.commentChar('*');
  // The '*' line is a comment; '/' (code 47) is the first real token.
  assertEquals(
      "nextToken() did not return the character / skiping the comments starting with *",
      47,
      st.nextToken());
  boolean gotEight = st.nextToken() == StreamTokenizer.TT_NUMBER && st.nval == 8.0;
  assertTrue("the next token returned should be the digit 8", gotEight);
  // 39 is the '\'' quote character; sval holds the quoted body.
  boolean gotQuoted = st.nextToken() == 39 && st.sval.equals("h");
  assertTrue("the next token returned should be the quote character", gotQuoted);
}
/** Create an HTTP tokenizer, given a StreamTokenizer for the web page. */
public HttpTokenizer(StreamTokenizer tokens) throws IOException {
  // Keep a handle on the underlying stream tokenizer.
  this.tokens = tokens;
  // Set up the appropriate defaults for scanning markup.
  tokens.eolIsSignificant(false);
  tokens.lowerCaseMode(true);
  // Markup punctuation is folded into words so tags stay in one token.
  for (char c : new char[] {'<', '>', '/', '=', '@', '!', '-'}) {
    tokens.wordChars(c, c);
  }
  tokens.ordinaryChar('.');
  tokens.ordinaryChar('?');
}
/**
 * Set the params (analyzerName only), Comma-separate list of Analyzer class
 * names. If the Analyzer lives in org.apache.lucene.analysis, the name can be
 * shortened by dropping the o.a.l.a part of the Fully Qualified Class Name.
 *
 * <p>Analyzer names may also refer to previously defined AnalyzerFactory's.
 *
 * <p>Example Declaration: {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer,
 * StopAnalyzer, standard.StandardAnalyzer) >
 *
 * <p>Example AnalyzerFactory usage:
 *
 * <pre>
 * -AnalyzerFactory(name:'whitespace tokenized',WhitespaceTokenizer)
 * -NewAnalyzer('whitespace tokenized')
 * </pre>
 *
 * @param params analyzerClassName, or empty for the StandardAnalyzer
 */
@Override
public void setParams(String params) {
  super.setParams(params);
  final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
  // Both quote styles delimit names; ',' separates entries.
  stok.quoteChar('"');
  stok.quoteChar('\'');
  stok.eolIsSignificant(false);
  stok.ordinaryChar(',');
  try {
    while (stok.nextToken() != StreamTokenizer.TT_EOF) {
      switch (stok.ttype) {
        case ',':
          {
            // Do nothing
            break;
          }
        case '\'':
        case '\"':
        case StreamTokenizer.TT_WORD:
          {
            // Bare words and quoted strings are both analyzer names.
            analyzerNames.add(stok.sval);
            break;
          }
        default:
          {
            throw new RuntimeException("Unexpected token: " + stok.toString());
          }
      }
    }
  } catch (RuntimeException e) {
    // Re-wrap with a line number unless the message already carries one.
    if (e.getMessage().startsWith("Line #")) {
      throw e;
    } else {
      throw new RuntimeException("Line #" + (stok.lineno() + getAlgLineNum()) + ": ", e);
    }
  } catch (Throwable t) {
    throw new RuntimeException("Line #" + (stok.lineno() + getAlgLineNum()) + ": ", t);
  }
}
/**
 * Tokenizes "Test.scn" into an AST Sequence of scene commands (Move, Object,
 * Camera, Sprite), printing a trace of what it reads.
 *
 * NOTE(review): the fixed pairs of st.nextToken() calls assume the input is
 * exactly "Keyword(id, num, ...)" — malformed input will silently mis-parse.
 * Verify against the .scn grammar.
 */
public static Sequence Tokenizer() {
  AST ass = new AST();
  Sequence seq = ass.new Sequence();
  try {
    // ArrayList<Node> Nodes = new ArrayList<Node>();
    FileReader inFile = new FileReader("Test.scn");
    StreamTokenizer st = new StreamTokenizer(inFile);
    st.ordinaryChar('.');
    st.ordinaryChar('/');
    st.eolIsSignificant(true);
    String ID;
    int x;
    int y;
    int w;
    int h;
    String fileName;
    int token = st.nextToken();
    while (token != StreamTokenizer.TT_EOF) {
      char ch;
      String s;
      switch (token) {
        case StreamTokenizer.TT_WORD:
          s = st.sval;
          if (s.equals("Move")) {
            // Move(id, x, y): skip punctuation tokens, grab id and coords.
            st.nextToken();
            st.nextToken();
            ID = st.sval;
            st.nextToken();
            st.nextToken();
            x = (int) st.nval;
            st.nextToken();
            st.nextToken();
            y = (int) st.nval;
            st.nextToken();
            seq.elements.add(new Move(new Id(ID), new Number(x), new Number(y)));
            System.out.println("Move " + ID + " " + x + ", " + y);
          }
          /*else if(s.equals("Object") || s.equals("Camera") || s.equals("Sprite")) {
          System.out.print("<Type> " + s + " ");
          }*/
          else if (s.equals("Object")) {
            // Object(id, x, y, w, h, file)
            st.nextToken();
            ID = st.sval;
            st.nextToken();
            st.nextToken();
            x = (int) st.nval;
            st.nextToken();
            st.nextToken();
            y = (int) st.nval;
            st.nextToken();
            st.nextToken();
            w = (int) st.nval;
            st.nextToken();
            st.nextToken();
            h = (int) st.nval;
            st.nextToken();
            st.nextToken();
            fileName = st.sval;
            st.nextToken();
            seq.elements.add(
                new Obj(
                    new Id(ID),
                    new Variables(
                        new Number(x), new Number(y), new Number(w), new Number(h),
                        new Id(fileName))));
            System.out.println(
                "Object " + ID + " " + x + ", " + y + ", " + w + ", " + h + ", " + fileName);
          } else if (s.equals("Camera")) {
            // Camera(id, x, y)
            st.nextToken();
            ID = st.sval;
            st.nextToken();
            st.nextToken();
            x = (int) st.nval;
            st.nextToken();
            st.nextToken();
            y = (int) st.nval;
            st.nextToken();
            seq.elements.add(new Camera(new Id(ID), new Variables(new Number(x), new Number(y))));
            System.out.println("Camera " + ID + " " + x + ", " + y);
          } else if (s.equals("Sprite")) {
            // Sprite(id, x, y, w, h, file)
            st.nextToken();
            ID = st.sval;
            st.nextToken();
            st.nextToken();
            x = (int) st.nval;
            st.nextToken();
            st.nextToken();
            y = (int) st.nval;
            st.nextToken();
            st.nextToken();
            w = (int) st.nval;
            st.nextToken();
            st.nextToken();
            h = (int) st.nval;
            st.nextToken();
            st.nextToken();
            fileName = st.sval;
            st.nextToken();
            seq.elements.add(
                new Sprite(
                    new Id(ID),
                    new Variables(
                        new Number(x), new Number(y), new Number(w), new Number(h),
                        new Id(fileName))));
            System.out.println(
                "Sprite " + ID + " " + x + ", " + y + ", " + w + ", " + h + ", " + fileName);
          } else {
            // Unrecognized word: treat as a bare identifier.
            System.out.print("<ID> " + s + " ");
          }
          break;
        case StreamTokenizer.TT_NUMBER:
          int n = (int) st.nval;
          System.out.print("<Number> " + n);
          seq.elements.add(new Number(n));
          break;
        case '(':
          ch = (char) st.ttype;
          System.out.print("<Variables>" + ch);
          break;
        case ')':
          ch = (char) st.ttype;
          System.out.print(ch);
          break;
        case ',':
          ch = (char) st.ttype;
          System.out.print(ch + " ");
          break;
        case '"':
          s = st.sval;
          System.out.print("<ID> " + "\"" + s + "\"");
          break;
        case StreamTokenizer.TT_EOL:
          System.out.println();
          // NOTE(review): no break — falls through into the '\0' case, which
          // only breaks; net effect is the same as breaking here.
        case '\0':
          break;
        default:
          s = st.sval;
          System.out.println("ERROR: Unrecognized Token: " + s);
          break;
      }
      token = st.nextToken();
    }
    inFile.close();
    System.out.println();
  } catch (IOException e) {
    System.out.println("Error: " + e);
  }
  return seq;
}
/**
 * Return the full data set. If the structure hasn't yet been determined by a
 * call to getStructure then method should do so before processing the rest of
 * the data set.
 *
 * @return the structure of the data set as an empty set of Instances
 * @exception IOException if there is no source or parsing fails
 */
@Override
public Instances getDataSet() throws IOException {
  if ((m_sourceFile == null) && (m_sourceReader == null)) {
    throw new IOException("No source has been specified");
  }
  if (m_structure == null) {
    getStructure();
  }
  if (m_st == null) {
    m_st = new StreamTokenizer(m_sourceReader);
    initTokenizer(m_st);
  }
  m_st.ordinaryChar(m_FieldSeparator.charAt(0));
  // One value->index table per attribute, filled while buffering all rows.
  m_cumulativeStructure = new ArrayList<Hashtable<Object, Integer>>(m_structure.numAttributes());
  for (int i = 0; i < m_structure.numAttributes(); i++) {
    m_cumulativeStructure.add(new Hashtable<Object, Integer>());
  }
  m_cumulativeInstances = new ArrayList<ArrayList<Object>>();
  ArrayList<Object> current;
  while ((current = getInstance(m_st)) != null) {
    m_cumulativeInstances.add(current);
  }
  // Build the final attribute list: numeric when no distinct values were
  // recorded, string when in the string range, otherwise nominal.
  ArrayList<Attribute> atts = new ArrayList<Attribute>(m_structure.numAttributes());
  for (int i = 0; i < m_structure.numAttributes(); i++) {
    String attname = m_structure.attribute(i).name();
    Hashtable<Object, Integer> tempHash = m_cumulativeStructure.get(i);
    if (tempHash.size() == 0) {
      atts.add(new Attribute(attname));
    } else {
      if (m_StringAttributes.isInRange(i)) {
        atts.add(new Attribute(attname, (ArrayList<String>) null));
      } else {
        ArrayList<String> values = new ArrayList<String>(tempHash.size());
        // add dummy objects in order to make the ArrayList's size == capacity
        for (int z = 0; z < tempHash.size(); z++) {
          values.add("dummy");
        }
        Enumeration e = tempHash.keys();
        while (e.hasMoreElements()) {
          Object ob = e.nextElement();
          // if (ob instanceof Double) {
          int index = ((Integer) tempHash.get(ob)).intValue();
          String s = ob.toString();
          // Strip surrounding quotes from quoted nominal values.
          if (s.startsWith("'") || s.startsWith("\"")) s = s.substring(1, s.length() - 1);
          values.set(index, new String(s));
          // }
        }
        atts.add(new Attribute(attname, values));
      }
    }
  }
  // make the instances
  String relationName;
  if (m_sourceFile != null)
    relationName = (m_sourceFile.getName()).replaceAll("\\.[cC][sS][vV]$", "");
  else relationName = "stream";
  Instances dataSet = new Instances(relationName, atts, m_cumulativeInstances.size());
  for (int i = 0; i < m_cumulativeInstances.size(); i++) {
    current = m_cumulativeInstances.get(i);
    double[] vals = new double[dataSet.numAttributes()];
    for (int j = 0; j < current.size(); j++) {
      Object cval = current.get(j);
      if (cval instanceof String) {
        if (((String) cval).compareTo(m_MissingValue) == 0) {
          vals[j] = Utils.missingValue();
        } else {
          if (dataSet.attribute(j).isString()) {
            vals[j] = dataSet.attribute(j).addStringValue((String) cval);
          } else if (dataSet.attribute(j).isNominal()) {
            // find correct index
            Hashtable<Object, Integer> lookup = m_cumulativeStructure.get(j);
            int index = ((Integer) lookup.get(cval)).intValue();
            vals[j] = index;
          } else {
            throw new IllegalStateException(
                "Wrong attribute type at position " + (i + 1) + "!!!");
          }
        }
      } else if (dataSet.attribute(j).isNominal()) {
        // find correct index
        Hashtable<Object, Integer> lookup = m_cumulativeStructure.get(j);
        int index = ((Integer) lookup.get(cval)).intValue();
        vals[j] = index;
      } else if (dataSet.attribute(j).isString()) {
        vals[j] = dataSet.attribute(j).addStringValue("" + cval);
      } else {
        vals[j] = ((Double) cval).doubleValue();
      }
    }
    dataSet.add(new DenseInstance(1.0, vals));
  }
  m_structure = new Instances(dataSet, 0);
  setRetrieval(BATCH);
  m_cumulativeStructure = null; // conserve memory
  // close the stream
  m_sourceReader.close();
  return dataSet;
}
@Override public Instance getNextInstance(Instances structure) throws IOException { m_structure = structure; if (getRetrieval() == BATCH) { throw new IOException("Cannot mix getting instances in both incremental and batch modes"); } setRetrieval(INCREMENTAL); if (m_dataDumper != null) { // close the uneeded temp files (if necessary) m_dataDumper.close(); m_dataDumper = null; } if (m_rowBuffer.size() > 0 && m_incrementalReader == null) { StringBuilder tempB = new StringBuilder(); for (String r : m_rowBuffer) { tempB.append(r).append("\n"); } m_numBufferedRows = m_rowBuffer.size(); Reader batchReader = new BufferedReader(new StringReader(tempB.toString())); m_incrementalReader = new ArffReader(batchReader, m_structure, 0, 0, m_fieldSeparatorAndEnclosures); m_rowBuffer.clear(); } if (m_numBufferedRows == 0) { // m_incrementalReader = new ArffReader(m_sourceReader, m_structure, 0, // 0); m_numBufferedRows = -1; m_st = new StreamTokenizer(m_sourceReader); initTokenizer(m_st); m_st.ordinaryChar(m_FieldSeparator.charAt(0)); // m_incrementalReader = null; } Instance current = null; if (m_sourceReader != null) { if (m_incrementalReader != null) { current = m_incrementalReader.readInstance(m_structure); } else { if (getInstance(m_st) != null) { current = makeInstance(); } } if (current == null) {} if (m_numBufferedRows > 0) { m_numBufferedRows--; } } if ((m_sourceReader != null) && (current == null)) { try { // close the stream m_sourceReader.close(); m_sourceReader = null; // reset(); } catch (Exception ex) { ex.printStackTrace(); } } return current; }
/**
 * Reads the first row of the source to determine attribute names (or
 * synthesizes "attN" names when there is no header row), initializes the
 * per-attribute type table from the configured ranges and nominal label
 * specs, then scans the first batch to refine the structure.
 *
 * @throws IOException if the source is empty or tokenizing fails
 */
private void readHeader() throws IOException {
  m_rowCount = 1;
  m_incrementalReader = null;
  m_current = new ArrayList<Object>();
  openTempFiles();
  m_rowBuffer = new ArrayList<String>();
  String firstRow = m_sourceReader.readLine();
  if (firstRow == null) {
    throw new IOException("No data in the file!");
  }
  if (m_noHeaderRow) {
    // First row is data, not a header — buffer it for later replay.
    m_rowBuffer.add(firstRow);
  }
  ArrayList<Attribute> attribNames = new ArrayList<Attribute>();
  // now tokenize to determine attribute names (or create att names if
  // no header row
  StringReader sr = new StringReader(firstRow + "\n");
  // System.out.print(firstRow + "\n");
  m_st = new StreamTokenizer(sr);
  initTokenizer(m_st);
  m_st.ordinaryChar(m_FieldSeparator.charAt(0));
  int attNum = 1;
  StreamTokenizerUtils.getFirstToken(m_st);
  if (m_st.ttype == StreamTokenizer.TT_EOF) {
    StreamTokenizerUtils.errms(m_st, "premature end of file");
  }
  boolean first = true;
  boolean wasSep;
  // Walk the first row token by token; each non-separator token is a column.
  while (m_st.ttype != StreamTokenizer.TT_EOL && m_st.ttype != StreamTokenizer.TT_EOF) {
    // Get next token
    if (!first) {
      StreamTokenizerUtils.getToken(m_st);
    }
    if (m_st.ttype == m_FieldSeparator.charAt(0) || m_st.ttype == StreamTokenizer.TT_EOL) {
      wasSep = true;
    } else {
      wasSep = false;
      String attName = null;
      if (m_noHeaderRow) {
        attName = "att" + attNum;
        attNum++;
      } else {
        attName = m_st.sval;
      }
      attribNames.add(new Attribute(attName, (java.util.List<String>) null));
    }
    if (!wasSep) {
      StreamTokenizerUtils.getToken(m_st);
    }
    first = false;
  }
  String relationName;
  if (m_sourceFile != null) {
    relationName = (m_sourceFile.getName()).replaceAll("\\.[cC][sS][vV]$", "");
  } else {
    relationName = "stream";
  }
  m_structure = new Instances(relationName, attribNames, 0);
  m_NominalAttributes.setUpper(m_structure.numAttributes() - 1);
  m_StringAttributes.setUpper(m_structure.numAttributes() - 1);
  m_dateAttributes.setUpper(m_structure.numAttributes() - 1);
  m_numericAttributes.setUpper(m_structure.numAttributes() - 1);
  m_nominalVals = new HashMap<Integer, LinkedHashSet<String>>();
  // Assign each attribute its initial type from the configured ranges.
  m_types = new TYPE[m_structure.numAttributes()];
  for (int i = 0; i < m_structure.numAttributes(); i++) {
    if (m_NominalAttributes.isInRange(i)) {
      m_types[i] = TYPE.NOMINAL;
      LinkedHashSet<String> ts = new LinkedHashSet<String>();
      m_nominalVals.put(i, ts);
    } else if (m_StringAttributes.isInRange(i)) {
      m_types[i] = TYPE.STRING;
    } else if (m_dateAttributes.isInRange(i)) {
      m_types[i] = TYPE.DATE;
    } else if (m_numericAttributes.isInRange(i)) {
      m_types[i] = TYPE.NUMERIC;
    } else {
      m_types[i] = TYPE.UNDETERMINED;
    }
  }
  // Nominal label specs are "attRangeOrNames:label1,label2,...".
  if (m_nominalLabelSpecs.size() > 0) {
    for (String spec : m_nominalLabelSpecs) {
      String[] attsAndLabels = spec.split(":");
      if (attsAndLabels.length == 2) {
        String[] labels = attsAndLabels[1].split(",");
        try {
          // try as a range string first
          Range tempR = new Range();
          tempR.setRanges(attsAndLabels[0].trim());
          tempR.setUpper(m_structure.numAttributes() - 1);
          int[] rangeIndexes = tempR.getSelection();
          for (int i = 0; i < rangeIndexes.length; i++) {
            m_types[rangeIndexes[i]] = TYPE.NOMINAL;
            LinkedHashSet<String> ts = new LinkedHashSet<String>();
            for (String lab : labels) {
              ts.add(lab);
            }
            m_nominalVals.put(rangeIndexes[i], ts);
          }
        } catch (IllegalArgumentException e) {
          // one or more named attributes?
          String[] attNames = attsAndLabels[0].split(",");
          for (String attN : attNames) {
            Attribute a = m_structure.attribute(attN.trim());
            if (a != null) {
              int attIndex = a.index();
              m_types[attIndex] = TYPE.NOMINAL;
              LinkedHashSet<String> ts = new LinkedHashSet<String>();
              for (String lab : labels) {
                ts.add(lab);
              }
              m_nominalVals.put(attIndex, ts);
            }
          }
        }
      }
    }
  }
  // Prevents the first row from getting lost in the
  // case where there is no header row and we're
  // running in batch mode
  if (m_noHeaderRow && getRetrieval() == BATCH) {
    StreamTokenizer tempT = new StreamTokenizer(new StringReader(firstRow));
    initTokenizer(tempT);
    tempT.ordinaryChar(m_FieldSeparator.charAt(0));
    String checked = getInstance(tempT);
    dumpRow(checked);
  }
  m_st = new StreamTokenizer(m_sourceReader);
  initTokenizer(m_st);
  m_st.ordinaryChar(m_FieldSeparator.charAt(0));
  // try and determine a more accurate structure from the first batch
  readData(false || getRetrieval() == BATCH);
  makeStructure();
}
/** @tests java.io.StreamTokenizer#slashStarComments(boolean) */
public void test_slashStarCommentsZ() throws IOException {
  setTest("/* foo \r\n /fiji \r\n*/ -456");
  st.ordinaryChar('/');
  st.slashStarComments(true);
  // The entire /* ... */ span is skipped; -456 parses as a number.
  boolean gotNumber = st.nextToken() == StreamTokenizer.TT_NUMBER;
  assertTrue("Test failed.", gotNumber);
}
/**
 * Tokenizes a RIB file and feeds recognized RIB elements to a RibFactory,
 * which builds the XML document written out at the end.
 *
 * @param ribFileReader rib file reader
 */
public Rib2Xml(FileReader ribFileReader) {
  /* Configure log4j, read conf out of jar file */
  Class clazz = getClass();
  URL url = clazz.getResource("/conf/log4j.xml");
  if (url == null) {
    /* Try reading via filename */
    // NOTE(review): this path configures from ../conf/log4j.xml and then
    // still prints an error and exits — confirm whether the fallback is
    // meant to abort or to continue.
    DOMConfigurator.configure("../conf/log4j.xml");
    System.err.println("Error: Configuration file for Log4j (log4j.xml) not found, aborting...");
    System.exit(1);
  }
  DOMConfigurator.configure(url);
  /* Create the ribfactory which deal with all the rib elements */
  Config config = Config.instance();
  RibFactory ribFac = new RibFactory(config);
  Vector ribNames = config.getNames();
  StreamTokenizer thTokens = new StreamTokenizer(ribFileReader);
  // thTokens.resetSyntax();
  thTokens.commentChar('#');
  thTokens.eolIsSignificant(false);
  thTokens.parseNumbers();
  thTokens.ordinaryChar('[');
  thTokens.ordinaryChar(']');
  thTokens.quoteChar('"');
  int count = 0;
  // Accumulates one RIB element (keyword + args) until the next keyword.
  String factoryInput = "";
  try {
    while (thTokens.nextToken() != StreamTokenizer.TT_EOF) {
      logger.debug(thTokens.lineno() + ": " + thTokens.sval + ": ttype: " + thTokens.ttype);
      if (thTokens.ttype == StreamTokenizer.TT_NUMBER) {
        logger.debug(thTokens.lineno() + ": " + thTokens.nval);
        factoryInput += " " + String.valueOf(thTokens.nval);
        count++;
      } else if (thTokens.ttype == StreamTokenizer.TT_WORD) {
        if (ribNames.contains(thTokens.sval)) {
          // A new RIB keyword: flush the accumulated element first.
          logger.debug(factoryInput);
          // AbstractRib Factory called to add an element to xml document
          logger.debug("Elements: " + count + ": " + factoryInput);
          ribFac.processRibElement(factoryInput);
          factoryInput = thTokens.sval;
        } else {
          factoryInput += " " + thTokens.sval;
        }
        logger.debug(thTokens.lineno() + ": " + thTokens.sval);
        count++;
      } else {
        if (thTokens.ttype != '"') {
          // Single-character tokens ('[' and ']') pass through verbatim.
          logger.debug(thTokens.lineno() + ": " + (char) thTokens.ttype);
          factoryInput += " " + (char) thTokens.ttype;
          count++;
        } else if (thTokens.sval != null) {
          // Quoted strings are re-wrapped in their quote character.
          logger.debug(
              thTokens.lineno() + ": " + (char) thTokens.ttype + thTokens.sval
                  + (char) thTokens.ttype);
          factoryInput += " " + (char) thTokens.ttype + thTokens.sval + (char) thTokens.ttype;
          count++;
        }
      }
    }
  } catch (IOException e) {
    logger.error(e.toString());
  }
  logger.info("Tokens: " + count);
  RibDocument ribDoc = RibDocument.newInstance();
  ribDoc.toFile();
}
/**
 * Return an interned VarInfoAux that represents a given string. Elements are
 * separated by commas, in the form:
 *
 * <p>x = a, "a key" = "a value"
 *
 * <p>Parse allow for quoted elements. White space to the left and right of
 * keys and values do not matter, but inbetween does.
 *
 * <p>Implementation note: every token is interned, so the reference
 * comparisons (token == "[", etc.) below are intentional and correct.
 */
public static /*@Interned*/ VarInfoAux parse(String inString) throws IOException {
  Reader inStringReader = new StringReader(inString);
  StreamTokenizer tok = new StreamTokenizer(inStringReader);
  tok.resetSyntax();
  // Everything is a word char except quotes, spaces, and the four
  // structural characters handled below.
  tok.wordChars(0, Integer.MAX_VALUE);
  tok.quoteChar('\"');
  tok.whitespaceChars(' ', ' ');
  tok.ordinaryChar('[');
  tok.ordinaryChar(']');
  tok.ordinaryChars(',', ',');
  tok.ordinaryChars('=', '=');
  Map</*@Interned*/ String, /*@Interned*/ String> map = theDefault.map;
  String key = "";
  String value = "";
  boolean seenEqual = false;
  boolean insideVector = false;
  for (int tokInfo = tok.nextToken();
      tokInfo != StreamTokenizer.TT_EOF;
      tokInfo = tok.nextToken()) {
    @SuppressWarnings("interning") // initialization-checking pattern
    boolean mapUnchanged = (map == theDefault.map);
    if (mapUnchanged) {
      // We use default values if none are specified. We initialize
      // here rather than above to save time when there are no tokens.
      map = new HashMap</*@Interned*/ String, /*@Interned*/ String>(theDefault.map);
    }
    /*@Interned*/ String token;
    if (tok.ttype == StreamTokenizer.TT_WORD || tok.ttype == '\"') {
      assert tok.sval != null
          : "@AssumeAssertion(nullness): representation invariant of StreamTokenizer";
      token = tok.sval.trim().intern();
    } else {
      token = ((char) tok.ttype + "").intern();
    }
    debug.fine("Token info: " + tokInfo + " " + token);
    if (token == "[") { // interned
      if (!seenEqual) throw new IOException("Aux option did not contain an '='");
      if (insideVector) throw new IOException("Vectors cannot be nested in an aux option");
      if (value.length() > 0) throw new IOException("Cannot mix scalar and vector values");
      insideVector = true;
      value = "";
    } else if (token == "]") { // interned
      if (!insideVector) throw new IOException("']' without preceding '['");
      insideVector = false;
    } else if (token == ",") { // interned
      // End of one key=value pair: commit it and reset the accumulators.
      if (!seenEqual) throw new IOException("Aux option did not contain an '='");
      if (insideVector) throw new IOException("',' cannot be used inside a vector");
      map.put(key.intern(), value.intern());
      key = "";
      value = "";
      seenEqual = false;
    } else if (token == "=") { // interned
      if (seenEqual) throw new IOException("Aux option contained more than one '='");
      if (insideVector) throw new IOException("'=' cannot be used inside a vector");
      seenEqual = true;
    } else {
      // Ordinary word: append to whichever side of '=' we are on.
      if (!seenEqual) {
        key = (key + " " + token).trim();
      } else if (insideVector) {
        value = value + " \"" + token.trim() + "\"";
      } else {
        value = (value + " " + token).trim();
      }
    }
  }
  // Commit the trailing pair (input need not end with ',').
  if (seenEqual) {
    map.put(key.intern(), value.intern());
  }
  // Interning
  VarInfoAux result = new VarInfoAux(map).intern();
  assert interningMap != null
      : "@AssumeAssertion(nullness): application invariant: postcondition of intern(), which was just called";
  if (debug.isLoggable(Level.FINE)) {
    debug.fine("New parse " + result);
    debug.fine("Intern table size: " + new Integer(interningMap.size()));
  }
  return result;
}