Exemplo n.º 1
0
  /**
   * Tokenizes the classpath resource {@code /input.txt}, prints every token and then the total,
   * word, number and punctuation counts.
   *
   * <p>{@code '.'} and {@code '\''} are declared ordinary, so each occurrence is returned as a
   * single-character token and falls into the punctuation count.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    InputStream is = StreamTokenering.class.getResourceAsStream("/input.txt");
    if (is == null) {
      // getResourceAsStream returns null for a missing resource; report that instead of letting
      // the InputStreamReader constructor fail with a NullPointerException.
      System.err.println("Resource /input.txt not found on the classpath");
      return;
    }
    try {
      StreamTokenizer in = new StreamTokenizer(new InputStreamReader(is));

      in.ordinaryChar('.');
      in.ordinaryChar('\'');
      int wordCount = 0, numCount = 0, punctionCount = 0, count = 0;
      // nextToken() returns an int token type, so it is kept as an int rather than a double.
      int token;
      while ((token = in.nextToken()) != StreamTokenizer.TT_EOF) {
        count++;
        if (token == StreamTokenizer.TT_WORD) {
          wordCount++;
        } else if (token == StreamTokenizer.TT_NUMBER) {
          numCount++;
        } else {
          punctionCount++;
        }
        System.out.println(in.toString());
      }
      System.out.println("单词总数为:" + count);
      System.out.println("单词数为:" + wordCount);
      System.out.println("数字数为:" + numCount);
      System.out.println("标点符号数为:" + punctionCount);
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      // Release the resource stream on every path, including a failed read.
      try {
        is.close();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }
 /**
  * Checks that a letter declared ordinary is returned as its own single-character token instead
  * of starting a word.
  *
  * @tests java.io.StreamTokenizer#ordinaryChar(int)
  */
 public void test_ordinaryCharI() throws IOException {
   setTest("Ffjein 893");
   st.ordinaryChar('F');
   st.nextToken();
   // Capture the type once so the failure message and the check read the same value.
   final int tokenType = st.ttype;
   assertTrue("OrdinaryChar failed." + (char) tokenType, tokenType == 'F');
 }
 /**
  * Helper that recognizes selectors of individual fields such as "select x from y" or "{'select x
  * from y where id=' + z}".
  *
  * <p>Both quote characters are made ordinary so quoted text is split into plain words; the
  * decision is taken on the first word token encountered.
  *
  * @param selector the selector text, may be null
  * @return the result of {@code StringUtil.startsWithIgnoreCase(firstWord, "select")} for the
  *     first word token; false if {@code selector} is null or contains no word token
  */
 protected static boolean isIndividualSelector(String selector) {
   if (selector == null) {
     return false;
   }
   StreamTokenizer scanner = new StreamTokenizer(new StringReader(selector));
   scanner.ordinaryChar('"');
   scanner.ordinaryChar('\'');
   try {
     for (int type = scanner.nextToken();
         type != StreamTokenizer.TT_EOF;
         type = scanner.nextToken()) {
       if (type != StreamTokenizer.TT_WORD) {
         continue;
       }
       // Only the first word matters.
       return StringUtil.startsWithIgnoreCase(scanner.sval.trim(), "select");
     }
     return false;
   } catch (IOException e) {
     throw new RuntimeException("Unexpected error", e);
   }
 }
 /**
  * Checks slash-slash comment handling when {@code '/'} is an ordinary character: the leading
  * {@code //} line is expected to be skipped, the single {@code '/'} on the next line returned as
  * a character token, followed by a word.
  *
  * @tests java.io.StreamTokenizer#slashSlashComments(boolean)
  */
 public void test_slashSlashCommentsZ() throws IOException {
   setTest("// foo \r\n /fiji \r\n -456");
   st.ordinaryChar('/');
   st.slashSlashComments(true);

   final int afterComment = st.nextToken();
   assertEquals("Test failed.", '/', afterComment);

   final int afterSlash = st.nextToken();
   assertTrue("Test failed.", afterSlash == StreamTokenizer.TT_WORD);
 }
Exemplo n.º 5
0
  /**
   * Creates a reader that tokenizes {@code input} using a caller-supplied field separator.
   *
   * @param input the text to tokenize
   * @param customizedSeparator the character that separates fields
   */
  public CSVReader(BufferedReader input, char customizedSeparator) {
    atEOF = false;
    separator = customizedSeparator;

    parser = new StreamTokenizer(input);

    // Reset characters 0..255 to ordinary, then declare that whole range word constituents.
    parser.ordinaryChars(0, 255);
    parser.wordChars(0, 255);

    // The separator and the double quote are returned as single-character tokens.
    parser.ordinaryChar(customizedSeparator);
    parser.ordinaryChar('"');

    // EOL significance has to be configured after the ordinary and word
    // character setup, and \r and \n must be declared whitespace explicitly
    // for EOL detection to work.
    parser.eolIsSignificant(true);
    parser.whitespaceChars('\r', '\r');
    parser.whitespaceChars('\n', '\n');
  }
  /**
   * Parses a boolean composition of saved searches into a {@code PatientSearch}.
   *
   * <p>The description is made of 1-based search-history numbers, parentheses for grouping, and
   * the operators {@code and}/{@code intersection}/{@code *}, {@code or}/{@code union}/{@code +}
   * and {@code not}/{@code !} (operator words are matched case-insensitively). Numbers are stored
   * as {@code Integer}s, operators as {@code PatientSetService.BooleanOperator} values, and each
   * parenthesized group as a nested list.
   *
   * @param description the composition string, e.g. {@code "(1 and 2) or not 3"}
   * @return a search holding the parsed composition, or null if the description cannot be
   *     parsed, references a number outside the search history, has unbalanced parentheses, or
   *     is rejected by {@code testCompositionList}
   */
  public PatientSearch createCompositionFilter(String description) {
    Set<String> andWords = new HashSet<String>();
    Set<String> orWords = new HashSet<String>();
    Set<String> notWords = new HashSet<String>();
    andWords.add("and");
    andWords.add("intersection");
    andWords.add("*");
    orWords.add("or");
    orWords.add("union");
    orWords.add("+");
    notWords.add("not");
    notWords.add("!");

    List<Object> currentLine = new ArrayList<Object>();

    try {
      StreamTokenizer st = new StreamTokenizer(new StringReader(description));
      st.ordinaryChar('(');
      st.ordinaryChar(')');
      Stack<List<Object>> stack = new Stack<List<Object>>();
      while (st.nextToken() != StreamTokenizer.TT_EOF) {
        if (st.ttype == StreamTokenizer.TT_NUMBER) {
          int thisInt = (int) st.nval;
          if (thisInt < 1 || thisInt > searchHistory.size()) {
            log.error("number < 1 or > search history size");
            return null;
          }
          currentLine.add(Integer.valueOf(thisInt));
        } else if (st.ttype == '(') {
          stack.push(currentLine);
          currentLine = new ArrayList<Object>();
        } else if (st.ttype == ')') {
          if (stack.isEmpty()) {
            throw new IllegalArgumentException("Unbalanced ')' in description");
          }
          List<Object> l = stack.pop();
          l.add(currentLine);
          currentLine = l;
        } else if (st.ttype == StreamTokenizer.TT_WORD
            || st.ttype == '*'
            || st.ttype == '+'
            || st.ttype == '!') {
          // With the default syntax table '*', '+' and '!' are ordinary characters, so they are
          // reported through ttype rather than as TT_WORD; map them to their one-character
          // string so the symbolic operators registered above are actually recognized.
          String str =
              st.ttype == StreamTokenizer.TT_WORD
                  ? st.sval.toLowerCase(java.util.Locale.ENGLISH)
                  : String.valueOf((char) st.ttype);
          if (andWords.contains(str)) currentLine.add(PatientSetService.BooleanOperator.AND);
          else if (orWords.contains(str)) currentLine.add(PatientSetService.BooleanOperator.OR);
          else if (notWords.contains(str)) currentLine.add(PatientSetService.BooleanOperator.NOT);
          else throw new IllegalArgumentException("Don't recognize " + st.sval);
        }
      }
      if (!stack.isEmpty()) {
        // An unclosed '(' would otherwise leave currentLine pointing at the innermost group and
        // silently drop everything parsed before it.
        throw new IllegalArgumentException("Unbalanced '(' in description");
      }
    } catch (Exception ex) {
      log.error("Error in description string: " + description, ex);
      return null;
    }

    if (!testCompositionList(currentLine)) {
      log.error("Description string failed test: " + description);
      return null;
    }

    PatientSearch ret = new PatientSearch();
    ret.setParsedComposition(currentLine);
    return ret;
  }
  /**
   * Checks that with slash-slash comments enabled an input consisting only of a {@code //} line
   * yields end-of-file as the first token, even though {@code '/'} is declared ordinary.
   *
   * @tests java.io.StreamTokenizer#slashSlashComments(boolean)
   */
  public void test_slashSlashComments_withSSOpen_NoComment() throws IOException {
    StreamTokenizer st = new StreamTokenizer(new CharArrayReader("// t".toCharArray()));
    st.slashSlashComments(true);
    st.ordinaryChar('/');

    final int firstToken = st.nextToken();
    assertEquals(StreamTokenizer.TT_EOF, firstToken);
  }
  /**
   * Exercises {@code nextToken()} in three scenarios: a mixed input with a slash-star comment,
   * numbers, words, an ordinary {@code '/'} and a quoted string; end-of-line reporting on a piped
   * byte stream; and end-of-line reporting when {@code '\n'} was first declared ordinary.
   *
   * @tests java.io.StreamTokenizer#nextToken()
   */
  @SuppressWarnings("deprecation")
  public void test_nextToken() throws IOException {
    // SM.
    setTest(
        "\r\n/* fje fje 43.4 f \r\n f g */  456.459 \r\n"
            + "Hello  / 	\r\n \r\n \n \r \257 Hi \'Hello World\'");
    st.ordinaryChar('/');
    st.slashStarComments(true);
    // The /* ... */ block is expected to be skipped, so the first token is the number.
    st.nextToken();
    assertTrue("Wrong Token type1: " + (char) st.ttype, st.ttype == StreamTokenizer.TT_NUMBER);
    st.nextToken();
    assertTrue("Wrong Token type2: " + st.ttype, st.ttype == StreamTokenizer.TT_WORD);
    // '/' was made ordinary above, so it is expected as a character token.
    st.nextToken();
    assertTrue("Wrong Token type3: " + st.ttype, st.ttype == '/');
    // The character written as the octal escape \257 is expected to be reported as a word.
    st.nextToken();
    assertTrue("Wrong Token type4: " + st.ttype, st.ttype == StreamTokenizer.TT_WORD);
    st.nextToken();
    assertTrue("Wrong Token type5: " + st.ttype, st.ttype == StreamTokenizer.TT_WORD);
    // A quoted string is reported with the quote character as its type and the body in sval.
    st.nextToken();
    assertTrue("Wrong Token type6: " + st.ttype, st.ttype == '\'');
    assertTrue("Wrong Token type7: " + st.ttype, st.sval.equals("Hello World"));
    // -1 is the end-of-input token type expected here.
    st.nextToken();
    assertTrue("Wrong Token type8: " + st.ttype, st.ttype == -1);

    // Second scenario: tokenizer built directly on an InputStream, with significant line ends.
    final PipedInputStream pin = new PipedInputStream();
    PipedOutputStream pout = new PipedOutputStream(pin);
    pout.write("hello\n\r\r".getBytes("UTF-8"));
    StreamTokenizer s = new StreamTokenizer(pin);
    s.eolIsSignificant(true);
    assertTrue(
        "Wrong token 1,1", s.nextToken() == StreamTokenizer.TT_WORD && s.sval.equals("hello"));
    // Each of the three line terminators written above is expected as a '\n' token.
    assertTrue("Wrong token 1,2", s.nextToken() == '\n');
    assertTrue("Wrong token 1,3", s.nextToken() == '\n');
    assertTrue("Wrong token 1,4", s.nextToken() == '\n');
    // Closing the writing end lets the tokenizer observe end-of-file.
    pout.close();
    assertTrue("Wrong token 1,5", s.nextToken() == StreamTokenizer.TT_EOF);
    // Third scenario: '\n' is declared ordinary before line ends are made significant.
    StreamTokenizer tokenizer = new StreamTokenizer(new Support_StringReader("\n \r\n#"));
    tokenizer.ordinaryChar('\n'); // make \n ordinary
    tokenizer.eolIsSignificant(true);
    assertTrue("Wrong token 2,1", tokenizer.nextToken() == '\n');
    assertTrue("Wrong token 2,2", tokenizer.nextToken() == '\n');
    assertEquals("Wrong token 2,3", '#', tokenizer.nextToken());
  }
Exemplo n.º 9
0
 /**
  * Configures the syntax table of the given StreamTokenizer: whitespace, comment styles, quote
  * character and the single-character delimiters.
  *
  * @param tk the tokenizer to configure
  */
 protected void setSyntax(StreamTokenizer tk) {
   tk.resetSyntax();

   // Character ranges first: control characters and space separate tokens, everything above
   // space up to 0xFF starts out as a word constituent.
   tk.whitespaceChars(0, ' ');
   tk.wordChars(' ' + 1, '\u00ff');

   // Single-character overrides of the ranges above.
   tk.whitespaceChars(';', ';');
   tk.quoteChar('"');
   final char[] delimiters = {'[', ']', '{', '}', '-', '>', '/', '*', '='};
   for (char delimiter : delimiters) {
     tk.ordinaryChar(delimiter);
   }

   // Comment styles and end-of-line handling.
   tk.slashSlashComments(true);
   tk.slashStarComments(true);
   tk.eolIsSignificant(false);
 }
  /**
   * Checks that with slash-slash comments disabled and {@code '/'} declared ordinary, the input
   * {@code "// t"} is reported as two {@code '/'} character tokens followed by a word.
   *
   * @tests java.io.StreamTokenizer#slashSlashComments(boolean)
   */
  public void test_slashSlashComments_withSSClosed() throws IOException {
    StreamTokenizer st = new StreamTokenizer(new CharArrayReader("// t".toCharArray()));
    st.slashSlashComments(false);
    st.ordinaryChar('/');

    final int firstSlash = st.nextToken();
    assertEquals('/', firstSlash);
    final int secondSlash = st.nextToken();
    assertEquals('/', secondSlash);
    final int word = st.nextToken();
    assertEquals(StreamTokenizer.TT_WORD, word);
  }
 /**
  * Builds the StreamTokenizer used for reading ARFF files.
  *
  * <p>Control characters, space and comma separate tokens, {@code %} starts a comment, both
  * quote characters delimit strings, braces are single-character tokens and line ends are
  * reported.
  *
  * @param in the source to tokenize
  * @return the configured tokenizer
  */
 private StreamTokenizer createTokenizer(Reader in) {
   StreamTokenizer arffTokens = new StreamTokenizer(in);
   arffTokens.resetSyntax();
   arffTokens.eolIsSignificant(true);

   // Character ranges first ...
   arffTokens.whitespaceChars(0, ' ');
   arffTokens.wordChars(' ' + 1, '\u00FF');

   // ... then the single characters that override them.
   arffTokens.ordinaryChar('{');
   arffTokens.ordinaryChar('}');
   arffTokens.quoteChar('\'');
   arffTokens.quoteChar('"');
   arffTokens.commentChar('%');
   arffTokens.whitespaceChars(',', ',');
   return arffTokens;
 }
 /**
  * Checks that a custom comment character hides the rest of its line: after the {@code *} line
  * the ordinary {@code '/'}, the number 8 and the quoted string {@code 'h'} are expected in that
  * order.
  *
  * @tests java.io.StreamTokenizer#commentChar(int)
  */
 public void test_commentCharI() throws IOException {
   setTest("*comment \n / 8 'h' ");
   st.ordinaryChar('/');
   st.commentChar('*');

   final int slashToken = st.nextToken();
   assertEquals(
       "nextToken() did not return the character / skiping the comments starting with *",
       '/',
       slashToken);

   final int numberToken = st.nextToken();
   assertTrue(
       "the next token returned should be the digit 8",
       numberToken == StreamTokenizer.TT_NUMBER && st.nval == 8.0);

   final int quoteToken = st.nextToken();
   assertTrue(
       "the next token returned should be the quote character",
       quoteToken == '\'' && st.sval.equals("h"));
 }
Exemplo n.º 13
0
  /** Create an HTTP tokenizer, given a StreamTokenizer for the web page. */
  public HttpTokenizer(StreamTokenizer tokens) throws IOException {
    // Create a stream tokenizer
    this.tokens = tokens;

    // Set up the appropriate defaults
    tokens.eolIsSignificant(false);
    tokens.lowerCaseMode(true);
    tokens.wordChars('<', '<');
    tokens.wordChars('>', '>');
    tokens.wordChars('/', '/');
    tokens.wordChars('=', '=');
    tokens.wordChars('@', '@');
    tokens.wordChars('!', '!');
    tokens.wordChars('-', '-');
    tokens.ordinaryChar('.');
    tokens.ordinaryChar('?');
  }
 /**
  * Set the params (analyzerName only): a comma-separated list of Analyzer class names. If the
  * Analyzer lives in org.apache.lucene.analysis, the name can be shortened by dropping the o.a.l.a
  * part of the Fully Qualified Class Name.
  *
  * <p>Analyzer names may also refer to previously defined AnalyzerFactory's.
  *
  * <p>Example Declaration: {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer,
  * StopAnalyzer, standard.StandardAnalyzer) >
  *
  * <p>Example AnalyzerFactory usage:
  *
  * <pre>
  * -AnalyzerFactory(name:'whitespace tokenized',WhitespaceTokenizer)
  * -NewAnalyzer('whitespace tokenized')
  * </pre>
  *
  * @param params analyzerClassName, or empty for the StandardAnalyzer
  * @throws RuntimeException if the parameters cannot be tokenized or contain an unexpected
  *     token; the message is prefixed with {@code "Line #"} and a line number
  */
 @Override
 public void setParams(String params) {
   super.setParams(params);
   final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
   stok.quoteChar('"');
   stok.quoteChar('\'');
   stok.eolIsSignificant(false);
   stok.ordinaryChar(',');
   try {
     while (stok.nextToken() != StreamTokenizer.TT_EOF) {
       switch (stok.ttype) {
         case ',':
           {
             // Separator between names: nothing to record.
             break;
           }
         case '\'':
         case '\"':
         case StreamTokenizer.TT_WORD:
           {
             // Quoted and unquoted names are both delivered in sval.
             analyzerNames.add(stok.sval);
             break;
           }
         default:
           {
             throw new RuntimeException("Unexpected token: " + stok.toString());
           }
       }
     }
   } catch (RuntimeException e) {
     // getMessage() may be null (e.g. for a NullPointerException); guard against that so the
     // original failure is wrapped instead of being replaced by a new NullPointerException.
     final String message = e.getMessage();
     if (message != null && message.startsWith("Line #")) {
       throw e;
     } else {
       throw new RuntimeException("Line #" + (stok.lineno() + getAlgLineNum()) + ": ", e);
     }
   } catch (Throwable t) {
     throw new RuntimeException("Line #" + (stok.lineno() + getAlgLineNum()) + ": ", t);
   }
 }
Exemplo n.º 15
0
  /**
   * Reads the scene file {@code Test.scn} and turns its statements into AST nodes.
   *
   * <p>{@code Move}, {@code Object}, {@code Camera} and {@code Sprite} statements are added to
   * the returned sequence and echoed on standard output; stand-alone numbers are added as {@code
   * Number} nodes; every other token is only echoed. I/O failures are reported on standard output
   * and the sequence built so far is returned.
   *
   * @return the sequence of parsed elements (possibly empty)
   */
  public static Sequence Tokenizer() {
    AST ast = new AST();
    Sequence seq = ast.new Sequence();
    FileReader inFile = null;
    try {
      inFile = new FileReader("Test.scn");
      StreamTokenizer st = new StreamTokenizer(inFile);

      st.ordinaryChar('.');
      st.ordinaryChar('/');
      st.eolIsSignificant(true);

      String ID;
      int x;
      int y;
      int w;
      int h;
      String fileName;

      int token = st.nextToken();
      while (token != StreamTokenizer.TT_EOF) {
        char ch;
        String s;

        switch (token) {
          case StreamTokenizer.TT_WORD:
            s = st.sval;
            if (s.equals("Move")) {
              ID = readTextAfterSkip(st);
              x = readIntAfterSkip(st);
              y = readIntAfterSkip(st);
              st.nextToken(); // token that closes the statement
              seq.elements.add(new Move(new Id(ID), new Number(x), new Number(y)));
              System.out.println("Move " + ID + " " + x + ", " + y);
            } else if (s.equals("Object")) {
              st.nextToken();
              ID = st.sval;
              x = readIntAfterSkip(st);
              y = readIntAfterSkip(st);
              w = readIntAfterSkip(st);
              h = readIntAfterSkip(st);
              fileName = readTextAfterSkip(st);
              st.nextToken(); // token that closes the statement
              seq.elements.add(
                  new Obj(
                      new Id(ID),
                      new Variables(
                          new Number(x),
                          new Number(y),
                          new Number(w),
                          new Number(h),
                          new Id(fileName))));
              System.out.println(
                  "Object " + ID + " " + x + ", " + y + ", " + w + ", " + h + ", " + fileName);
            } else if (s.equals("Camera")) {
              st.nextToken();
              ID = st.sval;
              x = readIntAfterSkip(st);
              y = readIntAfterSkip(st);
              st.nextToken(); // token that closes the statement
              seq.elements.add(new Camera(new Id(ID), new Variables(new Number(x), new Number(y))));
              System.out.println("Camera " + ID + " " + x + ", " + y);
            } else if (s.equals("Sprite")) {
              st.nextToken();
              ID = st.sval;
              x = readIntAfterSkip(st);
              y = readIntAfterSkip(st);
              w = readIntAfterSkip(st);
              h = readIntAfterSkip(st);
              fileName = readTextAfterSkip(st);
              st.nextToken(); // token that closes the statement
              seq.elements.add(
                  new Sprite(
                      new Id(ID),
                      new Variables(
                          new Number(x),
                          new Number(y),
                          new Number(w),
                          new Number(h),
                          new Id(fileName))));
              System.out.println(
                  "Sprite " + ID + " " + x + ", " + y + ", " + w + ", " + h + ", " + fileName);
            } else {
              System.out.print("<ID> " + s + " ");
            }
            break;

          case StreamTokenizer.TT_NUMBER:
            int n = (int) st.nval;
            System.out.print("<Number> " + n);
            seq.elements.add(new Number(n));
            break;

          case '(':
            ch = (char) st.ttype;
            System.out.print("<Variables>" + ch);
            break;

          case ')':
            ch = (char) st.ttype;
            System.out.print(ch);
            break;

          case ',':
            ch = (char) st.ttype;
            System.out.print(ch + " ");
            break;

          case '"':
            s = st.sval;
            System.out.print("<ID> " + "\"" + s + "\"");
            break;

          case StreamTokenizer.TT_EOL:
            System.out.println();
            break;

          case '\0':
            break;

          default:
            // sval is null for single-character tokens; show the character itself so the
            // message does not read "null".
            s = st.sval != null ? st.sval : String.valueOf((char) token);
            System.out.println("ERROR: Unrecognized Token: " + s);
            break;
        }
        token = st.nextToken();
      }

      System.out.println();

    } catch (IOException e) {
      System.out.println("Error: " + e);
    } finally {
      // Close the file on every path, including a failure in the middle of parsing.
      if (inFile != null) {
        try {
          inFile.close();
        } catch (IOException e) {
          System.out.println("Error: " + e);
        }
      }
    }
    return seq;
  }

  /** Skips one token, reads the next one and returns the tokenizer's numeric value as an int. */
  private static int readIntAfterSkip(StreamTokenizer st) throws IOException {
    st.nextToken();
    st.nextToken();
    return (int) st.nval;
  }

  /** Skips one token, reads the next one and returns the tokenizer's string value. */
  private static String readTextAfterSkip(StreamTokenizer st) throws IOException {
    st.nextToken();
    st.nextToken();
    return st.sval;
  }
Exemplo n.º 16
0
  /**
   * Return the full data set. If the structure hasn't yet been determined by a call to
   * getStructure then this method does so before processing the rest of the data set.
   *
   * <p>All rows are read first while the distinct values of every column are collected; the
   * attributes are then built from those values and the rows converted to instances. The source
   * reader is closed before returning.
   *
   * @return the complete data set read from the source
   * @exception IOException if there is no source or parsing fails
   */
  @Override
  public Instances getDataSet() throws IOException {
    if ((m_sourceFile == null) && (m_sourceReader == null)) {
      throw new IOException("No source has been specified");
    }

    if (m_structure == null) {
      getStructure();
    }

    if (m_st == null) {
      m_st = new StreamTokenizer(m_sourceReader);
      initTokenizer(m_st);
    }

    m_st.ordinaryChar(m_FieldSeparator.charAt(0));

    m_cumulativeStructure = new ArrayList<Hashtable<Object, Integer>>(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
      m_cumulativeStructure.add(new Hashtable<Object, Integer>());
    }

    // read every remaining row
    m_cumulativeInstances = new ArrayList<ArrayList<Object>>();
    ArrayList<Object> current;
    while ((current = getInstance(m_st)) != null) {
      m_cumulativeInstances.add(current);
    }

    // build one attribute per column from the values collected for it
    ArrayList<Attribute> atts = new ArrayList<Attribute>(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
      String attname = m_structure.attribute(i).name();
      Hashtable<Object, Integer> tempHash = m_cumulativeStructure.get(i);
      if (tempHash.size() == 0) {
        atts.add(new Attribute(attname));
      } else {
        if (m_StringAttributes.isInRange(i)) {
          atts.add(new Attribute(attname, (ArrayList<String>) null));
        } else {
          ArrayList<String> values = new ArrayList<String>(tempHash.size());
          // add dummy objects in order to make the ArrayList's size == capacity
          for (int z = 0; z < tempHash.size(); z++) {
            values.add("dummy");
          }
          Enumeration<Object> keys = tempHash.keys();
          while (keys.hasMoreElements()) {
            Object ob = keys.nextElement();
            int index = tempHash.get(ob);
            String s = ob.toString();
            // Strip the enclosing quote characters; the length check keeps a label that is
            // only a single quote character from producing an invalid substring range.
            if (s.length() > 1 && (s.startsWith("'") || s.startsWith("\""))) {
              s = s.substring(1, s.length() - 1);
            }
            values.set(index, s);
          }
          atts.add(new Attribute(attname, values));
        }
      }
    }

    // make the instances
    String relationName;
    if (m_sourceFile != null)
      relationName = (m_sourceFile.getName()).replaceAll("\\.[cC][sS][vV]$", "");
    else relationName = "stream";
    Instances dataSet = new Instances(relationName, atts, m_cumulativeInstances.size());

    for (int i = 0; i < m_cumulativeInstances.size(); i++) {
      current = m_cumulativeInstances.get(i);
      double[] vals = new double[dataSet.numAttributes()];
      for (int j = 0; j < current.size(); j++) {
        Object cval = current.get(j);
        if (cval instanceof String) {
          if (((String) cval).compareTo(m_MissingValue) == 0) {
            vals[j] = Utils.missingValue();
          } else {
            if (dataSet.attribute(j).isString()) {
              vals[j] = dataSet.attribute(j).addStringValue((String) cval);
            } else if (dataSet.attribute(j).isNominal()) {
              // find correct index
              Hashtable<Object, Integer> lookup = m_cumulativeStructure.get(j);
              int index = lookup.get(cval);
              vals[j] = index;
            } else {
              throw new IllegalStateException(
                  "Wrong attribute type at position " + (i + 1) + "!!!");
            }
          }
        } else if (dataSet.attribute(j).isNominal()) {
          // find correct index
          Hashtable<Object, Integer> lookup = m_cumulativeStructure.get(j);
          int index = lookup.get(cval);
          vals[j] = index;
        } else if (dataSet.attribute(j).isString()) {
          vals[j] = dataSet.attribute(j).addStringValue("" + cval);
        } else {
          vals[j] = ((Double) cval).doubleValue();
        }
      }
      dataSet.add(new DenseInstance(1.0, vals));
    }
    m_structure = new Instances(dataSet, 0);
    setRetrieval(BATCH);
    m_cumulativeStructure = null; // conserve memory

    // close the stream
    m_sourceReader.close();

    return dataSet;
  }
Exemplo n.º 17
0
  /**
   * Returns the next instance in incremental mode.
   *
   * <p>Rows that were buffered earlier are served first through a temporary reader; once they
   * are used up the tokenizer is re-created over the source reader and rows are read from it
   * directly. When no further instance is available the source reader is closed and cleared.
   *
   * @param structure the structure to use for the instances read
   * @return the next instance, or null if none is available
   * @throws IOException if batch retrieval was already used, or if reading fails
   */
  @Override
  public Instance getNextInstance(Instances structure) throws IOException {
    m_structure = structure;
    if (getRetrieval() == BATCH) {
      throw new IOException("Cannot mix getting instances in both incremental and batch modes");
    }
    setRetrieval(INCREMENTAL);

    // the temp files are not needed in incremental mode
    if (m_dataDumper != null) {
      m_dataDumper.close();
      m_dataDumper = null;
    }

    // first call with buffered rows: replay them through a reader of their own
    if (m_rowBuffer.size() > 0 && m_incrementalReader == null) {
      StringBuilder buffered = new StringBuilder();
      for (String row : m_rowBuffer) {
        buffered.append(row);
        buffered.append("\n");
      }
      m_numBufferedRows = m_rowBuffer.size();
      Reader batchReader = new BufferedReader(new StringReader(buffered.toString()));

      m_incrementalReader =
          new ArffReader(batchReader, m_structure, 0, 0, m_fieldSeparatorAndEnclosures);

      m_rowBuffer.clear();
    }

    // buffered rows exhausted: switch over to tokenizing the source directly
    if (m_numBufferedRows == 0) {
      m_numBufferedRows = -1;

      m_st = new StreamTokenizer(m_sourceReader);
      initTokenizer(m_st);
      m_st.ordinaryChar(m_FieldSeparator.charAt(0));

      m_incrementalReader = null;
    }

    Instance next = null;
    if (m_sourceReader != null) {
      if (m_incrementalReader != null) {
        next = m_incrementalReader.readInstance(m_structure);
      } else if (getInstance(m_st) != null) {
        next = makeInstance();
      }
      if (m_numBufferedRows > 0) {
        m_numBufferedRows--;
      }
    }

    if (next == null && m_sourceReader != null) {
      try {
        // close the stream
        m_sourceReader.close();
        m_sourceReader = null;
      } catch (Exception ex) {
        ex.printStackTrace();
      }
    }

    return next;
  }
Exemplo n.º 18
0
  /**
   * Reads the first row of the source and builds the initial structure from it.
   *
   * <p>The first row is tokenized to obtain one attribute per field (named from the row itself,
   * or {@code att1}, {@code att2}, ... when there is no header row). Each attribute is then given
   * a type from the configured attribute ranges and nominal label specifications. Finally the
   * tokenizer is re-created over the source reader and a first batch of data is read to refine
   * the structure.
   *
   * @throws IOException if the source contains no rows or reading fails
   */
  private void readHeader() throws IOException {
    m_rowCount = 1;
    m_incrementalReader = null;
    m_current = new ArrayList<Object>();
    openTempFiles();

    m_rowBuffer = new ArrayList<String>();

    String firstRow = m_sourceReader.readLine();
    if (firstRow == null) {
      throw new IOException("No data in the file!");
    }
    // Without a header row the first row is data, so keep it in the row buffer.
    if (m_noHeaderRow) {
      m_rowBuffer.add(firstRow);
    }

    ArrayList<Attribute> attribNames = new ArrayList<Attribute>();

    // now tokenize to determine attribute names (or create att names if
    // no header row
    StringReader sr = new StringReader(firstRow + "\n");
    // System.out.print(firstRow + "\n");
    m_st = new StreamTokenizer(sr);
    initTokenizer(m_st);

    m_st.ordinaryChar(m_FieldSeparator.charAt(0));

    int attNum = 1;
    StreamTokenizerUtils.getFirstToken(m_st);
    if (m_st.ttype == StreamTokenizer.TT_EOF) {
      StreamTokenizerUtils.errms(m_st, "premature end of file");
    }
    // 'first' marks the iteration that reuses the token fetched by getFirstToken above;
    // 'wasSep' records whether the current token was a separator or line end rather than a field.
    boolean first = true;
    boolean wasSep;

    while (m_st.ttype != StreamTokenizer.TT_EOL && m_st.ttype != StreamTokenizer.TT_EOF) {
      // Get next token

      if (!first) {
        StreamTokenizerUtils.getToken(m_st);
      }

      if (m_st.ttype == m_FieldSeparator.charAt(0) || m_st.ttype == StreamTokenizer.TT_EOL) {
        wasSep = true;
      } else {
        wasSep = false;

        String attName = null;

        if (m_noHeaderRow) {
          // no header row: generate att1, att2, ...
          attName = "att" + attNum;
          attNum++;
        } else {
          attName = m_st.sval;
        }

        attribNames.add(new Attribute(attName, (java.util.List<String>) null));
      }
      // After a field, read one more token (presumably the separator that follows it —
      // depends on StreamTokenizerUtils.getToken, not visible here).
      if (!wasSep) {
        StreamTokenizerUtils.getToken(m_st);
      }
      first = false;
    }
    // Relation name: the source file name without a trailing ".csv" (any case), or "stream".
    String relationName;
    if (m_sourceFile != null) {
      relationName = (m_sourceFile.getName()).replaceAll("\\.[cC][sS][vV]$", "");
    } else {
      relationName = "stream";
    }
    m_structure = new Instances(relationName, attribNames, 0);
    // The attribute ranges need the highest valid index before they can be queried.
    m_NominalAttributes.setUpper(m_structure.numAttributes() - 1);
    m_StringAttributes.setUpper(m_structure.numAttributes() - 1);
    m_dateAttributes.setUpper(m_structure.numAttributes() - 1);
    m_numericAttributes.setUpper(m_structure.numAttributes() - 1);
    m_nominalVals = new HashMap<Integer, LinkedHashSet<String>>();

    // Assign a type per attribute; the first matching range wins, in the order
    // nominal, string, date, numeric.
    m_types = new TYPE[m_structure.numAttributes()];
    for (int i = 0; i < m_structure.numAttributes(); i++) {
      if (m_NominalAttributes.isInRange(i)) {
        m_types[i] = TYPE.NOMINAL;
        LinkedHashSet<String> ts = new LinkedHashSet<String>();
        m_nominalVals.put(i, ts);
      } else if (m_StringAttributes.isInRange(i)) {
        m_types[i] = TYPE.STRING;
      } else if (m_dateAttributes.isInRange(i)) {
        m_types[i] = TYPE.DATE;
      } else if (m_numericAttributes.isInRange(i)) {
        m_types[i] = TYPE.NUMERIC;
      } else {
        m_types[i] = TYPE.UNDETERMINED;
      }
    }

    // Each nominal label spec has the form "<attributes>:<label>,<label>,...";
    // specs that do not split into exactly two parts on ':' are ignored.
    if (m_nominalLabelSpecs.size() > 0) {
      for (String spec : m_nominalLabelSpecs) {
        String[] attsAndLabels = spec.split(":");
        if (attsAndLabels.length == 2) {
          String[] labels = attsAndLabels[1].split(",");
          try {
            // try as a range string first
            Range tempR = new Range();
            tempR.setRanges(attsAndLabels[0].trim());
            tempR.setUpper(m_structure.numAttributes() - 1);

            int[] rangeIndexes = tempR.getSelection();
            for (int i = 0; i < rangeIndexes.length; i++) {
              m_types[rangeIndexes[i]] = TYPE.NOMINAL;
              LinkedHashSet<String> ts = new LinkedHashSet<String>();
              for (String lab : labels) {
                ts.add(lab);
              }
              m_nominalVals.put(rangeIndexes[i], ts);
            }
          } catch (IllegalArgumentException e) {
            // one or more named attributes?
            String[] attNames = attsAndLabels[0].split(",");
            for (String attN : attNames) {
              Attribute a = m_structure.attribute(attN.trim());
              // names that do not resolve to an attribute are skipped
              if (a != null) {
                int attIndex = a.index();
                m_types[attIndex] = TYPE.NOMINAL;
                LinkedHashSet<String> ts = new LinkedHashSet<String>();
                for (String lab : labels) {
                  ts.add(lab);
                }
                m_nominalVals.put(attIndex, ts);
              }
            }
          }
        }
      }
    }

    // Prevents the first row from getting lost in the
    // case where there is no header row and we're
    // running in batch mode
    if (m_noHeaderRow && getRetrieval() == BATCH) {
      StreamTokenizer tempT = new StreamTokenizer(new StringReader(firstRow));
      initTokenizer(tempT);
      tempT.ordinaryChar(m_FieldSeparator.charAt(0));
      String checked = getInstance(tempT);
      dumpRow(checked);
    }

    // From here on tokens come from the source reader rather than the first-row string.
    m_st = new StreamTokenizer(m_sourceReader);
    initTokenizer(m_st);
    m_st.ordinaryChar(m_FieldSeparator.charAt(0));

    // try and determine a more accurate structure from the first batch
    readData(false || getRetrieval() == BATCH);
    makeStructure();
  }
 /**
  * Checks that a multi-line slash-star comment is skipped even though {@code '/'} is declared
  * ordinary, so the first token reported is the number after the comment.
  *
  * @tests java.io.StreamTokenizer#slashStarComments(boolean)
  */
 public void test_slashStarCommentsZ() throws IOException {
   setTest("/* foo \r\n /fiji \r\n*/ -456");
   st.ordinaryChar('/');
   st.slashStarComments(true);

   final int firstToken = st.nextToken();
   assertTrue("Test failed.", firstToken == StreamTokenizer.TT_NUMBER);
 }
Exemplo n.º 20
0
  /**
   * Tokenizes a RIB file, hands each RIB element to the {@code RibFactory} and writes the
   * resulting document to file.
   *
   * <p>Tokens are accumulated into one text per element; a word that is one of the configured
   * RIB names ends the element being accumulated and starts the next one. The element still
   * pending at end of input is handed to the factory as well.
   *
   * @param ribFileReader rib file reader
   */
  public Rib2Xml(FileReader ribFileReader) {

    /* Configure log4j, read conf out of jar file */
    Class<?> clazz = getClass();
    URL url = clazz.getResource("/conf/log4j.xml");
    if (url == null) {
      /* Try reading via filename; the process is terminated below regardless. */
      DOMConfigurator.configure("../conf/log4j.xml");
      System.err.println("Error: Configuration file for Log4j (log4j.xml) not found, aborting...");
      System.exit(1);
    }
    DOMConfigurator.configure(url);

    /* Create the ribfactory which deal with all the rib elements */
    Config config = Config.instance();
    RibFactory ribFac = new RibFactory(config);

    Vector<?> ribNames = config.getNames();

    StreamTokenizer thTokens = new StreamTokenizer(ribFileReader);
    thTokens.commentChar('#');
    thTokens.eolIsSignificant(false);
    thTokens.parseNumbers();
    thTokens.ordinaryChar('[');
    thTokens.ordinaryChar(']');
    thTokens.quoteChar('"');
    int count = 0;

    // Text of the element currently being accumulated; a builder avoids re-copying the whole
    // string for every appended token.
    StringBuilder factoryInput = new StringBuilder();

    try {
      while (thTokens.nextToken() != StreamTokenizer.TT_EOF) {
        logger.debug(thTokens.lineno() + ": " + thTokens.sval + ": ttype: " + thTokens.ttype);
        if (thTokens.ttype == StreamTokenizer.TT_NUMBER) {
          logger.debug(thTokens.lineno() + ": " + thTokens.nval);
          factoryInput.append(' ').append(thTokens.nval);
          count++;
        } else if (thTokens.ttype == StreamTokenizer.TT_WORD) {
          if (ribNames.contains(thTokens.sval)) {
            String element = factoryInput.toString();
            logger.debug(element);

            // AbstractRib Factory called to add an element to xml document
            logger.debug("Elements: " + count + ": " + element);
            ribFac.processRibElement(element);

            // start accumulating the next element with its name
            factoryInput.setLength(0);
            factoryInput.append(thTokens.sval);
          } else {
            factoryInput.append(' ').append(thTokens.sval);
          }
          logger.debug(thTokens.lineno() + ": " + thTokens.sval);
          count++;
        } else {
          if (thTokens.ttype != '"') {
            logger.debug(thTokens.lineno() + ": " + (char) thTokens.ttype);
            factoryInput.append(' ').append((char) thTokens.ttype);
            count++;
          } else if (thTokens.sval != null) {
            logger.debug(
                thTokens.lineno()
                    + ": "
                    + (char) thTokens.ttype
                    + thTokens.sval
                    + (char) thTokens.ttype);
            // re-wrap the quoted string in its quote character
            factoryInput
                .append(' ')
                .append((char) thTokens.ttype)
                .append(thTokens.sval)
                .append((char) thTokens.ttype);
            count++;
          }
        }
      }

      // An element is only handed over when the next RIB name is seen, so the last one is
      // still pending at end of input and has to be flushed here.
      if (factoryInput.length() > 0) {
        String element = factoryInput.toString();
        logger.debug("Elements: " + count + ": " + element);
        ribFac.processRibElement(element);
      }
    } catch (IOException e) {
      logger.error(e.toString());
    }

    logger.info("Tokens: " + count);

    RibDocument ribDoc = RibDocument.newInstance();
    ribDoc.toFile();
  }
Exemplo n.º 21
0
  /**
   * Returns the interned VarInfoAux described by a string of comma-separated options of the form
   *
   * <p>x = a, "a key" = "a value"
   *
   * <p>Elements may be quoted, and a value may be a bracketed vector. White space to the left
   * and right of keys and values is ignored, white space inside them is kept. Options that are
   * not mentioned keep their default values.
   *
   * @param inString the option string to parse
   * @return the interned VarInfoAux for the parsed options
   * @throws IOException if the string is malformed (missing or repeated '=', nested or
   *     unopened vector, ',' or '=' inside a vector, scalar mixed with a vector)
   */
  public static /*@Interned*/ VarInfoAux parse(String inString) throws IOException {
    StreamTokenizer tok = new StreamTokenizer(new StringReader(inString));
    tok.resetSyntax();
    tok.wordChars(0, Integer.MAX_VALUE);
    tok.quoteChar('\"');
    tok.whitespaceChars(' ', ' ');
    tok.ordinaryChar('[');
    tok.ordinaryChar(']');
    tok.ordinaryChar(',');
    tok.ordinaryChar('=');

    Map</*@Interned*/ String, /*@Interned*/ String> map = theDefault.map;

    String key = "";
    String value = "";
    boolean seenEqual = false;
    boolean insideVector = false;

    int tokInfo = tok.nextToken();
    while (tokInfo != StreamTokenizer.TT_EOF) {
      @SuppressWarnings("interning") // initialization-checking pattern
      boolean mapUnchanged = (map == theDefault.map);
      if (mapUnchanged) {
        // Copy the defaults lazily, on the first token, so that an empty string costs nothing.
        map = new HashMap</*@Interned*/ String, /*@Interned*/ String>(theDefault.map);
      }

      /*@Interned*/ String token;
      if (tok.ttype == StreamTokenizer.TT_WORD || tok.ttype == '\"') {
        assert tok.sval != null
            : "@AssumeAssertion(nullness): representation invariant of StreamTokenizer";
        token = tok.sval.trim().intern();
      } else {
        token = String.valueOf((char) tok.ttype).intern();
      }

      debug.fine("Token info: " + tokInfo + " " + token);

      if (token == "[") { // interned
        if (!seenEqual) throw new IOException("Aux option did not contain an '='");
        if (insideVector) throw new IOException("Vectors cannot be nested in an aux option");
        if (value.length() > 0) throw new IOException("Cannot mix scalar and vector values");

        insideVector = true;
        value = "";
      } else if (token == "]") { // interned
        if (!insideVector) throw new IOException("']' without preceding '['");
        insideVector = false;
      } else if (token == ",") { // interned
        if (!seenEqual) throw new IOException("Aux option did not contain an '='");
        if (insideVector) throw new IOException("',' cannot be used inside a vector");
        // one option is complete: store it and start the next one
        map.put(key.intern(), value.intern());
        key = "";
        value = "";
        seenEqual = false;
      } else if (token == "=") { // interned
        if (seenEqual) throw new IOException("Aux option contained more than one '='");
        if (insideVector) throw new IOException("'=' cannot be used inside a vector");
        seenEqual = true;
      } else if (seenEqual) {
        // part of the value: vector elements are individually quoted, scalars are joined
        if (insideVector) {
          value = value + " \"" + token.trim() + "\"";
        } else {
          value = (value + " " + token).trim();
        }
      } else {
        // part of the key
        key = (key + " " + token).trim();
      }

      tokInfo = tok.nextToken();
    }

    // store the last option, which has no trailing comma
    if (seenEqual) {
      map.put(key.intern(), value.intern());
    }

    // Interning
    VarInfoAux result = new VarInfoAux(map).intern();
    assert interningMap != null
        : "@AssumeAssertion(nullness):  application invariant:  postcondition of intern(), which was just called";
    if (debug.isLoggable(Level.FINE)) {
      debug.fine("New parse " + result);
      debug.fine("Intern table size: " + interningMap.size());
    }
    return result;
  }