Example #1
0
  /**
   * Builds an expression tree from the tokens delivered by the lexer.
   *
   * <p>On entry the lexer must already hold the symbol that follows the opening parenthesis; that
   * symbol names the outermost expression. Enclosing expressions are kept on {@code stack} while a
   * nested one is being read.
   *
   * @return the parse tree of the input
   * @throws IOException if a read error occurs in the lexer
   * @throws ParsingException if the input cannot be parsed successfully
   */
  private Expression parseSymbolicExpression() throws IOException, ParsingException {
    Expression current = new Expression(lexer.sval);

    for (int token = lexer.nextToken();
        token != StreamTokenizer.TT_EOF;
        token = lexer.nextToken()) {
      if (token == ')') {
        // Closing the outermost expression finishes the parse.
        if (stack.empty()) {
          return current;
        }
        // Otherwise attach the finished sub-expression to its parent and resume the parent.
        stack.peek().addOperand(current);
        current = stack.pop();
      } else if (token == '(') {
        // Descend: remember the enclosing expression and start a new one named by the next symbol.
        stack.push(current);
        if (lexer.nextToken() != StreamTokenizer.TT_WORD) {
          throw new ParsingException("Expected symbol. Got: " + lexer.ttype);
        }
        current = new Expression(lexer.sval);
      } else if (token == StreamTokenizer.TT_WORD) {
        try {
          // A word that parses as an integer becomes a numeric operand ...
          current.addOperand(Value.newInt(Integer.parseInt(lexer.sval)));
        } catch (NumberFormatException ignored) {
          // ... anything else is kept as a symbol.
          current.addOperand(lexer.sval);
        }
      } else {
        throw new ParsingException("Unknown token type: " + lexer.ttype);
      }
    }

    // Input ran out before the outermost expression was closed.
    throw new ParsingException("Expected end of input.  Got: " + lexer.ttype);
  }
  /**
   * Reads tokens from the stream and counts how often a value is not exactly one greater than the
   * value read before it.
   *
   * <p>Each mismatch is reported on standard error; a progress line is written to standard output
   * after every 1000 tokens. The stream is not closed by this method.
   *
   * @param in the stream to read the numbers from
   * @return the number of values that did not follow their predecessor by exactly one
   * @throws IOException if reading from the stream fails
   */
  static int check(InputStream in) throws IOException {
    StreamTokenizer tokenizer = new StreamTokenizer(new BufferedReader(new InputStreamReader(in)));
    int mismatches = 0;
    int previous = 0;
    int seen = 0;
    boolean havePrevious = false;

    for (int type = tokenizer.nextToken();
        type != StreamTokenizer.TT_EOF;
        type = tokenizer.nextToken()) {
      final int current = (int) tokenizer.nval;
      // The very first value has nothing to be compared against.
      if (havePrevious && current != previous + 1) {
        System.err.println(
            "Number read: "
                + current
                + ", previous number: "
                + previous
                + " (lineno: "
                + tokenizer.lineno()
                + ")");
        mismatches++;
      }
      havePrevious = true;
      previous = current;
      seen++;
      if (seen > 0 && seen % 1000 == 0) {
        System.out.println("read " + seen + " numbers");
      }
    }
    return mismatches;
  }
Example #3
0
  /**
   * Verifies that the input opens with '(' immediately followed by a symbol.
   *
   * <p>When this method returns normally, the lexer's current token is that first symbol.
   *
   * @throws IOException if reading the input fails
   * @throws ParsingException if the first token is not '(' or the second token is not a symbol
   */
  private void checkStart() throws ParsingException, IOException {
    final int opening = lexer.nextToken();
    if (opening != '(') {
      throw new ParsingException("Program does not begin with '('.");
    }

    final int symbol = lexer.nextToken();
    if (symbol != StreamTokenizer.TT_WORD) {
      throw new ParsingException("Expected symbol. Got: " + lexer.ttype);
    }
  }
Example #4
0
 /**
  * Counts the end-of-line tokens in the file {@code mf.dat}.
  *
  * <p>Word and number tokens are read and skipped; only end-of-line tokens are counted, so a
  * final line without a terminating line break is not included. The file is closed before this
  * method returns, whether or not reading succeeds.
  *
  * @return the number of end-of-line tokens found in the file
  * @throws IOException if the file cannot be opened or read
  */
 int ct() throws IOException {
   FileReader fr = new FileReader("mf.dat");
   try {
     StreamTokenizer st = new StreamTokenizer(fr);
     // Line ends are reported as TT_EOL tokens instead of being skipped as whitespace.
     st.eolIsSignificant(true);
     int ctr = 0;
     while (st.nextToken() != StreamTokenizer.TT_EOF) {
       if (st.ttype == StreamTokenizer.TT_EOL) {
         ctr++;
       }
     }
     return ctr;
   } finally {
     fr.close();
   }
 }
 /**
  * Checks that a letter declared ordinary is returned as its own single-character token.
  *
  * @tests java.io.StreamTokenizer#ordinaryChar(int)
  */
 public void test_ordinaryCharI() throws IOException {
   // SM.
   setTest("Ffjein 893");
   st.ordinaryChar('F');
   final int token = st.nextToken();
   assertTrue("OrdinaryChar failed." + (char) st.ttype, 'F' == token);
 }
Example #6
0
  /**
   * Loads up to seven path entries from the classpath resource {@code atlas/resources/Paths.dat}
   * and stores them, in order, in the path fields of this object.
   *
   * <p>Entries that are missing from the resource are left as empty strings. A token that carries
   * no string value (for example a number) is stored as {@code null}, because only {@code sval} is
   * read. A read error while tokenizing is printed and the entries read so far are kept. The
   * resource stream is always closed before returning.
   *
   * @param cl the class loader used to locate the resource
   * @throws FileNotFoundException if the resource cannot be found by {@code cl}
   * @throws IOException if the resource cannot be opened or closed
   */
  public void setPaths(ClassLoader cl) throws FileNotFoundException, IOException {
    final String resource = "atlas/resources/Paths.dat";
    java.net.URL location = cl.getResource(resource);
    if (location == null) {
      // getResource signals "not found" with null; report it with the declared exception type.
      throw new FileNotFoundException("Resource not found: " + resource);
    }

    String Prefs[] = {"", "", "", "", "", "", ""};

    java.io.InputStream in = location.openStream();
    try {
      // NOTE(review): the InputStream constructor of StreamTokenizer is deprecated; it is kept so
      // that the bytes of the resource are interpreted exactly as before.
      StreamTokenizer tokens = new StreamTokenizer(in);

      // line ends are plain whitespace, not tokens
      tokens.eolIsSignificant(false);

      try {
        for (int i = 0; i < 7 && (tokens.nextToken() != StreamTokenizer.TT_EOF); i++) {
          Prefs[i] = tokens.sval;
        }
      } catch (IOException e) {
        e.printStackTrace();
      }
    } finally {
      in.close();
    }

    // save the paths
    adlc = Prefs[0];
    berkeleyInc = Prefs[1];
    berkeleyLib = Prefs[2];
    adlInc = Prefs[3];
    adlLib = Prefs[4];
    imdbInc = Prefs[5];
    imdbLib = Prefs[6];
  }
Example #7
0
 /**
  * Reads the next token and returns it if it is a word.
  *
  * @param stok the tokenizer to read from
  * @return the string value of the word token
  * @throws EOFException if the end of the input has been reached
  * @throws IOException if the next token is not a word, or if reading fails
  */
 private String scanString(final StreamTokenizer stok) throws IOException {
   switch (stok.nextToken()) {
     case StreamTokenizer.TT_WORD:
       return stok.sval;
     case StreamTokenizer.TT_EOF:
       throw new EOFException();
     default:
       throw new IOException("Expecting word, line " + stok.lineno());
   }
 }
Example #8
0
  /**
   * Tokenizes the text and joins its word and number tokens into one string.
   *
   * <p>Words are lower-cased, '.' is treated as whitespace and line ends are not significant. Each
   * word or number is followed by a single space; numbers are rendered as {@code double} values.
   * All other tokens are dropped.
   *
   * @param reader the text to tokenize
   * @return the space-terminated word and number tokens, in input order
   * @throws IOException if reading from {@code reader} fails
   */
  public static String StreamTokenizer(StringReader reader) throws IOException {
    StreamTokenizer lexer = new StreamTokenizer(reader);
    lexer.lowerCaseMode(true);
    lexer.eolIsSignificant(false);
    lexer.whitespaceChars('.', '.');

    StringBuilder joined = new StringBuilder();
    for (int type = lexer.nextToken();
        type != StreamTokenizer.TT_EOF;
        type = lexer.nextToken()) {
      if (type == StreamTokenizer.TT_WORD) {
        joined.append(lexer.sval).append(" ");
      } else if (type == StreamTokenizer.TT_NUMBER) {
        joined.append(lexer.nval).append(" ");
      }
      // every other token type is intentionally skipped
    }
    return joined.toString();
  }
Example #9
0
  /**
   * Handles one statement whose first token is the tokenizer's current token.
   *
   * <p>Statements starting with "graph", "node" or "edge" (in any letter case) are skipped.
   * Anything else is read as a node id, after which the next token selects a node statement
   * ('[') or an edge statement ('-'). Problems are reported on standard error and not rethrown.
   *
   * @param tk the tokenizer, positioned on the first token of the statement
   */
  protected void stmt(StreamTokenizer tk) {
    final String first = tk.sval;
    final boolean attributeStatement =
        first.equalsIgnoreCase("graph")
            || first.equalsIgnoreCase("node")
            || first.equalsIgnoreCase("edge");
    if (attributeStatement) {
      return; // attribute statements are not processed
    }

    try {
      nodeID(tk);
      int nodeindex = m_nodes.indexOf(new GraphNode(tk.sval, null));
      tk.nextToken();

      switch (tk.ttype) {
        case '[':
          nodeStmt(tk, nodeindex);
          break;
        case '-':
          edgeStmt(tk, nodeindex);
          break;
        default:
          System.err.println("error at lineno " + tk.lineno() + " in stmt");
          break;
      }
    } catch (Exception ex) {
      System.err.println("error at lineno " + tk.lineno() + " in stmtException");
      ex.printStackTrace();
    }
  }
Example #10
0
  /**
   * Tokenizes the classpath resource {@code /input.txt} and prints every token followed by the
   * totals of all tokens, words, numbers and other tokens.
   *
   * <p>'.' and the single quote are made ordinary characters before reading. The resource stream
   * is closed when the method finishes.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    InputStream is = StreamTokenering.class.getResourceAsStream("/input.txt");
    if (is == null) {
      // getResourceAsStream returns null when the resource is missing; fail with a clear message
      // instead of a NullPointerException from the reader.
      System.err.println("Resource /input.txt not found on the classpath");
      return;
    }

    try {
      StreamTokenizer in = new StreamTokenizer(new InputStreamReader(is));

      in.ordinaryChar('.');
      in.ordinaryChar('\'');
      int wordCount = 0, numCount = 0, punctionCount = 0, count = 0;
      // nextToken() returns an int token type; compare it as an int.
      int token;
      while ((token = in.nextToken()) != StreamTokenizer.TT_EOF) {
        count++;
        if (token == StreamTokenizer.TT_WORD) {
          wordCount++;
        } else if (token == StreamTokenizer.TT_NUMBER) {
          numCount++;
        } else {
          punctionCount++;
        }
        System.out.println(in.toString());
      }
      System.out.println("单词总数为:" + count);
      System.out.println("单词数为:" + wordCount);
      System.out.println("数字数为:" + numCount);
      System.out.println("标点符号数为:" + punctionCount);
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      try {
        is.close();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }
Example #11
0
 /**
  * Reads the children of a split until the matching ')' or the end of the input.
  *
  * <p>A bare word is either the "WEIGHT" attribute (any letter case) or the name of a leaf child.
  * A '(' must be followed by a node type: "LEAF", "ROW" or "COLUMN" (any letter case). ROW and
  * COLUMN create a nested split that is parsed recursively. Other tokens are skipped.
  *
  * @param st the tokenizer to read from
  * @param parent the split that receives the parsed children and attributes
  * @throws Exception if a helper reports a parse error or reading fails
  */
 private static void parseSplit(StreamTokenizer st, Split parent) throws Exception {
   for (int type = st.nextToken();
       type != StreamTokenizer.TT_EOF && type != ')';
       type = st.nextToken()) {
     if (type == StreamTokenizer.TT_WORD) {
       if (st.sval.equalsIgnoreCase("WEIGHT")) {
         parseAttribute(st.sval, st, parent);
       } else {
         addSplitChild(parent, new Leaf(st.sval));
       }
       continue;
     }
     if (type != '(') {
       continue;
     }

     // An opening parenthesis must be followed by the node type.
     if (st.nextToken() != StreamTokenizer.TT_WORD) {
       throwParseException(st, "invalid node type");
     }
     final String nodeType = st.sval.toUpperCase();
     final boolean rowLayout = nodeType.equals("ROW");
     if (nodeType.equals("LEAF")) {
       parseLeaf(st, parent);
     } else if (rowLayout || nodeType.equals("COLUMN")) {
       Split child = new Split();
       child.setRowLayout(rowLayout);
       addSplitChild(parent, child);
       parseSplit(st, child);
     } else {
       throwParseException(st, "unrecognized node type '" + nodeType + "'");
     }
   }
 }
Example #12
0
  /**
   * Splits a line into its tokens using a {@link StreamTokenizer} with default syntax.
   *
   * <p>Number tokens are added as {@link Double}, word tokens as {@link String}. Any other token
   * that carries a string value (a quoted string) is added as that string; tokens without a
   * string value, such as single ordinary characters, are skipped.
   *
   * @param line the text to tokenize
   * @return the tokens of {@code line} in input order
   * @throws IOException if reading the line fails
   */
  public static Vector parse(String line) throws IOException {
    Vector<Object> c = new Vector<Object>();

    StreamTokenizer st = new StreamTokenizer(new StringReader(line));

    // Iterate through each token in the line.
    while (st.nextToken() != StreamTokenizer.TT_EOF) {
      if (st.ttype == StreamTokenizer.TT_NUMBER) {
        // Token is a number.
        c.add(Double.valueOf(st.nval));
      } else if (st.ttype == StreamTokenizer.TT_WORD) {
        // Token is a word.
        c.add(st.sval);
      } else if (st.sval != null) {
        // Token is a quoted string: ttype holds the quote character, sval the body.
        c.add(st.sval);
      }
    }

    return c;
  }
 /**
  * Checks that a token pushed back is returned again by the next read.
  *
  * @tests java.io.StreamTokenizer#pushBack()
  */
 public void test_pushBack() throws IOException {
   // SM.
   setTest("Hello 897");
   st.nextToken();
   st.pushBack();
   final int replayed = st.nextToken();
   assertTrue("PushBack failed.", StreamTokenizer.TT_WORD == replayed);
 }
Example #14
0
  /**
   * Test helper: prints every token of the given file, one per line, labelled by kind (WORD, NUM,
   * STRING for double-quoted literals, CHAR for anything else).
   *
   * <p>Any failure is printed as a stack trace and not rethrown.
   *
   * <p>NOTE(review): the loop inspects the current token before advancing, so it presumably relies
   * on {@code ESParser} having already read the first token -- confirm against the parser.
   *
   * @param fname the name passed to the {@code ESParser} constructor
   */
  public static void tokenize(String fname) {
    try {
      StreamTokenizer s = new ESParser(fname).scanner;

      for (; s.ttype != StreamTokenizer.TT_EOF; s.nextToken()) {
        final int type = s.ttype;
        if (type == StreamTokenizer.TT_WORD) {
          System.out.println("WORD: " + s.sval);
        } else if (type == StreamTokenizer.TT_NUMBER) {
          System.out.println("NUM:  " + s.nval);
        } else if ((char) type == '"') {
          // string literal
          System.out.println("STRING: \"" + s.sval + '"');
        } else {
          System.out.println("CHAR: " + (char) type);
        }
      }
    } catch (Throwable t) {
      t.printStackTrace();
    }
  }
 /**
  * Checks that letters in a range declared ordinary come back as single-character tokens.
  *
  * @tests java.io.StreamTokenizer#ordinaryChars(int, int)
  */
 public void test_ordinaryCharsII() throws IOException {
   // SM.
   setTest("azbc iof z 893");
   st.ordinaryChars('a', 'z');

   final int first = st.nextToken();
   assertEquals("OrdinaryChars failed.", 'a', first);

   final int second = st.nextToken();
   assertEquals("OrdinaryChars failed.", 'z', second);
 }
Example #16
0
 /**
  * Reads the file named by {@code tree} and stores its tokens in {@code list}.
  *
  * <p>The tokenizer syntax is reset and characters 32..127 are made word characters, so each run
  * of those characters (spaces included) becomes one word token. If the file cannot be opened or
  * read, a message is printed and the JVM exits with status 1. The file is closed once reading
  * has finished.
  */
 public void loadTree() {
   System.out.println("Loading tree");
   Reader input = null;
   StreamTokenizer stream = null;
   try {
     input = new BufferedReader(new InputStreamReader(new FileInputStream(tree)));
     stream = new StreamTokenizer(input);
     stream.resetSyntax();
     stream.wordChars(32, 127);
   } catch (Exception e) {
     System.out.println("Error opening " + tree);
     System.exit(1);
   }
   list = new ArrayList();
   try {
     // read the file to the end
     while (stream.nextToken() != StreamTokenizer.TT_EOF) {
       // is a word being read
       if (stream.ttype == StreamTokenizer.TT_WORD) {
         list.add(stream.sval);
       }
       // is a number being read (cannot happen while number parsing stays disabled by
       // resetSyntax(); kept in case parseNumbers() is enabled later)
       if (stream.ttype == StreamTokenizer.TT_NUMBER) {
         list.add(Double.valueOf(stream.nval));
       }
     }
   } catch (Exception e) {
     System.out.println("\nError reading " + tree + ". Exiting...");
     System.exit(1);
   } finally {
     // Release the file handle; a failure to close is not actionable here.
     try {
       if (input != null) {
         input.close();
       }
     } catch (Exception ignored) {
       // nothing useful can be done if closing fails
     }
   }
 }
Example #17
0
  /**
   * Prints every word of the input file that the dictionary contains, one per line.
   *
   * <p>Usage: {@code java Filter input [output]}. Words are lower-cased before the lookup. Output
   * goes to the named file, or to standard output when no output argument is given. The input is
   * opened before the output, so a missing input file no longer creates or truncates the output
   * file. Both are closed when the method finishes, even if reading fails.
   *
   * @param args the input location and, optionally, the output file name
   * @throws IOException if the output cannot be opened or the input cannot be read
   */
  public static void main(String[] args) throws IOException {
    if (args.length < 1) {
      System.err.println("Usage: java Filter input [output]");
      System.exit(2); // POSIX
      return;
    }

    String res = resourceLoc(args[0]);
    FileReader reader = null;
    try {
      reader = new FileReader(res);
    } catch (FileNotFoundException e) {
      System.err.printf("Input file %s was not found.\n", res);
      System.exit(-1);
      return;
    }

    PrintWriter out = null;
    try {
      if (args.length < 2) {
        // Optional arg: use stdout if no output file given.
        out = new PrintWriter(System.out);
      } else {
        // Wrap a BufferedWriter around the FileWriter as it may be costly.
        out = new PrintWriter(new BufferedWriter(new FileWriter(args[1])));
      }

      StreamTokenizer tokenizer = new StreamTokenizer(reader);
      tokenizer.lowerCaseMode(true);

      Dictionary dictionary = new Dictionary();
      while (tokenizer.nextToken() != StreamTokenizer.TT_EOF) {
        if (tokenizer.ttype == StreamTokenizer.TT_WORD && dictionary.contains(tokenizer.sval)) {
          out.println(tokenizer.sval);
        }
      }
    } finally {
      // Closing the writer also flushes anything still buffered.
      if (out != null) {
        out.close();
      }
      reader.close();
    }
  }
 /**
  * Checks that, with '/' ordinary and slash-slash comments enabled, the comment line is skipped
  * and the tokens that follow are a single '/' and then a word.
  *
  * @tests java.io.StreamTokenizer#slashSlashComments(boolean)
  */
 public void test_slashSlashCommentsZ() throws IOException {
   // SM.
   setTest("// foo \r\n /fiji \r\n -456");
   st.ordinaryChar('/');
   st.slashSlashComments(true);

   final int slash = st.nextToken();
   assertEquals("Test failed.", '/', slash);

   final int word = st.nextToken();
   assertTrue("Test failed.", StreamTokenizer.TT_WORD == word);
 }
  /**
   * Checks that word tokens are converted to lower case once lower-case mode is on.
   *
   * @tests java.io.StreamTokenizer#lowerCaseMode(boolean)
   */
  public void test_lowerCaseModeZ() throws Exception {
    // SM.
    setTest("HELLOWORLD");
    st.lowerCaseMode(true);
    st.nextToken();

    final String lowered = st.sval;
    assertEquals("sval not converted to lowercase.", "helloworld", lowered);
  }
  /**
   * Parses a composition description such as {@code "1 and (2 or not 3)"} into a nested list and
   * wraps it in a {@link PatientSearch}.
   *
   * <p>Numbers must lie between 1 and the size of the search history. The operators may be
   * written as words ("and"/"intersection", "or"/"union", "not", in any letter case) or as the
   * symbols {@code *}, {@code +} and {@code !}. Parentheses create nested lists. Any other
   * punctuation is ignored.
   *
   * @param description the composition text to parse
   * @return the search holding the parsed composition, or {@code null} if the description is
   *     malformed (unknown word, number out of range, unbalanced parentheses) or fails
   *     {@code testCompositionList}
   */
  public PatientSearch createCompositionFilter(String description) {
    Set<String> andWords = new HashSet<String>();
    Set<String> orWords = new HashSet<String>();
    Set<String> notWords = new HashSet<String>();
    andWords.add("and");
    andWords.add("intersection");
    andWords.add("*");
    orWords.add("or");
    orWords.add("union");
    orWords.add("+");
    notWords.add("not");
    notWords.add("!");

    List<Object> currentLine = new ArrayList<Object>();

    try {
      StreamTokenizer st = new StreamTokenizer(new StringReader(description));
      st.ordinaryChar('(');
      st.ordinaryChar(')');
      Stack<List<Object>> stack = new Stack<List<Object>>();
      while (st.nextToken() != StreamTokenizer.TT_EOF) {
        if (st.ttype == StreamTokenizer.TT_NUMBER) {
          int thisInt = (int) st.nval;
          if (thisInt < 1 || thisInt > searchHistory.size()) {
            log.error("number < 1 or > search history size");
            return null;
          }
          currentLine.add(Integer.valueOf(thisInt));
        } else if (st.ttype == '(') {
          stack.push(currentLine);
          currentLine = new ArrayList<Object>();
        } else if (st.ttype == ')') {
          // An unmatched ')' makes pop() throw; the catch below turns that into a null result.
          List<Object> l = stack.pop();
          l.add(currentLine);
          currentLine = l;
        } else if (st.ttype == StreamTokenizer.TT_WORD) {
          // Locale.ROOT keeps the lookup independent of the default locale's casing rules.
          String str = st.sval.toLowerCase(java.util.Locale.ROOT);
          if (andWords.contains(str)) currentLine.add(PatientSetService.BooleanOperator.AND);
          else if (orWords.contains(str)) currentLine.add(PatientSetService.BooleanOperator.OR);
          else if (notWords.contains(str)) currentLine.add(PatientSetService.BooleanOperator.NOT);
          else throw new IllegalArgumentException("Don't recognize " + st.sval);
        } else if (st.ttype > 0) {
          // '*', '+' and '!' are ordinary characters for the tokenizer, never words, so the
          // symbolic operators have to be matched through the character token itself.
          String symbol = String.valueOf((char) st.ttype);
          if (andWords.contains(symbol)) currentLine.add(PatientSetService.BooleanOperator.AND);
          else if (orWords.contains(symbol)) currentLine.add(PatientSetService.BooleanOperator.OR);
          else if (notWords.contains(symbol))
            currentLine.add(PatientSetService.BooleanOperator.NOT);
          // any other punctuation is ignored, as before
        }
      }
      if (!stack.isEmpty()) {
        // An unclosed '(' would otherwise silently drop everything outside the innermost group.
        throw new IllegalArgumentException("Unbalanced parentheses: missing ')'");
      }
    } catch (Exception ex) {
      log.error("Error in description string: " + description, ex);
      return null;
    }

    if (!testCompositionList(currentLine)) {
      log.error("Description string failed test: " + description);
      return null;
    }

    PatientSearch ret = new PatientSearch();
    ret.setParsedComposition(currentLine);
    return ret;
  }
 /**
  * Checks that digits are parsed as numbers by default, come back as single characters once made
  * ordinary, and are parsed as numbers again after {@code parseNumbers()}.
  *
  * @tests java.io.StreamTokenizer#parseNumbers()
  */
 public void test_parseNumbers() throws IOException {
   // SM
   setTest("9.9 678");

   final int byDefault = st.nextToken();
   assertTrue("Base behavior failed.", StreamTokenizer.TT_NUMBER == byDefault);

   st.ordinaryChars('0', '9');
   final int asOrdinary = st.nextToken();
   assertEquals("setOrdinary failed.", '6', asOrdinary);

   st.parseNumbers();
   final int reEnabled = st.nextToken();
   assertTrue("parseNumbers failed.", StreamTokenizer.TT_NUMBER == reEnabled);
 }
 /**
  * Checks that after {@code resetSyntax()} every character, including the space, the digit and
  * the quote, is returned as its own token.
  *
  * @tests java.io.StreamTokenizer#resetSyntax()
  */
 public void test_resetSyntax() throws IOException {
   // SM
   setTest("H 9\' ello World");
   st.resetSyntax();

   // Each failure message is built before the token is read, so it shows the previous token type.
   String message = "resetSyntax failed1." + (char) st.ttype;
   int token = st.nextToken();
   assertTrue(message, token == 'H');

   message = "resetSyntax failed1." + (char) st.ttype;
   token = st.nextToken();
   assertTrue(message, token == ' ');

   message = "resetSyntax failed2." + (char) st.ttype;
   token = st.nextToken();
   assertTrue(message, token == '9');

   message = "resetSyntax failed3." + (char) st.ttype;
   token = st.nextToken();
   assertTrue(message, token == '\'');
 }
  /**
   * Checks that input consisting only of a slash-slash comment yields end-of-file straight away
   * when such comments are enabled, even though '/' is an ordinary character.
   *
   * @tests java.io.StreamTokenizer#slashSlashComments(boolean)
   */
  public void test_slashSlashComments_withSSOpen_NoComment() throws IOException {
    final char[] input = "// t".toCharArray();
    StreamTokenizer tokenizer = new StreamTokenizer(new CharArrayReader(input));
    tokenizer.slashSlashComments(true);
    tokenizer.ordinaryChar('/');

    final int first = tokenizer.nextToken();
    assertEquals(StreamTokenizer.TT_EOF, first);
  }
  /**
   * Checks the tokens produced for {@code "t /* t *}{@code / t"} when slash-star comments are
   * disabled: one word, then end-of-file.
   *
   * @tests java.io.StreamTokenizer#slashStarComments(boolean)
   */
  public void test_slashStarComments_withSTClosed() throws IOException {
    final char[] input = "t /* t */ t".toCharArray();
    StreamTokenizer tokenizer = new StreamTokenizer(new CharArrayReader(input));
    tokenizer.slashStarComments(false);

    final int first = tokenizer.nextToken();
    assertEquals(StreamTokenizer.TT_WORD, first);

    final int second = tokenizer.nextToken();
    assertEquals(StreamTokenizer.TT_EOF, second);
  }
Example #25
0
 /**
  * Applies an optional layout keyword from the token stream to the constraints.
  *
  * <p>"break" sets {@code gridwidth} to {@code REMAINDER}; "stretch" sets {@code weightx} to 1.0.
  * Any other token is pushed back so that the caller reads it again.
  *
  * @param c the constraints to modify
  * @param setup the tokenizer to read the keyword from
  * @return {@code c}, for call chaining
  * @throws IOException if reading the next token fails
  */
 private GridBagConstraints constraints(GridBagConstraints c, StreamTokenizer setup)
     throws IOException {
   boolean keywordConsumed = false;
   if (setup.nextToken() == StreamTokenizer.TT_WORD) {
     final String keyword = setup.sval;
     if ("break".equals(keyword)) {
       c.gridwidth = GridBagConstraints.REMAINDER;
       keywordConsumed = true;
     } else if ("stretch".equals(keyword)) {
       c.weightx = 1.0;
       keywordConsumed = true;
     }
   }
   if (!keywordConsumed) {
     setup.pushBack();
   }
   return c;
 }
 /**
  * Checks that the reported line number starts at 1 and grows as line ends are consumed.
  *
  * @tests java.io.StreamTokenizer#lineno()
  */
 public void test_lineno() throws IOException {
   setTest("d\n 8\n");

   final int atStart = st.lineno();
   assertEquals("the lineno should be 1", 1, atStart);

   // consume both tokens: the word and the number
   for (int i = 0; i < 2; i++) {
     st.nextToken();
   }
   final int afterTokens = st.lineno();
   assertEquals("the lineno should be 2", 2, afterTokens);

   st.nextToken();
   final int atEnd = st.lineno();
   assertEquals("the next line no should be 3", 3, atEnd);
 }
  /**
   * Construct a new instance and configure it.
   *
   * <p>Reads the indexing options (numbers, stemming, term frequencies, match tracking) from the
   * configuration, instantiates the configured tokenizer class or falls back to
   * {@code SimpleTokenizer}, and loads the stopword list if a stopword file is configured. Only
   * word tokens of the stopword file are added; other tokens are skipped. Problems while loading
   * the tokenizer class or the stopword file are logged at debug level and otherwise ignored.
   *
   * @param broker the broker stored in this engine
   * @param conf the configuration the settings are read from
   */
  public TextSearchEngine(DBBroker broker, Configuration conf) {
    this.broker = broker;
    this.config = conf;
    String stopword, tokenizerClass;
    Boolean num, stemming, termFrequencies;
    if ((num = (Boolean) config.getProperty(PROPERTY_INDEX_NUMBERS)) != null)
      indexNumbers = num.booleanValue();
    if ((stemming = (Boolean) config.getProperty(PROPERTY_STEM)) != null)
      stem = stemming.booleanValue();
    if ((termFrequencies = (Boolean) config.getProperty(PROPERTY_STORE_TERM_FREQUENCY)) != null)
      termFreq = termFrequencies.booleanValue();
    String track = (String) config.getProperty(Serializer.PROPERTY_TAG_MATCHING_ELEMENTS);
    if (track != null)
      trackMatches =
          track.equalsIgnoreCase("yes") ? Serializer.TAG_ELEMENT_MATCHES : Serializer.TAG_NONE;
    track = (String) config.getProperty(Serializer.PROPERTY_TAG_MATCHING_ATTRIBUTES);
    if (track != null && track.equalsIgnoreCase("yes"))
      trackMatches = trackMatches | Serializer.TAG_ATTRIBUTE_MATCHES;

    if ((tokenizerClass = (String) config.getProperty(PROPERTY_TOKENIZER)) != null) {
      try {
        Class tokClass = Class.forName(tokenizerClass);
        tokenizer = (Tokenizer) tokClass.newInstance();
        LOG.debug("using tokenizer: " + tokenizerClass);
      } catch (ClassNotFoundException e) {
        LOG.debug(e);
      } catch (InstantiationException e) {
        LOG.debug(e);
      } catch (IllegalAccessException e) {
        LOG.debug(e);
      }
    }
    if (tokenizer == null) {
      LOG.debug("using simple tokenizer");
      tokenizer = new SimpleTokenizer();
    }

    if (stem) stemmer = new PorterStemmer();
    tokenizer.setStemming(stem);
    if ((stopword = (String) config.getProperty(PROPERTY_STOPWORD_FILE)) != null) {
      FileReader in = null;
      try {
        in = new FileReader(stopword);
        StreamTokenizer tok = new StreamTokenizer(in);
        // Advance on every iteration: skipping a non-word token must still move to the next
        // token, otherwise the loop never terminates.
        while (tok.nextToken() != StreamTokenizer.TT_EOF) {
          if (tok.ttype == StreamTokenizer.TT_WORD) {
            stoplist.add(tok.sval);
          }
        }
      } catch (FileNotFoundException e) {
        LOG.debug(e);
      } catch (IOException e) {
        LOG.debug(e);
      } finally {
        if (in != null) {
          try {
            in.close();
          } catch (IOException e) {
            LOG.debug(e);
          }
        }
      }
    }
  }
 /**
  * Checks that a custom quote character delimits a string token, both for a punctuation
  * character and for a letter.
  *
  * @tests java.io.StreamTokenizer#quoteChar(int)
  */
 public void test_quoteCharI() throws IOException {
   // SM
   setTest("<Hello World<    HelloWorldH");
   st.quoteChar('<');

   final int quoteToken = st.nextToken();
   assertEquals("QuoteChar failed.", '<', quoteToken);
   final String quotedBody = st.sval;
   assertEquals("QuoteChar failed.", "Hello World", quotedBody);

   st.quoteChar('H');
   st.nextToken();
   final String letterQuotedBody = st.sval;
   assertEquals("QuoteChar failed for word.", "elloWorld", letterQuotedBody);
 }
Example #29
0
  /**
   * Creates the tokenizer used to read s-expressions.
   *
   * <p>The default syntax is discarded. Characters up to and including the space are whitespace,
   * '!' and the range '*' through 'z' form words, and everything else (notably the parentheses)
   * is returned as a single-character token. Digits are word characters, so numbers arrive as
   * words.
   *
   * @param reader the input stream reader
   * @return an s-expression lexer
   */
  private StreamTokenizer createLexer(Reader reader) {
    final StreamTokenizer lexer = new StreamTokenizer(reader);

    // Start from a blank table: every character is ordinary until classified below.
    lexer.resetSyntax();

    // word characters
    lexer.wordChars('*', 'z');
    lexer.wordChars('!', '!');

    // separators; line ends are ordinary whitespace
    lexer.whitespaceChars(0, ' ');
    lexer.eolIsSignificant(false);

    return lexer;
  }
  /**
   * Checks that, with slash-slash comments disabled and '/' ordinary, the two slashes come back
   * as separate tokens followed by the word.
   *
   * @tests java.io.StreamTokenizer#slashSlashComments(boolean)
   */
  public void test_slashSlashComments_withSSClosed() throws IOException {
    final char[] input = "// t".toCharArray();
    StreamTokenizer tokenizer = new StreamTokenizer(new CharArrayReader(input));
    tokenizer.slashSlashComments(false);
    tokenizer.ordinaryChar('/');

    final int firstSlash = tokenizer.nextToken();
    assertEquals('/', firstSlash);

    final int secondSlash = tokenizer.nextToken();
    assertEquals('/', secondSlash);

    final int word = tokenizer.nextToken();
    assertEquals(StreamTokenizer.TT_WORD, word);
  }