  public void quit() {
    tokenizer.quit();
  }
Example #2
  public static List<Token> process(String filename, SourceFormat format) throws IOException {
    System.out.println("Tokenizing " + filename);

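    // Open the source file. The reader gets wrapped in a BufferedReader and
    // consumed by the tokenizer chain built below.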
    FileReader reader = new FileReader(filename);

    // We will be building up our tokenizer in several stages. Each stage
    // takes the preceding tokenizer and extends its abilities.
    Tokenizer tokenizer;

    // The tokenizers in this sequence should generate the expected tokens.

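    // Split the character stream into individual lines.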
    tokenizer = new LineSplittingTokenizer(new BufferedReader(reader));
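
    // Detect any compiler directives appearing in the input.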
    tokenizer = new CompilerDirectivesTokenizer(tokenizer, format);
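
    // Split each line into its areas (e.g. the sequence number, indicator
    // and program text areas), based on the source format.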
    tokenizer = new ProgramAreaTokenizer(tokenizer, format);
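
    // Filter out the source formatting directives themselves; they are not
    // part of the program text.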
    tokenizer = new SourceFormattingDirectivesFilter(tokenizer);

    if (format == SourceFormat.FIXED) {
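      // Fixed-format sources may continue a line onto the next. Mark the
      // continuations, then weld the continued parts back together.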
      tokenizer = new LineContinuationTokenizer(tokenizer);
      tokenizer = new ContinuationWeldingTokenizer(tokenizer);
    }

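    // Split the program text into individual tokens at the separators.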
    tokenizer = new SeparatorTokenizer(tokenizer);
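    // Recognize pseudo-literals (pseudo-text delimited by "==").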
    tokenizer = new PseudoLiteralTokenizer(tokenizer);

    // This tokenizer partly tests the assumption that the expected tokens
    // get generated, by comparing the number of tokens found in the program
    // text area of each line with the expected number. The expected count is
    // encoded in the indicator area of the test file. If it is missing
    // (which is allowed), the line is not tested in this way.
    TokenCountVerifiyingTokenizer countVerifier =
        new TokenCountVerifiyingTokenizer(tokenizer);
    tokenizer = countVerifier;

    // This tokenizer tests the tagging of tokens, making sure that they are
    // in a consistent state.
    TokenStateVerifiyingTokenizer stateVerifier =
        new TokenStateVerifiyingTokenizer(tokenizer);
    tokenizer = stateVerifier;

    // Here we filter out all tokens which are not part of the program text
    // area (comments are not considered part of this area). This leaves us
    // with the pure code, which should be perfect for processing by a
    // parser.
    tokenizer = new FilteringTokenizer(tokenizer, AreaTag.PROGRAM_TEXT_AREA);

    // Here we filter out all pure whitespace separators. This leaves us
    // with only the "structural" tokens which are of interest to a parser.
    tokenizer =
        new FilteringTokenizer(
            tokenizer,
            new TokenFilter() {
              public boolean accepts(Token token) {
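                // Accept any token which is not a separator, as well as any
                // separator which is not pure whitespace.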
                return !token.hasTag(SyntacticTag.SEPARATOR) || !token.getText().trim().equals("");
              }
            });

    // The following is handy in debugging:
    // tokenizer = new PrintingTokenizer(tokenizer, System.out);

    // So far, no tokenization will have occurred. The file only starts
    // getting tokenized when we ask for tokens, which we do in the
    // following loop. We collect all the tokens so we can report on them
    // at the end.
    List<Token> tokens = new ArrayList<Token>();
    Token nextToken = null;
    while ((nextToken = tokenizer.nextToken()) != null) {
      tokens.add(nextToken);
      // System.out.println(nextToken);
    }

    // Some of our tokenizers may be threaded. We need to make sure that any
    // threads they hold get stopped. This is what we do here. The message
    // will get passed along the chain of tokenizers, giving each a chance
    // to stop running.
    tokenizer.quit();

    // Finally, some reporting on the results of the tokenizing.
    System.out.println("Processed " + tokens.size() + " top level token(s).");

    if (countVerifier != null) {
      for (String message : countVerifier.getErrorMessages()) {
        System.out.println(message);
      }
    }

    if (stateVerifier != null) {
      for (String message : stateVerifier.getErrorMessages()) {
        System.out.println(message);
      }
    }

    System.out.println();

    Assert.assertTrue(
        "Counts as defined in source should match.", countVerifier.getErrorMessages().size() == 0);

    Assert.assertTrue(
        "Token states should be as expected.", stateVerifier.getErrorMessages().size() == 0);

    return tokens;
  }
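
  // A minimal usage sketch (hypothetical caller; the file path is assumed):
  //
  //   List<Token> tokens = process("testsuite/sample.cbl", SourceFormat.FIXED);
  //   System.out.println("Got " + tokens.size() + " token(s).");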