Esempio n. 1
0
 /**
  * Creates a stand-alone token whose begin and end positions coincide.
  *
  * <p>Both line fields are taken from the current {@code line} field of this object, and both
  * column fields from the {@code column} argument. The token's {@code kind} is set to -1 and it
  * has no successor.
  *
  * @param image text stored as the token image
  * @param column column stored as both begin and end column
  * @return the newly created token
  */
 private Token newOneToken(String image, int column) {
   final Token created = new Token();
   created.image = image;

   // Begin and end share the same position.
   created.beginLine = created.endLine = line;
   created.beginColumn = created.endColumn = column;

   created.kind = -1;
   created.next = null;
   return created;
 }
Esempio n. 2
0
  /**
   * Demo entry point: builds a rule set from {@code BracketsRule} (start rule) and
   * {@code BracketRule}, generates the LALR parser states, parses the input supplied by a
   * {@code BracketTokenizer}, and prints the resulting token type followed by every parsed
   * {@code Bracket}.
   *
   * @param args command-line arguments (not used)
   * @throws ParseException propagated from the parser
   * @throws BadTokenException propagated from the parser
   */
  public static void main(String[] args) throws ParseException, BadTokenException {
    // Grammar definition.
    LALRRuleSet<BracketsType> grammar = new LALRRuleSet<BracketsType>();
    grammar.addStartRule(new BracketsRule());
    grammar.addRule(new BracketRule());

    // State generation.
    LALRParserGenerator<BracketsType> stateBuilder =
        new LALRParserGenerator<BracketsType>(grammar);
    stateBuilder.generate(BracketsType.GENERATED_START_RULE);

    // Parsing.
    BracketTokenizer input = new BracketTokenizer();
    Parser<BracketsType> bracketParser =
        new Parser<BracketsType>(stateBuilder.getStartState(), input);
    Token<BracketsType> parsed = bracketParser.parse();
    System.out.println("Success! got: " + parsed.getType());

    // The value of the resulting token is the array of parsed brackets; print them on one line.
    Bracket[] parsedBrackets = (Bracket[]) parsed.getValue();
    for (int idx = 0; idx < parsedBrackets.length; idx++) {
      System.out.print(parsedBrackets[idx]);
    }
    System.out.println();
  }
Esempio n. 3
0
  /**
   * Tokenizes every file named on the command line and prints each token plus statistics.
   *
   * <p>Each argument is resolved relative to the {@code programs/} directory. For every token the
   * type and its (line, column) location are printed; for {@code ID} and {@code INT_CONST} tokens
   * the value reported by the lexer is printed as well. Tokenizing stops after the {@code EOF}
   * token, which is included in the token count. A file that cannot be opened is reported on
   * standard error and skipped.
   *
   * <p>The reader for each file is closed once that file has been tokenized, so handles do not
   * accumulate when many files are passed.
   *
   * @param args names of the files to tokenize
   */
  public static void main(String[] args) {
    if (args.length == 0) {
      System.err.println("No file arguments given");
      return;
    }

    // parse each file argument given
    for (int i = 0; i < args.length; i++) {
      // try-with-resources guarantees the reader is closed even if tokenizing fails
      try (FileReader file = new FileReader("programs/" + args[i])) {
        // create lexer
        Lexer lexer = new Lexer(file);

        // start tokenizing file
        System.out.println("Tokenizing " + args[i] + "...");
        long startTime = System.currentTimeMillis();
        int numTokens = 0;
        Token token;
        do {
          token = lexer.getToken();
          numTokens++;

          // print token type and location
          System.out.print(token.getType());
          System.out.print(" (" + token.getLineNum() + "," + token.getColNum() + ")");

          // print out semantic values for ID and INT_CONST tokens
          if (token.getType() == TokenType.ID) {
            System.out.println(": " + lexer.getIdVal());
          } else if (token.getType() == TokenType.INT_CONST) {
            System.out.println(": " + lexer.getIntVal());
          } else {
            System.out.println();
          }
        } while (token.getType() != TokenType.EOF);

        long endTime = System.currentTimeMillis();

        // print out statistics
        System.out.println("---");
        System.out.println("Number of tokens: " + numTokens);
        System.out.println("Execution time: " + (endTime - startTime) + "ms");
        System.out.println();
      } catch (FileNotFoundException e) {
        // could not open this file; move on to the next argument
        System.err.println(args[i] + " was not found in MiniJava/programs");
      } catch (java.io.IOException e) {
        // only reachable from the implicit close() of the reader
        System.err.println("Could not close " + args[i] + ": " + e.getMessage());
      }
    }
  }
Esempio n. 4
0
  /**
   * Reads the next token image from the current line ({@code strLine}), starting at the current
   * {@code column} and advancing {@code column} past every character it consumes. A token never
   * spans more than one line.
   *
   * <p>Scanning rules, in the order they are applied to each character:
   *
   * <ul>
   *   <li>whitespace ends the token, except inside an image that already contains a single quote
   *       (not in first position) and is not yet a complete number, where it is skipped;
   *   <li>a backslash starts an escaped identifier that runs up to the next whitespace;
   *   <li>a switch between "special" characters and other characters ends the token, except for
   *       the decimal point of a floating-point literal;
   *   <li>a non-digit following a digit ends the token when the combined text is neither an
   *       identifier nor a number (e.g. {@code 10ns} is split);
   *   <li>consecutive special characters are merged only for the known multi-character operators;
   *   <li>a double quote starts a string literal in which a doubled quote stands for one quote.
   * </ul>
   *
   * <p>If a parser is attached and the image starts with a back-tick but is not one of
   * {@code CompilerDirectives.cdStrings}, it is treated as a macro reference and replaced by the
   * macro content.
   *
   * @return the token image; {@code null} when no character was collected (the line is exhausted
   *     or the scan starts on whitespace), or when a macro reference expands to more than one
   *     token
   * @throws ParserException declared for callers; within this method the {@code error} flag is
   *     never raised, so the only exception thrown explicitly is the
   *     {@code SymbolNotFoundException} for an unknown macro
   */
  protected String getNextImage() throws ParserException {
    String ret = "";
    char lastChar = 0;
    char c;
    boolean error = false;
    boolean first = true;
    // position reported in tokens built for this image
    int curColumn = column + 1;

    int max = strLine.length();
    while (column < max && !error) { // one token always in one line
      c = strLine.charAt(column);
      if (Character.isWhitespace(c)) {
        if (!ret.isEmpty() && ret.indexOf(singleQuote) > 0 && !RegExp.is_number(ret)) {
          // whitespace inside an unfinished quoted number: skip it and keep scanning
          column++;
          continue;
        } else {
          break;
        }
      }

      // escape identifier
      if (backSlash == c) {
        // Consume up to (not including) the next whitespace. The character must be re-read on
        // every step; testing the stale `c` (the backslash itself) would swallow the whole line.
        while (column < max && !Character.isWhitespace(strLine.charAt(column))) {
          ret += strLine.charAt(column);
          column++;
        }
        break;
      }

      if (!first
          && ((specialChar.indexOf(lastChar) < 0 && specialChar.indexOf(c) >= 0)
              || (specialChar.indexOf(c) < 0 && specialChar.indexOf(lastChar) >= 0))) {
        // The look-ahead is bounds-checked so that a '.' in the last column of the line ends the
        // token instead of raising StringIndexOutOfBoundsException.
        if (Character.isDigit(lastChar)
            && c == '.'
            && column + 1 < max
            && Character.isDigit(strLine.charAt(column + 1))) {
          // float point
          ret += c;
          column++;
          c = strLine.charAt(column);
        } else {
          break; // exit loop when character change between specialChar and letter&digit
        }
      }

      if (!first
          && Character.isDigit(lastChar)
          && !Character.isDigit(c)
          && c != '.'
          && c != singleQuote
          && !ret.startsWith("`")) {
        String tmp = ret + c;
        // divide some string(such as 10ns) into two token
        if (!RegExp.is_IDENTIFIER(tmp) && !RegExp.is_number(tmp)) break;
      }

      // allow continuous specialChar
      if (!first && specialChar.indexOf(c) >= 0 && specialChar.indexOf(lastChar) >= 0) {
        // &&&
        // ===, !==
        // >=, <=, ==, !=
        // ~&, &&
        // ~|, ||
        // ^~, ~^
        // <<, >>
        // =>, *>, ->
        // `column < max - 1` is exactly the condition under which charAt(column + 1) is valid,
        // so three-character operators ending at the end of the line are recognised too.
        if ((column < max - 1)
            && (c == '&')
            && (strLine.charAt(column + 1) == '&')
            && (lastChar == '&')) {
          ret += c;
          ret += strLine.charAt(column + 1);
          column += 2;
        } else if ((column < max - 1)
            && (c == '=')
            && (strLine.charAt(column + 1) == '=')
            && ((lastChar == '!') || (lastChar == '='))) {
          ret += c;
          ret += strLine.charAt(column + 1);
          column += 2;
        } else if ((c == '='
                && (lastChar == '!' || lastChar == '>' || lastChar == '<' || lastChar == '='))
            || (c == '&' && (lastChar == '~' || lastChar == '&'))
            || (c == '|' && (lastChar == '~' || lastChar == '|'))
            || (c == '^' && lastChar == '~')
            || (c == '~' && lastChar == '^')
            || (c == '>' && lastChar == '>')
            || (c == '<' && lastChar == '<')
            || (c == '>' && (lastChar == '=' || lastChar == '*' || lastChar == '-'))) {
          ret += c;
          column++;
        }

        // any other pair of special characters is not merged: the token ends before `c`
        break;
      }

      // string
      if (doubleQuote == c) {
        ret += c;
        column++;
        while (column < max) {
          char c1 = strLine.charAt(column);
          ret += c1;
          if (c1 == doubleQuote) {
            column++;
            if (column >= max) {
              break;
            }
            c1 = strLine.charAt(column);
            if (c1 != doubleQuote) {
              break; // double quote in a string must a pair put together
            }
            ret += c1;
          }
          column++;
        }

        if (column >= max) {
          // error = true;   //TODO can double quote be next line?
        }
        break; // always quit loop on double quote
      }

      first = false;
      ret += c;
      lastChar = c;
      column++;
    }

    if (error) {
      throw new ParserException(newOneToken(ret, curColumn));
    }

    if (ret.isEmpty()) return null;

    // check macro define
    if (parser != null && ret.startsWith("`")) {
      // a back-ticked word listed in cdStrings is a compiler directive and is returned unchanged
      int index = -1;
      for (int i = 0; i < CompilerDirectives.cdStrings.length; i++) {
        if (ret.equals(CompilerDirectives.cdStrings[i])) {
          index = i;
          break;
        }
      }

      if (index < 0) {
        // extract the content of macro
        ret = ret.substring(1);
        ASTNode node = parser.getMacroContent(ret);
        if (node == null) {
          throw new SymbolNotFoundException(newOneToken(ret, curColumn));
        }

        // the first token is `define
        // the second token is macro name
        // content start from the third token
        Token token = node.getFirstToken().next.next;
        Token lToken = node.getLastToken();
        Token prev = null;
        if (token != lToken) {
          // NOTE(review): `prev` is never reassigned inside this loop, so the copied tokens are
          // neither chained together nor stored anywhere visible here - confirm the intended
          // behaviour before relying on multi-token macro expansion.
          while (token != null) {
            Token newToken = newOneToken(token.image, curColumn);
            newToken.endColumn = column;
            newToken.kind = token.kind;
            newToken.prev = prev;
            newToken.special = true;
            if (prev != null) prev.next = newToken;
            // lastToken = newToken;
            if (token == lToken) break;

            token = token.next;
          }
          ret = null;
        } else {
          // only one token in the macro content, just return it
          ret = token.image;
        }
      }
    }

    return ret;
  }