/**
 * Builds a token stamped with the name, line text and line number of the stream that is
 * currently on top of the stream stack.
 *
 * @param type the token type to apply.
 * @param lexeme the token's lexeme.
 * @return a new Token object.
 */
protected Token makeToken(int type, String lexeme) {
  final String sourceName = streamStack.peek().streamName;
  final int lineNumber = streamStack.peek().lineNum;
  return new Token(sourceName, lexeme, streamStack.peek().line, lineNumber, type);
}
/**
 * Adds a preprocessor error message to the error list, prefixed with the current stream name,
 * line number and offending lexeme.
 *
 * <p>The message has the form {@code (stream) Line N, Token "lexeme": message}. If the stream
 * stack is empty, the line number is reported as -1 (the same sentinel used by
 * {@link #getCurrentLine()}) instead of failing while trying to report an error.
 *
 * @param errorMessage the error message.
 * @param lexeme the current lexeme.
 */
protected void addPreprocessorErrorMessage(String errorMessage, String lexeme) {
  // Guard the line lookup the same way getCurrentStreamName() guards the name lookup, so that
  // reporting an error at lexer end does not itself blow up on an empty stack.
  final int lineNumber = streamStack.isEmpty() ? -1 : streamStack.peek().lineNum;
  final String error =
      "("
          + getCurrentStreamName()
          + ") Line "
          + lineNumber
          + ", Token \""
          + lexeme
          + "\": "
          + errorMessage;
  preprocessorErrorList.add(error);
}
/**
 * Reads the next character from the stream on top of the stream stack.
 *
 * @return the character read, or {@link #END_OF_LEXER} if the stream stack is empty.
 * @throws IOException if a character cannot be read by the underlying Reader.
 */
protected char readChar() throws IOException {
  return streamStack.isEmpty() ? END_OF_LEXER : streamStack.peek().readChar();
}
/** * Gets the next token. If there are no tokens left to read, this will return null. * * @return the next token, or null if no more tokens to read. * @throws IOException if a token cannot be read by the underlying Reader. */ public Token nextToken() throws IOException { boolean breakloop = false; while (!breakloop) { char c = 0; if (isOnDelimBreak()) { c = delimBreakChar; clearDelimBreak(); } else c = readChar(); switch (getState()) { case TYPE_END_OF_LEXER: { breakloop = true; } break; case TYPE_UNKNOWN: { if (isLexerEnd(c)) { setState(TYPE_END_OF_LEXER); breakloop = true; } else if (isStreamEnd(c)) { if (kernel.willIncludeStreamBreak()) { setState(TYPE_END_OF_STREAM); breakloop = true; } streamStack.pop(); } else if (isNewline(c)) { if (kernel.willIncludeNewlines()) { setState(TYPE_DELIM_NEWLINE); breakloop = true; } } else if (isSpace(c)) { if (kernel.willIncludeSpaces()) { setState(TYPE_DELIM_SPACE); breakloop = true; } } else if (isTab(c)) { if (kernel.willIncludeTabs()) { setState(TYPE_DELIM_TAB); breakloop = true; } } else if (isWhitespace(c)) { } else if (isPoint(c) && isDelimiterStart(c)) { setState(TYPE_POINT); saveChar(c); } else if (isPoint(c) && !isDelimiterStart(c)) { setState(TYPE_FLOAT); saveChar(c); } else if (isSpecialStart(c)) { setState(TYPE_SPECIAL); setSpecialType(c); saveChar(c); } else if (isStringStart(c)) { setState(TYPE_STRING); setStringStartAndEnd(c); } else if (isDelimiterStart(c)) { setState(TYPE_DELIMITER); saveChar(c); } else if (isUnderscore(c)) { setState(TYPE_IDENTIFIER); saveChar(c); } else if (isLetter(c)) { setState(TYPE_IDENTIFIER); saveChar(c); } else if (c == '0') { setState(TYPE_HEX_INTEGER0); saveChar(c); } else if (isDigit(c)) { setState(TYPE_NUMBER); saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_START_OF_LEXER case TYPE_ILLEGAL: { if (isStreamEnd(c)) { setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { 
setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setDelimBreak(c); breakloop = true; } else if (isLetter(c)) { saveChar(c); } else if (isDigit(c)) { saveChar(c); } else { saveChar(c); } } break; // end TYPE_ILLEGAL case TYPE_POINT: // decimal point is seen, but it is a delimiter. { if (isStreamEnd(c)) { setState(TYPE_DELIMITER); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setState(TYPE_DELIMITER); setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setState(TYPE_DELIMITER); setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setState(TYPE_DELIMITER); setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setState(TYPE_DELIMITER); setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setState(TYPE_DELIMITER); setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setState(TYPE_DELIMITER); setDelimBreak(c); breakloop = true; } else if (isDigit(c)) { setState(TYPE_FLOAT); saveChar(c); } else { setState(TYPE_DELIMITER); if (kernel.getDelimTable().containsKey(getCurrentLexeme() + c)) saveChar(c); else { setDelimBreak(c); breakloop = true; } } } break; // end TYPE_POINT case TYPE_FLOAT: { if (isStreamEnd(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isExponent(c)) { setState(TYPE_EXPONENT); saveChar(c); } else if (isSpecialStart(c)) { setState(TYPE_NUMBER); 
setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isDigit(c)) { saveChar(c); } else if (isDelimiterStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_FLOAT case TYPE_IDENTIFIER: { if (isStreamEnd(c)) { setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setDelimBreak(c); breakloop = true; } else if (isLetter(c)) { saveChar(c); } else if (isDigit(c)) { saveChar(c); } else if (isUnderscore(c)) { saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_IDENTIFIER case TYPE_SPECIAL: { if (isStreamEnd(c)) { setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setDelimBreak(c); breakloop = true; } else if (isLetter(c)) { saveChar(c); } else if (isDigit(c)) { saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_IDENTIFIER case TYPE_HEX_INTEGER0: { if (isStreamEnd(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setState(TYPE_NUMBER); 
setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isPoint(c)) { setState(TYPE_FLOAT); saveChar(c); } else if (isSpecialStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (c == 'x' || c == 'X') { setState(TYPE_HEX_INTEGER1); saveChar(c); } else if (isLetter(c)) { setState(TYPE_ILLEGAL); saveChar(c); } else if (isDigit(c)) { setState(TYPE_NUMBER); saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_HEX_INTEGER0 case TYPE_HEX_INTEGER1: { if (isStreamEnd(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isPoint(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isHexDigit(c)) { setState(TYPE_HEX_INTEGER); saveChar(c); } else if (isLetter(c)) { setState(TYPE_ILLEGAL); saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_HEX_INTEGER1 case TYPE_HEX_INTEGER: { if (isStreamEnd(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { 
setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isHexDigit(c)) { saveChar(c); } else if (isLetter(c)) { setState(TYPE_ILLEGAL); saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_HEX_INTEGER case TYPE_NUMBER: { if (isStreamEnd(c)) { setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setDelimBreak(c); breakloop = true; } else if (isPoint(c)) { setState(TYPE_FLOAT); saveChar(c); } else if (isExponent(c)) { setState(TYPE_EXPONENT); saveChar(c); } else if (isSpecialStart(c)) { setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setDelimBreak(c); breakloop = true; } else if (isLetter(c)) { setState(TYPE_ILLEGAL); saveChar(c); } else if (isDigit(c)) { saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_NUMBER case TYPE_EXPONENT: { if (isStreamEnd(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { 
setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isExponentSign(c)) { setState(TYPE_EXPONENT_POWER); saveChar(c); } else if (isSpecialStart(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isLetter(c)) { setState(TYPE_ILLEGAL); saveChar(c); } else if (isDigit(c)) { setState(TYPE_EXPONENT_POWER); saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_EXPONENT case TYPE_EXPONENT_POWER: { if (isStreamEnd(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isDigit(c)) { saveChar(c); } else if (isDelimiterStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_EXPONENT_POWER case TYPE_STRING: { if (isStreamEnd(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isStringEnd(c)) { breakloop = true; } else if (isStringEscape(c)) { c = readChar(); if (isStringEnd(c)) saveChar(c); else if (isStringEscape(c)) saveChar(c); else switch (c) { case '0': saveChar('\0'); break; case 'b': saveChar('\b'); break; case 't': saveChar('\t'); break; case 'n': saveChar('\n'); break; case 'f': 
saveChar('\f'); break; case 'r': saveChar('\r'); break; case '/': saveChar('/'); break; case 'u': { StringBuilder sb = new StringBuilder(); for (int i = 0; i < 4; i++) { c = readChar(); if (!isHexDigit(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else sb.append(c); } if (!breakloop) { saveChar((char) (Integer.parseInt(sb.toString(), 16) & 0x0ffff)); } } break; case 'x': { StringBuilder sb = new StringBuilder(); for (int i = 0; i < 2; i++) { c = readChar(); if (!isHexDigit(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else sb.append(c); } if (!breakloop) { saveChar((char) (Integer.parseInt(sb.toString(), 16) & 0x0ff)); } } break; } } else { saveChar(c); } } break; // end TYPE_STRING case TYPE_DELIMITER: { if (isStreamEnd(c)) { setDelimBreak(c); breakloop = true; } else if (kernel.getCommentStartTable().containsKey(getCurrentLexeme() + c)) { clearCurrentLexeme(); setState(TYPE_COMMENT); } else if (kernel.getCommentLineTable().containsKey(getCurrentLexeme() + c)) { clearCurrentLexeme(); setState(TYPE_LINE_COMMENT); } else if (kernel.getDelimTable().containsKey(getCurrentLexeme() + c)) { saveChar(c); } else if (isNewline(c)) { setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setDelimBreak(c); breakloop = true; } else { setDelimBreak(c); breakloop = true; } } break; // end TYPE_DELIMITER case TYPE_COMMENT: { if (isStreamEnd(c)) { clearCurrentLexeme(); setState(TYPE_UNKNOWN); } else if (kernel.getCommentEndTable().containsKey(getCurrentLexeme())) { clearCurrentLexeme(); setState(TYPE_UNKNOWN); } else if (isCommentEndDelimiterStart(c)) { setState(TYPE_DELIM_COMMENT); saveChar(c); } } break; // end TYPE_COMMENT case TYPE_DELIM_COMMENT: { if (isStreamEnd(c)) { 
clearCurrentLexeme(); setState(TYPE_COMMENT); } else if (kernel.getCommentEndTable().containsKey(getCurrentLexeme() + c)) { clearCurrentLexeme(); setState(TYPE_UNKNOWN); } else if (isWhitespace(c)) { clearCurrentLexeme(); setState(TYPE_COMMENT); } else { clearCurrentLexeme(); saveChar(c); } } break; // end TYPE_DELIM_COMMENT case TYPE_LINE_COMMENT: { if (isStreamEnd(c)) { clearCurrentLexeme(); setState(TYPE_UNKNOWN); } else if (isNewline(c)) { clearCurrentLexeme(); setState(TYPE_UNKNOWN); } } break; // end TYPE_DELIM_COMMENT } } // send token. int type = getState(); String lexeme = getCurrentLexeme(); clearCurrentLexeme(); switch (getState()) { case TYPE_DELIM_SPACE: { type = TYPE_DELIM_SPACE; lexeme = " "; } break; case TYPE_DELIM_TAB: { type = TYPE_DELIM_TAB; lexeme = "\t"; } break; case TYPE_DELIM_NEWLINE: { type = TYPE_DELIM_NEWLINE; lexeme = ""; } break; case TYPE_DELIMITER: { type = TYPE_DELIMITER; if (kernel.getCommentStartTable().containsKey(lexeme)) type = kernel.getCommentStartTable().get(lexeme); else if (kernel.getCommentEndTable().containsKey(lexeme)) type = kernel.getCommentEndTable().get(lexeme); else if (kernel.getCommentLineTable().containsKey(lexeme)) type = kernel.getCommentLineTable().get(lexeme); else if (kernel.getDelimTable().containsKey(lexeme)) type = kernel.getDelimTable().get(lexeme); } break; case TYPE_IDENTIFIER: { type = TYPE_IDENTIFIER; if (kernel.getKeywordTable().containsKey(lexeme)) type = kernel.getKeywordTable().get(lexeme); else if (kernel.getCaseInsensitiveKeywordTable().containsKey(lexeme)) type = kernel.getCaseInsensitiveKeywordTable().get(lexeme); } break; case TYPE_SPECIAL: type = specialType; break; } Token out = null; if (getState() != TYPE_END_OF_LEXER) { out = makeToken(type, lexeme); setState(TYPE_UNKNOWN); } if (DEBUG) System.out.println(out); return out; }
/**
 * Gets the stream on top of the stream stack without removing it.
 *
 * @return the current stream, as returned by the stream stack's {@code peek()}.
 */
public Stream currentStream() {
  final Stream top = streamStack.peek();
  return top;
}
/**
 * Wraps the reader in a new named stream and pushes it onto the stream stack, making it the
 * current stream. This method itself only performs the push; it does not touch any other lexer
 * state.
 *
 * @param name the name of the stream.
 * @param in the reader to read the stream's characters from.
 */
public void pushStream(String name, Reader in) {
  final Stream pushed = new Stream(name, in);
  streamStack.push(pushed);
}
/**
 * Gets the line number of the stream on top of the stream stack.
 *
 * @return the lexer's current stream's line number, or -1 if the stream stack is empty.
 */
public int getCurrentLine() {
  if (!streamStack.isEmpty()) {
    return streamStack.peek().lineNum;
  }
  return -1;
}
/**
 * Gets the name of the stream on top of the stream stack.
 *
 * @return the lexer's current stream name, or {@code "LEXER END"} if the stream stack is empty.
 */
public String getCurrentStreamName() {
  return streamStack.isEmpty() ? "LEXER END" : streamStack.peek().streamName;
}