/**
 * Tests whether a character is present in the kernel's table of characters that begin a
 * block-comment-ending delimiter.
 *
 * @param c the character to look up.
 * @return true if the table contains the character, false otherwise.
 */
protected boolean isCommentEndDelimiterStart(char c) {
    final boolean startsCommentEnd = kernel.getEndCommentDelimStartTable().contains(c);
    return startsCommentEnd;
}
/** * Gets the next token. If there are no tokens left to read, this will return null. * * @return the next token, or null if no more tokens to read. * @throws IOException if a token cannot be read by the underlying Reader. */ public Token nextToken() throws IOException { boolean breakloop = false; while (!breakloop) { char c = 0; if (isOnDelimBreak()) { c = delimBreakChar; clearDelimBreak(); } else c = readChar(); switch (getState()) { case TYPE_END_OF_LEXER: { breakloop = true; } break; case TYPE_UNKNOWN: { if (isLexerEnd(c)) { setState(TYPE_END_OF_LEXER); breakloop = true; } else if (isStreamEnd(c)) { if (kernel.willIncludeStreamBreak()) { setState(TYPE_END_OF_STREAM); breakloop = true; } streamStack.pop(); } else if (isNewline(c)) { if (kernel.willIncludeNewlines()) { setState(TYPE_DELIM_NEWLINE); breakloop = true; } } else if (isSpace(c)) { if (kernel.willIncludeSpaces()) { setState(TYPE_DELIM_SPACE); breakloop = true; } } else if (isTab(c)) { if (kernel.willIncludeTabs()) { setState(TYPE_DELIM_TAB); breakloop = true; } } else if (isWhitespace(c)) { } else if (isPoint(c) && isDelimiterStart(c)) { setState(TYPE_POINT); saveChar(c); } else if (isPoint(c) && !isDelimiterStart(c)) { setState(TYPE_FLOAT); saveChar(c); } else if (isSpecialStart(c)) { setState(TYPE_SPECIAL); setSpecialType(c); saveChar(c); } else if (isStringStart(c)) { setState(TYPE_STRING); setStringStartAndEnd(c); } else if (isDelimiterStart(c)) { setState(TYPE_DELIMITER); saveChar(c); } else if (isUnderscore(c)) { setState(TYPE_IDENTIFIER); saveChar(c); } else if (isLetter(c)) { setState(TYPE_IDENTIFIER); saveChar(c); } else if (c == '0') { setState(TYPE_HEX_INTEGER0); saveChar(c); } else if (isDigit(c)) { setState(TYPE_NUMBER); saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_START_OF_LEXER case TYPE_ILLEGAL: { if (isStreamEnd(c)) { setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { 
setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setDelimBreak(c); breakloop = true; } else if (isLetter(c)) { saveChar(c); } else if (isDigit(c)) { saveChar(c); } else { saveChar(c); } } break; // end TYPE_ILLEGAL case TYPE_POINT: // decimal point is seen, but it is a delimiter. { if (isStreamEnd(c)) { setState(TYPE_DELIMITER); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setState(TYPE_DELIMITER); setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setState(TYPE_DELIMITER); setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setState(TYPE_DELIMITER); setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setState(TYPE_DELIMITER); setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setState(TYPE_DELIMITER); setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setState(TYPE_DELIMITER); setDelimBreak(c); breakloop = true; } else if (isDigit(c)) { setState(TYPE_FLOAT); saveChar(c); } else { setState(TYPE_DELIMITER); if (kernel.getDelimTable().containsKey(getCurrentLexeme() + c)) saveChar(c); else { setDelimBreak(c); breakloop = true; } } } break; // end TYPE_POINT case TYPE_FLOAT: { if (isStreamEnd(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isExponent(c)) { setState(TYPE_EXPONENT); saveChar(c); } else if (isSpecialStart(c)) { setState(TYPE_NUMBER); 
setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isDigit(c)) { saveChar(c); } else if (isDelimiterStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_FLOAT case TYPE_IDENTIFIER: { if (isStreamEnd(c)) { setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setDelimBreak(c); breakloop = true; } else if (isLetter(c)) { saveChar(c); } else if (isDigit(c)) { saveChar(c); } else if (isUnderscore(c)) { saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_IDENTIFIER case TYPE_SPECIAL: { if (isStreamEnd(c)) { setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setDelimBreak(c); breakloop = true; } else if (isLetter(c)) { saveChar(c); } else if (isDigit(c)) { saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_IDENTIFIER case TYPE_HEX_INTEGER0: { if (isStreamEnd(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setState(TYPE_NUMBER); 
setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isPoint(c)) { setState(TYPE_FLOAT); saveChar(c); } else if (isSpecialStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (c == 'x' || c == 'X') { setState(TYPE_HEX_INTEGER1); saveChar(c); } else if (isLetter(c)) { setState(TYPE_ILLEGAL); saveChar(c); } else if (isDigit(c)) { setState(TYPE_NUMBER); saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_HEX_INTEGER0 case TYPE_HEX_INTEGER1: { if (isStreamEnd(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isPoint(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isHexDigit(c)) { setState(TYPE_HEX_INTEGER); saveChar(c); } else if (isLetter(c)) { setState(TYPE_ILLEGAL); saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_HEX_INTEGER1 case TYPE_HEX_INTEGER: { if (isStreamEnd(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { 
setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isHexDigit(c)) { saveChar(c); } else if (isLetter(c)) { setState(TYPE_ILLEGAL); saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_HEX_INTEGER case TYPE_NUMBER: { if (isStreamEnd(c)) { setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setDelimBreak(c); breakloop = true; } else if (isPoint(c)) { setState(TYPE_FLOAT); saveChar(c); } else if (isExponent(c)) { setState(TYPE_EXPONENT); saveChar(c); } else if (isSpecialStart(c)) { setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setDelimBreak(c); breakloop = true; } else if (isLetter(c)) { setState(TYPE_ILLEGAL); saveChar(c); } else if (isDigit(c)) { saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_NUMBER case TYPE_EXPONENT: { if (isStreamEnd(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { 
setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isExponentSign(c)) { setState(TYPE_EXPONENT_POWER); saveChar(c); } else if (isSpecialStart(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isDelimiterStart(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isLetter(c)) { setState(TYPE_ILLEGAL); saveChar(c); } else if (isDigit(c)) { setState(TYPE_EXPONENT_POWER); saveChar(c); } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_EXPONENT case TYPE_EXPONENT_POWER: { if (isStreamEnd(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else if (isDigit(c)) { saveChar(c); } else if (isDelimiterStart(c)) { setState(TYPE_NUMBER); setDelimBreak(c); breakloop = true; } else { setState(TYPE_ILLEGAL); saveChar(c); } } break; // end TYPE_EXPONENT_POWER case TYPE_STRING: { if (isStreamEnd(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isNewline(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else if (isStringEnd(c)) { breakloop = true; } else if (isStringEscape(c)) { c = readChar(); if (isStringEnd(c)) saveChar(c); else if (isStringEscape(c)) saveChar(c); else switch (c) { case '0': saveChar('\0'); break; case 'b': saveChar('\b'); break; case 't': saveChar('\t'); break; case 'n': saveChar('\n'); break; case 'f': 
saveChar('\f'); break; case 'r': saveChar('\r'); break; case '/': saveChar('/'); break; case 'u': { StringBuilder sb = new StringBuilder(); for (int i = 0; i < 4; i++) { c = readChar(); if (!isHexDigit(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else sb.append(c); } if (!breakloop) { saveChar((char) (Integer.parseInt(sb.toString(), 16) & 0x0ffff)); } } break; case 'x': { StringBuilder sb = new StringBuilder(); for (int i = 0; i < 2; i++) { c = readChar(); if (!isHexDigit(c)) { setState(TYPE_ILLEGAL); setDelimBreak(c); breakloop = true; } else sb.append(c); } if (!breakloop) { saveChar((char) (Integer.parseInt(sb.toString(), 16) & 0x0ff)); } } break; } } else { saveChar(c); } } break; // end TYPE_STRING case TYPE_DELIMITER: { if (isStreamEnd(c)) { setDelimBreak(c); breakloop = true; } else if (kernel.getCommentStartTable().containsKey(getCurrentLexeme() + c)) { clearCurrentLexeme(); setState(TYPE_COMMENT); } else if (kernel.getCommentLineTable().containsKey(getCurrentLexeme() + c)) { clearCurrentLexeme(); setState(TYPE_LINE_COMMENT); } else if (kernel.getDelimTable().containsKey(getCurrentLexeme() + c)) { saveChar(c); } else if (isNewline(c)) { setDelimBreak(c); breakloop = true; } else if (isSpace(c)) { setDelimBreak(c); breakloop = true; } else if (isTab(c)) { setDelimBreak(c); breakloop = true; } else if (isWhitespace(c)) { setDelimBreak(c); breakloop = true; } else if (isSpecialStart(c)) { setDelimBreak(c); breakloop = true; } else if (isStringStart(c)) { setDelimBreak(c); breakloop = true; } else { setDelimBreak(c); breakloop = true; } } break; // end TYPE_DELIMITER case TYPE_COMMENT: { if (isStreamEnd(c)) { clearCurrentLexeme(); setState(TYPE_UNKNOWN); } else if (kernel.getCommentEndTable().containsKey(getCurrentLexeme())) { clearCurrentLexeme(); setState(TYPE_UNKNOWN); } else if (isCommentEndDelimiterStart(c)) { setState(TYPE_DELIM_COMMENT); saveChar(c); } } break; // end TYPE_COMMENT case TYPE_DELIM_COMMENT: { if (isStreamEnd(c)) { 
clearCurrentLexeme(); setState(TYPE_COMMENT); } else if (kernel.getCommentEndTable().containsKey(getCurrentLexeme() + c)) { clearCurrentLexeme(); setState(TYPE_UNKNOWN); } else if (isWhitespace(c)) { clearCurrentLexeme(); setState(TYPE_COMMENT); } else { clearCurrentLexeme(); saveChar(c); } } break; // end TYPE_DELIM_COMMENT case TYPE_LINE_COMMENT: { if (isStreamEnd(c)) { clearCurrentLexeme(); setState(TYPE_UNKNOWN); } else if (isNewline(c)) { clearCurrentLexeme(); setState(TYPE_UNKNOWN); } } break; // end TYPE_DELIM_COMMENT } } // send token. int type = getState(); String lexeme = getCurrentLexeme(); clearCurrentLexeme(); switch (getState()) { case TYPE_DELIM_SPACE: { type = TYPE_DELIM_SPACE; lexeme = " "; } break; case TYPE_DELIM_TAB: { type = TYPE_DELIM_TAB; lexeme = "\t"; } break; case TYPE_DELIM_NEWLINE: { type = TYPE_DELIM_NEWLINE; lexeme = ""; } break; case TYPE_DELIMITER: { type = TYPE_DELIMITER; if (kernel.getCommentStartTable().containsKey(lexeme)) type = kernel.getCommentStartTable().get(lexeme); else if (kernel.getCommentEndTable().containsKey(lexeme)) type = kernel.getCommentEndTable().get(lexeme); else if (kernel.getCommentLineTable().containsKey(lexeme)) type = kernel.getCommentLineTable().get(lexeme); else if (kernel.getDelimTable().containsKey(lexeme)) type = kernel.getDelimTable().get(lexeme); } break; case TYPE_IDENTIFIER: { type = TYPE_IDENTIFIER; if (kernel.getKeywordTable().containsKey(lexeme)) type = kernel.getKeywordTable().get(lexeme); else if (kernel.getCaseInsensitiveKeywordTable().containsKey(lexeme)) type = kernel.getCaseInsensitiveKeywordTable().get(lexeme); } break; case TYPE_SPECIAL: type = specialType; break; } Token out = null; if (getState() != TYPE_END_OF_LEXER) { out = makeToken(type, lexeme); setState(TYPE_UNKNOWN); } if (DEBUG) System.out.println(out); return out; }
/**
 * Looks up the token type registered for a special character in the kernel's special
 * delimiter table.
 *
 * @param c the character to look up.
 * @return the type mapped to the character, or {@link LexerKernel#TYPE_UNKNOWN} if the table
 *     has no entry for it.
 */
protected int getSpecialType(char c) {
    int specialTypeForChar;
    if (kernel.getSpecialDelimTable().containsKey(c)) {
        specialTypeForChar = kernel.getSpecialDelimTable().get(c);
    } else {
        specialTypeForChar = TYPE_UNKNOWN;
    }
    return specialTypeForChar;
}
/**
 * Finds the character that closes a String opened by the given character, as mapped in the
 * kernel's string delimiter table.
 *
 * @param c the starting character.
 * @return the mapped end character, or the null character ('\0') if the given character
 *     does not start a string.
 */
protected char getStringEnd(char c) {
    return isStringStart(c) ? kernel.getStringDelimTable().get(c) : '\0';
}
/**
 * Tests whether a character has an entry in the kernel's special delimiter table, i.e.
 * whether it starts a special token.
 *
 * @param c the character to test.
 * @return true if the table has a key for the character, false otherwise.
 */
protected boolean isSpecialStart(char c) {
    final boolean startsSpecial = kernel.getSpecialDelimTable().containsKey(c);
    return startsSpecial;
}
/**
 * Tests whether a character has an entry in the kernel's string delimiter table, i.e.
 * whether it starts a String.
 *
 * @param c the character to test.
 * @return true if the table has a key for the character, false otherwise.
 */
protected boolean isStringStart(char c) {
    final boolean startsString = kernel.getStringDelimTable().containsKey(c);
    return startsString;
}
/**
 * Tests whether a character equals the decimal separator reported by the kernel.
 *
 * @param c the character to test.
 * @return true if the character is the kernel's decimal separator, false otherwise.
 */
protected boolean isPoint(char c) {
    return c == kernel.getDecimalSeparator();
}