/**
 * Handles a digit escape inside a character class.
 *
 * <p>Does nothing unless the syntax enables {@code opEscOctal3()}. Otherwise it steps back
 * with {@code unfetch()} (presumably so the digit that was just fetched takes part in the
 * scan), scans an octal number with {@code scanUnsignedOctalNumber(3)} and stores the result
 * in {@code token} as a {@code RAW_BYTE}.
 *
 * @throws ValueException with {@code ERR_TOO_BIG_NUMBER} if the scan returns a negative value
 */
private void fetchTokenInCCFor_digit() {
    if (!syntax.opEscOctal3()) {
        return;
    }

    unfetch();
    final int start = p;
    final int scanned = scanUnsignedOctalNumber(3);
    if (scanned < 0) {
        throw new ValueException(ERR_TOO_BIG_NUMBER);
    }

    // If the scan position did not advance, nothing was read; that is not an error and the
    // value is simply taken to be zero.
    final int value = (p == start) ? 0 : scanned;

    token.type = TokenType.RAW_BYTE;
    token.setC(value);
}
/**
 * Handles an escape that starts with a zero digit outside a character class.
 *
 * <p>When the syntax enables {@code opEscOctal3()}, an octal number is scanned from the
 * current position and stored in {@code token} as a {@code RAW_BYTE}. When it does not, the
 * position is advanced by one only if the current character is not {@code '0'}.
 *
 * @throws ValueException with {@code ERR_TOO_BIG_NUMBER} if the scan returns a negative value
 */
private void fetchTokenFor_zero() {
    if (!syntax.opEscOctal3()) {
        if (c != '0') {
            inc();
        }
        return;
    }

    final int start = p;
    // Argument handed to the octal scanner (presumably the maximum number of digits to read):
    // 2 when the current character is '0', 3 otherwise.
    final int limit = (c == '0') ? 2 : 3;
    final int scanned = scanUnsignedOctalNumber(limit);
    if (scanned < 0) {
        throw new ValueException(ERR_TOO_BIG_NUMBER);
    }

    // An unchanged position means nothing was read; this is tolerated and yields zero.
    final int value = (p == start) ? 0 : scanned;

    token.type = TokenType.RAW_BYTE;
    token.setC(value);
}
/**
 * Handles an {@code x} escape inside a character class.
 *
 * <p>Two forms are recognised, depending on the active syntax options:
 * <ul>
 *   <li>a brace form (next character is <code>'{'</code> and {@code opEscXBraceHex8()} is
 *       enabled): a hexadecimal number is scanned with {@code scanUnsignedHexadecimalNumber(8)}
 *       and, if it is followed by <code>'}'</code>, the token becomes a {@code CODE_POINT};
 *       otherwise the position is restored and the token is left untouched;</li>
 *   <li>a short form ({@code opEscXHex2()} enabled): a hexadecimal number is scanned with
 *       {@code scanUnsignedHexadecimalNumber(2)} and the token becomes a {@code RAW_BYTE}.</li>
 * </ul>
 * Nothing happens when no input is left.
 *
 * @throws ValueException with {@code ERR_TOO_BIG_WIDE_CHAR_VALUE} or {@code ERR_TOO_BIG_NUMBER}
 *         if the respective scan returns a negative value, or with
 *         {@code ERR_TOO_LONG_WIDE_CHAR_VALUE} if a hex digit still follows the brace-form scan
 */
private void fetchTokenInCCFor_x() {
    if (!left()) {
        return;
    }

    final int start = p;

    if (peekIs('{') && syntax.opEscXBraceHex8()) {
        inc();
        final int codePoint = scanUnsignedHexadecimalNumber(8);
        if (codePoint < 0) {
            throw new ValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
        }
        // A further hex digit right after the scanned number is rejected.
        if (left() && EncodingHelper.isXDigit(peek())) {
            throw new ValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
        }

        // The position must have moved past the opening brace by at least one more
        // character, and a closing brace has to come next.
        final boolean wellFormed = p > start + 1 && left() && peekIs('}');
        if (!wellFormed) {
            /* nothing could be read, or the format is invalid: rewind */
            p = start;
            return;
        }

        inc();
        token.type = TokenType.CODE_POINT;
        token.setCode(codePoint);
        return;
    }

    if (syntax.opEscXHex2()) {
        final int scanned = scanUnsignedHexadecimalNumber(2);
        if (scanned < 0) {
            throw new ValueException(ERR_TOO_BIG_NUMBER);
        }

        // Reading nothing is not an error; the byte value defaults to zero.
        final int value = (p == start) ? 0 : scanned;

        token.type = TokenType.RAW_BYTE;
        token.setC(value);
    }
}
protected final TokenType fetchToken() { // mark(); // out start: while (true) { if (!left()) { token.type = TokenType.EOT; return token.type; } token.type = TokenType.STRING; token.backP = p; fetch(); if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn) if (!left()) { throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE); } token.backP = p; fetch(); token.setC(c); token.escaped = true; switch (c) { case '*': if (syntax.opEscAsteriskZeroInf()) { fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); } break; case '+': if (syntax.opEscPlusOneInf()) { fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); } break; case '?': if (syntax.opEscQMarkZeroOne()) { fetchTokenFor_repeat(0, 1); } break; case '{': if (syntax.opEscBraceInterval()) { fetchTokenFor_openBrace(); } break; case '|': if (syntax.opEscVBarAlt()) { token.type = TokenType.ALT; } break; case '(': if (syntax.opEscLParenSubexp()) { token.type = TokenType.SUBEXP_OPEN; } break; case ')': if (syntax.opEscLParenSubexp()) { token.type = TokenType.SUBEXP_CLOSE; } break; case 'w': if (syntax.opEscWWord()) { fetchTokenInCCFor_charType( false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); } break; case 'W': if (syntax.opEscWWord()) { fetchTokenInCCFor_charType( true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); } break; case 'b': if (syntax.opEscBWordBound()) { fetchTokenFor_anchor(AnchorType.WORD_BOUND); } break; case 'B': if (syntax.opEscBWordBound()) { fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND); } break; case '<': if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) { fetchTokenFor_anchor(AnchorType.WORD_BEGIN); } break; case '>': if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) { fetchTokenFor_anchor(AnchorType.WORD_END); } break; case 's': if (syntax.opEscSWhiteSpace()) { fetchTokenInCCFor_charType( false, Config.NON_UNICODE_SDW ? 
CharacterType.S : CharacterType.SPACE); } break; case 'S': if (syntax.opEscSWhiteSpace()) { fetchTokenInCCFor_charType( true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); } break; case 'd': if (syntax.opEscDDigit()) { fetchTokenInCCFor_charType( false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); } break; case 'D': if (syntax.opEscDDigit()) { fetchTokenInCCFor_charType( true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); } break; case 'h': if (syntax.op2EscHXDigit()) { fetchTokenInCCFor_charType(false, CharacterType.XDIGIT); } break; case 'H': if (syntax.op2EscHXDigit()) { fetchTokenInCCFor_charType(true, CharacterType.XDIGIT); } break; case 'A': if (syntax.opEscAZBufAnchor()) { fetchTokenFor_anchor(AnchorType.BEGIN_BUF); } break; case 'Z': if (syntax.opEscAZBufAnchor()) { fetchTokenFor_anchor(AnchorType.SEMI_END_BUF); } break; case 'z': if (syntax.opEscAZBufAnchor()) { fetchTokenFor_anchor(AnchorType.END_BUF); } break; case 'G': if (syntax.opEscCapitalGBeginAnchor()) { fetchTokenFor_anchor(AnchorType.BEGIN_POSITION); } break; case '`': if (syntax.op2EscGnuBufAnchor()) { fetchTokenFor_anchor(AnchorType.BEGIN_BUF); } break; case '\'': if (syntax.op2EscGnuBufAnchor()) { fetchTokenFor_anchor(AnchorType.END_BUF); } break; case 'x': fetchTokenFor_xBrace(); break; case 'u': fetchTokenFor_uHex(); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': fetchTokenFor_digit(); break; case '0': fetchTokenFor_zero(); break; default: unfetch(); final int num = fetchEscapedValue(); /* set_raw: */ if (token.getC() != num) { token.type = TokenType.CODE_POINT; token.setCode(num); } else { /* string */ p = token.backP + 1; } break; } // switch (c) } else { token.setC(c); token.escaped = false; if (Config.USE_VARIABLE_META_CHARS && (c != MetaChar.INEFFECTIVE_META_CHAR && syntax.opVariableMetaCharacters())) { fetchTokenFor_metaChars(); break; } { switch (c) { case '.': if 
(syntax.opDotAnyChar()) { token.type = TokenType.ANYCHAR; } break; case '*': if (syntax.opAsteriskZeroInf()) { fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); } break; case '+': if (syntax.opPlusOneInf()) { fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); } break; case '?': if (syntax.opQMarkZeroOne()) { fetchTokenFor_repeat(0, 1); } break; case '{': if (syntax.opBraceInterval()) { fetchTokenFor_openBrace(); } break; case '|': if (syntax.opVBarAlt()) { token.type = TokenType.ALT; } break; case '(': if (peekIs('?') && syntax.op2QMarkGroupEffect()) { inc(); if (peekIs('#')) { fetch(); while (true) { if (!left()) { throw new SyntaxException(ERR_END_PATTERN_IN_GROUP); } fetch(); if (c == syntax.metaCharTable.esc) { if (left()) { fetch(); } } else { if (c == ')') { break; } } } continue start; // goto start } unfetch(); } if (syntax.opLParenSubexp()) { token.type = TokenType.SUBEXP_OPEN; } break; case ')': if (syntax.opLParenSubexp()) { token.type = TokenType.SUBEXP_CLOSE; } break; case '^': if (syntax.opLineAnchor()) { fetchTokenFor_anchor( isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE); } break; case '$': if (syntax.opLineAnchor()) { fetchTokenFor_anchor( isSingleline(env.option) ? AnchorType.END_BUF : AnchorType.END_LINE); } break; case '[': if (syntax.opBracketCC()) { token.type = TokenType.CC_CC_OPEN; } break; case ']': // if (*src > env->pattern) /* /].../ is allowed. */ // CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); break; case '#': if (Option.isExtend(env.option)) { while (left()) { fetch(); if (EncodingHelper.isNewLine(c)) { break; } } continue start; // goto start } break; case ' ': case '\t': case '\n': case '\r': case '\f': if (Option.isExtend(env.option)) { continue start; // goto start } break; default: // string break; } // switch } } break; } // while return token.type; }
protected final TokenType fetchTokenInCC() { if (!left()) { token.type = TokenType.EOT; return token.type; } fetch(); token.type = TokenType.CHAR; token.setC(c); token.escaped = false; if (c == ']') { token.type = TokenType.CC_CLOSE; } else if (c == '-') { token.type = TokenType.CC_RANGE; } else if (c == syntax.metaCharTable.esc) { if (!syntax.backSlashEscapeInCC()) { return token.type; } if (!left()) { throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE); } fetch(); token.escaped = true; token.setC(c); switch (c) { case 'w': fetchTokenInCCFor_charType( false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); break; case 'W': fetchTokenInCCFor_charType( true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); break; case 'd': fetchTokenInCCFor_charType( false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); break; case 'D': fetchTokenInCCFor_charType( true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); break; case 's': fetchTokenInCCFor_charType( false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); break; case 'S': fetchTokenInCCFor_charType( true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); break; case 'h': if (syntax.op2EscHXDigit()) { fetchTokenInCCFor_charType(false, CharacterType.XDIGIT); } break; case 'H': if (syntax.op2EscHXDigit()) { fetchTokenInCCFor_charType(true, CharacterType.XDIGIT); } break; case 'x': fetchTokenInCCFor_x(); break; case 'u': fetchTokenInCCFor_u(); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': fetchTokenInCCFor_digit(); break; default: unfetch(); final int num = fetchEscapedValue(); if (token.getC() != num) { token.setCode(num); token.type = TokenType.CODE_POINT; } break; } // switch } else if (c == '&') { fetchTokenInCCFor_and(); } return token.type; }