protected final TokenType fetchToken() { // mark(); // out start: while (true) { if (!left()) { token.type = TokenType.EOT; return token.type; } token.type = TokenType.STRING; token.backP = p; fetch(); if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn) if (!left()) { throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE); } token.backP = p; fetch(); token.setC(c); token.escaped = true; switch (c) { case '*': if (syntax.opEscAsteriskZeroInf()) { fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); } break; case '+': if (syntax.opEscPlusOneInf()) { fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); } break; case '?': if (syntax.opEscQMarkZeroOne()) { fetchTokenFor_repeat(0, 1); } break; case '{': if (syntax.opEscBraceInterval()) { fetchTokenFor_openBrace(); } break; case '|': if (syntax.opEscVBarAlt()) { token.type = TokenType.ALT; } break; case '(': if (syntax.opEscLParenSubexp()) { token.type = TokenType.SUBEXP_OPEN; } break; case ')': if (syntax.opEscLParenSubexp()) { token.type = TokenType.SUBEXP_CLOSE; } break; case 'w': if (syntax.opEscWWord()) { fetchTokenInCCFor_charType( false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); } break; case 'W': if (syntax.opEscWWord()) { fetchTokenInCCFor_charType( true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); } break; case 'b': if (syntax.opEscBWordBound()) { fetchTokenFor_anchor(AnchorType.WORD_BOUND); } break; case 'B': if (syntax.opEscBWordBound()) { fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND); } break; case '<': if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) { fetchTokenFor_anchor(AnchorType.WORD_BEGIN); } break; case '>': if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) { fetchTokenFor_anchor(AnchorType.WORD_END); } break; case 's': if (syntax.opEscSWhiteSpace()) { fetchTokenInCCFor_charType( false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); } break; case 'S': if (syntax.opEscSWhiteSpace()) { fetchTokenInCCFor_charType( true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); } break; case 'd': if (syntax.opEscDDigit()) { fetchTokenInCCFor_charType( false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); } break; case 'D': if (syntax.opEscDDigit()) { fetchTokenInCCFor_charType( true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); } break; case 'h': if (syntax.op2EscHXDigit()) { fetchTokenInCCFor_charType(false, CharacterType.XDIGIT); } break; case 'H': if (syntax.op2EscHXDigit()) { fetchTokenInCCFor_charType(true, CharacterType.XDIGIT); } break; case 'A': if (syntax.opEscAZBufAnchor()) { fetchTokenFor_anchor(AnchorType.BEGIN_BUF); } break; case 'Z': if (syntax.opEscAZBufAnchor()) { fetchTokenFor_anchor(AnchorType.SEMI_END_BUF); } break; case 'z': if (syntax.opEscAZBufAnchor()) { fetchTokenFor_anchor(AnchorType.END_BUF); } break; case 'G': if (syntax.opEscCapitalGBeginAnchor()) { fetchTokenFor_anchor(AnchorType.BEGIN_POSITION); } break; case '`': if (syntax.op2EscGnuBufAnchor()) { fetchTokenFor_anchor(AnchorType.BEGIN_BUF); } break; case '\'': if (syntax.op2EscGnuBufAnchor()) { fetchTokenFor_anchor(AnchorType.END_BUF); } break; case 'x': fetchTokenFor_xBrace(); break; case 'u': fetchTokenFor_uHex(); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': fetchTokenFor_digit(); break; case '0': fetchTokenFor_zero(); break; default: unfetch(); final int num = fetchEscapedValue(); /* set_raw: */ if (token.getC() != num) { token.type = TokenType.CODE_POINT; token.setCode(num); } else { /* string */ p = token.backP + 1; } break; } // switch (c) } else { token.setC(c); token.escaped = false; if (Config.USE_VARIABLE_META_CHARS && (c != MetaChar.INEFFECTIVE_META_CHAR && syntax.opVariableMetaCharacters())) { fetchTokenFor_metaChars(); break; } { switch (c) { case '.': if (syntax.opDotAnyChar()) { token.type = TokenType.ANYCHAR; } break; case '*': if (syntax.opAsteriskZeroInf()) { fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); } break; case '+': if (syntax.opPlusOneInf()) { fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); } break; case '?': if (syntax.opQMarkZeroOne()) { fetchTokenFor_repeat(0, 1); } break; case '{': if (syntax.opBraceInterval()) { fetchTokenFor_openBrace(); } break; case '|': if (syntax.opVBarAlt()) { token.type = TokenType.ALT; } break; case '(': if (peekIs('?') && syntax.op2QMarkGroupEffect()) { inc(); if (peekIs('#')) { fetch(); while (true) { if (!left()) { throw new SyntaxException(ERR_END_PATTERN_IN_GROUP); } fetch(); if (c == syntax.metaCharTable.esc) { if (left()) { fetch(); } } else { if (c == ')') { break; } } } continue start; // goto start } unfetch(); } if (syntax.opLParenSubexp()) { token.type = TokenType.SUBEXP_OPEN; } break; case ')': if (syntax.opLParenSubexp()) { token.type = TokenType.SUBEXP_CLOSE; } break; case '^': if (syntax.opLineAnchor()) { fetchTokenFor_anchor( isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE); } break; case '$': if (syntax.opLineAnchor()) { fetchTokenFor_anchor( isSingleline(env.option) ? AnchorType.END_BUF : AnchorType.END_LINE); } break; case '[': if (syntax.opBracketCC()) { token.type = TokenType.CC_CC_OPEN; } break; case ']': // if (*src > env->pattern) /* /].../ is allowed. */ // CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); break; case '#': if (Option.isExtend(env.option)) { while (left()) { fetch(); if (EncodingHelper.isNewLine(c)) { break; } } continue start; // goto start } break; case ' ': case '\t': case '\n': case '\r': case '\f': if (Option.isExtend(env.option)) { continue start; // goto start } break; default: // string break; } // switch } } break; } // while return token.type; }
/** @return 0: normal {n,m}, 2: fixed {n} !introduce returnCode here */ private int fetchRangeQuantifier() { mark(); final boolean synAllow = syntax.allowInvalidInterval(); if (!left()) { if (synAllow) { return 1; /* "....{" : OK! */ } throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE); } if (!synAllow) { c = peek(); if (c == ')' || c == '(' || c == '|') { throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE); } } int low = scanUnsignedNumber(); if (low < 0) { throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); } if (low > Config.MAX_REPEAT_NUM) { throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); } boolean nonLow = false; if (p == _p) { /* can't read low */ if (syntax.allowIntervalLowAbbrev()) { low = 0; nonLow = true; } else { return invalidRangeQuantifier(synAllow); } } if (!left()) { return invalidRangeQuantifier(synAllow); } fetch(); int up; int ret = 0; if (c == ',') { final int prev = p; // ??? last up = scanUnsignedNumber(); if (up < 0) { throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); } if (up > Config.MAX_REPEAT_NUM) { throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); } if (p == prev) { if (nonLow) { return invalidRangeQuantifier(synAllow); } up = QuantifierNode.REPEAT_INFINITE; /* {n,} : {n,infinite} */ } } else { if (nonLow) { return invalidRangeQuantifier(synAllow); } unfetch(); up = low; /* {n} : exact n times */ ret = 2; /* fixed */ } if (!left()) { return invalidRangeQuantifier(synAllow); } fetch(); if (syntax.opEscBraceInterval()) { if (c != syntax.metaCharTable.esc) { return invalidRangeQuantifier(synAllow); } fetch(); } if (c != '}') { return invalidRangeQuantifier(synAllow); } if (!isRepeatInfinite(up) && low > up) { throw new ValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE); } token.type = TokenType.INTERVAL; token.setRepeatLower(low); token.setRepeatUpper(up); return ret; /* 0: normal {n,m}, 2: fixed {n} */ }