Esempio n. 1
0
  /**
   * Scans the next token of the pattern into {@code token}.
   *
   * <p>When no input is left the token type becomes {@link TokenType#EOT}. Otherwise the
   * type defaults to {@link TokenType#STRING} and is refined by the character just fetched:
   * an effective escape character dispatches on the character that follows it, anything
   * else is dispatched as a possible unescaped metacharacter. Every operator is honoured
   * only when the active {@code syntax} enables it; otherwise the token stays a string.
   *
   * <p>{@code (?#...)} groups are skipped when the syntax enables {@code (?} groups, and
   * {@code #} line comments and whitespace are skipped when the extend option is set; in
   * those cases scanning restarts with the following character.
   *
   * @return the type stored in {@code token.type}
   * @throws SyntaxException if the pattern ends right after an escape character or inside
   *     a {@code (?#...)} comment group
   */
  protected final TokenType fetchToken() {
    rescan:
    for (;;) {
      if (!left()) {
        token.type = TokenType.EOT;
        return token.type;
      }

      // Plain string content until one of the branches below decides otherwise.
      token.type = TokenType.STRING;
      token.backP = p;

      fetch();

      final boolean escapeSequence =
          c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape();

      if (escapeSequence) {
        if (!left()) {
          throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
        }

        // Point backP at the character that follows the escape.
        token.backP = p;
        fetch();

        token.setC(c);
        token.escaped = true;

        switch (c) {
          /* escaped quantifiers */
          case '*':
            if (syntax.opEscAsteriskZeroInf()) {
              fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE);
            }
            break;
          case '+':
            if (syntax.opEscPlusOneInf()) {
              fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE);
            }
            break;
          case '?':
            if (syntax.opEscQMarkZeroOne()) {
              fetchTokenFor_repeat(0, 1);
            }
            break;
          case '{':
            if (syntax.opEscBraceInterval()) {
              fetchTokenFor_openBrace();
            }
            break;

          /* escaped alternation and grouping */
          case '|':
            if (syntax.opEscVBarAlt()) {
              token.type = TokenType.ALT;
            }
            break;
          case '(':
          case ')':
            if (syntax.opEscLParenSubexp()) {
              token.type = c == '(' ? TokenType.SUBEXP_OPEN : TokenType.SUBEXP_CLOSE;
            }
            break;

          /* character types; the upper-case letter selects the negated form */
          case 'w':
          case 'W':
            if (syntax.opEscWWord()) {
              fetchTokenInCCFor_charType(
                  c == 'W', Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
            }
            break;
          case 's':
          case 'S':
            if (syntax.opEscSWhiteSpace()) {
              fetchTokenInCCFor_charType(
                  c == 'S', Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
            }
            break;
          case 'd':
          case 'D':
            if (syntax.opEscDDigit()) {
              fetchTokenInCCFor_charType(
                  c == 'D', Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
            }
            break;
          case 'h':
          case 'H':
            if (syntax.op2EscHXDigit()) {
              fetchTokenInCCFor_charType(c == 'H', CharacterType.XDIGIT);
            }
            break;

          /* anchors */
          case 'b':
          case 'B':
            if (syntax.opEscBWordBound()) {
              fetchTokenFor_anchor(
                  c == 'b' ? AnchorType.WORD_BOUND : AnchorType.NOT_WORD_BOUND);
            }
            break;
          case '<':
          case '>':
            if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) {
              fetchTokenFor_anchor(c == '<' ? AnchorType.WORD_BEGIN : AnchorType.WORD_END);
            }
            break;
          case 'A':
            if (syntax.opEscAZBufAnchor()) {
              fetchTokenFor_anchor(AnchorType.BEGIN_BUF);
            }
            break;
          case 'Z':
            if (syntax.opEscAZBufAnchor()) {
              fetchTokenFor_anchor(AnchorType.SEMI_END_BUF);
            }
            break;
          case 'z':
            if (syntax.opEscAZBufAnchor()) {
              fetchTokenFor_anchor(AnchorType.END_BUF);
            }
            break;
          case 'G':
            if (syntax.opEscCapitalGBeginAnchor()) {
              fetchTokenFor_anchor(AnchorType.BEGIN_POSITION);
            }
            break;
          case '`':
          case '\'':
            if (syntax.op2EscGnuBufAnchor()) {
              fetchTokenFor_anchor(c == '`' ? AnchorType.BEGIN_BUF : AnchorType.END_BUF);
            }
            break;

          /* numeric escapes */
          case 'x':
            fetchTokenFor_xBrace();
            break;
          case 'u':
            fetchTokenFor_uHex();
            break;
          case '0':
            fetchTokenFor_zero();
            break;
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7':
          case '8':
          case '9':
            fetchTokenFor_digit();
            break;

          default:
            // Re-read the escaped character through the generic escape decoder.
            unfetch();
            final int decoded = fetchEscapedValue();

            if (token.getC() == decoded) {
              /* string: the escape stands for the character itself */
              p = token.backP + 1;
            } else {
              /* set_raw */
              token.type = TokenType.CODE_POINT;
              token.setCode(decoded);
            }
            break;
        } // switch (c)

      } else {
        token.setC(c);
        token.escaped = false;

        if (Config.USE_VARIABLE_META_CHARS
            && c != MetaChar.INEFFECTIVE_META_CHAR
            && syntax.opVariableMetaCharacters()) {
          fetchTokenFor_metaChars();
          return token.type;
        }

        switch (c) {
          case '.':
            if (syntax.opDotAnyChar()) {
              token.type = TokenType.ANYCHAR;
            }
            break;
          case '*':
            if (syntax.opAsteriskZeroInf()) {
              fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE);
            }
            break;
          case '+':
            if (syntax.opPlusOneInf()) {
              fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE);
            }
            break;
          case '?':
            if (syntax.opQMarkZeroOne()) {
              fetchTokenFor_repeat(0, 1);
            }
            break;
          case '{':
            if (syntax.opBraceInterval()) {
              fetchTokenFor_openBrace();
            }
            break;
          case '|':
            if (syntax.opVBarAlt()) {
              token.type = TokenType.ALT;
            }
            break;

          case '(':
            if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
              inc();
              if (peekIs('#')) {
                // "(?#...)" comment group: consume up to the first unescaped ')'.
                fetch();
                boolean commentClosed = false;
                while (!commentClosed) {
                  if (!left()) {
                    throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
                  }
                  fetch();
                  if (c == syntax.metaCharTable.esc) {
                    // Swallow the escaped character so an escaped ')' does not close.
                    if (left()) {
                      fetch();
                    }
                  } else {
                    commentClosed = c == ')';
                  }
                }
                continue rescan; // goto start
              }
              // Not a comment group: step back over the '?'.
              unfetch();
            }

            if (syntax.opLParenSubexp()) {
              token.type = TokenType.SUBEXP_OPEN;
            }
            break;
          case ')':
            if (syntax.opLParenSubexp()) {
              token.type = TokenType.SUBEXP_CLOSE;
            }
            break;

          case '^':
            if (syntax.opLineAnchor()) {
              if (isSingleline(env.option)) {
                fetchTokenFor_anchor(AnchorType.BEGIN_BUF);
              } else {
                fetchTokenFor_anchor(AnchorType.BEGIN_LINE);
              }
            }
            break;
          case '$':
            if (syntax.opLineAnchor()) {
              if (isSingleline(env.option)) {
                fetchTokenFor_anchor(AnchorType.END_BUF);
              } else {
                fetchTokenFor_anchor(AnchorType.END_LINE);
              }
            }
            break;

          case '[':
            if (syntax.opBracketCC()) {
              token.type = TokenType.CC_CC_OPEN;
            }
            break;
          case ']':
            /* a lone "]" is allowed and stays a plain string token */
            break;

          case '#':
            if (!Option.isExtend(env.option)) {
              break;
            }
            // Extended mode: discard the rest of the line, newline included.
            while (left()) {
              fetch();
              if (EncodingHelper.isNewLine(c)) {
                break;
              }
            }
            continue rescan; // goto start

          case ' ':
          case '\t':
          case '\n':
          case '\r':
          case '\f':
            // Extended mode ignores unescaped whitespace.
            if (Option.isExtend(env.option)) {
              continue rescan; // goto start
            }
            break;

          default: // string
            break;
        } // switch
      }

      return token.type;
    } // for
  }
Esempio n. 2
0
  /**
   * Parses the body of an interval quantifier ({@code {n}}, {@code {n,}}, {@code {n,m}} or,
   * when the syntax allows the lower bound to be omitted, {@code {,m}}), starting just after
   * the opening brace.
   *
   * <p>On success {@code token} becomes an {@link TokenType#INTERVAL} token carrying the
   * lower and upper repeat counts.
   *
   * @return {@code 0} for a normal {@code {n,m}} / {@code {n,}} / {@code {,m}} interval;
   *     {@code 2} for a fixed {@code {n}} interval; {@code 1} when the pattern ends right
   *     after the opening brace and the syntax allows invalid intervals; for any other
   *     malformed interval, whatever {@code invalidRangeQuantifier(synAllow)} returns
   *     (NOTE(review): presumably also {@code 1} after restoring the marked position when
   *     {@code synAllow} is set — confirm against that helper)
   * @throws SyntaxException if the pattern ends after the opening brace, or the brace is
   *     directly followed by {@code )}, {@code (} or {@code |}, while invalid intervals are
   *     not allowed; or if the lower bound is too large
   * @throws ValueException if the upper bound is too large or smaller than the lower bound
   */
  private int fetchRangeQuantifier() {
    // NOTE(review): mark() presumably records the current position in _p, which is compared
    // with p below to detect that no digits were consumed — confirm against the scanner base.
    mark();
    final boolean synAllow = syntax.allowInvalidInterval();

    if (!left()) {
      if (synAllow) {
        return 1; /* "....{" : OK! */
      }
      throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
    }

    if (!synAllow) {
      c = peek();
      if (c == ')' || c == '(' || c == '|') {
        throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
      }
    }

    int low = scanUnsignedNumber();
    // A negative result or a value above the configured maximum is rejected as too big.
    if (low < 0 || low > Config.MAX_REPEAT_NUM) {
      throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
    }

    boolean nonLow = false;
    if (p == _p) {
      /* can't read low */
      if (syntax.allowIntervalLowAbbrev()) {
        low = 0;
        nonLow = true;
      } else {
        return invalidRangeQuantifier(synAllow);
      }
    }

    if (!left()) {
      return invalidRangeQuantifier(synAllow);
    }

    fetch();
    int up;
    int ret = 0;
    if (c == ',') {
      final int prev = p; // position before the upper bound, to detect "no digits read"
      up = scanUnsignedNumber();
      if (up < 0 || up > Config.MAX_REPEAT_NUM) {
        throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
      }

      if (p == prev) {
        if (nonLow) {
          // "{,}" : neither bound given
          return invalidRangeQuantifier(synAllow);
        }
        up = QuantifierNode.REPEAT_INFINITE; /* {n,} : {n,infinite} */
      }
    } else {
      if (nonLow) {
        return invalidRangeQuantifier(synAllow);
      }
      unfetch();
      up = low; /* {n} : exact n times */
      ret = 2; /* fixed */
    }

    if (!left()) {
      return invalidRangeQuantifier(synAllow);
    }
    fetch();

    if (syntax.opEscBraceInterval()) {
      // With escaped-brace syntax the closing brace must be escaped too. The !left() guard
      // stops the fetch() below from reading past the end of the pattern when the pattern
      // stops right after the escape character; that input is an invalid interval.
      if (c != syntax.metaCharTable.esc || !left()) {
        return invalidRangeQuantifier(synAllow);
      }
      fetch();
    }

    if (c != '}') {
      return invalidRangeQuantifier(synAllow);
    }

    if (!isRepeatInfinite(up) && low > up) {
      throw new ValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE);
    }

    token.type = TokenType.INTERVAL;
    token.setRepeatLower(low);
    token.setRepeatUpper(up);

    return ret; /* 0: normal {n,m}, 2: fixed {n} */
  }