Beispiel #1
0
  protected final TokenType fetchToken() {
    // mark(); // out
    start:
    while (true) {
      if (!left()) {
        token.type = TokenType.EOT;
        return token.type;
      }

      token.type = TokenType.STRING;
      token.backP = p;

      fetch();

      if (c == syntax.metaCharTable.esc
          && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn)
        if (!left()) {
          throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
        }

        token.backP = p;
        fetch();

        token.setC(c);
        token.escaped = true;
        switch (c) {
          case '*':
            if (syntax.opEscAsteriskZeroInf()) {
              fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE);
            }
            break;
          case '+':
            if (syntax.opEscPlusOneInf()) {
              fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE);
            }
            break;
          case '?':
            if (syntax.opEscQMarkZeroOne()) {
              fetchTokenFor_repeat(0, 1);
            }
            break;
          case '{':
            if (syntax.opEscBraceInterval()) {
              fetchTokenFor_openBrace();
            }
            break;
          case '|':
            if (syntax.opEscVBarAlt()) {
              token.type = TokenType.ALT;
            }
            break;
          case '(':
            if (syntax.opEscLParenSubexp()) {
              token.type = TokenType.SUBEXP_OPEN;
            }
            break;
          case ')':
            if (syntax.opEscLParenSubexp()) {
              token.type = TokenType.SUBEXP_CLOSE;
            }
            break;
          case 'w':
            if (syntax.opEscWWord()) {
              fetchTokenInCCFor_charType(
                  false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
            }
            break;
          case 'W':
            if (syntax.opEscWWord()) {
              fetchTokenInCCFor_charType(
                  true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
            }
            break;
          case 'b':
            if (syntax.opEscBWordBound()) {
              fetchTokenFor_anchor(AnchorType.WORD_BOUND);
            }
            break;
          case 'B':
            if (syntax.opEscBWordBound()) {
              fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND);
            }
            break;
          case '<':
            if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) {
              fetchTokenFor_anchor(AnchorType.WORD_BEGIN);
            }
            break;
          case '>':
            if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) {
              fetchTokenFor_anchor(AnchorType.WORD_END);
            }
            break;
          case 's':
            if (syntax.opEscSWhiteSpace()) {
              fetchTokenInCCFor_charType(
                  false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
            }
            break;
          case 'S':
            if (syntax.opEscSWhiteSpace()) {
              fetchTokenInCCFor_charType(
                  true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
            }
            break;
          case 'd':
            if (syntax.opEscDDigit()) {
              fetchTokenInCCFor_charType(
                  false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
            }
            break;
          case 'D':
            if (syntax.opEscDDigit()) {
              fetchTokenInCCFor_charType(
                  true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
            }
            break;
          case 'h':
            if (syntax.op2EscHXDigit()) {
              fetchTokenInCCFor_charType(false, CharacterType.XDIGIT);
            }
            break;
          case 'H':
            if (syntax.op2EscHXDigit()) {
              fetchTokenInCCFor_charType(true, CharacterType.XDIGIT);
            }
            break;
          case 'A':
            if (syntax.opEscAZBufAnchor()) {
              fetchTokenFor_anchor(AnchorType.BEGIN_BUF);
            }
            break;
          case 'Z':
            if (syntax.opEscAZBufAnchor()) {
              fetchTokenFor_anchor(AnchorType.SEMI_END_BUF);
            }
            break;
          case 'z':
            if (syntax.opEscAZBufAnchor()) {
              fetchTokenFor_anchor(AnchorType.END_BUF);
            }
            break;
          case 'G':
            if (syntax.opEscCapitalGBeginAnchor()) {
              fetchTokenFor_anchor(AnchorType.BEGIN_POSITION);
            }
            break;
          case '`':
            if (syntax.op2EscGnuBufAnchor()) {
              fetchTokenFor_anchor(AnchorType.BEGIN_BUF);
            }
            break;
          case '\'':
            if (syntax.op2EscGnuBufAnchor()) {
              fetchTokenFor_anchor(AnchorType.END_BUF);
            }
            break;
          case 'x':
            fetchTokenFor_xBrace();
            break;
          case 'u':
            fetchTokenFor_uHex();
            break;
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7':
          case '8':
          case '9':
            fetchTokenFor_digit();
            break;
          case '0':
            fetchTokenFor_zero();
            break;

          default:
            unfetch();
            final int num = fetchEscapedValue();

            /* set_raw: */
            if (token.getC() != num) {
              token.type = TokenType.CODE_POINT;
              token.setCode(num);
            } else {
                /* string */
              p = token.backP + 1;
            }
            break;
        } // switch (c)

      } else {
        token.setC(c);
        token.escaped = false;

        if (Config.USE_VARIABLE_META_CHARS
            && (c != MetaChar.INEFFECTIVE_META_CHAR && syntax.opVariableMetaCharacters())) {
          fetchTokenFor_metaChars();
          break;
        }

        {
          switch (c) {
            case '.':
              if (syntax.opDotAnyChar()) {
                token.type = TokenType.ANYCHAR;
              }
              break;
            case '*':
              if (syntax.opAsteriskZeroInf()) {
                fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE);
              }
              break;
            case '+':
              if (syntax.opPlusOneInf()) {
                fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE);
              }
              break;
            case '?':
              if (syntax.opQMarkZeroOne()) {
                fetchTokenFor_repeat(0, 1);
              }
              break;
            case '{':
              if (syntax.opBraceInterval()) {
                fetchTokenFor_openBrace();
              }
              break;
            case '|':
              if (syntax.opVBarAlt()) {
                token.type = TokenType.ALT;
              }
              break;

            case '(':
              if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
                inc();
                if (peekIs('#')) {
                  fetch();
                  while (true) {
                    if (!left()) {
                      throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
                    }
                    fetch();
                    if (c == syntax.metaCharTable.esc) {
                      if (left()) {
                        fetch();
                      }
                    } else {
                      if (c == ')') {
                        break;
                      }
                    }
                  }
                  continue start; // goto start
                }
                unfetch();
              }

              if (syntax.opLParenSubexp()) {
                token.type = TokenType.SUBEXP_OPEN;
              }
              break;
            case ')':
              if (syntax.opLParenSubexp()) {
                token.type = TokenType.SUBEXP_CLOSE;
              }
              break;
            case '^':
              if (syntax.opLineAnchor()) {
                fetchTokenFor_anchor(
                    isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE);
              }
              break;
            case '$':
              if (syntax.opLineAnchor()) {
                fetchTokenFor_anchor(
                    isSingleline(env.option) ? AnchorType.END_BUF : AnchorType.END_LINE);
              }
              break;
            case '[':
              if (syntax.opBracketCC()) {
                token.type = TokenType.CC_CC_OPEN;
              }
              break;
            case ']':
              // if (*src > env->pattern)   /* /].../ is allowed. */
              // CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
              break;
            case '#':
              if (Option.isExtend(env.option)) {
                while (left()) {
                  fetch();
                  if (EncodingHelper.isNewLine(c)) {
                    break;
                  }
                }
                continue start; // goto start
              }
              break;

            case ' ':
            case '\t':
            case '\n':
            case '\r':
            case '\f':
              if (Option.isExtend(env.option)) {
                continue start; // goto start
              }
              break;

            default: // string
              break;
          } // switch
        }
      }

      break;
    } // while
    return token.type;
  }