예제 #1
0
  /**
   * Reads an unsigned integer written in the given radix, starting at the
   * current lookahead character.
   *
   * <p>Digits are collected while the lookahead is a valid digit for
   * {@code radix} and fewer than {@code maxDigits} have been read; each
   * collected character is handed to {@code __match} (which presumably
   * advances the input). Note that a {@code maxDigits} of 0 therefore reads
   * no digits at all.
   *
   * @param radix     the numeric base used to recognise and convert digits
   * @param minDigits the smallest acceptable number of digits
   * @param maxDigits the largest number of digits that will be read
   * @return the value of the digits that were read
   * @throws MalformedPatternException if the digit count falls outside
   *     {@code [minDigits, maxDigits]} or the collected text cannot be
   *     converted to an {@code int}
   */
  private int __parseUnsignedInteger(int radix, int minDigits, int maxDigits)
      throws MalformedPatternException {
    // Values are expected to be short, so a capacity of 4 is plenty to start.
    StringBuffer text = new StringBuffer(4);
    int count = 0;

    for (; count < maxDigits && Character.digit(__lookahead, radix) != -1; ++count) {
      text.append((char) __lookahead);
      __match(__lookahead);
    }

    boolean countInRange = minDigits <= count && count <= maxDigits;
    if (!countInRange) {
      throw new MalformedPatternException(
          "Parse error: unexpected number of digits at position " + __bytesRead);
    }

    try {
      return Integer.parseInt(text.toString(), radix);
    } catch (NumberFormatException invalid) {
      // Reached for an empty buffer or a value too large for an int.
      throw new MalformedPatternException(
          "Parse error: numeric value at " + "position " + __bytesRead + " is invalid");
    }
  }
예제 #2
0
  /**
   * Parses a backslash escape at the current lookahead and builds the syntax
   * node that represents it. Every node created here is given the next value
   * of {@code __position}.
   *
   * <p>Recognised forms:
   * <ul>
   *   <li>{@code \xHH} - exactly two hexadecimal digits, taken as a character code</li>
   *   <li>{@code \cX} - control character derived from {@code X}</li>
   *   <li>{@code \NN} / {@code \NNN} - two or three digits, taken as an octal character code</li>
   *   <li>{@code \0} - the null character</li>
   *   <li>{@code \N} (one non-zero digit) - that digit as a literal character</li>
   *   <li>{@code \b} - backspace</li>
   *   <li>{@code \n \r \t \f} - the corresponding control characters</li>
   *   <li>{@code \d \D \w \W \s \S} - digit, word and whitespace classes and their negations</li>
   *   <li>anything else - the escaped character itself</li>
   * </ul>
   *
   * @return the node built for the escape sequence
   * @throws MalformedPatternException if a numeric escape has the wrong number
   *     of digits, or an octal escape contains the digit 8 or 9
   */
  private SyntaxNode __backslashToken() throws MalformedPatternException {
    SyntaxNode current;
    char token;
    int number;

    __match('\\');

    if (__lookahead == 'x') {
      __match('x');
      // Parse a hexadecimal number: exactly two hex digits.
      current = _newTokenNode((char) __parseUnsignedInteger(16, 2, 2), __position++);
    } else if (__lookahead == 'c') {
      __match('c');
      // Create a control character from the upper-cased operand: codes above
      // 63 are shifted down by 64 (e.g. 'A' (65) becomes 1), codes at or
      // below 63 are shifted up by 64 (e.g. '?' (63) becomes 127).
      token = Character.toUpperCase(__lookahead);
      token = (char) (token > 63 ? token - 64 : token + 64);
      current = new TokenNode(token, __position++);
      __match(__lookahead);
    } else if (__lookahead >= '0' && __lookahead <= '9') {
      // Peek one character further to see whether more than one digit follows.
      __match(__lookahead);

      if (__lookahead >= '0' && __lookahead <= '9') {
        // Two or more digits: treat the sequence as an octal character code.
        __putback();
        // The digits are first read as a decimal number, then that number's
        // decimal text is re-read as octal to obtain the character code.
        number = __parseUnsignedInteger(10, 2, 3);
        try {
          number = Integer.parseInt(Integer.toString(number), 8);
        } catch (NumberFormatException e) {
          // An 8 or 9 was present. Report it as a pattern error rather than
          // letting an unchecked exception escape to the caller.
          throw new MalformedPatternException(
              "Parse error: octal escape contains a digit outside the range 0-7");
        }
        current = _newTokenNode((char) number, __position++);
      } else {
        // A single digit: either \0 or an escaped digit.
        __putback();
        if (__lookahead == '0') {
          // \0 matches the null character
          __match('0');
          current = new TokenNode('\0', __position++);
        } else {
          // An escaped non-zero digit is taken as the literal digit character.
          current = _newTokenNode(__lookahead, __position++);
          __match(__lookahead);
        }
      }
    } else if (__lookahead == 'b') {
      // \b always means backspace here. Treating \b as a word boundary
      // outside a character class (LeafNode._WORD_BOUNDARY_MARKER_TOKEN) and
      // \B as a non-word boundary (LeafNode._NONWORD_BOUNDARY_MARKER_TOKEN)
      // is intentionally disabled, so \B falls through to the literal case.
      current = new TokenNode('\b', __position++);
      __match('b');
    } else {
      CharacterClassNode characterSet;
      token = __lookahead;

      // Translate the simple control-character escapes first. The translated
      // values cannot collide with the class letters handled below, so they
      // end up in the default (literal) case.
      switch (__lookahead) {
        case 'n':
          token = '\n';
          break;
        case 'r':
          token = '\r';
          break;
        case 't':
          token = '\t';
          break;
        case 'f':
          token = '\f';
          break;
      }

      switch (token) {
        case 'd':
          // Digits.
          characterSet = new CharacterClassNode(__position++);
          characterSet._addTokenRange('0', '9');
          current = characterSet;
          break;
        case 'D':
          // Anything but digits.
          characterSet = new NegativeCharacterClassNode(__position++);
          characterSet._addTokenRange('0', '9');
          current = characterSet;
          break;
        case 'w':
          // Word characters: digits, ASCII letters and underscore.
          characterSet = new CharacterClassNode(__position++);
          characterSet._addTokenRange('0', '9');
          characterSet._addTokenRange('a', 'z');
          characterSet._addTokenRange('A', 'Z');
          characterSet._addToken('_');
          current = characterSet;
          break;
        case 'W':
          // Anything but word characters.
          characterSet = new NegativeCharacterClassNode(__position++);
          characterSet._addTokenRange('0', '9');
          characterSet._addTokenRange('a', 'z');
          characterSet._addTokenRange('A', 'Z');
          characterSet._addToken('_');
          current = characterSet;
          break;
        case 's':
          // Whitespace: space, form feed, newline, carriage return, tab.
          characterSet = new CharacterClassNode(__position++);
          characterSet._addToken(' ');
          characterSet._addToken('\f');
          characterSet._addToken('\n');
          characterSet._addToken('\r');
          characterSet._addToken('\t');
          current = characterSet;
          break;
        case 'S':
          // Anything but whitespace.
          characterSet = new NegativeCharacterClassNode(__position++);
          characterSet._addToken(' ');
          characterSet._addToken('\f');
          characterSet._addToken('\n');
          characterSet._addToken('\r');
          characterSet._addToken('\t');
          current = characterSet;
          break;
        default:
          // A translated control character or any other escaped character
          // stands for itself.
          current = _newTokenNode(token, __position++);
          break;
      }

      __match(__lookahead);
    }

    return current;
  }