// if numChars is 0, this means match as many as you want private int __parseUnsignedInteger(int radix, int minDigits, int maxDigits) throws MalformedPatternException { int num, digits = 0; StringBuffer buf; // We don't expect huge numbers, so an initial buffer of 4 is fine. buf = new StringBuffer(4); while (Character.digit(__lookahead, radix) != -1 && digits < maxDigits) { buf.append((char) __lookahead); __match(__lookahead); ++digits; } if (digits < minDigits || digits > maxDigits) throw new MalformedPatternException( "Parse error: unexpected number of digits at position " + __bytesRead); try { num = Integer.parseInt(buf.toString(), radix); } catch (NumberFormatException e) { throw new MalformedPatternException( "Parse error: numeric value at " + "position " + __bytesRead + " is invalid"); } return num; }
private SyntaxNode __backslashToken() throws MalformedPatternException { SyntaxNode current; char token; int number; __match('\\'); if (__lookahead == 'x') { __match('x'); // Parse a hexadecimal number current = _newTokenNode((char) __parseUnsignedInteger(16, 2, 2), __position++); } else if (__lookahead == 'c') { __match('c'); // Create a control character token = Character.toUpperCase(__lookahead); token = (char) (token > 63 ? token - 64 : token + 64); current = new TokenNode(token, __position++); __match(__lookahead); } else if (__lookahead >= '0' && __lookahead <= '9') { __match(__lookahead); if (__lookahead >= '0' && __lookahead <= '9') { // We have an octal character or a multi-digit backreference. // Assume octal character for now. __putback(); number = __parseUnsignedInteger(10, 2, 3); number = Integer.parseInt(Integer.toString(number), 8); current = _newTokenNode((char) number, __position++); } else { // We have either \0, an escaped digit, or a backreference. __putback(); if (__lookahead == '0') { // \0 matches the null character __match('0'); current = new TokenNode('\0', __position++); } else { // Either an escaped digit or backreference. number = Character.digit(__lookahead, 10); current = _newTokenNode(__lookahead, __position++); __match(__lookahead); } } } else if (__lookahead == 'b') { // Inside of a character class the \b means backspace, otherwise // it means a word boundary // if(__inCharacterClass) // \b always means backspace current = new TokenNode('\b', __position++); /* else current = new TokenNode((char)LeafNode._WORD_BOUNDARY_MARKER_TOKEN, position++); */ __match('b'); } /*else if(__lookahead == 'B' && !__inCharacterClass){ current = new TokenNode((char)LeafNode._NONWORD_BOUNDARY_MARKER_TOKEN, position++); __match('B'); } */ else { CharacterClassNode characterSet; token = __lookahead; switch (__lookahead) { case 'n': token = '\n'; break; case 'r': token = '\r'; break; case 't': token = '\t'; break; case 'f': token = '\f'; break; } switch (token) { case 'd': characterSet = new CharacterClassNode(__position++); characterSet._addTokenRange('0', '9'); current = characterSet; break; case 'D': characterSet = new NegativeCharacterClassNode(__position++); characterSet._addTokenRange('0', '9'); current = characterSet; break; case 'w': characterSet = new CharacterClassNode(__position++); characterSet._addTokenRange('0', '9'); characterSet._addTokenRange('a', 'z'); characterSet._addTokenRange('A', 'Z'); characterSet._addToken('_'); current = characterSet; break; case 'W': characterSet = new NegativeCharacterClassNode(__position++); characterSet._addTokenRange('0', '9'); characterSet._addTokenRange('a', 'z'); characterSet._addTokenRange('A', 'Z'); characterSet._addToken('_'); current = characterSet; break; case 's': characterSet = new CharacterClassNode(__position++); characterSet._addToken(' '); characterSet._addToken('\f'); characterSet._addToken('\n'); characterSet._addToken('\r'); characterSet._addToken('\t'); current = characterSet; break; case 'S': characterSet = new NegativeCharacterClassNode(__position++); characterSet._addToken(' '); characterSet._addToken('\f'); characterSet._addToken('\n'); characterSet._addToken('\r'); characterSet._addToken('\t'); current = characterSet; break; default: current = _newTokenNode(token, __position++); break; } __match(__lookahead); } return current; }