@NotNull private Pattern getPattern(String pattern) { if (!Comparing.strEqual(pattern, myPattern)) { myCompiledPattern = null; myPattern = pattern; } if (myCompiledPattern == null) { boolean allowToLower = true; final int eol = pattern.indexOf('\n'); if (eol != -1) { pattern = pattern.substring(0, eol); } if (pattern.length() >= 80) { pattern = pattern.substring(0, 80); } final @NonNls StringBuffer buffer = new StringBuffer(); if (containsOnlyUppercaseLetters(pattern)) { allowToLower = false; } if (allowToLower) { buffer.append(".*"); } boolean firstIdentifierLetter = true; for (int i = 0; i < pattern.length(); i++) { final char c = pattern.charAt(i); if (Character.isLetterOrDigit(c)) { // This logic allows to use uppercase letters only to catch the name like PDM for // PsiDocumentManager if (Character.isUpperCase(c) || Character.isDigit(c)) { if (!firstIdentifierLetter) { buffer.append("[^A-Z]*"); } buffer.append("["); buffer.append(c); if (allowToLower || i == 0) { buffer.append('|'); buffer.append(Character.toLowerCase(c)); } buffer.append("]"); } else if (Character.isLowerCase(c)) { buffer.append('['); buffer.append(c); buffer.append('|'); buffer.append(Character.toUpperCase(c)); buffer.append(']'); } else { buffer.append(c); } firstIdentifierLetter = false; } else if (c == '*') { buffer.append(".*"); firstIdentifierLetter = true; } else if (c == '.') { buffer.append("\\."); firstIdentifierLetter = true; } else if (c == ' ') { buffer.append("[^A-Z]*\\ "); firstIdentifierLetter = true; } else { firstIdentifierLetter = true; // for standard RegExp engine // buffer.append("\\u"); // buffer.append(Integer.toHexString(c + 0x20000).substring(1)); // for OROMATCHER RegExp engine buffer.append("\\x"); buffer.append(Integer.toHexString(c + 0x20000).substring(3)); } } buffer.append(".*"); try { myCompiledPattern = new Perl5Compiler().compile(buffer.toString()); } catch (MalformedPatternException e) { // do nothing } } return myCompiledPattern; }
private SyntaxNode __backslashToken() throws MalformedPatternException { SyntaxNode current; char token; int number; __match('\\'); if (__lookahead == 'x') { __match('x'); // Parse a hexadecimal number current = _newTokenNode((char) __parseUnsignedInteger(16, 2, 2), __position++); } else if (__lookahead == 'c') { __match('c'); // Create a control character token = Character.toUpperCase(__lookahead); token = (char) (token > 63 ? token - 64 : token + 64); current = new TokenNode(token, __position++); __match(__lookahead); } else if (__lookahead >= '0' && __lookahead <= '9') { __match(__lookahead); if (__lookahead >= '0' && __lookahead <= '9') { // We have an octal character or a multi-digit backreference. // Assume octal character for now. __putback(); number = __parseUnsignedInteger(10, 2, 3); number = Integer.parseInt(Integer.toString(number), 8); current = _newTokenNode((char) number, __position++); } else { // We have either \0, an escaped digit, or a backreference. __putback(); if (__lookahead == '0') { // \0 matches the null character __match('0'); current = new TokenNode('\0', __position++); } else { // Either an escaped digit or backreference. number = Character.digit(__lookahead, 10); current = _newTokenNode(__lookahead, __position++); __match(__lookahead); } } } else if (__lookahead == 'b') { // Inside of a character class the \b means backspace, otherwise // it means a word boundary // if(__inCharacterClass) // \b always means backspace current = new TokenNode('\b', __position++); /* else current = new TokenNode((char)LeafNode._WORD_BOUNDARY_MARKER_TOKEN, position++); */ __match('b'); } /*else if(__lookahead == 'B' && !__inCharacterClass){ current = new TokenNode((char)LeafNode._NONWORD_BOUNDARY_MARKER_TOKEN, position++); __match('B'); } */ else { CharacterClassNode characterSet; token = __lookahead; switch (__lookahead) { case 'n': token = '\n'; break; case 'r': token = '\r'; break; case 't': token = '\t'; break; case 'f': token = '\f'; break; } switch (token) { case 'd': characterSet = new CharacterClassNode(__position++); characterSet._addTokenRange('0', '9'); current = characterSet; break; case 'D': characterSet = new NegativeCharacterClassNode(__position++); characterSet._addTokenRange('0', '9'); current = characterSet; break; case 'w': characterSet = new CharacterClassNode(__position++); characterSet._addTokenRange('0', '9'); characterSet._addTokenRange('a', 'z'); characterSet._addTokenRange('A', 'Z'); characterSet._addToken('_'); current = characterSet; break; case 'W': characterSet = new NegativeCharacterClassNode(__position++); characterSet._addTokenRange('0', '9'); characterSet._addTokenRange('a', 'z'); characterSet._addTokenRange('A', 'Z'); characterSet._addToken('_'); current = characterSet; break; case 's': characterSet = new CharacterClassNode(__position++); characterSet._addToken(' '); characterSet._addToken('\f'); characterSet._addToken('\n'); characterSet._addToken('\r'); characterSet._addToken('\t'); current = characterSet; break; case 'S': characterSet = new NegativeCharacterClassNode(__position++); characterSet._addToken(' '); characterSet._addToken('\f'); characterSet._addToken('\n'); characterSet._addToken('\r'); characterSet._addToken('\t'); current = characterSet; break; default: current = _newTokenNode(token, __position++); break; } __match(__lookahead); } return current; }