/**
   * Returns the compiled matcher for the given name pattern.
   *
   * <p>The result is cached in {@code myCompiledPattern} and is only rebuilt when
   * {@code pattern} differs from the previously seen {@code myPattern}. Only the first line of
   * the pattern, cut to at most 80 characters, takes part in matching.
   *
   * <p>NOTE(review): if the generated expression fails to compile, the failure is ignored and
   * the cached value (possibly {@code null}) is returned despite the {@code @NotNull}
   * annotation.
   *
   * @param pattern the text typed by the user
   * @return the compiled pattern for {@code pattern}
   */
  @NotNull
  private Pattern getPattern(String pattern) {
    if (!Comparing.strEqual(pattern, myPattern)) {
      myCompiledPattern = null;
      myPattern = pattern;
    }
    if (myCompiledPattern == null) {
      // Only the first line, limited to 80 characters, is converted into a regular expression.
      final int eol = pattern.indexOf('\n');
      if (eol != -1) {
        pattern = pattern.substring(0, eol);
      }
      if (pattern.length() >= 80) {
        pattern = pattern.substring(0, 80);
      }

      try {
        myCompiledPattern = new Perl5Compiler().compile(buildNameRegexp(pattern));
      } catch (MalformedPatternException ignored) {
        // Best effort: every non-alphanumeric character is escaped or translated by
        // buildNameRegexp, so a failure is not expected. When it happens anyway the cached
        // pattern is left as it was.
      }
    }

    return myCompiledPattern;
  }

  /**
   * Translates an already truncated name pattern into regular expression source.
   *
   * <ul>
   *   <li>Upper-case letters and digits start a new "word": anything but {@code A-Z} may
   *       precede them, which lets {@code PDM} find {@code PsiDocumentManager}.</li>
   *   <li>Letters match case-insensitively, except that in an all-upper-case pattern only the
   *       very first character may also match its lower-case form.</li>
   *   <li>{@code *} becomes {@code .*}, {@code .} and space are matched literally.</li>
   *   <li>Any other character is matched literally.</li>
   * </ul>
   */
  private String buildNameRegexp(String pattern) {
    final @NonNls StringBuilder buffer = new StringBuilder();

    // An all-upper-case pattern is treated as an abbreviation and is anchored at the start.
    final boolean allowToLower = !containsOnlyUppercaseLetters(pattern);
    if (allowToLower) {
      buffer.append(".*");
    }

    boolean firstIdentifierLetter = true;
    for (int i = 0; i < pattern.length(); i++) {
      final char c = pattern.charAt(i);
      if (Character.isLetterOrDigit(c)) {
        // This logic allows to use uppercase letters only to catch the name like PDM for
        // PsiDocumentManager
        if (Character.isUpperCase(c) || Character.isDigit(c)) {
          if (!firstIdentifierLetter) {
            buffer.append("[^A-Z]*");
          }

          // No '|' between the alternatives: inside a character class it would be a literal.
          buffer.append('[');
          buffer.append(c);
          if (allowToLower || i == 0) {
            buffer.append(Character.toLowerCase(c));
          }
          buffer.append(']');
        } else if (Character.isLowerCase(c)) {
          buffer.append('[');
          buffer.append(c);
          buffer.append(Character.toUpperCase(c));
          buffer.append(']');
        } else {
          // Letters without case are matched as they are.
          buffer.append(c);
        }

        firstIdentifierLetter = false;
      } else if (c == '*') {
        buffer.append(".*");
        firstIdentifierLetter = true;
      } else if (c == '.') {
        buffer.append("\\.");
        firstIdentifierLetter = true;
      } else if (c == ' ') {
        buffer.append("[^A-Z]*\\ ");
        firstIdentifierLetter = true;
      } else {
        firstIdentifierLetter = true;
        if (c > 0xFF) {
          // The two-digit \x escape cannot express this character (it would be cut down to
          // its low byte). Regular expression metacharacters are all ASCII, so the
          // character can be emitted literally.
          buffer.append(c);
        } else {
          // for OROMATCHER RegExp engine: \x followed by exactly two hex digits.
          // Adding 0x20000 pads the hex string with zeros so that substring(3) always
          // yields two digits.
          buffer.append("\\x");
          buffer.append(Integer.toHexString(c + 0x20000).substring(3));
        }
      }
    }

    buffer.append(".*");
    return buffer.toString();
  }
示例#2
0
  /**
   * Parses a single backslash escape starting at the current lookahead character and returns
   * the syntax node that represents it.
   *
   * <p>Recognised forms, in the order they are checked:
   * <ul>
   *   <li>{@code \xHH} - a character given by exactly two hexadecimal digits;</li>
   *   <li>{@code \cX} - a control character derived from {@code X};</li>
   *   <li>{@code \NN} / {@code \NNN} - a character given by two or three octal digits;</li>
   *   <li>{@code \0} - the null character; any other single digit is taken literally;</li>
   *   <li>{@code \b} - always a backspace;</li>
   *   <li>{@code \n \r \t \f} - the usual control characters;</li>
   *   <li>{@code \d \D \w \W \s \S} - predefined (negated) character classes;</li>
   *   <li>anything else - the escaped character itself.</li>
   * </ul>
   *
   * @return the node created for the escape
   * @throws MalformedPatternException if the escape is malformed, including a multi-digit
   *     numeric escape that contains the digits 8 or 9
   */
  private SyntaxNode __backslashToken() throws MalformedPatternException {
    SyntaxNode current;
    char token;

    __match('\\');

    if (__lookahead == 'x') {
      __match('x');
      // Parse a hexadecimal number
      current = _newTokenNode((char) __parseUnsignedInteger(16, 2, 2), __position++);
    } else if (__lookahead == 'c') {
      __match('c');
      // Create a control character
      token = Character.toUpperCase(__lookahead);
      token = (char) (token > 63 ? token - 64 : token + 64);
      current = new TokenNode(token, __position++);
      __match(__lookahead);
    } else if (__lookahead >= '0' && __lookahead <= '9') {
      __match(__lookahead);

      if (__lookahead >= '0' && __lookahead <= '9') {
        // We have an octal character or a multi-digit backreference.
        // Assume octal character for now.
        __putback();
        final int decimalDigits = __parseUnsignedInteger(10, 2, 3);
        int octalValue;
        try {
          // The digits were read as a decimal number; reinterpret the same digits in base 8.
          octalValue = Integer.parseInt(Integer.toString(decimalDigits), 8);
        } catch (NumberFormatException e) {
          // Digits 8 and 9 are not octal: report it as a pattern error instead of letting
          // an unchecked exception escape from the parser.
          MalformedPatternException malformed =
              new MalformedPatternException(
                  "Parse error: invalid octal escape (digits read as " + decimalDigits + ")");
          malformed.initCause(e);
          throw malformed;
        }
        current = _newTokenNode((char) octalValue, __position++);
      } else {
        // We have either \0, an escaped digit, or a backreference.
        __putback();
        if (__lookahead == '0') {
          // \0 matches the null character
          __match('0');
          current = new TokenNode('\0', __position++);
        } else {
          // Backreferences are not handled here: the digit is taken as a literal character.
          current = _newTokenNode(__lookahead, __position++);
          __match(__lookahead);
        }
      }
    } else if (__lookahead == 'b') {
      // \b always means backspace here. Treating it as a word boundary outside of a
      // character class (and \B as a non-word boundary) is not implemented.
      current = new TokenNode('\b', __position++);
      __match('b');
    } else {
      CharacterClassNode characterSet;
      token = __lookahead;

      // Map the single-letter control escapes onto the characters they stand for.
      switch (__lookahead) {
        case 'n':
          token = '\n';
          break;
        case 'r':
          token = '\r';
          break;
        case 't':
          token = '\t';
          break;
        case 'f':
          token = '\f';
          break;
      }

      // Predefined classes get a character class node; everything else is a plain token.
      switch (token) {
        case 'd':
          characterSet = new CharacterClassNode(__position++);
          characterSet._addTokenRange('0', '9');
          current = characterSet;
          break;
        case 'D':
          characterSet = new NegativeCharacterClassNode(__position++);
          characterSet._addTokenRange('0', '9');
          current = characterSet;
          break;
        case 'w':
          characterSet = new CharacterClassNode(__position++);
          characterSet._addTokenRange('0', '9');
          characterSet._addTokenRange('a', 'z');
          characterSet._addTokenRange('A', 'Z');
          characterSet._addToken('_');
          current = characterSet;
          break;
        case 'W':
          characterSet = new NegativeCharacterClassNode(__position++);
          characterSet._addTokenRange('0', '9');
          characterSet._addTokenRange('a', 'z');
          characterSet._addTokenRange('A', 'Z');
          characterSet._addToken('_');
          current = characterSet;
          break;
        case 's':
          characterSet = new CharacterClassNode(__position++);
          characterSet._addToken(' ');
          characterSet._addToken('\f');
          characterSet._addToken('\n');
          characterSet._addToken('\r');
          characterSet._addToken('\t');
          current = characterSet;
          break;
        case 'S':
          characterSet = new NegativeCharacterClassNode(__position++);
          characterSet._addToken(' ');
          characterSet._addToken('\f');
          characterSet._addToken('\n');
          characterSet._addToken('\r');
          characterSet._addToken('\t');
          current = characterSet;
          break;
        default:
          current = _newTokenNode(token, __position++);
          break;
      }

      // Consume the character that followed the backslash.
      __match(__lookahead);
    }

    return current;
  }