Пример #1
0
  /**
   * Composes a Perl5 regular expression that matches {@code pattern} only where it is not
   * followed by the tail of one of the replacement strings (negative look-ahead). This is handy
   * when a replacement string itself starts with the pattern, so that text which has already been
   * substituted is not matched again.
   *
   * <p>Example: pattern {@code &} with replacements {@code &quot}, {@code &amp} produces {@code
   * &(?!quot|amp)}.
   *
   * @param pattern pattern string to match
   * @param replacements replacement strings; each one must contain the pattern, and must start
   *     with it
   * @return the pattern followed by a negative look-ahead group built from the replacement tails;
   *     an empty string when {@code replacements} is empty
   * @exception FrameworkException when the pattern or the replacements array is null, when a
   *     replacement does not contain the pattern, or when a replacement contains the pattern
   *     somewhere other than at its beginning.
   */
  private static String makePerl5MatchPatternNegativeLookAhead(
      String pattern, String replacements[]) throws FrameworkException {
    // An array length is never negative, so the null case has to be tested explicitly;
    // otherwise a null array surfaces as a NullPointerException instead of the documented
    // FrameworkException.
    if (pattern == null || replacements == null) {
      throw new FrameworkException(
          "RegexUtils: makePerl5MatchPatternNegativeLookAhead: pattern or replacement is null."
              + "\npattern = "
              + pattern
              + "\nreplacements.length = "
              + (replacements == null ? "null" : String.valueOf(replacements.length)));
    }

    StringBuffer patternBuffer = new StringBuffer();

    Perl5Util util = new Perl5Util();
    String formatPattern = makePerl5MatchPattern(pattern);

    for (int i = 0; i < replacements.length; i++) {
      // every replacement has to contain the pattern, otherwise this function is misused
      if (!util.match(formatPattern, replacements[i])) {
        throw new FrameworkException(
            "ERROR: RegexUtils: makePerl5MatchPatternNegativeLookAhead: "
                + "Invalid use of the function.");
      }

      if (!replacements[i].startsWith(pattern)) {
        throw new FrameworkException(
            "ERROR: RegexUtils: makePerl5MatchPatternNegativeLookAhead: The pattern in the "
                + "replacement string should be at the beginning.");
      }

      // the text following the pattern inside the replacement is what must NOT follow a match
      String tail = util.postMatch();

      // very first one: emit the pattern itself and open the look-ahead group
      if (i == 0) {
        patternBuffer.append(pattern);
        patternBuffer.append("(?!");
      }
      patternBuffer.append(tail);

      // close the group after the last alternative, otherwise add the perl5 separator
      if (i == (replacements.length - 1)) {
        patternBuffer.append(")");
      } else {
        patternBuffer.append("|");
      }
    }

    if (Debug.isLevelEnabled(Debug.MSG_STATUS)) {
      Debug.log(
          Debug.MSG_STATUS,
          "RegexUtils: makePerl5MatchPatternNegativeLookAhead: patternBuffer.toString() = "
              + patternBuffer.toString());
    }

    return patternBuffer.toString();
  }
Пример #2
0
  /**
   * Builds a Perl5 negative look-ahead pattern for a single pattern/replacement pair, so that a
   * later substitution does not re-match text that an earlier substitution has produced.
   *
   * <p>Example: with the pairs ({@code "} to {@code &quot}) and ({@code &} to {@code &amp}), the
   * input {@code I didn't &0 to say "GOO".} becomes {@code I didn't &0 to say &quotGOO&quot.}
   * after the first substitution. Substituting {@code &} with the plain pattern {@code &} would
   * then also hit the {@code &} of {@code &quot} and give {@code I didn't &amp0 to say
   * &ampquotGOO&ampquot.}; with the look-ahead pattern {@code &(?!quot)} the result is {@code I
   * didn't &amp0 to say &quotGOO&quot.}
   *
   * @param pattern pattern string to match
   * @param replacement replacement string to inspect
   * @return the pattern unchanged when it does not occur in the replacement; otherwise the
   *     pattern followed by a negative look-ahead group holding the remainder of the replacement
   * @exception FrameworkException when the pattern or replacement is null, or when the
   *     replacement contains the pattern but does not start with it.
   */
  private static String makePerl5MatchPatternNegativeLookAhead(String pattern, String replacement)
      throws FrameworkException {
    if (pattern == null || replacement == null) {
      throw new FrameworkException(
          "RegexUtils: makePerl5MatchPatternNegativeLookAhead: pattern or replacement is null."
              + "\npattern = "
              + pattern
              + "\nreplacement = "
              + replacement);
    }

    Perl5Util matcher = new Perl5Util();
    String delimitedPattern = makePerl5MatchPattern(pattern);

    // the pattern is not part of the replacement: no look-ahead is required
    if (!matcher.match(delimitedPattern, replacement)) {
      return pattern;
    }

    if (!replacement.startsWith(pattern)) {
      throw new FrameworkException(
          "ERROR: RegexUtils: makePerl5MatchPatternNegativeLookAhead: The pattern in the "
              + "replacement string should be at the beginning.");
    }

    // whatever follows the pattern inside the replacement must not follow a match
    return pattern + "(?!" + matcher.postMatch() + ")";
  }
  /**
   * Makes WikiText from a Collection.
   *
   * <p>Each item is rendered as a Wiki link of the form {@code [beautified title|value]},
   * prefixed with {@code m_before} and followed by {@code m_after}; consecutive items are joined
   * with the separator.
   *
   * @param links Collection to make into WikiText; its elements are cast to String.
   * @param separator Separator string to put between items. When null, the field {@code
   *     m_separator} is used instead.
   * @param numItems How many items to show; {@code ALL_ITEMS} shows every item.
   * @return The WikiText, or an empty string when the collection is null or empty.
   */
  protected String wikitizeCollection(Collection links, String separator, int numItems) {
    if (links == null || links.isEmpty()) return "";

    StringBuffer output = new StringBuffer();

    Iterator it = links.iterator();
    int count = 0;

    //
    //  The output will be B Item[1] A S B Item[2] A S B Item[3] A
    //
    while (it.hasNext() && ((count < numItems) || (numItems == ALL_ITEMS))) {
      String value = (String) it.next();

      if (count > 0) {
        output.append(m_after);

        // Honor the separator handed in by the caller; the field is only a fallback for
        // callers that pass null.
        if (separator != null) {
          output.append(separator);
        } else {
          output.append(m_separator);
        }
      }

      output.append(m_before);

      // Make a Wiki markup link. See TranslatorReader.
      output.append("[" + m_engine.beautifyTitle(value) + "|" + value + "]");
      count++;
    }

    //
    //  Output final item - if there have been none, no "after" is printed
    //
    if (count > 0) output.append(m_after);

    return output.toString();
  }
Пример #4
0
  // Reads an unsigned integer in the given radix from the look-ahead stream. At most maxDigits
  // digits are consumed; a MalformedPatternException is thrown when the number of digits read
  // is outside [minDigits, maxDigits] or when the digits do not form a valid int.
  private int __parseUnsignedInteger(int radix, int minDigits, int maxDigits)
      throws MalformedPatternException {
    // We don't expect huge numbers, so an initial buffer of 4 is fine.
    StringBuffer digitText = new StringBuffer(4);
    int consumed = 0;

    // consume digits one at a time, stopping at the first non-digit or at the limit
    for (; Character.digit(__lookahead, radix) != -1 && consumed < maxDigits; ++consumed) {
      digitText.append((char) __lookahead);
      __match(__lookahead);
    }

    boolean countInRange = consumed >= minDigits && consumed <= maxDigits;
    if (!countInRange) {
      throw new MalformedPatternException(
          "Parse error: unexpected number of digits at position " + __bytesRead);
    }

    try {
      return Integer.parseInt(digitText.toString(), radix);
    } catch (NumberFormatException e) {
      throw new MalformedPatternException(
          "Parse error: numeric value at " + "position " + __bytesRead + " is invalid");
    }
  }
Пример #5
0
  /**
   * Replaces only the last occurrence of the pattern in the input with the replacement.
   *
   * <p>The input is scanned match by match. Every match except the final one is copied to the
   * result verbatim; the substitution is applied to the final match only. When the pattern does
   * not occur in the input at all, the input is returned unchanged.
   *
   * @param pattern pattern string to match
   * @param input input string
   * @param replacement replacement string
   * @return the input with its last match replaced, or the input itself when nothing matches
   * @exception FrameworkException when the pattern, input or replacement is null.
   */
  public static String replaceLast(String pattern, String input, String replacement)
      throws FrameworkException {
    if ((pattern == null) || (input == null) || (replacement == null)) {
      throw new FrameworkException(
          "RegexUtil: replaceLast(): pattern or input cannot be null. "
              + "pattern = "
              + pattern
              + "input = "
              + input
              + "replacement = "
              + replacement);
    }

    Perl5Util util = new Perl5Util();
    StringBuffer resultBuffer = new StringBuffer();

    String regex = makePerl5SubstitutionPattern(pattern, replacement);
    String matchPattern = makePerl5MatchPattern(pattern);

    // The most recent match is held back instead of being written out immediately: only once
    // a later match shows up do we know it was not the last one and may be copied verbatim.
    String pendingMatch = null;
    String remaining = input;

    while (util.match(matchPattern, remaining)) {
      if (pendingMatch != null) {
        resultBuffer.append(pendingMatch);
      }

      // the text in front of this match is never modified
      resultBuffer.append(util.preMatch());

      MatchResult matchResult = util.getMatch();
      pendingMatch = matchResult.toString();

      // the text after the match becomes the new input
      remaining = util.postMatch();
    }

    // nothing matched, so there is nothing to replace
    if (pendingMatch == null) {
      return input;
    }

    // do the string replacement on the last match found, then add the untouched tail
    resultBuffer.append(util.substitute(regex, pendingMatch));
    resultBuffer.append(remaining);

    return resultBuffer.toString();
  }
Пример #6
0
  /**
   * Builds a Perl5 substitution expression from a plain pattern and a replacement. When the
   * pattern consists of exactly one of the special characters {@code | ) $ * ^ / + . [ ? ( ]}, a
   * backslash is put in front of it. The format is:
   *
   * <p>s/pattern/replacement/
   *
   * <p>Example: pattern {@code "} and replacement {@code &quot} are combined into an expression of
   * the form {@code s/"/&quot/g}.
   *
   * @param pattern pattern string to match
   * @param replacement replacement string
   * @return the perl5 substitution expression, ending in {@code g} when {@code checkOption}
   *     accepts the replacement and in {@code /g} otherwise
   * @exception FrameworkException when the pattern or replacement is null.
   */
  private static String makePerl5SubstitutionPattern(String pattern, String replacement)
      throws FrameworkException {
    if (replacement == null || pattern == null) {
      throw new FrameworkException(
          "RegexUtils: makePerl5SubstitutionPattern: pattern or replacement is null."
              + "pattern = "
              + pattern
              + " replacement = "
              + replacement);
    }

    // only a pattern that is exactly one special character gets the escaping backslash
    boolean isLoneSpecialChar =
        pattern.length() == 1 && "|)$*^/+.[?(]".indexOf(pattern.charAt(0)) >= 0;
    String escapedPattern = isLoneSpecialChar ? "\\" + pattern : pattern;

    // a replacement that already carries an option supplies its own closing delimiter
    boolean replacementHasOption = checkOption(replacement);

    StringBuffer expression = new StringBuffer("s/");
    expression.append(escapedPattern);
    expression.append("/");
    expression.append(replacement);
    expression.append(replacementHasOption ? "g" : "/g");

    return expression.toString();
  }
Пример #7
0
  /**
   * Wraps a plain pattern into a Perl5 match expression. When the pattern consists of exactly one
   * of the special characters {@code | ) $ * ^ / + . [ ? ( ]}, a backslash is put in front of it.
   *
   * <p>Example: input {@code &} gives {@code /&/}.
   *
   * <p>The caller may pass a pattern that already ends with a perl5 option, i.e. {@code
   * pattern/[i][m][s][x]}; in that case {@code checkOption} reports it and no closing slash is
   * added.
   *
   * @param pattern pattern string to match
   * @return the perl5 format pattern
   * @exception FrameworkException when the pattern is null.
   */
  private static String makePerl5MatchPattern(String pattern) throws FrameworkException {
    if (pattern == null) {
      throw new FrameworkException("RegexUtil: makePerl5MatchPattern(): the pattern is null.");
    }

    // only a pattern that is exactly one special character gets the escaping backslash
    boolean isLoneSpecialChar =
        pattern.length() == 1 && "|)$*^/+.[?(]".indexOf(pattern.charAt(0)) >= 0;
    String body = isLoneSpecialChar ? "\\" + pattern : pattern;

    // a pattern carrying an option (/i, /m, /s, /x) already contains its closing delimiter
    boolean hasOption = checkOption(body);

    StringBuffer expression = new StringBuffer("/");
    expression.append(body);
    if (!hasOption) {
      expression.append("/");
    }

    return expression.toString();
  }
Пример #8
0
  /**
   * Handles the situation where further begin tokens are found between the first begin token and
   * the first end token. Such earlier begin tokens are ignored: everything up to and including
   * the LAST begin token inside {@code current} is moved to the result buffer untouched, and only
   * the text after it is handed back for the replacement.
   *
   * @param beginToken the pattern to look for, as accepted by {@code Perl5Util.match}
   * @param current the text between the first begin token and the first end token
   * @param resultBuffer the buffer to hold the result; receives the skipped text
   * @return the part of {@code current} that follows the last begin token found, or {@code
   *     current} itself when it contains no begin token
   */
  private static final String skipOrphanedBeginToken(
      String beginToken, String current, StringBuffer resultBuffer) {
    Perl5Util matcher = new Perl5Util();
    StringBuffer skipped = new StringBuffer();
    String remaining = current;

    // walk from begin token to begin token until none is left in the remaining text
    while (matcher.match(beginToken, remaining)) {
      // keep the text before the token and the token itself exactly as they are
      skipped.append(matcher.preMatch());
      skipped.append(matcher.getMatch().toString());

      // continue the search behind the token
      remaining = matcher.postMatch();
    }

    // everything up to the last begin token goes to the result unprocessed
    resultBuffer.append(skipped.toString());

    // what is left is the string to perform the substitution on
    return remaining;
  }
Пример #9
0
  /**
   * Substitutes ALL occurrences of the pattern in the input with the replacement, but only inside
   * the ranges bounded by the begin token and the end token. Text outside such ranges, and the
   * tokens themselves, are copied to the result unchanged.
   *
   * <p>The input is processed range by range: after each begin/end pair has been handled, the
   * text following the end token becomes the new input. Processing stops at the first begin token
   * that has no end token behind it, or when no further begin token is found; the rest of the
   * input is then appended as is.
   *
   * @param pattern pattern string to match
   * @param input input string
   * @param replacement replacement string to replace with
   * @param beginToken begin boundary token
   * @param endToken end boundary token; when it is the empty string the work is delegated to
   *     {@code replaceAllWithBeginToken}
   * @param beginTokens an array of beginTokens - this is used for negative look ahead where there
   *     are other begin tokens to be recognized. It is only passed on to {@code
   *     isOtherBeginTokenThere}.
   * @return the processed string; the unprocessed original input when no begin token is found.
   *     May be null when the end token is empty and {@code replaceAllWithBeginToken} throws.
   * @exception FrameworkException when pattern, input or replacement is null, or when the begin
   *     token or the end token is null.
   */
  public static String replaceAll(
      String pattern,
      String input,
      String replacement,
      String beginToken,
      String endToken,
      String beginTokens[])
      throws FrameworkException {
    if ((pattern == null) || (replacement == null) || (input == null)) {
      // log the offending values first; the exception message itself carries no values
      Debug.log(
          Debug.ALL_ERRORS,
          "RegexUtils: replaceAll(): pattern or replacement or input is null. "
              + "\npattern = "
              + pattern
              + "\ninput = "
              + input
              + "\nreplacement = "
              + replacement);
      throw new FrameworkException(
          "RegexUtils: replaceAll(): pattern or replacement or input is null.");
    }

    Perl5Util util = new Perl5Util();
    StringBuffer resultBuffer = new StringBuffer();

    if ((beginToken == null) || (endToken == null)) {
      // both tokens are required: a null begin token or a null end token is rejected
      throw new FrameworkException(
          "RegexUtils: replaceAll(): Either begin or end token is null. BeginToken = "
              + beginToken
              + ", "
              + "endToken = "
              + endToken);
    } else // do pattern match
    {
      // making the Perl5 regular expression format pattern
      String begin = makePerl5MatchPattern(beginToken);
      String end = makePerl5MatchPattern(endToken);
      boolean beginTokenMatch = true;

      // if begin token found
      while (beginTokenMatch) {

        // check the input for each iteration. when there is no input, break out of the loop.
        if (input == null) break;

        beginTokenMatch = util.match(begin, input);

        if (beginTokenMatch) {
          // NOTE(review): negativeLookAheadPattern is never read in this method; the call is
          // still able to throw a FrameworkException for an unsuitable pattern/replacement pair.
          String negativeLookAheadPattern =
              makePerl5MatchPatternNegativeLookAhead(pattern, replacement);
          // NOTE(review): beginOffsetForBeginToken is not used below.
          int beginOffsetForBeginToken = util.beginOffset(0);
          int endOffsetForBeginToken = util.endOffset(0);

          // the input after the begin token
          String subInput = input.substring(endOffsetForBeginToken);

          // if end token passed in was an empty string
          if (endToken.equals("")) {
            String result = null;
            try {
              result = replaceAllWithBeginToken(pattern, input, replacement, beginToken);
            } catch (FrameworkException e) {
              // the failure is only logged; result stays null and null is returned to the caller
              Debug.log(Debug.ALL_ERRORS, "RegexUtils: replaceAll() failed." + e.getMessage());
            }
            return result;
          } else if (util.match(end, subInput)) // endToken found
          {

            // The end token was matched against subInput, so its offsets are shifted by the
            // end offset of the begin token to make them relative to input.
            int beginOffsetForEndToken = endOffsetForBeginToken + util.beginOffset(0);
            int endOffsetForEndToken = endOffsetForBeginToken + util.endOffset(0);

            // pre is the string before the beginToken and the beginToken
            String pre = input.substring(0, endOffsetForBeginToken);
            resultBuffer.append(pre);

            // current is the string between the beginToken and the endToken
            String current = input.substring(endOffsetForBeginToken, beginOffsetForEndToken);

            // theRest is the rest of the input string after the endToken
            String theRest = input.substring(endOffsetForEndToken);

            // Drop any further begin tokens inside the range: the skipped text is appended to
            // resultBuffer and current shrinks to the text after the last begin token.
            current = skipOrphanedBeginToken(begin, current, resultBuffer);

            // the substitution is skipped when isOtherBeginTokenThere reports true
            if (isOtherBeginTokenThere(beginToken, current, beginTokens) == false) {
              current = replaceAll(pattern, current, replacement);
            } // isOtherBeginTokenThere

            resultBuffer.append(current);
            // copy the end token itself, then continue with the text behind it
            resultBuffer.append(input.substring(beginOffsetForEndToken, endOffsetForEndToken));
            input = theRest;
          } else // endToken not found
          {
            // an unterminated range is left untouched
            resultBuffer.append(input);
            break;
          }
        } // if beginToken found
        else // beginToken not found
        {
          // no further range: the remaining input is copied as is
          resultBuffer.append(input);
          break;
        }
      } // while begin token found
    } // else do pattern match

    return resultBuffer.toString();
  }
Пример #10
0
  // Returns the compiled matcher pattern for the given search text. The compiled pattern is
  // cached in myCompiledPattern and rebuilt only when the text differs from myPattern. Only the
  // first line of the text, cut to at most 80 characters, is translated into a regular
  // expression. If compilation fails, the cache stays empty and that value is returned.
  @NotNull
  private Pattern getPattern(String pattern) {
    if (!Comparing.strEqual(pattern, myPattern)) {
      myCompiledPattern = null;
      myPattern = pattern;
    }
    if (myCompiledPattern != null) {
      return myCompiledPattern;
    }

    // use the first line only, and no more than 80 characters of it
    String text = pattern;
    final int eol = text.indexOf('\n');
    if (eol != -1) {
      text = text.substring(0, eol);
    }
    if (text.length() >= 80) {
      text = text.substring(0, 80);
    }

    // a text made of uppercase letters only is matched strictly against uppercase letters
    final boolean allowToLower = !containsOnlyUppercaseLetters(text);

    final @NonNls StringBuffer regex = new StringBuffer();
    if (allowToLower) {
      regex.append(".*");
    }

    boolean atIdentifierStart = true;
    final int textLength = text.length();
    for (int i = 0; i < textLength; i++) {
      final char c = text.charAt(i);

      if (!Character.isLetterOrDigit(c)) {
        // every non-alphanumeric character starts a new identifier part
        atIdentifierStart = true;
        switch (c) {
          case '*':
            regex.append(".*");
            break;
          case '.':
            regex.append("\\.");
            break;
          case ' ':
            regex.append("[^A-Z]*\\ ");
            break;
          default:
            // hex escape in the form used for the OROMATCHER RegExp engine
            regex.append("\\x");
            regex.append(Integer.toHexString(c + 0x20000).substring(3));
            break;
        }
        continue;
      }

      if (Character.isUpperCase(c) || Character.isDigit(c)) {
        // This logic allows to use uppercase letters only to catch the name like PDM for
        // PsiDocumentManager
        if (!atIdentifierStart) {
          regex.append("[^A-Z]*");
        }

        regex.append("[");
        regex.append(c);
        if (allowToLower || i == 0) {
          regex.append('|');
          regex.append(Character.toLowerCase(c));
        }
        regex.append("]");
      } else if (Character.isLowerCase(c)) {
        regex.append('[').append(c).append('|').append(Character.toUpperCase(c)).append(']');
      } else {
        regex.append(c);
      }

      atIdentifierStart = false;
    }

    regex.append(".*");

    try {
      myCompiledPattern = new Perl5Compiler().compile(regex.toString());
    } catch (MalformedPatternException e) {
      // do nothing
    }

    return myCompiledPattern;
  }