/** * This function composes the perl regular expression to perform pattern matching with negative * look ahead. This becomes handy when a replacement string includes some string that is also a * pattern. * * <p>i.e.) pattern - & replacement - ", & * * @param pattern - pattern string to match * @param replacements - an array of replacement strings * @return returns the format pattern for perl5 negative look-ahead * @exception throws a FrameworkException when the pattern/replacement is null. */ private static String makePerl5MatchPatternNegativeLookAhead( String pattern, String replacements[]) throws FrameworkException { if (pattern == null || replacements.length < 0) { throw new FrameworkException( "RegexUtils: makePerl5MatchPatternNegativeLookAhead: pattern or replacement is null." + "\npattern = " + pattern + "\nreplacements.length = " + replacements.length); } String result = null; StringBuffer patternBuffer = new StringBuffer(); Perl5Util util = new Perl5Util(); String formatPattern = makePerl5MatchPattern(pattern); for (int i = 0; i < replacements.length; i++) { if (util.match(formatPattern, replacements[i])) { if (replacements[i].startsWith(pattern)) { result = util.postMatch(); // very first one if (i == 0) { patternBuffer.append(pattern); patternBuffer.append("(?!"); } patternBuffer.append(result); // the last one if (i == (replacements.length - 1)) { patternBuffer.append(")"); } else // not the last one, cat the perl5 separater { patternBuffer.append("|"); } } else { throw new FrameworkException( "ERROR: RegexUtils: makePerl5MatchPatternNegativeLookAhead: The pattern in the " + "replacement string should be at the beginning."); } } else // no match found meaning invalid use of this function { throw new FrameworkException( "ERROR: RegexUtils: makePerl5MatchPatternNegativeLookAhead: " + "Invalid use of the function."); } } if (Debug.isLevelEnabled(Debug.MSG_STATUS)) { Debug.log( Debug.MSG_STATUS, "RegexUtils: makePerl5MatchPatternNegativeLookAhead: patternBuffer.toString() = " + patternBuffer.toString()); } return patternBuffer.toString(); }
/** * This function composes the perl regular expression to perform pattern matching with negative * look ahead. This becomes handy when a replacement string includes some string that is also a * pattern. i.e.) pattern - ", & replacement - ", & original input - I didn't &0 to say * "GOO". * * <p>After the first substitution with " and ": processed input - I didn't &0 to say * "GOO". * * <p>When we do the second substitution with '&', we do not want to perform the pattern matching * on the '&' of """ since it is already a replacement. If we give the pattern like this with * perl's negative look-ahead functionality: * * <p>not-negative look-ahead pattern - & negative look-ahead pattern - &(?!quot) * * <p>After the second substitution with & and & with non-negative look-ahead: processed input * - I didn't &0 to say &quotGOO&quot. * * <p>After the second substitution with & and & with negative look-ahead: processed input - I * didn't &0 to say "GOO". * * @param pattern - pattern string to match * @param replacement - replacement * @return returns the format pattern for perl5 negative look-ahead * @exception throws a FrameworkException when the input/pattern/replacement is null. */ private static String makePerl5MatchPatternNegativeLookAhead(String pattern, String replacement) throws FrameworkException { if (pattern == null || replacement == null) { throw new FrameworkException( "RegexUtils: makePerl5MatchPatternNegativeLookAhead: pattern or replacement is null." + "\npattern = " + pattern + "\nreplacement = " + replacement); } String result = pattern; StringBuffer patternBuffer = new StringBuffer(); Perl5Util util = new Perl5Util(); String formatPattern = makePerl5MatchPattern(pattern); // check if the pattern string is a part of the replacement string if (util.match(formatPattern, replacement)) { if (replacement.startsWith(pattern)) { result = util.postMatch(); patternBuffer.append(pattern); patternBuffer.append("(?!"); patternBuffer.append(result); patternBuffer.append(")"); result = patternBuffer.toString(); } else { throw new FrameworkException( "ERROR: RegexUtils: makePerl5MatchPatternNegativeLookAhead: The pattern in the " + "replacement string should be at the beginning."); } } return result; }
/** * Makes WikiText from a Collection. * * @param links Collection to make into WikiText. * @param separator Separator string to use. * @param numItems How many items to show. * @return The WikiText */ protected String wikitizeCollection(Collection links, String separator, int numItems) { if (links == null || links.isEmpty()) return ""; StringBuffer output = new StringBuffer(); Iterator it = links.iterator(); int count = 0; // // The output will be B Item[1] A S B Item[2] A S B Item[3] A // while (it.hasNext() && ((count < numItems) || (numItems == ALL_ITEMS))) { String value = (String) it.next(); if (count > 0) { output.append(m_after); output.append(m_separator); } output.append(m_before); // Make a Wiki markup link. See TranslatorReader. output.append("[" + m_engine.beautifyTitle(value) + "|" + value + "]"); count++; } // // Output final item - if there have been none, no "after" is printed // if (count > 0) output.append(m_after); return output.toString(); }
// if numChars is 0, this means match as many as you want private int __parseUnsignedInteger(int radix, int minDigits, int maxDigits) throws MalformedPatternException { int num, digits = 0; StringBuffer buf; // We don't expect huge numbers, so an initial buffer of 4 is fine. buf = new StringBuffer(4); while (Character.digit(__lookahead, radix) != -1 && digits < maxDigits) { buf.append((char) __lookahead); __match(__lookahead); ++digits; } if (digits < minDigits || digits > maxDigits) throw new MalformedPatternException( "Parse error: unexpected number of digits at position " + __bytesRead); try { num = Integer.parseInt(buf.toString(), radix); } catch (NumberFormatException e) { throw new MalformedPatternException( "Parse error: numeric value at " + "position " + __bytesRead + " is invalid"); } return num; }
/** * This function replace only the first pattern matched with the replacement. * * @param pattern pattern string to match * @param input input string * @param replacement replacement string * @return returns the processed string * @exception throws a FrameworkException when the pattern/replacement is null. */ public static String replaceLast(String pattern, String input, String replacement) throws FrameworkException { if ((pattern == null) || (input == null) || (replacement == null)) { throw new FrameworkException( "RegexUtil: replaceLast(): pattern or input cannot be null. " + "pattern = " + pattern + "input = " + input + "replacement = " + replacement); } Perl5Util util = new Perl5Util(); MatchResult matchResult = null; String result = null; String pre = null; String post = null; StringBuffer resultBuffer = new StringBuffer(); int length = input.length(); String regex = makePerl5SubstitutionPattern(pattern, replacement); pattern = makePerl5MatchPattern(pattern); // counts the number of match and grab the last one while (util.match(pattern, input)) { // getting the string before match pre = util.preMatch(); resultBuffer.append(pre); // get the matched string matchResult = util.getMatch(); resultBuffer.append(matchResult.toString()); // get the post string after the match post = util.postMatch(); // the post becomes the new input input = post; } // get the last match found matchResult = util.getMatch(); // do the string replacement on the pattern found result = util.substitute(regex, matchResult.toString()); resultBuffer.append(result); resultBuffer.append(post); return resultBuffer.toString(); }
/** * This function makes a perl5 match pattern from non-format pattern for the substitution. This * function also checks for the case of special character. It the patter passed in is such a * chracter, the escape character back slash is attached in the front. The format is: * * <p>s/pattern/replacement/ * * <p>i.e.) input: pattern - " replacement - " * * <p>output: s/\"/"/ * * @param pattern pattern string to match * @param replacement replacement string * @return returns the perl5 format for substitution * @exception throws a FrameworkException when the pattern/replacement is null. */ private static String makePerl5SubstitutionPattern(String pattern, String replacement) throws FrameworkException { if (replacement == null || pattern == null) { throw new FrameworkException( "RegexUtils: makePerl5SubstitutionPattern: pattern or replacement is null." + "pattern = " + pattern + " replacement = " + replacement); } StringBuffer regex = new StringBuffer(); String formatPattern = pattern; // check for the special characters which needs to be preceeed it by a backslash if (pattern.equals("|") || pattern.equals(")") || pattern.equals("$") || pattern.equals("*") || pattern.equals("^") || pattern.equals("/") || pattern.equals("+") || pattern.equals(".") || pattern.equals("[") || pattern.equals("?") || pattern.equals("(") || pattern.equals("]")) { // add the escape character formatPattern = "\\" + pattern; } // the case the replacement has an option if (checkOption(replacement)) { regex.append("s/"); regex.append(formatPattern); regex.append("/"); regex.append(replacement); regex.append("g"); } else { regex.append("s/"); regex.append(formatPattern); regex.append("/"); regex.append(replacement); regex.append("/g"); } return regex.toString(); }
/** * This function makes a perl5 match pattern from non-format pattern. This function also checks * for the case of special character. It the patter passed in is such a chracter, the escape * character back slash is attached in the front. * * <p>i.e.) input "&" output "/&/" * * @param pattern pattern string to match * @return returns the perl5 format pattern * @exception throws a FrameworkException when the pattern is null. */ private static String makePerl5MatchPattern(String pattern) throws FrameworkException { if (pattern == null) { throw new FrameworkException("RegexUtil: makePerl5MatchPattern(): the pattern is null."); } StringBuffer perl5Pattern = new StringBuffer(); String formatPattern = pattern; // check for the special characters which needs to be preceeed it by a backslash if (pattern.equals("|") || pattern.equals(")") || pattern.equals("$") || pattern.equals("*") || pattern.equals("^") || pattern.equals("/") || pattern.equals("+") || pattern.equals(".") || pattern.equals("[") || pattern.equals("?") || pattern.equals("(") || pattern.equals("]")) { // add the escape character formatPattern = "\\" + pattern; } /* check if the pattern has the option. user can specify the pattern with the * perl5 option: pattern/[i][m][s][x] * checkOption returns true if the pattern has index of the one of * those: /i, /m, /s, /x */ if (checkOption(formatPattern)) { perl5Pattern.append("/"); perl5Pattern.append(formatPattern); } else { perl5Pattern.append("/"); perl5Pattern.append(formatPattern); perl5Pattern.append("/"); } return perl5Pattern.toString(); }
/** * The function checks and handles the situation where another begin token is found in between the * first begin token and the first end token. Such first beginToken is ignored. The current input * to be passed in to the replacement function will be the string between the LAST begin token and * the FIRST end token found. * * @param beginToken the pattern to look for * @param current input string * @param resultBuffer the buffer to hold the result * @return returns a Vector containing the substrings of the input that occur */ private static final String skipOrphanedBeginToken( String beginToken, String current, StringBuffer resultBuffer) { Perl5Util util = new Perl5Util(); boolean nextBeginTokenMatch = true; MatchResult matchResult = null; String remainderStr = null; String subCurrent = null; StringBuffer currentBuffer = new StringBuffer(); String pre = null; // subCurrent is copy of the current to check if there is any next beginTokens subCurrent = current; while (nextBeginTokenMatch) { nextBeginTokenMatch = util.match(beginToken, subCurrent); if (nextBeginTokenMatch == true) { // pre is the string before the beginToken pre = util.preMatch(); currentBuffer.append(pre); // get the matched beginToken matchResult = util.getMatch(); currentBuffer.append(matchResult.toString()); // get the remaining string after the beginToken remainderStr = util.postMatch(); subCurrent = remainderStr; } else // there is no match { // appending the string before the last begin token to the result resultBuffer.append(currentBuffer.toString()); // finally get the string to perform the substitution on current = subCurrent; break; } } // while return current; }
/** * This function substitues ALL the occurence of the pattern in the input with the replacement * passed in. The substitution is done only in the range bound by the begin token and the end * token. * * @param pattern pattern string to match * @param input input string * @param replacement replacement string to replace with * @param beginToken begin boundary token * @param endToken end boundary token * @param beginTokens[] an array of beginTokens - this is used for negative look ahead where there * are other begin tokens to be recognized. * @return returns processed string if both beginToken and the endToken are successfully found, * returns the unproessed original input otherwise. * @exception throws a FrameworkException when either pattern/input/replacement is null. */ public static String replaceAll( String pattern, String input, String replacement, String beginToken, String endToken, String beginTokens[]) throws FrameworkException { if ((pattern == null) || (replacement == null) || (input == null)) { Debug.log( Debug.ALL_ERRORS, "RegexUtils: replaceAll(): pattern or replacement or input is null. " + "\npattern = " + pattern + "\ninput = " + input + "\nreplacement = " + replacement); throw new FrameworkException( "RegexUtils: replaceAll(): pattern or replacement or input is null."); } Perl5Util util = new Perl5Util(); StringBuffer resultBuffer = new StringBuffer(); if ((beginToken == null) || (endToken == null)) { // either beginToken or endToken cannot be null, however both can be null. throw new FrameworkException( "RegexUtils: replaceAll(): Either begin or end token is null. BeginToken = " + beginToken + ", " + "endToken = " + endToken); } else // do pattern match { // making the Perl5 regular expression format pattern String begin = makePerl5MatchPattern(beginToken); String end = makePerl5MatchPattern(endToken); boolean beginTokenMatch = true; // if begin token found while (beginTokenMatch) { // check the input for each iteration. when there is no input, break out of the loop. if (input == null) break; beginTokenMatch = util.match(begin, input); if (beginTokenMatch) { String negativeLookAheadPattern = makePerl5MatchPatternNegativeLookAhead(pattern, replacement); int beginOffsetForBeginToken = util.beginOffset(0); int endOffsetForBeginToken = util.endOffset(0); // the input after the begin token String subInput = input.substring(endOffsetForBeginToken); // if end token passed in was an empty string if (endToken.equals("")) { String result = null; try { result = replaceAllWithBeginToken(pattern, input, replacement, beginToken); } catch (FrameworkException e) { Debug.log(Debug.ALL_ERRORS, "RegexUtils: replaceAll() failed." + e.getMessage()); } return result; } else if (util.match(end, subInput)) // endToken found { // begin offset for the end token relative to the input not subInput int beginOffsetForEndToken = endOffsetForBeginToken + util.beginOffset(0); int endOffsetForEndToken = endOffsetForBeginToken + util.endOffset(0); // pre is the string before the beginToken and the beginToken String pre = input.substring(0, endOffsetForBeginToken); resultBuffer.append(pre); // current is the string between the beginToken and the endToken String current = input.substring(endOffsetForBeginToken, beginOffsetForEndToken); // theRest is the rest of the input string after the endToken String theRest = input.substring(endOffsetForEndToken); // current is the string between begin token and the endtoken current = skipOrphanedBeginToken(begin, current, resultBuffer); if (isOtherBeginTokenThere(beginToken, current, beginTokens) == false) { current = replaceAll(pattern, current, replacement); } // isOtherBeginTokenThere resultBuffer.append(current); resultBuffer.append(input.substring(beginOffsetForEndToken, endOffsetForEndToken)); input = theRest; } else // endToken not found { resultBuffer.append(input); break; } } // if beginToken found else // beginToken not found { resultBuffer.append(input); break; } } // while begin token found } // else do pattern match return resultBuffer.toString(); }
@NotNull private Pattern getPattern(String pattern) { if (!Comparing.strEqual(pattern, myPattern)) { myCompiledPattern = null; myPattern = pattern; } if (myCompiledPattern == null) { boolean allowToLower = true; final int eol = pattern.indexOf('\n'); if (eol != -1) { pattern = pattern.substring(0, eol); } if (pattern.length() >= 80) { pattern = pattern.substring(0, 80); } final @NonNls StringBuffer buffer = new StringBuffer(); if (containsOnlyUppercaseLetters(pattern)) { allowToLower = false; } if (allowToLower) { buffer.append(".*"); } boolean firstIdentifierLetter = true; for (int i = 0; i < pattern.length(); i++) { final char c = pattern.charAt(i); if (Character.isLetterOrDigit(c)) { // This logic allows to use uppercase letters only to catch the name like PDM for // PsiDocumentManager if (Character.isUpperCase(c) || Character.isDigit(c)) { if (!firstIdentifierLetter) { buffer.append("[^A-Z]*"); } buffer.append("["); buffer.append(c); if (allowToLower || i == 0) { buffer.append('|'); buffer.append(Character.toLowerCase(c)); } buffer.append("]"); } else if (Character.isLowerCase(c)) { buffer.append('['); buffer.append(c); buffer.append('|'); buffer.append(Character.toUpperCase(c)); buffer.append(']'); } else { buffer.append(c); } firstIdentifierLetter = false; } else if (c == '*') { buffer.append(".*"); firstIdentifierLetter = true; } else if (c == '.') { buffer.append("\\."); firstIdentifierLetter = true; } else if (c == ' ') { buffer.append("[^A-Z]*\\ "); firstIdentifierLetter = true; } else { firstIdentifierLetter = true; // for standard RegExp engine // buffer.append("\\u"); // buffer.append(Integer.toHexString(c + 0x20000).substring(1)); // for OROMATCHER RegExp engine buffer.append("\\x"); buffer.append(Integer.toHexString(c + 0x20000).substring(3)); } } buffer.append(".*"); try { myCompiledPattern = new Perl5Compiler().compile(buffer.toString()); } catch (MalformedPatternException e) { // do nothing } } return myCompiledPattern; }