/**
 * Finds where the first occurrence of {@code pattern} starts inside {@code input}.
 *
 * <p>The raw pattern is converted with {@code makePerl5MatchPattern} before it is handed to
 * {@link Perl5Util}.
 *
 * @param pattern pattern string to match
 * @param input input string to search
 * @return the offset reported by {@code Perl5Util.beginOffset(0)} for the first match, or
 *     {@code -1} when the pattern does not match
 * @throws FrameworkException when {@code pattern} or {@code input} is null
 */
public static int getBeginOffset(String pattern, String input) throws FrameworkException {
  if (pattern == null || input == null) {
    throw new FrameworkException(
        "RegexUtils: getBeginOffset(): input or pattern is null."
            + "input = "
            + input
            + ", "
            + "pattern = "
            + pattern);
  }

  Perl5Util matcher = new Perl5Util();
  String perl5Pattern = makePerl5MatchPattern(pattern);

  // Guard clause: report the miss and return the sentinel right away.
  if (!matcher.match(perl5Pattern, input)) {
    Debug.log(Debug.MSG_STATUS, "RegexUtils: getBeginOffset(): No match found.");
    return -1;
  }

  // Group 0 is the whole match.
  return matcher.beginOffset(0);
}
/** * This function replace only the first pattern matched with the replacement. * * @param pattern pattern string to match * @param input input string * @param replacement replacement string * @return returns the processed string * @exception throws a FrameworkException when the pattern/replacement is null. */ public static String replaceFirst(String pattern, String input, String replacement) throws FrameworkException { if ((pattern == null) || (input == null) || (replacement == null)) { throw new FrameworkException( "RegexUtil: replaceFirst(): pattern or input cannot be null. " + "pattern = " + pattern + "input = " + input + "replacement = " + replacement); } String result = null; String perl5SubstitutionPattern = null; Perl5Util util = new Perl5Util(); // default return value result = input; perl5SubstitutionPattern = makePerl5SubstitutionPattern(pattern, replacement); pattern = makePerl5MatchPattern(pattern); if (util.match(pattern, input)) { result = util.substitute(perl5SubstitutionPattern, input); } return result; }
/** * This function composes the perl regular expression to perform pattern matching with negative * look ahead. This becomes handy when a replacement string includes some string that is also a * pattern. i.e.) pattern - ", & replacement - ", & original input - I didn't &0 to say * "GOO". * * <p>After the first substitution with " and ": processed input - I didn't &0 to say * "GOO". * * <p>When we do the second substitution with '&', we do not want to perform the pattern matching * on the '&' of """ since it is already a replacement. If we give the pattern like this with * perl's negative look-ahead functionality: * * <p>not-negative look-ahead pattern - & negative look-ahead pattern - &(?!quot) * * <p>After the second substitution with & and & with non-negative look-ahead: processed input * - I didn't &0 to say &quotGOO&quot. * * <p>After the second substitution with & and & with negative look-ahead: processed input - I * didn't &0 to say "GOO". * * @param pattern - pattern string to match * @param replacement - replacement * @return returns the format pattern for perl5 negative look-ahead * @exception throws a FrameworkException when the input/pattern/replacement is null. */ private static String makePerl5MatchPatternNegativeLookAhead(String pattern, String replacement) throws FrameworkException { if (pattern == null || replacement == null) { throw new FrameworkException( "RegexUtils: makePerl5MatchPatternNegativeLookAhead: pattern or replacement is null." 
+ "\npattern = " + pattern + "\nreplacement = " + replacement); } String result = pattern; StringBuffer patternBuffer = new StringBuffer(); Perl5Util util = new Perl5Util(); String formatPattern = makePerl5MatchPattern(pattern); // check if the pattern string is a part of the replacement string if (util.match(formatPattern, replacement)) { if (replacement.startsWith(pattern)) { result = util.postMatch(); patternBuffer.append(pattern); patternBuffer.append("(?!"); patternBuffer.append(result); patternBuffer.append(")"); result = patternBuffer.toString(); } else { throw new FrameworkException( "ERROR: RegexUtils: makePerl5MatchPatternNegativeLookAhead: The pattern in the " + "replacement string should be at the beginning."); } } return result; }
/** * This function returns the matched string in input when the pattern is found. * * @param pattern pattern string to match * @param input input string * @return returns the portion of the matched string in the input * @exception throws a FrameworkException when either pattern/input is null. */ public static String getMatch(String pattern, String input) throws FrameworkException { if (input == null || pattern == null) { throw new FrameworkException( "RegexUtils: getMatch(): input or pattern is null." + "input =" + input + ", " + "pattern = " + pattern); } Perl5Util util = new Perl5Util(); boolean isMatched = false; MatchResult matchResult = null; // default return value is the original input String result = input; pattern = makePerl5MatchPattern(pattern); if (util.match(pattern, input)) { matchResult = util.getMatch(); result = matchResult.toString(); } return result; }
/** * This function composes the perl regular expression to perform pattern matching with negative * look ahead. This becomes handy when a replacement string includes some string that is also a * pattern. * * <p>i.e.) pattern - & replacement - ", & * * @param pattern - pattern string to match * @param replacements - an array of replacement strings * @return returns the format pattern for perl5 negative look-ahead * @exception throws a FrameworkException when the pattern/replacement is null. */ private static String makePerl5MatchPatternNegativeLookAhead( String pattern, String replacements[]) throws FrameworkException { if (pattern == null || replacements.length < 0) { throw new FrameworkException( "RegexUtils: makePerl5MatchPatternNegativeLookAhead: pattern or replacement is null." + "\npattern = " + pattern + "\nreplacements.length = " + replacements.length); } String result = null; StringBuffer patternBuffer = new StringBuffer(); Perl5Util util = new Perl5Util(); String formatPattern = makePerl5MatchPattern(pattern); for (int i = 0; i < replacements.length; i++) { if (util.match(formatPattern, replacements[i])) { if (replacements[i].startsWith(pattern)) { result = util.postMatch(); // very first one if (i == 0) { patternBuffer.append(pattern); patternBuffer.append("(?!"); } patternBuffer.append(result); // the last one if (i == (replacements.length - 1)) { patternBuffer.append(")"); } else // not the last one, cat the perl5 separater { patternBuffer.append("|"); } } else { throw new FrameworkException( "ERROR: RegexUtils: makePerl5MatchPatternNegativeLookAhead: The pattern in the " + "replacement string should be at the beginning."); } } else // no match found meaning invalid use of this function { throw new FrameworkException( "ERROR: RegexUtils: makePerl5MatchPatternNegativeLookAhead: " + "Invalid use of the function."); } } if (Debug.isLevelEnabled(Debug.MSG_STATUS)) { Debug.log( Debug.MSG_STATUS, "RegexUtils: makePerl5MatchPatternNegativeLookAhead: 
patternBuffer.toString() = " + patternBuffer.toString()); } return patternBuffer.toString(); }
/** * This function replace only the first pattern matched with the replacement. * * @param pattern pattern string to match * @param input input string * @param replacement replacement string * @return returns the processed string * @exception throws a FrameworkException when the pattern/replacement is null. */ public static String replaceLast(String pattern, String input, String replacement) throws FrameworkException { if ((pattern == null) || (input == null) || (replacement == null)) { throw new FrameworkException( "RegexUtil: replaceLast(): pattern or input cannot be null. " + "pattern = " + pattern + "input = " + input + "replacement = " + replacement); } Perl5Util util = new Perl5Util(); MatchResult matchResult = null; String result = null; String pre = null; String post = null; StringBuffer resultBuffer = new StringBuffer(); int length = input.length(); String regex = makePerl5SubstitutionPattern(pattern, replacement); pattern = makePerl5MatchPattern(pattern); // counts the number of match and grab the last one while (util.match(pattern, input)) { // getting the string before match pre = util.preMatch(); resultBuffer.append(pre); // get the matched string matchResult = util.getMatch(); resultBuffer.append(matchResult.toString()); // get the post string after the match post = util.postMatch(); // the post becomes the new input input = post; } // get the last match found matchResult = util.getMatch(); // do the string replacement on the pattern found result = util.substitute(regex, matchResult.toString()); resultBuffer.append(result); resultBuffer.append(post); return resultBuffer.toString(); }
/** * This function checks in the input if there is other tokens in the begingTokens array than * begin. This utility function is used on a string which is between a valid begin token and all * possible other begin tokens. If there is another begin token of different kind is found, the * substitution shouldn't occur. Consider this situation: * * <p>i.e.) <beginToken_0> "some string <beginToken_1> here to do the substitution on" * <endToken_0> <beginToken_0> "another some string" <endToken_1> * * <p>In this case, the replaceAll() function finds the matched pair <beginToken_0> and * <endToken_0> and attempt to perform the substitution. However, since there is another boundary * _1 is overlapped with _0 boundary, we do not want to perform a string substitution. * * @param replacement replacement string to replace with * @param beginToken begin boundary token * @param endToken end boundary token * @return true if any of other beginTokens in the beginToken[] than begin is found in the input * string, false otherwise */ private static final boolean isOtherBeginTokenThere( String begin, String input, String beginTokens[]) throws FrameworkException { Perl5Util util = new Perl5Util(); for (int i = 0; i < beginTokens.length; i++) { // look for the other token if (!beginTokens[i].equals(begin)) { String perl5Pattern = makePerl5MatchPattern(beginTokens[i]); // if the input contains at least one beginToken which differs from the begin if (util.match(perl5Pattern, input)) { Debug.log( Debug.MSG_STATUS, "RegexUtils: isOtherBeginTokenThere: token = " + perl5Pattern); return true; } } } return false; }
/** * This function returns boolean value true when the pattern is found in the input string. Returns * false otherwise. * * @param pattern pattern string to match * @param input input string * @return returns true if a match is found in the input string passed in, false otherwise. * @exception throws a FrameworkException when either pattern/input is null. */ public static boolean match(String pattern, String input) throws FrameworkException { if (input == null || pattern == null) { throw new FrameworkException( "RegexUtils: match(): input or pattern is null." + "input =" + input + ", " + "pattern = " + pattern); } Perl5Util util = new Perl5Util(); boolean isMatched = false; // convert the pattern to the perl5 format pattern = makePerl5MatchPattern(pattern); isMatched = util.match(pattern, input); return isMatched; }
/** * The function checks and handles the situation where another begin token is found in between the * first begin token and the first end token. Such first beginToken is ignored. The current input * to be passed in to the replacement function will be the string between the LAST begin token and * the FIRST end token found. * * @param beginToken the pattern to look for * @param current input string * @param resultBuffer the buffer to hold the result * @return returns a Vector containing the substrings of the input that occur */ private static final String skipOrphanedBeginToken( String beginToken, String current, StringBuffer resultBuffer) { Perl5Util util = new Perl5Util(); boolean nextBeginTokenMatch = true; MatchResult matchResult = null; String remainderStr = null; String subCurrent = null; StringBuffer currentBuffer = new StringBuffer(); String pre = null; // subCurrent is copy of the current to check if there is any next beginTokens subCurrent = current; while (nextBeginTokenMatch) { nextBeginTokenMatch = util.match(beginToken, subCurrent); if (nextBeginTokenMatch == true) { // pre is the string before the beginToken pre = util.preMatch(); currentBuffer.append(pre); // get the matched beginToken matchResult = util.getMatch(); currentBuffer.append(matchResult.toString()); // get the remaining string after the beginToken remainderStr = util.postMatch(); subCurrent = remainderStr; } else // there is no match { // appending the string before the last begin token to the result resultBuffer.append(currentBuffer.toString()); // finally get the string to perform the substitution on current = subCurrent; break; } } // while return current; }
/** * This function locates the first match on the begin token and replace all the pattern with the * replacement the area after the begin token is located. * * @param pattern pattern string to match * @param input input string * @param replacement replacement string * @param beginToken begin token string * @return processed string. If there's no match on the begin token, it returns the original input * by default. * @exception throws a FrameworkException when the input/pattern/replacement is null. */ public static String replaceAllWithBeginToken( String pattern, String input, String replacement, String beginToken) throws FrameworkException { if (input == null || pattern == null || replacement == null) { throw new FrameworkException( "RegexUtils: replaceAllWithBeginToken: input or pattern or replacement is null." + "\ninput = " + input + "\npattern = " + pattern + "\nreplacement = " + replacement); } Perl5Util util = new Perl5Util(); // default return value String result = input; // conversion to the valid perl5 pattern regex String formatPattern = makePerl5MatchPattern(pattern); // if the beginToken is null, just do the regular replaceAll if (beginToken == null) { result = replaceAll(formatPattern, input, replacement); } // find if there is begin token in the input string else if (util.match(beginToken, input)) { // pre has the string before the match and the mathced pattern String pre = input.substring(0, util.endOffset(0)); // get the rest of the string after the begin token is found input = input.substring(util.endOffset(0)); result = replaceAll(pattern, input, replacement); result = pre + result; } return result; }
/** * This function checks if the replacement includes pattern string. * * <p>i.e 1) if ( pattern = &, replacement = & ) returns true i.e 2) if ( pattern = ", * replacement = " ) returns false * * @param pattern - pattern string * @param replacement - replacement string * @return returns true if pattern string is a part of the replacement string, false, otherwise. * @exception throws a FrameworkException when the pattern/replacement is null. */ public static boolean checkReplacement(String pattern, String replacement) throws FrameworkException { if (pattern == null || replacement == null) { throw new FrameworkException( "RegexUtils: checkReplacement: pattern or replacement is null." + "\npattern = " + pattern + "\nreplacement = " + replacement); } boolean check = false; Perl5Util util = new Perl5Util(); String formatPattern = makePerl5MatchPattern(pattern); // check if the pattern string is a part of the replacement string if (util.match(formatPattern, replacement)) { if (replacement.startsWith(pattern)) { check = true; } } return check; }
/** * This function substitues ALL the occurence of the pattern in the input with the replacement * passed in. The substitution is done only in the range bound by the begin token and the end * token. * * @param pattern pattern string to match * @param input input string * @param replacement replacement string to replace with * @param beginToken begin boundary token * @param endToken end boundary token * @param beginTokens[] an array of beginTokens - this is used for negative look ahead where there * are other begin tokens to be recognized. * @return returns processed string if both beginToken and the endToken are successfully found, * returns the unproessed original input otherwise. * @exception throws a FrameworkException when either pattern/input/replacement is null. */ public static String replaceAll( String pattern, String input, String replacement, String beginToken, String endToken, String beginTokens[]) throws FrameworkException { if ((pattern == null) || (replacement == null) || (input == null)) { Debug.log( Debug.ALL_ERRORS, "RegexUtils: replaceAll(): pattern or replacement or input is null. " + "\npattern = " + pattern + "\ninput = " + input + "\nreplacement = " + replacement); throw new FrameworkException( "RegexUtils: replaceAll(): pattern or replacement or input is null."); } Perl5Util util = new Perl5Util(); StringBuffer resultBuffer = new StringBuffer(); if ((beginToken == null) || (endToken == null)) { // either beginToken or endToken cannot be null, however both can be null. throw new FrameworkException( "RegexUtils: replaceAll(): Either begin or end token is null. BeginToken = " + beginToken + ", " + "endToken = " + endToken); } else // do pattern match { // making the Perl5 regular expression format pattern String begin = makePerl5MatchPattern(beginToken); String end = makePerl5MatchPattern(endToken); boolean beginTokenMatch = true; // if begin token found while (beginTokenMatch) { // check the input for each iteration. 
when there is no input, break out of the loop. if (input == null) break; beginTokenMatch = util.match(begin, input); if (beginTokenMatch) { String negativeLookAheadPattern = makePerl5MatchPatternNegativeLookAhead(pattern, replacement); int beginOffsetForBeginToken = util.beginOffset(0); int endOffsetForBeginToken = util.endOffset(0); // the input after the begin token String subInput = input.substring(endOffsetForBeginToken); // if end token passed in was an empty string if (endToken.equals("")) { String result = null; try { result = replaceAllWithBeginToken(pattern, input, replacement, beginToken); } catch (FrameworkException e) { Debug.log(Debug.ALL_ERRORS, "RegexUtils: replaceAll() failed." + e.getMessage()); } return result; } else if (util.match(end, subInput)) // endToken found { // begin offset for the end token relative to the input not subInput int beginOffsetForEndToken = endOffsetForBeginToken + util.beginOffset(0); int endOffsetForEndToken = endOffsetForBeginToken + util.endOffset(0); // pre is the string before the beginToken and the beginToken String pre = input.substring(0, endOffsetForBeginToken); resultBuffer.append(pre); // current is the string between the beginToken and the endToken String current = input.substring(endOffsetForBeginToken, beginOffsetForEndToken); // theRest is the rest of the input string after the endToken String theRest = input.substring(endOffsetForEndToken); // current is the string between begin token and the endtoken current = skipOrphanedBeginToken(begin, current, resultBuffer); if (isOtherBeginTokenThere(beginToken, current, beginTokens) == false) { current = replaceAll(pattern, current, replacement); } // isOtherBeginTokenThere resultBuffer.append(current); resultBuffer.append(input.substring(beginOffsetForEndToken, endOffsetForEndToken)); input = theRest; } else // endToken not found { resultBuffer.append(input); break; } } // if beginToken found else // beginToken not found { resultBuffer.append(input); break; } } // 
while begin token found } // else do pattern match return resultBuffer.toString(); }