/** * Returns any trailing period, comma, semicolon, or colon characters from the given string. This * method is useful when parsing raw HTML links, in which case trailing punctuation must be * removed. Note that only punctuation that is not previously matched is trimmed - if the input is * "http://example.com/page_(page)" then the trailing parantheses will not be trimmed. * * @param text The text from which trailing punctuation should be returned. * @return Any trailing punctuation from the given text, or an empty string otherwise. */ private String extractTrailingPunctuation(String text) { if (StringUtils.isBlank(text)) { return ""; } StringBuilder buffer = new StringBuilder(); for (int i = text.length() - 1; i >= 0; i--) { char c = text.charAt(i); if (c == '.' || c == ';' || c == ',' || c == ':' || c == '(' || c == '[' || c == '{') { buffer.append(c); continue; } // if the value ends with ), ] or } then strip it UNLESS there is a matching // opening tag if (c == ')' || c == ']' || c == '}') { String closeChar = String.valueOf(c); String openChar = (c == ')') ? "(" : ((c == ']') ? "[" : "{"); int pos = Utilities.findMatchingStartTag(text, i, openChar, closeChar); if (pos == -1) { buffer.append(c); continue; } } break; } if (buffer.length() == 0) { return ""; } buffer = buffer.reverse(); return buffer.toString(); }