public class HtmlEscapeHelper { public static final AggregateTranslator ESCAPE_HTML = new AggregateTranslator( StringEscapeUtils.ESCAPE_HTML4, new UnicodeControlCharacterToHtmlTranslator()); public static final LookupTranslator ESCAPE_BASIC = new LookupTranslator(EntityArrays.BASIC_ESCAPE()); public static String escape(final CharSequence text) { if (text == null) return null; return ESCAPE_HTML.translate(text); } public static String toHtml(final String string) { if (string == null) return null; return escape(string).replace("\n", "<br/>"); } public static String toPlainText(final String string) { if (string == null) return null; return unescape(string.replace("<br/>", "\n").replaceAll("<!--.*?-->|<[^>]+>", "")); } public static String unescape(final String string) { if (string == null) return null; return StringEscapeUtils.unescapeHtml4(string); } public static String escapeBasic(CharSequence text) { return ESCAPE_BASIC.translate(text); } private static class UnicodeControlCharacterToHtmlTranslator extends CodePointTranslator { @Override public boolean translate(int codePoint, Writer out) throws IOException { if (Character.isISOControl(codePoint)) { out.append("&#x"); final char[] chars = Character.toChars(codePoint); for (char c : chars) { out.append(Integer.toHexString(c)); } out.append(';'); return true; } return false; } } }
/** Created by mariotaku on 15/1/11. */ public class TwitterContentUtils { public static final int TWITTER_BULK_QUERY_COUNT = 100; private static final Pattern PATTERN_TWITTER_STATUS_LINK = Pattern.compile("https?://twitter\\.com/(?:#!/)?(\\w+)/status(es)?/(\\d+)"); public static String formatDirectMessageText(final DirectMessage message) { if (message == null) return null; final HtmlBuilder builder = new HtmlBuilder(message.getText(), false, true, true); TwitterContentUtils.parseEntities(builder, message); return builder.build(); } public static String formatExpandedUserDescription(final User user) { if (user == null) return null; final String text = user.getDescription(); if (text == null) return null; final HtmlBuilder builder = new HtmlBuilder(text, false, true, true); final UrlEntity[] urls = user.getDescriptionEntities(); if (urls != null) { for (final UrlEntity url : urls) { final String expanded_url = url.getExpandedUrl(); if (expanded_url != null) { builder.addLink(expanded_url, expanded_url, url.getStart(), url.getEnd()); } } } return toPlainText(builder.build()); } public static String formatStatusText(final Status status) { if (status == null) return null; final HtmlBuilder builder = new HtmlBuilder(status.getText(), false, true, true); TwitterContentUtils.parseEntities(builder, status); return builder.build(); } public static String formatUserDescription(final User user) { if (user == null) return null; final String text = user.getDescription(); if (text == null) return null; final HtmlBuilder builder = new HtmlBuilder(text, false, true, true); final UrlEntity[] urls = user.getDescriptionEntities(); if (urls != null) { for (final UrlEntity url : urls) { final String expanded_url = url.getExpandedUrl(); if (expanded_url != null) { builder.addLink(expanded_url, url.getDisplayUrl(), url.getStart(), url.getEnd()); } } } return builder.build(); } @NonNull public static String getInReplyToName(@NonNull final Status status) { final Status orig = status.isRetweet() ? status.getRetweetedStatus() : status; final long inReplyToUserId = status.getInReplyToUserId(); final UserMentionEntity[] entities = status.getUserMentionEntities(); if (entities == null) return orig.getInReplyToScreenName(); for (final UserMentionEntity entity : entities) { if (inReplyToUserId == entity.getId()) return entity.getName(); } return orig.getInReplyToScreenName(); } public static boolean isOfficialKey( final Context context, final String consumerKey, final String consumerSecret) { if (context == null || consumerKey == null || consumerSecret == null) return false; final String[] keySecrets = context.getResources().getStringArray(R.array.values_official_consumer_secret_crc32); final CRC32 crc32 = new CRC32(); final byte[] consumerSecretBytes = consumerSecret.getBytes(Charset.forName("UTF-8")); crc32.update(consumerSecretBytes, 0, consumerSecretBytes.length); final long value = crc32.getValue(); crc32.reset(); for (final String keySecret : keySecrets) { if (Long.parseLong(keySecret, 16) == value) return true; } return false; } public static String getOfficialKeyName( final Context context, final String consumerKey, final String consumerSecret) { if (context == null || consumerKey == null || consumerSecret == null) return null; final String[] keySecrets = context.getResources().getStringArray(R.array.values_official_consumer_secret_crc32); final String[] keyNames = context.getResources().getStringArray(R.array.names_official_consumer_secret); final CRC32 crc32 = new CRC32(); final byte[] consumerSecretBytes = consumerSecret.getBytes(Charset.forName("UTF-8")); crc32.update(consumerSecretBytes, 0, consumerSecretBytes.length); final long value = crc32.getValue(); crc32.reset(); for (int i = 0, j = keySecrets.length; i < j; i++) { if (Long.parseLong(keySecrets[i], 16) == value) return keyNames[i]; } return null; } @NonNull public static ConsumerKeyType getOfficialKeyType( final Context context, final String consumerKey, final String consumerSecret) { if (context == null || consumerKey == null || consumerSecret == null) { return ConsumerKeyType.UNKNOWN; } final String[] keySecrets = context.getResources().getStringArray(R.array.values_official_consumer_secret_crc32); final String[] keyNames = context.getResources().getStringArray(R.array.types_official_consumer_secret); final CRC32 crc32 = new CRC32(); final byte[] consumerSecretBytes = consumerSecret.getBytes(Charset.forName("UTF-8")); crc32.update(consumerSecretBytes, 0, consumerSecretBytes.length); final long value = crc32.getValue(); crc32.reset(); for (int i = 0, j = keySecrets.length; i < j; i++) { if (Long.parseLong(keySecrets[i], 16) == value) { return ConsumerKeyType.parse(keyNames[i]); } } return ConsumerKeyType.UNKNOWN; } private static final CharSequenceTranslator UNESCAPE_TWITTER_RAW_TEXT = new LookupTranslator(EntityArrays.BASIC_UNESCAPE()); private static final CharSequenceTranslator ESCAPE_TWITTER_RAW_TEXT = new LookupTranslator(EntityArrays.BASIC_ESCAPE()); public static String unescapeTwitterStatusText(final CharSequence text) { if (text == null) return null; return UNESCAPE_TWITTER_RAW_TEXT.translate(text); } public static String escapeTwitterStatusText(final CharSequence text) { if (text == null) return null; return ESCAPE_TWITTER_RAW_TEXT.translate(text); } public static <T extends List<Status>> T getStatusesWithQuoteData( Twitter twitter, @NonNull T list) throws TwitterException { LongSparseMap<Status> quotes = new LongSparseMap<>(); // Phase 1: collect all statuses contains a status link, and put it in the map for (Status status : list) { if (status.isQuote()) continue; final UrlEntity[] entities = status.getUrlEntities(); if (entities == null || entities.length <= 0) continue; // Seems Twitter will find last status link for quote target, so we search backward for (int i = entities.length - 1; i >= 0; i--) { final Matcher m = PATTERN_TWITTER_STATUS_LINK.matcher(entities[i].getExpandedUrl()); if (!m.matches()) continue; final long def = -1; final long quoteId = NumberUtils.toLong(m.group(3), def); if (quoteId > 0) { quotes.put(quoteId, status); } break; } } // Phase 2: look up quoted tweets. Each lookup can fetch up to 100 tweets, so we split quote // ids into batches final long[] quoteIds = quotes.keys(); for (int currentBulkIdx = 0, totalLength = quoteIds.length; currentBulkIdx < totalLength; currentBulkIdx += TWITTER_BULK_QUERY_COUNT) { final int currentBulkCount = Math.min(totalLength, currentBulkIdx + TWITTER_BULK_QUERY_COUNT) - currentBulkIdx; final long[] ids = new long[currentBulkCount]; System.arraycopy(quoteIds, currentBulkIdx, ids, 0, currentBulkCount); // Lookup quoted statuses, then set each status into original status for (Status quoted : twitter.lookupStatuses(ids)) { final Set<Status> orig = quotes.get(quoted.getId()); // This set shouldn't be null here, add null check to make inspector happy. if (orig == null) continue; for (Status status : orig) { Status.setQuotedStatus(status, quoted); } } } return list; } public static String getMediaUrl(MediaEntity entity) { return TextUtils.isEmpty(entity.getMediaUrlHttps()) ? entity.getMediaUrl() : entity.getMediaUrlHttps(); } public static String getProfileImageUrl(@Nullable User user) { if (user == null) return null; return TextUtils.isEmpty(user.getProfileImageUrlHttps()) ? user.getProfileImageUrl() : user.getProfileImageUrlHttps(); } private static void parseEntities(final HtmlBuilder builder, final EntitySupport entities) { // Format media. final MediaEntity[] mediaEntities = entities.getMediaEntities(); if (mediaEntities != null) { for (final MediaEntity mediaEntity : mediaEntities) { final int start = mediaEntity.getStart(), end = mediaEntity.getEnd(); final String mediaUrl = TwitterContentUtils.getMediaUrl(mediaEntity); if (mediaUrl != null && start >= 0 && end >= 0) { builder.addLink(mediaUrl, mediaEntity.getDisplayUrl(), start, end); } } } final UrlEntity[] urlEntities = entities.getUrlEntities(); if (urlEntities != null) { for (final UrlEntity urlEntity : urlEntities) { final int start = urlEntity.getStart(), end = urlEntity.getEnd(); final String expandedUrl = urlEntity.getExpandedUrl(); if (expandedUrl != null && start >= 0 && end >= 0) { builder.addLink(expandedUrl, urlEntity.getDisplayUrl(), start, end); } } } } public static boolean isFiltered( final SQLiteDatabase database, final long user_id, final String text_plain, final String text_html, final String source, final long retweeted_by_id, final long quotedUserId) { return isFiltered( database, user_id, text_plain, text_html, source, retweeted_by_id, quotedUserId, true); } public static boolean isFiltered( final SQLiteDatabase database, final long userId, final String textPlain, final String textHtml, final String source, final long retweetedById, final long quotedUserId, final boolean filterRts) { if (database == null) return false; if (textPlain == null && textHtml == null && userId <= 0 && source == null) return false; final StringBuilder builder = new StringBuilder(); final List<String> selection_args = new ArrayList<>(); builder.append("SELECT NULL WHERE"); if (textPlain != null) { selection_args.add(textPlain); builder.append( "(SELECT 1 IN (SELECT ? LIKE '%'||" + Filters.Keywords.TABLE_NAME + "." + Filters.VALUE + "||'%' FROM " + Filters.Keywords.TABLE_NAME + "))"); } if (textHtml != null) { if (!selection_args.isEmpty()) { builder.append(" OR "); } selection_args.add(textHtml); builder.append( "(SELECT 1 IN (SELECT ? LIKE '%<a href=\"%'||" + Filters.Links.TABLE_NAME + "." + Filters.VALUE + "||'%\">%' FROM " + Filters.Links.TABLE_NAME + "))"); } if (userId > 0) { if (!selection_args.isEmpty()) { builder.append(" OR "); } builder .append("(SELECT ") .append(userId) .append(" IN (SELECT ") .append(Filters.Users.USER_ID) .append(" FROM ") .append(Filters.Users.TABLE_NAME) .append("))"); } if (retweetedById > 0) { if (!selection_args.isEmpty()) { builder.append(" OR "); } builder .append("(SELECT ") .append(retweetedById) .append(" IN (SELECT ") .append(Filters.Users.USER_ID) .append(" FROM ") .append(Filters.Users.TABLE_NAME) .append("))"); } if (quotedUserId > 0) { if (!selection_args.isEmpty()) { builder.append(" OR "); } builder .append("(SELECT ") .append(quotedUserId) .append(" IN (SELECT ") .append(Filters.Users.USER_ID) .append(" FROM ") .append(Filters.Users.TABLE_NAME) .append("))"); } if (source != null) { if (!selection_args.isEmpty()) { builder.append(" OR "); } selection_args.add(source); builder.append( "(SELECT 1 IN (SELECT ? LIKE '%>'||" + Filters.Sources.TABLE_NAME + "." + Filters.VALUE + "||'</a>%' FROM " + Filters.Sources.TABLE_NAME + "))"); } final Cursor cur = database.rawQuery( builder.toString(), selection_args.toArray(new String[selection_args.size()])); if (cur == null) return false; try { return cur.getCount() > 0; } finally { cur.close(); } } public static boolean isFiltered( final SQLiteDatabase database, final ParcelableStatus status, final boolean filter_rts) { if (database == null || status == null) return false; return isFiltered( database, status.user_id, status.text_plain, status.text_html, status.source, status.retweeted_by_user_id, status.quoted_user_id, filter_rts); } @Nullable public static String getBestBannerUrl(@Nullable final String baseUrl, final int width) { if (baseUrl == null) return null; final String type = getBestBannerType(width); final String authority = PreviewMediaExtractor.getAuthority(baseUrl); return authority != null && authority.endsWith(".twimg.com") ? baseUrl + "/" + type : baseUrl; } public static String getBestBannerType(final int width) { if (width <= 320) return "mobile"; else if (width <= 520) return "web"; else if (width <= 626) return "ipad"; else if (width <= 640) return "mobile_retina"; else if (width <= 1040) return "web_retina"; else return "ipad_retina"; } }
/** * Escapes and unescapes {@code String}s for Java, Java Script, HTML and XML. * * <p>#ThreadSafe# * * @since 2.0 * @version $Id$ */ public class StringEscapeUtils { /* ESCAPE TRANSLATORS */ /** * Translator object for escaping Java. * * <p>While {@link #escapeJava(String)} is the expected method of use, this object allows the Java * escaping functionality to be used as the foundation for a custom translator. * * @since 3.0 */ public static final CharSequenceTranslator ESCAPE_JAVA = new LookupTranslator( new String[][] { {"\"", "\\\""}, {"\\", "\\\\"}, }) .with(new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())) .with(JavaUnicodeEscaper.outsideOf(32, 0x7f)); /** * Translator object for escaping EcmaScript/JavaScript. * * <p>While {@link #escapeEcmaScript(String)} is the expected method of use, this object allows * the EcmaScript escaping functionality to be used as the foundation for a custom translator. * * @since 3.0 */ public static final CharSequenceTranslator ESCAPE_ECMASCRIPT = new AggregateTranslator( new LookupTranslator( new String[][] { {"'", "\\'"}, {"\"", "\\\""}, {"\\", "\\\\"}, {"/", "\\/"} }), new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), JavaUnicodeEscaper.outsideOf(32, 0x7f)); /** * Translator object for escaping Json. * * <p>While {@link #escapeJson(String)} is the expected method of use, this object allows the Json * escaping functionality to be used as the foundation for a custom translator. * * @since 3.2 */ public static final CharSequenceTranslator ESCAPE_JSON = new AggregateTranslator( new LookupTranslator( new String[][] { {"\"", "\\\""}, {"\\", "\\\\"}, {"/", "\\/"} }), new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), JavaUnicodeEscaper.outsideOf(32, 0x7f)); /** * Translator object for escaping XML. * * <p>While {@link #escapeXml(String)} is the expected method of use, this object allows the XML * escaping functionality to be used as the foundation for a custom translator. * * @since 3.0 */ public static final CharSequenceTranslator ESCAPE_XML = new AggregateTranslator( new LookupTranslator(EntityArrays.BASIC_ESCAPE()), new LookupTranslator(EntityArrays.APOS_ESCAPE())); /** * Translator object for escaping HTML version 3.0. * * <p>While {@link #escapeHtml3(String)} is the expected method of use, this object allows the * HTML escaping functionality to be used as the foundation for a custom translator. * * @since 3.0 */ public static final CharSequenceTranslator ESCAPE_HTML3 = new AggregateTranslator( new LookupTranslator(EntityArrays.BASIC_ESCAPE()), new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())); /** * Translator object for escaping HTML version 4.0. * * <p>While {@link #escapeHtml4(String)} is the expected method of use, this object allows the * HTML escaping functionality to be used as the foundation for a custom translator. * * @since 3.0 */ public static final CharSequenceTranslator ESCAPE_HTML4 = new AggregateTranslator( new LookupTranslator(EntityArrays.BASIC_ESCAPE()), new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()), new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())); /** * Translator object for escaping individual Comma Separated Values. * * <p>While {@link #escapeCsv(String)} is the expected method of use, this object allows the CSV * escaping functionality to be used as the foundation for a custom translator. * * @since 3.0 */ public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper(); // TODO: Create a parent class - 'SinglePassTranslator' ? // It would handle the index checking + length returning, // and could also have an optimization check method. static class CsvEscaper extends CharSequenceTranslator { private static final char CSV_DELIMITER = ','; private static final char CSV_QUOTE = '"'; private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; @Override public int translate(final CharSequence input, final int index, final Writer out) throws IOException { if (index != 0) { throw new IllegalStateException("CsvEscaper should never reach the [1] index"); } if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) { out.write(input.toString()); } else { out.write(CSV_QUOTE); out.write( StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR)); out.write(CSV_QUOTE); } return input.length(); } } /* UNESCAPE TRANSLATORS */ /** * Translator object for unescaping escaped Java. * * <p>While {@link #unescapeJava(String)} is the expected method of use, this object allows the * Java unescaping functionality to be used as the foundation for a custom translator. * * @since 3.0 */ // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the // compiler)? public static final CharSequenceTranslator UNESCAPE_JAVA = new AggregateTranslator( new OctalUnescaper(), // .between('\1', '\377'), new UnicodeUnescaper(), new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()), new LookupTranslator( new String[][] { {"\\\\", "\\"}, {"\\\"", "\""}, {"\\'", "'"}, {"\\", ""} })); /** * Translator object for unescaping escaped EcmaScript. * * <p>While {@link #unescapeEcmaScript(String)} is the expected method of use, this object allows * the EcmaScript unescaping functionality to be used as the foundation for a custom translator. * * @since 3.0 */ public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA; /** * Translator object for unescaping escaped Json. * * <p>While {@link #unescapeJson(String)} is the expected method of use, this object allows the * Json unescaping functionality to be used as the foundation for a custom translator. * * @since 3.2 */ public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA; /** * Translator object for unescaping escaped HTML 3.0. * * <p>While {@link #unescapeHtml3(String)} is the expected method of use, this object allows the * HTML unescaping functionality to be used as the foundation for a custom translator. * * @since 3.0 */ public static final CharSequenceTranslator UNESCAPE_HTML3 = new AggregateTranslator( new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), new NumericEntityUnescaper()); /** * Translator object for unescaping escaped HTML 4.0. * * <p>While {@link #unescapeHtml4(String)} is the expected method of use, this object allows the * HTML unescaping functionality to be used as the foundation for a custom translator. * * @since 3.0 */ public static final CharSequenceTranslator UNESCAPE_HTML4 = new AggregateTranslator( new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()), new NumericEntityUnescaper()); /** * Translator object for unescaping escaped XML. * * <p>While {@link #unescapeXml(String)} is the expected method of use, this object allows the XML * unescaping functionality to be used as the foundation for a custom translator. * * @since 3.0 */ public static final CharSequenceTranslator UNESCAPE_XML = new AggregateTranslator( new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), new LookupTranslator(EntityArrays.APOS_UNESCAPE()), new NumericEntityUnescaper()); /** * Translator object for unescaping escaped Comma Separated Value entries. * * <p>While {@link #unescapeCsv(String)} is the expected method of use, this object allows the CSV * unescaping functionality to be used as the foundation for a custom translator. * * @since 3.0 */ public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper(); static class CsvUnescaper extends CharSequenceTranslator { private static final char CSV_DELIMITER = ','; private static final char CSV_QUOTE = '"'; private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; @Override public int translate(final CharSequence input, final int index, final Writer out) throws IOException { if (index != 0) { throw new IllegalStateException("CsvUnescaper should never reach the [1] index"); } if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) { out.write(input.toString()); return input.length(); } // strip quotes final String quoteless = input.subSequence(1, input.length() - 1).toString(); if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) { // deal with escaped quotes; ie) "" out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR)); } else { out.write(input.toString()); } return input.length(); } } /* Helper functions */ /** * {@code StringEscapeUtils} instances should NOT be constructed in standard programming. * * <p>Instead, the class should be used as: * * <pre>StringEscapeUtils.escapeJava("foo");</pre> * * <p>This constructor is public to permit tools that require a JavaBean instance to operate. */ public StringEscapeUtils() { super(); } // Java and JavaScript // -------------------------------------------------------------------------- /** * Escapes the characters in a {@code String} using Java String rules. * * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) * * <p>So a tab becomes the characters {@code '\\'} and {@code 't'}. * * <p>The only difference between Java strings and JavaScript strings is that in JavaScript, a * single quote and forward-slash (/) are escaped. * * <p>Example: * * <pre> * input string: He didn't say, "Stop!" * output string: He didn't say, \"Stop!\" * </pre> * * @param input String to escape values in, may be null * @return String with escaped values, {@code null} if null string input */ public static final String escapeJava(final String input) { return ESCAPE_JAVA.translate(input); } /** * Escapes the characters in a {@code String} using EcmaScript String rules. * * <p>Escapes any values it finds into their EcmaScript String form. Deals correctly with quotes * and control-chars (tab, backslash, cr, ff, etc.) * * <p>So a tab becomes the characters {@code '\\'} and {@code 't'}. * * <p>The only difference between Java strings and EcmaScript strings is that in EcmaScript, a * single quote and forward-slash (/) are escaped. * * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. * * <p>Example: * * <pre> * input string: He didn't say, "Stop!" * output string: He didn\'t say, \"Stop!\" * </pre> * * @param input String to escape values in, may be null * @return String with escaped values, {@code null} if null string input * @since 3.0 */ public static final String escapeEcmaScript(final String input) { return ESCAPE_ECMASCRIPT.translate(input); } /** * Escapes the characters in a {@code String} using Json String rules. * * <p>Escapes any values it finds into their Json String form. Deals correctly with quotes and * control-chars (tab, backslash, cr, ff, etc.) * * <p>So a tab becomes the characters {@code '\\'} and {@code 't'}. * * <p>The only difference between Java strings and Json strings is that in Json, forward-slash (/) * is escaped. * * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details. * * <p>Example: * * <pre> * input string: He didn't say, "Stop!" * output string: He didn't say, \"Stop!\" * </pre> * * @param input String to escape values in, may be null * @return String with escaped values, {@code null} if null string input * @since 3.2 */ public static final String escapeJson(final String input) { return ESCAPE_JSON.translate(input); } /** * Unescapes any Java literals found in the {@code String}. For example, it will turn a sequence * of {@code '\'} and {@code 'n'} into a newline character, unless the {@code '\'} is preceded by * another {@code '\'}. * * @param input the {@code String} to unescape, may be null * @return a new unescaped {@code String}, {@code null} if null string input */ public static final String unescapeJava(final String input) { return UNESCAPE_JAVA.translate(input); } /** * Unescapes any EcmaScript literals found in the {@code String}. * * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} into a newline * character, unless the {@code '\'} is preceded by another {@code '\'}. * * @see #unescapeJava(String) * @param input the {@code String} to unescape, may be null * @return A new unescaped {@code String}, {@code null} if null string input * @since 3.0 */ public static final String unescapeEcmaScript(final String input) { return UNESCAPE_ECMASCRIPT.translate(input); } /** * Unescapes any Json literals found in the {@code String}. * * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} into a newline * character, unless the {@code '\'} is preceded by another {@code '\'}. * * @see #unescapeJava(String) * @param input the {@code String} to unescape, may be null * @return A new unescaped {@code String}, {@code null} if null string input * @since 3.2 */ public static final String unescapeJson(final String input) { return UNESCAPE_JSON.translate(input); } // HTML and XML // -------------------------------------------------------------------------- /** * Escapes the characters in a {@code String} using HTML entities. * * <p>For example: * * <p><code>"bread" & "butter"</code> becomes: * * <p><code>&quot;bread&quot; &amp; &quot;butter&quot;</code>. * * <p>Supports all known HTML 4.0 entities, including funky accents. Note that the commonly used * apostrophe escape character (&apos;) is not a legal entity and so is not supported). * * @param input the {@code String} to escape, may be null * @return a new escaped {@code String}, {@code null} if null string input * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO * Entities</a> * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO * Latin-1</a> * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity * references</a> * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character * References</a> * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code * positions</a> * @since 3.0 */ public static final String escapeHtml4(final String input) { return ESCAPE_HTML4.translate(input); } /** * Escapes the characters in a {@code String} using HTML entities. * * <p>Supports only the HTML 3.0 entities. * * @param input the {@code String} to escape, may be null * @return a new escaped {@code String}, {@code null} if null string input * @since 3.0 */ public static final String escapeHtml3(final String input) { return ESCAPE_HTML3.translate(input); } // ----------------------------------------------------------------------- /** * Unescapes a string containing entity escapes to a string containing the actual Unicode * characters corresponding to the escapes. Supports HTML 4.0 entities. * * <p>For example, the string "&lt;Fran&ccedil;ais&gt;" will become * "<Français>" * * <p>If an entity is unrecognized, it is left alone, and inserted verbatim into the result * string. e.g. "&gt;&zzzz;x" will become ">&zzzz;x". * * @param input the {@code String} to unescape, may be null * @return a new unescaped {@code String}, {@code null} if null string input * @since 3.0 */ public static final String unescapeHtml4(final String input) { return UNESCAPE_HTML4.translate(input); } /** * Unescapes a string containing entity escapes to a string containing the actual Unicode * characters corresponding to the escapes. Supports only HTML 3.0 entities. * * @param input the {@code String} to unescape, may be null * @return a new unescaped {@code String}, {@code null} if null string input * @since 3.0 */ public static final String unescapeHtml3(final String input) { return UNESCAPE_HTML3.translate(input); } // ----------------------------------------------------------------------- /** * Escapes the characters in a {@code String} using XML entities. * * <p>For example: <tt>"bread" & "butter"</tt> => <tt>&quot;bread&quot; &amp; * &quot;butter&quot;</tt>. * * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs * or external entities. * * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer escaped. If you * still wish this functionality, you can achieve it via the following: {@code * StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );} * * @param input the {@code String} to escape, may be null * @return a new escaped {@code String}, {@code null} if null string input * @see #unescapeXml(java.lang.String) */ public static final String escapeXml(final String input) { return ESCAPE_XML.translate(input); } // ----------------------------------------------------------------------- /** * Unescapes a string containing XML entity escapes to a string containing the actual Unicode * characters corresponding to the escapes. * * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs * or external entities. * * <p>Note that numerical \\u Unicode codes are unescaped to their respective Unicode characters. * This may change in future releases. * * @param input the {@code String} to unescape, may be null * @return a new unescaped {@code String}, {@code null} if null string input * @see #escapeXml(String) */ public static final String unescapeXml(final String input) { return UNESCAPE_XML.translate(input); } // ----------------------------------------------------------------------- /** * Returns a {@code String} value for a CSV column enclosed in double quotes, if required. * * <p>If the value contains a comma, newline or double quote, then the String value is returned * enclosed in double quotes. * * <p>Any double quote characters in the value are escaped with another double quote. * * <p>If the value does not contain a comma, newline or double quote, then the String value is * returned unchanged. see <a * href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and <a * href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. * * @param input the input CSV column String, may be null * @return the input String, enclosed in double quotes if the value contains a comma, newline or * double quote, {@code null} if null string input * @since 2.4 */ public static final String escapeCsv(final String input) { return ESCAPE_CSV.translate(input); } /** * Returns a {@code String} value for an unescaped CSV column. * * <p>If the value is enclosed in double quotes, and contains a comma, newline or double quote, * then quotes are removed. * * <p>Any double quote escaped characters (a pair of double quotes) are unescaped to just one * double quote. * * <p>If the value is not enclosed in double quotes, or is and does not contain a comma, newline * or double quote, then the String value is returned unchanged. see <a * href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and <a * href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. * * @param input the input CSV column String, may be null * @return the input String, with enclosing double quotes removed and embedded double quotes * unescaped, {@code null} if null string input * @since 2.4 */ public static final String unescapeCsv(final String input) { return UNESCAPE_CSV.translate(input); } }