/**
 * Exercises name/value lookups on the predefined entity tables.
 *
 * <p>The <code>nbsp</code> entity is resolved in both directions through
 * <code>Entities.HTML40</code>; an unknown entity name and the code point 123 are then looked up
 * through <code>Entities.XML</code> and are expected to yield <code>-1</code> and
 * <code>null</code> respectively.
 */
@Test
public void html40Entities() throws Exception {
    // Known entity, code point -> name.
    final String nbspName = Entities.HTML40.getEntityName('\u00A0');
    assertEquals("nbsp", nbspName);

    // Known entity, name -> code point.
    final int nbspValue = Entities.HTML40.getEntityValue("nbsp");
    assertEquals('\u00A0', nbspValue);

    // Unknown name: the lookup reports -1.
    final int missingValue = Entities.XML.getEntityValue("xyzzy");
    assertEquals(-1, missingValue);

    // Code point without an entity name: the lookup reports null.
    final String missingName = Entities.XML.getEntityName(123);
    assertEquals(null, missingName);
}
/** * Escapes the characters in a <code>String</code> using HTML entities. * * <p>For example: * * <p><code>"bread" & "butter"</code> becomes: * * <p><code>&quot;bread&quot; &amp; &quot;butter&quot;</code>. * * <p>Supports all known HTML 4.0 entities, including funky accents. * * @param str the <code>String</code> to escape, may be null * @return a new escaped <code>String</code>, <code>null</code> if null string input * @see #unescapeHtml(String) * @see </br><a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO * Entities</a> * @see </br><a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO * Latin-1</a> * @see </br><a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character * entity references</a> * @see </br><a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character * References</a> * @see </br><a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code * positions</a> */ public static String escapeHtml(String str) { if (str == null) { return null; } // todo: add a version that takes a Writer // todo: rewrite underlying method to use a Writer instead of a StringBuffer return Entities.HTML40.escape(str); }
/**
 * Unescapes a string containing entity escapes and hands the result to a <code>Writer</code>.
 * Supports HTML 4.0 entities; the work is delegated to
 * <code>Entities.HTML40.unescape(Writer, String)</code>.
 *
 * <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;" will become
 * "&lt;Fran&ccedil;ais&gt;"
 *
 * <p>If an entity is unrecognized, it is left alone, and inserted verbatim into the result
 * string. e.g. "&amp;gt;&amp;zzzz;x" will become "&gt;&amp;zzzz;x".
 *
 * @param writer the writer receiving the unescaped string, not null
 * @param string the <code>String</code> to unescape, may be null (the call is then a no-op)
 * @throws IllegalArgumentException if the writer is null
 * @throws IOException if an IOException occurs
 * @see #escapeHtml(String)
 */
public static void unescapeHtml(Writer writer, String string) throws IOException {
    // The writer is validated first, so a null writer is rejected even for a null string.
    if (writer == null) {
        throw new IllegalArgumentException("The Writer must not be null.");
    }
    // Only a non-null input is forwarded to the entity table.
    if (string != null) {
        Entities.HTML40.unescape(writer, string);
    }
}
/**
 * Unescapes a string containing entity escapes to a string containing the actual Unicode
 * characters corresponding to the escapes. Supports HTML 4.0 entities; the work is delegated to
 * <code>Entities.HTML40.unescape(String)</code>.
 *
 * <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;" will become
 * "&lt;Fran&ccedil;ais&gt;"
 *
 * <p>If an entity is unrecognized, it is left alone, and inserted verbatim into the result
 * string. e.g. "&amp;gt;&amp;zzzz;x" will become "&gt;&amp;zzzz;x".
 *
 * @param str the <code>String</code> to unescape, may be null
 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
 * @see #escapeHtml(String)
 */
public static String unescapeHtml(String str) {
    // A null input is passed straight back; only real strings reach the entity table.
    return str == null ? null : Entities.HTML40.unescape(str);
}
/**
 * Escapes the characters in a <code>String</code> using HTML entities, passing the supplied
 * <code>Writer</code> and string on to <code>Entities.HTML40.escape(Writer, String)</code>.
 *
 * <p>Null handling mirrors <code>unescapeHtml(Writer, String)</code>: a null writer is rejected
 * and a null string makes the call a no-op.
 *
 * @param writer the writer receiving the escaped string, not null
 * @param string the <code>String</code> to escape, may be null
 * @throws IllegalArgumentException if the writer is null
 * @throws IOException if an IOException occurs
 * @see StringEscapeUtils#escapeHtml(Writer, String)
 */
private static void escapeHtml(Writer writer, String string) throws IOException {
    // Reject a missing writer up front with the same message the unescape variant uses.
    if (writer == null) {
        throw new IllegalArgumentException("The Writer must not be null.");
    }
    // Nothing to escape for a null input.
    if (string == null) {
        return;
    }
    Entities.HTML40.escape(writer, string);
}