/** * 去除html标记(类似DOM的element.innerText) * * @param htmlcontent * @return content */ private static String getTextByRemoveHTMLTag(String htmlcontent) { if (StringUtils.isEmpty(htmlcontent)) { return ""; } // 不能替换导致多个空格被改成一个空格. Source source = new Source(htmlcontent); Renderer renderer = source.getRenderer(); renderer.setMaxLineLength(99999); // 设置一行最长个数,默认76字符 String txt = renderer.toString(); return txt; }
/**
 * Renders HTML content as text, producing a human-readable version of the content.
 * The conversion is modeled on the way Mozilla Thunderbird® and other email clients
 * automatically convert HTML content to text for the alternative MIME encoding of emails.
 *
 * <p>With the default settings, the output complies with the
 * <code>Text/Plain; Format=Flowed (DelSp=No)</code> protocol described in
 * <a href="http://tools.ietf.org/html/rfc3676">RFC-3676</a>.
 *
 * @param html the HTML text
 * @return the rendered HTML text, or <code>null</code> if the HTML text is <code>null</code>
 */
@Override
public String render(String html) {
    if (html != null) {
        // The renderer is used with its default settings.
        return new Source(html).getRenderer().toString();
    }
    return null;
}