/**
 * Removes font-family and font-size declarations from every inline {@code style}
 * attribute in the given HTML and returns the markup of the resulting body.
 *
 * @param html the HTML to process
 * @return the inner HTML of the parsed document's body after the style attributes
 *         have been filtered
 */
@SuppressLint("DefaultLocale")
private String improveHtml(final String html) {
    final Document document = Jsoup.parse(html);
    for (final Element e : document.getAllElements()) {
        if (!e.hasAttr("style")) {
            continue;
        }
        for (final Attribute a : e.attributes()) {
            if ("style".equals(a.getKey())) {
                a.setValue(removeFontDeclarations(a.getValue()));
            }
        }
    }
    return document.body().html();
}

/**
 * Filters a raw inline style value, dropping declarations whose property name is (or ends
 * with) {@code font-family} or {@code font-size}.
 *
 * @param style the raw value of a style attribute
 * @return the remaining declarations, each terminated by {@code ';'}; empty if none remain
 */
private static String removeFontDeclarations(final String style) {
    final StringBuilder kept = new StringBuilder();
    for (final String declaration : style.trim().split(";")) {
        // Skip blank pieces so an empty style (or a stray ";;") does not produce a lone ";".
        if (declaration.trim().isEmpty()) {
            continue;
        }
        final int colon = declaration.indexOf(':');
        if (colon >= 0) {
            // Compare the property name only, trimmed, so "font-size : 12px" is caught too.
            final String property =
                    declaration.substring(0, colon).trim().toLowerCase(Locale.ENGLISH);
            // endsWith keeps prefixed variants such as "mso-bidi-font-size" being removed.
            if (property.endsWith("font-family") || property.endsWith("font-size")) {
                continue;
            }
        }
        // Pieces without a colon are kept verbatim: they may be the tail of a value that
        // itself contained ';' and was split above.
        kept.append(declaration).append(';');
    }
    return kept.toString();
}
/**
 * Copies an attribute list into a freshly created, insertion-ordered map, leaving out
 * every attribute whose key is matched by {@code SKIP_ATTR.contains(key)}.
 *
 * @param attributes the attributes to copy
 * @return a new mutable map from attribute key to attribute value
 */
public static Map<String, String> parseAttribs(Attributes attributes) {
    final int initialCapacity = attributes.size() + 4;
    final Map<String, String> result = new LinkedHashMap<String, String>(initialCapacity);
    for (Attribute attribute : attributes.asList()) {
        final String key = attribute.getKey();
        if (SKIP_ATTR.contains(key)) {
            continue;
        }
        result.put(key, attribute.getValue());
    }
    return result;
}
private static String cleanHtml(final Node node) { if (node instanceof Element) { Element element = ((Element) node); StringBuilder accum = new StringBuilder(); accum.append("<").append(element.tagName()); for (Attribute attribute : element.attributes()) { if (!(attribute.getKey().startsWith("_"))) { accum.append(" "); accum.append(attribute.getKey()); accum.append("=\""); accum.append(attribute.getValue()); accum.append('"'); } } if (element.childNodes().isEmpty() && element.tag().isEmpty()) { accum.append(" />"); } else { accum.append(">"); for (Node child : element.childNodes()) accum.append(cleanHtml(child)); accum.append("</").append(element.tagName()).append(">"); } return accum.toString(); } else if (node instanceof TextNode) { return ((TextNode) node).getWholeText(); } else if (node instanceof XmlDeclaration) { // HACK if (node.childNodes().isEmpty()) { return ""; } return node.outerHtml(); } else if (node instanceof Comment) { // HACK: elide comments for now. return ""; } else if (node instanceof DataNode && node.childNodes().isEmpty()) { // No child nodes are defined but we have to handle content if such exists, example // <script language="JavaScript">var a = { name: "${user.name}"}</script> String content = node.attr("data"); if (Strings.empty(content)) { return ""; } return content; } else { return node.outerHtml(); } }
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // try to resolve relative urls to abs, and optionally update the attribute so output html has // abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); if (value.length() == 0) value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown // protocols if (!preserveRelativeLinks) attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString() + ":"; if (value.toLowerCase().startsWith(prot)) { return true; } } return false; }