Example #1
0
  /**
   * Strips {@code font-family} and {@code font-size} declarations from every inline {@code style}
   * attribute in the given HTML and returns the resulting body markup.
   *
   * <p>Each style value is trimmed and split on {@code ;}. Declarations whose lower-cased text
   * contains {@code font-family:} or {@code font-size:} are dropped, empty declarations are
   * skipped, and every remaining declaration is written back followed by {@code ;}. The
   * {@code style} attribute itself is kept even when nothing remains.
   *
   * @param html the HTML to parse
   * @return the inner HTML of the parsed document's body
   */
  @SuppressLint("DefaultLocale")
  private String improveHtml(final String html) {

    final Document document = Jsoup.parse(html);

    for (final Element e : document.getAllElements()) {
      if (!e.hasAttr("style")) {
        continue;
      }

      final StringBuilder kept = new StringBuilder();
      for (final String item : e.attr("style").trim().split(";")) {
        // Skip empty declarations (from "" or ";;") so they do not turn into stray ";".
        if (item.trim().isEmpty()) {
          continue;
        }
        final String lowered = item.toLowerCase(Locale.ENGLISH);
        if (!lowered.contains("font-family:") && !lowered.contains("font-size:")) {
          kept.append(item).append(';');
        }
      }
      e.attr("style", kept.toString());
    }

    return document.body().html();
  }
  /**
   * Copies the given attributes into a new map, leaving out every attribute whose key is
   * contained in {@code SKIP_ATTR}.
   *
   * @param attributes the attributes to copy
   * @return a new, mutable, insertion-ordered map from attribute key to attribute value
   */
  public static Map<String, String> parseAttribs(Attributes attributes) {
    final int initialCapacity = attributes.size() + 4;
    final Map<String, String> result = new LinkedHashMap<String, String>(initialCapacity);

    for (final Attribute attribute : attributes.asList()) {
      final String key = attribute.getKey();
      if (SKIP_ATTR.contains(key)) {
        continue;
      }
      result.put(key, attribute.getValue());
    }

    return result;
  }
  /**
   * Renders a node back to markup, recursing into the children of elements.
   *
   * <ul>
   *   <li>Element: written as {@code <tag attr="value" ...>}, omitting attributes whose key starts
   *       with {@code _}. An element with no child nodes whose tag reports {@code isEmpty()} is
   *       closed with {@code " />"}; any other element gets its children rendered by this method
   *       followed by a closing tag. Attribute values are appended as returned by
   *       {@code getValue()}; no escaping is applied here.
   *   <li>TextNode: its whole text.
   *   <li>XmlDeclaration: empty string when it has no child nodes, otherwise its outer HTML.
   *   <li>Comment: always the empty string.
   *   <li>DataNode without child nodes: the value of its {@code data} attribute, or the empty
   *       string when {@code Strings.empty} reports that value as empty.
   *   <li>Anything else: its outer HTML.
   * </ul>
   *
   * @param node the node to render
   * @return the markup produced for {@code node}
   */
  private static String cleanHtml(final Node node) {
    if (node instanceof Element) {
      final Element el = (Element) node;
      final String tagName = el.tagName();

      final StringBuilder out = new StringBuilder();
      out.append("<").append(tagName);

      for (Attribute attr : el.attributes()) {
        final String key = attr.getKey();
        if (key.startsWith("_")) {
          // Underscore-prefixed attributes are left out of the output.
          continue;
        }
        out.append(" ").append(key).append("=\"").append(attr.getValue()).append('"');
      }

      final boolean selfClosing = el.childNodes().isEmpty() && el.tag().isEmpty();
      if (selfClosing) {
        out.append(" />");
        return out.toString();
      }

      out.append(">");
      for (Node child : el.childNodes()) {
        out.append(cleanHtml(child));
      }
      out.append("</").append(tagName).append(">");
      return out.toString();
    }

    if (node instanceof TextNode) {
      return ((TextNode) node).getWholeText();
    }

    if (node instanceof XmlDeclaration) {
      // HACK
      return node.childNodes().isEmpty() ? "" : node.outerHtml();
    }

    if (node instanceof Comment) {
      // HACK: elide comments for now.
      return "";
    }

    if (node instanceof DataNode && node.childNodes().isEmpty()) {
      // No child nodes are defined but we have to handle content if such exists, example
      // <script language="JavaScript">var a =  { name: "${user.name}"}</script>
      final String data = node.attr("data");
      return Strings.empty(data) ? "" : data;
    }

    return node.outerHtml();
  }
Example #4
0
  /**
   * Checks whether the attribute's URL value starts with one of the permitted protocols.
   *
   * <p>The value is first resolved to an absolute URL via {@code el.absUrl}. When that yields an
   * empty string, the raw attribute value is tested as-is so that custom or unknown protocols can
   * still be matched. Unless {@code preserveRelativeLinks} is set, the attribute is overwritten
   * with the value that is tested, so output HTML carries the absolute form.
   *
   * @param el element owning the attribute
   * @param attr attribute holding the URL to test; may be updated in place
   * @param protocols protocols to accept
   * @return {@code true} if the lower-cased value starts with {@code protocol + ":"} for any of
   *     the given protocols, otherwise {@code false}
   */
  private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) {
    // try to resolve relative urls to abs, and optionally update the attribute so output html has
    // abs.
    // rels without a baseuri get removed
    String value = el.absUrl(attr.getKey());
    if (value.length() == 0) {
      // if it could not be made abs, run as-is to allow custom unknown protocols
      value = attr.getValue();
    }
    if (!preserveRelativeLinks) {
      attr.setValue(value);
    }

    // Lower-case once with a fixed locale: the default-locale overload maps 'I' to a dotless
    // 'ı' under e.g. a Turkish locale, which would make "MAILTO:" or "FILE:" fail to match.
    final String lowered = value.toLowerCase(java.util.Locale.ENGLISH);
    for (Protocol protocol : protocols) {
      String prot = protocol.toString() + ":";
      if (lowered.startsWith(prot)) {
        return true;
      }
    }
    return false;
  }