Ejemplo n.º 1
0
    /**
     * Copies one node of the source tree into the destination tree, keeping only permitted content.
     *
     * <ul>
     *   <li>An {@code Element} whose tag name passes {@code whitelist.isSafeTag} is rebuilt with
     *       {@code createSafeElement}, appended to the current destination, and then becomes the
     *       destination itself; the number of attributes dropped while rebuilding is added to
     *       {@code numDiscarded}.
     *   <li>Any other {@code Element} is left out and counted as discarded, unless it is
     *       {@code root}.
     *   <li>A {@code TextNode} is always copied.
     *   <li>A {@code DataNode} is copied only when its parent's node name is a safe tag.
     *   <li>Every remaining node (comments, XML processing instructions, data nodes under an unsafe
     *       parent, ...) is counted as discarded.
     * </ul>
     *
     * @param source the node being visited
     * @param depth depth of the visited node; not used by this method
     */
    public void head(Node source, int depth) {
      if (source instanceof Element) {
        final Element original = (Element) source;
        if (!whitelist.isSafeTag(original.tagName())) {
          // Unsafe tag: do not copy it. The root itself is never counted as discarded.
          if (source != root) {
            numDiscarded++;
          }
          return;
        }

        // Safe tag: clone it with its safe attributes and descend into the clone.
        final ElementMeta safe = createSafeElement(original);
        final Element copy = safe.el;
        destination.appendChild(copy);
        numDiscarded += safe.numAttribsDiscarded;
        destination = copy;
        return;
      }

      if (source instanceof TextNode) {
        final TextNode text = (TextNode) source;
        destination.appendChild(new TextNode(text.getWholeText(), source.baseUri()));
        return;
      }

      if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) {
        final DataNode data = (DataNode) source;
        destination.appendChild(new DataNode(data.getWholeData(), source.baseUri()));
        return;
      }

      // Anything else is not carried over.
      numDiscarded++;
    }
 /**
  * Builds a simplified copy of {@code el}; the input element itself is not modified.
  *
  * <p>Tag mapping, decided by {@code el.nodeName()}:
  *
  * <ul>
  *   <li>{@code img} becomes {@code x} containing only the {@code src} attribute value as text
  *   <li>{@code em} becomes {@code b}
  *   <li>{@code a} becomes {@code x}; depending on its {@code class} attribute the content is
  *       replaced by {@code "@" + text} (class {@code user}), by a {@code VIDEO:/THUMBNAIL:} line
  *       (class starting with {@code postimg video}), kept as children (class starting with
  *       {@code postimg}, or equal to {@code post}), or replaced by the {@code href} value
  *       (anything else)
  *   <li>{@code div} becomes {@code x}
  *   <li>every other element keeps its own tag
  * </ul>
  *
  * <p>The copy is created with an empty base URI and without the original attributes. When no
  * replacement text was chosen, child elements are cleaned recursively and every other child is
  * added as a text node holding that child's {@code toString()} value.
  *
  * @param el the element to simplify
  * @return a newly created element
  */
 private Element cleanupElement(Element el) {
   final String name = el.nodeName();
   Tag replacementTag = null;
   String replacementText = null;

   if (name.equals("img")) {
     replacementTag = Tag.valueOf("x");
     replacementText = el.attr("src");
   } else if (name.equals("em")) {
     replacementTag = Tag.valueOf("b");
   } else if (name.equals("a")) {
     // Every kind of link ends up as <x>; only the text that replaces its content differs.
     replacementTag = Tag.valueOf("x");
     final String cssClass = el.attr("class");
     if (cssClass.equals("user")) {
       replacementText = "@" + el.text().trim();
     } else if (cssClass.startsWith("postimg video")) {
       replacementText =
           "VIDEO: " + el.attr("href") + " THUMBNAIL: " + el.select("img").attr("src");
     } else if (!cssClass.startsWith("postimg") && !cssClass.equals("post")) {
       replacementText = el.attr("href");
     }
   } else if (name.equals("div")) {
     replacementTag = Tag.valueOf("x");
   }

   final Element result = new Element(replacementTag != null ? replacementTag : el.tag(), "");

   if (replacementText != null) {
     result.appendChild(new TextNode(replacementText, ""));
     return result;
   }

   for (Node child : el.childNodes()) {
     // NOTE(review): toString() presumably yields the child's serialized HTML, so text wrapped
     // into a new TextNode here may end up escaped twice on output - confirm this is intended.
     final Node converted =
         (child instanceof Element)
             ? cleanupElement((Element) child)
             : new TextNode(child.toString(), "");
     result.appendChild(converted);
   }
   return result;
 }
Ejemplo n.º 3
0
 /**
  * Creates an element and appends the given nodes to it, in array order.
  *
  * @param tag tag of the new element
  * @param baseUri base URI passed to the element constructor
  * @param attrs attributes passed to the element constructor
  * @param contents nodes to append as children
  * @return the newly created element
  */
 public static Element createElement(Tag tag, String baseUri, Attributes attrs, Node[] contents) {
   final Element created = new Element(tag, baseUri, attrs);
   for (int i = 0; i < contents.length; i++) {
     created.appendChild(contents[i]);
   }
   return created;
 }
Ejemplo n.º 4
0
  /**
   * Replaces the contents of this element with a single text node. Whatever the element held
   * before (text or child elements) is removed first.
   *
   * @param text unencoded text; must not be {@code null}
   * @return this element, for chaining
   */
  public Element text(String text) {
    Validate.notNull(text);

    // Drop the current children, then add the new text as the only child.
    empty();
    appendChild(new TextNode(text, baseUri));
    return this;
  }
Ejemplo n.º 5
0
  /**
   * Checks that an element stored in a {@code HashSet} can still be found after a child has been
   * appended to it.
   */
  @Test
  public void testHashcodeIsStableWithContentChanges() {
    final Element root = new Element(Tag.valueOf("root"), "");
    final HashSet<Element> seen = new HashSet<Element>();
    seen.add(root);

    // Change the element's content after it was placed in the set.
    final Element child = new Element(Tag.valueOf("a"), "");
    root.appendChild(child);

    assertTrue(seen.contains(root));
  }
Ejemplo n.º 6
0
  /**
   * Builds an html document tree for the component hierarchy rooted at
   * {@code designContext.getRootComponent()}.
   *
   * <p>The document consists of a doctype followed by an {@code html} element holding an empty
   * {@code head} and a {@code body}. When the design context has a root component, the node
   * returned by {@code designContext.createElement(root)} is appended to the body. Finally
   * {@code designContext.writePackageMappings} is invoked on the document.
   *
   * @param designContext the design context providing the root component
   * @return the generated document
   */
  private static Document createHtml(DesignContext designContext) {
    // Skeleton: <!doctype html><html><head></head><body></body></html>
    final Document document = new Document("");
    document.appendChild(new DocumentType("html", "", "", ""));

    final Element htmlElement = document.createElement("html");
    final Element headElement = document.createElement("head");
    final Element bodyElement = document.createElement("body");
    document.appendChild(htmlElement);
    htmlElement.appendChild(headElement);
    htmlElement.appendChild(bodyElement);

    // The design, if there is one, goes under <body>.
    final Component rootComponent = designContext.getRootComponent();
    if (rootComponent != null) {
      final Node designRoot = designContext.createElement(rootComponent);
      bodyElement.appendChild(designRoot);
    }

    designContext.writePackageMappings(document);
    return document;
  }
Ejemplo n.º 7
0
  /**
   * Walks the children of {@code e}, counting words and inserting page markers.
   *
   * <p>The text of every text-node child is split on whitespace and each non-empty word increments
   * the word count of {@code c}. Whenever the count exceeds {@code PAGE_COUNT}, a marker element
   * whose tag name is {@code "pageid=" + c.getPgCount()} is inserted before the current word, the
   * page count is incremented and the word count is reset to 0. A text node that received at least
   * one marker is replaced by a {@code span} containing the text pieces and markers; other text
   * nodes are left untouched. Child elements are processed recursively with the same counter.
   *
   * @param e element whose children are processed
   * @param c running word/page counter; updated in place
   * @return {@code c}, or a new {@code Count} with the same values when {@code e} has no child
   *     nodes and no text nodes
   */
  private static Count modify(Element e, Count c) {
    final List<Node> kids = e.childNodes();
    if (kids.size() == 0 && e.textNodes().size() == 0) {
      return new Count(c.getCount(), c.getPgCount());
    }

    for (Node kid : kids) {
      if (kid instanceof TextNode) {
        final TextNode original = (TextNode) kid;
        final String[] words = original.text().trim().split("\\s");

        // Pieces that will replace the text node; "tail" is always the last text piece.
        final List<Node> pieces = new ArrayList<Node>();
        TextNode tail = new TextNode("", " ");
        pieces.add(tail);

        for (final String word : words) {
          if (word.length() > 0) {
            c.incrementCount();
          }
          if (c.getCount() > PAGE_COUNT) {
            // Page is full: close the current piece, add a marker, start a new piece.
            tail.text(tail.text() + " ");
            pieces.add(new Element(Tag.valueOf("pageid=" + c.getPgCount()), ""));
            tail = new TextNode(" " + word + " ", "");
            pieces.add(tail);
            c.incrementPgCount();
            c.setCount(0);
          } else {
            tail.text(tail.text() + " " + word);
          }
        }

        // Only rewrite the DOM when at least one marker was inserted.
        if (pieces.size() > 1) {
          final Element wrapper = new Element(Tag.valueOf("span"), "");
          original.replaceWith(wrapper);
          for (Node piece : pieces) {
            wrapper.appendChild(piece);
          }
        }
      } else if (kid instanceof Element) {
        final Count nested = modify((Element) kid, c);
        c.setCount(nested.getCount());
        c.setPgCount(nested.getPgCount());
      }
    }

    return c;
  }
Ejemplo n.º 8
0
  /**
   * Wrap the supplied HTML around this node.
   *
   * <p>The HTML is parsed as a fragment (using this node's parent as context when that parent is
   * an {@code Element}). The first parsed node takes this node's place in its parent, and this
   * node is added to the element returned by {@code getDeepChild} for that first node. The nodes
   * still found in the parsed list afterwards are appended to the wrapping element.
   *
   * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be
   *     arbitrarily deep. Must not be empty.
   * @return this node, for chaining; {@code null} if the first node parsed from {@code html} is
   *     not an {@code Element} (in which case nothing is changed).
   */
  public Node wrap(String html) {
    Validate.notEmpty(html);

    Element context = parent() instanceof Element ? (Element) parent() : null;
    List<Node> wrapChildren = Parser.parseFragment(html, context, baseUri());
    // NOTE(review): get(0) presumably throws if the parsed list is empty - confirm that
    // parseFragment cannot return an empty list for non-empty input.
    Node wrapNode = wrapChildren.get(0);
    if (wrapNode == null || !(wrapNode instanceof Element)) // nothing to wrap with; noop
    return null;

    Element wrap = (Element) wrapNode;
    Element deepest = getDeepChild(wrap);
    // NOTE(review): parentNode is dereferenced without a null check here - confirm that callers
    // never wrap a node that has no parent.
    parentNode.replaceChild(this, wrap);
    deepest.addChildren(this);

    // Remainder of an unbalanced wrap: for <div></div><p></p>, the <p> is the remainder.
    // NOTE(review): the loop starts at index 0 and removes each node from its parent while
    // iterating. This looks like it relies on wrapChildren being a live view from which the wrap
    // node was already dropped by replaceChild above, and which shrinks on every removeChild;
    // if so, the i++ would skip every other remainder node - confirm against parseFragment.
    if (wrapChildren.size() > 0) {
      for (int i = 0; i < wrapChildren.size(); i++) {
        Node remainder = wrapChildren.get(i);
        remainder.parentNode.removeChild(remainder);
        wrap.appendChild(remainder);
      }
    }
    return this;
  }
 /**
  * Appends every element of {@code children} to {@code parent}, in iteration order.
  *
  * @param parent the element receiving the children
  * @param children the elements to append
  * @return {@code parent}, for chaining
  */
 public static Element appendChildren(final Element parent, final Elements children) {
   for (final Element child : children) {
     parent.appendChild(child);
   }
   return parent;
 }
Ejemplo n.º 10
0
  /**
   * Extracts an article with {@code Readability}, cleans the extracted HTML with a relaxed
   * whitelist that additionally allows {@code figure}, places the first {@code div} of the cleaned
   * markup into a minimal html/head skeleton and logs the resulting document.
   *
   * <p>The test fetches a remote URL and contains no assertions.
   */
  @Test
  public void shouldCleanHTMLContentReadability() throws IOException {
    // Alternative sample article: http://trib.al/KsvH2JE
    final URL articleUrl = new URL("http://tcrn.ch/1NxNAZJ");

    final Readability extractor = new Readability(articleUrl, 10000);
    extractor.init();
    final String extracted = extractor.html();

    // Start from the relaxed whitelist; add additional tags here as necessary.
    final Whitelist allowed = Whitelist.relaxed();
    allowed.addTags("figure");
    final String sanitized = Jsoup.clean(extracted, allowed);

    final Document page = new Document("");
    final Element htmlElement = page.createElement("html");
    final Element headElement = page.createElement("head");
    final Element bodyElement = page.createElement("body");
    htmlElement.appendChild(headElement);

    // Only the first <div> of the cleaned markup is kept as the document content.
    final Element firstDiv = bodyElement.append(sanitized).select("div").first();
    htmlElement.appendChild(firstDiv);
    page.appendChild(htmlElement);

    LOG.debug("Article content : {}", page.html());
  }
Ejemplo n.º 11
0
 /**
  * Adds a new text node, created from {@code text} and this element's base URI, as a child of
  * this element.
  *
  * @param text the unencoded text to add
  * @return this element, for chaining
  */
 public Element appendText(String text) {
   appendChild(new TextNode(text, baseUri()));
   return this;
 }
Ejemplo n.º 12
0
 /**
  * Creates a new element for the given tag name, using this element's base URI, and appends it as
  * a child of this element.
  *
  * @param tagName the name of the tag (e.g. {@code div})
  * @return the newly created child (not this element), so content can be added to it, e.g.:
  *     {@code parent.appendElement("h1").attr("id", "header").text("Welcome");}
  */
 public Element appendElement(String tagName) {
   final Tag tag = Tag.valueOf(tagName);
   final Element appended = new Element(tag, baseUri());
   appendChild(appended);
   return appended;
 }