Example #1
0
 /**
  * Extracts the text of {@code tn} and trims its edges depending on the neighbouring nodes.
  *
  * <p>The leading edge is passed through {@code ltrim} when the previous sibling is a block
  * (per {@code isBlock}), when there is no previous sibling and the parent is a block, or when
  * {@code normalText} is set and the previous sibling is a text node whose text matches
  * {@code EMPTY_MATCHER}. The trailing edge is handled the same way with {@code rtrim} and the
  * next sibling, but only if the text is still non-empty after the leading-edge step.
  *
  * @param tn the text node to read
  * @param normalText {@code true} to start from {@code tn.text()}, {@code false} to start from
  *     {@code tn.getWholeText()}
  * @return the (possibly trimmed) text
  */
 private String getTextNodeText(TextNode tn, boolean normalText) {
   String result;
   if (normalText) {
     result = tn.text();
   } else {
     result = tn.getWholeText();
   }

   final Node before = tn.previousSibling();
   final Node after = tn.nextSibling();
   final boolean insideBlock = isBlock(tn.parent());

   // Decide whether the leading edge needs trimming.
   boolean trimLeft = isBlock(before) || (before == null && insideBlock);
   if (!trimLeft && normalText && before instanceof TextNode) {
     trimLeft = EMPTY_MATCHER.matcher(((TextNode) before).text()).matches();
   }
   if (trimLeft) {
     result = ltrim(result);
   }

   // Nothing left to trim on the right-hand side.
   if (result.isEmpty()) {
     return result;
   }

   // Same decision, mirrored for the trailing edge.
   boolean trimRight = isBlock(after) || (after == null && insideBlock);
   if (!trimRight && normalText && after instanceof TextNode) {
     trimRight = EMPTY_MATCHER.matcher(((TextNode) after).text()).matches();
   }
   return trimRight ? rtrim(result) : result;
 }
Example #2
0
    /**
     * Copies {@code source} into the current {@code destination} if it is allowed, otherwise
     * counts it as discarded.
     *
     * <ul>
     *   <li>Elements whose tag passes {@code whitelist.isSafeTag} are re-created through
     *       {@code createSafeElement}, appended, and become the new {@code destination}.
     *   <li>Other elements are counted as discarded, except {@code root}.
     *   <li>Text nodes are always copied.
     *   <li>Data nodes are copied only when the parent's node name is a safe tag.
     *   <li>Anything else is counted as discarded.
     * </ul>
     *
     * @param source the node being visited
     * @param depth depth of the node; not used in this method
     */
    public void head(Node source, int depth) {
      if (source instanceof Element) {
        final Element element = (Element) source;

        if (!whitelist.isSafeTag(element.tagName())) {
          // Unsafe tag: not copied. The root itself is never counted as discarded.
          if (source != root) {
            numDiscarded++;
          }
          return;
        }

        // Safe tag: clone with only the safe attributes and descend into the clone.
        final ElementMeta safe = createSafeElement(element);
        final Element added = safe.el;
        destination.appendChild(added);

        numDiscarded += safe.numAttribsDiscarded;
        destination = added;
        return;
      }

      if (source instanceof TextNode) {
        final TextNode original = (TextNode) source;
        destination.appendChild(new TextNode(original.getWholeText(), source.baseUri()));
        return;
      }

      if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) {
        final DataNode original = (DataNode) source;
        destination.appendChild(new DataNode(original.getWholeData(), source.baseUri()));
        return;
      }

      // Comments, XML processing instructions, etc. are dropped.
      numDiscarded++;
    }
Example #3
0
 /**
  * Parses markup whose comment is never closed and checks that the paragraph holds the text
  * "Hello" followed by a comment node carrying the remaining input as its data.
  */
 @Test
 public void parsesUnterminatedComments() {
   Document parsed = Jsoup.parse("<p>Hello<!-- <tr><td>");
   Element paragraph = parsed.getElementsByTag("p").get(0);

   // The comment must not leak into the paragraph's text.
   assertEquals("Hello", paragraph.text());

   TextNode leadingText = (TextNode) paragraph.childNode(0);
   assertEquals("Hello", leadingText.getWholeText());

   // Everything after "<!--" ends up as the comment's data.
   Comment openComment = (Comment) paragraph.childNode(1);
   assertEquals(" <tr><td>", openComment.getData());
 }
Example #4
0
  /**
   * Parses a document whose body starts with a comment containing table markup and checks that
   * the comment data is kept verbatim and the following paragraph text is intact.
   */
  @Test
  public void parsesComments() {
    String markup =
        "<html><head></head><body><!-- <table><tr><td></table> --><p>Hello</p></body></html>";
    Document parsed = Jsoup.parse(markup);

    Element bodyEl = parsed.child(1);

    // First child node of the body is the comment; its markup-like content stays raw.
    Comment leadingComment = (Comment) bodyEl.childNode(0);
    assertEquals(" <table><tr><td></table> ", leadingComment.getData());

    // First child element of the body is the paragraph.
    Element paragraph = bodyEl.child(0);
    TextNode paragraphText = (TextNode) paragraph.childNode(0);
    assertEquals("Hello", paragraphText.getWholeText());
  }
Example #5
0
  /**
   * Appends the whole text of {@code textNode} to {@code accum}.
   *
   * <p>When {@code preserveWhitespace} reports {@code true} for the node's parent, the text is
   * appended unchanged; otherwise it is appended through
   * {@code StringUtil.appendNormalisedWhitespace}, passing whether {@code accum} currently ends
   * in whitespace (per {@code lastCharIsWhitespace}).
   *
   * @param accum the builder receiving the text
   * @param textNode the node whose text is appended
   */
  private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
    final String wholeText = textNode.getWholeText();

    if (!preserveWhitespace(textNode.parentNode())) {
      StringUtil.appendNormalisedWhitespace(accum, wholeText, lastCharIsWhitespace(accum));
      return;
    }
    accum.append(wholeText);
  }