@Test public void testTwoHtml() throws IOException { File file = new File(testDataRoot, "two.html"); String htmlContent = FileUtil.readString(file); Document document = new LagartoDOMBuilder().parse(htmlContent); Node html = new NodeSelector(document).select("html").get(0); assertNotNull(html); Node body = new NodeSelector(html).selectFirst("body"); Element h1 = body.getFirstChildElement(); assertEquals("h1", h1.getNodeName()); Node comment1 = body.getFirstChild().getNextSibling(); assertEquals(Node.NodeType.COMMENT, comment1.getNodeType()); Element p = (Element) new NodeSelector(body).selectFirst("p"); assertEquals(h1, p.getPreviousSiblingElement()); assertEquals(h1, comment1.getNextSiblingElement()); assertNull(comment1.getNextSiblingName()); // check if filter works just for sub elements List<Node> p_ems = new NodeSelector(p).select("em"); assertEquals(1, p_ems.size()); Element script = (Element) new NodeSelector(html).selectFirst("script"); assertEquals("text/javascript", script.getAttribute("type")); assertTrue(document.check()); }
/**
 * After an id is assigned, {@code getId()} must be non-null and
 * {@code isIdSet()} must report {@code true}.
 */
@Test
public void isIdSetReturnsTrue() {
    Document subject = new Document();
    String generatedId = ValueGenerator.getUniqueString(32);

    subject.setId(generatedId);

    assertNotNull(subject.getId());
    assertTrue(subject.isIdSet());
}
/**
 * Appends plain text containing {@code &} and {@code >} to a div and checks the
 * serialized inner HTML. The expected markup uses the entity-escaped forms of
 * those characters.
 */
@Test
public void testAddNewText() {
    Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>");
    Element div = doc.getElementById("1");

    div.appendText(" there & now >");

    // html() emits text content escaped, so '&' and '>' appear as entities.
    assertEquals("<p>Hello</p> there &amp; now &gt;", TextUtil.stripNewlines(div.html()));
}
/**
 * Checks that {@code text()} on a document with line breaks and split inline
 * elements yields a single-spaced sentence sequence.
 */
@Test
public void testNormalisesText() {
    String markup = "<p>Hello<p>There.</p> \n <p>Here <b>is</b> \n s<b>om</b>e text.";

    String normalised = Jsoup.parse(markup).text();

    assertEquals("Hello There. Here is some text.", normalised);
}
/**
 * Verifies that {@code insertChildren} rejects an index past the end, an index
 * too far before the start, and a null collection, each with an
 * {@link IllegalArgumentException}.
 */
@Test
public void insertChildrenArgumentValidation() {
    Document doc = Jsoup.parse("<div id=1>Text <p>One</p> Text <p>Two</p></div><div id=2></div>");
    Elements divs = doc.select("div");
    Element source = divs.get(0);
    Element target = doc.select("div").get(1);
    List<Node> nodes = source.childNodes();

    // Index beyond the target's child count.
    try {
        target.insertChildren(6, nodes);
        fail();
    } catch (IllegalArgumentException expected) {
        // expected: index out of range
    }

    // Negative index further back than the target has children.
    try {
        target.insertChildren(-5, nodes);
        fail();
    } catch (IllegalArgumentException expected) {
        // expected: index out of range
    }

    // Null collection of children.
    try {
        target.insertChildren(0, null);
        fail();
    } catch (IllegalArgumentException expected) {
        // expected: null argument
    }
}
/**
 * Contrasts {@code text()} (includes descendant text) with {@code ownText()}
 * (only the element's direct text) on a paragraph with a nested bold element.
 */
@Test
public void testGetChildText() {
    Element paragraph = Jsoup.parse("<p>Hello <b>there</b> now").select("p").first();

    String combined = paragraph.text();
    String direct = paragraph.ownText();

    assertEquals("Hello there now", combined);
    assertEquals("Hello now", direct);
}
@Test public void testClassNames() { Document doc = Jsoup.parse("<div class=\"c1 c2\">C</div>"); Element div = doc.select("div").get(0); assertEquals("c1 c2", div.className()); final Set<String> set1 = div.classNames(); final Object[] arr1 = set1.toArray(); assertTrue(arr1.length == 2); assertEquals("c1", arr1[0]); assertEquals("c2", arr1[1]); // Changes to the set should not be reflected in the Elements getters set1.add("c3"); assertTrue(2 == div.classNames().size()); assertEquals("c1 c2", div.className()); // Update the class names to a fresh set final Set<String> newSet = new LinkedHashSet<String>(3); newSet.addAll(set1); newSet.add("c3"); div.classNames(newSet); assertEquals("c1 c2 c3", div.className()); final Set<String> set2 = div.classNames(); final Object[] arr2 = set2.toArray(); assertTrue(arr2.length == 3); assertEquals("c1", arr2[0]); assertEquals("c2", arr2[1]); assertEquals("c3", arr2[2]); }
/**
 * Checks that newlines inside {@code <pre><code>} survive both {@code text()}
 * and the body's {@code html()} output unchanged.
 */
@Test
public void testKeepsPreTextInCode() {
    String markup = "<pre><code>code\n\ncode</code></pre>";
    Document parsed = Jsoup.parse(markup);

    String plain = parsed.text();
    String bodyHtml = parsed.body().html();

    assertEquals("code\n\ncode", plain);
    assertEquals("<pre><code>code\n\ncode</code></pre>", bodyHtml);
}
/**
 * Replaces a div's inner HTML via {@code html(String)} and checks that reading
 * it back returns the new markup.
 */
@Test
public void testSetHtml() {
    Element container = Jsoup.parse("<div id=1><p>Hello</p></div>").getElementById("1");

    container.html("<p>there</p><p>now</p>");

    String inner = TextUtil.stripNewlines(container.html());
    assertEquals("<p>there</p><p>now</p>", inner);
}
/**
 * Confirms that a cloned element exposes the same class names as its source,
 * and that editing the clone's class-name set leaves the set previously read
 * from the source untouched.
 */
@Test
public void testClonesClassnames() {
    Element original = Jsoup.parse("<div class='one two'></div>").select("div").first();

    Set<String> originalClasses = original.classNames();
    assertEquals(2, originalClasses.size());
    assertTrue(originalClasses.contains("one"));
    assertTrue(originalClasses.contains("two"));

    Element duplicate = original.clone();
    Set<String> duplicateClasses = duplicate.classNames();
    assertEquals(2, duplicateClasses.size());
    assertTrue(duplicateClasses.contains("one"));
    assertTrue(duplicateClasses.contains("two"));

    // Mutate only the duplicate's set.
    duplicateClasses.add("three");
    duplicateClasses.remove("one");

    // The set read from the original is unaffected...
    assertTrue(originalClasses.contains("one"));
    assertFalse(originalClasses.contains("three"));
    // ...while the duplicate's set reflects the edits.
    assertFalse(duplicateClasses.contains("one"));
    assertTrue(duplicateClasses.contains("three"));

    assertEquals("", original.html());
    assertEquals("", duplicate.html());
}
/**
 * With indentation set to zero, the document's HTML must contain the outer HTML
 * of both the selected title and the selected div.
 */
@Test
public void testHtmlContainsOuter() {
    Document doc = Jsoup.parse("<title>Check</title> <div>Hello there</div>");
    doc.outputSettings().indentAmount(0);

    String titleOuter = doc.select("title").outerHtml();
    assertTrue(doc.html().contains(titleOuter));

    String divOuter = doc.select("div").outerHtml();
    assertTrue(doc.html().contains(divOuter));
}
@Test public void testClone() { Document doc = Jsoup.parse("<div><p>One<p><span>Two</div>"); Element p = doc.select("p").get(1); Element clone = p.clone(); assertNull(clone.parent()); // should be orphaned assertEquals(0, clone.siblingIndex); assertEquals(1, p.siblingIndex); assertNotNull(p.parent()); clone.append("<span>Three"); assertEquals( "<p><span>Two</span><span>Three</span></p>", TextUtil.stripNewlines(clone.outerHtml())); assertEquals( "<div><p>One</p><p><span>Two</span></p></div>", TextUtil.stripNewlines(doc.body().html())); // not modified doc.body().appendChild(clone); // adopt assertNotNull(clone.parent()); assertEquals( "<div><p>One</p><p><span>Two</span></p></div><p><span>Two</span><span>Three</span></p>", TextUtil.stripNewlines(doc.body().html())); }
@Test public void dataset() { Document doc = Jsoup.parse( "<div id=1 data-name=jsoup class=new data-package=jar>Hello</div><p id=2>Hello</p>"); Element div = doc.select("div").first(); Map<String, String> dataset = div.dataset(); Attributes attributes = div.attributes(); // size, get, set, add, remove assertEquals(2, dataset.size()); assertEquals("jsoup", dataset.get("name")); assertEquals("jar", dataset.get("package")); dataset.put("name", "jsoup updated"); dataset.put("language", "java"); dataset.remove("package"); assertEquals(2, dataset.size()); assertEquals(4, attributes.size()); assertEquals("jsoup updated", attributes.get("data-name")); assertEquals("jsoup updated", dataset.get("name")); assertEquals("java", attributes.get("data-language")); assertEquals("java", dataset.get("language")); attributes.put("data-food", "bacon"); assertEquals(3, dataset.size()); assertEquals("bacon", dataset.get("food")); attributes.put("data-", "empty"); assertEquals(null, dataset.get("")); // data- is not a data attribute Element p = doc.select("p").first(); assertEquals(0, p.dataset().size()); }
@Test public void insertChildrenAtPosition() { Document doc = Jsoup.parse( "<div id=1>Text1 <p>One</p> Text2 <p>Two</p></div><div id=2>Text3 <p>Three</p></div>"); Element div1 = doc.select("div").get(0); Elements p1s = div1.select("p"); Element div2 = doc.select("div").get(1); assertEquals(2, div2.childNodeSize()); div2.insertChildren(-1, p1s); assertEquals(2, div1.childNodeSize()); // moved two out assertEquals(4, div2.childNodeSize()); assertEquals(3, p1s.get(1).siblingIndex()); // should be last List<Node> els = new ArrayList<Node>(); Element el1 = new Element(Tag.valueOf("span"), "").text("Span1"); Element el2 = new Element(Tag.valueOf("span"), "").text("Span2"); TextNode tn1 = new TextNode("Text4", ""); els.add(el1); els.add(el2); els.add(tn1); assertNull(el1.parent()); div2.insertChildren(-2, els); assertEquals(div2, el1.parent()); assertEquals(7, div2.childNodeSize()); assertEquals(3, el1.siblingIndex()); assertEquals(4, el2.siblingIndex()); assertEquals(5, tn1.siblingIndex()); }
/**
 * Parses a fragment with unclosed tags, a comment, and an ampersand in an
 * attribute value, then checks the full document's outer HTML. The expected
 * markup carries the ampersand in its escaped form.
 */
@Test
public void testOuterHtml() {
    Document doc = Jsoup.parse("<div title='Tags &c.'><img src=foo.png><p><!-- comment -->Hello<p>there");

    // Attribute values are serialized with '&' escaped as an entity.
    assertEquals(
            "<html><head></head><body><div title=\"Tags &amp;c.\"><img src=\"foo.png\"><p><!-- comment -->Hello</p><p>there</p></div></body></html>",
            TextUtil.stripNewlines(doc.outerHtml()));
}
/**
 * Renames an {@code <i>} element to {@code <em>} and checks that selectors and
 * the parent's HTML reflect the new tag name.
 */
@Test
public void testTagNameSet() {
    Document doc = Jsoup.parse("<div><i>Hello</i>");
    Element italic = doc.select("i").first();

    italic.tagName("em");

    assertEquals(0, doc.select("i").size());
    assertEquals(1, doc.select("em").size());
    Element container = doc.select("div").first();
    assertEquals("<em>Hello</em>", container.html());
}
/**
 * Three paragraphs with identical content must still report distinct element
 * sibling indices 0, 1 and 2, in document order.
 */
@Test
public void testElementSiblingIndexSameContent() {
    Document doc = Jsoup.parse("<div><p>One</p>...<p>One</p>...<p>One</p>");
    Elements paragraphs = doc.select("p");

    for (int position = 0; position < 3; position++) {
        assertTrue(position == paragraphs.get(position).elementSiblingIndex());
    }
}
/**
 * Prepends a new paragraph to a div and checks that it becomes the first child,
 * ahead of the existing paragraph.
 */
@Test
public void testPrependElement() {
    Element container = Jsoup.parse("<div id=1><p>Hello</p></div>").getElementById("1");

    Element added = container.prependElement("p");
    added.text("Before");

    assertEquals("Before", container.child(0).text());
    assertEquals("Hello", container.child(1).text());
}
/**
 * Looks up an element by a tag name containing a namespace prefix and checks
 * the match count, id, and reported tag name.
 */
@Test
public void getNamespacedElementsByTag() {
    Document doc = Jsoup.parse("<div><abc:def id=1>Hello</abc:def></div>");

    Elements matches = doc.getElementsByTag("abc:def");

    assertEquals(1, matches.size());
    assertEquals("1", matches.first().id());
    assertEquals("abc:def", matches.first().tagName());
}
/**
 * With the indent amount set to zero, the document HTML keeps its line breaks
 * between elements but carries no leading indentation.
 */
@Test
public void testSetIndent() {
    Document doc = Jsoup.parse("<div><p>Hello\nthere</p></div>");
    doc.outputSettings().indentAmount(0);

    String expected =
            "<html>\n<head></head>\n<body>\n<div>\n<p>Hello there</p>\n</div>\n</body>\n</html>";
    assertEquals(expected, doc.html());
}
@Test public void parentlessToString() { Document doc = Jsoup.parse("<img src='foo'>"); Element img = doc.select("img").first(); assertEquals("<img src=\"foo\">", img.toString()); img.remove(); // lost its parent assertEquals("<img src=\"foo\">", img.toString()); }
/**
 * A {@code <br>} between words must read as a single space in {@code text()}
 * and {@code ownText()}, whether or not the markup already has spaces around it.
 */
@Test
public void testBrHasSpace() {
    Document tight = Jsoup.parse("<p>Hello<br>there</p>");
    assertEquals("Hello there", tight.text());
    assertEquals("Hello there", tight.select("p").first().ownText());

    Document spaced = Jsoup.parse("<p>Hello <br> there</p>");
    assertEquals("Hello there", spaced.text());
}
/**
 * Checks the default pretty-printed output of a small document. Each nesting
 * level in the expected string is indented by one more space than its parent.
 */
@Test
public void testFormatHtml() {
    Document doc = Jsoup.parse(
            "<title>Format test</title><div><p>Hello <span>jsoup <span>users</span></span></p><p>Good.</p></div>");

    // Indentation grows with depth: html=0, head/body=1, title/div=2, p=3.
    assertEquals(
            "<html>\n <head>\n  <title>Format test</title>\n </head>\n <body>\n  <div>\n   <p>Hello <span>jsoup <span>users</span></span></p>\n   <p>Good.</p>\n  </div>\n </body>\n</html>",
            doc.html());
}
/**
 * Wraps a paragraph with markup that has a trailing sibling after the wrapper
 * element and checks the resulting body HTML.
 */
@Test
public void testWrapWithRemainder() {
    Document doc = Jsoup.parse("<div><p>Hello</p></div>");
    Element paragraph = doc.select("p").first();

    paragraph.wrap("<div class='head'></div><p>There!</p>");

    String bodyHtml = TextUtil.stripNewlines(doc.body().html());
    assertEquals("<div><div class=\"head\"><p>Hello</p><p>There!</p></div></div>", bodyHtml);
}
/**
 * Selects meta elements by an attribute whose name contains a dash and checks
 * that only the matching meta element (id 1) is returned.
 */
@Test
public void testGetElementsWithAttributeDash() {
    String markup =
            "<meta http-equiv=content-type value=utf8 id=1> <meta name=foo content=bar id=2> <div http-equiv=content-type value=utf8 id=3>";
    Document doc = Jsoup.parse(markup);

    Elements matched = doc.select("meta[http-equiv=content-type], meta[charset]");

    assertEquals(1, matched.size());
    assertEquals("1", matched.first().id());
}
@Test public void testNamespacedElements() { // Namespaces with ns:tag in HTML must be translated to ns|tag in CSS. String html = "<html><body><fb:comments /></body></html>"; Document doc = Jsoup.parse(html, "http://example.com/bar/"); Elements els = doc.select("fb|comments"); assertEquals(1, els.size()); assertEquals("html > body > fb|comments", els.get(0).cssSelector()); }
/**
 * {@code hasText()} is true for a div containing text in a descendant and for a
 * paragraph with text, and false for an empty paragraph.
 */
@Test
public void testHasText() {
    Document doc = Jsoup.parse("<div><p>Hello</p><p></p></div>");
    Element container = doc.select("div").first();
    Elements paragraphs = doc.select("p");

    Element filled = paragraphs.first();
    Element empty = paragraphs.last();

    assertTrue(container.hasText());
    assertTrue(filled.hasText());
    assertFalse(empty.hasText());
}
/**
 * Checks the pretty-printed output in outline mode. Each nesting level in the
 * expected string, including text nodes that are placed on their own line, is
 * indented by one more space than its parent.
 */
@Test
public void testFormatOutline() {
    Document doc = Jsoup.parse(
            "<title>Format test</title><div><p>Hello <span>jsoup <span>users</span></span></p><p>Good.</p></div>");
    doc.outputSettings().outline(true);

    // Indentation grows with depth: p=3, its text and span=4, the span's children=5.
    assertEquals(
            "<html>\n <head>\n  <title>Format test</title>\n </head>\n <body>\n  <div>\n   <p>\n    Hello \n    <span>\n     jsoup \n     <span>users</span>\n    </span>\n   </p>\n   <p>Good.</p>\n  </div>\n </body>\n</html>",
            doc.html());
}
/**
 * Looks up elements by exact attribute value: one div matches
 * {@code style=bold}, and nothing matches {@code style=none}.
 */
@Test
public void testGetElementsWithAttributeValue() {
    Document doc = Jsoup.parse("<div style='bold'><p><p><b style></b></p></div>");

    List<Element> bold = doc.getElementsByAttributeValue("style", "bold");
    assertEquals(1, bold.size());
    Element match = bold.get(0);
    assertEquals("div", match.tagName());

    List<Element> absent = doc.getElementsByAttributeValue("style", "none");
    assertEquals(0, absent.size());
}
/**
 * Appends a row to the {@code tbody} selected from a parsed table and checks
 * that the new row follows the existing one in the body HTML.
 */
@Test
public void testAppendRowToTable() {
    Document doc = Jsoup.parse("<table><tr><td>1</td></tr></table>");
    // The selected element is the tbody, not the table itself.
    Element tableBody = doc.select("tbody").first();

    tableBody.append("<tr><td>2</td></tr>");

    String bodyHtml = TextUtil.stripNewlines(doc.body().html());
    assertEquals("<table><tbody><tr><td>1</td></tr><tr><td>2</td></tr></tbody></table>", bodyHtml);
}