@Test public void testPseudoHas() { Document doc = Jsoup.parse( "<div id=0><p><span>Hello</span></p></div> <div id=1><span class=foo>There</span></div> <div id=2><p>Not</p></div>"); Elements divs1 = doc.select("div:has(span)"); assertEquals(2, divs1.size()); assertEquals("0", divs1.get(0).id()); assertEquals("1", divs1.get(1).id()); Elements divs2 = doc.select("div:has([class]"); assertEquals(1, divs2.size()); assertEquals("1", divs2.get(0).id()); Elements divs3 = doc.select("div:has(span, p)"); assertEquals(3, divs3.size()); assertEquals("0", divs3.get(0).id()); assertEquals("1", divs3.get(1).id()); assertEquals("2", divs3.get(2).id()); Elements els1 = doc.body().select(":has(p)"); assertEquals(3, els1.size()); // body, div, dib assertEquals("body", els1.first().tagName()); assertEquals("0", els1.get(1).id()); assertEquals("2", els1.get(2).id()); }
/**
 * Checks the {@code :gt(n)} pseudo-selector.
 *
 * <p>With {@code div p:gt(0)} the test expects "Two" and "Three" to match;
 * the first paragraph of each div ("One", "Four") is expected to be excluded.
 */
@Test public void testPseudoGreaterThan() {
    String html = "<div><p>One</p><p>Two</p><p>Three</p></div><div><p>Four</p>";
    Elements matched = Jsoup.parse(html).select("div p:gt(0)");

    assertEquals(2, matched.size());
    assertEquals("Two", matched.get(0).text());
    assertEquals("Three", matched.get(1).text());
}
/**
 * Checks the {@code :lt(n)} pseudo-selector.
 *
 * <p>With {@code div p:lt(2)} the test expects "One", "Two" and "Four" to
 * match; "Three" is expected to be excluded.
 */
@Test public void testPseudoLessThan() {
    // Fixture uses a well-formed </p> for the third paragraph, matching the
    // fixture used by testPseudoGreaterThan. The trailing div/p are left
    // unclosed on purpose, as in the sibling tests.
    Document doc = Jsoup.parse("<div><p>One</p><p>Two</p><p>Three</p></div><div><p>Four</p>");

    Elements ps = doc.select("div p:lt(2)");
    assertEquals(3, ps.size());
    assertEquals("One", ps.get(0).text());
    assertEquals("Two", ps.get(1).text());
    assertEquals("Four", ps.get(2).text());
}
/**
 * Exercises the adjacent-sibling combinator {@code li + li} on a three-item
 * list; the second and third items are expected to match.
 */
@Test public void adjacentSiblings() {
    Document doc = Jsoup.parse("<ol><li>One<li>Two<li>Three</ol>");
    Elements following = doc.select("li + li");

    assertEquals(2, following.size());
    assertEquals("Two", following.get(0).text());
    assertEquals("Three", following.get(1).text());
}
/**
 * Exercises the child combinator with a wildcard ({@code div > *}).
 *
 * <p>Expects the two paragraphs of the first div and the span of the second
 * div, and nothing nested deeper (the {@code b} element is not expected).
 */
@Test public void parentChildStar() {
    String html = "<div id=1><p>Hello<p><b>there</b></p></div><div id=2><span>Hi</span></div>";
    Elements children = Jsoup.parse(html).select("div > *");

    assertEquals(3, children.size());
    assertEquals("p", children.get(0).tagName());
    assertEquals("p", children.get(1).tagName());
    assertEquals("span", children.get(2).tagName());
}
/**
 * Exercises a comma-separated group of attribute selectors:
 * {@code [id],[title=foo]}.
 *
 * <p>Expects both divs with an id plus the div titled "foo"; the div titled
 * "bar" is not expected.
 */
@Test public void testGroupOrAttribute() {
    Document doc = Jsoup.parse("<div id=1 /><div id=2 /><div title=foo /><div title=bar />");
    Elements matched = doc.select("[id],[title=foo]");

    assertEquals(3, matched.size());
    assertEquals("1", matched.get(0).id());
    assertEquals("2", matched.get(1).id());
    assertEquals("foo", matched.get(2).attr("title"));
}
/**
 * Exercises a query that mixes child, descendant and adjacent-sibling
 * combinators: {@code body > div.foo li + li}.
 */
@Test public void mixCombinator() {
    Document doc = Jsoup.parse("<div class=foo><ol><li>One<li>Two<li>Three</ol></div>");
    Elements items = doc.select("body > div.foo li + li");

    assertEquals(2, items.size());
    assertEquals("Two", items.get(0).text());
    assertEquals("Three", items.get(1).text());
}
/**
 * Checks {@code Elements.parents()} for the set of all {@code p} elements.
 *
 * <p>Expects exactly three ancestors, in the order div, body, html.
 */
@Test public void parents() {
    String html = "<div><p>Hello</p></div><p>There</p>";
    Elements paragraphs = Jsoup.parse(html).select("p");
    Elements ancestors = paragraphs.parents();

    assertEquals(3, ancestors.size());
    assertEquals("div", ancestors.get(0).tagName());
    assertEquals("body", ancestors.get(1).tagName());
    assertEquals("html", ancestors.get(2).tagName());
}
/**
 * Exercises the attribute-regex selector {@code [attr~=regex]} with a
 * case-insensitive pattern for png/jpg/jpeg file extensions.
 *
 * <p>Expects images 1-3 to match; the gif and the image without a
 * {@code src} are not expected.
 */
@Test public void testByAttributeRegex() {
    String html =
        "<p><img src=foo.png id=1><img src=bar.jpg id=2><img src=qux.JPEG id=3><img src=old.gif><img></p>";
    Elements images = Jsoup.parse(html).select("img[src~=(?i)\\.(png|jpe?g)]");

    assertEquals(3, images.size());
    assertEquals("1", images.get(0).id());
    assertEquals("2", images.get(1).id());
    assertEquals("3", images.get(2).id());
}
/**
 * Exercises the id selector {@code #foo}.
 *
 * <p>The first document contains two elements sharing the same id and both
 * are expected to be returned; the second document has no such id and an
 * empty result is expected.
 */
@Test public void testById() {
    Document withDuplicates = Jsoup.parse("<div><p id=foo>Hello</p><p id=foo>Foo two!</p></div>");
    Elements hits = withDuplicates.select("#foo");
    assertEquals(2, hits.size());
    assertEquals("Hello", hits.get(0).text());
    assertEquals("Foo two!", hits.get(1).text());

    Document withoutMatch = Jsoup.parse("<div id=1></div>");
    Elements misses = withoutMatch.select("#foo");
    assertEquals(0, misses.size());
}
/**
 * Checks bulk HTML mutation on an {@code Elements} collection: a chained
 * {@code prepend}/{@code append}, followed by {@code html(...)} replacing the
 * inner content. The second paragraph is inspected after each step.
 */
@Test public void setHtml() {
    Document doc = Jsoup.parse("<p>One</p><p>Two</p><p>Three</p>");
    Elements paragraphs = doc.select("p");

    // append is invoked on whatever prepend returns, exactly as in a chained call.
    Elements afterPrepend = paragraphs.prepend("<b>Bold</b>");
    afterPrepend.append("<i>Ital</i>");
    String wrapped = TextUtil.stripNewlines(paragraphs.get(1).outerHtml());
    assertEquals("<p><b>Bold</b>Two<i>Ital</i></p>", wrapped);

    paragraphs.html("<span>Gone</span>");
    String replaced = TextUtil.stripNewlines(paragraphs.get(1).outerHtml());
    assertEquals("<p><span>Gone</span></p>", replaced);
}
/**
 * Exercises a grouped query whose members each use combinators:
 * {@code .foo > ol, ol > li + li}.
 *
 * <p>Expects the {@code ol} followed by the second and third list items.
 */
@Test public void mixCombinatorGroup() {
    Document doc = Jsoup.parse("<div class=foo><ol><li>One<li>Two<li>Three</ol></div>");
    Elements matched = doc.select(".foo > ol, ol > li + li");

    assertEquals(3, matched.size());
    assertEquals("ol", matched.get(0).tagName());
    assertEquals("Two", matched.get(1).text());
    assertEquals("Three", matched.get(2).text());
}
/**
 * Checks the {@code :eq(n)} pseudo-selector.
 *
 * <p>{@code div p:eq(0)} is expected to return the first paragraph of each
 * div ("One", "Four"); {@code div:eq(0) p:eq(0)} is expected to narrow that
 * to the single paragraph "One".
 */
@Test public void testPseudoEquals() {
    // Fixture uses a well-formed </p> for the third paragraph; the trailing
    // div/p are left unclosed on purpose.
    Document doc = Jsoup.parse("<div><p>One</p><p>Two</p><p>Three</p></div><div><p>Four</p>");

    Elements ps = doc.select("div p:eq(0)");
    assertEquals(2, ps.size());
    assertEquals("One", ps.get(0).text());
    assertEquals("Four", ps.get(1).text());

    Elements ps2 = doc.select("div:eq(0) p:eq(0)");
    assertEquals(1, ps2.size());
    assertEquals("One", ps2.get(0).text());
    assertEquals("p", ps2.get(0).tagName());
}
/**
 * Checks that commas appearing inside an attribute value or inside a
 * {@code :matches(...)} regex are not treated as group separators.
 */
@Test public void handlesCommasInSelector() {
    String html = "<p name='1,2'>One</p><div>Two</div><ol><li>123</li><li>Text</li></ol>";
    Document doc = Jsoup.parse(html);

    // Comma inside an attribute value.
    Elements byName = doc.select("[name=1,2]");
    assertEquals(1, byName.size());

    // One real group separator, plus a comma inside the regex character class.
    Elements grouped = doc.select("div, li:matches([0-9,]+)");
    assertEquals(2, grouped.size());
    assertEquals("div", grouped.get(0).tagName());
    assertEquals("li", grouped.get(1).tagName());
    assertEquals("123", grouped.get(1).text());
}
/**
 * Checks the class-manipulation helpers on {@code Elements}:
 * {@code hasClass}, {@code addClass}, {@code removeClass} and
 * {@code toggleClass}, applied to two paragraphs at once.
 */
@Test public void classes() {
    String html = "<div><p class='mellow yellow'></p><p class='red green'></p>";
    Elements paragraphs = Jsoup.parse(html).select("p");

    assertTrue(paragraphs.hasClass("red"));
    assertFalse(paragraphs.hasClass("blue"));

    paragraphs.addClass("blue");
    paragraphs.removeClass("yellow");
    paragraphs.toggleClass("mellow");

    // First paragraph: "mellow" toggled off and "yellow" removed, leaving "blue".
    assertEquals("blue", paragraphs.get(0).className());
    // Second paragraph: "blue" added and "mellow" toggled on.
    assertEquals("red green blue mellow", paragraphs.get(1).className());
}
@Test public void testByTag() { // should be case insensitive Elements els = Jsoup.parse("<div id=1><div id=2><p>Hello</p></div></div><DIV id=3>").select("DIV"); assertEquals(3, els.size()); assertEquals("1", els.get(0).id()); assertEquals("2", els.get(1).id()); assertEquals("3", els.get(2).id()); Elements none = Jsoup.parse("<div id=1><div id=2><p>Hello</p></div></div><div id=3>").select("span"); assertEquals(0, none.size()); }
@Test public void parentChildElement() { String h = "<div id=1><div id=2><div id = 3></div></div></div><div id=4></div>"; Document doc = Jsoup.parse(h); Elements divs = doc.select("div > div"); assertEquals(2, divs.size()); assertEquals("2", divs.get(0).id()); // 2 is child of 1 assertEquals("3", divs.get(1).id()); // 3 is child of 2 Elements div2 = doc.select("div#1 > div"); assertEquals(1, div2.size()); assertEquals("2", div2.get(0).id()); }
/**
 * Exercises the adjacent-sibling combinator with both sides constrained by
 * id ({@code li#1 + li#2}); only the second list item is expected.
 */
@Test public void adjacentSiblingsWithId() {
    Document doc = Jsoup.parse("<ol><li id=1>One<li id=2>Two<li id=3>Three</ol>");
    Elements following = doc.select("li#1 + li#2");

    assertEquals(1, following.size());
    assertEquals("Two", following.get(0).text());
}
/**
 * Combines a class selector with the {@code :gt(n)} pseudo-selector
 * ({@code div.foo p:gt(0)}); only "Two" from the {@code foo} div is expected.
 */
@Test public void testPseudoCombined() {
    String html =
        "<div class='foo'><p>One</p><p>Two</p></div><div><p>Three</p><p>Four</p></div>";
    Elements matched = Jsoup.parse(html).select("div.foo p:gt(0)");

    assertEquals(1, matched.size());
    assertEquals("Two", matched.get(0).text());
}
/**
 * Exercises the attribute-regex selector when the regex itself contains
 * square brackets ({@code img[src~=[o]]}).
 *
 * <p>Expects the images whose {@code src} contains an "o": ids 1 and 4.
 */
@Test public void testByAttributeRegexCharacterClass() {
    String html =
        "<p><img src=foo.png id=1><img src=bar.jpg id=2><img src=qux.JPEG id=3><img src=old.gif id=4></p>";
    Elements images = Jsoup.parse(html).select("img[src~=[o]]");

    assertEquals(2, images.size());
    assertEquals("1", images.get(0).id());
    assertEquals("4", images.get(1).id());
}
/**
 * Checks that calling {@code select} on an existing {@code Elements} result
 * searches within those elements: paragraphs are looked up inside the
 * {@code .headline} divs only, so the top-level "Excl" paragraph is not
 * expected.
 */
@Test public void filter() {
    String html =
        "<p>Excl</p><div class=headline><p>Hello</p><p>There</p></div><div class=headline><h1>Headline</h1></div>";
    Document doc = Jsoup.parse(html);

    Elements headlines = doc.select(".headline");
    Elements paragraphs = headlines.select("p");

    assertEquals(2, paragraphs.size());
    assertEquals("Hello", paragraphs.get(0).text());
    assertEquals("There", paragraphs.get(1).text());
}
/**
 * Checks how attribute-value selectors treat a trailing space in a class
 * value, using an unquoted value, a quoted value, and a quoted value with a
 * backslash-escaped space.
 *
 * <p>The first two forms are expected to match both divs; the escaped form
 * is expected to match none.
 */
@Test public void selectClassWithSpace() {
    final String html = "<div class=\"value\">class without space</div>\n"
        + "<div class=\"value \">class with space</div>";
    Document doc = Jsoup.parse(html);

    // Unquoted value with a trailing space.
    Elements unquoted = doc.select("div[class=value ]");
    assertEquals(2, unquoted.size());
    assertEquals("class without space", unquoted.get(0).text());
    assertEquals("class with space", unquoted.get(1).text());

    // Quoted value with a trailing space.
    Elements quoted = doc.select("div[class=\"value \"]");
    assertEquals(2, quoted.size());
    assertEquals("class without space", quoted.get(0).text());
    assertEquals("class with space", quoted.get(1).text());

    // Quoted value with a backslash-escaped trailing space.
    Elements escaped = doc.select("div[class=\"value\\ \"]");
    assertEquals(0, escaped.size());
}
@Test public void descendant() { String h = "<div class=head><p class=first>Hello</p><p>There</p></div><p>None</p>"; Document doc = Jsoup.parse(h); Element root = doc.getElementsByClass("HEAD").first(); Elements els = root.select(".head p"); assertEquals(2, els.size()); assertEquals("Hello", els.get(0).text()); assertEquals("There", els.get(1).text()); Elements p = root.select("p.first"); assertEquals(1, p.size()); assertEquals("Hello", p.get(0).text()); Elements empty = root.select("p .first"); // self, not descend, should not match assertEquals(0, empty.size()); Elements aboveRoot = root.select("body div.head"); assertEquals(0, aboveRoot.size()); }
@Test public void containsData() { String html = "<p>jsoup</p><script>jsoup</script><span><!-- comments --></span>"; Document doc = Jsoup.parse(html); Element body = doc.body(); Elements dataEls1 = body.select(":containsData(jsoup)"); Elements dataEls2 = body.select("script:containsData(jsoup)"); Elements dataEls3 = body.select("span:containsData(comments)"); Elements dataEls4 = body.select(":containsData(s)"); assertEquals(2, dataEls1.size()); // body and script assertEquals(1, dataEls2.size()); assertEquals(dataEls1.last(), dataEls2.first()); assertEquals("<script>jsoup</script>", dataEls2.outerHtml()); assertEquals(1, dataEls3.size()); assertEquals("span", dataEls3.first().tagName()); assertEquals(3, dataEls4.size()); assertEquals("body", dataEls4.first().tagName()); assertEquals("script", dataEls4.get(1).tagName()); assertEquals("span", dataEls4.get(2).tagName()); }
/**
 * Exercises selection by class name, including queries whose case differs
 * from the case used in the markup.
 */
@Test public void testByClass() {
    // Mixed-case tag and class in the query against mixed-case class values.
    Document paragraphs = Jsoup.parse("<p id=0 class='ONE two'><p id=1 class='one'><p id=2 class='two'>");
    Elements ones = paragraphs.select("P.One");
    assertEquals(2, ones.size());
    assertEquals("0", ones.get(0).id());
    assertEquals("1", ones.get(1).id());

    // A class that is not present yields an empty result.
    Elements missing = Jsoup.parse("<div class='one'></div>").select(".foo");
    assertEquals(0, missing.size());

    // Hyphenated class name queried in a different case.
    Elements hyphenated = Jsoup.parse("<div class='One-Two'></div>").select(".one-two");
    assertEquals(1, hyphenated.size());
}
@Test public void testGroupOr() { String h = "<div title=foo /><div title=bar /><div /><p></p><img /><span title=qux>"; Document doc = Jsoup.parse(h); Elements els = doc.select("p,div,[title]"); assertEquals(5, els.size()); assertEquals("div", els.get(0).tagName()); assertEquals("foo", els.get(0).attr("title")); assertEquals("div", els.get(1).tagName()); assertEquals("bar", els.get(1).attr("title")); assertEquals("div", els.get(2).tagName()); assertTrue( els.get(2).attr("title").length() == 0); // missing attributes come back as empty string assertFalse(els.get(2).hasAttr("title")); assertEquals("p", els.get(3).tagName()); assertEquals("span", els.get(4).tagName()); }