@Test public void testPseudoContains() { Document doc = Jsoup.parse( "<div><p>The Rain.</p> <p class=light>The <i>rain</i>.</p> <p>Rain, the.</p></div>"); Elements ps1 = doc.select("p:contains(Rain)"); assertEquals(3, ps1.size()); Elements ps2 = doc.select("p:contains(the rain)"); assertEquals(2, ps2.size()); assertEquals("The Rain.", ps2.first().html()); assertEquals("The <i>rain</i>.", ps2.last().html()); Elements ps3 = doc.select("p:contains(the Rain):has(i)"); assertEquals(1, ps3.size()); assertEquals("light", ps3.first().className()); Elements ps4 = doc.select(".light:contains(rain)"); assertEquals(1, ps4.size()); assertEquals("light", ps3.first().className()); Elements ps5 = doc.select(":contains(rain)"); assertEquals(8, ps5.size()); // html, body, div,... }
@Test public void testMatches() { Document doc = Jsoup.parse( "<p id=1>The <i>Rain</i></p> <p id=2>There are 99 bottles.</p> <p id=3>Harder (this)</p> <p id=4>Rain</p>"); Elements p1 = doc.select("p:matches(The rain)"); // no match, case sensitive assertEquals(0, p1.size()); Elements p2 = doc.select("p:matches((?i)the rain)"); // case insense. should include root, html, body assertEquals(1, p2.size()); assertEquals("1", p2.first().id()); Elements p4 = doc.select("p:matches((?i)^rain$)"); // bounding assertEquals(1, p4.size()); assertEquals("4", p4.first().id()); Elements p5 = doc.select("p:matches(\\d+)"); assertEquals(1, p5.size()); assertEquals("2", p5.first().id()); Elements p6 = doc.select("p:matches(\\w+\\s+\\(\\w+\\))"); // test bracket matching assertEquals(1, p6.size()); assertEquals("3", p6.first().id()); Elements p7 = doc.select("p:matches((?i)the):has(i)"); // multi assertEquals(1, p7.size()); assertEquals("1", p7.first().id()); }
/**
 * Checks that {@code :contains} accepts search text containing parentheses, both
 * balanced and as a single escaped closing parenthesis.
 */
@Test
public void testPsuedoContainsWithParentheses() {
    Document doc = Jsoup.parse("<div><p id=1>This (is good)</p><p id=2>This is bad)</p>");

    // Balanced parentheses inside the argument.
    Elements balanced = doc.select("p:contains(this (is good))");
    assertEquals(1, balanced.size());
    assertEquals("1", balanced.first().id());

    // A lone closing parenthesis, escaped with a backslash in the selector.
    Elements escaped = doc.select("p:contains(this is bad\\))");
    assertEquals(1, escaped.size());
    assertEquals("2", escaped.first().id());
}
/**
 * Checks that tag names containing an underscore or a hyphen can be selected by name.
 */
@Test
public void testRelaxedTags() {
    Document doc = Jsoup.parse("<abc_def id=1>Hello</abc_def> <abc-def id=2>There</abc-def>");

    Elements underscoreTag = doc.select("abc_def");
    assertEquals(1, underscoreTag.size());
    assertEquals("1", underscoreTag.first().id());

    Elements hyphenTag = doc.select("abc-def");
    assertEquals(1, hyphenTag.size());
    assertEquals("2", hyphenTag.first().id());
}
/**
 * Checks {@code Elements.not(query)}: elements matching the query are dropped from an
 * already-selected set.
 */
@Test
public void not() {
    Document doc = Jsoup.parse("<div id=1><p>One</p></div> <div id=2><p><span>Two</span></p></div>");

    // Drop the div that has a span directly under a p; div #1 should remain.
    Elements withoutSpanChild = doc.select("div").not(":has(p > span)");
    assertEquals(1, withoutSpanChild.size());
    assertEquals("1", withoutSpanChild.first().id());

    // Drop div #1 by id; div #2 should remain.
    Elements withoutFirst = doc.select("div").not("#1");
    assertEquals(1, withoutFirst.size());
    assertEquals("2", withoutFirst.first().id());
}
/**
 * Checks the {@code [^prefix]} selector, which matches elements having an attribute
 * whose name starts with the given prefix.
 */
@Test
public void testByAttributeStarting() {
    String html = "<div id=1 data-name=jsoup>Hello</div><p data-val=5 id=2>There</p><p id=3>No</p>";
    Document doc = Jsoup.parse(html);

    // Any element with a data-* attribute.
    Elements anyWithData = doc.select("[^data-]");
    assertEquals(2, anyWithData.size());
    assertEquals("1", anyWithData.first().id());
    assertEquals("2", anyWithData.last().id());

    // The same prefix match, restricted to <p> elements.
    Elements parasWithData = doc.select("p[^data-]");
    assertEquals(1, parasWithData.size());
    assertEquals("2", parasWithData.first().id());
}
/**
 * Checks {@code :not(...)} applied to paragraphs, with an attribute selector and with a
 * nested {@code :has} as its argument.
 */
@Test
public void notParas() {
    Document doc = Jsoup.parse("<p id=1>One</p> <p>Two</p> <p><span>Three</span></p>");

    // Exclude the paragraph with id=1.
    Elements withoutIdOne = doc.select("p:not([id=1])");
    assertEquals(2, withoutIdOne.size());
    assertEquals("Two", withoutIdOne.first().text());
    assertEquals("Three", withoutIdOne.last().text());

    // Exclude the paragraph that contains a span.
    Elements withoutSpan = doc.select("p:not(:has(span))");
    assertEquals(2, withoutSpan.size());
    assertEquals("One", withoutSpan.first().text());
    assertEquals("Two", withoutSpan.last().text());
}
/**
 * Checks descendant selectors evaluated from an element context rather than from the
 * document: the context element itself can serve as the first step of the chain, while
 * a chain that begins at {@code body} is expected to match nothing.
 */
@Test
public void deeperDescendant() {
    String html =
        "<div class=head><p><span class=first>Hello</div><div class=head><p class=first><span>Another</span><p>Again</div>";
    Document doc = Jsoup.parse(html);
    Element context = doc.getElementsByClass("head").first();

    // "div" here is satisfied by the context element itself.
    Elements matches = context.select("div p .first");
    assertEquals(1, matches.size());
    assertEquals("Hello", matches.first().text());
    assertEquals("span", matches.first().tagName());

    // A selector that begins at body should find nothing from this context.
    Elements fromBody = context.select("body p .first");
    assertEquals(0, fromBody.size());
}
@Test public void testPseudoHas() { Document doc = Jsoup.parse( "<div id=0><p><span>Hello</span></p></div> <div id=1><span class=foo>There</span></div> <div id=2><p>Not</p></div>"); Elements divs1 = doc.select("div:has(span)"); assertEquals(2, divs1.size()); assertEquals("0", divs1.get(0).id()); assertEquals("1", divs1.get(1).id()); Elements divs2 = doc.select("div:has([class]"); assertEquals(1, divs2.size()); assertEquals("1", divs2.get(0).id()); Elements divs3 = doc.select("div:has(span, p)"); assertEquals(3, divs3.size()); assertEquals("0", divs3.get(0).id()); assertEquals("1", divs3.get(1).id()); assertEquals("2", divs3.get(2).id()); Elements els1 = doc.body().select(":has(p)"); assertEquals(3, els1.size()); // body, div, dib assertEquals("body", els1.first().tagName()); assertEquals("0", els1.get(1).id()); assertEquals("2", els1.get(2).id()); }
/**
 * Checks the general sibling combinator ({@code ~}): {@code #3} is selected as a later
 * sibling of {@code #1} even though another item sits between them.
 */
@Test
public void generalSiblings() {
    Document doc = Jsoup.parse("<ol><li id=1>One<li id=2>Two<li id=3>Three</ol>");

    Elements laterSiblings = doc.select("#1 ~ #3");

    assertEquals(1, laterSiblings.size());
    assertEquals("Three", laterSiblings.first().text());
}
/**
 * Checks {@code :not(.class)}: only the div without class {@code left} is selected.
 */
@Test
public void notClass() {
    String html = "<div class=left>One</div><div class=right id=1><p>Two</p></div>";
    Document doc = Jsoup.parse(html);

    Elements notLeft = doc.select("div:not(.left)");

    assertEquals(1, notLeft.size());
    assertEquals("1", notLeft.first().id());
}
/**
 * Checks the adjacent sibling combinator ({@code +}) followed by a {@code :contains}
 * whose search text is itself a literal "+" character.
 */
@Test
public void testCombinedWithContains() {
    Document doc = Jsoup.parse("<p id=1>One</p><p>Two +</p><p>Three +</p>");

    // Only the paragraph immediately after #1 qualifies, although "Three +" also has a "+".
    Elements adjacent = doc.select("p#1 + :contains(+)");

    assertEquals(1, adjacent.size());
    assertEquals("Two +", adjacent.text());
    assertEquals("p", adjacent.first().tagName());
}
/**
 * Checks the child combinator ({@code >}) with class qualifiers on the parent and on
 * the child.
 */
@Test
public void parentWithClassChild() {
    String html =
        "<h1 class=foo><a href=1 /></h1><h1 class=foo><a href=2 class=bar /></h1><h1><a href=3 /></h1>";
    Document doc = Jsoup.parse(html);

    // Every anchor directly under any h1.
    Elements anchors = doc.select("h1 > a");
    assertEquals(3, anchors.size());
    assertEquals("a", anchors.first().tagName());

    // Anchors whose parent h1 has class foo.
    Elements underFoo = doc.select("h1.foo > a");
    assertEquals(2, underFoo.size());
    assertEquals("a", underFoo.first().tagName());

    // Parent must be h1.foo and the anchor must have class bar.
    Elements barUnderFoo = doc.select("h1.foo > a.bar");
    assertEquals(1, barUnderFoo.size());
}
/**
 * Checks a chain of two child combinators with id, class and attribute-substring
 * qualifiers on the steps.
 */
@Test
public void multiChildDescent() {
    Document doc = Jsoup.parse(
        "<div id=foo><h1 class=bar><a href=http://example.com/>One</a></h1></div>");

    Elements links = doc.select("div#foo > h1.bar > a[href*=example]");

    assertEquals(1, links.size());
    assertEquals("a", links.first().tagName());
}
/**
 * Checks {@code :containsOwn}: the paragraph matches on its own text ("Hello now") but
 * not on text that appears only inside its child {@code <b>}.
 */
@Test
public void containsOwn() {
    Document doc = Jsoup.parse("<p id=1>Hello <b>there</b> now</p>");

    Elements ownTextMatches = doc.select("p:containsOwn(Hello now)");
    assertEquals(1, ownTextMatches.size());
    assertEquals("1", ownTextMatches.first().id());

    // "there" lives in the nested <b>, so the paragraph itself is not selected.
    Elements childTextMatches = doc.select("p:containsOwn(there)");
    assertEquals(0, childTextMatches.size());
}
@Test public void notAll() { Document doc = Jsoup.parse("<p>Two</p> <p><span>Three</span></p>"); Elements el1 = doc.body().select(":not(p)"); // should just be the span assertEquals(2, el1.size()); assertEquals("body", el1.first().tagName()); assertEquals("span", el1.last().tagName()); }
/**
 * Checks {@code :matchesOwn}: the regex is applied to the paragraph's own text, so text
 * found only in its child {@code <b>} does not select the paragraph.
 */
@Test
public void matchesOwn() {
    Document doc = Jsoup.parse("<p id=1>Hello <b>there</b> now</p>");

    Elements ownTextMatches = doc.select("p:matchesOwn((?i)hello now)");
    assertEquals(1, ownTextMatches.size());
    assertEquals("1", ownTextMatches.first().id());

    // "there" is only in the nested <b>.
    Elements childTextMatches = doc.select("p:matchesOwn(there)");
    assertEquals(0, childTextMatches.size());
}
/**
 * Checks the universal selector ({@code *}), both document-wide and as a descendant
 * step under {@code div}.
 */
@Test
public void testAllElements() {
    Document doc = Jsoup.parse("<div><p>Hello</p><p><b>there</b></p></div>");

    // Document-wide: eight elements are expected.
    Elements everything = doc.select("*");
    assertEquals(8, everything.size());

    // Under the div: three elements, the first of which is a paragraph.
    Elements underDiv = doc.select("div *");
    assertEquals(3, underDiv.size());
    assertEquals("p", underDiv.first().tagName());
}
@Test public void testNestedHas() { Document doc = Jsoup.parse("<div><p><span>One</span></p></div> <div><p>Two</p></div>"); Elements divs = doc.select("div:has(p:has(span))"); assertEquals(1, divs.size()); assertEquals("One", divs.first().text()); // test matches in has divs = doc.select("div:has(p:matches((?i)two))"); assertEquals(1, divs.size()); assertEquals("div", divs.first().tagName()); assertEquals("Two", divs.first().text()); // test contains in has divs = doc.select("div:has(p:contains(two))"); assertEquals(1, divs.size()); assertEquals("div", divs.first().tagName()); assertEquals("Two", divs.first().text()); }
/**
 * Exercises the attribute selectors: presence, {@code =}, {@code !=}, {@code ^=},
 * {@code $=} and {@code *=}, including mixed-case attribute names and values and a
 * quoted value containing a space.
 */
@Test
public void testByAttribute() {
    String html = "<div Title=Foo /><div Title=Bar /><div Style=Qux /><div title=Bam /><div title=SLAM />" +
        "<div data-name='with spaces'/>";
    Document doc = Jsoup.parse(html);

    // Presence: "Title" and "title" both count.
    Elements havingTitle = doc.select("[title]");
    assertEquals(4, havingTitle.size());

    // Equality is expected to hold regardless of the case of the name or the value.
    Elements upperName = doc.select("[TITLE=foo]");
    assertEquals(1, upperName.size());

    Elements quotedLower = doc.select("[title=\"foo\"]");
    assertEquals(1, quotedLower.size());

    Elements quotedExact = doc.select("[title=\"Foo\"]");
    assertEquals(1, quotedExact.size());

    // A quoted value may contain whitespace.
    Elements spacedValue = doc.select("[data-name=\"with spaces\"]");
    assertEquals(1, spacedValue.size());
    assertEquals("with spaces", spacedValue.first().attr("data-name"));

    // Inequality: every div except the one titled Bar, including divs with no title.
    Elements notBar = doc.select("div[title!=bar]");
    assertEquals(5, notBar.size());
    assertEquals("Foo", notBar.first().attr("title"));

    // Value prefix.
    Elements startingWithBa = doc.select("[title^=ba]");
    assertEquals(2, startingWithBa.size());
    assertEquals("Bar", startingWithBa.first().attr("title"));
    assertEquals("Bam", startingWithBa.last().attr("title"));

    // Value suffix.
    Elements endingWithAm = doc.select("[title$=am]");
    assertEquals(2, endingWithAm.size());
    assertEquals("Bam", endingWithAm.first().attr("title"));
    assertEquals("SLAM", endingWithAm.last().attr("title"));

    // Value substring.
    Elements containingA = doc.select("[title*=a]");
    assertEquals(3, containingA.size());
    assertEquals("Bar", containingA.first().attr("title"));
    assertEquals("SLAM", containingA.last().attr("title"));
}
@Test public void and() { String h = "<div id=1 class='foo bar' title=bar name=qux><p class=foo title=bar>Hello</p></div"; Document doc = Jsoup.parse(h); Elements div = doc.select("div.foo"); assertEquals(1, div.size()); assertEquals("div", div.first().tagName()); Elements p = doc.select("div .foo"); // space indicates like "div *.foo" assertEquals(1, p.size()); assertEquals("p", p.first().tagName()); Elements div2 = doc.select("div#1.foo.bar[title=bar][name=qux]"); // very specific! assertEquals(1, div2.size()); assertEquals("div", div2.first().tagName()); Elements p2 = doc.select("div *.foo"); // space indicates like "div *.foo" assertEquals(1, p2.size()); assertEquals("p", p2.first().tagName()); }
@Test public void containsData() { String html = "<p>jsoup</p><script>jsoup</script><span><!-- comments --></span>"; Document doc = Jsoup.parse(html); Element body = doc.body(); Elements dataEls1 = body.select(":containsData(jsoup)"); Elements dataEls2 = body.select("script:containsData(jsoup)"); Elements dataEls3 = body.select("span:containsData(comments)"); Elements dataEls4 = body.select(":containsData(s)"); assertEquals(2, dataEls1.size()); // body and script assertEquals(1, dataEls2.size()); assertEquals(dataEls1.last(), dataEls2.first()); assertEquals("<script>jsoup</script>", dataEls2.outerHtml()); assertEquals(1, dataEls3.size()); assertEquals("span", dataEls3.first().tagName()); assertEquals(3, dataEls4.size()); assertEquals("body", dataEls4.first().tagName()); assertEquals("script", dataEls4.get(1).tagName()); assertEquals("span", dataEls4.get(2).tagName()); }
/**
 * Checks {@code Elements.val()} and {@code Elements.val(String)} on a set holding an
 * {@code <input>} and a {@code <textarea>}.
 */
@Test
public void val() {
    Document doc = Jsoup.parse("<input value='one' /><textarea>two</textarea>");
    Elements fields = doc.select("input, textarea");
    assertEquals(2, fields.size());

    // Reading on the set yields the input's value; the textarea reports its content.
    assertEquals("one", fields.val());
    assertEquals("two", fields.last().val());

    // Writing on the set is expected to update both elements.
    fields.val("three");
    assertEquals("three", fields.first().val());
    assertEquals("three", fields.last().val());
    assertEquals("<textarea>three</textarea>", fields.last().outerHtml());
}
/**
 * Checks the wildcard-namespace tag selector ({@code *|name}) alone and combined with a
 * class and with {@code :contains}.
 */
@Test
public void testWildcardNamespacedTag() {
    String html = "<div><abc:def id=1>Hello</abc:def></div> <abc:def class=bold id=2>There</abc:def>";
    Document doc = Jsoup.parse(html);

    // Local name "def" in any namespace prefix.
    Elements anyNamespace = doc.select("*|def");
    assertEquals(2, anyNamespace.size());
    assertEquals("1", anyNamespace.first().id());
    assertEquals("2", anyNamespace.last().id());

    // A plain class selector still finds the namespaced element.
    Elements boldOnly = doc.select(".bold");
    assertEquals(1, boldOnly.size());
    assertEquals("2", boldOnly.last().id());

    // Wildcard namespace plus class.
    Elements boldDef = doc.select("*|def.bold");
    assertEquals(1, boldDef.size());
    assertEquals("2", boldDef.last().id());

    // Wildcard namespace plus :contains.
    Elements defWithE = doc.select("*|def:contains(e)");
    assertEquals(2, defWithE.size());
    assertEquals("1", defWithE.first().id());
    assertEquals("2", defWithE.last().id());
}
// for http://github.com/jhy/jsoup/issues#issue/13 @Test public void testSupportsLeadingCombinator() { String h = "<div><p><span>One</span><span>Two</span></p></div>"; Document doc = Jsoup.parse(h); Element p = doc.select("div > p").first(); Elements spans = p.select("> span"); assertEquals(2, spans.size()); assertEquals("One", spans.first().text()); // make sure doesn't get nested h = "<div id=1><div id=2><div id=3></div></div></div>"; doc = Jsoup.parse(h); Element div = doc.select("div").select(" > div").first(); assertEquals("2", div.id()); }