/**
 * Parses a two-level section/subsection fragment written with upper-case tags,
 * a space before each opening tag's {@code >} and unclosed {@code <P>} elements.
 * The rendered HTML is expected to use lower-case tags, drop that space and
 * contain an explicit {@code </p>} for every paragraph.
 */
@Test
public void testPeterFull() {
    String markup =
            "<DIV class=\"section\" id=\"forest-elephants\" >\n" +
            "<H1>Forest elephants</H1>\n" +
            "<P>In this section, we discuss the lesser known forest elephants.\n" +
            "...this section continues...\n" +
            "<DIV class=\"subsection\" id=\"forest-habitat\" >\n" +
            "<H2>Habitat</H2>\n" +
            "<P>Forest elephants do not live in trees but among them.\n" +
            "...this subsection continues...\n" +
            "</DIV>\n" +
            "</DIV>";

    Document document = new LagartoDOMBuilder().parse(markup);

    String expectedHtml =
            "<div class=\"section\" id=\"forest-elephants\">\n" +
            "<h1>Forest elephants</h1>\n" +
            "<p>In this section, we discuss the lesser known forest elephants.\n" +
            "...this section continues...\n</p>" +
            "<div class=\"subsection\" id=\"forest-habitat\">\n" +
            "<h2>Habitat</h2>\n" +
            "<p>Forest elephants do not live in trees but among them.\n" +
            "...this subsection continues...\n</p>" +
            "</div>\n" +
            "</div>";

    String actualHtml = document.getHtml();
    assertEquals(expectedHtml, actualHtml);

    // the document's own check() must also report true
    assertTrue(document.check());
}
@Test public void testTwoHtml() throws IOException { File file = new File(testDataRoot, "two.html"); String htmlContent = FileUtil.readString(file); Document document = new LagartoDOMBuilder().parse(htmlContent); Node html = new NodeSelector(document).select("html").get(0); assertNotNull(html); Node body = new NodeSelector(html).selectFirst("body"); Element h1 = body.getFirstChildElement(); assertEquals("h1", h1.getNodeName()); Node comment1 = body.getFirstChild().getNextSibling(); assertEquals(Node.NodeType.COMMENT, comment1.getNodeType()); Element p = (Element) new NodeSelector(body).selectFirst("p"); assertEquals(h1, p.getPreviousSiblingElement()); assertEquals(h1, comment1.getNextSiblingElement()); assertNull(comment1.getNextSiblingName()); // check if filter works just for sub elements List<Node> p_ems = new NodeSelector(p).select("em"); assertEquals(1, p_ems.size()); Element script = (Element) new NodeSelector(html).selectFirst("script"); assertEquals("text/javascript", script.getAttribute("type")); assertTrue(document.check()); }
/**
 * An unclosed {@code <p>} followed by a {@code <div>}: the rendered HTML is
 * expected to contain {@code </p>} right before the nested {@code <div>}.
 */
@Test
public void testPeterSimple1() {
    String markup = "<div><h1>FORELE</h1><p>dicuss<div>xxx</div></div>";

    Document document = new LagartoDOMBuilder().parse(markup);

    String actualHtml = document.getHtml();
    assertEquals("<div><h1>FORELE</h1><p>dicuss</p><div>xxx</div></div>", actualHtml);
    assertTrue(document.check());
}
/**
 * {@code div}, {@code span} and {@code form} are all left open before
 * {@code </body>}: the rendered HTML is expected to close them innermost-first
 * ahead of the body's closing tag.
 */
@Test
public void testTwoNodes2() {
    String markup = "<body><div>test<span><form>xxx</body>";

    Document document = new LagartoDOMBuilder().parse(markup);

    String actualHtml = document.getHtml();
    assertEquals("<body><div>test<span><form>xxx</form></span></div></body>", actualHtml);
    assertTrue(document.check());
}
/**
 * An unclosed {@code <div>} whose first child is a single blank: the rendered
 * HTML is expected to keep the blank and close the {@code div} before
 * {@code </body>}.
 */
@Test
public void testOneNodeWithBlanks() {
    String markup = "<body><div> <span>sss</span></body>";

    Document document = new LagartoDOMBuilder().parse(markup);

    String actualHtml = document.getHtml();
    assertEquals("<body><div> <span>sss</span></div></body>", actualHtml);
    assertTrue(document.check());
}
/**
 * Input ends right after two opening tags: the rendered HTML is expected to
 * contain closing tags for both {@code div} and {@code body}.
 */
@Test
public void testEof2() {
    String markup = "<body><div>";

    Document document = new LagartoDOMBuilder().parse(markup);

    String actualHtml = document.getHtml();
    assertEquals("<body><div></div></body>", actualHtml);
    assertTrue(document.check());
}
/**
 * Parses an upper-cased fragment with blanks between elements and unclosed
 * {@code <P>} tags. The rendered HTML is expected to have lower-case tag
 * names, unchanged (upper-case) text and blanks, and an explicit {@code </p>}
 * for each paragraph.
 */
@Test
public void testPeterSimple3WithSpaces() {
    // Upper-case with a fixed locale: the default-locale variant would map
    // 'i' to a dotted capital under e.g. a Turkish locale, so the tag names
    // would no longer be plain ASCII and the expectation below would not hold.
    String content = "<div> <h1>FORELE</h1> <p>dicuss <div> <h2>HAB</h2> <p>AMONG </div> </div>"
            .toUpperCase(java.util.Locale.ROOT);

    LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder();
    Document doc = lagartoDOMBuilder.parse(content);

    assertEquals(
            "<div> <h1>FORELE</h1> <p>DICUSS </p><div> <h2>HAB</h2> <p>AMONG </p></div> </div>",
            doc.getHtml());
    assertTrue(doc.check());
}
/**
 * Parses {@code one.html} from the test data directory and runs the selector
 * group {@code "em, b, b"} (note the repeated {@code b}) against the whole
 * document; nine nodes are expected in the result.
 *
 * @throws IOException propagated from reading the test file
 */
@Test
public void testGroupOfSelectors() throws IOException {
    String htmlContent = FileUtil.readString(new File(testDataRoot, "one.html"));
    Document document = new LagartoDOMBuilder().parse(htmlContent);

    NodeSelector selector = new NodeSelector(document);
    List<Node> matches = selector.select("em, b, b");

    assertEquals(9, matches.size());
    assertTrue(document.check());
}