Example #1
0
  /**
   * Parses a nested section/subsection fragment written with upper-case tags and unclosed
   * {@code <P>} elements, then compares the serialized DOM against the expected markup:
   * lower-case tag names, no blank before the closing {@code >} of the start tags, and an
   * explicit {@code </p>} emitted before each following {@code <div>} / {@code </div>}.
   * Finally asserts that {@code Document.check()} returns {@code true}.
   */
  @Test
  public void testPeterFull() {
    // Expected serialization of the parsed tree.
    final String expectedHtml =
        "<div class=\"section\" id=\"forest-elephants\">\n"
            + "<h1>Forest elephants</h1>\n"
            + "<p>In this section, we discuss the lesser known forest elephants.\n"
            + "...this section continues...\n</p>"
            + "<div class=\"subsection\" id=\"forest-habitat\">\n"
            + "<h2>Habitat</h2>\n"
            + "<p>Forest elephants do not live in trees but among them.\n"
            + "...this subsection continues...\n</p>"
            + "</div>\n"
            + "</div>";

    // Input markup: neither <P> element is closed explicitly.
    final String markup =
        "<DIV class=\"section\" id=\"forest-elephants\" >\n"
            + "<H1>Forest elephants</H1>\n"
            + "<P>In this section, we discuss the lesser known forest elephants.\n"
            + "...this section continues...\n"
            + "<DIV class=\"subsection\" id=\"forest-habitat\" >\n"
            + "<H2>Habitat</H2>\n"
            + "<P>Forest elephants do not live in trees but among them.\n"
            + "...this subsection continues...\n"
            + "</DIV>\n"
            + "</DIV>";

    Document parsed = new LagartoDOMBuilder().parse(markup);

    assertEquals(expectedHtml, parsed.getHtml());
    assertTrue(parsed.check());
  }
Example #2
0
  /**
   * Reads {@code two.html} from the test data directory, parses it and exercises node
   * selection and sibling navigation on the resulting tree.
   *
   * <p>Asserted, in order: an {@code html} node is selectable; the first child element of
   * {@code body} is named {@code h1}; the second child node of {@code body} is a comment; the
   * sibling-element relations between the {@code h1}, the comment and the first {@code p};
   * a selector rooted at {@code p} finds exactly one {@code em}; the first {@code script}
   * has {@code type="text/javascript"}; and {@code Document.check()} returns {@code true}.
   *
   * @throws IOException if the fixture file cannot be read
   */
  @Test
  public void testTwoHtml() throws IOException {
    String markup = FileUtil.readString(new File(testDataRoot, "two.html"));
    Document dom = new LagartoDOMBuilder().parse(markup);

    Node htmlNode = new NodeSelector(dom).select("html").get(0);
    assertNotNull(htmlNode);

    Node bodyNode = new NodeSelector(htmlNode).selectFirst("body");

    Element heading = bodyNode.getFirstChildElement();
    assertEquals("h1", heading.getNodeName());

    // Second child node of <body>, counting all node types rather than only elements.
    Node firstComment = bodyNode.getFirstChild().getNextSibling();
    assertEquals(Node.NodeType.COMMENT, firstComment.getNodeType());

    Element paragraph = (Element) new NodeSelector(bodyNode).selectFirst("p");

    assertEquals(heading, paragraph.getPreviousSiblingElement());
    assertEquals(heading, firstComment.getNextSiblingElement());
    assertNull(firstComment.getNextSiblingName());

    // A selector rooted at <p> must only report matches among that element's descendants.
    List<Node> emphasisInParagraph = new NodeSelector(paragraph).select("em");
    assertEquals(1, emphasisInParagraph.size());

    Element scriptElement = (Element) new NodeSelector(htmlNode).selectFirst("script");
    assertEquals("text/javascript", scriptElement.getAttribute("type"));

    assertTrue(dom.check());
  }
Example #3
0
 /**
  * Parses a fragment whose {@code <p>} is left open before a nested {@code <div>} and expects
  * the serialized DOM to contain an explicit {@code </p>} right before that {@code <div>}.
  * Also asserts that {@code Document.check()} returns {@code true}.
  */
 @Test
 public void testPeterSimple1() {
   final String expectedHtml = "<div><h1>FORELE</h1><p>dicuss</p><div>xxx</div></div>";

   Document parsed =
       new LagartoDOMBuilder().parse("<div><h1>FORELE</h1><p>dicuss<div>xxx</div></div>");

   assertEquals(expectedHtml, parsed.getHtml());
   assertTrue(parsed.check());
 }
Example #4
0
 /**
  * Parses a body in which {@code <div>}, {@code <span>} and {@code <form>} are all left open
  * when {@code </body>} arrives, and expects the serialized DOM to close them innermost-first
  * ({@code </form></span></div>}) ahead of {@code </body>}.
  * Also asserts that {@code Document.check()} returns {@code true}.
  */
 @Test
 public void testTwoNodes2() {
   final String expectedHtml = "<body><div>test<span><form>xxx</form></span></div></body>";

   Document parsed = new LagartoDOMBuilder().parse("<body><div>test<span><form>xxx</body>");

   assertEquals(expectedHtml, parsed.getHtml());
   assertTrue(parsed.check());
 }
Example #5
0
 /**
  * Parses a body holding an unclosed {@code <div>} that starts with three blanks and expects
  * the serialized DOM to keep those blanks and to add the missing {@code </div>} before
  * {@code </body>}. Also asserts that {@code Document.check()} returns {@code true}.
  */
 @Test
 public void testOneNodeWithBlanks() {
   final String expectedHtml = "<body><div>   <span>sss</span></div></body>";

   Document parsed = new LagartoDOMBuilder().parse("<body><div>   <span>sss</span></body>");

   assertEquals(expectedHtml, parsed.getHtml());
   assertTrue(parsed.check());
 }
Example #6
0
 /**
  * Parses input that ends right after two start tags and expects the serialized DOM to
  * contain both matching end tags. Also asserts that {@code Document.check()} returns
  * {@code true}.
  */
 @Test
 public void testEof2() {
   final String expectedHtml = "<body><div></div></body>";

   Document parsed = new LagartoDOMBuilder().parse("<body><div>");

   assertEquals(expectedHtml, parsed.getHtml());
   assertTrue(parsed.check());
 }
Example #7
0
 /**
  * Parses an upper-cased fragment containing whitespace between elements and two unclosed
  * {@code <P>} elements, and expects the serialized DOM to use lower-case tag names, keep the
  * (upper-cased) text and blanks, and emit an explicit {@code </p>} for each open paragraph.
  * Also asserts that {@code Document.check()} returns {@code true}.
  */
 @Test
 public void testPeterSimple3WithSpaces() {
   // Upper-case with Locale.ROOT so the fixture does not depend on the JVM default locale:
   // the no-argument toUpperCase() maps 'i' to U+0130 (dotted capital I) under a Turkish or
   // Azerbaijani default locale, which would corrupt both the "div" tag names and "dicuss".
   String content =
       "<div> <h1>FORELE</h1> <p>dicuss <div> <h2>HAB</h2> <p>AMONG </div> </div>"
           .toUpperCase(java.util.Locale.ROOT);
   LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder();
   Document doc = lagartoDOMBuilder.parse(content);
   assertEquals(
       "<div> <h1>FORELE</h1> <p>DICUSS </p><div> <h2>HAB</h2> <p>AMONG </p></div> </div>",
       doc.getHtml());
   assertTrue(doc.check());
 }
Example #8
0
  /**
   * Reads {@code one.html} from the test data directory, parses it and runs the selector
   * group {@code "em, b, b"} (note the repeated {@code b}) against the document, expecting
   * nine nodes in the result. Also asserts that {@code Document.check()} returns
   * {@code true}.
   *
   * @throws IOException if the fixture file cannot be read
   */
  @Test
  public void testGroupOfSelectors() throws IOException {
    String markup = FileUtil.readString(new File(testDataRoot, "one.html"));
    Document dom = new LagartoDOMBuilder().parse(markup);

    List<Node> selected = new NodeSelector(dom).select("em, b, b");

    assertEquals(9, selected.size());
    assertTrue(dom.check());
  }