/**
 * Parses a text run followed by a span and a div, all at the top level, and checks
 * each of the three resulting sibling nodes in document order.
 */
public void testParseMixedSiblings() throws Exception {
    String markup = "content<span>more</span><div id=\"foo\">yet more</div>";
    List<ParsedHtmlNode> parsed = htmlParser.parse(markup);
    assertNotNull(parsed);
    assertEquals(3, parsed.size());

    // First sibling: the bare text run.
    ParsedHtmlNode leadingText = parsed.get(0);
    assertEquals("content", leadingText.getText());

    // Second sibling: the span, expected to have no attributes and one text child.
    ParsedHtmlNode span = parsed.get(1);
    assertNull(span.getText());
    assertNotNull(span.getAttributes());
    assertEquals(0, span.getAttributes().size());
    assertNotNull(span.getChildren());
    assertEquals(1, span.getChildren().size());
    assertEquals("more", span.getChildren().get(0).getText());

    // Third sibling: the div, expected to have the id="foo" attribute and one text child.
    ParsedHtmlNode div = parsed.get(2);
    assertNull(div.getText());
    assertNotNull(div.getAttributes());
    assertEquals(1, div.getAttributes().size());
    assertEquals("id", div.getAttributes().get(0).getName());
    assertEquals("foo", div.getAttributes().get(0).getValue());
    assertNotNull(div.getChildren());
    assertEquals(1, div.getChildren().size());
    assertEquals("yet more", div.getChildren().get(0).getText());
  }
  /**
   * Parses a string built from escaped entities and checks that the single resulting
   * node exposes the unescaped text and reports no attributes, children or tag name.
   */
  public void testParseStringUnescapesProperly() throws Exception {
    String escaped = "&lt;content&amp;&apos;chrome&apos;&gt;";
    List<ParsedHtmlNode> parsed = htmlParser.parse(escaped);
    assertNotNull(parsed);
    assertEquals(1, parsed.size());

    ParsedHtmlNode textNode = parsed.get(0);
    assertNotNull(textNode);
    // The entities must come back as their literal characters.
    assertEquals("<content&'chrome'>", textNode.getText());
    // Element-only properties are expected to be null on this node.
    assertNull(textNode.getAttributes());
    assertNull(textNode.getChildren());
    assertNull(textNode.getTagName());
  }
  /**
   * Parses a plain word with no markup and checks that exactly one node comes back,
   * carrying that text and no attributes, children or tag name.
   */
  public void testParseSimpleString() throws Exception {
    String plainText = "content";
    List<ParsedHtmlNode> parsed = htmlParser.parse(plainText);
    assertNotNull(parsed);
    assertEquals(1, parsed.size());

    ParsedHtmlNode textNode = parsed.get(0);
    assertNotNull(textNode);
    assertEquals("content", textNode.getText());
    // Element-only properties are expected to be null on this node.
    assertNull(textNode.getAttributes());
    assertNull(textNode.getChildren());
    assertNull(textNode.getTagName());
  }
  /**
   * Parses a single span wrapping a text run and checks the resulting element node:
   * null text, an empty (non-null) attribute list, one text child, and a tag name
   * equal to "span" when lower-cased.
   */
  public void testParseTagWithStringContents() throws Exception {
    List<ParsedHtmlNode> nodes = htmlParser.parse("<span>content</span>");
    assertNotNull(nodes);
    assertEquals(1, nodes.size());

    ParsedHtmlNode node = nodes.get(0);
    // Guard before dereferencing, as the sibling single-node tests do, so a null
    // element is reported as an assertion failure rather than a NullPointerException.
    assertNotNull(node);
    assertNull(node.getText());
    assertNotNull(node.getAttributes());
    assertEquals(0, node.getAttributes().size());
    assertNotNull(node.getChildren());
    assertEquals(1, node.getChildren().size());
    assertEquals("content", node.getChildren().get(0).getText());
    // The tag name is compared case-insensitively via lower-casing.
    assertEquals("span", node.getTagName().toLowerCase());
  }
  /**
   * Parses a div carrying one attribute and a text run, then checks the attribute's
   * name and value as well as the single text child.
   */
  public void testParseTagWithAttributes() throws Exception {
    String markup = "<div id=\"foo\">content</div>";
    List<ParsedHtmlNode> parsed = htmlParser.parse(markup);
    assertNotNull(parsed);
    assertEquals(1, parsed.size());

    ParsedHtmlNode div = parsed.get(0);
    assertNotNull(div);
    assertNull(div.getText());

    // Exactly one attribute: id="foo".
    assertNotNull(div.getAttributes());
    assertEquals(1, div.getAttributes().size());
    assertEquals("id", div.getAttributes().get(0).getName());
    assertEquals("foo", div.getAttributes().get(0).getValue());

    // Exactly one child holding the inner text.
    assertNotNull(div.getChildren());
    assertEquals(1, div.getChildren().size());
    assertEquals("content", div.getChildren().get(0).getText());
  }
  /**
   * Parses a div containing an unclosed br, a whitespace-padded text run and an
   * unclosed hr, and checks that the div ends up with exactly those three children.
   */
  public void testParseNestedContentWithNoCloseForBrAndHr() throws Exception {
    String markup = "<div><br>  and  <hr></div>";
    List<ParsedHtmlNode> parsed = htmlParser.parse(markup);
    assertNotNull(parsed);
    assertEquals(1, parsed.size());

    ParsedHtmlNode outerDiv = parsed.get(0);
    assertNull(outerDiv.getText());
    assertEquals("div", outerDiv.getTagName());
    assertNotNull(outerDiv.getAttributes());
    assertEquals(0, outerDiv.getAttributes().size());
    assertNotNull(outerDiv.getChildren());
    assertEquals(3, outerDiv.getChildren().size());

    // Child 0: the <br> element, with empty attribute and child lists.
    ParsedHtmlNode brNode = outerDiv.getChildren().get(0);
    assertNotNull(brNode);
    assertEquals("br", brNode.getTagName());
    assertNull(brNode.getText());
    assertNotNull(brNode.getAttributes());
    assertEquals(0, brNode.getAttributes().size());
    assertNotNull(brNode.getChildren());
    assertEquals(0, brNode.getChildren().size());

    // Child 1: the text run, surrounding whitespace included.
    ParsedHtmlNode textNode = outerDiv.getChildren().get(1);
    assertEquals("  and  ", textNode.getText());
    assertNull(textNode.getAttributes());
    assertNull(textNode.getChildren());
    assertNull(textNode.getTagName());

    // Child 2: the <hr> element, which should be parsed leniently despite having no close tag.
    ParsedHtmlNode hrNode = outerDiv.getChildren().get(2);
    assertNotNull(hrNode);
    assertEquals("hr", hrNode.getTagName());
    assertNull(hrNode.getText());
    assertNotNull(hrNode.getAttributes());
    assertEquals(0, hrNode.getAttributes().size());
    assertNotNull(hrNode.getChildren());
    assertEquals(0, hrNode.getChildren().size());
  }
 /**
  * Parses input made up only of spaces, a newline and a tab, and checks that the
  * result is a non-null list with no nodes in it.
  */
 public void testParseEmptyContent() throws Exception {
   List<ParsedHtmlNode> parsed = htmlParser.parse("   \n   \t  ");
   assertNotNull(parsed);
   assertEquals(0, parsed.size());
 }