Exemplo n.º 1
0
  /**
   * Parses a link that wraps an image and a run of text, then checks that the resulting link tag
   * has exactly those two children, in that order, with the expected contents.
   *
   * @throws ParserException if parsing the test markup fails
   */
  public void testScan() throws ParserException {
    createParser(
        "<A HREF=\"mytest.html\"><IMG SRC=\"abcd.jpg\">Hello World</A>", "http://www.yahoo.com");
    parser.setNodeFactory(
        new PrototypicalNodeFactory(
            new Tag[] {
              new LinkTag(), new ImageTag(),
            }));
    parseAndAssertNodeCount(1);
    assertTrue("Node should be a link node", node[0] instanceof LinkTag);

    LinkTag linkTag = (LinkTag) node[0];
    // Collect the link's children. Only the two expected slots are stored; any extra child is
    // still counted, so the size assertion below fails cleanly instead of overrunning the array.
    Node[] dataNode = new Node[2];
    int childCount = 0;
    for (SimpleNodeIterator e = linkTag.children(); e.hasMoreNodes(); ) {
      Node child = e.nextNode();
      if (childCount < dataNode.length) {
        dataNode[childCount] = child;
      }
      childCount++;
    }
    // Compare plain ints rather than boxing through the deprecated Integer constructor.
    assertEquals("Number of data nodes", 2, childCount);
    assertTrue("First data node should be an Image Node", dataNode[0] instanceof ImageTag);
    assertTrue("Second data node should be a String Node", dataNode[1] instanceof Text);

    // Check the contents of each data node. The image URL is expected to be absolute, combining
    // the URL handed to createParser with the relative SRC value.
    ImageTag imageTag = (ImageTag) dataNode[0];
    assertEquals("Image URL", "http://www.yahoo.com/abcd.jpg", imageTag.getImageURL());
    Text stringNode = (Text) dataNode[1];
    assertEquals("String Contents", "Hello World", stringNode.getText());
  }
Exemplo n.º 2
0
 /**
  * Regression test for a problem reported by Mazlan Mat against the freshmeat page: an anchor
  * without any attributes, {@code <a>Revision</a>}. The report called it a really bad HTML tag;
  * a later note by Derrick points out that it is in fact completely legal HTML.
  *
  * <p>The anchor must come back as a single tag that renders to the original markup and holds
  * one text child.
  *
  * @throws ParserException if parsing the test markup fails
  */
 public void testFreshMeatBug() throws ParserException {
   final String markup = "<a>Revision</a>";
   createParser(markup, "http://www.yahoo.com");
   parseAndAssertNodeCount(1);

   // The only top-level node is the anchor, and it renders back to the input unchanged.
   assertTrue("Node 0 should be a tag", node[0] instanceof Tag);
   final Tag anchor = (Tag) node[0];
   assertEquals("Tag Contents", markup, anchor.toHtml());

   // Its single child is the enclosed text.
   assertEquals("Node 0 should have one child", 1, anchor.getChildren().size());
   final Object onlyChild = anchor.getChildren().elementAt(0);
   assertTrue("The child should be a string node", onlyChild instanceof Text);
   assertEquals("Text Contents", "Revision", ((Text) onlyChild).getText());
 }
Exemplo n.º 3
0
 /**
  * Parses a fragment made of leading text, a line break, a mailto link and a trailing bare
  * {@code <a>}, and checks that it yields four top-level nodes: text, tag, link, tag.
  *
  * @throws ParserException if parsing the test markup fails
  */
 public void testErroneousLinkBug() throws ParserException {
   final String markup =
       "Site Comments?<br>"
           + "<a href=\"mailto:[email protected]?subject=Site Comments\">"
           + "Mail Us"
           + "<a>";
   createParser(markup);
   parseAndAssertNodeCount(4);

   // Node 0: the leading text.
   assertTrue("First node should be a Text", node[0] instanceof Text);
   final Text leadingText = (Text) node[0];
   assertEquals("Text of the Text", "Site Comments?", leadingText.getText());

   // Node 1: the line break. Node 2: the mailto anchor.
   assertTrue("Second node should be a tag", node[1] instanceof Tag);
   assertTrue("Third node should be a link", node[2] instanceof LinkTag);

   // Node 3: the trailing <a>. LinkScanner.evaluate() says no HREF means it isn't a link,
   // so only the generic tag type is asserted here.
   assertTrue("Fourth node should be a tag", node[3] instanceof Tag);
 }
Exemplo n.º 4
0
 /**
  * Regression test for a bug reported by Raj Sharma on 5-Apr-2002: when parsing
  * http://www.samachar.com the whole page could not be picked up, because parsing stopped after
  * one particular link whose tag was spread over three lines. The bug has been reproduced and
  * fixed.
  *
  * <p>The test feeds such a link, with line breaks inside the tag and inside the href value, and
  * checks the extracted URL, the link text and the text that follows the link.
  *
  * @throws ParserException if parsing the test markup fails
  */
 public void testMultipleLineBug() throws ParserException {
   createParser(
       "<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font><a \n"
           + "href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n"
           + "nical.html\"> Journalism 3.0</a> by Rajesh Jain");
   parser.setNodeFactory(new PrototypicalNodeFactory(new LinkTag()));
   parseAndAssertNodeCount(8);
   assertTrue("Seventh node should be a link tag", node[6] instanceof LinkTag);
   LinkTag linkTag = (LinkTag) node[6];
   // The expected URL is the href value without the line break embedded in the markup.
   String exp =
       "http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/technical.html";
   assertStringEquals("Link URL of link tag", exp, linkTag.getLink());
   assertEquals("Link Text of link tag", " Journalism 3.0", linkTag.getLinkText());
   assertTrue("Eighth node should be a string node", node[7] instanceof Text);
   Text stringNode = (Text) node[7];
   assertEquals("String node contents", " by Rajesh Jain", stringNode.getText());
 }
 /**
  * Returns the text reported by the wrapped {@code delegate}.
  *
  * @return whatever {@code delegate.getText()} returns, passed through unchanged
  */
 public String getText() {
   final String delegateText = delegate.getText();
   return delegateText;
 }
Exemplo n.º 6
0
 /**
  * Handles a visited text node by setting its text as the inner text of the element that
  * {@code getParent} returns for the node's parent.
  *
  * @param string the text node being visited
  */
 @Override
 public void visitStringNode(Text string) {
   // Receiver is resolved first, then the text is read, matching the original statement order.
   getParent(string.getParent())
       .setInnerText(string.getText());
 }