/** Test scan with data which is of diff nodes type */ public void testScan() throws ParserException { createParser( "<A HREF=\"mytest.html\"><IMG SRC=\"abcd.jpg\">Hello World</A>", "http://www.yahoo.com"); parser.setNodeFactory( new PrototypicalNodeFactory( new Tag[] { new LinkTag(), new ImageTag(), })); parseAndAssertNodeCount(1); assertTrue("Node should be a link node", node[0] instanceof LinkTag); LinkTag linkTag = (LinkTag) node[0]; // Get the link data and cross-check Node[] dataNode = new Node[10]; int i = 0; for (SimpleNodeIterator e = linkTag.children(); e.hasMoreNodes(); ) { dataNode[i++] = e.nextNode(); } assertEquals("Number of data nodes", new Integer(2), new Integer(i)); assertTrue("First data node should be an Image Node", dataNode[0] instanceof ImageTag); assertTrue("Second data node shouls be a String Node", dataNode[1] instanceof Text); // Check the contents of each data node ImageTag imageTag = (ImageTag) dataNode[0]; assertEquals("Image URL", "http://www.yahoo.com/abcd.jpg", imageTag.getImageURL()); Text stringNode = (Text) dataNode[1]; assertEquals("String Contents", "Hello World", stringNode.getText()); }
/**
 * Regression test for a bug seen on the freshmeat page, reported by Mazlan Mat as a really bad
 * HTML tag: {@code <A>Revision<\a>}.
 *
 * <p>Note: Actually, this is completely legal HTML - Derrick
 *
 * <p>The markup {@code <a>Revision</a>} must parse to a single {@code Tag} whose
 * {@code toHtml()} output equals the input and whose only child is a {@code Text} node
 * containing {@code Revision}.
 *
 * @throws ParserException propagated from the parser helpers if parsing fails
 */
public void testFreshMeatBug() throws ParserException {
  final String markup = "<a>Revision</a>";
  createParser(markup, "http://www.yahoo.com");
  parseAndAssertNodeCount(1);

  // The whole anchor must come back as one tag that round-trips to the input markup.
  assertTrue("Node 0 should be a tag", node[0] instanceof Tag);
  final Tag anchor = (Tag) node[0];
  assertEquals("Tag Contents", markup, anchor.toHtml());

  // Its single child is the enclosed text.
  assertEquals("Node 0 should have one child", 1, anchor.getChildren().size());
  final boolean childIsText = anchor.getChildren().elementAt(0) instanceof Text;
  assertTrue("The child should be a string node", childIsText);
  final Text revision = (Text) anchor.getChildren().elementAt(0);
  assertEquals("Text Contents", "Revision", revision.getText());
}
public void testErroneousLinkBug() throws ParserException { createParser( "Site Comments?<br>" + "<a href=\"mailto:[email protected]?subject=Site Comments\">" + "Mail Us" + "<a>"); parseAndAssertNodeCount(4); // The first node should be a Text assertTrue("First node should be a Text", node[0] instanceof Text); Text stringNode = (Text) node[0]; assertEquals("Text of the Text", "Site Comments?", stringNode.getText()); assertTrue("Second node should be a tag", node[1] instanceof Tag); assertTrue("Third node should be a link", node[2] instanceof LinkTag); // LinkScanner.evaluate() says no HREF means it isn't a link: assertTrue("Fourth node should be a tag", node[3] instanceof Tag); }
/** * Bug reported by Raj Sharma,5-Apr-2002, upon parsing http://www.samachar.com, the entire page * could not be picked up. The problem was occurring after parsing a particular link after which * the parsing would not proceed. This link was spread over three lines. The bug has been * reproduced and fixed. */ public void testMultipleLineBug() throws ParserException { createParser( "<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font><a \n" + "href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n" + "nical.html\"> Journalism 3.0</a> by Rajesh Jain"); parser.setNodeFactory(new PrototypicalNodeFactory(new LinkTag())); parseAndAssertNodeCount(8); assertTrue("Seventh node should be a link tag", node[6] instanceof LinkTag); LinkTag linkTag = (LinkTag) node[6]; String exp = new String( "http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/technical.html"); // assertEquals("Length of link tag",exp.length(), linkTag.getLink().length()); assertStringEquals("Link URL of link tag", exp, linkTag.getLink()); assertEquals("Link Text of link tag", " Journalism 3.0", linkTag.getLinkText()); assertTrue("Eight node should be a string node", node[7] instanceof Text); Text stringNode = (Text) node[7]; assertEquals("String node contents", " by Rajesh Jain", stringNode.getText()); }
/**
 * Returns the text reported by the wrapped {@code delegate}.
 *
 * @return the value of {@code delegate.getText()}, passed through unchanged
 */
public String getText() {
  final String text = delegate.getText();
  return text;
}
/**
 * Visits a text node: looks up the {@code Element} that {@code getParent(...)} returns for the
 * node's parent and sets that element's inner text to the node's text.
 *
 * @param string the text node being visited
 */
@Override
public void visitStringNode(Text string) {
  getParent(string.getParent()).setInnerText(string.getText());
}