/** Test scan with data which is of diff nodes type */ public void testScan() throws ParserException { createParser( "<A HREF=\"mytest.html\"><IMG SRC=\"abcd.jpg\">Hello World</A>", "http://www.yahoo.com"); parser.setNodeFactory( new PrototypicalNodeFactory( new Tag[] { new LinkTag(), new ImageTag(), })); parseAndAssertNodeCount(1); assertTrue("Node should be a link node", node[0] instanceof LinkTag); LinkTag linkTag = (LinkTag) node[0]; // Get the link data and cross-check Node[] dataNode = new Node[10]; int i = 0; for (SimpleNodeIterator e = linkTag.children(); e.hasMoreNodes(); ) { dataNode[i++] = e.nextNode(); } assertEquals("Number of data nodes", new Integer(2), new Integer(i)); assertTrue("First data node should be an Image Node", dataNode[0] instanceof ImageTag); assertTrue("Second data node shouls be a String Node", dataNode[1] instanceof Text); // Check the contents of each data node ImageTag imageTag = (ImageTag) dataNode[0]; assertEquals("Image URL", "http://www.yahoo.com/abcd.jpg", imageTag.getImageURL()); Text stringNode = (Text) dataNode[1]; assertEquals("String Contents", "Hello World", stringNode.getText()); }
/**
 * Parses two sibling {@code <custom>} tags, the first of which contains an {@code <another>}
 * tag that is never explicitly closed, and verifies the node count, positions, nesting and the
 * HTML regenerated for both custom tags.
 *
 * @throws ParserException if thrown by the parsing helpers this test calls
 */
public void testCompositeTagWithDeadlock() throws ParserException {
    String html =
            "<custom>"
                    + "<another>something"
                    + "</custom>"
                    + "<custom>"
                    + "<another>else</another>"
                    + "</custom>";
    createParser(html);

    Tag[] prototypes =
            new Tag[] {
                new CustomTag(), new AnotherTag(true),
            };
    parser.setNodeFactory(new PrototypicalNodeFactory(prototypes));
    parseAndAssertNodeCount(2);

    // First custom tag: structure and positions.
    assertType("node", CustomTag.class, node[0]);
    CustomTag first = (CustomTag) node[0];
    assertEquals("child count", 1, first.getChildCount());
    assertFalse("custom tag should not be xml end tag", first.isEmptyXmlTag());
    assertEquals("starting loc", 0, first.getStartPosition());
    assertEquals("ending loc", 8, first.getEndPosition());
    assertEquals("starting line position", 0, first.getStartingLineNumber());
    assertEquals("ending line position", 0, first.getEndingLineNumber());

    // Nested tag and its text content.
    AnotherTag nested = (AnotherTag) first.childAt(0);
    assertEquals("anotherTag child count", 1, nested.getChildCount());
    Text nestedText = (Text) nested.childAt(0);
    assertStringEquals("anotherTag child text", "something", nestedText.toPlainTextString());
    assertStringEquals(
            "first custom tag html",
            "<custom><another>something</another></custom>",
            first.toHtml());

    // Second custom tag: regenerated HTML only.
    CustomTag second = (CustomTag) node[1];
    assertStringEquals(
            "second custom tag html",
            "<custom><another>else</another></custom>",
            second.toHtml());
}
/**
 * Parses a {@code <Custom>} tag that contains an {@code <Another>} tag, an unregistered
 * {@code <unknown>} tag and a self-closing {@code <Custom/>}, followed by a second top-level
 * {@code <Custom/>}. Verifies the two top-level nodes, the first tag's child count and the
 * text nested inside the {@code <Another>} tag.
 *
 * @throws ParserException if thrown by the parsing helpers this test calls
 */
public void testCompositeTagWithTwoNestedTags() throws ParserException {
    String html =
            "<Custom>"
                    + "<Another>"
                    + "Hello"
                    + "</Another>"
                    + "<unknown>"
                    + "World"
                    + "</unknown>"
                    + "<Custom/>"
                    + "</Custom>"
                    + "<Custom/>";
    createParser(html);

    Tag[] prototypes =
            new Tag[] {
                new CustomTag(), new AnotherTag(false),
            };
    parser.setNodeFactory(new PrototypicalNodeFactory(prototypes));
    parseAndAssertNodeCount(2);

    assertType("first node", CustomTag.class, node[0]);
    assertType("second node", CustomTag.class, node[1]);
    CustomTag outer = (CustomTag) node[0];
    assertEquals("first custom tag children count", 5, outer.getChildCount());

    // The first child is the recognised nested tag.
    Node firstChild = outer.childAt(0);
    assertType("first child", AnotherTag.class, firstChild);
    AnotherTag inner = (AnotherTag) firstChild;
    assertEquals("another tag children count", 1, inner.getChildCount());

    // Its only child is the text it encloses.
    Node innerChild = inner.childAt(0);
    assertType("nested child", Text.class, innerChild);
    Text hello = (Text) innerChild;
    assertEquals("text", "Hello", hello.toPlainTextString());
}
/**
 * Regression test for a bug seen on the freshmeat page, reported by Mazlan Mat, involving the
 * tag {@code <A>Revision<\a>}. Note (Derrick): this is actually completely legal HTML.
 *
 * <p>Checks that an anchor without attributes parses to a single tag that round-trips to the
 * input HTML and has one {@link Text} child.
 *
 * @throws ParserException if thrown by the parsing helpers this test calls
 */
public void testFreshMeatBug() throws ParserException {
    String html = "<a>Revision</a>";
    createParser(html, "http://www.yahoo.com");
    parseAndAssertNodeCount(1);

    assertTrue("Node 0 should be a tag", node[0] instanceof Tag);
    Tag anchor = (Tag) node[0];
    assertEquals("Tag Contents", html, anchor.toHtml());
    assertEquals("Node 0 should have one child", 1, anchor.getChildren().size());

    // Fetch the child only after the size assertion, as the original did.
    Node onlyChild = anchor.getChildren().elementAt(0);
    assertTrue("The child should be a string node", onlyChild instanceof Text);
    Text revision = (Text) onlyChild;
    assertEquals("Text Contents", "Revision", revision.getText());
}
public void testErroneousLinkBug() throws ParserException { createParser( "Site Comments?<br>" + "<a href=\"mailto:[email protected]?subject=Site Comments\">" + "Mail Us" + "<a>"); parseAndAssertNodeCount(4); // The first node should be a Text assertTrue("First node should be a Text", node[0] instanceof Text); Text stringNode = (Text) node[0]; assertEquals("Text of the Text", "Site Comments?", stringNode.getText()); assertTrue("Second node should be a tag", node[1] instanceof Tag); assertTrue("Third node should be a link", node[2] instanceof LinkTag); // LinkScanner.evaluate() says no HREF means it isn't a link: assertTrue("Fourth node should be a tag", node[3] instanceof Tag); }
/** * Bug reported by Raj Sharma,5-Apr-2002, upon parsing http://www.samachar.com, the entire page * could not be picked up. The problem was occurring after parsing a particular link after which * the parsing would not proceed. This link was spread over three lines. The bug has been * reproduced and fixed. */ public void testMultipleLineBug() throws ParserException { createParser( "<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font><a \n" + "href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n" + "nical.html\"> Journalism 3.0</a> by Rajesh Jain"); parser.setNodeFactory(new PrototypicalNodeFactory(new LinkTag())); parseAndAssertNodeCount(8); assertTrue("Seventh node should be a link tag", node[6] instanceof LinkTag); LinkTag linkTag = (LinkTag) node[6]; String exp = new String( "http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/technical.html"); // assertEquals("Length of link tag",exp.length(), linkTag.getLink().length()); assertStringEquals("Link URL of link tag", exp, linkTag.getLink()); assertEquals("Link Text of link tag", " Journalism 3.0", linkTag.getLinkText()); assertTrue("Eight node should be a string node", node[7] instanceof Text); Text stringNode = (Text) node[7]; assertEquals("String node contents", " by Rajesh Jain", stringNode.getText()); }
/**
 * Returns the string form produced by {@code delegate}.
 *
 * @return the result of {@code delegate.toString()}
 */
public String toString() {
    final String description = delegate.toString();
    return description;
}
/**
 * Returns the HTML produced by {@code delegate}.
 *
 * @return the result of {@code delegate.toHtml()}
 */
public String toHtml() {
    final String html = delegate.toHtml();
    return html;
}
/**
 * Returns the next sibling as reported by {@code delegate}.
 *
 * @return the result of {@code delegate.getNextSibling()}
 */
public Node getNextSibling() {
    final Node next = delegate.getNextSibling();
    return next;
}
/**
 * Returns the last child as reported by {@code delegate}.
 *
 * @return the result of {@code delegate.getLastChild()}
 */
public Node getLastChild() {
    final Node last = delegate.getLastChild();
    return last;
}
/**
 * Returns the children of this node, as reported by {@code delegate}.
 *
 * @return the list returned by {@code delegate.getChildren()}
 */
public NodeList getChildren() {
    final NodeList children = delegate.getChildren();
    return children;
}
/**
 * Returns the text reported by {@code delegate}.
 *
 * @return the result of {@code delegate.getText()}
 */
public String getText() {
    final String text = delegate.getText();
    return text;
}
/**
 * Forwards the visitor to {@code delegate}.
 *
 * @param visitor the visitor handed to {@code delegate.accept}
 */
public void accept(NodeVisitor visitor) {
    delegate.accept(visitor);
}
/**
 * Sets the inner text of the element resolved from the string node's parent to the string
 * node's text.
 *
 * @param string the visited text node
 */
@Override
public void visitStringNode(Text string) {
    Element target = getParent(string.getParent());
    String content = string.getText();
    target.setInnerText(content);
}
/**
 * Compares by forwarding the argument to {@code delegate.equals}.
 *
 * @param arg0 the object to compare against
 * @return the result of {@code delegate.equals(arg0)}
 */
public boolean equals(Object arg0) {
    final boolean same = delegate.equals(arg0);
    return same;
}
/**
 * Returns the parent as reported by {@code delegate}.
 *
 * @return the result of {@code delegate.getParent()}
 */
public Node getParent() {
    final Node parent = delegate.getParent();
    return parent;
}
/**
 * Forwards the collection request to {@code delegate}.
 *
 * @param list the list passed through to {@code delegate.collectInto}
 * @param filter the filter passed through to {@code delegate.collectInto}
 */
public void collectInto(NodeList list, NodeFilter filter) {
    delegate.collectInto(list, filter);
}
/**
 * Sets the parent on {@code delegate}.
 *
 * @param node the parent passed through to {@code delegate.setParent}
 */
public void setParent(Node node) {
    delegate.setParent(node);
}
/**
 * Returns the starting position of the node, as reported by {@code delegate}.
 *
 * @return the result of {@code delegate.getStartPosition()}
 */
public int getStartPosition() {
    final int start = delegate.getStartPosition();
    return start;
}
/**
 * Sets the children of this node by forwarding them to {@code delegate}.
 *
 * @param children the new child list passed through to {@code delegate.setChildren}
 */
public void setChildren(NodeList children) {
    delegate.setChildren(children);
}
/**
 * Sets the starting position of the node by forwarding it to {@code delegate}.
 *
 * @param position the new start position passed through to {@code delegate.setStartPosition}
 */
public void setStartPosition(int position) {
    delegate.setStartPosition(position);
}
/**
 * Returns the previous sibling as reported by {@code delegate}.
 *
 * @return the result of {@code delegate.getPreviousSibling()}
 */
public Node getPreviousSibling() {
    final Node previous = delegate.getPreviousSibling();
    return previous;
}
/**
 * Returns the ending position of the node, as reported by {@code delegate}.
 *
 * @return the result of {@code delegate.getEndPosition()}
 */
public int getEndPosition() {
    final int end = delegate.getEndPosition();
    return end;
}
/**
 * Sets the text on {@code delegate}.
 *
 * @param text the text passed through to {@code delegate.setText}
 */
public void setText(String text) {
    delegate.setText(text);
}
/**
 * Sets the ending position of the node by forwarding it to {@code delegate}.
 *
 * @param position the new end position passed through to {@code delegate.setEndPosition}
 */
public void setEndPosition(int position) {
    delegate.setEndPosition(position);
}
/**
 * Returns the plain-text form produced by {@code delegate}.
 *
 * @return the result of {@code delegate.toPlainTextString()}
 */
public String toPlainTextString() {
    final String plainText = delegate.toPlainTextString();
    return plainText;
}
/**
 * Returns the page this node came from, as reported by {@code delegate}.
 *
 * @return the result of {@code delegate.getPage()}
 */
public Page getPage() {
    final Page source = delegate.getPage();
    return source;
}
/**
 * Forwards the semantic-action call to {@code delegate}.
 *
 * @throws ParserException if thrown by {@code delegate.doSemanticAction()}
 */
public void doSemanticAction() throws ParserException {
    delegate.doSemanticAction();
}
/**
 * Sets the page this node came from by forwarding it to {@code delegate}.
 *
 * @param page the page passed through to {@code delegate.setPage}
 */
public void setPage(Page page) {
    delegate.setPage(page);
}