Exemple #1
0
  /**
   * Parses a link that wraps an image tag and a run of text, then checks that the link's children
   * come back as nodes of the expected, differing types with the expected contents.
   *
   * @throws ParserException propagated from the parsing calls made by this test
   */
  public void testScan() throws ParserException {
    createParser(
        "<A HREF=\"mytest.html\"><IMG SRC=\"abcd.jpg\">Hello World</A>", "http://www.yahoo.com");
    parser.setNodeFactory(
        new PrototypicalNodeFactory(
            new Tag[] {
              new LinkTag(), new ImageTag(),
            }));
    parseAndAssertNodeCount(1);
    assertTrue("Node should be a link node", node[0] instanceof LinkTag);

    LinkTag linkTag = (LinkTag) node[0];
    // Collect the link's children. Only the two expected nodes are stored; any extras are merely
    // counted, so a surplus fails the count assertion below instead of overrunning the array.
    Node[] dataNode = new Node[2];
    int childCount = 0;
    for (SimpleNodeIterator e = linkTag.children(); e.hasMoreNodes(); ) {
      Node child = e.nextNode();
      if (childCount < dataNode.length) {
        dataNode[childCount] = child;
      }
      childCount++;
    }
    // Compare as primitives rather than through the deprecated Integer(int) constructor.
    assertEquals("Number of data nodes", 2, childCount);
    assertTrue("First data node should be an Image Node", dataNode[0] instanceof ImageTag);
    assertTrue("Second data node shouls be a String Node", dataNode[1] instanceof Text);

    // Check the contents of each data node
    ImageTag imageTag = (ImageTag) dataNode[0];
    assertEquals("Image URL", "http://www.yahoo.com/abcd.jpg", imageTag.getImageURL());
    Text stringNode = (Text) dataNode[1];
    assertEquals("String Contents", "Hello World", stringNode.getText());
  }
  /**
   * Parses two sibling {@code <custom>} elements, the first of which holds an {@code <another>}
   * tag with no explicit end tag, and verifies the resulting node tree, positions and the HTML
   * each custom tag renders.
   *
   * @throws ParserException propagated from the parsing calls made by this test
   */
  public void testCompositeTagWithDeadlock() throws ParserException {
    String html =
        "<custom>"
            + "<another>something"
            + "</custom>"
            + "<custom>"
            + "<another>else</another>"
            + "</custom>";
    createParser(html);
    Tag[] prototypes = {new CustomTag(), new AnotherTag(true)};
    parser.setNodeFactory(new PrototypicalNodeFactory(prototypes));
    parseAndAssertNodeCount(2);
    assertType("node", CustomTag.class, node[0]);

    // First <custom>: structure and source positions.
    CustomTag first = (CustomTag) node[0];
    assertEquals("child count", 1, first.getChildCount());
    assertFalse("custom tag should not be xml end tag", first.isEmptyXmlTag());
    assertEquals("starting loc", 0, first.getStartPosition());
    assertEquals("ending loc", 8, first.getEndPosition());
    assertEquals("starting line position", 0, first.getStartingLineNumber());
    assertEquals("ending line position", 0, first.getEndingLineNumber());

    // Its single <another> child and the text inside it.
    AnotherTag nested = (AnotherTag) first.childAt(0);
    assertEquals("anotherTag child count", 1, nested.getChildCount());
    Text nestedText = (Text) nested.childAt(0);
    assertStringEquals("anotherTag child text", "something", nestedText.toPlainTextString());
    assertStringEquals(
        "first custom tag html", "<custom><another>something</another></custom>", first.toHtml());

    // Second <custom>: only its rendered HTML is checked.
    CustomTag second = (CustomTag) node[1];
    assertStringEquals(
        "second custom tag html", "<custom><another>else</another></custom>", second.toHtml());
  }
 /**
  * Parses a {@code <Custom>} element containing an {@code <Another>} tag, an {@code <unknown>}
  * tag pair that has no registered prototype, and a {@code <Custom/>}, followed by a second
  * top-level {@code <Custom/>}; then checks the top-level nodes and the first nested child.
  *
  * @throws ParserException propagated from the parsing calls made by this test
  */
 public void testCompositeTagWithTwoNestedTags() throws ParserException {
   String html =
       "<Custom>"
           + "<Another>"
           + "Hello"
           + "</Another>"
           + "<unknown>"
           + "World"
           + "</unknown>"
           + "<Custom/>"
           + "</Custom>"
           + "<Custom/>";
   createParser(html);
   Tag[] prototypes = {new CustomTag(), new AnotherTag(false)};
   parser.setNodeFactory(new PrototypicalNodeFactory(prototypes));
   parseAndAssertNodeCount(2);
   assertType("first node", CustomTag.class, node[0]);
   assertType("second node", CustomTag.class, node[1]);

   CustomTag outer = (CustomTag) node[0];
   assertEquals("first custom tag children count", 5, outer.getChildCount());

   // Distinct local names keep the parsed-node array 'node' unshadowed.
   Node firstChild = outer.childAt(0);
   assertType("first child", AnotherTag.class, firstChild);
   AnotherTag another = (AnotherTag) firstChild;
   assertEquals("another tag children count", 1, another.getChildCount());

   Node nestedChild = another.childAt(0);
   assertType("nested child", Text.class, nestedChild);
   Text helloText = (Text) nestedChild;
   assertEquals("text", "Hello", helloText.toPlainTextString());
 }
Exemple #4
0
 /**
  * Regression test for a tag seen on the freshmeat page: &lt;A&gt;Revision&lt;\a&gt;, reported by
  * Mazlan Mat as a really bad HTML tag. Note from Derrick: this is actually completely legal
  * HTML.
  *
  * <p>Checks that the anchor parses to a single tag that renders back to the input and has one
  * text child.
  *
  * @throws ParserException propagated from the parsing calls made by this test
  */
 public void testFreshMeatBug() throws ParserException {
   String markup = "<a>Revision</a>";
   createParser(markup, "http://www.yahoo.com");
   parseAndAssertNodeCount(1);
   assertTrue("Node 0 should be a tag", node[0] instanceof Tag);

   Tag anchor = (Tag) node[0];
   assertEquals("Tag Contents", markup, anchor.toHtml());
   assertEquals("Node 0 should have one child", 1, anchor.getChildren().size());

   // Fetch the only child once, then check its type and its text.
   Object onlyChild = anchor.getChildren().elementAt(0);
   assertTrue("The child should be a string node", onlyChild instanceof Text);
   Text revisionText = (Text) onlyChild;
   assertEquals("Text Contents", "Revision", revisionText.getText());
 }
Exemple #5
0
 /**
  * Parses text followed by a {@code <br>}, a mailto link and a stray {@code <a>} with no HREF,
  * and checks the type of each of the four resulting top-level nodes.
  *
  * @throws ParserException propagated from the parsing calls made by this test
  */
 public void testErroneousLinkBug() throws ParserException {
   String html =
       "Site Comments?<br>"
           + "<a href=\"mailto:[email protected]?subject=Site Comments\">"
           + "Mail Us"
           + "<a>";
   createParser(html);
   parseAndAssertNodeCount(4);

   // Node 0: the leading text.
   assertTrue("First node should be a Text", node[0] instanceof Text);
   Text leadingText = (Text) node[0];
   assertEquals("Text of the Text", "Site Comments?", leadingText.getText());

   // Nodes 1 and 2: the <br> tag and the mailto link.
   assertTrue("Second node should be a tag", node[1] instanceof Tag);
   assertTrue("Third node should be a link", node[2] instanceof LinkTag);

   // Node 3: LinkScanner.evaluate() says no HREF means it isn't a link.
   assertTrue("Fourth node should be a tag", node[3] instanceof Tag);
 }
Exemple #6
0
 /**
  * Regression test for a bug reported by Raj Sharma, 5-Apr-2002: upon parsing
  * http://www.samachar.com the entire page could not be picked up. Parsing stopped after a
  * particular link that was spread over three lines. The bug has been reproduced and fixed.
  *
  * <p>The markup below splits the anchor across line breaks, including one inside the href
  * value, and the test checks the link URL, the link text and the trailing text node.
  *
  * @throws ParserException propagated from the parsing calls made by this test
  */
 public void testMultipleLineBug() throws ParserException {
   createParser(
       "<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font><a \n"
           + "href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n"
           + "nical.html\"> Journalism 3.0</a> by Rajesh Jain");
   parser.setNodeFactory(new PrototypicalNodeFactory(new LinkTag()));
   parseAndAssertNodeCount(8);
   assertTrue("Seventh node should be a link tag", node[6] instanceof LinkTag);
   LinkTag linkTag = (LinkTag) node[6];
   // Expected URL: the href from the markup with its embedded line break removed.
   String exp =
       "http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/technical.html";
   assertStringEquals("Link URL of link tag", exp, linkTag.getLink());
   assertEquals("Link Text of link tag", " Journalism 3.0", linkTag.getLinkText());
   assertTrue("Eight node should be a string node", node[7] instanceof Text);
   Text stringNode = (Text) node[7];
   assertEquals("String node contents", " by Rajesh Jain", stringNode.getText());
 }
 /**
  * Returns the delegate's string form.
  *
  * @return the result of {@code delegate.toString()}
  */
 @Override
 public String toString() {
   return delegate.toString();
 }
 /**
  * Delegates to {@code delegate.toHtml()}.
  *
  * @return the string produced by the delegate's {@code toHtml()}
  */
 public String toHtml() {
   return delegate.toHtml();
 }
 /**
  * Delegates to {@code delegate.getNextSibling()}.
  *
  * @return the node returned by the delegate's {@code getNextSibling()}
  */
 public Node getNextSibling() {
   return delegate.getNextSibling();
 }
 /**
  * Delegates to {@code delegate.getLastChild()}.
  *
  * @return the node returned by the delegate's {@code getLastChild()}
  */
 public Node getLastChild() {
   return delegate.getLastChild();
 }
 /**
  * Returns the child list reported by the wrapped delegate.
  *
  * @return the delegate's list of children if one has been set, <code>null</code> otherwise
  */
 public NodeList getChildren() {
   return delegate.getChildren();
 }
 /**
  * Delegates to {@code delegate.getText()}.
  *
  * @return the string returned by the delegate's {@code getText()}
  */
 public String getText() {
   return delegate.getText();
 }
 /**
  * Passes the given visitor on to {@code delegate.accept(...)}.
  *
  * @param visitor the visitor handed to the delegate
  */
 public void accept(NodeVisitor visitor) {
   delegate.accept(visitor);
 }
 /**
  * Copies the text of a visited text node onto the element resolved for that node's parent.
  *
  * @param string the visited text node; its parent is passed to {@code getParent(...)} to find
  *     the target element, whose inner text is then set to the node's text
  */
 @Override
 public void visitStringNode(Text string) {
   getParent(string.getParent()).setInnerText(string.getText());
 }
 /**
  * Decides equality entirely by asking the delegate.
  *
  * <p>NOTE(review): because the comparison is forwarded as {@code delegate.equals(arg0)}, the
  * result reflects the delegate's notion of equality, not this wrapper's; confirm that
  * {@code hashCode()} is forwarded consistently elsewhere in the class.
  *
  * @param arg0 the object to compare against
  * @return the result of {@code delegate.equals(arg0)}
  */
 @Override
 public boolean equals(Object arg0) {
   return delegate.equals(arg0);
 }
 /**
  * Delegates to {@code delegate.getParent()}.
  *
  * @return the node returned by the delegate's {@code getParent()}
  */
 public Node getParent() {
   return delegate.getParent();
 }
 /**
  * Forwards both arguments unchanged to {@code delegate.collectInto(...)}.
  *
  * @param list the node list handed to the delegate
  * @param filter the filter handed to the delegate
  */
 public void collectInto(NodeList list, NodeFilter filter) {
   delegate.collectInto(list, filter);
 }
 /**
  * Forwards the given node to {@code delegate.setParent(...)}.
  *
  * @param node the node handed to the delegate as its parent
  */
 public void setParent(Node node) {
   delegate.setParent(node);
 }
 /**
  * Returns the starting position of the node, as reported by the wrapped delegate.
  *
  * @return the delegate's start position
  */
 public int getStartPosition() {
   return delegate.getStartPosition();
 }
 /**
  * Set the children of this node by forwarding the list to the delegate.
  *
  * @param children The new list of children this node contains; passed unchanged to {@code
  *     delegate.setChildren(...)}.
  */
 public void setChildren(NodeList children) {
   delegate.setChildren(children);
 }
 /**
  * Sets the starting position of the node by forwarding the value to the delegate.
  *
  * @param position The new start position; passed unchanged to {@code
  *     delegate.setStartPosition(...)}.
  */
 public void setStartPosition(int position) {
   delegate.setStartPosition(position);
 }
 /**
  * Delegates to {@code delegate.getPreviousSibling()}.
  *
  * @return the node returned by the delegate's {@code getPreviousSibling()}
  */
 public Node getPreviousSibling() {
   return delegate.getPreviousSibling();
 }
 /**
  * Returns the ending position of the node, as reported by the wrapped delegate.
  *
  * @return the delegate's end position
  */
 public int getEndPosition() {
   return delegate.getEndPosition();
 }
 /**
  * Forwards the given text to {@code delegate.setText(...)}.
  *
  * @param text the string handed to the delegate
  */
 public void setText(String text) {
   delegate.setText(text);
 }
 /**
  * Sets the ending position of the node by forwarding the value to the delegate.
  *
  * @param position The new end position; passed unchanged to {@code
  *     delegate.setEndPosition(...)}.
  */
 public void setEndPosition(int position) {
   delegate.setEndPosition(position);
 }
 /**
  * Delegates to {@code delegate.toPlainTextString()}.
  *
  * @return the string returned by the delegate's {@code toPlainTextString()}
  */
 public String toPlainTextString() {
   return delegate.toPlainTextString();
 }
 /**
  * Returns the page this node came from, as reported by the wrapped delegate.
  *
  * @return the page that supplied this node, obtained from the delegate
  */
 public Page getPage() {
   return delegate.getPage();
 }
 /**
  * Delegates to {@code delegate.doSemanticAction()}.
  *
  * @throws ParserException propagated from the delegate's {@code doSemanticAction()}
  */
 public void doSemanticAction() throws ParserException {
   delegate.doSemanticAction();
 }
 /**
  * Set the page this node came from by forwarding it to the delegate.
  *
  * @param page The page that supplied this node; passed unchanged to {@code
  *     delegate.setPage(...)}.
  */
 public void setPage(Page page) {
   delegate.setPage(page);
 }