Exemplo n.º 1
0
  /**
   * Parses a small in-memory HTML string, looks up the node at {@code html.body.h1} and checks
   * that node and its single {@code CONTENT} child.
   *
   * <p>All {@code assertEquals} calls pass the expected value first, so that a failure message
   * reports "expected" and "actual" the right way round.
   *
   * @throws Exception any exception raised by the parser or the path helpers is propagated and
   *     fails the test
   */
  public void testCharsetWithTEXT() throws Exception {
    String text =
        "<html>"
            + "<head>"
            + "</head>"
            + "<body>"
            + "<h1>This is a HTML file for testing!</h1>"
            + "</body>"
            + "</html>";
    HTMLDocument document = HTMLParser.createDocument(text);
    assertNotNull(document);

    // Locate the heading through its dotted node path.
    NodePath path = NodePathParser.toPath("html.body.h1");
    HTMLNode node = NodePathUtil.lookFor(document.getRoot(), path);
    assertNotNull(node);
    assertEquals(Name.H1, node.getName());
    assertEquals("H1", node.getName().toString());
    System.out.println("NAME: " + node.getName());
    System.out.println("VALUE: " + new String(node.getValue()));
    System.out.println("TEXTVALUE: " + node.getTextValue());

    // The heading is expected to hold exactly one child: its text content.
    assertNotNull(node.getChildren());
    assertEquals(1, node.getChildren().size());
    assertEquals(true, !node.getChildren().isEmpty());

    // Check the child for null before dereferencing it, so a missing child fails as an assertion.
    HTMLNode child = node.getChildren().get(0);
    assertNotNull(child);
    assertEquals(Name.CONTENT, child.getName());
    assertEquals("CONTENT", child.getName().toString());

    // A content node is expected to report no children list and to carry the heading text.
    assertNull(child.getChildren());
    assertEquals("This is a HTML file for testing!", child.getTextValue());
    System.out.println("CONTENT-VALUE: " + new String(child.getValue()));
    assertEquals(new String(child.getValue()), child.getTextValue());
  }
Exemplo n.º 2
0
  /**
   * Parses a document fetched from a remote URL, passing {@code null} as the second argument of
   * {@code HTMLParser.createDocument}, and checks that a document with a root node is produced.
   *
   * <p>The test returns without asserting anything when the host cannot be resolved or the
   * connection is refused, so that it does not fail on machines without network access.
   *
   * @throws Exception any other exception raised while fetching or parsing is propagated and
   *     fails the test
   */
  public void testCharsetWithURL() throws Exception {
    HTMLDocument document;
    try {
      URL url = new URL("http://www.24h.com.vn");
      // URL.openStream() is shorthand for openConnection().getInputStream(), so one fetch is
      // enough; the page was previously downloaded and parsed twice with the first result dropped.
      java.io.InputStream input = url.openStream();
      try {
        document = HTMLParser.createDocument(input, null);
      } finally {
        // Release the network stream whether or not parsing succeeded.
        input.close();
      }
    } catch (java.net.UnknownHostException e) {
      // Deliberately ignored: no DNS / offline environment, nothing to verify.
      return;
    } catch (java.net.ConnectException e) {
      // Deliberately ignored: remote host unreachable, nothing to verify.
      return;
    }

    assertNotNull(document);
    assertNotNull(document.getRoot());
  }
Exemplo n.º 3
0
  /**
   * Parses the fixture file held in {@code this.file_}, passing {@code null} as the second
   * argument of {@code HTMLParser.createDocument}, then checks the charset reported by
   * {@code HTMLParser.getCharset()} and prints details of the document and its doctype.
   *
   * @throws Exception any exception raised by the parser is propagated and fails the test
   */
  public void testCharsetWithFile() throws Exception {
    HTMLDocument document = HTMLParser.createDocument(this.file_, null);
    assertNotNull(document);
    assertEquals("ASCII", HTMLParser.getCharset());
    // Compare by value: assertNotSame only checks object identity, which says nothing reliable
    // about the charset name itself.
    // NOTE(review): assumes getCharset() returns a String - confirm against HTMLParser.
    assertEquals(false, "UTF-8".equals(HTMLParser.getCharset()));
    System.out.println("CHARSET: " + HTMLParser.getCharset());

    System.out.println("DOCUMENT-TEXTVALUE: " + document.getTextValue());
    System.out.println("DOCUMENT-ROOT: " + document.getRoot().getName().toString());
    System.out.println("CLASS: " + document.getClass().getName() + "\n");

    // Diagnostic output only; nothing below is asserted.
    System.out.println("DOCUMENT-DOCTYPE-TEXTVALUE: " + document.getDoctype().getTextValue());
    System.out.println("DOCUMENT-DOCTYPE-VALUE: " + new String(document.getDoctype().getValue()));
    System.out.println("DOCUMENT-DOCTYPE-NAME: " + document.getDoctype().getName().toString());
  }
Exemplo n.º 4
0
  /**
   * Exercises node manipulation on a parsed document: looks up the {@code TITLE} node, adds an
   * {@code H2} node and a {@code TABLE} node to it as children, removes the title's text content
   * node and re-attaches that content node under the new {@code H2} node.
   *
   * <p>All {@code assertEquals} calls pass the expected value first, so that a failure message
   * reports "expected" and "actual" the right way round.
   *
   * @throws Exception any exception raised by the parser or the path helpers is propagated and
   *     fails the test
   */
  public void testNode() throws Exception {
    // Guard the fixture before dereferencing it so a missing file fails as an assertion.
    assertNotNull(this.file_);
    System.out.println("FILE PATH: " + this.file_.getAbsolutePath());

    // Build the document under test from an in-memory string.
    String text =
        "<html>"
            + "<head>"
            + "<title>My own HTML file</title>"
            + "</head>"
            + "<body>"
            + "<h2>This is a test exercise for me!</h2>"
            + "</body>"
            + "</html>";
    HTMLDocument document = HTMLParser.createDocument(text);
    assertNotNull(document);

    String pathStr = "html.head.title";
    NodePath path = NodePathParser.toPath(pathStr);
    assertNotNull(path);
    assertEquals("HTML[0].HEAD[0].TITLE[0]", path.toString());
    System.out.println("PATH: " + path.toString());

    HTMLNode node = NodePathUtil.lookFor(document.getRoot(), path);
    assertNotNull(node);
    assertEquals(Name.TITLE, node.getName());

    // Add a new H2 tag as a child of the title node; it is expected at index 1,
    // after the title's existing content child.
    NodeImpl impl = new NodeImpl("h2 id = \"dds\"".toCharArray(), Name.H2);
    node.addChild(impl);
    assertNotNull(node.getChildrenNode().get(1));
    assertEquals(Name.H2, node.getChildren().get(1).getName());
    System.out.println("THE NEW NODE-NAME: " + node.getChildrenNode().get(1).getName().toString());
    System.out.println("THE NEW NODE-VALUE: " + new String(node.getChildren().get(1).getValue()));

    // Add a table, taken from a second parsed document, as another child of the title node.
    HTMLDocument doc = HTMLParser.createDocument("<table border='1'><tr></tr></table>");
    HTMLNode table = NodePathUtil.lookFor(doc.getRoot(), NodePathParser.toPath("html.body.table"));
    node.addChild(table);

    // Remove the title's text content node from the document.
    System.out.println("\n\nRemove:");
    HTMLNode contentNode =
        NodePathUtil.lookFor(document.getRoot(), NodePathParser.toPath("html.head.title.content"));
    assertNotNull(contentNode);
    assertEquals(Name.CONTENT, contentNode.getName());
    assertEquals("CONTENT", contentNode.getName().toString());
    assertEquals(new String(contentNode.getValue()), contentNode.getTextValue());

    System.out.println("NODE-VALUE: " + new String(contentNode.getValue()));
    System.out.println("NODE-TEXTVALUE: " + contentNode.getTextValue());

    assertEquals(true, node.getChildren().remove(contentNode));

    // Re-attach the removed content node under the H2 tag that was added above.
    HTMLNode h2Node =
        NodePathUtil.lookFor(document.getRoot(), NodePathParser.toPath("html.head.title.h2"));
    assertNotNull(h2Node);
    assertEquals(Name.H2, h2Node.getName());
    assertEquals("H2", h2Node.getName().toString());
    h2Node.addChild(contentNode);

    // Show the resulting text of the modified title node.
    System.out.println("\n\nShow all the content of HTML file:");
    System.out.println(node.getTextValue());
  }