Beispiel #1
0
  /**
   * Manually-run walkthrough (disabled via {@code @Ignore}) that loads a Baidu travel listing
   * page, clicks through its pagination links and logs the title, detail URL, image sources and
   * summary text of every list entry found on each page.
   *
   * <p>The test performs live network access and sleeps between steps, so it is slow and depends
   * on the remote page's current markup. When an expected element is absent it fails with an
   * {@link IllegalStateException} naming the missing element.
   *
   * @throws Exception on I/O failure, interruption while sleeping, or unexpected page structure
   */
  @Test
  @Ignore
  public void test01() throws Exception {
    System.out.println("-------------------------------");
    // try-with-resources closes the client on every exit path.
    try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
      WebClientOptions options = webClient.getOptions();
      options.setThrowExceptionOnFailingStatusCode(false);
      options.setThrowExceptionOnScriptError(false);
      options.setCssEnabled(false);
      options.setJavaScriptEnabled(true);
      options.setTimeout(50000);
      // webClient.setAjaxController(new NicelyResynchronizingAjaxController());
      HtmlPage pageOrgin = webClient.getPage("http://lvyou.baidu.com/jinhua/jingdian");
      // NOTE(review): presumably gives the page's JavaScript time to render - confirm.
      Thread.sleep(5000);

      HtmlDivision pageDiv =
          (HtmlDivision) requireFirst(pageOrgin.querySelectorAll(".pagination"), ".pagination");
      DomNodeList<HtmlElement> liElements = pageDiv.getElementsByTagName("li");
      // The last <li> is deliberately not visited.
      // NOTE(review): presumably it is the "next page" control - confirm against the markup.
      int pageSize = liElements.size() - 1;
      for (int pageNow = 0; pageNow < pageSize; pageNow++) {
        HtmlAnchor pageAnchor =
            (HtmlAnchor)
                requireFirst(
                    liElements.get(pageNow).getElementsByTagName("a"),
                    "<a> in pagination item " + pageNow);
        HtmlPage page = pageAnchor.click();
        Thread.sleep(10000);

        DomElement jViewDom = page.getElementById("J-view-list-container");
        if (jViewDom == null) {
          throw new IllegalStateException(
              "Element #J-view-list-container not found after clicking pagination item "
                  + pageNow);
        }
        DomNodeList<HtmlElement> lis = jViewDom.getElementsByTagName("li");
        for (HtmlElement li : lis) {
          // Title
          HtmlAnchor titleAnchor = (HtmlAnchor) requireFirst(li.querySelectorAll(".title"), ".title");
          System.out.println("---------------标题----------------");
          log.debug("{}", titleAnchor.asText());

          // Detail URL: the href is appended to the site root.
          HtmlAnchor picAnchor = (HtmlAnchor) requireFirst(li.querySelectorAll(".pic"), ".pic");
          System.out.println("---------------详情URL----------------");
          String detailUrl = "http://lvyou.baidu.com" + picAnchor.getAttribute("href");
          log.debug("{}", detailUrl);

          // Images nested inside the picture anchor
          DomNodeList<HtmlElement> imgEelements = picAnchor.getElementsByTagName("img");
          for (HtmlElement imgEelement : imgEelements) {
            System.out.println("---------------图片----------------");
            log.debug("{}", imgEelement.getAttribute("src"));
          }

          // Summary: first paragraph inside the ".view-userSays" block
          HtmlDivision sumDiv =
              (HtmlDivision) requireFirst(li.querySelectorAll(".view-userSays"), ".view-userSays");
          HtmlParagraph sumPara =
              (HtmlParagraph)
                  requireFirst(sumDiv.getElementsByTagName("p"), "<p> in .view-userSays");
          System.out.println("---------------摘要----------------");
          log.debug("{}", sumPara.asText());
        }
      }
    }
    System.out.println("-------------------------------");
  }

  /**
   * Returns the first node of {@code nodes}, failing with a descriptive message when it is empty.
   *
   * @param nodes lookup result to take the first element from
   * @param description what was being looked up; used in the failure message
   * @return the first node in {@code nodes}
   * @throws IllegalStateException if {@code nodes} is empty
   */
  private static <T extends DomNode> T requireFirst(DomNodeList<T> nodes, String description) {
    if (nodes.isEmpty()) {
      throw new IllegalStateException("Expected element not found: " + description);
    }
    return nodes.get(0);
  }
Beispiel #2
0
  /**
   * Wraps a form element and picks out its login-related {@code <input>} children.
   *
   * <p>Every {@code <input>} below {@code form} is inspected in document order:
   *
   * <ul>
   *   <li>{@code type="password"} becomes the password field;
   *   <li>{@code type="submit"} becomes the submit field;
   *   <li>any other input whose {@code id} or {@code name} contains {@code "user"}
   *       (case-sensitive) becomes the username field.
   * </ul>
   *
   * <p>The {@code type} value is compared case-insensitively. When several inputs match the same
   * role the last one wins. Fields with no matching input stay {@code null}.
   *
   * @param form the form element to scan; must not be {@code null}
   */
  public WebForm(DomElement form) {
    this.form = form;
    this.inputs = form.getElementsByTagName("input");
    this.submitField = null;
    this.usernameField = null;
    this.passwordField = null;

    for (HtmlElement input : inputs) {
      String inputType = input.getAttribute("type");
      if (inputType == null) {
        continue;
      }

      // The instanceof guards keep an element whose runtime class does not match its declared
      // type from aborting construction with a ClassCastException; such an element is skipped.
      if ("password".equalsIgnoreCase(inputType)) {
        if (input instanceof HtmlPasswordInput) {
          passwordField = (HtmlPasswordInput) input;
        }
      } else if ("submit".equalsIgnoreCase(inputType)) {
        if (input instanceof HtmlSubmitInput) {
          submitField = (HtmlSubmitInput) input;
        }
      } else if (input instanceof HtmlInput
          && (mentionsUser(input.getId()) || mentionsUser(input.getAttribute("name")))) {
        usernameField = (HtmlInput) input;
      }
    }
  }

  /** Returns whether {@code value} is non-null and contains the substring {@code "user"}. */
  private static boolean mentionsUser(String value) {
    return value != null && value.contains("user");
  }