Пример #1
0
  /**
   * Manual smoke test: downloads one article page from www.zjnu.edu.cn and prints its title, body
   * text and image URLs to standard output.
   *
   * <p>CSS and JavaScript processing are switched off before the page is requested. The method is
   * annotated {@code @Ignore}, so it only runs when that annotation is removed.
   *
   * @throws Exception if the page cannot be fetched or an expected element is missing
   */
  @Test
  @Ignore
  public void test3() throws Exception {
    System.out.println("-------------------------------");

    WebClient webClient = new WebClient(BrowserVersion.CHROME);
    try {
      webClient.getOptions().setCssEnabled(false);
      webClient.getOptions().setJavaScriptEnabled(false);
      HtmlPage page =
          webClient.getPage(
              "http://www.zjnu.edu.cn/news/common/article_show.aspx?article_id=19285");

      // Title: the span with id "mytitle".
      System.out.println("---------------标题----------------");
      HtmlSpan titleSpan = (HtmlSpan) page.getElementById("mytitle");
      System.out.println(titleSpan.asText());
      System.out.println("-------------------------------");

      // Body: the span with id "mycontent".
      System.out.println("---------------正文----------------");
      HtmlSpan contentSpan = (HtmlSpan) page.getElementById("mycontent");
      System.out.println(contentSpan.asText());
      System.out.println("-------------------------------");

      // Images: every <img> nested inside the body span.
      System.out.println("---------------图片----------------");
      DomNodeList<HtmlElement> images = contentSpan.getElementsByTagName("img");
      for (HtmlElement image : images) {
        System.out.println(image.getAttribute("src"));
      }
      System.out.println("-------------------------------");
    } finally {
      // Close the client even when a lookup or cast above throws.
      webClient.close();
    }
    System.out.println("-------------------------------");
  }
Пример #2
0
  /**
   * Manual smoke test: walks the pagination links of the Baidu travel listing for Jinhua and, for
   * every list item on each page, logs the title, detail URL, image URLs and summary paragraph.
   *
   * <p>JavaScript is enabled for this run and the method sleeps for a fixed time after each page
   * load and click before reading the DOM. The method is annotated {@code @Ignore}, so it only
   * runs when that annotation is removed.
   *
   * @throws Exception if a page cannot be fetched, an expected element is missing, or the thread
   *     is interrupted while sleeping
   */
  @Test
  @Ignore
  public void test01() throws Exception {
    System.out.println("-------------------------------");
    WebClient webClient = new WebClient(BrowserVersion.CHROME);
    try {
      WebClientOptions options = webClient.getOptions();
      options.setThrowExceptionOnFailingStatusCode(false);
      options.setThrowExceptionOnScriptError(false);
      options.setCssEnabled(false);
      options.setJavaScriptEnabled(true);
      options.setTimeout(50000);
      // NOTE(review): an earlier revision had a commented-out call installing
      // NicelyResynchronizingAjaxController; the fixed sleeps below are used instead.
      HtmlPage firstPage = webClient.getPage("http://lvyou.baidu.com/jinhua/jingdian");
      Thread.sleep(5000);

      // The first ".pagination" element holds one <li> per pager entry.
      DomNodeList<DomNode> paginationNodes = firstPage.querySelectorAll(".pagination");
      HtmlDivision paginationDiv = (HtmlDivision) paginationNodes.get(0);
      DomNodeList<HtmlElement> pagerItems = paginationDiv.getElementsByTagName("li");

      // The last <li> is skipped. A primitive int avoids unboxing on every loop test.
      int pageCount = pagerItems.size() - 1;
      for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
        DomNodeList<HtmlElement> pagerAnchors =
            pagerItems.get(pageIndex).getElementsByTagName("a");
        HtmlAnchor pagerAnchor = (HtmlAnchor) pagerAnchors.get(0);
        HtmlPage page = pagerAnchor.click();
        Thread.sleep(10000);

        DomElement listContainer = page.getElementById("J-view-list-container");
        DomNodeList<HtmlElement> items = listContainer.getElementsByTagName("li");
        for (HtmlElement item : items) {
          // Title: first ".title" anchor in the item.
          DomNodeList<DomNode> titleNodes = item.querySelectorAll(".title");
          HtmlAnchor titleAnchor = (HtmlAnchor) titleNodes.get(0);
          System.out.println("---------------标题----------------");
          log.debug("{}", titleAnchor.asText());

          // Detail URL: href of the first ".pic" anchor, prefixed with the site host.
          DomNodeList<DomNode> picNodes = item.querySelectorAll(".pic");
          HtmlAnchor picAnchor = (HtmlAnchor) picNodes.get(0);
          System.out.println("---------------详情URL----------------");
          String detailUrl = "http://lvyou.baidu.com" + picAnchor.getAttribute("href");
          log.debug("{}", detailUrl);

          // Images: every <img> inside that ".pic" anchor.
          DomNodeList<HtmlElement> imgElements = picAnchor.getElementsByTagName("img");
          for (HtmlElement imgElement : imgElements) {
            System.out.println("---------------图片----------------");
            log.debug("{}", imgElement.getAttribute("src"));
          }

          // Summary: first <p> inside the first ".view-userSays" block.
          DomNodeList<DomNode> summaryNodes = item.querySelectorAll(".view-userSays");
          HtmlDivision summaryDiv = (HtmlDivision) summaryNodes.get(0);
          DomNodeList<HtmlElement> summaryParagraphs = summaryDiv.getElementsByTagName("p");
          HtmlParagraph summaryParagraph = (HtmlParagraph) summaryParagraphs.get(0);
          System.out.println("---------------摘要----------------");
          log.debug("{}", summaryParagraph.asText());
        }
      }
    } finally {
      webClient.close();
    }
    System.out.println("-------------------------------");
  }
Пример #3
0
  /**
   * Manual smoke test: reads the domestic news index at news.163.com and logs, for every
   * ".item-top" block, the headline text, link, first paragraph and image URLs.
   *
   * <p>For the one hard-coded article URL below, the detail page is fetched as well and its body
   * text and centred images are logged. CSS and JavaScript processing are switched off. The
   * method is annotated {@code @Ignore}, so it only runs when that annotation is removed.
   *
   * @throws Exception if a page cannot be fetched or an expected element is missing
   */
  @Test
  @Ignore
  public void test() throws Exception {
    System.out.println("-------------------------------");

    WebClient webClient = new WebClient(BrowserVersion.CHROME);
    try {
      webClient.getOptions().setCssEnabled(false);
      webClient.getOptions().setJavaScriptEnabled(false);
      HtmlPage page = webClient.getPage("http://news.163.com/domestic/");

      System.out.println("---------------标题----------------");
      DomNodeList<DomNode> itemNodes = page.querySelectorAll(".item-top");
      for (DomNode itemNode : itemNodes) {
        HtmlDivision itemDiv = (HtmlDivision) itemNode;

        // Headline: the first anchor in the block carries both the text and the link.
        DomNodeList<HtmlElement> anchors = itemDiv.getElementsByTagName("a");
        HtmlAnchor headlineAnchor = (HtmlAnchor) anchors.get(0);
        String detailUrl = headlineAnchor.getAttribute("href");
        log.debug("{}", headlineAnchor.asText());
        log.debug("{}", detailUrl);

        // Teaser: the first paragraph in the block.
        DomNodeList<HtmlElement> paragraphs = itemDiv.getElementsByTagName("p");
        HtmlParagraph teaser = (HtmlParagraph) paragraphs.get(0);
        log.debug("{}", teaser.asText());

        DomNodeList<HtmlElement> thumbnails = itemDiv.getElementsByTagName("img");
        for (HtmlElement thumbnail : thumbnails) {
          log.debug("{}", thumbnail.getAttribute("src"));
        }

        // Only this single article is followed through to its detail page.
        if (detailUrl.equals("http://news.163.com/15/1215/17/BAT2L8RB00014JB6.html#f=dlist")) {
          HtmlPage detailPage = webClient.getPage(detailUrl);
          System.out.println("---------------正文----------------");
          DomElement endTextElement = detailPage.getElementById("endText");
          log.debug("{}", endTextElement.asText());

          System.out.println("---------------图片----------------");
          DomNodeList<DomNode> centredNodes = endTextElement.querySelectorAll(".f_center");
          for (DomNode centredNode : centredNodes) {
            HtmlParagraph imageParagraph = (HtmlParagraph) centredNode;
            DomNodeList<HtmlElement> bodyImages = imageParagraph.getElementsByTagName("img");
            for (HtmlElement bodyImage : bodyImages) {
              log.debug("{}", bodyImage.getAttribute("src"));
            }
          }
        }
      }
    } finally {
      // Close the client even when a lookup or cast above throws.
      webClient.close();
    }
    System.out.println("-------------------------------");
  }
Пример #4
0
 /**
  * Fetches one search-result page from www.qixin.com and prints the text and absolute URL of
  * every anchor whose {@code class} attribute is exactly {@code search-result-title}.
  *
  * <p>The request is sent with a fixed set of cookies. If the page cannot be fetched, the stack
  * trace is printed and the method returns without printing any results. The web client is
  * closed on every exit path.
  *
  * <p>NOTE(review): the cookie values are hard-coded session data; confirm whether they should
  * be supplied from configuration instead.
  */
 @Override
 public void run() {
   // Create the web client.
   WebClient webClient = new WebClient();
   try {
     // HtmlUnit's CSS and JavaScript support is poor, so both are switched off.
     webClient.getOptions().setJavaScriptEnabled(false);
     webClient.getOptions().setCssEnabled(false);

     Cookie returnUrlCookie =
         new Cookie(
             "www.qixin.com",
             "login_returnurl",
             "http%3A//www.qixin.com/search/prov/SH%3Fpage%3D2");
     Cookie userKeyCookie =
         new Cookie(
             "www.qixin.com",
             "userKey",
             "QXBAdmin-Web2.0_5tUrhr/6EVtLT+GVfE+vU8k330y+oPICCM6jhUGEoLc%3D");
     Cookie userValueCookie =
         new Cookie("www.qixin.com", "userValue", "4a68111b-0cfa-457f-91bd-b6fda97fa524");
     Cookie sessionCookie =
         new Cookie(
             "www.qixin.com",
             "gr_session_id_955c17a7426f3e98",
             "d25fe84e-fb1d-4ef8-8b4e-b530e5004b30");
     Cookie cdnCookie =
         new Cookie("www.qixin.com", "_alicdn_sec", "5732cf53d99e48a838049be355d47a44000895ae");

     // Cookies are registered in the same order as before.
     CookieManager cookieManager = new CookieManager();
     cookieManager.addCookie(returnUrlCookie);
     cookieManager.addCookie(userValueCookie);
     cookieManager.addCookie(sessionCookie);
     cookieManager.addCookie(userKeyCookie);
     cookieManager.addCookie(cdnCookie);
     webClient.setCookieManager(cookieManager);

     // Fetch the page; without it there is nothing to scan, so stop here on failure
     // instead of dereferencing a null page.
     HtmlPage page;
     try {
       page = webClient.getPage("http://www.qixin.com/search/prov/SH?page=20");
     } catch (IOException e) {
       e.printStackTrace();
       return;
     }

     // Scan every anchor on the page and keep the search-result titles.
     List<?> anchors = page.getByXPath("//a");
     for (Object node : anchors) {
       HtmlAnchor anchor = (HtmlAnchor) node;
       if ("search-result-title".equals(anchor.getAttribute("class"))) {
         System.out.println(anchor.asText());
         System.out.println("http://www.qixin.com" + anchor.getAttribute("href"));
       }
     }
   } finally {
     // Close the web client.
     webClient.close();
   }
 }
 /**
  * Downloads the CSV data Yahoo returns for a stock ticker, retrying until a response is
  * obtained.
  *
  * <p>Every failed attempt is logged to standard output and retried after a five-second pause;
  * the number of retries is not bounded. A response that is empty or starts with {@code '<'} is
  * treated as "no data for this ticker". The shared {@code client} is closed once a CSV has been
  * built or missing data has been detected.
  *
  * <p>NOTE(review): missing data is caught inside this method and reported to the caller as a
  * {@link ConnectionException}; {@code MissingCSVDataException} itself never escapes. That
  * behaviour is kept for existing callers, although the declared contract suggests otherwise.
  *
  * @param ticker The ticker of the stock to look up.
  * @return A retrieval.CSV built from the response body for the given ticker.
  * @throws ConnectionException if Yahoo has no data for the given stock, or if the thread is
  *     interrupted while waiting to retry.
  * @throws exceptions.MissingCSVDataException declared for compatibility; not thrown by the
  *     current implementation (see the note above).
  */
 public synchronized YahooCSV getStockData(String ticker)
     throws ConnectionException, MissingCSVDataException {
   final String url = "http://real-chart.finance.yahoo.com/table.csv?s=" + ticker;
   final String location = "retrieval.InternetConnection.getStockData(" + ticker + ")";
   final long retryDelayMillis = 5000;

   boolean finished = false;
   while (!finished) {
     try {
       String contents = client.getPage(url).getWebResponse().getContentAsString();
       // An empty body used to make charAt(0) throw and spin the retry loop; treat it like
       // the HTML error page Yahoo sends when it has no CSV.
       if (contents == null || contents.isEmpty() || contents.charAt(0) == '<') {
         throw new MissingCSVDataException("No data exists for this stock");
       }
       final YahooCSV csv = new YahooCSV(contents, ticker);
       client.close();
       System.out.println(ticker + " connected to Yahoo");
       return csv;
     } catch (UnknownHostException uhe) {
       // Connection error.
       System.out.println("Exception in " + location);
       System.out.println(uhe.toString());
       System.out.println("Waiting to reconnect.");
     } catch (MissingCSVDataException mcsvde) {
       // There is no CSV data for this stock: stop retrying.
       System.out.println("Exception in " + location);
       System.out.println(mcsvde.toString());
       finished = true;
       client.close();
     } catch (Exception e) {
       System.out.println("Exception in " + location);
       System.out.println(e.toString());
     }

     if (!finished) {
       // Pause before every retry so a persistent failure does not become a busy loop.
       try {
         Thread.sleep(retryDelayMillis);
       } catch (InterruptedException ie) {
         System.out.println("Exception in " + location);
         System.out.println(ie.toString());
         // Keep the interrupt visible to the caller and stop retrying.
         Thread.currentThread().interrupt();
         throw new ConnectionException("Interrupted while waiting to reconnect for " + ticker);
       }
     }
   }
   throw new ConnectionException("Unable to find CSV File for " + ticker);
 }
 /**
  * Downloads the CSV data Yahoo returns for a stock ticker from a given start date, retrying
  * until a response is obtained.
  *
  * <p>The start date is sent as the {@code a} (month), {@code b} (day) and {@code c} (year)
  * query parameters. The month is passed as {@link Calendar#MONTH} reports it, i.e. zero-based
  * (presumably what the endpoint expects; verify against the service).
  *
  * <p>Every failed attempt is logged to standard output and retried after a five-second pause;
  * the number of retries is not bounded. A response that is empty or starts with {@code '<'} is
  * treated as "no data for this ticker". The shared {@code client} is closed once a CSV has been
  * built or missing data has been detected.
  *
  * <p>NOTE(review): missing data is caught inside this method and reported to the caller as a
  * {@link ConnectionException}; {@code MissingCSVDataException} itself never escapes. That
  * behaviour is kept for existing callers, although the declared contract suggests otherwise.
  *
  * @param ticker The ticker of the stock to look up.
  * @param startdate The first date of the range of dates over which to search.
  * @return A retrieval.CSV built from the response body for the given ticker and start date.
  * @throws ConnectionException if Yahoo has no data for the given stock, or if the thread is
  *     interrupted while waiting to retry.
  * @throws exceptions.MissingCSVDataException declared for compatibility; not thrown by the
  *     current implementation (see the note above).
  * @throws exceptions.InvalidStartDateException if the start date is equal to or after the current
  *     date.
  */
 public synchronized YahooCSV getStockData(String ticker, GregorianCalendar startdate)
     throws ConnectionException, MissingCSVDataException, InvalidStartDateException {
   // Make sure that the start date is before today!
   GregorianCalendar today = new GregorianCalendar();
   if (!startdate.before(today)) {
     throw new InvalidStartDateException("The start date must be before the current date.");
   }

   final String url =
       "http://real-chart.finance.yahoo.com/table.csv?s="
           + ticker
           + "&a="
           + startdate.get(Calendar.MONTH)
           + "&b="
           + startdate.get(Calendar.DATE)
           + "&c="
           + startdate.get(Calendar.YEAR);
   final String location = "retrieval.InternetConnection.getStockData(" + ticker + ")";
   final long retryDelayMillis = 5000;

   // This connection loop will re-connect to Yahoo if the initial and
   // subsequent attempts fail.
   boolean finished = false;
   while (!finished) {
     try {
       String contents = client.getPage(url).getWebResponse().getContentAsString();
       // There is no stock data for the given ticker when Yahoo responds with an error page
       // rather than a CSV. An empty body is handled the same way; it used to make charAt(0)
       // throw and spin the retry loop.
       if (contents == null || contents.isEmpty() || contents.charAt(0) == '<') {
         throw new MissingCSVDataException("No data exists for this stock");
       }
       final YahooCSV csv = new YahooCSV(contents, ticker);
       client.close();
       System.out.println(ticker + " connected to Yahoo.");
       return csv;
     } catch (UnknownHostException uhe) {
       // Connection error.
       System.out.println("Exception in " + location);
       System.out.println(uhe.toString());
       System.out.println("Waiting to reconnect.");
     } catch (MissingCSVDataException mcsvde) {
       // There is no CSV data for this stock: stop retrying.
       System.out.println("Exception in " + location);
       System.out.println(mcsvde.toString());
       finished = true;
       client.close();
     } catch (Exception e) {
       System.out.println("Exception in " + location);
       System.out.println(e.toString());
     }

     if (!finished) {
       // Pause before every retry so a persistent failure does not become a busy loop.
       try {
         Thread.sleep(retryDelayMillis);
       } catch (InterruptedException ie) {
         System.out.println("Exception in " + location);
         System.out.println(ie.toString());
         // Keep the interrupt visible to the caller and stop retrying.
         Thread.currentThread().interrupt();
         throw new ConnectionException("Interrupted while waiting to reconnect for " + ticker);
       }
     }
   }
   throw new ConnectionException("Unable to find CSV File for " + ticker);
 }
Пример #7
0
 /**
  * Closes the web client after each test.
  *
  * <p>The null check keeps a failure that happened before the client was assigned from being
  * masked by a {@code NullPointerException} thrown here.
  */
 @After
 public void tearDown() {
   if (webClient != null) {
     webClient.close();
   }
 }