@Test @Ignore public void test3() throws Exception { System.out.println("-------------------------------"); WebClient webClient = new WebClient(BrowserVersion.CHROME); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setJavaScriptEnabled(false); HtmlPage page = webClient.getPage("http://www.zjnu.edu.cn/news/common/article_show.aspx?article_id=19285"); System.out.println("---------------标题----------------"); HtmlSpan span1 = (HtmlSpan) page.getElementById("mytitle"); System.out.println(span1.asText()); System.out.println("-------------------------------"); System.out.println("---------------正文----------------"); HtmlSpan span2 = (HtmlSpan) page.getElementById("mycontent"); System.out.println(span2.asText()); System.out.println("-------------------------------"); System.out.println("---------------图片----------------"); DomNodeList<HtmlElement> elements = span2.getElementsByTagName("img"); for (HtmlElement element : elements) { System.out.println(element.getAttribute("src")); } // log.debug("{}", elements); System.out.println("-------------------------------"); webClient.close(); System.out.println("-------------------------------"); }
@Test @Ignore public void test01() throws Exception { System.out.println("-------------------------------"); WebClient webClient = new WebClient(BrowserVersion.CHROME); try { WebClientOptions options = webClient.getOptions(); options.setThrowExceptionOnFailingStatusCode(false); options.setThrowExceptionOnScriptError(false); options.setCssEnabled(false); options.setJavaScriptEnabled(true); options.setTimeout(50000); // webClient.setAjaxController(new NicelyResynchronizingAjaxController()); HtmlPage pageOrgin = webClient.getPage("http://lvyou.baidu.com/jinhua/jingdian"); Thread.sleep(5000); DomNodeList<DomNode> pageNodes = pageOrgin.querySelectorAll(".pagination"); HtmlDivision pageDiv = (HtmlDivision) pageNodes.get(0); DomNodeList<HtmlElement> liElements = pageDiv.getElementsByTagName("li"); Integer pageSize = liElements.size() - 1; for (int pageNow = 0; pageNow < pageSize; pageNow++) { DomNodeList<HtmlElement> pageAnchors = liElements.get(pageNow).getElementsByTagName("a"); HtmlAnchor pageAnchor = (HtmlAnchor) pageAnchors.get(0); HtmlPage page = pageAnchor.click(); Thread.sleep(10000); DomElement jViewDom = page.getElementById("J-view-list-container"); DomNodeList<HtmlElement> lis = jViewDom.getElementsByTagName("li"); for (HtmlElement li : lis) { DomNodeList<DomNode> titleNodes = li.querySelectorAll(".title"); HtmlAnchor titleAnchor = (HtmlAnchor) titleNodes.get(0); System.out.println("---------------标题----------------"); log.debug("{}", titleAnchor.asText()); DomNodeList<DomNode> picNodes = li.querySelectorAll(".pic"); HtmlAnchor picAnchor = (HtmlAnchor) picNodes.get(0); System.out.println("---------------详情URL----------------"); String detailUrl = "http://lvyou.baidu.com" + picAnchor.getAttribute("href"); log.debug("{}", detailUrl); DomNodeList<HtmlElement> imgEelements = picAnchor.getElementsByTagName("img"); for (HtmlElement imgEelement : imgEelements) { System.out.println("---------------图片----------------"); log.debug("{}", imgEelement.getAttribute("src")); 
} DomNodeList<DomNode> sumNodes = li.querySelectorAll(".view-userSays"); HtmlDivision sumDiv = (HtmlDivision) sumNodes.get(0); DomNodeList<HtmlElement> sumElements = sumDiv.getElementsByTagName("p"); HtmlParagraph sumPara = (HtmlParagraph) sumElements.get(0); System.out.println("---------------摘要----------------"); log.debug("{}", sumPara.asText()); } } } finally { webClient.close(); } System.out.println("-------------------------------"); }
@Test @Ignore public void test() throws Exception { System.out.println("-------------------------------"); WebClient webClient = new WebClient(BrowserVersion.CHROME); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setJavaScriptEnabled(false); HtmlPage page = webClient.getPage("http://news.163.com/domestic/"); // DomNodeList<HtmlElement> elements = page.getElementBy System.out.println("---------------标题----------------"); DomNodeList<DomNode> domNodes = page.querySelectorAll(".item-top"); // log.debug("{}", domNodes); for (DomNode domNode : domNodes) { HtmlDivision htmlDivision = (HtmlDivision) domNode; DomNodeList<HtmlElement> aElements = htmlDivision.getElementsByTagName("a"); HtmlAnchor htmlAnchor = (HtmlAnchor) aElements.get(0); // HTMLHeadingElement htmlHeading2 = (HTMLHeadingElement) // htmlDivision.getElementsByTagName("h2"); // HtmlAnchor htmlAnchor = (HtmlAnchor) htmlDivision.getElementsByTagName("a"); log.debug("{}", htmlAnchor.asText()); log.debug("{}", htmlAnchor.getAttribute("href")); DomNodeList<HtmlElement> pElements = htmlDivision.getElementsByTagName("p"); HtmlParagraph htmlParagraph = (HtmlParagraph) pElements.get(0); log.debug("{}", htmlParagraph.asText()); DomNodeList<HtmlElement> iEelements = htmlDivision.getElementsByTagName("img"); for (HtmlElement iEelement : iEelements) { log.debug("{}", iEelement.getAttribute("src")); } String detailUrl = htmlAnchor.getAttribute("href"); if (detailUrl.equals("http://news.163.com/15/1215/17/BAT2L8RB00014JB6.html#f=dlist")) { HtmlPage detailPage = webClient.getPage(detailUrl); System.out.println("---------------正文----------------"); DomElement endTextElement = detailPage.getElementById("endText"); log.debug("{}", endTextElement.asText()); System.out.println("---------------图片----------------"); DomNodeList<DomNode> imgNodes = endTextElement.querySelectorAll(".f_center"); for (DomNode imgNode : imgNodes) { HtmlParagraph imgpara = (HtmlParagraph) imgNode; DomNodeList<HtmlElement> endImgs = 
imgpara.getElementsByTagName("img"); for (HtmlElement endImg : endImgs) { log.debug("{}", endImg.getAttribute("src")); } } } } webClient.close(); System.out.println("-------------------------------"); }
@Override public void run() { String str; // 创建一个webclient WebClient webClient = new WebClient(); // htmlunit 对css和javascript的支持不好,所以请关闭之 webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setCssEnabled(false); Cookie cookie = new Cookie( "www.qixin.com", "login_returnurl", "http%3A//www.qixin.com/search/prov/SH%3Fpage%3D2"); Cookie cookie1 = new Cookie( "www.qixin.com", "userKey", "QXBAdmin-Web2.0_5tUrhr/6EVtLT+GVfE+vU8k330y+oPICCM6jhUGEoLc%3D"); Cookie cookie2 = new Cookie("www.qixin.com", "userValue", "4a68111b-0cfa-457f-91bd-b6fda97fa524"); Cookie cookie3 = new Cookie( "www.qixin.com", "gr_session_id_955c17a7426f3e98", "d25fe84e-fb1d-4ef8-8b4e-b530e5004b30"); Cookie cookie4 = new Cookie("www.qixin.com", "_alicdn_sec", "5732cf53d99e48a838049be355d47a44000895ae"); CookieManager cookieManager = new CookieManager(); cookieManager.addCookie(cookie); cookieManager.addCookie(cookie2); cookieManager.addCookie(cookie3); cookieManager.addCookie(cookie1); cookieManager.addCookie(cookie4); webClient.setCookieManager(cookieManager); // 获取页面 HtmlPage page = null; try { page = webClient.getPage("http://www.qixin.com/search/prov/SH?page=20"); } catch (IOException e) { e.printStackTrace(); } // 获取页面的XML代码 List<HtmlAnchor> hbList = (List<HtmlAnchor>) page.getByXPath("//a"); Iterator iterator = hbList.iterator(); while (iterator.hasNext()) { HtmlAnchor ha = (HtmlAnchor) iterator.next(); if ("search-result-title".equals(ha.getAttribute("class"))) { System.out.println(ha.asText()); System.out.println("http://www.qixin.com" + ha.getAttribute("href")); } } // 关闭webclient webClient.close(); }
/** * @param ticker The ticker of the stock to look up. * @return A retrieval.CSV that contains the given stock's data over the past year. * @throws ConnectionException if it has problems connecting to Yahoo. * @throws exceptions.MissingCSVDataException if Yahoo does not have any data for the given stock. */ public synchronized YahooCSV getStockData(String ticker) throws ConnectionException, MissingCSVDataException { boolean connectionsuccess = false; while (!connectionsuccess) { try { String contents = client .getPage( new StringBuilder("http://real-chart.finance.yahoo.com/table.csv?s=") .append(ticker) .toString()) .getWebResponse() .getContentAsString(); if (contents.charAt(0) == '<') { throw new MissingCSVDataException("No data exists for this stock"); } final YahooCSV csv = new YahooCSV(contents, ticker); connectionsuccess = true; client.close(); System.out.println(new StringBuilder(ticker).append(" connected to Yahoo").toString()); return csv; } catch (UnknownHostException uhe) { // Connection error. System.out.println( new StringBuilder("Exception in retrieval.") .append("InternetConnection.getStockData(") .append(ticker) .append(")") .toString()); System.out.println(uhe.toString()); System.out.println("Waiting to reconnect."); try { Thread.sleep(5000); } catch (Exception e) { System.out.println( new StringBuilder("Exception in ") .append("retrieval.InternetConnection.getStockData(") .append(ticker) .append(")") .toString()); System.out.println(uhe.toString()); } } catch (MissingCSVDataException mcsvde) { // There is no CSV data for this stock. 
System.out.println( new StringBuilder("Exception in ") .append("retrieval.InternetConnection.getStockData(") .append(ticker) .append(")") .toString()); System.out.println(mcsvde.toString()); connectionsuccess = true; client.close(); } catch (Exception e) { System.out.println( new StringBuilder("Exception in ") .append("retrieval.InternetConnection.getStockData(") .append(ticker) .append(")") .toString()); System.out.println(e.toString()); } } throw new ConnectionException( new StringBuilder("Unable to find CSV File for ").append(ticker).toString()); }
/** * @param ticker The ticker of the stock to look up. * @param startdate The first date of the range of dates over which to search. * @return A retrieval.CSV that contains the given stock's data over the past year. * @throws ConnectionException if it has problems connecting to Yahoo. * @throws exceptions.MissingCSVDataException if Yahoo does not have any data for the given stock. * @throws exceptions.InvalidStartDateException if the start date is equal to or after the current * date. */ public synchronized YahooCSV getStockData(String ticker, GregorianCalendar startdate) throws ConnectionException, MissingCSVDataException, InvalidStartDateException { // Make sure that the start date is before today! GregorianCalendar today = new GregorianCalendar(); if (!startdate.before(today)) { throw new InvalidStartDateException("The start date must be before the current date."); } // This connection loop will re-connect to Yahoo if the initial and // subsequent attempts fail. boolean connectionsuccess = false; while (!connectionsuccess) { try { String contents = client .getPage( new StringBuilder("http://real-chart.finance.yahoo.com/table.csv?s=") .append(ticker) .append("&a=") .append(startdate.get(Calendar.MONTH)) .append("&b=") .append(startdate.get(Calendar.DATE)) .append("&c=") .append(startdate.get(Calendar.YEAR)) .toString()) .getWebResponse() .getContentAsString(); if (contents.charAt(0) == '<') { // There is no stock data for the given ticker, and Yahoo // has responded with an error page rather than a CSV. throw new MissingCSVDataException("No data exists for this stock"); } final YahooCSV csv = new YahooCSV(contents, ticker); connectionsuccess = true; client.close(); System.out.println(ticker + " connected to Yahoo."); return csv; } catch (UnknownHostException uhe) { // Connection error. 
System.out.println( new StringBuilder("Exception in ") .append("retrieval.InternetConnection.getStockData(") .append(ticker) .append(")") .toString()); System.out.println(uhe.toString()); System.out.println("Waiting to reconnect."); try { Thread.sleep(5000); } catch (Exception e) { System.out.println( new StringBuilder("Exception in ") .append("retrieval.InternetConnection.getStockData(") .append(ticker) .append(")") .toString()); System.out.println(uhe.toString()); } } catch (MissingCSVDataException mcsvde) { // There is no CSV data for this stock. System.out.println( new StringBuilder("Exception in ") .append("retrieval.InternetConnection.getStockData(") .append(ticker) .append(")") .toString()); System.out.println(mcsvde.toString()); connectionsuccess = true; client.close(); } catch (Exception e) { System.out.println( new StringBuilder("Exception in ") .append("retrieval.InternetConnection.getStockData(") .append(ticker) .append(")") .toString()); System.out.println(e.toString()); } } throw new ConnectionException( new StringBuilder("Unable to find CSV File for ").append(ticker).toString()); }
/** Closes the shared web client after each test, if one exists. */
@After
public void tearDown() {
  // @After methods run even when set-up or the test itself throws, so the field may still be
  // null here (presumably it is assigned during set-up); an NPE would mask the real failure.
  if (webClient != null) {
    webClient.close();
  }
}