@Test @Ignore public void test01() throws Exception { System.out.println("-------------------------------"); WebClient webClient = new WebClient(BrowserVersion.CHROME); try { WebClientOptions options = webClient.getOptions(); options.setThrowExceptionOnFailingStatusCode(false); options.setThrowExceptionOnScriptError(false); options.setCssEnabled(false); options.setJavaScriptEnabled(true); options.setTimeout(50000); // webClient.setAjaxController(new NicelyResynchronizingAjaxController()); HtmlPage pageOrgin = webClient.getPage("http://lvyou.baidu.com/jinhua/jingdian"); Thread.sleep(5000); DomNodeList<DomNode> pageNodes = pageOrgin.querySelectorAll(".pagination"); HtmlDivision pageDiv = (HtmlDivision) pageNodes.get(0); DomNodeList<HtmlElement> liElements = pageDiv.getElementsByTagName("li"); Integer pageSize = liElements.size() - 1; for (int pageNow = 0; pageNow < pageSize; pageNow++) { DomNodeList<HtmlElement> pageAnchors = liElements.get(pageNow).getElementsByTagName("a"); HtmlAnchor pageAnchor = (HtmlAnchor) pageAnchors.get(0); HtmlPage page = pageAnchor.click(); Thread.sleep(10000); DomElement jViewDom = page.getElementById("J-view-list-container"); DomNodeList<HtmlElement> lis = jViewDom.getElementsByTagName("li"); for (HtmlElement li : lis) { DomNodeList<DomNode> titleNodes = li.querySelectorAll(".title"); HtmlAnchor titleAnchor = (HtmlAnchor) titleNodes.get(0); System.out.println("---------------标题----------------"); log.debug("{}", titleAnchor.asText()); DomNodeList<DomNode> picNodes = li.querySelectorAll(".pic"); HtmlAnchor picAnchor = (HtmlAnchor) picNodes.get(0); System.out.println("---------------详情URL----------------"); String detailUrl = "http://lvyou.baidu.com" + picAnchor.getAttribute("href"); log.debug("{}", detailUrl); DomNodeList<HtmlElement> imgEelements = picAnchor.getElementsByTagName("img"); for (HtmlElement imgEelement : imgEelements) { System.out.println("---------------图片----------------"); log.debug("{}", imgEelement.getAttribute("src")); } DomNodeList<DomNode> sumNodes = li.querySelectorAll(".view-userSays"); HtmlDivision sumDiv = (HtmlDivision) sumNodes.get(0); DomNodeList<HtmlElement> sumElements = sumDiv.getElementsByTagName("p"); HtmlParagraph sumPara = (HtmlParagraph) sumElements.get(0); System.out.println("---------------摘要----------------"); log.debug("{}", sumPara.asText()); } } } finally { webClient.close(); } System.out.println("-------------------------------"); }
public WebForm(DomElement form) { this.form = form; this.inputs = form.getElementsByTagName("input"); this.submitField = null; this.usernameField = null; this.passwordField = null; for (HtmlElement input : inputs) { String inputType = input.getAttribute("type"); if (inputType == null) { continue; } else if (inputType.equals("password")) { passwordField = (HtmlPasswordInput) input; } else if (inputType.equals("submit")) { submitField = (HtmlSubmitInput) input; } else if ((input.getId() != null && input.getId().contains("user")) || (input.getAttribute("name") != null && input.getAttribute("name").contains("user"))) { usernameField = (HtmlInput) input; } } }