/** * @return * @author douzh * @time 2015-4-23下午4:36:05 */ public static WebClient getClient() { WebClient client = new WebClient(BrowserVersion.FIREFOX_24); client.getOptions().setJavaScriptEnabled(true); client.getOptions().setActiveXNative(false); client.getOptions().setCssEnabled(false); client.getOptions().setRedirectEnabled(true); client.getOptions().setThrowExceptionOnScriptError(false); client.getOptions().setThrowExceptionOnFailingStatusCode(false); client.getOptions().setGeolocationEnabled(true); // client.addWebWindowListener(new WebWindowListener() { // public void webWindowOpened(WebWindowEvent event) { // System.out.println("Web Window Openning"); // } // // public void webWindowContentChanged(WebWindowEvent event) { // System.out.println("Web Content Changed"); // } // // public void webWindowClosed(WebWindowEvent event) { // System.out.println("Web Window Closed"); // } // }); client.setAjaxController( new NicelyResynchronizingAjaxController() { public boolean processSynchron(HtmlPage page, WebRequest settings, boolean async) { System.out.println(settings.getUrl()); return super.processSynchron(page, settings, async); } }); return client; }
public String getParsedPage() { List<String> alertHandler = new LinkedList<String>(); ; WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24); // CHROME); webClient.setAjaxController(new MyNicelyResynchronizingAjaxController()); webClient.getOptions().setJavaScriptEnabled(true); webClient.getOptions().setTimeout(3500); webClient.getOptions().setThrowExceptionOnScriptError(true); webClient.getOptions().setCssEnabled(true); webClient.getOptions().isRedirectEnabled(); webClient.setAlertHandler( new CollectingAlertHandler(alertHandler)); // 将JavaScript中alert标签产生的数据保存在一个链表中 // webClient.getOptions().setThrowExceptionOnScriptError(false); HtmlPage page = null; JavaScriptEngine engine = new JavaScriptEngine(webClient); webClient.setJavaScriptEngine(engine); try { page = webClient.getPage(data); } catch (FailingHttpStatusCodeException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } if (page != null) { return page.asXml(); } return null; }
/** * get the ajax url from the click button * * @param clickOfXpath:页面待点击按钮的xpath表达式 * @param index * @return List<String>:链表的第一个信息是页面的title,以后的信息是所有的ajax的url */ public static List<String> getAjaxUrl(String targetUrl, String clickOfXpath, int index) throws FailingHttpStatusCodeException, MalformedURLException, IOException { // TARGET_URL = // "http://app.flyme.cn/apps/public/detail?package_name=com.myzaker.zaker_phone_smartbar"; List<String> urls = new LinkedList<String>(); // 每次ajax请求时都会创建一个AjaxController对象,在该对象中可以查看ajax请求的地址 MyNicelyResynchronizingAjaxController ajaxController = new MyNicelyResynchronizingAjaxController(); List alertHandler = new LinkedList(); // 模拟一个浏览器 WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24); // HtmlUnitDriver // 设置webClient的相关参数 webClient.getOptions().setJavaScriptEnabled(true); webClient.getOptions().setCssEnabled(false); webClient.setAjaxController(ajaxController); webClient.getOptions().setTimeout(35000); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.setAlertHandler( new CollectingAlertHandler(alertHandler)); // 将JavaScript中alert标签产生的数据保存在一个链表中 // 模拟浏览器打开一个目标网址 HtmlPage rootPage = webClient.getPage(targetUrl); urls.add(rootPage.getTitleText()); urls.add(ajaxController.getVisitUrl()); // System.out.println("url1:" + url); HtmlElement elementA = (HtmlElement) rootPage.getByXPath(clickOfXpath).get(index); Page page = elementA.click(); urls.add(ajaxController.getVisitUrl()); return urls; }
public static void main(String[] args) throws Exception { // WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24, "54.186.230.121", 3128); WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.setJavaScriptTimeout(10000); webClient.getOptions().setJavaScriptEnabled(true); webClient.setAjaxController(new NicelyResynchronizingAjaxController()); webClient.getOptions().setTimeout(10000); // webClient.getOptions().setJavaScriptEnabled(false); // webClient.getOptions().setAppletEnabled(false); // webClient.getOptions().setCssEnabled(false); // webClient.getOptions().setThrowExceptionOnScriptError(false); // webClient.setJavaScriptTimeout(10000); // webClient.getOptions().setJavaScriptEnabled(true); // webClient.setAjaxController(new NicelyResynchronizingAjaxController()); // webClient.getOptions().setTimeout(10000); // webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); // webClient.getOptions().setThrowExceptionOnScriptError(false); HtmlPage currentPage = webClient.getPage("http://www.yandex.ru/"); // HtmlPage currentPage = webClient.getPage("http://www.google.ru"); // HtmlDivision div = currentPage.getHtmlElementById("del_competitors-1_42"); // HtmlElement clickable = (HtmlElement) // currentPage.getHtmlElementById("del_competitors-1_42"); // currentPage = (HtmlPage) clickable.click(); HtmlAnchor advancedSearchAn = currentPage.getAnchorByText("Завести ящик"); currentPage = advancedSearchAn.click(); HtmlImage image = currentPage.<HtmlImage>getFirstByXPath("//img[@src='images/ash2008.jpg']"); currentPage = (HtmlPage) image.click(); System.out.println(currentPage.asXml()); // HtmlImage image = // currentPage.<HtmlImage>getFirstByXPath("//img[@src='images/ash2008.jpg']"); // currentPage = (HtmlPage) image.click(); // HtmlImage imagetosave = // currentPage.<HtmlImage>getFirstByXPath("//img[@src='//yastatic.net/www/1.977/yaru/i/logo.png']"); // HtmlImage image = currentPage.<HtmlImage>getHtmlElementById("add_competitors-1_3"); // currentPage = (HtmlPage) image.click(); // File imageFile = new File("test_new.jpg"); // image.saveAs(imageFile); // System.out.println(currentPage.asXml()); System.out.println("It is done."); webClient.closeAllWindows(); }
public static void homePage(String url) { String str; // 创建一个webclient WebClient webClient = new WebClient(); // webClient.getWebConsole().setLogger(null); // //htmlunit 对css和javascript的支持不好,所以请关闭之 webClient.getOptions().setCssEnabled(false); // webClient.getOptions().setUseInsecureSSL(true); webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); // webClient.getCookieManager().setCookiesEnabled(true); webClient.setAjaxController(new NicelyResynchronizingAjaxController()); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.waitForBackgroundJavaScript(1000); webClient.waitForBackgroundJavaScriptStartingBefore(1000); // 获取页面 HtmlPage page = null; int times = 10; try { page = webClient.getPage(url); for (int i = 0; i < times; i++) { synchronized (page) { page.wait(2000); } if (page.getByXPath("//object") != null) { System.out.println("***************************yes**********************"); for (int j = 0; j < page.getByXPath("//object").size(); j++) { System.out.println(page.getByXPath("//object").get(j)); } break; } System.out.println("***************************no***************************"); } } catch (Exception e) { e.printStackTrace(); } // 获取页面的TITLE // str = page.getTitleText(); // System.out.println(str); // //获取页面的XML代码 // System.out.println("***************************start***************************"); // System.out.println(page.asXml()); // System.out.println("***************************end***************************"); // //获取页面的文本 // str = page.asText(); // System.out.println(str); }
public static WebClient buildWebClient() { WebClient webClient = new WebClient(BrowserVersion.FIREFOX_10); webClient.setAjaxController(new NicelyResynchronizingAjaxController()); webClient.getOptions().setCssEnabled(true); webClient.getOptions().setJavaScriptEnabled(true); webClient.getOptions().setUseInsecureSSL(true); webClient.setCssErrorHandler( new ErrorHandler() { @Override public void warning(CSSParseException exception) throws CSSException { // nothing to do here } @Override public void error(CSSParseException exception) throws CSSException { // todo: log or throw exception } @Override public void fatalError(CSSParseException exception) throws CSSException { // todo: log or throw exception } }); webClient.setIncorrectnessListener( new IncorrectnessListener() { @Override public void notify(String message, Object origin) { // todo: analyze and throw exception } }); webClient.waitForBackgroundJavaScript(100000); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.getOptions().setRedirectEnabled(true); return webClient; }
/** * Runs the test and keeps the results. * * @throws IOException If there're errors reading the test file. */ public void run() throws IOException { try { startTime = new Date().getTime(); browser.setAjaxController(new NicelyResynchronizingAjaxController()); browser.setJavaScriptEnabled(true); HtmlPage page = browser.getPage(testUrl); // This is to avoid errors when background JS hasn't updated the DOM yet // while we're trying to access it. // See: http://htmlunit.sourceforge.net/faq.html browser.waitForBackgroundJavaScript(10000); readTests(page); readResults(page); } catch (IOException ex) { throw (IOException) new IOException("Cannot read the test resource.").initCause(ex); } catch (ScriptException scriptEx) { error = new QUnitException(scriptEx); } }
public static void main(String[] args) throws IOException { WebClient client = new WebClient(BrowserVersion.CHROME); CookieManager cookie = new CookieManager(); client.setCookieManager(cookie); client.getOptions().setJavaScriptEnabled(true); client.getOptions().setActiveXNative(false); client.getOptions().setCssEnabled(false); client.getOptions().setThrowExceptionOnScriptError(false); client.getOptions().setThrowExceptionOnFailingStatusCode(false); client.getOptions().setDoNotTrackEnabled(true); client.getOptions().setPrintContentOnFailingStatusCode(false); client.setAjaxController(new NicelyResynchronizingAjaxController()); client.setJavaScriptTimeout(Long.MAX_VALUE); List<Integer> TopCategory = new LinkedList<Integer>(TopCategoryUrl.keySet()); Collections.sort(TopCategory); long today = new Date().getTime(); long ymd = today - 1000 * 60 * 60 * 24; String date = sdf.format(ymd); FileWriter fw = new FileWriter("ShuTaobaoTop/search-" + date, false); BufferedWriter bw = new BufferedWriter(fw); while (TopCategory.size() > 0) { int cid = TopCategory.get(0); // if(cid<50002766) // { // TopCategory.remove(0); // continue; // } String cname = TopCategoryUrl.get(cid); String pid = "0", pname = ""; String topUrl = "http://shu.taobao.com/top/" + cid + "/search"; boolean success = false; ArrayList<String> result = new ArrayList<String>(); try { // client.getOptions().setJavaScriptEnabled(true); do { System.out.println("剩余类目数=" + TopCategory.size()); // ProxyUnit.configProxy(client, ProxyUnit.CHECKURL); } while (false == AccountLogin.loginTaoBao(client, AccountLogin.TAOBAOLOGINURL, true)); // client.getOptions().setJavaScriptEnabled(false); HtmlPage queryTrade = client.getPage(topUrl); Thread.sleep(1000L); System.out.println(queryTrade.getTitleText()); /* * 解析 */ Document doc = Jsoup.parse(queryTrade.asXml()); Element time = doc.getElementsByAttributeValue("class", "time").get(0); String startTime = time.text().substring(6, 16).replace("-", ""); // 统计时间: 2014-05-17 - 2014-05-23 String endTime = time.text().substring(19).replace("-", ""); Elements elements = doc.getElementsByAttributeValueStarting( "class", "mod "); // <div class="mod odd mod-10 sm-ua"> <h3 class="title"> for (Element element : elements) { String title = element.getElementsByTag("h3").get(0).text(); System.out.println(title); Elements lis = element.getElementsByTag("ol").get(0).getElementsByTag("li"); // <li class="up "> for (Element li : lis) { Elements spans = li.getElementsByTag("span"); String rank = spans.get(0).text(); String key = spans.get(1).text(); String rise = spans.get(2).text(); rise = rise.substring(0, rise.length() - 1); URL href = new URL( URLDecoder.decode( "http://shu.taobao.com" + spans.get(1).getElementsByTag("a").get(0).attr("href"), "utf-8")); String hrefQuery = href.getQuery(); if (hrefQuery.contains("cid=")) { if ("0".equals(pid)) { pid = String.valueOf(cid); pname = cname; } cid = Integer.parseInt(hrefQuery.split("cid=")[1].split("&")[0]); cname = title; } System.out.println( startTime + "\001" + endTime + "\001" + cid + "\001" + cname + "\001" + pid + "\001" + pname + "\001" + rank + "\001" + key + "\001" + rise); result.add( startTime + "\001" + endTime + "\001" + cid + "\001" + cname + "\001" + pid + "\001" + pname + "\001" + rank + "\001" + key + "\001" + rise); } } success = true; } catch (Exception e) { // TODO Auto-generated catch block // e.printStackTrace(); } if (success) { TopCategory.remove(0); System.out.println("剩余类目数=" + TopCategory.size()); for (String r : result) { bw.write(r + "\n"); } } } bw.close(); fw.close(); }