/**
 * Builds a {@link WebClient} through the no-argument {@code buildWebClient()} overload
 * and installs the supplied cookie manager on it.
 *
 * @param manager the cookie manager to set on the new client
 * @return the newly built client with {@code manager} installed
 */
public static WebClient buildWebClient(CookieManager manager) {
    final WebClient client = buildWebClient();
    client.setCookieManager(manager);
    return client;
}
public static final WebClient newWebClient(CookieManager cookieManager) { // Create and initialize WebClient object WebClient webClient = newWebClient(); webClient.setCookieManager(cookieManager); return webClient; }
@Override public void run() { String str; // 创建一个webclient WebClient webClient = new WebClient(); // htmlunit 对css和javascript的支持不好,所以请关闭之 webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setCssEnabled(false); Cookie cookie = new Cookie( "www.qixin.com", "login_returnurl", "http%3A//www.qixin.com/search/prov/SH%3Fpage%3D2"); Cookie cookie1 = new Cookie( "www.qixin.com", "userKey", "QXBAdmin-Web2.0_5tUrhr/6EVtLT+GVfE+vU8k330y+oPICCM6jhUGEoLc%3D"); Cookie cookie2 = new Cookie("www.qixin.com", "userValue", "4a68111b-0cfa-457f-91bd-b6fda97fa524"); Cookie cookie3 = new Cookie( "www.qixin.com", "gr_session_id_955c17a7426f3e98", "d25fe84e-fb1d-4ef8-8b4e-b530e5004b30"); Cookie cookie4 = new Cookie("www.qixin.com", "_alicdn_sec", "5732cf53d99e48a838049be355d47a44000895ae"); CookieManager cookieManager = new CookieManager(); cookieManager.addCookie(cookie); cookieManager.addCookie(cookie2); cookieManager.addCookie(cookie3); cookieManager.addCookie(cookie1); cookieManager.addCookie(cookie4); webClient.setCookieManager(cookieManager); // 获取页面 HtmlPage page = null; try { page = webClient.getPage("http://www.qixin.com/search/prov/SH?page=20"); } catch (IOException e) { e.printStackTrace(); } // 获取页面的XML代码 List<HtmlAnchor> hbList = (List<HtmlAnchor>) page.getByXPath("//a"); Iterator iterator = hbList.iterator(); while (iterator.hasNext()) { HtmlAnchor ha = (HtmlAnchor) iterator.next(); if ("search-result-title".equals(ha.getAttribute("class"))) { System.out.println(ha.asText()); System.out.println("http://www.qixin.com" + ha.getAttribute("href")); } } // 关闭webclient webClient.close(); }
public static void main(String[] args) throws IOException { WebClient client = new WebClient(BrowserVersion.CHROME); CookieManager cookie = new CookieManager(); client.setCookieManager(cookie); client.getOptions().setJavaScriptEnabled(true); client.getOptions().setActiveXNative(false); client.getOptions().setCssEnabled(false); client.getOptions().setThrowExceptionOnScriptError(false); client.getOptions().setThrowExceptionOnFailingStatusCode(false); client.getOptions().setDoNotTrackEnabled(true); client.getOptions().setPrintContentOnFailingStatusCode(false); client.setAjaxController(new NicelyResynchronizingAjaxController()); client.setJavaScriptTimeout(Long.MAX_VALUE); List<Integer> TopCategory = new LinkedList<Integer>(TopCategoryUrl.keySet()); Collections.sort(TopCategory); long today = new Date().getTime(); long ymd = today - 1000 * 60 * 60 * 24; String date = sdf.format(ymd); FileWriter fw = new FileWriter("ShuTaobaoTop/search-" + date, false); BufferedWriter bw = new BufferedWriter(fw); while (TopCategory.size() > 0) { int cid = TopCategory.get(0); // if(cid<50002766) // { // TopCategory.remove(0); // continue; // } String cname = TopCategoryUrl.get(cid); String pid = "0", pname = ""; String topUrl = "http://shu.taobao.com/top/" + cid + "/search"; boolean success = false; ArrayList<String> result = new ArrayList<String>(); try { // client.getOptions().setJavaScriptEnabled(true); do { System.out.println("剩余类目数=" + TopCategory.size()); // ProxyUnit.configProxy(client, ProxyUnit.CHECKURL); } while (false == AccountLogin.loginTaoBao(client, AccountLogin.TAOBAOLOGINURL, true)); // client.getOptions().setJavaScriptEnabled(false); HtmlPage queryTrade = client.getPage(topUrl); Thread.sleep(1000L); System.out.println(queryTrade.getTitleText()); /* * 解析 */ Document doc = Jsoup.parse(queryTrade.asXml()); Element time = doc.getElementsByAttributeValue("class", "time").get(0); String startTime = time.text().substring(6, 16).replace("-", ""); // 统计时间: 2014-05-17 - 
2014-05-23 String endTime = time.text().substring(19).replace("-", ""); Elements elements = doc.getElementsByAttributeValueStarting( "class", "mod "); // <div class="mod odd mod-10 sm-ua"> <h3 class="title"> for (Element element : elements) { String title = element.getElementsByTag("h3").get(0).text(); System.out.println(title); Elements lis = element.getElementsByTag("ol").get(0).getElementsByTag("li"); // <li class="up "> for (Element li : lis) { Elements spans = li.getElementsByTag("span"); String rank = spans.get(0).text(); String key = spans.get(1).text(); String rise = spans.get(2).text(); rise = rise.substring(0, rise.length() - 1); URL href = new URL( URLDecoder.decode( "http://shu.taobao.com" + spans.get(1).getElementsByTag("a").get(0).attr("href"), "utf-8")); String hrefQuery = href.getQuery(); if (hrefQuery.contains("cid=")) { if ("0".equals(pid)) { pid = String.valueOf(cid); pname = cname; } cid = Integer.parseInt(hrefQuery.split("cid=")[1].split("&")[0]); cname = title; } System.out.println( startTime + "\001" + endTime + "\001" + cid + "\001" + cname + "\001" + pid + "\001" + pname + "\001" + rank + "\001" + key + "\001" + rise); result.add( startTime + "\001" + endTime + "\001" + cid + "\001" + cname + "\001" + pid + "\001" + pname + "\001" + rank + "\001" + key + "\001" + rise); } } success = true; } catch (Exception e) { // TODO Auto-generated catch block // e.printStackTrace(); } if (success) { TopCategory.remove(0); System.out.println("剩余类目数=" + TopCategory.size()); for (String r : result) { bw.write(r + "\n"); } } } bw.close(); fw.close(); }