Ejemplo n.º 1
0
  /**
   * Builds a {@link WebClient} via the no-argument {@code buildWebClient()} overload and installs
   * the given cookie manager on it.
   *
   * @param manager cookie manager to attach to the new client
   * @return the newly built client, configured to use {@code manager}
   */
  public static WebClient buildWebClient(CookieManager manager) {
    final WebClient client = buildWebClient();
    client.setCookieManager(manager);
    return client;
  }
Ejemplo n.º 2
0
  /**
   * Creates a {@link WebClient} through the no-argument {@code newWebClient()} overload and
   * attaches the supplied cookie manager to it.
   *
   * @param cookieManager cookie manager the returned client should use
   * @return the freshly created client with {@code cookieManager} installed
   */
  public static final WebClient newWebClient(CookieManager cookieManager) {
    final WebClient client = newWebClient();

    // Swap in the caller's cookie manager before handing the client out.
    client.setCookieManager(cookieManager);
    return client;
  }
Ejemplo n.º 3
0
 /**
  * Fetches one qixin.com search-result page using a fixed set of cookies and prints, for every
  * anchor whose {@code class} attribute is {@code search-result-title}, its text followed by its
  * absolute URL.
  *
  * <p>If the page cannot be fetched because of an {@link IOException}, the stack trace is printed
  * and the method returns without parsing anything. The web client is closed on every exit path.
  */
 @Override
 public void run() {
   // Create a web client.
   WebClient webClient = new WebClient();
   try {
     // JavaScript and CSS processing are switched off; they are not needed to read the anchors.
     webClient.getOptions().setJavaScriptEnabled(false);
     webClient.getOptions().setCssEnabled(false);

     // NOTE(review): these cookie values look like captured session data hard-coded into the
     // source — confirm they are meant to live here.
     Cookie returnUrlCookie =
         new Cookie(
             "www.qixin.com",
             "login_returnurl",
             "http%3A//www.qixin.com/search/prov/SH%3Fpage%3D2");
     Cookie userKeyCookie =
         new Cookie(
             "www.qixin.com",
             "userKey",
             "QXBAdmin-Web2.0_5tUrhr/6EVtLT+GVfE+vU8k330y+oPICCM6jhUGEoLc%3D");
     Cookie userValueCookie =
         new Cookie("www.qixin.com", "userValue", "4a68111b-0cfa-457f-91bd-b6fda97fa524");
     Cookie sessionCookie =
         new Cookie(
             "www.qixin.com",
             "gr_session_id_955c17a7426f3e98",
             "d25fe84e-fb1d-4ef8-8b4e-b530e5004b30");
     Cookie cdnCookie =
         new Cookie("www.qixin.com", "_alicdn_sec", "5732cf53d99e48a838049be355d47a44000895ae");

     CookieManager cookieManager = new CookieManager();
     cookieManager.addCookie(returnUrlCookie);
     cookieManager.addCookie(userValueCookie);
     cookieManager.addCookie(sessionCookie);
     cookieManager.addCookie(userKeyCookie);
     cookieManager.addCookie(cdnCookie);
     webClient.setCookieManager(cookieManager);

     // Fetch the page.
     HtmlPage page;
     try {
       page = webClient.getPage("http://www.qixin.com/search/prov/SH?page=20");
     } catch (IOException e) {
       // Without a page there is nothing to parse; report the failure and stop instead of
       // dereferencing a null page below.
       e.printStackTrace();
       return;
     }

     // Collect every anchor on the page and print the ones that are search-result titles.
     @SuppressWarnings("unchecked") // the "//a" XPath selects only anchor elements
     List<HtmlAnchor> anchors = (List<HtmlAnchor>) page.getByXPath("//a");
     for (HtmlAnchor anchor : anchors) {
       if ("search-result-title".equals(anchor.getAttribute("class"))) {
         System.out.println(anchor.asText());
         System.out.println("http://www.qixin.com" + anchor.getAttribute("href"));
       }
     }
   } finally {
     // Close the web client even when fetching or parsing fails.
     webClient.close();
   }
 }
Ejemplo n.º 4
0
  /**
   * Scrapes the {@code http://shu.taobao.com/top/<cid>/search} ranking page for every category id
   * in {@code TopCategoryUrl} and writes one {@code \001}-separated line per ranked keyword to
   * {@code ShuTaobaoTop/search-<date>}, where the date is yesterday's date formatted with
   * {@code sdf}.
   *
   * <p>Categories are processed in ascending id order. A category is removed from the pending
   * list only after its page was fetched and parsed completely; any failure is reported on
   * {@code System.err} and the same category is retried. If the thread is interrupted, the
   * interrupt flag is restored and processing stops. The output writer is closed on every exit
   * path.
   *
   * @param args ignored
   * @throws IOException if the output file cannot be opened, written, or closed
   */
  public static void main(String[] args) throws IOException {

    WebClient client = new WebClient(BrowserVersion.CHROME);
    CookieManager cookie = new CookieManager();
    client.setCookieManager(cookie);
    client.getOptions().setJavaScriptEnabled(true);
    client.getOptions().setActiveXNative(false);
    client.getOptions().setCssEnabled(false);
    client.getOptions().setThrowExceptionOnScriptError(false);
    client.getOptions().setThrowExceptionOnFailingStatusCode(false);
    client.getOptions().setDoNotTrackEnabled(true);
    client.getOptions().setPrintContentOnFailingStatusCode(false);
    client.setAjaxController(new NicelyResynchronizingAjaxController());
    client.setJavaScriptTimeout(Long.MAX_VALUE);

    List<Integer> pendingCategories = new LinkedList<Integer>(TopCategoryUrl.keySet());
    Collections.sort(pendingCategories);

    // The output file is named after the day before "now".
    long ymd = System.currentTimeMillis() - 24L * 60 * 60 * 1000;
    String date = sdf.format(ymd);
    final String sep = "\001";

    BufferedWriter bw = new BufferedWriter(new FileWriter("ShuTaobaoTop/search-" + date, false));
    try {
      while (!pendingCategories.isEmpty()) {
        int cid = pendingCategories.get(0);
        String cname = TopCategoryUrl.get(cid);
        String pid = "0", pname = "";
        String topUrl = "http://shu.taobao.com/top/" + cid + "/search";

        boolean success = false;
        List<String> result = new ArrayList<String>();
        try {
          // Keep attempting the login until it reports success.
          do {
            System.out.println("剩余类目数=" + pendingCategories.size());
          } while (!AccountLogin.loginTaoBao(client, AccountLogin.TAOBAOLOGINURL, true));

          HtmlPage queryTrade = client.getPage(topUrl);
          Thread.sleep(1000L);
          System.out.println(queryTrade.getTitleText());

          // Parse the rendered page.
          Document doc = Jsoup.parse(queryTrade.asXml());
          Element time = doc.getElementsByAttributeValue("class", "time").get(0);
          // Expected text shape: "统计时间: 2014-05-17 - 2014-05-23"
          String startTime = time.text().substring(6, 16).replace("-", "");
          String endTime = time.text().substring(19).replace("-", "");
          // Ranking modules look like: <div class="mod odd mod-10 sm-ua"> <h3 class="title">
          Elements elements = doc.getElementsByAttributeValueStarting("class", "mod ");
          for (Element element : elements) {
            String title = element.getElementsByTag("h3").get(0).text();
            System.out.println(title);
            // Each ranked entry is an <li class="up "> inside the module's first <ol>.
            Elements lis = element.getElementsByTag("ol").get(0).getElementsByTag("li");
            for (Element li : lis) {
              Elements spans = li.getElementsByTag("span");
              String rank = spans.get(0).text();
              String key = spans.get(1).text();
              String rise = spans.get(2).text();
              // Drop the trailing character of the rise column.
              rise = rise.substring(0, rise.length() - 1);

              URL href =
                  new URL(
                      URLDecoder.decode(
                          "http://shu.taobao.com"
                              + spans.get(1).getElementsByTag("a").get(0).attr("href"),
                          "utf-8"));
              String hrefQuery = href.getQuery();
              // getQuery() returns null for links without a query string.
              if (hrefQuery != null && hrefQuery.contains("cid=")) {
                if ("0".equals(pid)) {
                  // First sub-category link seen: remember the top-level category as parent.
                  pid = String.valueOf(cid);
                  pname = cname;
                }
                cid = Integer.parseInt(hrefQuery.split("cid=")[1].split("&")[0]);
                cname = title;
              }

              String line =
                  startTime + sep + endTime + sep + cid + sep + cname + sep + pid + sep + pname
                      + sep + rank + sep + key + sep + rise;
              System.out.println(line);
              result.add(line);
            }
          }
          success = true;
        } catch (InterruptedException e) {
          // Honor the interruption: restore the flag and stop instead of retrying forever.
          Thread.currentThread().interrupt();
          System.err.println("Interrupted while processing " + topUrl + "; stopping.");
          break;
        } catch (Exception e) {
          // Best-effort scraping: the category stays pending and is retried on the next pass,
          // but the failure is reported so a permanent error is visible.
          System.err.println("Failed to process " + topUrl + ", will retry: " + e);
        }
        if (success) {
          pendingCategories.remove(0);
          System.out.println("剩余类目数=" + pendingCategories.size());
          for (String r : result) {
            bw.write(r + "\n");
          }
        }
      }
    } finally {
      // Closing the BufferedWriter flushes it and closes the underlying FileWriter.
      bw.close();
    }
  }