예제 #1
0
파일: HttpUtil.java 프로젝트: douzh/mydisk
 /**
  * Builds a {@link WebClient} that emulates Firefox 24.
  *
  * <p>JavaScript, redirects and geolocation are switched on; CSS and native ActiveX are switched
  * off; script errors and failing HTTP status codes do not raise exceptions. The installed AJAX
  * controller prints the URL of every request handed to {@code processSynchron} to standard output
  * before delegating to {@link NicelyResynchronizingAjaxController}.
  *
  * @return the newly created, fully configured client
  * @author douzh
  * @time 2015-4-23 4:36:05 PM
  */
 public static WebClient getClient() {
   // AJAX controller that logs each request URL and otherwise behaves like its parent class.
   final NicelyResynchronizingAjaxController urlLoggingController =
       new NicelyResynchronizingAjaxController() {
         @Override
         public boolean processSynchron(HtmlPage page, WebRequest request, boolean async) {
           System.out.println(request.getUrl());
           return super.processSynchron(page, request, async);
         }
       };

   final WebClient browser = new WebClient(BrowserVersion.FIREFOX_24);
   browser.getOptions().setJavaScriptEnabled(true);
   browser.getOptions().setActiveXNative(false);
   browser.getOptions().setCssEnabled(false);
   browser.getOptions().setRedirectEnabled(true);
   browser.getOptions().setThrowExceptionOnScriptError(false);
   browser.getOptions().setThrowExceptionOnFailingStatusCode(false);
   browser.getOptions().setGeolocationEnabled(true);
   // A WebWindowListener that printed window open/change/close events was sketched here in
   // commented-out form; it is not registered.
   browser.setAjaxController(urlLoggingController);
   return browser;
 }
예제 #2
0
 /**
  * Loads the resource identified by {@code data} with a JavaScript-enabled Firefox 24 client and
  * returns the resulting page serialized as XML.
  *
  * <p>HTTP status, URL and I/O failures are printed with {@code printStackTrace()} and reported to
  * the caller as a {@code null} return value. Script errors are configured to throw and are not
  * caught here. Messages produced by JavaScript {@code alert()} calls are collected into a local
  * list that is not otherwise used.
  *
  * @return the page as XML, or {@code null} if the page could not be loaded
  */
 public String getParsedPage() {
   // Receives the messages produced by JavaScript alert() calls.
   List<String> alerts = new LinkedList<String>();
   WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24);
   try {
     webClient.setAjaxController(new MyNicelyResynchronizingAjaxController());
     webClient.getOptions().setJavaScriptEnabled(true);
     webClient.getOptions().setTimeout(3500);
     webClient.getOptions().setThrowExceptionOnScriptError(true);
     webClient.getOptions().setCssEnabled(true);
     // The original called the getter isRedirectEnabled() and discarded the result; setting the
     // option explicitly states the apparent intent.
     webClient.getOptions().setRedirectEnabled(true);
     webClient.setAlertHandler(new CollectingAlertHandler(alerts));
     webClient.setJavaScriptEngine(new JavaScriptEngine(webClient));

     HtmlPage page = webClient.getPage(data);
     // Serialize while the client is still open.
     return page != null ? page.asXml() : null;
   } catch (FailingHttpStatusCodeException e) {
     e.printStackTrace();
   } catch (MalformedURLException e) {
     e.printStackTrace();
   } catch (IOException e) {
     e.printStackTrace();
   } finally {
     // Release the windows and background JavaScript threads held by the client.
     webClient.closeAllWindows();
   }
   return null;
 }
예제 #3
0
  /**
   * Opens a page, clicks one of its elements and reports the URLs recorded by the AJAX controller.
   *
   * @param targetUrl URL of the page to open
   * @param clickOfXpath XPath expression selecting the candidate element(s) to click
   * @param index position, within the XPath matches, of the element to click
   * @return a list whose first entry is the page title, followed by the URL reported by the AJAX
   *     controller after the initial load and the URL reported after the click
   * @throws FailingHttpStatusCodeException if the server answers with a failing status code
   * @throws MalformedURLException if {@code targetUrl} is not a valid URL
   * @throws IOException if an I/O error occurs while loading the page or clicking
   * @throws IndexOutOfBoundsException if {@code index} is outside the range of XPath matches
   */
  public static List<String> getAjaxUrl(String targetUrl, String clickOfXpath, int index)
      throws FailingHttpStatusCodeException, MalformedURLException, IOException {
    List<String> urls = new LinkedList<String>();
    // The controller is consulted on AJAX requests; getVisitUrl() exposes the URL it recorded.
    MyNicelyResynchronizingAjaxController ajaxController =
        new MyNicelyResynchronizingAjaxController();
    // Receives the messages produced by JavaScript alert() calls.
    List<String> alerts = new LinkedList<String>();

    // Simulate a browser.
    WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24);
    try {
      webClient.getOptions().setJavaScriptEnabled(true);
      webClient.getOptions().setCssEnabled(false);
      webClient.setAjaxController(ajaxController);
      webClient.getOptions().setTimeout(35000);
      webClient.getOptions().setThrowExceptionOnScriptError(false);
      webClient.setAlertHandler(new CollectingAlertHandler(alerts));

      // Open the target page and record its title plus the URL seen while loading it.
      HtmlPage rootPage = webClient.getPage(targetUrl);
      urls.add(rootPage.getTitleText());
      urls.add(ajaxController.getVisitUrl());

      // Click the selected element and record the URL seen as a result of the click.
      HtmlElement target = (HtmlElement) rootPage.getByXPath(clickOfXpath).get(index);
      target.click();
      urls.add(ajaxController.getVisitUrl());
      return urls;
    } finally {
      // Release the client's windows and background threads even when loading or clicking fails.
      webClient.closeAllWindows();
    }
  }
예제 #4
0
  /**
   * Demo entry point: opens the Yandex home page, follows the "Завести ящик" link, clicks an image
   * on the resulting page and prints the final page as XML.
   *
   * @param args ignored
   * @throws Exception if a page cannot be loaded or an expected element is missing
   */
  public static void main(String[] args) throws Exception {
    // A proxied client can be built with new WebClient(BrowserVersion.FIREFOX_24, host, port).
    WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24);
    try {
      webClient.getOptions().setThrowExceptionOnScriptError(false);
      webClient.setJavaScriptTimeout(10000);
      webClient.getOptions().setJavaScriptEnabled(true);
      webClient.setAjaxController(new NicelyResynchronizingAjaxController());
      webClient.getOptions().setTimeout(10000);

      HtmlPage currentPage = webClient.getPage("http://www.yandex.ru/");

      HtmlAnchor advancedSearchAn = currentPage.getAnchorByText("Завести ящик");
      currentPage = advancedSearchAn.click();

      String imageXPath = "//img[@src='images/ash2008.jpg']";
      HtmlImage image = currentPage.<HtmlImage>getFirstByXPath(imageXPath);
      if (image == null) {
        // getFirstByXPath yields null when nothing matches; fail with a readable message instead
        // of a NullPointerException on the click below.
        throw new IllegalStateException("No element matches XPath: " + imageXPath);
      }
      currentPage = (HtmlPage) image.click();

      System.out.println(currentPage.asXml());

      System.out.println("It is done.");
    } finally {
      // Always release the client, including when a page load or element lookup fails.
      webClient.closeAllWindows();
    }
  }
예제 #5
0
  /**
   * Loads {@code url} and polls the page, up to ten times at two-second intervals, until at least
   * one {@code <object>} element is present; the matches are then printed to standard output.
   *
   * <p>Any exception raised while loading or polling is printed with {@code printStackTrace()} and
   * not propagated.
   *
   * @param url address of the page to load
   */
  public static void homePage(String url) {
    // Create the web client.
    WebClient webClient = new WebClient();
    try {
      webClient.getOptions().setCssEnabled(false);
      webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
      webClient.setAjaxController(new NicelyResynchronizingAjaxController());
      webClient.getOptions().setThrowExceptionOnScriptError(false);
      // NOTE(review): no page has been loaded at this point, so these two waits presumably return
      // immediately; kept as-is to avoid changing timing. Confirm whether they were meant to run
      // after getPage().
      webClient.waitForBackgroundJavaScript(1000);
      webClient.waitForBackgroundJavaScriptStartingBefore(1000);

      final int times = 10;
      try {
        // Fetch the page.
        HtmlPage page = webClient.getPage(url);

        for (int i = 0; i < times; i++) {
          // Pause for two seconds before inspecting the DOM.
          synchronized (page) {
            page.wait(2000);
          }
          // getByXPath returns an empty list, never null, when nothing matches, so test for
          // emptiness; the original null check was always true and stopped after the first pass.
          Object[] objects = page.getByXPath("//object").toArray();
          if (objects.length > 0) {
            System.out.println("***************************yes**********************");
            for (Object object : objects) {
              System.out.println(object);
            }
            break;
          }
          System.out.println("***************************no***************************");
        }
      } catch (InterruptedException e) {
        // Preserve the interrupt for callers further up the stack.
        Thread.currentThread().interrupt();
        e.printStackTrace();
      } catch (Exception e) {
        e.printStackTrace();
      }
    } finally {
      // Release the client's windows and background JavaScript threads.
      webClient.closeAllWindows();
    }
  }
예제 #6
0
  /**
   * Creates a {@link WebClient} emulating Firefox 10 with CSS, JavaScript and redirects enabled,
   * insecure SSL accepted, and script errors not raised as exceptions.
   *
   * <p>CSS parse problems and HTML incorrectness notifications are routed to handlers that
   * currently do nothing.
   *
   * @return the configured client
   */
  public static WebClient buildWebClient() {
    // CSS error handler whose callbacks are all empty for now.
    final ErrorHandler cssErrorHandler =
        new ErrorHandler() {
          @Override
          public void warning(CSSParseException exception) throws CSSException {
            // nothing to do here
          }

          @Override
          public void error(CSSParseException exception) throws CSSException {
            // todo: log or throw exception
          }

          @Override
          public void fatalError(CSSParseException exception) throws CSSException {
            // todo: log or throw exception
          }
        };

    // Incorrectness listener whose callback is empty for now.
    final IncorrectnessListener incorrectnessListener =
        new IncorrectnessListener() {
          @Override
          public void notify(String message, Object origin) {
            // todo: analyze and throw exception
          }
        };

    final WebClient client = new WebClient(BrowserVersion.FIREFOX_10);
    client.setAjaxController(new NicelyResynchronizingAjaxController());
    client.getOptions().setCssEnabled(true);
    client.getOptions().setJavaScriptEnabled(true);
    client.getOptions().setUseInsecureSSL(true);

    client.setCssErrorHandler(cssErrorHandler);
    client.setIncorrectnessListener(incorrectnessListener);

    client.waitForBackgroundJavaScript(100000);
    client.getOptions().setThrowExceptionOnScriptError(false);
    client.getOptions().setRedirectEnabled(true);

    return client;
  }
  /**
   * Executes the test page in the browser and stores what it produced.
   *
   * <p>The start time is recorded, the page at {@code testUrl} is loaded with JavaScript enabled,
   * and the tests and results are read from it. A {@code ScriptException} is not propagated; it is
   * wrapped in a {@code QUnitException} and kept in {@code error}.
   *
   * @throws IOException if the test resource cannot be read; the original exception is the cause
   */
  public void run() throws IOException {
    try {
      startTime = System.currentTimeMillis();

      browser.setAjaxController(new NicelyResynchronizingAjaxController());
      browser.setJavaScriptEnabled(true);

      final HtmlPage testPage = browser.getPage(testUrl);

      // Give background JavaScript time to finish updating the DOM before it is read.
      // See: http://htmlunit.sourceforge.net/faq.html
      browser.waitForBackgroundJavaScript(10000);

      readTests(testPage);
      readResults(testPage);

    } catch (ScriptException scriptFailure) {
      error = new QUnitException(scriptFailure);
    } catch (IOException ioFailure) {
      IOException wrapped = new IOException("Cannot read the test resource.");
      wrapped.initCause(ioFailure);
      throw wrapped;
    }
  }
예제 #8
0
  /**
   * Crawls the "top search" ranking page of every category in {@code TopCategoryUrl} and writes
   * the parsed rows to {@code ShuTaobaoTop/search-<date>}, where the date is yesterday formatted
   * with {@code sdf}.
   *
   * <p>A category is retried until it is parsed successfully. Each failure is reported on standard
   * error. If the thread is interrupted the crawl stops and the rows written so far are kept.
   *
   * @param args ignored
   * @throws IOException if the output file cannot be opened, written or closed
   */
  public static void main(String[] args) throws IOException {
    WebClient client = new WebClient(BrowserVersion.CHROME);
    try {
      client.setCookieManager(new CookieManager());
      client.getOptions().setJavaScriptEnabled(true);
      client.getOptions().setActiveXNative(false);
      client.getOptions().setCssEnabled(false);
      client.getOptions().setThrowExceptionOnScriptError(false);
      client.getOptions().setThrowExceptionOnFailingStatusCode(false);
      client.getOptions().setDoNotTrackEnabled(true);
      client.getOptions().setPrintContentOnFailingStatusCode(false);
      client.setAjaxController(new NicelyResynchronizingAjaxController());
      client.setJavaScriptTimeout(Long.MAX_VALUE);

      List<Integer> pendingCategories = new LinkedList<Integer>(TopCategoryUrl.keySet());
      Collections.sort(pendingCategories);

      // The output file is named after yesterday's date.
      long ymd = new Date().getTime() - 24L * 60 * 60 * 1000;
      String date = sdf.format(ymd);
      BufferedWriter bw = new BufferedWriter(new FileWriter("ShuTaobaoTop/search-" + date, false));
      try {
        while (!pendingCategories.isEmpty()) {
          int cid = pendingCategories.get(0);
          String cname = TopCategoryUrl.get(cid);
          String topUrl = "http://shu.taobao.com/top/" + cid + "/search";

          // Stays null unless the whole category was fetched and parsed without error.
          List<String> rows = null;
          try {
            // Keep trying to log in until it succeeds.
            do {
              System.out.println("剩余类目数=" + pendingCategories.size());
            } while (!AccountLogin.loginTaoBao(client, AccountLogin.TAOBAOLOGINURL, true));

            HtmlPage queryTrade = client.getPage(topUrl);
            Thread.sleep(1000L);
            System.out.println(queryTrade.getTitleText());

            rows = parseTopSearchRows(Jsoup.parse(queryTrade.asXml()), cid, cname);
          } catch (InterruptedException e) {
            // Restore the flag and stop: retrying would make the next sleep fail immediately.
            Thread.currentThread().interrupt();
            System.err.println("Interrupted while processing category " + cid + "; stopping.");
            break;
          } catch (Exception e) {
            // Best effort: the category stays at the head of the list and is retried, but the
            // failure is no longer silent.
            System.err.println("Failed to process category " + cid + ", retrying: " + e);
          }

          if (rows != null) {
            pendingCategories.remove(0);
            System.out.println("剩余类目数=" + pendingCategories.size());
            for (String row : rows) {
              bw.write(row + "\n");
            }
          }
        }
      } finally {
        // Closing the buffered writer flushes it and closes the underlying FileWriter.
        bw.close();
      }
    } finally {
      // Release the client's windows and background JavaScript threads.
      client.closeAllWindows();
    }
  }

  /** Parses one ranking page into {@code \001}-separated rows, echoing each row to stdout. */
  private static List<String> parseTopSearchRows(Document doc, int cid, String cname)
      throws IOException {
    List<String> rows = new ArrayList<String>();
    String pid = "0";
    String pname = "";

    // Text has the form "<4-char label>: 2014-05-17 - 2014-05-23", so the two dates start at
    // offsets 6 and 19.
    Element time = doc.getElementsByAttributeValue("class", "time").get(0);
    String startTime = time.text().substring(6, 16).replace("-", "");
    String endTime = time.text().substring(19).replace("-", "");

    // Ranking modules look like <div class="mod odd mod-10 sm-ua"> <h3 class="title">.
    Elements modules = doc.getElementsByAttributeValueStarting("class", "mod ");
    for (Element module : modules) {
      String title = module.getElementsByTag("h3").get(0).text();
      System.out.println(title);
      // Entries look like <li class="up ">.
      Elements lis = module.getElementsByTag("ol").get(0).getElementsByTag("li");
      for (Element li : lis) {
        Elements spans = li.getElementsByTag("span");
        String rank = spans.get(0).text();
        String key = spans.get(1).text();
        String rise = spans.get(2).text();
        // Drop the last character of the rise value.
        rise = rise.substring(0, rise.length() - 1);

        URL href =
            new URL(
                URLDecoder.decode(
                    "http://shu.taobao.com"
                        + spans.get(1).getElementsByTag("a").get(0).attr("href"),
                    "utf-8"));
        String hrefQuery = href.getQuery();
        // getQuery() is null for links without a query string.
        if (hrefQuery != null && hrefQuery.contains("cid=")) {
          // The first link carrying its own cid makes the page's category the parent.
          if ("0".equals(pid)) {
            pid = String.valueOf(cid);
            pname = cname;
          }
          cid = Integer.parseInt(hrefQuery.split("cid=")[1].split("&")[0]);
          cname = title;
        }

        String row =
            startTime + "\001" + endTime + "\001" + cid + "\001" + cname + "\001" + pid + "\001"
                + pname + "\001" + rank + "\001" + key + "\001" + rise;
        System.out.println(row);
        rows.add(row);
      }
    }
    return rows;
  }