예제 #1
0
 /**
  * Loads the resource identified by the {@code data} field in an emulated Firefox 24 browser
  * with JavaScript and CSS enabled, and returns the resulting page serialized as XML.
  *
  * <p>Failing status codes and I/O errors are printed to standard error and reported as
  * {@code null}. Script errors are configured to throw; they are not handled here.
  *
  * @return the page as XML, or {@code null} when the page could not be loaded
  */
 public String getParsedPage() {
   // Text passed to JavaScript alert() is collected in this list by the alert handler.
   List<String> collectedAlerts = new LinkedList<String>();
   WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24);
   try {
     webClient.setAjaxController(new MyNicelyResynchronizingAjaxController());
     webClient.getOptions().setJavaScriptEnabled(true);
     webClient.getOptions().setTimeout(3500);
     webClient.getOptions().setThrowExceptionOnScriptError(true);
     webClient.getOptions().setCssEnabled(true);
     // The previous revision called isRedirectEnabled() here and discarded the result; that
     // getter changes nothing, so the redirect setting is simply left untouched.
     webClient.setAlertHandler(new CollectingAlertHandler(collectedAlerts));
     webClient.setJavaScriptEngine(new JavaScriptEngine(webClient));

     HtmlPage page = webClient.getPage(data);
     // Serialize before the client is closed in the finally block.
     return page != null ? page.asXml() : null;
   } catch (FailingHttpStatusCodeException e) {
     e.printStackTrace();
   } catch (IOException e) {
     // MalformedURLException is an IOException, so it is reported here as well.
     e.printStackTrace();
   } finally {
     // Release the windows and background threads owned by the client on every path.
     webClient.closeAllWindows();
   }
   return null;
 }
예제 #2
0
  /**
   * Opens a page, clicks one of its elements and reports the URLs seen by the AJAX controller.
   *
   * @param targetUrl address of the page to open
   * @param clickOfXpath XPath expression selecting the candidate elements to click
   * @param index position, within the XPath result list, of the element to click
   * @return a list whose first entry is the page title, followed by the URL reported by the
   *     controller after the initial load and the URL reported after the click
   * @throws FailingHttpStatusCodeException if the server answers with a failing status code
   * @throws MalformedURLException if {@code targetUrl} is not a valid URL
   * @throws IOException if loading the page or performing the click fails
   */
  public static List<String> getAjaxUrl(String targetUrl, String clickOfXpath, int index)
      throws FailingHttpStatusCodeException, MalformedURLException, IOException {
    List<String> urls = new LinkedList<String>();
    // The controller exposes the address of the AJAX request through getVisitUrl().
    MyNicelyResynchronizingAjaxController ajaxController =
        new MyNicelyResynchronizingAjaxController();
    // Text passed to JavaScript alert() is collected in this list by the alert handler.
    List<String> collectedAlerts = new LinkedList<String>();

    // Emulate a browser.
    WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24);
    try {
      webClient.getOptions().setJavaScriptEnabled(true);
      webClient.getOptions().setCssEnabled(false);
      webClient.setAjaxController(ajaxController);
      webClient.getOptions().setTimeout(35000);
      webClient.getOptions().setThrowExceptionOnScriptError(false);
      webClient.setAlertHandler(new CollectingAlertHandler(collectedAlerts));

      // Open the target address.
      HtmlPage rootPage = webClient.getPage(targetUrl);
      urls.add(rootPage.getTitleText());
      urls.add(ajaxController.getVisitUrl());

      HtmlElement clickTarget = (HtmlElement) rootPage.getByXPath(clickOfXpath).get(index);
      // Only the controller state after the click matters; the returned page is not needed.
      clickTarget.click();
      urls.add(ajaxController.getVisitUrl());
      return urls;
    } finally {
      // Release the client even when loading or clicking throws.
      webClient.closeAllWindows();
    }
  }
예제 #3
0
파일: HttpUtil.java 프로젝트: douzh/mydisk
 /**
  * Creates a Firefox 24 flavoured {@link WebClient} that runs JavaScript, follows redirects,
  * skips CSS and does not throw on script errors or failing status codes. The installed AJAX
  * controller prints the URL of every request it is asked about to standard output.
  *
  * @return a freshly configured client
  * @author douzh
  */
 public static WebClient getClient() {
   WebClient browser = new WebClient(BrowserVersion.FIREFOX_24);

   // Features that are switched on.
   browser.getOptions().setJavaScriptEnabled(true);
   browser.getOptions().setRedirectEnabled(true);
   browser.getOptions().setGeolocationEnabled(true);

   // Features that are switched off.
   browser.getOptions().setActiveXNative(false);
   browser.getOptions().setCssEnabled(false);

   // Neither script errors nor failing status codes are raised as exceptions.
   browser.getOptions().setThrowExceptionOnScriptError(false);
   browser.getOptions().setThrowExceptionOnFailingStatusCode(false);

   NicelyResynchronizingAjaxController urlPrintingController =
       new NicelyResynchronizingAjaxController() {
         @Override
         public boolean processSynchron(HtmlPage page, WebRequest settings, boolean async) {
           // Trace the request URL, then defer the decision to the stock implementation.
           System.out.println(settings.getUrl());
           return super.processSynchron(page, settings, async);
         }
       };
   browser.setAjaxController(urlPrintingController);
   return browser;
 }
예제 #4
0
  /**
   * Demo entry point: opens the Yandex home page, follows the "Завести ящик" link, clicks an
   * image on the resulting page and prints that page's XML to standard output.
   *
   * @param args unused
   * @throws Exception if loading or navigating fails, or the expected image is missing
   */
  public static void main(String[] args) throws Exception {
    WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24);
    try {
      webClient.getOptions().setThrowExceptionOnScriptError(false);
      webClient.setJavaScriptTimeout(10000);
      webClient.getOptions().setJavaScriptEnabled(true);
      webClient.setAjaxController(new NicelyResynchronizingAjaxController());
      webClient.getOptions().setTimeout(10000);

      HtmlPage currentPage = webClient.getPage("http://www.yandex.ru/");

      HtmlAnchor advancedSearchAn = currentPage.getAnchorByText("Завести ящик");
      currentPage = advancedSearchAn.click();

      String imageXPath = "//img[@src='images/ash2008.jpg']";
      HtmlImage image = currentPage.<HtmlImage>getFirstByXPath(imageXPath);
      if (image == null) {
        // getFirstByXPath yields null when nothing matches; fail with a readable message
        // instead of a bare NullPointerException.
        throw new IllegalStateException("No image matches XPath " + imageXPath);
      }
      currentPage = (HtmlPage) image.click();

      System.out.println(currentPage.asXml());

      System.out.println("It is done.");
    } finally {
      // Close the client on the failure paths too.
      webClient.closeAllWindows();
    }
  }
예제 #5
0
  /**
   * Loads {@code url} and polls the page up to ten times, two seconds apart, until it contains at
   * least one {@code <object>} element; the matching elements are printed to standard output.
   *
   * <p>Failures are printed to standard error; nothing is returned.
   *
   * @param url address of the page to inspect
   */
  public static void homePage(String url) {
    // Create the client; CSS is disabled and neither failing status codes nor script errors
    // are raised as exceptions.
    WebClient webClient = new WebClient();
    webClient.getOptions().setCssEnabled(false);
    webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
    webClient.setAjaxController(new NicelyResynchronizingAjaxController());
    webClient.getOptions().setThrowExceptionOnScriptError(false);
    // NOTE(review): these waits run before any page is loaded, so there is presumably nothing
    // to wait for yet - confirm whether they were meant to follow getPage().
    webClient.waitForBackgroundJavaScript(1000);
    webClient.waitForBackgroundJavaScriptStartingBefore(1000);

    int times = 10;
    try {
      // Fetch the page.
      HtmlPage page = webClient.getPage(url);

      for (int i = 0; i < times; i++) {
        synchronized (page) {
          page.wait(2000);
        }
        // Evaluate the XPath once per round. The result is a list, so "not null" was always
        // true and the loop used to stop after the first round; test for emptiness instead.
        java.util.List<?> objects = page.getByXPath("//object");
        if (!objects.isEmpty()) {
          System.out.println("***************************yes**********************");
          for (Object element : objects) {
            System.out.println(element);
          }
          break;
        }
        System.out.println("***************************no***************************");
      }
    } catch (InterruptedException e) {
      // Keep the interrupt visible to the caller's thread.
      Thread.currentThread().interrupt();
      e.printStackTrace();
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      webClient.close();
    }
  }
예제 #6
0
  /**
   * Logs in to KEATS through <b>HtmlUnit</b> and fetches the text of a page behind the login.
   *
   * @see com.gargoylesoftware.htmlunit
   * @param parent the parent window, used to position the error dialogs
   * @param link the URL to retrieve once logged in
   * @param username the username to log in to KEATS with
   * @param password the password to log in to KEATS with
   * @return the text content of {@code link}, or {@code null} when the login form cannot be found
   *     or a URL/I-O error occurs (the latter two are also reported in a dialog)
   */
  public static String login(Scrape parent, String link, String username, String password) {
    WebClient client = new WebClient();
    try {
      // Settings
      client.getOptions().setThrowExceptionOnScriptError(false);
      client.getOptions().setThrowExceptionOnFailingStatusCode(false);
      client.getOptions().setJavaScriptEnabled(false);
      client.getOptions().setCssEnabled(false);
      client.getOptions().setRedirectEnabled(true);
      // NOTE(review): insecure SSL is enabled while credentials are being submitted; confirm
      // this is still required.
      client.getOptions().setUseInsecureSSL(true);
      client.getCookieManager().setCookiesEnabled(true);

      HtmlPage page = client.getPage("https://login-keats.kcl.ac.uk/");

      HtmlForm form =
          page.getFirstByXPath("//form[@action='https://keats.kcl.ac.uk/login/index.php']");
      if (form == null) {
        // The login page did not contain the expected form; report failure as documented.
        return null;
      }

      HtmlInput usernameInput = form.getInputByName("username");
      usernameInput.setValueAttribute(username);
      HtmlInput passwordInput = form.getInputByName("password");
      passwordInput.setValueAttribute(password);

      // Submit the form; the session is kept by the client, so the returned page is not needed.
      form.getInputByValue("Log in").click();

      HtmlPage results = client.getPage(link);
      return results.asText();
    } catch (MalformedURLException e) {
      JOptionPane.showMessageDialog(
          parent,
          "The URL you have provided is not recognised. Please double check your "
              + "input and try again.",
          "MalformedURLException found.",
          JOptionPane.ERROR_MESSAGE);
      e.printStackTrace();
    } catch (IOException e) {
      JOptionPane.showMessageDialog(
          parent,
          "An error has occurred when attempting to read information from the "
              + "server. The input could be interrupted by external processes.",
          "IOException found.",
          JOptionPane.ERROR_MESSAGE);
      e.printStackTrace();
    } finally {
      // Previously the client was only closed on the success path.
      client.closeAllWindows();
    }

    return null;
  }
예제 #7
0
  /**
   * Tunes the shared {@code webClient} for speed by switching off everything optional and
   * returns it.
   *
   * @return the {@code webClient} field after reconfiguration
   */
  public WebClient getFastWebClient() {
    // TODO: the client's AJAX support could be enabled here to follow changes of the technical
    // indicators.

    // Rendering and scripting features that are not needed.
    webClient.getOptions().setJavaScriptEnabled(false);
    webClient.getOptions().setCssEnabled(false);
    webClient.getOptions().setAppletEnabled(false);
    webClient.getOptions().setActiveXNative(false);
    webClient.getOptions().setGeolocationEnabled(false);

    // A failing status code neither throws nor dumps the response content.
    webClient.getOptions().setPrintContentOnFailingStatusCode(false);
    webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);

    return webClient;
  }
예제 #8
0
 /** Creates a JavaScript-enabled client and reads the base URL under test from a property. */
 @Before
 public void setUp() {
   webClient = new WebClient();
   webClient.setJavaScriptTimeout(60000);
   webClient.getOptions().setJavaScriptEnabled(true);

   // Base URL of the deployment, supplied as a system property.
   webUrl = System.getProperty("integration.url");
 }
예제 #9
0
 /** Routes the shared client through the local proxy, bypassing it for 127.0.0.1. */
 @Before
 public void setUp() {
   webClient.getOptions().setTimeout(2000);

   ProxyConfig localProxy = new ProxyConfig("localhost", proxyPort);
   localProxy.addHostsToProxyBypass("127.0.0.1");
   webClient.getOptions().setProxyConfig(localProxy);
 }
예제 #10
0
  /**
   * Ignored scraping exercise: prints the title, body text and image sources of one article.
   *
   * @throws Exception if the article cannot be loaded
   */
  @Test
  @Ignore
  public void test3() throws Exception {
    System.out.println("-------------------------------");

    WebClient webClient = new WebClient(BrowserVersion.CHROME);
    try {
      webClient.getOptions().setCssEnabled(false);
      webClient.getOptions().setJavaScriptEnabled(false);
      HtmlPage page =
          webClient.getPage(
              "http://www.zjnu.edu.cn/news/common/article_show.aspx?article_id=19285");

      // Title
      System.out.println("---------------标题----------------");
      HtmlSpan span1 = (HtmlSpan) page.getElementById("mytitle");
      System.out.println(span1.asText());
      System.out.println("-------------------------------");

      // Body text
      System.out.println("---------------正文----------------");
      HtmlSpan span2 = (HtmlSpan) page.getElementById("mycontent");
      System.out.println(span2.asText());
      System.out.println("-------------------------------");

      // Images inside the body
      System.out.println("---------------图片----------------");
      DomNodeList<HtmlElement> elements = span2.getElementsByTagName("img");
      for (HtmlElement element : elements) {
        System.out.println(element.getAttribute("src"));
      }
      System.out.println("-------------------------------");
    } finally {
      // Close the client even when loading or a lookup above throws.
      webClient.close();
    }
    System.out.println("-------------------------------");
  }
예제 #11
0
파일: Crawl.java 프로젝트: OliveLv/MyUtils
 /**
  * Fetches {@code url} with a Chrome-flavoured client that has CSS processing disabled.
  *
  * @param url address of the page to load
  * @return the loaded page
  * @throws FailingHttpStatusCodeException if the server answers with a failing status code
  * @throws MalformedURLException if {@code url} is not a valid URL
  * @throws IOException if the page cannot be read
  */
 public static HtmlPage getPage(String url)
     throws FailingHttpStatusCodeException, MalformedURLException, IOException {
   WebClient browser = new WebClient(BrowserVersion.CHROME);
   browser.getOptions().setCssEnabled(false);
   return browser.getPage(url);
 }
예제 #12
0
  /**
   * Loads {@code url} without JavaScript or CSS and returns the page text split into lines.
   * Lines whose cleaned-up form (via {@code getTextFromHtml}) is at least 30 characters long are
   * also printed to standard output.
   *
   * @param url address of the page to read
   * @return the text lines of the page; empty when the connection fails or reading fails
   * @throws FailingHttpStatusCodeException if the server answers with a failing status code
   * @throws MalformedURLException declared for callers; I/O problems are reported, not thrown
   * @throws IOException declared for callers; I/O problems are reported, not thrown
   */
  public static ArrayList<String> trans(String url)
      throws FailingHttpStatusCodeException, MalformedURLException, IOException {

    ArrayList<String> hrefList = new ArrayList<String>();
    WebClient webClient = new WebClient(BrowserVersion.CHROME);
    webClient.getOptions().setJavaScriptEnabled(false);
    webClient.getOptions().setCssEnabled(false);
    try {
      HtmlPage page;
      try {
        page = (HtmlPage) webClient.getPage(url);
      } catch (ConnectException e) {
        System.out.println("Connect fails here:" + e.getMessage());
        // Without a page there is nothing to read; previously this fell through to an NPE.
        return hrefList;
      }

      // Read the text line by line straight from the String. The earlier bytes/stream round
      // trip went through the platform charset and could mangle non-ASCII text.
      BufferedReader br = new BufferedReader(new java.io.StringReader(page.asText()));
      String line;
      while ((line = br.readLine()) != null) {
        hrefList.add(line);
      }

      System.out.println("从该网址查找的可能相关文本如下:");
      for (int i = 0; i < hrefList.size(); i++) {
        String string = getTextFromHtml(hrefList.get(i));
        if (string.length() >= 30) {
          System.out.println("------" + i + ":" + string);
        }
      }
    } catch (IOException e) {
      // Still best effort, but no longer silent: say what went wrong and return what we have.
      System.err.println("Reading " + url + " failed: " + e);
    } finally {
      webClient.close();
    }
    return hrefList;
  }
예제 #13
0
 /**
  * Loads {@code url} with JavaScript disabled, emulating the search engine's default browser.
  *
  * @param url address of the page to load
  * @return the loaded page
  * @throws IOException if the page cannot be read
  */
 private HtmlPage getPage(String url) throws IOException {
   // The browser flavour is resolved by name from the search engine configuration.
   final WebClient client =
       new WebClient(getBrowserVersionFromName(searchEngine.getDefaultBrowser()));
   client.getOptions().setJavaScriptEnabled(false);
   final HtmlPage loaded = client.getPage(url);
   return loaded;
 }
예제 #14
0
  /**
   * Ignored scraping exercise: clicks through the pagination links of the Baidu travel listing
   * and, for every entry, logs its title, detail URL, image sources and summary paragraph.
   *
   * @throws Exception if loading, clicking or sleeping fails
   */
  @Test
  @Ignore
  public void test01() throws Exception {
    System.out.println("-------------------------------");
    WebClient webClient = new WebClient(BrowserVersion.CHROME);
    try {
      WebClientOptions options = webClient.getOptions();
      options.setJavaScriptEnabled(true);
      options.setCssEnabled(false);
      options.setThrowExceptionOnFailingStatusCode(false);
      options.setThrowExceptionOnScriptError(false);
      options.setTimeout(50000);

      HtmlPage listingPage = webClient.getPage("http://lvyou.baidu.com/jinhua/jingdian");
      // Pause before looking for the pagination bar.
      Thread.sleep(5000);

      HtmlDivision pagination =
          (HtmlDivision) listingPage.querySelectorAll(".pagination").get(0);
      DomNodeList<HtmlElement> pageItems = pagination.getElementsByTagName("li");
      // The last <li> is left out of the walk.
      int pageCount = pageItems.size() - 1;

      for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
        HtmlAnchor pageLink =
            (HtmlAnchor) pageItems.get(pageIndex).getElementsByTagName("a").get(0);
        HtmlPage page = pageLink.click();
        Thread.sleep(10000);

        DomNodeList<HtmlElement> entries =
            page.getElementById("J-view-list-container").getElementsByTagName("li");
        for (HtmlElement entry : entries) {
          // Title
          HtmlAnchor titleLink = (HtmlAnchor) entry.querySelectorAll(".title").get(0);
          System.out.println("---------------标题----------------");
          log.debug("{}", titleLink.asText());

          // Detail URL and the images nested in the picture link
          HtmlAnchor pictureLink = (HtmlAnchor) entry.querySelectorAll(".pic").get(0);
          System.out.println("---------------详情URL----------------");
          String detailUrl = "http://lvyou.baidu.com" + pictureLink.getAttribute("href");
          log.debug("{}", detailUrl);
          for (HtmlElement picture : pictureLink.getElementsByTagName("img")) {
            System.out.println("---------------图片----------------");
            log.debug("{}", picture.getAttribute("src"));
          }

          // Summary: first paragraph of the first ".view-userSays" block
          HtmlDivision summaryBox =
              (HtmlDivision) entry.querySelectorAll(".view-userSays").get(0);
          HtmlParagraph summary = (HtmlParagraph) summaryBox.getElementsByTagName("p").get(0);
          System.out.println("---------------摘要----------------");
          log.debug("{}", summary.asText());
        }
      }
    } finally {
      webClient.close();
    }
    System.out.println("-------------------------------");
  }
예제 #15
0
 /**
  * Requests {@code faces/setForEach.xhtml} and checks that the response is a 200 whose markup
  * contains "bar" and "baz" but not "foo".
  *
  * @throws Exception if the page cannot be loaded
  */
 @JsfTest(JsfVersion.JSF_2_2_5)
 @Test
 public void testSetForEach() throws Exception {
   webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
   HtmlPage page = webClient.getPage(webUrl + "faces/setForEach.xhtml");
   assertEquals(200, page.getWebResponse().getStatusCode());

   // Serialize once and run all three checks against the same markup.
   String markup = page.asXml();
   assertTrue(!markup.contains("foo"));
   assertTrue(markup.contains("bar"));
   assertTrue(markup.contains("baz"));
 }
예제 #16
0
  /**
   * Silences HtmlUnit and HttpClient logging, builds a client with CSS and JavaScript disabled,
   * hands it to a {@code Request} and then keeps the main thread parked.
   *
   * @param args unused
   * @throws Exception if the request fails or the main thread is interrupted while parked
   */
  public static void main(String[] args) throws Exception {
    LogFactory.getFactory()
        .setAttribute("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.NoOpLog");

    java.util.logging.Logger.getLogger("com.gargoylesoftware.htmlunit").setLevel(Level.OFF);
    java.util.logging.Logger.getLogger("org.apache.commons.httpclient").setLevel(Level.OFF);
    WebClient client = new WebClient();
    client.getOptions().setCssEnabled(false);
    client.getOptions().setJavaScriptEnabled(false);
    client.getOptions().setThrowExceptionOnFailingStatusCode(false);
    client.getOptions().setThrowExceptionOnScriptError(false);

    Request r = new Request();
    r.getRequest(client);

    // Keep the main thread alive without the former `while (true) ;` busy loop, which pinned a
    // CPU core: a thread joining itself blocks until it is interrupted.
    Thread.currentThread().join();
  }
예제 #17
0
  /**
   * Ignored scraping exercise: logs headline, link, teaser and image sources of every
   * ".item-top" block on the 163.com domestic news page, and the body text and images of one
   * specific article.
   *
   * @throws Exception if a page cannot be loaded
   */
  @Test
  @Ignore
  public void test() throws Exception {
    System.out.println("-------------------------------");

    WebClient webClient = new WebClient(BrowserVersion.CHROME);
    try {
      webClient.getOptions().setCssEnabled(false);
      webClient.getOptions().setJavaScriptEnabled(false);
      HtmlPage page = webClient.getPage("http://news.163.com/domestic/");

      // Headlines
      System.out.println("---------------标题----------------");
      DomNodeList<DomNode> domNodes = page.querySelectorAll(".item-top");
      for (DomNode domNode : domNodes) {
        HtmlDivision htmlDivision = (HtmlDivision) domNode;
        DomNodeList<HtmlElement> aElements = htmlDivision.getElementsByTagName("a");
        HtmlAnchor htmlAnchor = (HtmlAnchor) aElements.get(0);
        log.debug("{}", htmlAnchor.asText());
        log.debug("{}", htmlAnchor.getAttribute("href"));

        DomNodeList<HtmlElement> pElements = htmlDivision.getElementsByTagName("p");
        HtmlParagraph htmlParagraph = (HtmlParagraph) pElements.get(0);
        log.debug("{}", htmlParagraph.asText());

        DomNodeList<HtmlElement> iEelements = htmlDivision.getElementsByTagName("img");
        for (HtmlElement iEelement : iEelements) {
          log.debug("{}", iEelement.getAttribute("src"));
        }

        // Only this one article is opened for its details.
        String detailUrl = htmlAnchor.getAttribute("href");
        if (detailUrl.equals("http://news.163.com/15/1215/17/BAT2L8RB00014JB6.html#f=dlist")) {
          HtmlPage detailPage = webClient.getPage(detailUrl);
          // Body text
          System.out.println("---------------正文----------------");
          DomElement endTextElement = detailPage.getElementById("endText");
          log.debug("{}", endTextElement.asText());

          // Images inside the body
          System.out.println("---------------图片----------------");
          DomNodeList<DomNode> imgNodes = endTextElement.querySelectorAll(".f_center");
          for (DomNode imgNode : imgNodes) {
            HtmlParagraph imgpara = (HtmlParagraph) imgNode;
            DomNodeList<HtmlElement> endImgs = imgpara.getElementsByTagName("img");
            for (HtmlElement endImg : endImgs) {
              log.debug("{}", endImg.getAttribute("src"));
            }
          }
        }
      }
    } finally {
      // Close the client even when a load, lookup or cast above throws.
      webClient.close();
    }
    System.out.println("-------------------------------");
  }
예제 #18
0
 /**
  * Loads one qixin.com search result page with a fixed set of session cookies and prints the
  * text and absolute URL of every anchor whose class is {@code search-result-title}.
  *
  * <p>If the page cannot be loaded the stack trace is printed and the method returns.
  */
 @Override
 public void run() {
   // Create the client; JavaScript and CSS are switched off.
   WebClient webClient = new WebClient();
   try {
     webClient.getOptions().setJavaScriptEnabled(false);
     webClient.getOptions().setCssEnabled(false);

     // NOTE(review): these look like captured session credentials hard-coded in source;
     // consider loading them from configuration instead.
     Cookie cookie =
         new Cookie(
             "www.qixin.com",
             "login_returnurl",
             "http%3A//www.qixin.com/search/prov/SH%3Fpage%3D2");
     Cookie cookie1 =
         new Cookie(
             "www.qixin.com",
             "userKey",
             "QXBAdmin-Web2.0_5tUrhr/6EVtLT+GVfE+vU8k330y+oPICCM6jhUGEoLc%3D");
     Cookie cookie2 =
         new Cookie("www.qixin.com", "userValue", "4a68111b-0cfa-457f-91bd-b6fda97fa524");
     Cookie cookie3 =
         new Cookie(
             "www.qixin.com",
             "gr_session_id_955c17a7426f3e98",
             "d25fe84e-fb1d-4ef8-8b4e-b530e5004b30");
     Cookie cookie4 =
         new Cookie("www.qixin.com", "_alicdn_sec", "5732cf53d99e48a838049be355d47a44000895ae");
     CookieManager cookieManager = new CookieManager();
     cookieManager.addCookie(cookie);
     cookieManager.addCookie(cookie2);
     cookieManager.addCookie(cookie3);
     cookieManager.addCookie(cookie1);
     cookieManager.addCookie(cookie4);
     webClient.setCookieManager(cookieManager);

     // Fetch the page.
     HtmlPage page;
     try {
       page = webClient.getPage("http://www.qixin.com/search/prov/SH?page=20");
     } catch (IOException e) {
       e.printStackTrace();
       // Nothing to scan; previously execution continued into a NullPointerException.
       return;
     }

     // Scan every anchor and print the search result titles with their absolute URL.
     for (Object node : page.getByXPath("//a")) {
       HtmlAnchor ha = (HtmlAnchor) node;
       if ("search-result-title".equals(ha.getAttribute("class"))) {
         System.out.println(ha.asText());
         System.out.println("http://www.qixin.com" + ha.getAttribute("href"));
       }
     }
   } finally {
     // Close the client on every path.
     webClient.close();
   }
 }
예제 #19
0
  /**
   * Builds a Firefox 10 flavoured client with CSS and JavaScript enabled, insecure SSL allowed,
   * redirects followed, script errors not thrown, and CSS/incorrectness reports discarded.
   *
   * @return the configured client
   */
  public static WebClient buildWebClient() {
    // Handler that drops every CSS parser report.
    ErrorHandler silentCssHandler =
        new ErrorHandler() {
          @Override
          public void warning(CSSParseException exception) throws CSSException {
            // intentionally ignored
          }

          @Override
          public void error(CSSParseException exception) throws CSSException {
            // TODO: log or throw
          }

          @Override
          public void fatalError(CSSParseException exception) throws CSSException {
            // TODO: log or throw
          }
        };

    // Listener that drops every incorrectness notification.
    IncorrectnessListener silentListener =
        new IncorrectnessListener() {
          @Override
          public void notify(String message, Object origin) {
            // TODO: analyse and throw
          }
        };

    WebClient client = new WebClient(BrowserVersion.FIREFOX_10);
    client.setAjaxController(new NicelyResynchronizingAjaxController());
    client.getOptions().setCssEnabled(true);
    client.getOptions().setJavaScriptEnabled(true);
    client.getOptions().setUseInsecureSSL(true);
    client.setCssErrorHandler(silentCssHandler);
    client.setIncorrectnessListener(silentListener);

    client.waitForBackgroundJavaScript(100000);
    client.getOptions().setThrowExceptionOnScriptError(false);
    client.getOptions().setRedirectEnabled(true);

    return client;
  }
 /**
  * Starts the server under test on a background thread the first time it is needed and sets
  * the client timeout to 30 seconds.
  *
  * @throws IOException declared for the test framework; not thrown directly by this method
  */
 @Before
 public void setUp() throws IOException {
   if (server == null) {
     Runnable launcher =
         new Runnable() {
           @Override
           public void run() {
             try {
               Main.main(args);
             } catch (IOException e) {
               // Surface start-up failures on the server thread as unchecked.
               throw new RuntimeException(e);
             }
           }
         };
     server = new Thread(launcher);
     server.start();
   }
   webClient.getOptions().setTimeout(30000);
 }
  /**
   * Executes one web test: issues a POST with the test's form data when its method is "post"
   * (any case), otherwise a plain page load of its URL, and passes the response to
   * {@code ProcessResult}. Failing status codes and I/O errors are logged as warnings.
   *
   * @param test the test definition to run
   */
  private void ProcessTest(WebTest test) {
    WebClient webClient = new WebClient();
    webClient.setRefreshHandler(new ThreadedRefreshHandler());
    webClient.getOptions().setJavaScriptEnabled(false);

    try {
      HtmlPage page;
      // Case-insensitive and null-safe; a missing method is treated like any non-POST method.
      if ("post".equalsIgnoreCase(test.getMethod())) {
        WebRequest request =
            new WebRequest(UrlUtils.toUrlUnsafe(test.getUrlString()), HttpMethod.POST);

        // Collect the form fields first, then attach them to the request in one step.
        ArrayList<NameValuePair> parameters = new ArrayList<NameValuePair>();
        for (Map.Entry<String, String> entry : test.getPostData().entrySet()) {
          parameters.add(new NameValuePair(entry.getKey(), entry.getValue()));
        }
        request.setRequestParameters(parameters);

        page = webClient.getPage(request);
      } else {
        page = webClient.getPage(test.getUrlString());
      }

      ProcessResult(test, page.getWebResponse());

    } catch (FailingHttpStatusCodeException fsc) {
      Orville.LOG()
          .warning(
              String.format(
                  "Failing HTTP Status code caught executing the test command: %s\n",
                  fsc.getMessage()));
    } catch (IOException ioe) {
      Orville.LOG()
          .warning(
              String.format(
                  "IO Exception caught executing the test command: %s\n", ioe.getMessage()));
    } finally {
      // Close the client even when an unexpected runtime exception escapes.
      webClient.closeAllWindows();
    }
  }
예제 #22
0
  /**
   * Submits the form on {@code faces/flashDropCookie.xhtml}, follows the "link" element on the
   * result and asserts that the client then holds fewer cookies than before the submit.
   *
   * @throws Exception if a page cannot be loaded or an element is missing
   */
  @JsfTest(JsfVersion.JSF_2_2_5)
  @Test
  public void testDropFlashCookie() throws Exception {
    HtmlPage page = webClient.getPage(webUrl + "faces/flashDropCookie.xhtml");
    webClient.getOptions().setRedirectEnabled(true);

    // Fill in the text field and locate the submit button.
    HtmlTextInput inputField = (HtmlTextInput) page.getHtmlElementById("input");
    inputField.setValueAttribute("test");
    HtmlSubmitInput submitButton = (HtmlSubmitInput) page.getHtmlElementById("submit");

    int cookiesBefore = webClient.getCookieManager().getCookies().size();

    // Submit, then follow the link on the resulting page.
    page = submitButton.click();
    HtmlElement link = page.getHtmlElementById("link");
    page = link.click();

    int cookiesAfter = webClient.getCookieManager().getCookies().size();

    assertTrue(cookiesAfter < cookiesBefore);
  }
 /**
  * Runs before each test: creates a fresh web client, loads the application's landing page and
  * verifies its title. An {@link IOException} during loading fails the test with a message
  * naming the test class and method.
  */
 @Before
 @RunAsClient
 public void setUp() {
   try {
     browser = new WebClient();
     browser.getOptions().setThrowExceptionOnScriptError(false);

     String landingUrl = deploymentUrl.toString() + "index.xhtml";
     landingPageResponse = browser.getPage(landingUrl);

     String actualTitle = landingPageResponse.getTitleText();
     Assert.assertEquals(
         "Could not load the application landing page.", "Cargo Tracker", actualTitle);
   } catch (IOException ex) {
     StringBuilder failure = new StringBuilder();
     failure.append("An IOException was thrown during the test setup for class \"");
     failure.append(ViewDetailTest.class.getSimpleName());
     failure.append("\" at method \"");
     failure.append(testName.getMethodName());
     failure.append("\" with message: ");
     failure.append(ex.getMessage());
     Assert.fail(failure.toString());
   }
 }
예제 #24
0
  /**
   * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN
   * RISK.</span><br>
   *
   * <p>Submits this form to the server. If <tt>submitElement</tt> is <tt>null</tt>, then the
   * submission is treated as if it was triggered by JavaScript, and the <tt>onsubmit</tt> handler
   * will not be executed.
   *
   * <p><b>IMPORTANT:</b> Using this method directly is not the preferred way of submitting forms.
   * Most consumers should emulate the user's actions instead, probably by using something like
   * {@link HtmlElement#click()} or {@link HtmlElement#dblClick()}.
   *
   * <p>Note on the return value as implemented here: when the submit event is prevented, or the
   * action is a {@code javascript:} URL, the page resulting from that script is returned (or the
   * current page when nothing executed or JavaScript is disabled). On the regular path the
   * request is handed to {@code WebClient#download} and the current page is returned.
   *
   * @param submitElement the element that caused the submit to occur
   * @return a new page that reflects the results of this submission
   */
  Page submit(final SubmittableElement submitElement) {
    final HtmlPage htmlPage = (HtmlPage) getPage();
    final WebClient webClient = htmlPage.getWebClient();
    if (webClient.getOptions().isJavaScriptEnabled()) {
      // The submit event is only fired for element-triggered submissions.
      if (submitElement != null) {
        // Reset the flag before firing so that it reflects this event only.
        isPreventDefault_ = false;
        final ScriptResult scriptResult = fireEvent(Event.TYPE_SUBMIT);
        if (isPreventDefault_) {
          // null means 'nothing executed'
          if (scriptResult == null) {
            return htmlPage;
          }
          return scriptResult.getNewPage();
        }
      }

      // A javascript: action is executed in place instead of being sent as a request.
      final String action = getActionAttribute().trim();
      if (StringUtils.startsWithIgnoreCase(action, JavaScriptURLConnection.JAVASCRIPT_PREFIX)) {
        return htmlPage
            .executeJavaScriptIfPossible(action, "Form action", getStartLineNumber())
            .getNewPage();
      }
    } else {
      // NOTE(review): unlike the JavaScript-enabled branch, the action is not trimmed before
      // the prefix check here - confirm whether that difference is intended.
      if (StringUtils.startsWithIgnoreCase(
          getActionAttribute(), JavaScriptURLConnection.JAVASCRIPT_PREFIX)) {
        // The action is JavaScript but JavaScript isn't enabled.
        // Return the current page.
        return htmlPage;
      }
    }

    // Regular submission: build the request and resolve the target it should be loaded into.
    final WebRequest request = getWebRequest(submitElement);
    final String target = htmlPage.getResolvedTarget(getTargetAttribute());

    final WebWindow webWindow = htmlPage.getEnclosingWindow();
    // The request is passed to the client's download method; this method does not wait for or
    // return the response itself.
    webClient.download(webWindow, target, request, false, "JS form.submit()");
    return htmlPage;
  }
예제 #25
0
파일: Page.java 프로젝트: vim951/Robot-D-ip
  /**
   * Downloads one page of the darts-ip site, stores its HTML as an entry of the shared zip
   * stream {@code Main.out} and then explores the links of the page.
   *
   * <p>Pages already listed in {@code Main.list}, and paths that do not contain
   * {@code https://www.darts-ip.com}, are ignored. Progress and errors are printed to standard
   * output; no exception is propagated from the download or zip steps.
   *
   * @param path URL of the page to save; also used (plus {@code .html}) as the zip entry name
   */
  public static void savePage(String path) {
    // Skip pages that were already visited or that lie outside the target site.
    if (Main.list.contains(path) || !path.contains("https://www.darts-ip.com")) {
      return;
    }

    System.out.println("");
    System.out.println("=================================================");
    System.out.println("");
    System.out.println(path);
    System.out.println("");

    Main.list.add(path);

    String fileName = path + ".html";
    String content = null;
    HtmlPage page = null;

    // ----------HTML-UNIT----------

    try {
      // NOTE(review): the client is not closed here because the page is still handed to
      // exploreLinks below - confirm whether it can be released afterwards.
      WebClient webClient = new WebClient();
      webClient.getOptions().setThrowExceptionOnScriptError(false);
      page = webClient.getPage(path);
      content = page.getWebResponse().getContentAsString();
    } catch (Exception e) {
      System.out.println("");
      System.out.println("ERROR in Page.savePage #HTML-UNIT");
      System.out.println(e);
      System.out.println("");
    }

    // ----------ZIP-HTML-FILE----------

    // The HTML is written straight into the zip entry. The former detour through a temporary
    // file named after the URL ("https://...") could not create that file on ordinary file
    // systems and leaked its streams when a step failed.
    if (content != null) {
      try {
        Main.out.putNextEntry(new ZipEntry(fileName));
        try {
          byte[] bytes = content.getBytes("UTF-8");
          Main.out.write(bytes, 0, bytes.length);
        } finally {
          // Always finish the entry so later entries can still be written.
          Main.out.closeEntry();
        }
      } catch (Exception e) {
        System.out.println("");
        System.out.println("ERROR in Page.savePage #ZIP-HTML-FILE");
        System.out.println(e);
        System.out.println("");
      }
    }

    System.out.println("");
    System.out.println("=================================================");

    // ----------EXPLORE-LINKS----------

    // As before, this is called even when the download failed and page is null.
    exploreLinks(page);
  }
예제 #26
0
  /**
   * Processes requests for both HTTP <code>GET</code> and <code>POST</code> methods.
   *
   * <p>Logs in to filestream.me through HtmlUnit, collects the download links found in the
   * {@code fileCatTable} table and starts a background thread that saves the link selected by the
   * {@code no} request parameter to {@code test.zip}. If {@code no} does not address a collected
   * link, a message is written to the response and no download is started.
   *
   * @param request servlet request; the {@code no} parameter is the zero-based index of the link
   *     to download and falls back to 0 when it is missing or not a number
   * @param response servlet response
   * @throws ServletException if a servlet-specific error occurs
   * @throws IOException if an I/O error occurs
   */
  protected void processRequest(HttpServletRequest request, HttpServletResponse response)
      throws ServletException, IOException {
    response.setContentType("text/html;charset=UTF-8");
    PrintWriter out = response.getWriter();

    int no;
    try {
      no = Integer.parseInt(request.getParameter("no"));
    } catch (NumberFormatException exception) {
      // Also reached when the parameter is absent: parseInt(null) throws NumberFormatException.
      no = 0;
    }

    try {
      out.println("<!DOCTYPE html>");
      out.println("<html>");
      out.println("<head>");
      out.println("<title>Servlet NewServlet</title>");
      out.println("</head>");
      out.println("<body>");

      final WebClient webClient = new WebClient();
      try {
        webClient.getCookieManager().clearCookies();
        webClient.getOptions().setUseInsecureSSL(true);
        webClient.setJavaScriptEnabled(false);
        final HtmlPage page = webClient.getPage("https://filestream.me/");
        System.out.println("no of forms : " + page.getForms().size());
        HtmlForm form = page.getForms().get(1);

        // NOTE(review): the credentials are hard-coded in the source; they should come from
        // configuration that is kept out of version control.
        final HtmlTextInput textField = form.getInputByName("login");
        textField.setAttribute("value", "*****@*****.**");
        final HtmlPasswordInput passField = form.getInputByName("password");
        passField.setAttribute("value", "ramkrishnan18");
        HtmlSubmitInput htmlSubmitInput = form.getInputByValue("login");
        HtmlPage page1 = htmlSubmitInput.click();

        List<String> allLinks = collectDownloadLinks(page1);

        out.println("************ Start *************");

        // "no" comes straight from the request, so it must be range-checked before indexing.
        if (no < 0 || no >= allLinks.size()) {
          out.println(
              "No download link at index " + no + " (" + allLinks.size() + " available)");
        } else {
          final String u = allLinks.get(no);
          Thread t =
              new Thread(
                  new Runnable() {
                    @Override
                    public void run() {
                      try {
                        downloadLink(u);
                      } catch (IOException ex) {
                        Logger.getLogger(MakeData.class.getName()).log(Level.SEVERE, null, ex);
                      }
                    }
                  });
          t.start();
        }
      } finally {
        // Release the browser windows even when the login or the scraping fails.
        webClient.closeAllWindows();
      }

      out.println("<h1>Servlet NewServlet at " + request.getContextPath() + "</h1>");
      out.println("</body>");
      out.println("</html>");
    } finally {
      out.close();
    }
  }

  /**
   * Collects the download URLs from the first body of the {@code fileCatTable} table: for every
   * row it looks at the {@code div} children of the last cell and takes the text between the
   * first two single quotes of the {@code onclick} attribute of the first child titled
   * {@code Download} or {@code Downloads}.
   */
  private static List<String> collectDownloadLinks(HtmlPage page1) {
    HtmlTable htmlTable = page1.getHtmlElementById("fileCatTable");
    List<HtmlTableRow> listOfHtmlTableRow = htmlTable.getBodies().get(0).getRows();
    List<String> allLinks = new ArrayList<String>();
    for (HtmlTableRow htmlTableRow : listOfHtmlTableRow) {
      HtmlTableCell cell = htmlTableRow.getCells().get(htmlTableRow.getCells().size() - 1);
      for (DomElement domElement : cell.getChildElements()) {
        if (!domElement.getTagName().equals("div")) {
          continue;
        }
        for (DomElement celldomElement : domElement.getChildElements()) {
          String title = celldomElement.getAttribute("title");
          if ("Download".equals(title) || "Downloads".equals(title)) {
            String link = celldomElement.getAttribute("onclick");
            int open = link.indexOf('\'');
            int close = link.indexOf('\'', open + 1);
            // Ignore handlers without a quoted URL instead of failing in substring().
            if (open >= 0 && close > open) {
              allLinks.add(link.substring(open + 1, close));
              break; // only the first download element of each div is used
            }
          }
        }
      }
    }
    return allLinks;
  }

  /**
   * Downloads {@code u} into {@code test.zip} under the directory named by the
   * {@code OPENSHIFT_JBOSSAS_DIR} environment variable, logging the response headers and status
   * to standard output. Both streams are closed even when the transfer fails.
   */
  private static void downloadLink(String u) throws IOException {
    URL url = new URL(u);
    System.out.println("Link : " + u);
    HttpURLConnection con = (HttpURLConnection) url.openConnection();

    // NOTE(review): gzip/deflate is requested but the body is written to disk without decoding;
    // confirm the server never compresses these downloads.
    con.setRequestProperty("Accept-Encoding", "gzip,deflate");
    con.setRequestProperty(
        "User-Agent",
        "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36");

    Map<String, List<String>> map = con.getHeaderFields();
    for (String object : map.keySet()) {
      System.out.println(object + " --> " + map.get(object));
    }

    System.out.println("Response Code : " + con.getResponseCode());
    System.out.println("length :" + con.getContentLength());
    System.out.println("Message " + con.getResponseMessage());

    InputStream inputStream = con.getInputStream();
    try {
      // NOTE(review): if the variable is unset the file name becomes "nulltest.zip"; the value
      // is also expected to end with a path separator.
      String path = System.getenv("OPENSHIFT_JBOSSAS_DIR");
      FileOutputStream fileOutputStream = new FileOutputStream(path + "test.zip");
      try {
        byte[] buffer = new byte[1024];
        System.out.println(inputStream.available());
        int bytesRead;
        while ((bytesRead = inputStream.read(buffer)) != -1) {
          fileOutputStream.write(buffer, 0, bytesRead);
        }
      } finally {
        fileOutputStream.close();
      }
    } finally {
      inputStream.close();
    }

    System.out.println("File Completed");
  }
예제 #27
0
  /**
   * Scrapes the shu.taobao.com "top search" page of every category id in {@code TopCategoryUrl}
   * and writes one line per ranked keyword to the file {@code ShuTaobaoTop/search-<date>}, where
   * the date is the current time minus 24 hours formatted by {@code sdf}.
   *
   * <p>Each line holds start time, end time, category id, category name, parent id, parent name,
   * rank, keyword and rise, separated by the control character written as {@code "\001"} in the
   * code. A category is only removed from the work list after it was processed successfully; a
   * failure is reported on standard error and the same category is tried again. An interrupt
   * stops the run.
   *
   * @param args unused
   * @throws IOException if the output file cannot be opened or written
   */
  public static void main(String[] args) throws IOException {

    WebClient client = new WebClient(BrowserVersion.CHROME);
    CookieManager cookie = new CookieManager();
    client.setCookieManager(cookie);
    client.getOptions().setJavaScriptEnabled(true);
    client.getOptions().setActiveXNative(false);
    client.getOptions().setCssEnabled(false);
    client.getOptions().setThrowExceptionOnScriptError(false);
    client.getOptions().setThrowExceptionOnFailingStatusCode(false);
    client.getOptions().setDoNotTrackEnabled(true);
    client.getOptions().setPrintContentOnFailingStatusCode(false);
    client.setAjaxController(new NicelyResynchronizingAjaxController());
    client.setJavaScriptTimeout(Long.MAX_VALUE);

    List<Integer> pendingCategories = new LinkedList<Integer>(TopCategoryUrl.keySet());
    Collections.sort(pendingCategories);

    long today = new Date().getTime();
    long ymd = today - 1000 * 60 * 60 * 24;
    String date = sdf.format(ymd);
    BufferedWriter bw = new BufferedWriter(new FileWriter("ShuTaobaoTop/search-" + date, false));
    try {
      while (!pendingCategories.isEmpty()) {
        int cid = pendingCategories.get(0);
        String cname = TopCategoryUrl.get(cid);
        String pid = "0", pname = "";
        String topUrl = "http://shu.taobao.com/top/" + cid + "/search";

        boolean success = false;
        List<String> result = new ArrayList<String>();
        try {
          // Log in again before each category; repeat until the login succeeds.
          do {
            System.out.println("剩余类目数=" + pendingCategories.size());
          } while (!AccountLogin.loginTaoBao(client, AccountLogin.TAOBAOLOGINURL, true));

          HtmlPage queryTrade = client.getPage(topUrl);
          Thread.sleep(1000L);
          System.out.println(queryTrade.getTitleText());

          // Parse the rendered page.
          Document doc = Jsoup.parse(queryTrade.asXml());
          Element time = doc.getElementsByAttributeValue("class", "time").get(0);
          // The fixed offsets match a text such as "统计时间: 2014-05-17 - 2014-05-23"
          // ("statistics period: <start> - <end>").
          String startTime = time.text().substring(6, 16).replace("-", "");
          String endTime = time.text().substring(19).replace("-", "");
          // Modules look like <div class="mod odd mod-10 sm-ua"> <h3 class="title">.
          Elements elements = doc.getElementsByAttributeValueStarting("class", "mod ");
          for (Element element : elements) {
            String title = element.getElementsByTag("h3").get(0).text();
            System.out.println(title);
            // Ranking entries look like <li class="up ">.
            Elements lis = element.getElementsByTag("ol").get(0).getElementsByTag("li");
            for (Element li : lis) {
              Elements spans = li.getElementsByTag("span");
              String rank = spans.get(0).text();
              String key = spans.get(1).text();
              String rise = spans.get(2).text();
              // Drop the last character of the rise text.
              rise = rise.substring(0, rise.length() - 1);

              URL href =
                  new URL(
                      URLDecoder.decode(
                          "http://shu.taobao.com"
                              + spans.get(1).getElementsByTag("a").get(0).attr("href"),
                          "utf-8"));
              // getQuery() returns null for links without a query string.
              String hrefQuery = href.getQuery();
              if (hrefQuery != null && hrefQuery.contains("cid=")) {
                if ("0".equals(pid)) {
                  // First sub-category link seen: the category being scraped becomes the parent.
                  pid = String.valueOf(cid);
                  pname = cname;
                }
                cid = Integer.parseInt(hrefQuery.split("cid=")[1].split("&")[0]);
                cname = title;
              }

              String line =
                  startTime + "\001" + endTime + "\001" + cid + "\001" + cname + "\001" + pid
                      + "\001" + pname + "\001" + rank + "\001" + key + "\001" + rise;
              System.out.println(line);
              result.add(line);
            }
          }
          success = true;
        } catch (InterruptedException e) {
          // Restore the flag and stop instead of looping on an interrupted thread.
          Thread.currentThread().interrupt();
          System.err.println("Interrupted while processing " + topUrl + ", stopping");
          break;
        } catch (Exception e) {
          // The category stays at the head of the list and is retried on the next iteration.
          System.err.println("Failed to process " + topUrl + ", retrying: " + e);
        }
        if (success) {
          pendingCategories.remove(0);
          System.out.println("剩余类目数=" + pendingCategories.size());
          for (String r : result) {
            bw.write(r + "\n");
          }
        }
      }
    } finally {
      // Closing the BufferedWriter also closes the FileWriter it wraps.
      bw.close();
    }
  }
예제 #28
0
 /**
  * Requests {@code faces/compositionWithNullTemplate.xhtml} with exceptions on failing status
  * codes switched off and expects the page text to contain "Invalid path".
  */
 @Test
 public void testNullInCompositionTemplate() throws Exception {
   webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);

   final String url = webUrl + "faces/compositionWithNullTemplate.xhtml";
   final HtmlPage page = webClient.getPage(url);
   final String renderedText = page.asText();

   assertTrue(renderedText.contains("Invalid path"));
 }
예제 #29
0
 /**
  * Configures the client options: failing status codes do not raise exceptions and JavaScript
  * is disabled.
  */
 public InternetConnection() {
   // Broken javascript links kept causing exceptions even with JavaScript switched off,
   // so failing status codes must not throw.
   client.getOptions().setThrowExceptionOnFailingStatusCode(false);
   client.getOptions().setJavaScriptEnabled(false);
 }
예제 #30
0
 /**
  * Prepares the fixture before each test: reads the base URL from the {@code integration.url}
  * system property and creates a client that throws on failing status codes.
  */
 @Before
 public void setUp() {
   webUrl = System.getProperty("integration.url");

   final WebClient client = new WebClient();
   client.getOptions().setThrowExceptionOnFailingStatusCode(true);
   webClient = client;
 }