public String getParsedPage() { List<String> alertHandler = new LinkedList<String>(); ; WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24); // CHROME); webClient.setAjaxController(new MyNicelyResynchronizingAjaxController()); webClient.getOptions().setJavaScriptEnabled(true); webClient.getOptions().setTimeout(3500); webClient.getOptions().setThrowExceptionOnScriptError(true); webClient.getOptions().setCssEnabled(true); webClient.getOptions().isRedirectEnabled(); webClient.setAlertHandler( new CollectingAlertHandler(alertHandler)); // 将JavaScript中alert标签产生的数据保存在一个链表中 // webClient.getOptions().setThrowExceptionOnScriptError(false); HtmlPage page = null; JavaScriptEngine engine = new JavaScriptEngine(webClient); webClient.setJavaScriptEngine(engine); try { page = webClient.getPage(data); } catch (FailingHttpStatusCodeException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } if (page != null) { return page.asXml(); } return null; }
/** * get the ajax url from the click button * * @param clickOfXpath:页面待点击按钮的xpath表达式 * @param index * @return List<String>:链表的第一个信息是页面的title,以后的信息是所有的ajax的url */ public static List<String> getAjaxUrl(String targetUrl, String clickOfXpath, int index) throws FailingHttpStatusCodeException, MalformedURLException, IOException { // TARGET_URL = // "http://app.flyme.cn/apps/public/detail?package_name=com.myzaker.zaker_phone_smartbar"; List<String> urls = new LinkedList<String>(); // 每次ajax请求时都会创建一个AjaxController对象,在该对象中可以查看ajax请求的地址 MyNicelyResynchronizingAjaxController ajaxController = new MyNicelyResynchronizingAjaxController(); List alertHandler = new LinkedList(); // 模拟一个浏览器 WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24); // HtmlUnitDriver // 设置webClient的相关参数 webClient.getOptions().setJavaScriptEnabled(true); webClient.getOptions().setCssEnabled(false); webClient.setAjaxController(ajaxController); webClient.getOptions().setTimeout(35000); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.setAlertHandler( new CollectingAlertHandler(alertHandler)); // 将JavaScript中alert标签产生的数据保存在一个链表中 // 模拟浏览器打开一个目标网址 HtmlPage rootPage = webClient.getPage(targetUrl); urls.add(rootPage.getTitleText()); urls.add(ajaxController.getVisitUrl()); // System.out.println("url1:" + url); HtmlElement elementA = (HtmlElement) rootPage.getByXPath(clickOfXpath).get(index); Page page = elementA.click(); urls.add(ajaxController.getVisitUrl()); return urls; }
/** * @return * @author douzh * @time 2015-4-23下午4:36:05 */ public static WebClient getClient() { WebClient client = new WebClient(BrowserVersion.FIREFOX_24); client.getOptions().setJavaScriptEnabled(true); client.getOptions().setActiveXNative(false); client.getOptions().setCssEnabled(false); client.getOptions().setRedirectEnabled(true); client.getOptions().setThrowExceptionOnScriptError(false); client.getOptions().setThrowExceptionOnFailingStatusCode(false); client.getOptions().setGeolocationEnabled(true); // client.addWebWindowListener(new WebWindowListener() { // public void webWindowOpened(WebWindowEvent event) { // System.out.println("Web Window Openning"); // } // // public void webWindowContentChanged(WebWindowEvent event) { // System.out.println("Web Content Changed"); // } // // public void webWindowClosed(WebWindowEvent event) { // System.out.println("Web Window Closed"); // } // }); client.setAjaxController( new NicelyResynchronizingAjaxController() { public boolean processSynchron(HtmlPage page, WebRequest settings, boolean async) { System.out.println(settings.getUrl()); return super.processSynchron(page, settings, async); } }); return client; }
public static void main(String[] args) throws Exception { // WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24, "54.186.230.121", 3128); WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.setJavaScriptTimeout(10000); webClient.getOptions().setJavaScriptEnabled(true); webClient.setAjaxController(new NicelyResynchronizingAjaxController()); webClient.getOptions().setTimeout(10000); // webClient.getOptions().setJavaScriptEnabled(false); // webClient.getOptions().setAppletEnabled(false); // webClient.getOptions().setCssEnabled(false); // webClient.getOptions().setThrowExceptionOnScriptError(false); // webClient.setJavaScriptTimeout(10000); // webClient.getOptions().setJavaScriptEnabled(true); // webClient.setAjaxController(new NicelyResynchronizingAjaxController()); // webClient.getOptions().setTimeout(10000); // webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); // webClient.getOptions().setThrowExceptionOnScriptError(false); HtmlPage currentPage = webClient.getPage("http://www.yandex.ru/"); // HtmlPage currentPage = webClient.getPage("http://www.google.ru"); // HtmlDivision div = currentPage.getHtmlElementById("del_competitors-1_42"); // HtmlElement clickable = (HtmlElement) // currentPage.getHtmlElementById("del_competitors-1_42"); // currentPage = (HtmlPage) clickable.click(); HtmlAnchor advancedSearchAn = currentPage.getAnchorByText("Завести ящик"); currentPage = advancedSearchAn.click(); HtmlImage image = currentPage.<HtmlImage>getFirstByXPath("//img[@src='images/ash2008.jpg']"); currentPage = (HtmlPage) image.click(); System.out.println(currentPage.asXml()); // HtmlImage image = // currentPage.<HtmlImage>getFirstByXPath("//img[@src='images/ash2008.jpg']"); // currentPage = (HtmlPage) image.click(); // HtmlImage imagetosave = // currentPage.<HtmlImage>getFirstByXPath("//img[@src='//yastatic.net/www/1.977/yaru/i/logo.png']"); // HtmlImage image = 
currentPage.<HtmlImage>getHtmlElementById("add_competitors-1_3"); // currentPage = (HtmlPage) image.click(); // File imageFile = new File("test_new.jpg"); // image.saveAs(imageFile); // System.out.println(currentPage.asXml()); System.out.println("It is done."); webClient.closeAllWindows(); }
public static void homePage(String url) { String str; // 创建一个webclient WebClient webClient = new WebClient(); // webClient.getWebConsole().setLogger(null); // //htmlunit 对css和javascript的支持不好,所以请关闭之 webClient.getOptions().setCssEnabled(false); // webClient.getOptions().setUseInsecureSSL(true); webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); // webClient.getCookieManager().setCookiesEnabled(true); webClient.setAjaxController(new NicelyResynchronizingAjaxController()); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.waitForBackgroundJavaScript(1000); webClient.waitForBackgroundJavaScriptStartingBefore(1000); // 获取页面 HtmlPage page = null; int times = 10; try { page = webClient.getPage(url); for (int i = 0; i < times; i++) { synchronized (page) { page.wait(2000); } if (page.getByXPath("//object") != null) { System.out.println("***************************yes**********************"); for (int j = 0; j < page.getByXPath("//object").size(); j++) { System.out.println(page.getByXPath("//object").get(j)); } break; } System.out.println("***************************no***************************"); } } catch (Exception e) { e.printStackTrace(); } // 获取页面的TITLE // str = page.getTitleText(); // System.out.println(str); // //获取页面的XML代码 // System.out.println("***************************start***************************"); // System.out.println(page.asXml()); // System.out.println("***************************end***************************"); // //获取页面的文本 // str = page.asText(); // System.out.println(str); }
/** * Static method for <code>Login</code>. Uses dependencies <b>HttpUnit</b> in connecting to Keats. * * @see com.gargoylesoftware.htmlunit * @param parent The parent window. instanceof<code>Scrape</code>, to set relative locations to * @param link The url of which to retrieve information from. * @param username The username to log in KEATS with. * @param password The password to log in KEATS with. * @return results.asText() Returns the content of the url if login successful. Returns null * otherwise. */ public static String login(Scrape parent, String link, String username, String password) { try { WebClient client = new WebClient(); // Settings client.getOptions().setThrowExceptionOnScriptError(false); client.getOptions().setThrowExceptionOnScriptError(false); client.getOptions().setThrowExceptionOnFailingStatusCode(false); client.getOptions().setJavaScriptEnabled(false); client.getOptions().setCssEnabled(false); client.getOptions().setRedirectEnabled(true); client.getOptions().setUseInsecureSSL(true); client.getCookieManager().setCookiesEnabled(true); HtmlPage page = client.getPage("https://login-keats.kcl.ac.uk/"); HtmlForm form = page.getFirstByXPath("//form[@action='https://keats.kcl.ac.uk/login/index.php']"); HtmlInput usernameInput = form.getInputByName("username"); usernameInput.setValueAttribute(username); HtmlInput passwordInput = form.getInputByName("password"); passwordInput.setValueAttribute(password); page = form.getInputByValue("Log in").click(); HtmlPage results = client.getPage(link); client.closeAllWindows(); return results.asText(); } catch (MalformedURLException e) { JOptionPane.showMessageDialog( parent, "The URL you have provided is not recognised. Please double check your " + "input and try again.", "MalformedURLException found.", JOptionPane.ERROR_MESSAGE); e.printStackTrace(); } catch (IOException e) { JOptionPane.showMessageDialog( parent, "An error has occurred when attempting to read information from the " + "server. 
The input could be interrupted by external processes.", "IOException found.", JOptionPane.ERROR_MESSAGE); e.printStackTrace(); } return null; }
/** * 获取速度最快的浏览器 * * @return */ public WebClient getFastWebClient() { // TODO 这里可以启动webclient的ajax功能,读取技术指标的变化 webClient.getOptions().setCssEnabled(false); // if you don't need css webClient.getOptions().setJavaScriptEnabled(false); // if you don't need js webClient.getOptions().setActiveXNative(false); webClient.getOptions().setAppletEnabled(false); webClient.getOptions().setGeolocationEnabled(false); // webClient.getOptions().setDoNotTrackEnabled(true); //不追踪隐私 webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); // 页面有错,不抛出异常 webClient.getOptions().setPrintContentOnFailingStatusCode(false); // 页面有错,不打印 return webClient; }
/**
 * Creates a fresh JavaScript-enabled client with a 60000 ms script timeout and reads the base
 * URL under test from the {@code integration.url} system property.
 */
@Before
public void setUp() {
  webClient = new WebClient();
  webClient.setJavaScriptTimeout(60000);
  webClient.getOptions().setJavaScriptEnabled(true);

  webUrl = System.getProperty("integration.url");
}
/**
 * Points the client at the proxy on {@code localhost:proxyPort}, adds {@code 127.0.0.1} to the
 * proxy bypass list, and sets the client timeout to 2000 ms.
 */
@Before
public void setUp() {
  webClient.getOptions().setTimeout(2000);

  ProxyConfig localProxy = new ProxyConfig("localhost", proxyPort);
  localProxy.addHostsToProxyBypass("127.0.0.1");
  webClient.getOptions().setProxyConfig(localProxy);
}
@Test @Ignore public void test3() throws Exception { System.out.println("-------------------------------"); WebClient webClient = new WebClient(BrowserVersion.CHROME); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setJavaScriptEnabled(false); HtmlPage page = webClient.getPage("http://www.zjnu.edu.cn/news/common/article_show.aspx?article_id=19285"); System.out.println("---------------标题----------------"); HtmlSpan span1 = (HtmlSpan) page.getElementById("mytitle"); System.out.println(span1.asText()); System.out.println("-------------------------------"); System.out.println("---------------正文----------------"); HtmlSpan span2 = (HtmlSpan) page.getElementById("mycontent"); System.out.println(span2.asText()); System.out.println("-------------------------------"); System.out.println("---------------图片----------------"); DomNodeList<HtmlElement> elements = span2.getElementsByTagName("img"); for (HtmlElement element : elements) { System.out.println(element.getAttribute("src")); } // log.debug("{}", elements); System.out.println("-------------------------------"); webClient.close(); System.out.println("-------------------------------"); }
/**
 * Fetches {@code url} with a new Chrome-emulating client that has CSS processing disabled.
 *
 * <p>NOTE(review): the client created here is not closed by this method and is not returned to
 * the caller directly.
 *
 * @param url address of the page to load
 * @return the loaded page
 * @throws FailingHttpStatusCodeException propagated from the client
 * @throws MalformedURLException propagated from the client
 * @throws IOException propagated from the client
 */
public static HtmlPage getPage(String url)
    throws FailingHttpStatusCodeException, MalformedURLException, IOException {
  WebClient chrome = new WebClient(BrowserVersion.CHROME);
  chrome.getOptions().setCssEnabled(false);
  return chrome.getPage(url);
}
public static ArrayList<String> trans(String url) throws FailingHttpStatusCodeException, MalformedURLException, IOException { ArrayList<String> hrefList = new ArrayList<String>(); WebClient webClient = new WebClient(BrowserVersion.CHROME); webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setCssEnabled(false); try { HtmlPage page = null; try { page = (HtmlPage) webClient.getPage(url); } catch (ConnectException e) { System.out.println("Connect fails here:" + e.getMessage()); } InputStream temp = new ByteArrayInputStream(page.asText().getBytes()); InputStreamReader isr = new InputStreamReader(temp); BufferedReader br = new BufferedReader(isr); String str = null, rs = null; while ((str = br.readLine()) != null) { rs = str; // System.out.println(rs); if (rs != null) hrefList.add(rs); } System.out.println("从该网址查找的可能相关文本如下:"); for (int i = 0; i < hrefList.size(); i++) { String string = hrefList.get(i); string = getTextFromHtml(string); if (string.length() >= 30) System.out.println("------" + i + ":" + string); } } catch (IOException e) { } return hrefList; }
private HtmlPage getPage(String url) throws IOException { final WebClient webClient = new WebClient( getBrowserVersionFromName( searchEngine.getDefaultBrowser())); // BrowserVersion.FIREFOX_24); webClient.getOptions().setJavaScriptEnabled(false); return webClient.getPage(url); }
@Test @Ignore public void test01() throws Exception { System.out.println("-------------------------------"); WebClient webClient = new WebClient(BrowserVersion.CHROME); try { WebClientOptions options = webClient.getOptions(); options.setThrowExceptionOnFailingStatusCode(false); options.setThrowExceptionOnScriptError(false); options.setCssEnabled(false); options.setJavaScriptEnabled(true); options.setTimeout(50000); // webClient.setAjaxController(new NicelyResynchronizingAjaxController()); HtmlPage pageOrgin = webClient.getPage("http://lvyou.baidu.com/jinhua/jingdian"); Thread.sleep(5000); DomNodeList<DomNode> pageNodes = pageOrgin.querySelectorAll(".pagination"); HtmlDivision pageDiv = (HtmlDivision) pageNodes.get(0); DomNodeList<HtmlElement> liElements = pageDiv.getElementsByTagName("li"); Integer pageSize = liElements.size() - 1; for (int pageNow = 0; pageNow < pageSize; pageNow++) { DomNodeList<HtmlElement> pageAnchors = liElements.get(pageNow).getElementsByTagName("a"); HtmlAnchor pageAnchor = (HtmlAnchor) pageAnchors.get(0); HtmlPage page = pageAnchor.click(); Thread.sleep(10000); DomElement jViewDom = page.getElementById("J-view-list-container"); DomNodeList<HtmlElement> lis = jViewDom.getElementsByTagName("li"); for (HtmlElement li : lis) { DomNodeList<DomNode> titleNodes = li.querySelectorAll(".title"); HtmlAnchor titleAnchor = (HtmlAnchor) titleNodes.get(0); System.out.println("---------------标题----------------"); log.debug("{}", titleAnchor.asText()); DomNodeList<DomNode> picNodes = li.querySelectorAll(".pic"); HtmlAnchor picAnchor = (HtmlAnchor) picNodes.get(0); System.out.println("---------------详情URL----------------"); String detailUrl = "http://lvyou.baidu.com" + picAnchor.getAttribute("href"); log.debug("{}", detailUrl); DomNodeList<HtmlElement> imgEelements = picAnchor.getElementsByTagName("img"); for (HtmlElement imgEelement : imgEelements) { System.out.println("---------------图片----------------"); log.debug("{}", imgEelement.getAttribute("src")); 
} DomNodeList<DomNode> sumNodes = li.querySelectorAll(".view-userSays"); HtmlDivision sumDiv = (HtmlDivision) sumNodes.get(0); DomNodeList<HtmlElement> sumElements = sumDiv.getElementsByTagName("p"); HtmlParagraph sumPara = (HtmlParagraph) sumElements.get(0); System.out.println("---------------摘要----------------"); log.debug("{}", sumPara.asText()); } } } finally { webClient.close(); } System.out.println("-------------------------------"); }
/**
 * Requests {@code faces/setForEach.xhtml} and checks that it answers with status 200 and that
 * the rendered markup contains "bar" and "baz" but not "foo".
 *
 * @throws Exception if the page cannot be loaded
 */
@JsfTest(JsfVersion.JSF_2_2_5)
@Test
public void testSetForEach() throws Exception {
  webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);

  HtmlPage page = webClient.getPage(webUrl + "faces/setForEach.xhtml");
  assertEquals(200, page.getWebResponse().getStatusCode());

  String markup = page.asXml();
  assertTrue(!markup.contains("foo"));
  assertTrue(markup.contains("bar"));
  assertTrue(markup.contains("baz"));
}
public static void main(String[] args) throws Exception { LogFactory.getFactory() .setAttribute("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.NoOpLog"); java.util.logging.Logger.getLogger("com.gargoylesoftware.htmlunit").setLevel(Level.OFF); java.util.logging.Logger.getLogger("org.apache.commons.httpclient").setLevel(Level.OFF); WebClient client = new WebClient(); client.getOptions().setCssEnabled(false); client.getOptions().setJavaScriptEnabled(false); client.getOptions().setThrowExceptionOnFailingStatusCode(false); client.getOptions().setThrowExceptionOnScriptError(false); // HtmlPage page = client.getPage(url); // String source = page.getWebResponse().getContentAsString(); // p(source); Request r = new Request(); r.getRequest(client); while (true) ; }
@Test @Ignore public void test() throws Exception { System.out.println("-------------------------------"); WebClient webClient = new WebClient(BrowserVersion.CHROME); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setJavaScriptEnabled(false); HtmlPage page = webClient.getPage("http://news.163.com/domestic/"); // DomNodeList<HtmlElement> elements = page.getElementBy System.out.println("---------------标题----------------"); DomNodeList<DomNode> domNodes = page.querySelectorAll(".item-top"); // log.debug("{}", domNodes); for (DomNode domNode : domNodes) { HtmlDivision htmlDivision = (HtmlDivision) domNode; DomNodeList<HtmlElement> aElements = htmlDivision.getElementsByTagName("a"); HtmlAnchor htmlAnchor = (HtmlAnchor) aElements.get(0); // HTMLHeadingElement htmlHeading2 = (HTMLHeadingElement) // htmlDivision.getElementsByTagName("h2"); // HtmlAnchor htmlAnchor = (HtmlAnchor) htmlDivision.getElementsByTagName("a"); log.debug("{}", htmlAnchor.asText()); log.debug("{}", htmlAnchor.getAttribute("href")); DomNodeList<HtmlElement> pElements = htmlDivision.getElementsByTagName("p"); HtmlParagraph htmlParagraph = (HtmlParagraph) pElements.get(0); log.debug("{}", htmlParagraph.asText()); DomNodeList<HtmlElement> iEelements = htmlDivision.getElementsByTagName("img"); for (HtmlElement iEelement : iEelements) { log.debug("{}", iEelement.getAttribute("src")); } String detailUrl = htmlAnchor.getAttribute("href"); if (detailUrl.equals("http://news.163.com/15/1215/17/BAT2L8RB00014JB6.html#f=dlist")) { HtmlPage detailPage = webClient.getPage(detailUrl); System.out.println("---------------正文----------------"); DomElement endTextElement = detailPage.getElementById("endText"); log.debug("{}", endTextElement.asText()); System.out.println("---------------图片----------------"); DomNodeList<DomNode> imgNodes = endTextElement.querySelectorAll(".f_center"); for (DomNode imgNode : imgNodes) { HtmlParagraph imgpara = (HtmlParagraph) imgNode; DomNodeList<HtmlElement> endImgs = 
imgpara.getElementsByTagName("img"); for (HtmlElement endImg : endImgs) { log.debug("{}", endImg.getAttribute("src")); } } } } webClient.close(); System.out.println("-------------------------------"); }
@Override public void run() { String str; // 创建一个webclient WebClient webClient = new WebClient(); // htmlunit 对css和javascript的支持不好,所以请关闭之 webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setCssEnabled(false); Cookie cookie = new Cookie( "www.qixin.com", "login_returnurl", "http%3A//www.qixin.com/search/prov/SH%3Fpage%3D2"); Cookie cookie1 = new Cookie( "www.qixin.com", "userKey", "QXBAdmin-Web2.0_5tUrhr/6EVtLT+GVfE+vU8k330y+oPICCM6jhUGEoLc%3D"); Cookie cookie2 = new Cookie("www.qixin.com", "userValue", "4a68111b-0cfa-457f-91bd-b6fda97fa524"); Cookie cookie3 = new Cookie( "www.qixin.com", "gr_session_id_955c17a7426f3e98", "d25fe84e-fb1d-4ef8-8b4e-b530e5004b30"); Cookie cookie4 = new Cookie("www.qixin.com", "_alicdn_sec", "5732cf53d99e48a838049be355d47a44000895ae"); CookieManager cookieManager = new CookieManager(); cookieManager.addCookie(cookie); cookieManager.addCookie(cookie2); cookieManager.addCookie(cookie3); cookieManager.addCookie(cookie1); cookieManager.addCookie(cookie4); webClient.setCookieManager(cookieManager); // 获取页面 HtmlPage page = null; try { page = webClient.getPage("http://www.qixin.com/search/prov/SH?page=20"); } catch (IOException e) { e.printStackTrace(); } // 获取页面的XML代码 List<HtmlAnchor> hbList = (List<HtmlAnchor>) page.getByXPath("//a"); Iterator iterator = hbList.iterator(); while (iterator.hasNext()) { HtmlAnchor ha = (HtmlAnchor) iterator.next(); if ("search-result-title".equals(ha.getAttribute("class"))) { System.out.println(ha.asText()); System.out.println("http://www.qixin.com" + ha.getAttribute("href")); } } // 关闭webclient webClient.close(); }
public static WebClient buildWebClient() { WebClient webClient = new WebClient(BrowserVersion.FIREFOX_10); webClient.setAjaxController(new NicelyResynchronizingAjaxController()); webClient.getOptions().setCssEnabled(true); webClient.getOptions().setJavaScriptEnabled(true); webClient.getOptions().setUseInsecureSSL(true); webClient.setCssErrorHandler( new ErrorHandler() { @Override public void warning(CSSParseException exception) throws CSSException { // nothing to do here } @Override public void error(CSSParseException exception) throws CSSException { // todo: log or throw exception } @Override public void fatalError(CSSParseException exception) throws CSSException { // todo: log or throw exception } }); webClient.setIncorrectnessListener( new IncorrectnessListener() { @Override public void notify(String message, Object origin) { // todo: analyze and throw exception } }); webClient.waitForBackgroundJavaScript(100000); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.getOptions().setRedirectEnabled(true); return webClient; }
/**
 * Starts the application under test on a background thread the first time it is called (while
 * {@code server} is still {@code null}) and sets the client timeout to 30000 ms.
 *
 * @throws IOException declared for callers; not thrown by the current body
 */
@Before
public void setUp() throws IOException {
  if (server == null) {
    // Runs Main.main(args); an IOException from it is rethrown unchecked on that thread.
    Runnable launcher =
        new Runnable() {
          public void run() {
            try {
              Main.main(args);
            } catch (IOException e) {
              throw new RuntimeException(e);
            }
          }
        };
    server = new Thread(launcher);
    server.start();
  }

  webClient.getOptions().setTimeout(30000);
}
private void ProcessTest(WebTest test) { HtmlPage page = null; // Orville.LOG().info(String.format("Executing test %s\n", event)); WebClient webClient = new WebClient(); webClient.setRefreshHandler(new ThreadedRefreshHandler()); webClient.getOptions().setJavaScriptEnabled(false); try { if (test.getMethod().toLowerCase().equals("post")) { WebRequest request = new WebRequest(UrlUtils.toUrlUnsafe(test.getUrlString()), HttpMethod.POST); request.setRequestParameters(new ArrayList<NameValuePair>()); for (Map.Entry<String, String> entry : test.getPostData().entrySet()) { request.getRequestParameters().add(new NameValuePair(entry.getKey(), entry.getValue())); } page = webClient.getPage(request); } else { page = webClient.getPage(test.getUrlString()); } ProcessResult(test, page.getWebResponse()); } catch (FailingHttpStatusCodeException fsc) { Orville.LOG() .warning( String.format( "Failing HTTP Status code caught executing the test command: %s\n", fsc.getMessage())); } catch (IOException ioe) { Orville.LOG() .warning( String.format( "IO Exception caught executing the test command: %s\n", ioe.getMessage())); } webClient.closeAllWindows(); }
/**
 * Submits the form on {@code faces/flashDropCookie.xhtml}, follows the element with id
 * {@code link} on the result, and checks that the client holds fewer cookies afterwards than
 * it did just before the submit.
 *
 * @throws Exception if a page cannot be loaded or an expected element is missing
 */
@JsfTest(JsfVersion.JSF_2_2_5)
@Test
public void testDropFlashCookie() throws Exception {
  HtmlPage formPage = webClient.getPage(webUrl + "faces/flashDropCookie.xhtml");
  webClient.getOptions().setRedirectEnabled(true);

  HtmlTextInput inputField = (HtmlTextInput) formPage.getHtmlElementById("input");
  inputField.setValueAttribute("test");
  HtmlSubmitInput submitButton = (HtmlSubmitInput) formPage.getHtmlElementById("submit");

  // Cookie count immediately before the submit.
  int cookiesBefore = webClient.getCookieManager().getCookies().size();

  HtmlPage afterSubmit = submitButton.click();
  HtmlElement link = afterSubmit.getHtmlElementById("link");
  HtmlPage afterLink = link.click();

  // Cookie count after following the link.
  int cookiesAfter = webClient.getCookieManager().getCookies().size();
  assertTrue(cookiesAfter < cookiesBefore);
}
/**
 * Runs before each test: creates a new client that tolerates script errors, loads
 * {@code index.xhtml} below the deployment URL and asserts that its title is "Cargo Tracker".
 * An {@link IOException} during loading fails the test with a message naming the test method.
 */
@Before
@RunAsClient
public void setUp() {
  try {
    browser = new WebClient();
    browser.getOptions().setThrowExceptionOnScriptError(false);

    String landingPageUrl = deploymentUrl.toString() + "index.xhtml";
    landingPageResponse = browser.getPage(landingPageUrl);

    String actualTitle = landingPageResponse.getTitleText();
    Assert.assertEquals(
        "Could not load the application landing page.", "Cargo Tracker", actualTitle);
  } catch (IOException ex) {
    String failure =
        "An IOException was thrown during the test setup for class \""
            + ViewDetailTest.class.getSimpleName()
            + "\" at method \""
            + testName.getMethodName()
            + "\" with message: "
            + ex.getMessage();
    Assert.fail(failure);
  }
}
/** * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN * RISK.</span><br> * * <p>Submits this form to the server. If <tt>submitElement</tt> is <tt>null</tt>, then the * submission is treated as if it was triggered by JavaScript, and the <tt>onsubmit</tt> handler * will not be executed. * * <p><b>IMPORTANT:</b> Using this method directly is not the preferred way of submitting forms. * Most consumers should emulate the user's actions instead, probably by using something like * {@link HtmlElement#click()} or {@link HtmlElement#dblClick()}. * * @param submitElement the element that caused the submit to occur * @return a new page that reflects the results of this submission */ Page submit(final SubmittableElement submitElement) { final HtmlPage htmlPage = (HtmlPage) getPage(); final WebClient webClient = htmlPage.getWebClient(); if (webClient.getOptions().isJavaScriptEnabled()) { if (submitElement != null) { isPreventDefault_ = false; final ScriptResult scriptResult = fireEvent(Event.TYPE_SUBMIT); if (isPreventDefault_) { // null means 'nothing executed' if (scriptResult == null) { return htmlPage; } return scriptResult.getNewPage(); } } final String action = getActionAttribute().trim(); if (StringUtils.startsWithIgnoreCase(action, JavaScriptURLConnection.JAVASCRIPT_PREFIX)) { return htmlPage .executeJavaScriptIfPossible(action, "Form action", getStartLineNumber()) .getNewPage(); } } else { if (StringUtils.startsWithIgnoreCase( getActionAttribute(), JavaScriptURLConnection.JAVASCRIPT_PREFIX)) { // The action is JavaScript but JavaScript isn't enabled. // Return the current page. return htmlPage; } } final WebRequest request = getWebRequest(submitElement); final String target = htmlPage.getResolvedTarget(getTargetAttribute()); final WebWindow webWindow = htmlPage.getEnclosingWindow(); webClient.download(webWindow, target, request, false, "JS form.submit()"); return htmlPage; }
public static void savePage(String path) { // path = transformPath(path); if (!Main.list.contains(path) && path.contains("https://www.darts-ip.com")) { System.out.println(""); System.out.println("================================================="); System.out.println(""); System.out.println(path); System.out.println(""); Main.list.add(path); String content = ""; String fileName = ""; HtmlPage page = null; // ----------HTML-UNIT---------- try { WebClient webClient = new WebClient(); webClient.getOptions().setThrowExceptionOnScriptError(false); page = webClient.getPage(path); WebResponse response = page.getWebResponse(); content = response.getContentAsString(); fileName = path + ".html"; } catch (Exception e) { System.out.println(""); System.out.println("ERROR in Page.savePage #HTML-UNIT"); System.out.println(e); System.out.println(""); } // ----------SAVE-HTML-FILE---------- try { PrintWriter writer = new PrintWriter(fileName, "UTF-8"); writer.print(content); writer.close(); } catch (Exception e) { System.out.println(""); System.out.println("ERROR in ERROR in Page.savePage #SAVE-HTML-FILE"); System.out.println(e); System.out.println(""); } // ----------ZIP-HTML-FILE---------- try { FileInputStream fi = new FileInputStream(fileName); BufferedInputStream buffi = new BufferedInputStream(fi, Main.BUFFER); ZipEntry entry = new ZipEntry(fileName); Main.out.putNextEntry(entry); int count; while ((count = buffi.read(Main.data, 0, Main.BUFFER)) != -1) { Main.out.write(Main.data, 0, count); } Main.out.closeEntry(); buffi.close(); } catch (Exception e) { System.out.println(""); System.out.println("ERROR in Page.savePage #ZIP-HTML-FILE"); System.out.println(e); System.out.println(""); } // ----------DELETE-TMP-FILE---------- try { new File(fileName).delete(); } catch (Exception e) { System.out.println(""); System.out.println("ERROR in Page.savePage #DELETE-TMP-FILE"); System.out.println(e); System.out.println(""); } System.out.println(""); 
System.out.println("================================================="); // ----------EXPLORE-LINKS---------- exploreLinks(page); } }
/** * Processes requests for both HTTP <code>GET</code> and <code>POST</code> methods. * * @param request servlet request * @param response servlet response * @throws ServletException if a servlet-specific error occurs * @throws IOException if an I/O error occurs */ protected void processRequest(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType("text/html;charset=UTF-8"); PrintWriter out = response.getWriter(); int no = 0; try { no = Integer.parseInt(request.getParameter("no")); } catch (NumberFormatException exception) { no = 0; } try { /* TODO output your page here. You may use following sample code. */ out.println("<!DOCTYPE html>"); out.println("<html>"); out.println("<head>"); out.println("<title>Servlet NewServlet</title>"); out.println("</head>"); out.println("<body>"); final WebClient webClient = new WebClient(); webClient.getCookieManager().clearCookies(); webClient.getOptions().setUseInsecureSSL(true); webClient.setJavaScriptEnabled(false); final HtmlPage page = webClient.getPage("https://filestream.me/"); // out.println(page.getTitleText()); System.out.println("no of forms : " + page.getForms().size()); HtmlForm form = page.getForms().get(1); final HtmlTextInput textField = form.getInputByName("login"); textField.setAttribute("value", "*****@*****.**"); final HtmlPasswordInput passField = form.getInputByName("password"); passField.setAttribute("value", "ramkrishnan18"); HtmlSubmitInput htmlSubmitInput = form.getInputByValue("login"); HtmlPage page1 = htmlSubmitInput.click(); HtmlTable htmlTable = page1.getHtmlElementById("fileCatTable"); List<HtmlTableRow> listOfHtmlTableRow = htmlTable.getBodies().get(0).getRows(); List<String> allLinks = new ArrayList<String>(); for (HtmlTableRow htmlTableRow : listOfHtmlTableRow) { HtmlTableCell cell = htmlTableRow.getCells().get(htmlTableRow.getCells().size() - 1); for (DomElement domElement : cell.getChildElements()) { // 
out.println(domElement.getTagName()); if (domElement.getTagName().equals("div")) { boolean flag = true; for (DomElement celldomElement : domElement.getChildElements()) { if (flag) { // out.println(celldomElement.getTagName()); String title = celldomElement.getAttribute("title"); if ("Download".equals(title) || "Downloads".equals(title)) { String link = celldomElement.getAttribute("onclick"); String http = link.substring( link.indexOf('\'') + 1, link.indexOf('\'', link.indexOf('\'') + 1)); // out.println(celldomElement.getAttribute("onclick") + "<br/>"); // out.println(http + "<br/>"); allLinks.add(http); flag = false; } } } } } } // InputStream is =anchorAttachment.click().getWebResponse().getContentAsStream(); out.println("************ Start *************"); // HtmlAnchor anchorElement=HTMLAnchorElement.; // anchorElement.set // anchorElement.setHref(allLinks.get(0)); // anchorElement.cl final String u = allLinks.get(no); Thread t = new Thread( new Runnable() { @Override public void run() { try { URL url = new URL(u); System.out.println("Link : " + u); HttpURLConnection con = (HttpURLConnection) url.openConnection(); // System.out.println("Response Code : " + con.getInputStream().available()); con.setRequestProperty("Accept-Encoding", "gzip,deflate"); con.setRequestProperty( "User-Agent", "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36"); Map<String, List<String>> map = con.getHeaderFields(); for (String object : map.keySet()) { System.out.println(object + " --> " + map.get(object)); } System.out.println("Response Code : " + con.getResponseCode()); System.out.println("length :" + con.getContentLength()); System.out.println("Message " + con.getResponseMessage()); InputStream inputStream = con.getInputStream(); String path = System.getenv("OPENSHIFT_JBOSSAS_DIR"); FileOutputStream fileOutputStream = new FileOutputStream(path + "test.zip"); byte[] buffer = new byte[1024]; 
System.out.println(inputStream.available()); int bytesRead = 10; while (true) { // System.out.print("--"); bytesRead = inputStream.read(buffer); // System.out.println(bytesRead); if (bytesRead == -1) { break; } fileOutputStream.write(buffer, 0, bytesRead); // System.out.print(">"); } fileOutputStream.close(); System.out.println("File Completed"); } catch (IOException ex) { Logger.getLogger(MakeData.class.getName()).log(Level.SEVERE, null, ex); } } }); t.start(); // System.out.println(allLinks.get(0)); // System.out.println(webClient.getCookieManager().getCookies(new // URL(allLinks.get(0))).size()); // // System.out.println(httpPage.asText()); // HtmlPage httpPage = webClient.getPage(allLinks.get(0)); // // // final String pageAsXml = page1.asXml(); // // InputStream inputStream=httpPage.getWebResponse().getContentAsStream(); // // inputStream.available(); // System.out.println("Code : "+httpPage.getWebResponse().getStatusMessage()); // System.out.println("Code : "+httpPage.getWebResponse().getContentType()); // System.out.println("netCode : // "+httpPage.getWebResponse().getContentCharset()); // // out.println("<br/><br/>Code : "+httpPage.getWebResponse().getStatusCode()); // out.println("<br/><br/>Code : "+httpPage.getWebResponse().getContentType()); // out.println("<br/><br/>Code : "+httpPage.getWebResponse().getContentCharset()); // final String pageAsText = page1.asText(); // out.println(pageAsXml); webClient.closeAllWindows(); out.println("<h1>Servlet NewServlet at " + request.getContextPath() + "</h1>"); out.println("</body>"); out.println("</html>"); } finally { out.close(); } }
public static void main(String[] args) throws IOException { WebClient client = new WebClient(BrowserVersion.CHROME); CookieManager cookie = new CookieManager(); client.setCookieManager(cookie); client.getOptions().setJavaScriptEnabled(true); client.getOptions().setActiveXNative(false); client.getOptions().setCssEnabled(false); client.getOptions().setThrowExceptionOnScriptError(false); client.getOptions().setThrowExceptionOnFailingStatusCode(false); client.getOptions().setDoNotTrackEnabled(true); client.getOptions().setPrintContentOnFailingStatusCode(false); client.setAjaxController(new NicelyResynchronizingAjaxController()); client.setJavaScriptTimeout(Long.MAX_VALUE); List<Integer> TopCategory = new LinkedList<Integer>(TopCategoryUrl.keySet()); Collections.sort(TopCategory); long today = new Date().getTime(); long ymd = today - 1000 * 60 * 60 * 24; String date = sdf.format(ymd); FileWriter fw = new FileWriter("ShuTaobaoTop/search-" + date, false); BufferedWriter bw = new BufferedWriter(fw); while (TopCategory.size() > 0) { int cid = TopCategory.get(0); // if(cid<50002766) // { // TopCategory.remove(0); // continue; // } String cname = TopCategoryUrl.get(cid); String pid = "0", pname = ""; String topUrl = "http://shu.taobao.com/top/" + cid + "/search"; boolean success = false; ArrayList<String> result = new ArrayList<String>(); try { // client.getOptions().setJavaScriptEnabled(true); do { System.out.println("剩余类目数=" + TopCategory.size()); // ProxyUnit.configProxy(client, ProxyUnit.CHECKURL); } while (false == AccountLogin.loginTaoBao(client, AccountLogin.TAOBAOLOGINURL, true)); // client.getOptions().setJavaScriptEnabled(false); HtmlPage queryTrade = client.getPage(topUrl); Thread.sleep(1000L); System.out.println(queryTrade.getTitleText()); /* * 解析 */ Document doc = Jsoup.parse(queryTrade.asXml()); Element time = doc.getElementsByAttributeValue("class", "time").get(0); String startTime = time.text().substring(6, 16).replace("-", ""); // 统计时间: 2014-05-17 - 
2014-05-23 String endTime = time.text().substring(19).replace("-", ""); Elements elements = doc.getElementsByAttributeValueStarting( "class", "mod "); // <div class="mod odd mod-10 sm-ua"> <h3 class="title"> for (Element element : elements) { String title = element.getElementsByTag("h3").get(0).text(); System.out.println(title); Elements lis = element.getElementsByTag("ol").get(0).getElementsByTag("li"); // <li class="up "> for (Element li : lis) { Elements spans = li.getElementsByTag("span"); String rank = spans.get(0).text(); String key = spans.get(1).text(); String rise = spans.get(2).text(); rise = rise.substring(0, rise.length() - 1); URL href = new URL( URLDecoder.decode( "http://shu.taobao.com" + spans.get(1).getElementsByTag("a").get(0).attr("href"), "utf-8")); String hrefQuery = href.getQuery(); if (hrefQuery.contains("cid=")) { if ("0".equals(pid)) { pid = String.valueOf(cid); pname = cname; } cid = Integer.parseInt(hrefQuery.split("cid=")[1].split("&")[0]); cname = title; } System.out.println( startTime + "\001" + endTime + "\001" + cid + "\001" + cname + "\001" + pid + "\001" + pname + "\001" + rank + "\001" + key + "\001" + rise); result.add( startTime + "\001" + endTime + "\001" + cid + "\001" + cname + "\001" + pid + "\001" + pname + "\001" + rank + "\001" + key + "\001" + rise); } } success = true; } catch (Exception e) { // TODO Auto-generated catch block // e.printStackTrace(); } if (success) { TopCategory.remove(0); System.out.println("剩余类目数=" + TopCategory.size()); for (String r : result) { bw.write(r + "\n"); } } } bw.close(); fw.close(); }
/**
 * Requests {@code faces/compositionWithNullTemplate.xhtml} with failing-status exceptions
 * disabled and checks that the page text contains "Invalid path".
 *
 * @throws Exception if the page cannot be retrieved
 */
@Test
public void testNullInCompositionTemplate() throws Exception {
  // Let the page come back as a normal result even when the server answers with an error status.
  webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);

  final String address = webUrl + "faces/compositionWithNullTemplate.xhtml";
  final HtmlPage response = webClient.getPage(address);
  final String renderedText = response.asText();

  assertTrue(renderedText.contains("Invalid path"));
}
/**
 * Configures the shared {@code client}: failing HTTP status codes do not raise exceptions and
 * JavaScript execution is disabled.
 */
public InternetConnection() {
  // Broken javascript links still produce failing-status exceptions even with
  // javascript switched off, so those exceptions are suppressed as well.
  client.getOptions().setThrowExceptionOnFailingStatusCode(false);
  client.getOptions().setJavaScriptEnabled(false);
}
/**
 * Prepares the fixture before each test: creates a new {@code WebClient} that throws on failing
 * HTTP status codes and reads the base URL from the {@code integration.url} system property.
 */
@Before
public void setUp() {
  final WebClient freshClient = new WebClient();
  freshClient.getOptions().setThrowExceptionOnFailingStatusCode(true);

  this.webUrl = System.getProperty("integration.url");
  this.webClient = freshClient;
}