private HtmlPage getPage(String url) throws IOException { final WebClient webClient = new WebClient( getBrowserVersionFromName( searchEngine.getDefaultBrowser())); // BrowserVersion.FIREFOX_24); webClient.getOptions().setJavaScriptEnabled(false); return webClient.getPage(url); }
private void search(SearchEngine se) { try { initializeJdbc(); final WebClient webClient = new WebClient( getBrowserVersionFromName(se.getDefaultBrowser())); // BrowserVersion.FIREFOX_24); // webClient.getOptions().setThrowExceptionOnScriptError(false); String htmlSource; HtmlPage searchResultPage = null; // String baseUrl = url; int numberOfUrlsExtracted; searchResultPage = clickSearchButtonMainPage(webClient, se); htmlSource = searchResultPage.getWebResponse().getContentAsString(); numberOfUrlsExtracted = ExtractAndInsertSeedUrls(searchResultPage, htmlSource, 1, 0, se.getName()); target.dispatchEvent(new EventSearchWorker(target, 1)); try { for (int i = 1; i < maxPage; i++) { HtmlAnchor ha = (HtmlAnchor) getNextButtonOrLink(searchResultPage, se.getNextButtonIdentifier()); searchResultPage = ha.click(); htmlSource = searchResultPage.getWebResponse().getContentAsString(); numberOfUrlsExtracted = ExtractAndInsertSeedUrls( searchResultPage, htmlSource, i + 1, numberOfUrlsExtracted, se.getName()); // nextUrl = getNextUrl(b, se, baseUrl); target.dispatchEvent(new EventSearchWorker(target, 1)); Random r = new Random(); int randomSleepTime = r.nextInt(2000) + 200; // minimum 200 ms max 2200 ms Thread.sleep(randomSleepTime); } // fw.close(); } catch (Exception ex) { String x = ex.getMessage(); } webClient.closeAllWindows(); } catch (Exception ex) { String x = ex.getMessage(); } }