/** * Run this method to start the crawl. * * @throws IOException when the output folder cannot be created or emptied. */ public static void main(String[] args) throws IOException { CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL); builder.crawlRules().insertRandomDataInInputForms(false); // We are going to use the WebScarab proxy plugin WebScarabProxyPlugin proxyPlugin = new WebScarabProxyPlugin(); // Provide the JS file to be inserted proxyPlugin.addPlugin(new JSInjectorProxyAddon(new File("foo.js"))); builder.addPlugin(proxyPlugin); // Configure the proxy to use the port 8084 (you can change this of course) builder.setProxyConfig(ProxyConfiguration.manualProxyOn("127.0.0.1", 8084)); // click these elements builder.crawlRules().clickDefaultElements(); builder.crawlRules().click("div").withAttribute("class", "clickable"); builder.setMaximumStates(4); // Set timeouts builder.crawlRules().waitAfterReloadUrl(WAIT_TIME_AFTER_RELOAD, TimeUnit.MILLISECONDS); builder.crawlRules().waitAfterEvent(WAIT_TIME_AFTER_EVENT, TimeUnit.MILLISECONDS); builder.setBrowserConfig(new BrowserConfiguration(BrowserType.firefox, 1)); CrawljaxRunner crawljax = new CrawljaxRunner(builder.build()); crawljax.call(); }
/** Entry point */ public static void main(String[] args) { System.setProperty( "webdriver.firefox.bin", "C:\\Program Files (x86)\\Mozilla Firefox\\Firefox.exe"); CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL); BrowserConfiguration browserConfig = new BrowserConfiguration(BrowserType.firefox); builder.setBrowserConfig(browserConfig); // PopUpCancel configuration = new PopUpCancel(); // limit the crawling scope builder.setMaximumStates(MAX_STATES); builder.setMaximumDepth(MAX_CRAWL_DEPTH); builder.crawlRules().setInputSpec(getInputSpecification()); CrawljaxController crawljax = new CrawljaxController(builder.build()); crawljax.run(); }
private CrawljaxConfiguration getCrawljaxBuilder() { String url = ""; if (options.getUrl().toLowerCase().startsWith("http://")) { url = options.getUrl(); } else { server = new Server(8080); ResourceHandler handler = new ResourceHandler(); try { File fileToCrawl = new File(options.getUrl()); handler.setBaseResource( Resource.newResource(fileToCrawl.getParentFile().getAbsolutePath())); server.setHandler(handler); server.start(); int port = ((ServerConnector) server.getConnectors()[0]).getLocalPort(); url = "http://localhost:" + port + "/" + fileToCrawl.getName(); // URI.create(url); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(url); // builder.addPlugin(new CrawlOverview()); builder.addPlugin( new OnNewStatePlugin() { @Override public void onNewState(CrawlerContext arg0, StateVertex arg1) { if (cancelSupplier != null) { if (cancelSupplier.getAsBoolean()) { crawljax.stop(); } } Document document; try { document = arg1.getDocument(); documents.add(document); notifyObservers(document); } catch (IOException e) { e.printStackTrace(); } } }); builder.setBrowserConfig(new BrowserConfiguration(BrowserType.PHANTOMJS, 1)); builder.setOutputDirectory( new File(options.getOutputDirectory().getAbsolutePath() + "/crawljax")); builder.setMaximumDepth(options.getMaxDepth()); builder.setMaximumStates(options.getMaxStates()); CrawlRulesBuilder crawlRules = builder.crawlRules(); if (options.shouldClickDefaultElements()) { crawlRules.clickDefaultElements(); } if (options.getDontClickElements().size() > 0) { for (String dontClick : options.getDontClickElements()) { crawlRules.dontClick(dontClick); // TODO: .withAttribute("value", "I don't recognize"); // TODO: .underXPath("//*[@id='pageFooter']"); // .underXPath("//*[@id='content']/div/div[2]"); } } if (options.getClickElements().size() > 0) { for (String click : options.getClickElements()) { crawlRules.click(click); // TODO: .withAttribute("type", "submit"); } } if (options.getDontClickElementsChildrenOf().size() > 0) { for (String dontClick : options.getDontClickElementsChildrenOf()) { crawlRules.dontClickChildrenOf(dontClick); } } crawlRules.insertRandomDataInInputForms(options.shouldPutRandomDataInForms()); crawlRules.clickElementsInRandomOrder(options.shouldClickElementsInRandomOrder()); crawlRules.crawlFrames(options.shouldCrawlFrames()); crawlRules.waitAfterReloadUrl(options.getWaitTimeAferReload(), TimeUnit.MILLISECONDS); crawlRules.waitAfterEvent(options.getWaitTimeAfterEvent(), TimeUnit.MILLISECONDS); crawlRules.clickOnce(options.shouldClickOnce()); crawlRules.crawlHiddenAnchors(options.shouldCrawlHiddenAnchorsButton()); return builder.build(); }