/** Entry point */ public static void main(String[] args) { System.setProperty( "webdriver.firefox.bin", "C:\\Program Files (x86)\\Mozilla Firefox\\Firefox.exe"); CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL); BrowserConfiguration browserConfig = new BrowserConfiguration(BrowserType.firefox); builder.setBrowserConfig(browserConfig); // PopUpCancel configuration = new PopUpCancel(); // limit the crawling scope builder.setMaximumStates(MAX_STATES); builder.setMaximumDepth(MAX_CRAWL_DEPTH); builder.crawlRules().setInputSpec(getInputSpecification()); CrawljaxController crawljax = new CrawljaxController(builder.build()); crawljax.run(); }
private CrawljaxConfiguration getCrawljaxBuilder() { String url = ""; if (options.getUrl().toLowerCase().startsWith("http://")) { url = options.getUrl(); } else { server = new Server(8080); ResourceHandler handler = new ResourceHandler(); try { File fileToCrawl = new File(options.getUrl()); handler.setBaseResource( Resource.newResource(fileToCrawl.getParentFile().getAbsolutePath())); server.setHandler(handler); server.start(); int port = ((ServerConnector) server.getConnectors()[0]).getLocalPort(); url = "http://localhost:" + port + "/" + fileToCrawl.getName(); // URI.create(url); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(url); // builder.addPlugin(new CrawlOverview()); builder.addPlugin( new OnNewStatePlugin() { @Override public void onNewState(CrawlerContext arg0, StateVertex arg1) { if (cancelSupplier != null) { if (cancelSupplier.getAsBoolean()) { crawljax.stop(); } } Document document; try { document = arg1.getDocument(); documents.add(document); notifyObservers(document); } catch (IOException e) { e.printStackTrace(); } } }); builder.setBrowserConfig(new BrowserConfiguration(BrowserType.PHANTOMJS, 1)); builder.setOutputDirectory( new File(options.getOutputDirectory().getAbsolutePath() + "/crawljax")); builder.setMaximumDepth(options.getMaxDepth()); builder.setMaximumStates(options.getMaxStates()); CrawlRulesBuilder crawlRules = builder.crawlRules(); if (options.shouldClickDefaultElements()) { crawlRules.clickDefaultElements(); } if (options.getDontClickElements().size() > 0) { for (String dontClick : options.getDontClickElements()) { crawlRules.dontClick(dontClick); // TODO: .withAttribute("value", "I don't recognize"); // TODO: .underXPath("//*[@id='pageFooter']"); // .underXPath("//*[@id='content']/div/div[2]"); } } if (options.getClickElements().size() > 0) { for (String click : options.getClickElements()) { crawlRules.click(click); // TODO: .withAttribute("type", "submit"); } } if (options.getDontClickElementsChildrenOf().size() > 0) { for (String dontClick : options.getDontClickElementsChildrenOf()) { crawlRules.dontClickChildrenOf(dontClick); } } crawlRules.insertRandomDataInInputForms(options.shouldPutRandomDataInForms()); crawlRules.clickElementsInRandomOrder(options.shouldClickElementsInRandomOrder()); crawlRules.crawlFrames(options.shouldCrawlFrames()); crawlRules.waitAfterReloadUrl(options.getWaitTimeAferReload(), TimeUnit.MILLISECONDS); crawlRules.waitAfterEvent(options.getWaitTimeAfterEvent(), TimeUnit.MILLISECONDS); crawlRules.clickOnce(options.shouldClickOnce()); crawlRules.crawlHiddenAnchors(options.shouldCrawlHiddenAnchorsButton()); return builder.build(); }