/** * Run Crawljax. * * @throws CrawljaxException If the browser cannot be instantiated. * @throws ConfigurationException if crawljax configuration fails. @NotThreadSafe */ public final void run() throws CrawljaxException { startCrawl = System.currentTimeMillis(); LOGGER.info( "Start crawling with {} crawl elements", configuration.getCrawlRules().getPreCrawlConfig().getIncludedElements()); // set up the browser of the PopUpCancel PopUpCancel.setBrowserType(configuration.getBrowserConfig().getBrowsertype()); // load pop up cancel exe PopUpCancel.closePopUps(); // Create the initailCrawler initialCrawler = new InitialCrawler(this, configuration.getPlugins()); // Start the Crawling by adding the initialCrawler to the the workQueue. addWorkToQueue(initialCrawler); try { // Block until the all the jobs are done workQueue.waitForTermination(); } catch (InterruptedException e) { LOGGER.error(e.getMessage(), e); } if (workQueue.isAborted()) { LOGGER.warn( "It apears to be that the workQueue was Aborted, " + "not running postcrawling plugins and not closing the browsers"); return; } long timeCrawlCalc = System.currentTimeMillis() - startCrawl; /** * Close all the opened browsers, this is run in separate thread to have the post crawl plugins * to execute in the meanwhile. */ Thread shutdownThread = browserPool.close(); // TODO Stefan; Now we "re-request" a browser instance for the PostCrawlingPlugins Thread, // this is not ideal... EmbeddedBrowser b = null; try { b = this.getBrowserPool().requestBrowser(); } catch (InterruptedException e1) { LOGGER.warn("Re-Request for a browser was interrupted", e1); } configuration.getPlugins().runPostCrawlingPlugins(session); this.getBrowserPool().freeBrowser(b); this.shutdown(timeCrawlCalc); try { // Delete autohotkey pop canceler exe PopUpCancel.killExe(); shutdownThread.join(); } catch (InterruptedException e) { LOGGER.error("could not wait for browsers to close.", e); } }