예제 #1
0
  /**
   * Entry point: configures a Firefox-driven Crawljax crawl of {@code URL} and runs it.
   *
   * <p>The crawl is bounded by {@code MAX_STATES} and {@code MAX_CRAWL_DEPTH}, and form input is
   * taken from {@code getInputSpecification()}.
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String[] args) {
    // Point WebDriver at the locally installed Firefox binary.
    final String firefoxBinary = "C:\\Program Files (x86)\\Mozilla Firefox\\Firefox.exe";
    System.setProperty("webdriver.firefox.bin", firefoxBinary);

    CrawljaxConfigurationBuilder config = CrawljaxConfiguration.builderFor(URL);
    config.setBrowserConfig(new BrowserConfiguration(BrowserType.firefox));

    // Keep the crawl bounded: cap both the number of states and the click depth.
    config.setMaximumStates(MAX_STATES);
    config.setMaximumDepth(MAX_CRAWL_DEPTH);

    config.crawlRules().setInputSpec(getInputSpecification());

    CrawljaxController controller = new CrawljaxController(config.build());
    controller.run();
  }
  /**
   * Builds the Crawljax configuration for the crawl described by {@code options}.
   *
   * <p>If {@code options.getUrl()} starts with {@code http://} or {@code https://} (case
   * insensitive) it is crawled directly. Otherwise it is treated as a path to a local file, whose
   * parent directory is served by an embedded server stored in the {@code server} field.
   *
   * <p>A plugin is registered that, for every new state, stops the crawl when {@code
   * cancelSupplier} reports {@code true}, then records the state's document in {@code documents}
   * and passes it to {@code notifyObservers}.
   *
   * @return the fully built configuration
   * @throws IllegalStateException if the local file cannot be served (no parent directory, or the
   *     embedded server fails to start)
   */
  private CrawljaxConfiguration getCrawljaxBuilder() {
    String target = options.getUrl();
    // regionMatches(ignoreCase = true, ...) avoids a locale-sensitive toLowerCase() call.
    boolean remote =
        target.regionMatches(true, 0, "http://", 0, "http://".length())
            || target.regionMatches(true, 0, "https://", 0, "https://".length());
    String url = remote ? target : startLocalServerFor(target);

    CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(url);
    builder.addPlugin(
        new OnNewStatePlugin() {
          @Override
          public void onNewState(CrawlerContext context, StateVertex newState) {
            if (cancelSupplier != null && cancelSupplier.getAsBoolean()) {
              crawljax.stop();
            }
            try {
              Document document = newState.getDocument();
              documents.add(document);
              notifyObservers(document);
            } catch (IOException e) {
              // Best effort: a state whose document cannot be read is skipped, the crawl goes on.
              e.printStackTrace();
            }
          }
        });
    builder.setBrowserConfig(new BrowserConfiguration(BrowserType.PHANTOMJS, 1));
    builder.setOutputDirectory(
        new File(options.getOutputDirectory().getAbsoluteFile(), "crawljax"));
    builder.setMaximumDepth(options.getMaxDepth());
    builder.setMaximumStates(options.getMaxStates());

    CrawlRulesBuilder crawlRules = builder.crawlRules();
    if (options.shouldClickDefaultElements()) {
      crawlRules.clickDefaultElements();
    }
    for (String dontClick : options.getDontClickElements()) {
      crawlRules.dontClick(dontClick);
      // TODO: .withAttribute("value", "I don't recognize");
      // TODO: .underXPath("//*[@id='pageFooter']");
    }
    for (String click : options.getClickElements()) {
      crawlRules.click(click);
      // TODO: .withAttribute("type", "submit");
    }
    for (String dontClick : options.getDontClickElementsChildrenOf()) {
      crawlRules.dontClickChildrenOf(dontClick);
    }
    crawlRules.insertRandomDataInInputForms(options.shouldPutRandomDataInForms());
    crawlRules.clickElementsInRandomOrder(options.shouldClickElementsInRandomOrder());
    crawlRules.crawlFrames(options.shouldCrawlFrames());
    crawlRules.waitAfterReloadUrl(options.getWaitTimeAferReload(), TimeUnit.MILLISECONDS);
    crawlRules.waitAfterEvent(options.getWaitTimeAfterEvent(), TimeUnit.MILLISECONDS);
    crawlRules.clickOnce(options.shouldClickOnce());
    crawlRules.crawlHiddenAnchors(options.shouldCrawlHiddenAnchorsButton());

    return builder.build();
  }

  /**
   * Starts the embedded server on the parent directory of {@code path} and returns the URL under
   * which the file itself is reachable.
   *
   * @param path path of the local file to crawl; may be relative
   * @return {@code http://localhost:<port>/<file name>}
   * @throws IllegalStateException if the file has no parent directory or the server cannot start
   */
  private String startLocalServerFor(String path) {
    // Resolve to an absolute file first: getParentFile() is null for a bare relative name.
    File fileToCrawl = new File(path).getAbsoluteFile();
    File baseDirectory = fileToCrawl.getParentFile();
    if (baseDirectory == null) {
      throw new IllegalStateException("Cannot serve '" + path + "': it has no parent directory");
    }

    server = new Server(8080);
    ResourceHandler handler = new ResourceHandler();
    try {
      handler.setBaseResource(Resource.newResource(baseDirectory.getAbsolutePath()));
      server.setHandler(handler);
      server.start();
    } catch (Exception e) {
      // Fail fast with the cause instead of continuing the crawl with an empty URL.
      throw new IllegalStateException("Could not start local server for '" + path + "'", e);
    }
    int port = ((ServerConnector) server.getConnectors()[0]).getLocalPort();
    return "http://localhost:" + port + "/" + fileToCrawl.getName();
  }