예제 #1
0
  /**
   * Builds the HTML fragment that lists links to the crawler result pages.
   *
   * <p>The results folder is the canonical path of {@code rootDirectory} followed by
   * {@code Crawler.RESULTS_PATH_LOCAL}. One list item is emitted per entry of that folder; the
   * link target is {@code Crawler.RESULTS_PATH_WEB} plus the file name, and the link text is the
   * domain and date derived from the file name, formatted as {@code dd.MM.yyyy-HH:mm}.
   *
   * <p>The wrapping {@code <div><ul>} is always closed, even when the results folder is missing
   * or cannot be listed, so the surrounding page stays well-formed.
   *
   * @return the HTML fragment, or an empty string when the canonical path of the root directory
   *     cannot be resolved
   */
  private String prepareResultPagesSection() {

    String resultsPath;
    try {
      resultsPath = rootDirectory.getCanonicalPath() + Crawler.RESULTS_PATH_LOCAL;
    } catch (IOException e) {
      System.out.println("HTTPRequest: Error root directory" + rootDirectory.toString());
      return "";
    }

    StringBuilder result = new StringBuilder();
    result.append("<div class=\"connectedDomains\"><ul>");

    // listFiles() returns null when the path is not a directory or when an I/O error occurs,
    // so a single null check covers the missing-folder case as well as a failed listing.
    File[] allFiles = new File(resultsPath).listFiles();
    if (allFiles != null) {
      SimpleDateFormat format = new SimpleDateFormat("dd.MM.yyyy-HH:mm");
      for (File file : allFiles) {
        String filename = file.getName();
        String domain = Crawler.ResultsFilenameToDomain(filename);
        Date creationDate = Crawler.ResultsFilenameToDate(filename);
        String linkFormat = domain + "-" + format.format(creationDate);
        result.append("<li><a href=");
        result.append(Crawler.RESULTS_PATH_WEB);
        result.append(filename);
        result.append(">");
        result.append(linkFormat);
        result.append("</a></li>");
      }
    }

    // Close the list unconditionally: the opening tags above are emitted unconditionally too.
    result.append("</ul></div>");

    return result.toString();
  }
예제 #2
0
  /**
   * Renders the crawler home page as bytes.
   *
   * <p>The middle section of the page depends on the state:
   *
   * <ul>
   *   <li>when {@code message} is non-null, it is shown together with a link back to the homepage;
   *   <li>otherwise, when {@code serverCrawler.isBusy()} is true, a "already running" notice is
   *       shown;
   *   <li>otherwise the crawler input form is shown.
   * </ul>
   *
   * The list of result pages from {@code prepareResultPagesSection()} is appended in every case.
   *
   * <p>{@code message} is HTML-escaped before being embedded, because callers may pass text
   * that contains user-submitted input (for example the requested domain in an error message).
   *
   * @param message text to display instead of the form, or {@code null} to show the form or
   *     the busy notice
   * @return the complete HTML page encoded with the platform default charset
   */
  private byte[] prepareDefaultPage(String message) {
    String head =
        "<!doctype html><html lang=\"en\"><head><title> Crawler HTML site </title></head>"
            + "<link href=\"css/style.css\" rel=\"stylesheet\" /><body><div class=\"header\"><h1>Crawler</h1></div>";
    String form;
    if (message != null) {
      // '&' must be replaced first so the entities produced by the later replacements are not
      // escaped a second time.
      String safeMessage =
          message
              .replace("&", "&amp;")
              .replace("<", "&lt;")
              .replace(">", "&gt;")
              .replace("\"", "&quot;")
              .replace("'", "&#39;");
      form =
          "<div class=\"crawlerAnswer\"><h2>"
              + safeMessage
              + "</h2>"
              + "<a href=\"/\"><h3>Back to homepage</h3></a></div>";

    } else if (serverCrawler.isBusy()) {
      form = "<div class=\"crawlerAnswer\"><h2>Crawler is already running</h2></div>";
    } else {
      form =
          "<div class=\"crawlerForm\"><form id=\"generalform\" method=\"post\" action=\"execResult.html\" class=\"crawlerFormTable\">"
              + "<table><tr><td><h3>Enter Domain</h3></td><td><input type=\"text\" name=\"Domain\"></td></tr><tr>"
              + "<td><h3><input type=\"checkbox\" name=\"portscan\">Perform full TCP port scan</h3></td></tr><tr>"
              + "<td><h3><input type=\"checkbox\" name=\"robots.txt\">Disrespect robots.txt</h3></td></tr>"
              + "<tr><td></td><td><input type=\"submit\" value=\"Start Crawler\"></td></tr></table></form></div>";
    }

    String resultPages = prepareResultPagesSection();

    String finish = "</body></html>";
    String result = head + form + resultPages + finish;
    return result.getBytes();
  }
예제 #3
0
  /**
   * Configures the server crawler from the submitted form parameters and renders the page that
   * reports the outcome.
   *
   * <p>The {@code Domain} parameter is passed through as-is; the port-scan and ignore-robots
   * options are enabled when the {@code portscan} / {@code robots.txt} keys are present in the
   * request body, regardless of their values.
   *
   * @param htmlRequest the request whose body parameters carry the crawler settings
   * @return the default page with a success message when configuration succeeded, otherwise the
   *     default page with an "already running" message
   * @throws IOException declared for callers; nothing in this method body throws it directly
   */
  private byte[] activateCrawler(HtmlRequest htmlRequest) throws IOException {
    String domain = htmlRequest.parametersInRequestBody.get("Domain");
    // Checkbox-style options: only the presence of the key matters.
    boolean performPortScan = htmlRequest.parametersInRequestBody.containsKey("portscan");
    boolean ignoreRobots = htmlRequest.parametersInRequestBody.containsKey("robots.txt");

    if (!serverCrawler.ConfigureCrawler(domain, ignoreRobots, performPortScan)) {
      return prepareDefaultPage("Crawler is already running");
    }

    byte[] page = prepareDefaultPage("Crawler started succesfuly");
    System.out.println("Domain is: " + domain);
    System.out.println("Perform port scan: " + performPortScan);
    System.out.println("Ignore robots.txt: " + ignoreRobots);
    return page;
  }
예제 #4
0
  /**
   * Builds the 200 response for a request.
   *
   * <p>The entity body is chosen by request type:
   *
   * <ul>
   *   <li>{@code TRACE}: the bytes of the unparsed request;
   *   <li>{@code POST} to {@code /params_info.html}: the table built from the body parameters;
   *   <li>{@code POST} to {@code execResults}: a "busy" page, an input-error page, or the page
   *       returned by {@code activateCrawler}, after which the crawler thread is started;
   *   <li>any other {@code POST}: {@code null};
   *   <li>every other request type: the content returned by {@code readFileForResponse}.
   * </ul>
   *
   * The content type is always derived from the requested file.
   *
   * @param htmlRequest the parsed request to answer
   * @return the populated response with status 200
   * @throws IOException propagated from the helpers that build the body
   * @throws InterruptedException declared for callers; not thrown directly by this method body
   */
  private HtmlResponse respond200(HtmlRequest htmlRequest)
      throws IOException, InterruptedException {
    HtmlResponse okResponse = new HtmlResponse();
    byte[] payload;
    String method = htmlRequest.type;

    if (method.equals("TRACE")) {
      payload = htmlRequest.unparsedRequest.getBytes();
    } else if (!method.equals("POST")) {
      payload = readFileForResponse(htmlRequest);
    } else {
      String target = htmlRequest.requestedFile;
      if (target.equals("/params_info.html")) {
        payload = makeTable(htmlRequest.parametersInRequestBody);
      } else if (!target.equals(execResults)) {
        payload = null;
      } else {
        System.out.println(
            "Parameters for Crawler : " + htmlRequest.parametersInRequestBody.toString());
        if (serverCrawler.isBusy()) {
          payload = prepareDefaultPage("Crawler is busy");
        } else {
          String inputError = checkCrawlerInput(htmlRequest);
          if (inputError != null) {
            payload = prepareDefaultPage(inputError);
          } else {
            payload = activateCrawler(htmlRequest);
            // NOTE(review): the thread is started without knowing whether activateCrawler
            // actually managed to configure the crawler - confirm this is intended.
            new Thread(serverCrawler).start();
          }
        }
      }
    }

    okResponse.setEntityBody(payload);
    okResponse.setStatus(htmlRequest.httpVersion, 200);
    // requestedFile is re-read here, after the body helpers have run, exactly as before.
    okResponse.setContentTypeLine(getContentTypeFromFile(htmlRequest.requestedFile));

    return okResponse;
  }
예제 #5
0
  /**
   * Validates the domain submitted for crawling by attempting a request against it.
   *
   * <p>The {@code Domain} body parameter is normalised with {@code Crawler.ParseURL}, a single
   * trailing backslash is dropped, and a {@code ClientRequest} of type
   * {@code ClientRequest.getRequest} is issued. A missing or empty domain is reported as an
   * error instead of failing with an exception.
   *
   * @param htmlRequest the request whose body parameters contain the {@code Domain} value
   * @return {@code null} when the domain could be contacted, otherwise a human-readable error
   *     message
   */
  private String checkCrawlerInput(HtmlRequest htmlRequest) {

    String domain = htmlRequest.parametersInRequestBody.get("Domain");
    if (domain == null || domain.isEmpty()) {
      // The form was submitted without a domain; there is nothing to parse or connect to.
      return "Error: no domain was provided\n";
    }

    String domainFound = Crawler.ParseURL(domain);
    if (domainFound == null) {
      return "Error connecting to: " + domain + "\n";
    }

    // NOTE(review): this strips a trailing backslash; a trailing '/' may have been intended -
    // confirm against Crawler.ParseURL before changing.
    if (domainFound.endsWith("\\")) {
      domainFound = domainFound.substring(0, domainFound.length() - 1);
    }
    if (domainFound.isEmpty()) {
      return "Error connecting to: " + domain + "\n";
    }

    try {
      ClientRequest clientRequest = new ClientRequest(domainFound, ClientRequest.getRequest);
      if (clientRequest.responseHeaderFields == null) {
        return "Error connecting to: " + domain + "\n";
      }
    } catch (Exception e) {
      System.out.println("checkCrawlerInput: clientRequest generated error.");
      e.printStackTrace();
      return "Error connecting to: " + domain + "\n" + e.toString();
    }

    return null;
  }
 /**
  * Runs a crawl over the given simulated archival unit.
  *
  * <p>A {@code SpiderCrawlSpec} is built from the unit's new-content crawl URLs (with a
  * {@code null} second argument) and handed, together with a fresh {@code MockAuState}, to a
  * {@code NoCrawlEndActionsNewContentCrawler}, whose {@code doCrawl()} is then invoked.
  *
  * @param sau the simulated archival unit to crawl
  */
 protected void crawlContent(SimulatedArchivalUnit sau) {
   log.debug("crawlContent()");
   CrawlSpec crawlSpec = new SpiderCrawlSpec(sau.getNewContentCrawlUrls(), null);
   MockAuState auState = new MockAuState();
   Crawler contentCrawler = new NoCrawlEndActionsNewContentCrawler(sau, crawlSpec, auState);
   contentCrawler.doCrawl();
 }