Example #1
    @Override
    public void run() {

      try {
        // otherWorkerIPPort and getUrlsForOtherCrawlers() are parallel
        // lists: index i refers to the same remote worker in both.
        for (int i = 0; i < otherWorkerIPPort.size(); i++) {

          BlockingQueue<URL> queue = getUrlsForOtherCrawlers().get(i);

          if (queue.isEmpty()) {
            continue;
          }

          StringBuilder contentBuilder = new StringBuilder();
          contentBuilder.append(DispatcherConstants.NEW_URLS_PARAM).append('=');

          // Send at most URLS_TO_SEND URLs per request.
          int items = DispatcherConstants.URLS_TO_SEND;

          while (!queue.isEmpty() && items > 0) {
            URL url = queue.take();
            String cleansedURL = URLEncoder.encode(url.toString(), CrawlerConstants.CHARSET);
            contentBuilder.append(cleansedURL).append(';');
            items--;
          }

          String urlString =
              "http://" + otherWorkerIPPort.get(i) + "/worker/" + DispatcherConstants.ADD_URLS_URL;
          URL url = new URL(urlString);
          DispatcherUtils.sendHttpRequest(
              url, contentBuilder.toString(), DispatcherUtils.Method.POST, true);
        }

      } catch (Exception e) {
        Utils.logStackTrace(e);
      }
    }
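
The loop above drains each worker's queue into a single percent-encoded, semicolon-separated POST body. A self-contained sketch of that batching pattern, using a placeholder parameter name (newUrls) and an assumed batch size of 2 in place of DispatcherConstants.NEW_URLS_PARAM and DispatcherConstants.URLS_TO_SEND:

import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

public class BatchEncodeSketch {

  // Stand-in for DispatcherConstants.URLS_TO_SEND; the real value is unknown.
  private static final int URLS_TO_SEND = 2;

  public static void main(String[] args) throws Exception {
    BlockingQueue<URL> queue = new LinkedBlockingQueue<>();
    queue.add(new URL("http://example.com/a"));
    queue.add(new URL("http://example.com/b?x=1"));
    queue.add(new URL("http://example.com/c"));

    // Drain at most URLS_TO_SEND URLs into one form-encoded body,
    // mirroring the while loop in the run() method above.
    StringBuilder body = new StringBuilder("newUrls=");
    int items = URLS_TO_SEND;
    while (!queue.isEmpty() && items > 0) {
      URL url = queue.take();
      body.append(URLEncoder.encode(url.toString(), StandardCharsets.UTF_8.name()))
          .append(';');
      items--;
    }

    // Prints the first two URLs percent-encoded; the third stays queued.
    System.out.println(body);
  }
}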
Example #2
 /** Starts the crawler by running its initialisation routine; failures are logged rather than rethrown. */
 public void startCrawler() {
   try {
     logger.info(CLASSNAME + ": Starting crawler");
     initialise();
   } catch (Exception e) {
     Utils.logStackTrace(e);
   }
 }
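
Like Example #1, this method funnels exceptions through Utils.logStackTrace instead of rethrowing. The helper's implementation is not shown in these snippets; a plausible minimal sketch, assuming java.util.logging as the backend (the real helper may differ):

import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.logging.Logger;

public final class Utils {

  private static final Logger logger = Logger.getLogger(Utils.class.getName());

  private Utils() {}

  // Guessed implementation: render the full stack trace to a string
  // and emit it as a single severe log record.
  public static void logStackTrace(Exception e) {
    StringWriter trace = new StringWriter();
    e.printStackTrace(new PrintWriter(trace));
    logger.severe(trace.toString());
  }
}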
Example #3
 /**
  * Returns the number of documents that have been crawled and saved.
  *
  * @return the number of files in the storage directory, or -1 if the
  *     directory could not be read
  */
 public int getNumCrawledDocuments() {
   try {
     File storageDirectory =
         new File(CrawlerConstants.DB_DIRECTORY + CrawlerConstants.STORAGE_DIRECTORY);
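     // list() returns null if the directory is missing or unreadable; the
     // resulting NullPointerException is caught below and mapped to -1.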
     return storageDirectory.list().length;
   } catch (Exception e) {
     Utils.logStackTrace(e);
     return -1;
   }
 }
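
Because File.list() returns null when the storage directory is absent, the original relies on the catch-all to map that case to -1. A null-safe variant of the same count, with a placeholder path standing in for CrawlerConstants.DB_DIRECTORY + CrawlerConstants.STORAGE_DIRECTORY:

import java.io.File;

public class DocumentCountSketch {

  // Null-safe variant of getNumCrawledDocuments(): handles a missing
  // directory explicitly instead of through a NullPointerException.
  public static int countCrawledDocuments(String storagePath) {
    String[] entries = new File(storagePath).list();
    return entries == null ? -1 : entries.length;
  }

  public static void main(String[] args) {
    // "db/storage" is a placeholder path for the crawler's storage directory.
    System.out.println(countCrawledDocuments("db/storage"));
  }
}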