コード例 #1
0
ファイル: Frontier.java プロジェクト: narendramohan/mmqa
 /**
  * Creates the frontier and opens the "PendingURLsDB" work queue.
  *
  * <p>When the crawl is resumable, the scheduled-page counter is restored from
  * {@code counters} and every URL still recorded in the in-process database is
  * moved back into the schedule, 100 URLs at a time. When the crawl is not
  * resumable, {@code inProcessPages} is set to {@code null} and the scheduled-page
  * count starts at zero.
  *
  * <p>A {@link DatabaseException} during initialisation is logged (with its stack
  * trace) and leaves {@code workQueues} set to {@code null}; it is not rethrown.
  *
  * @param env database environment handed to the counters, work queue and in-process store
  * @param config crawl configuration; {@code isResumableCrawling()} selects the resume path
  * @param docIdServer document-id server kept for later use by this frontier
  */
 public Frontier(Environment env, CrawlConfig config, DocIDServer docIdServer) {
   super(config);
   this.counters = new Counters(env, config);
   this.docIdServer = docIdServer;
   try {
     workQueues = new WorkQueues(env, "PendingURLsDB", config.isResumableCrawling());
     if (config.isResumableCrawling()) {
       scheduledPages = counters.getValue(ReservedCounterNames.SCHEDULED_PAGES);
       inProcessPages = new InProcessPagesDB(env);
       long numPreviouslyInProcessPages = inProcessPages.getLength();
       if (numPreviouslyInProcessPages > 0) {
         logger.info("Rescheduling " + numPreviouslyInProcessPages + " URLs from previous crawl.");
         // NOTE(review): presumably scheduleAll() counts these pages again, hence the
         // subtraction up front — confirm against scheduleAll().
         scheduledPages -= numPreviouslyInProcessPages;
         // Drain the in-process store in batches until nothing is left.
         while (true) {
           List<WebURL> urls = inProcessPages.get(100);
           if (urls.isEmpty()) {
             break;
           }
           scheduleAll(urls);
           inProcessPages.delete(urls.size());
         }
       }
     } else {
       inProcessPages = null;
       scheduledPages = 0;
     }
   } catch (DatabaseException e) {
     // Pass the exception itself so the stack trace is not lost.
     logger.error("Error while initializing the Frontier: " + e.getMessage(), e);
     workQueues = null;
   }
 }
コード例 #2
0
ファイル: Frontier.java プロジェクト: narendramohan/mmqa
 /**
  * Fetches up to {@code max} URLs from the work queue and appends them to
  * {@code result}, blocking until at least one URL is available or the frontier
  * is finished.
  *
  * <p>Fetched URLs are removed from the work queue and, when in-process tracking
  * is enabled ({@code inProcessPages != null}), recorded there. The method returns
  * as soon as {@code result} is non-empty or {@code isFinished} is set; otherwise
  * it waits on {@code waitingList} and retries.
  *
  * @param max maximum number of URLs to take from the work queue per attempt
  * @param result list that receives the fetched URLs
  */
 public void getNextURLs(int max, List<WebURL> result) {
   while (true) {
     synchronized (mutex) {
       if (isFinished) {
         return;
       }
       try {
         List<WebURL> curResults = workQueues.get(max);
         workQueues.delete(curResults.size());
         if (inProcessPages != null) {
           for (WebURL curPage : curResults) {
             inProcessPages.put(curPage);
           }
         }
         result.addAll(curResults);
       } catch (DatabaseException e) {
         // Route the stack trace through the logger instead of printStackTrace().
         logger.error("Error while getting next urls: " + e.getMessage(), e);
       }
       if (!result.isEmpty()) {
         return;
       }
     }
     try {
       synchronized (waitingList) {
         waitingList.wait();
       }
     } catch (InterruptedException ignored) {
       // Deliberately ignored: an interrupt is treated like a wake-up and the loop
       // re-checks isFinished and the queue. Restoring the interrupt flag here
       // would make the next wait() throw immediately and spin.
     }
     if (isFinished) {
       return;
     }
   }
 }
コード例 #3
0
ファイル: Frontier.java プロジェクト: narendramohan/mmqa
 /**
  * Records that a page has been processed.
  *
  * <p>Always increments the processed-pages counter. When in-process tracking is
  * enabled ({@code inProcessPages != null}), the URL is also removed from that
  * store; a failed removal is logged as a warning.
  *
  * @param webURL the URL whose processing has completed
  */
 public void setProcessed(WebURL webURL) {
   counters.increment(ReservedCounterNames.PROCESSED_PAGES);
   if (inProcessPages == null) {
     return;
   }
   boolean removed = inProcessPages.removeURL(webURL);
   if (!removed) {
     logger.warn("Could not remove: " + webURL.getURL() + " from list of processed pages.");
   }
 }
コード例 #4
0
ファイル: Frontier.java プロジェクト: narendramohan/mmqa
 /**
  * Returns the number of URLs currently recorded as in process.
  *
  * @return the length of the in-process store, or {@code 0} when in-process
  *     tracking is disabled ({@code inProcessPages} is {@code null})
  */
 public long getNumberOfAssignedPages() {
   // inProcessPages may be null (other methods in this class guard for it), so
   // avoid a NullPointerException and report no tracked pages.
   if (inProcessPages == null) {
     return 0;
   }
   return inProcessPages.getLength();
 }