Esempio n. 1
0
 /**
  * Runs the data crawl described by {@code dataConfig}.
  *
  * <p>The data store is resolved from the {@link DataStoreFactory} by the configuration's handler
  * name and kept in the {@code dataStore} field. When no store is registered under that name an
  * error is logged and nothing else is done. Otherwise the store is invoked; any
  * {@link Throwable} it raises is logged and recorded through {@link FailureUrlService} instead
  * of being propagated. Whether or not the store call failed, the index update callback is
  * committed and {@code deleteOldDocs()} is called afterwards.
  */
 protected void process() {
   dataStore = ComponentUtil.getDataStoreFactory().getDataStore(dataConfig.getHandlerName());
   if (dataStore == null) {
     logger.error("DataStore(" + dataConfig.getHandlerName() + ") is not found.");
     return;
   }

   try {
     dataStore.store(dataConfig, indexUpdateCallback, initParamMap);
   } catch (final Throwable t) {
     logger.error("Failed to process a data crawling: " + dataConfig.getName(), t);
     // Record the failure under "<configId>:<name>", keyed by the exception's class name.
     final FailureUrlService failureUrlService =
         ComponentUtil.getComponent(FailureUrlService.class);
     final String errorName = t.getClass().getCanonicalName();
     final String failureKey = dataConfig.getConfigId() + ":" + dataConfig.getName();
     failureUrlService.store(dataConfig, errorName, failureKey, t);
   } finally {
     // Runs on success and on failure alike.
     indexUpdateCallback.commit();
     deleteOldDocs();
   }
 }
Esempio n. 2
0
  /**
   * Crawls every data configuration in {@code configList}, one {@link DataCrawlingThread} per
   * configuration, and records timing and index figures once the started crawlers have finished.
   *
   * <p>Each configuration is registered through {@code crawlingConfigHelper.store}; the id it
   * returns is used as the thread name and passed to the thread as
   * {@code Constants.CRAWLING_INFO_ID}. Threads are started one by one, with at most
   * {@code FessConfig#getCrawlingThreadCount()} of them counted as active at a time, polling
   * every {@code crawlingExecutionInterval} milliseconds. If {@code SystemHelper#isForceStop()}
   * becomes true while threads are still being started, {@code stopCrawling()} is called on every
   * thread and no further threads are started.
   *
   * <p>After all started threads have terminated, the elapsed time, the callback's execute time
   * and its document size are written to the crawling info map, and the registered configurations
   * are removed again.
   *
   * @param sessionId crawling session id; passed to every thread as {@code Constants.SESSION_ID}
   * @param configList data configurations to crawl
   */
  protected void doCrawl(final String sessionId, final List<DataConfig> configList) {
    final int multiprocessCrawlingCount = ComponentUtil.getFessConfig().getCrawlingThreadCount();

    final long startTime = System.currentTimeMillis();

    final IndexUpdateCallback indexUpdateCallback =
        ComponentUtil.getComponent(IndexUpdateCallback.class);

    final List<String> sessionIdList = new ArrayList<>();
    dataCrawlingThreadList.clear();
    final List<String> dataCrawlingThreadStatusList = new ArrayList<>();
    for (final DataConfig dataConfig : configList) {
      final String sid = crawlingConfigHelper.store(sessionId, dataConfig);
      sessionIdList.add(sid);

      // Each thread gets its own parameter map. A single shared map would have its
      // CRAWLING_INFO_ID overwritten on every iteration, so all threads would end up with the
      // id of the last configuration, and would share one unsynchronized HashMap.
      final Map<String, String> initParamMap = new HashMap<>();
      initParamMap.put(Constants.SESSION_ID, sessionId);
      initParamMap.put(Constants.CRAWLING_INFO_ID, sid);

      final DataCrawlingThread dataCrawlingThread =
          new DataCrawlingThread(dataConfig, indexUpdateCallback, initParamMap);
      dataCrawlingThread.setPriority(crawlerPriority);
      dataCrawlingThread.setName(sid);
      dataCrawlingThread.setDaemon(true);

      dataCrawlingThreadList.add(dataCrawlingThread);
      dataCrawlingThreadStatusList.add(Constants.READY);
    }

    final SystemHelper systemHelper = ComponentUtil.getSystemHelper();

    int startedCrawlerNum = 0;
    int activeCrawlerNum = 0;
    while (startedCrawlerNum < dataCrawlingThreadList.size()) {
      // Force to stop crawl
      if (systemHelper.isForceStop()) {
        for (final DataCrawlingThread crawlerThread : dataCrawlingThreadList) {
          crawlerThread.stopCrawling();
        }
        break;
      }

      if (activeCrawlerNum < multiprocessCrawlingCount) {
        // A slot is free: start the next crawler.
        dataCrawlingThreadList.get(startedCrawlerNum).start();
        dataCrawlingThreadStatusList.set(startedCrawlerNum, Constants.RUNNING);
        startedCrawlerNum++;
        activeCrawlerNum++;
      } else {
        // All slots are taken: release the slots of crawlers that have finished.
        for (int i = 0; i < startedCrawlerNum; i++) {
          final DataCrawlingThread crawlerThread = dataCrawlingThreadList.get(i);
          if (!crawlerThread.isRunning()
              && dataCrawlingThreadStatusList.get(i).equals(Constants.RUNNING)) {
            crawlerThread.awaitTermination();
            dataCrawlingThreadStatusList.set(i, Constants.DONE);
            activeCrawlerNum--;
          }
        }
      }

      // Pause between polling rounds, whether a crawler was started or statuses were checked.
      try {
        Thread.sleep(crawlingExecutionInterval);
      } catch (final InterruptedException e) {
        if (logger.isDebugEnabled()) {
          logger.debug("Interrupted.", e);
        }
      }
    }

    // Wait only for crawlers that were actually started. After a force stop, the remaining
    // crawlers keep the READY status and can never reach DONE, so waiting on them would make
    // this loop spin forever.
    boolean finishedAll = false;
    while (!finishedAll) {
      finishedAll = true;
      for (int i = 0; i < startedCrawlerNum; i++) {
        final DataCrawlingThread crawlerThread = dataCrawlingThreadList.get(i);
        crawlerThread.awaitTermination(crawlingExecutionInterval);
        if (!crawlerThread.isRunning()
            && dataCrawlingThreadStatusList.get(i).equals(Constants.RUNNING)) {
          dataCrawlingThreadStatusList.set(i, Constants.DONE);
        }
        if (!dataCrawlingThreadStatusList.get(i).equals(Constants.DONE)) {
          finishedAll = false;
        }
      }
    }
    dataCrawlingThreadList.clear();
    dataCrawlingThreadStatusList.clear();

    // put crawling info
    final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper();

    final long execTime = System.currentTimeMillis() - startTime;
    crawlingInfoHelper.putToInfoMap(Constants.DATA_CRAWLING_EXEC_TIME, Long.toString(execTime));
    if (logger.isInfoEnabled()) {
      logger.info("[EXEC TIME] crawling time: " + execTime + "ms");
    }

    crawlingInfoHelper.putToInfoMap(
        Constants.DATA_INDEX_EXEC_TIME, Long.toString(indexUpdateCallback.getExecuteTime()));
    crawlingInfoHelper.putToInfoMap(
        Constants.DATA_INDEX_SIZE, Long.toString(indexUpdateCallback.getDocumentSize()));

    for (final String sid : sessionIdList) {
      // remove config
      crawlingConfigHelper.remove(sid);
    }
  }