/**
 * Runs the data store that is registered for this thread's data configuration.
 *
 * <p>The data store is looked up by the handler name of {@code dataConfig} and kept in the
 * {@code dataStore} field. When no data store is found, an error is logged and nothing else
 * happens. Otherwise the store is executed; any {@link Throwable} it raises is logged and
 * handed to {@code FailureUrlService}. Whether or not the store failed, the index update
 * callback is committed and {@code deleteOldDocs()} is called afterwards.
 */
protected void process() {
    final DataStoreFactory factory = ComponentUtil.getDataStoreFactory();
    dataStore = factory.getDataStore(dataConfig.getHandlerName());

    // Nothing to run (and nothing to commit) when the handler is unknown.
    if (dataStore == null) {
        logger.error("DataStore(" + dataConfig.getHandlerName() + ") is not found.");
        return;
    }

    try {
        dataStore.store(dataConfig, indexUpdateCallback, initParamMap);
    } catch (final Throwable e) {
        logger.error("Failed to process a data crawling: " + dataConfig.getName(), e);

        // Record the failure under "<configId>:<configName>" with the exception's class name.
        final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
        final String errorName = e.getClass().getCanonicalName();
        final String failureKey = dataConfig.getConfigId() + ":" + dataConfig.getName();
        failureUrlService.store(dataConfig, errorName, failureKey, e);
    } finally {
        indexUpdateCallback.commit();
        deleteOldDocs();
    }
}
protected void doCrawl(final String sessionId, final List<DataConfig> configList) { final int multiprocessCrawlingCount = ComponentUtil.getFessConfig().getCrawlingThreadCount(); final long startTime = System.currentTimeMillis(); final IndexUpdateCallback indexUpdateCallback = ComponentUtil.getComponent(IndexUpdateCallback.class); final List<String> sessionIdList = new ArrayList<>(); final Map<String, String> initParamMap = new HashMap<>(); dataCrawlingThreadList.clear(); final List<String> dataCrawlingThreadStatusList = new ArrayList<>(); for (final DataConfig dataConfig : configList) { final String sid = crawlingConfigHelper.store(sessionId, dataConfig); sessionIdList.add(sid); initParamMap.put(Constants.SESSION_ID, sessionId); initParamMap.put(Constants.CRAWLING_INFO_ID, sid); final DataCrawlingThread dataCrawlingThread = new DataCrawlingThread(dataConfig, indexUpdateCallback, initParamMap); dataCrawlingThread.setPriority(crawlerPriority); dataCrawlingThread.setName(sid); dataCrawlingThread.setDaemon(true); dataCrawlingThreadList.add(dataCrawlingThread); dataCrawlingThreadStatusList.add(Constants.READY); } final SystemHelper systemHelper = ComponentUtil.getSystemHelper(); int startedCrawlerNum = 0; int activeCrawlerNum = 0; while (startedCrawlerNum < dataCrawlingThreadList.size()) { // Force to stop crawl if (systemHelper.isForceStop()) { for (final DataCrawlingThread crawlerThread : dataCrawlingThreadList) { crawlerThread.stopCrawling(); } break; } if (activeCrawlerNum < multiprocessCrawlingCount) { // start crawling dataCrawlingThreadList.get(startedCrawlerNum).start(); dataCrawlingThreadStatusList.set(startedCrawlerNum, Constants.RUNNING); startedCrawlerNum++; activeCrawlerNum++; try { Thread.sleep(crawlingExecutionInterval); } catch (final InterruptedException e) { if (logger.isDebugEnabled()) { logger.debug("Interrupted.", e); } } continue; } // check status for (int i = 0; i < startedCrawlerNum; i++) { if (!dataCrawlingThreadList.get(i).isRunning() && 
dataCrawlingThreadStatusList.get(i).equals(Constants.RUNNING)) { dataCrawlingThreadList.get(i).awaitTermination(); dataCrawlingThreadStatusList.set(i, Constants.DONE); activeCrawlerNum--; } } try { Thread.sleep(crawlingExecutionInterval); } catch (final InterruptedException e) { if (logger.isDebugEnabled()) { logger.debug("Interrupted.", e); } } } boolean finishedAll = false; while (!finishedAll) { finishedAll = true; for (int i = 0; i < dataCrawlingThreadList.size(); i++) { dataCrawlingThreadList.get(i).awaitTermination(crawlingExecutionInterval); if (!dataCrawlingThreadList.get(i).isRunning() && dataCrawlingThreadStatusList.get(i).equals(Constants.RUNNING)) { dataCrawlingThreadStatusList.set(i, Constants.DONE); } if (!dataCrawlingThreadStatusList.get(i).equals(Constants.DONE)) { finishedAll = false; } } } dataCrawlingThreadList.clear(); dataCrawlingThreadStatusList.clear(); // put cralwing info final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper(); final long execTime = System.currentTimeMillis() - startTime; crawlingInfoHelper.putToInfoMap(Constants.DATA_CRAWLING_EXEC_TIME, Long.toString(execTime)); if (logger.isInfoEnabled()) { logger.info("[EXEC TIME] crawling time: " + execTime + "ms"); } crawlingInfoHelper.putToInfoMap( Constants.DATA_INDEX_EXEC_TIME, Long.toString(indexUpdateCallback.getExecuteTime())); crawlingInfoHelper.putToInfoMap( Constants.DATA_INDEX_SIZE, Long.toString(indexUpdateCallback.getDocumentSize())); for (final String sid : sessionIdList) { // remove config crawlingConfigHelper.remove(sid); } }