public void run() { HarvestController hc; GenericProcessingController gpc; try { if (null == _harvesterController.get()) { // Some sort of internal bug? No idea... _harvesterController.set(new HarvestController()); } if (null == _genericController.get()) { // (ditto, not seen this but better safe than sorry) _genericController.set(new GenericProcessingController()); } List<DocumentPojo> toAdd = new LinkedList<DocumentPojo>(); List<DocumentPojo> toUpdate = new LinkedList<DocumentPojo>(); List<DocumentPojo> toRemove = new LinkedList<DocumentPojo>(); hc = _harvesterController.get(); hc.harvestSource(_sourceToProcess, toAdd, toUpdate, toRemove); // (toAdd includes toUpdate) if (HarvestEnum.error != _sourceToProcess.getHarvestStatus().getHarvest_status()) { gpc = _genericController.get(); gpc.processDocuments( SourceUtils.getHarvestType(_sourceToProcess), toAdd, toUpdate, toRemove, _sourceToProcess); // (toRemove includes toUpdate) SourceUtils.updateHarvestStatus( _sourceToProcess, HarvestEnum.success, toAdd, toRemove.size(), null); // (note also releases the "in_progress" lock) // (note also prunes sources based on "maxDocs") // (also handles the intra-source distribution logic) } // (if we've declared error, then "in_progress" lock already released so nothing to do) } catch (Error e) { // Don't like to catch these, but otherwise we leak away sources _sourceToProcess.setReachedMaxDocs(); // (will try again - this really just ensured // lastDistributedCycleComplete isn't tripped) SourceUtils.updateHarvestStatus( _sourceToProcess, HarvestEnum.error, null, 0, "Source error: " + e.getMessage()); _logger.error("Source error on " + _sourceToProcess.getKey() + ": " + e.getMessage()); e.printStackTrace(); } catch (Exception e) { // Limit any problems to a single source _sourceToProcess.setReachedMaxDocs(); // (will try again - this really just ensured // lastDistributedCycleComplete isn't tripped) SourceUtils.updateHarvestStatus( _sourceToProcess, HarvestEnum.error, null, 0, 
"Source error: " + e.getMessage()); _logger.error("Source error on " + _sourceToProcess.getKey() + ": " + e.getMessage()); e.printStackTrace(); } }
public void run() { if (!_bStopHarvest) { boolean bLocalSleep = _bCurrentlySleepingBeforeExit; _bCurrentlySleepingBeforeExit = true; // (so won't sleep now when it gets to the end) _logger.error("Clean shutdown attempt"); _bStopHarvest = true; HarvestController.killHarvester(); if (bLocalSleep) { _mainThread.interrupt(); // (Don't mind the minor race condition that's here, worst case have to wait a few more // minutes) } // Wait at most 10 minutes for (int i = 0; i < 600; ++i) { try { if (_bReadyToTerminate) { break; } Thread.sleep(1000); } catch (InterruptedException e) { } } if (!_bReadyToTerminate) { _logger.error("Unclean shutdown #1"); } } }
/** * Used to start the sync service * * @throws IOException * @throws InterruptedException */ public void startService(LinkedList<SourcePojo> sources) throws IOException, InterruptedException { // Let the client know the server is starting System.out.println("[SERVER] Harvest server is coming online"); // Intialize/update generic process controller (do this here so that it blocks before threading // fun starts) new GenericProcessingController().Initialize(); // Start the background aggregation thread (will do nothing if disabled) EntityBackgroundAggregationManager.startThread(); AssociationBackgroundAggregationManager.startThread(); _mainThread = Thread.currentThread(); String hostname = "unknown.host"; try { hostname = java.net.InetAddress.getLocalHost().getHostName(); } catch (Exception e) { } // Add the shutdown hook ShutdownHook shutdownHook = new ShutdownHook(); Runtime.getRuntime().addShutdownHook(shutdownHook); Date startDate = new Date(); _logger.info("Starting harvest process at: " + startDate + ", host=" + hostname); // Perform processing PropertiesManager threadConfig = new PropertiesManager(); String sThreadConfig = threadConfig.getHarvestThreadConfig(); HashSet<String> types = new HashSet<String>(); try { String harvestTypes = new com.ikanow.infinit.e.harvest.utils.PropertiesManager().getHarvesterTypes(); for (String s : harvestTypes.split("\\s*,\\s*")) { types.add(s.toLowerCase()); } } catch (Exception e) { _logger.error( Globals.populateStackTrace(new StringBuffer("Failed to register all harvest types"), e)); } // TESTED (by hand) // Max time for harvester (defaults to 25 mins) long maxTime_secs = threadConfig.getMaximumHarvestTime(); if (maxTime_secs > 0) { new Timer().schedule(new InternalShutdown(), maxTime_secs * 1000); // (arg in ms) } // TOTEST try { // All source types in a single thread int nThreads = Integer.parseInt(sThreadConfig); SourceTypeHarvesterRunnable allTypes = new SourceTypeHarvesterRunnable(sources, nThreads); 
_logger.info("(Launching " + nThreads + " threads for all source types)"); allTypes.run(); } catch (NumberFormatException e) { // The thread config must be comma-separated list of type:threads // (step over each type and launch that number of threads for that type) String[] sConfigBlocks = sThreadConfig.split("\\s*,\\s*"); ExecutorService exec = Executors.newFixedThreadPool(sConfigBlocks.length); for (String sConfigBlock : sConfigBlocks) { String[] sTypeOrNumThreads = sConfigBlock.split("\\s*:\\s*"); if (2 == sTypeOrNumThreads.length) { try { int nThreads = Integer.parseInt(sTypeOrNumThreads[1]); types.remove(sTypeOrNumThreads[0].toLowerCase()); SourceTypeHarvesterRunnable typeRunner = new SourceTypeHarvesterRunnable(sources, nThreads, sTypeOrNumThreads[0]); _logger.info( "(Launching " + nThreads + " threads for " + sTypeOrNumThreads[0] + " source types)"); exec.submit(typeRunner); } catch (NumberFormatException e2) { _logger.error("Error in harvester thread configuration: " + sThreadConfig); } } else { _logger.error("Error in harvester thread configuration: " + sThreadConfig); } } // (end loop over different file types) // (generate one thread for everything else) for (String unusedType : types) { // (note case unimportant) SourceTypeHarvesterRunnable typeRunner = new SourceTypeHarvesterRunnable(sources, 1, unusedType); _logger.info("(Launching 1 thread for " + unusedType + " source types)"); exec.submit(typeRunner); } // TESTED (by hand) exec.shutdown(); int i = 0; while (!exec.isTerminated()) { try { Thread.sleep(1000); } catch (InterruptedException e3) { } if (_bStopHarvest) i++; if (i > 14400) { // emergency shutdown time... 
_logger.error("Emergency shutdown after 4 hours of waiting for manual shutdown"); System.exit(0); } } } com.ikanow.infinit.e.processing.generic.utils.PropertiesManager aggProps = new com.ikanow.infinit.e.processing.generic.utils.PropertiesManager(); boolean bAggDisabled = aggProps.getAggregationDisabled(); StoreAndIndexManager dataStore = new StoreAndIndexManager(); boolean bResizedDB = dataStore.resizeDB(); boolean deletedDocs = true; if (!bAggDisabled) { deletedDocs = AggregationManager.updateEntitiesFromDeletedDocuments(dataStore.getUUID()); } if (deletedDocs) { // (or if agg disabled, in which case we don't know) dataStore.removeSoftDeletedDocuments(); if (!bAggDisabled) { AggregationManager.updateDocEntitiesFromDeletedDocuments(dataStore.getUUID()); } } if (bResizedDB) { _logger.info("(resized DB, now " + dataStore.getDatabaseSize() + " documents)"); } HarvestController.logHarvesterStats(); _logger.info("Completed harvest process at: " + new Date().toString()); Date endDate = new Date(); // Not allowed to cycle harvester runs too quickly // Sleep for some period: long nDiff = endDate.getTime() - startDate.getTime(); long nToSleep = threadConfig.getMinimumHarvestTimeMs() - nDiff; if ((nToSleep > 0) && !_bCurrentlySleepingBeforeExit) { try { _bCurrentlySleepingBeforeExit = true; // (don't really care there's a minor race condition here) Thread.sleep(nToSleep); } catch (InterruptedException e) { // Do nothing, probably got a signal } } // TESTED (cut and paste from tested Beta code) // Stop background aggregation EntityBackgroundAggregationManager.stopThreadAndWait(); AssociationBackgroundAggregationManager.stopThreadAndWait(); _logger.info("Harvest server is going offline"); _bStopHarvest = true; _bReadyToTerminate = true; // (if we were terminated manually tell the shutdown hook it can stop) System.exit(0); } // TESTED
/**
 * Requests that harvesting stop: logs that the maximum run time has been reached, raises the
 * {@code _bStopHarvest} flag and asks {@code HarvestController} to kill the harvester.
 */
@Override
public void run() {
  final String notice = "Harvester reached max time, try to stop as quickly as possible";
  _logger.info(notice);

  // Raise the flag before killing the harvester, in the same order as the original statements
  _bStopHarvest = true;
  HarvestController.killHarvester();
}