/** * check if the module is running (was started and not yet complete/stopped) give a complete * answer, i.e. it's already consumed all files but it might have background threads running */ public boolean isModuleRunning(final IngestModuleAbstract module) { if (module.getType() == IngestModuleAbstract.ModuleType.AbstractFile) { IngestScheduler.FileScheduler fileScheduler = scheduler.getFileScheduler(); if (fileScheduler.hasModuleEnqueued((IngestModuleAbstractFile) module)) { // has work enqueued, so running return true; } else { // not in the queue, but could still have bkg work running return module.hasBackgroundJobsRunning(); } } else { // data source module synchronized (this) { if (dataSourceIngesters.isEmpty()) { return false; } IngestDataSourceThread imt = null; for (IngestDataSourceThread ii : dataSourceIngesters) { if (ii.getModule().equals(module)) { imt = ii; break; } } if (imt == null) { return false; } if (imt.isDone() == false) { return true; } else { return false; } } } }
/**
 * Reports whether the file scheduler still has files waiting in its queues.
 *
 * @return true if more files are queued, false otherwise
 */
public boolean getFileSchedulerHasNext() {
    final IngestScheduler.FileScheduler fileScheduler = scheduler.getFileScheduler();
    return fileScheduler.hasNext();
}
/** * Starts the needed worker threads. * * <p>if AbstractFile module is still running, do nothing and allow it to consume queue otherwise * start /restart AbstractFile worker * * <p>data source ingest workers run per (module,content). Checks if one for the same * (module,content) is already running otherwise start/restart the worker */ private synchronized void startAll() { final IngestScheduler.DataSourceScheduler dataSourceScheduler = scheduler.getDataSourceScheduler(); final IngestScheduler.FileScheduler fileScheduler = scheduler.getFileScheduler(); logger.log(Level.INFO, "DataSource queue: " + dataSourceScheduler.toString()); logger.log(Level.INFO, "File queue: " + fileScheduler.toString()); if (!ingestMonitor.isRunning()) { ingestMonitor.start(); } // image ingesters // cycle through each data source content in the queue while (dataSourceScheduler.hasNext()) { // dequeue // get next data source content and set of modules final ScheduledTask<IngestModuleDataSource> dataSourceTask = dataSourceScheduler.next(); // check if each module for this data source content is already running for (IngestModuleDataSource taskModule : dataSourceTask.getModules()) { boolean alreadyRunning = false; for (IngestDataSourceThread worker : dataSourceIngesters) { // ignore threads that are on different data sources if (!worker.getContent().equals(dataSourceTask.getContent())) { continue; // check next worker } // same data source, check module (by name, not id, since different instances) if (worker.getModule().getName().equals(taskModule.getName())) { alreadyRunning = true; logger.log( Level.INFO, "Data Source Ingester <" + dataSourceTask.getContent() + ", " + taskModule.getName() + "> is already running"); break; } } // checked all workers if (alreadyRunning == false) { logger.log( Level.INFO, "Starting new data source Ingester <" + dataSourceTask.getContent() + ", " + taskModule.getName() + ">"); // data source modules are now initialized per instance IngestModuleInit moduleInit = 
new IngestModuleInit(); PipelineContext<IngestModuleDataSource> dataSourcepipelineContext = new PipelineContext<IngestModuleDataSource>(dataSourceTask, getProcessUnallocSpace()); final IngestDataSourceThread newDataSourceWorker = new IngestDataSourceThread( this, dataSourcepipelineContext, dataSourceTask.getContent(), taskModule, moduleInit); dataSourceIngesters.add(newDataSourceWorker); // wrap the module in a worker, that will run init, process and complete on the module newDataSourceWorker.execute(); IngestManager.fireModuleEvent(IngestModuleEvent.STARTED.toString(), taskModule.getName()); } } } // AbstractFile ingester boolean startAbstractFileIngester = false; if (fileScheduler.hasNext()) { if (abstractFileIngester == null) { startAbstractFileIngester = true; logger.log(Level.INFO, "Starting initial AbstractFile ingester"); } // if worker had completed, restart it in case data is still enqueued else if (abstractFileIngester.isDone()) { startAbstractFileIngester = true; logger.log(Level.INFO, "Restarting AbstractFile ingester"); } } else { logger.log(Level.INFO, "no new AbstractFile enqueued, no ingester needed"); } if (startAbstractFileIngester) { stats = new IngestManagerStats(); abstractFileIngester = new IngestAbstractFileProcessor(); // init all fs modules, everytime new worker starts /* @@@ I don't understand why we do an init on each module. Should do only modules * that we are going to be using in the pipeline */ for (IngestModuleAbstractFile s : abstractFileModules) { IngestModuleInit moduleInit = new IngestModuleInit(); try { s.init(moduleInit); } catch (Exception e) { logger.log(Level.SEVERE, "File ingest module failed init(): " + s.getName()); } } abstractFileIngester.execute(); } }
/** stop currently running threads if any (e.g. when changing a case) */ synchronized void stopAll() { // stop queue worker if (queueWorker != null) { queueWorker.cancel(true); queueWorker = null; } // empty queues scheduler.getFileScheduler().empty(); scheduler.getDataSourceScheduler().empty(); // stop module workers if (abstractFileIngester != null) { // send signals to all file modules for (IngestModuleAbstractFile s : this.abstractFileModules) { if (isModuleRunning(s)) { try { s.stop(); } catch (Exception e) { logger.log( Level.WARNING, "Unexpected exception while stopping module: " + s.getName(), e); } } } // stop fs ingester thread boolean cancelled = abstractFileIngester.cancel(true); if (!cancelled) { logger.log(Level.INFO, "Unable to cancel file ingest worker, likely already stopped"); } abstractFileIngester = null; } List<IngestDataSourceThread> toStop = new ArrayList<IngestDataSourceThread>(); toStop.addAll(dataSourceIngesters); for (IngestDataSourceThread dataSourceWorker : toStop) { IngestModuleDataSource s = dataSourceWorker.getModule(); // stop the worker thread if thread is running boolean cancelled = dataSourceWorker.cancel(true); if (!cancelled) { logger.log( Level.INFO, "Unable to cancel data source ingest worker for module: " + dataSourceWorker.getModule().getName() + " data source: " + dataSourceWorker.getContent().getName()); } // stop notification to module to cleanup resources if (isModuleRunning(s)) { try { dataSourceWorker.getModule().stop(); } catch (Exception e) { logger.log(Level.WARNING, "Exception while stopping module: " + s.getName(), e); } } } logger.log(Level.INFO, "stopped all"); }
/**
 * Schedules a file for ingest by handing it, together with the given pipeline context, to the
 * file scheduler.
 *
 * <p>The file is usually a product of an ingest that is currently running, and should be
 * processed with the same ingest context as the file it was derived from.
 *
 * @param file file to be scheduled
 * @param pipelineContext ingest context used to ingest the parent of the file to be scheduled
 */
void scheduleFile(AbstractFile file, PipelineContext pipelineContext) {
    final IngestScheduler.FileScheduler fileScheduler = scheduler.getFileScheduler();
    fileScheduler.schedule(file, pipelineContext);
}