@Override protected Object doInBackground() throws Exception { logger.log(Level.INFO, "Starting background ingest file processor"); logger.log(Level.INFO, PlatformUtil.getAllMemUsageInfo()); stats.start(); // notify main thread modules started for (IngestModuleAbstractFile s : abstractFileModules) { IngestManager.fireModuleEvent(IngestModuleEvent.STARTED.toString(), s.getName()); } final String displayName = "File Ingest"; progress = ProgressHandleFactory.createHandle( displayName, new Cancellable() { @Override public boolean cancel() { logger.log(Level.INFO, "Filed ingest cancelled by user."); if (progress != null) { progress.setDisplayName(displayName + " (Cancelling...)"); } return IngestAbstractFileProcessor.this.cancel(true); } }); final IngestScheduler.FileScheduler fileScheduler = scheduler.getFileScheduler(); // initialize the progress bar progress.start(); progress.switchToIndeterminate(); // set initial totals and processed (to be updated as we process or new files are scheduled) int totalEnqueuedFiles = fileScheduler.getFilesEnqueuedEst(); progress.switchToDeterminate(totalEnqueuedFiles); int processedFiles = 0; // process AbstractFiles queue while (fileScheduler.hasNext()) { final ProcessTask fileTask = fileScheduler.next(); final PipelineContext<IngestModuleAbstractFile> filepipelineContext = fileTask.context; final ScheduledTask<IngestModuleAbstractFile> fileIngestTask = filepipelineContext.getScheduledTask(); final AbstractFile fileToProcess = fileTask.file; // clear return values from modules for last file synchronized (abstractFileModulesRetValues) { abstractFileModulesRetValues.clear(); } // logger.log(Level.INFO, "IngestManager: Processing: {0}", fileToProcess.getName()); for (IngestModuleAbstractFile module : fileIngestTask.getModules()) { // process the file with every file module if (isCancelled()) { logger.log(Level.INFO, "Terminating file ingest due to cancellation."); return null; } progress.progress( fileToProcess.getName() + " (" + module.getName() + ")", processedFiles); try { stats.logFileModuleStartProcess(module); IngestModuleAbstractFile.ProcessResult result = module.process(filepipelineContext, fileToProcess); stats.logFileModuleEndProcess(module); // store the result for subsequent modules for this file synchronized (abstractFileModulesRetValues) { abstractFileModulesRetValues.put(module.getName(), result); } } catch (Exception e) { logger.log( Level.SEVERE, "Error: unexpected exception from module: " + module.getName(), e); stats.addError(module); } catch (OutOfMemoryError e) { logger.log(Level.SEVERE, "Error: out of memory from module: " + module.getName(), e); stats.addError(module); } } // end for every module // free the internal file resource after done with every module fileToProcess.close(); int newTotalEnqueuedFiles = fileScheduler.getFilesEnqueuedEst(); if (newTotalEnqueuedFiles > totalEnqueuedFiles) { // update if new enqueued totalEnqueuedFiles = newTotalEnqueuedFiles + 1; // + processedFiles + 1; // processedFiles = 0; // reset progress.switchToIndeterminate(); progress.switchToDeterminate(totalEnqueuedFiles); } if (processedFiles < totalEnqueuedFiles) { // fix for now to handle the same datasource Content enqueued // twice ++processedFiles; } // --totalEnqueuedFiles; } // end of for every AbstractFile logger.log(Level.INFO, "IngestManager: Finished processing files"); return null; }