Beispiel #1
0
    /**
     * Marks the beginning of a run: records the current time in {@code startTime} and stores a
     * zero entry in {@code fileModuleTimers} for every module in {@code abstractFileModules},
     * keyed by the module name.
     */
    void start() {
      startTime = new Date();

      // every known file module begins the run with a zeroed timer entry
      for (final IngestModuleAbstractFile fileModule : abstractFileModules) {
        final String moduleName = fileModule.getName();
        fileModuleTimers.put(moduleName, 0L);
      }
    }
Beispiel #2
0
 /**
  * Winds the file pipeline down after an interrupted or failed run.
  *
  * <p>Each module in {@code abstractFileModules} that is reported as running is asked to stop; a
  * failure to stop is logged as a warning and does not prevent the remaining modules from being
  * handled. A STOPPED module event is fired for every module, whether or not it was running.
  * Finally the file scheduler queue is emptied.
  */
 private void handleInterruption() {
   for (IngestModuleAbstractFile fileModule : abstractFileModules) {
     final boolean running = isModuleRunning(fileModule);
     if (running) {
       try {
         fileModule.stop();
       } catch (Exception stopFailure) {
         // best effort: keep going so the other modules still get stopped
         logger.log(
             Level.WARNING,
             "Exception while stopping module: " + fileModule.getName(),
             stopFailure);
       }
     }
     final String stoppedEvent = IngestModuleEvent.STOPPED.toString();
     IngestManager.fireModuleEvent(stoppedEvent, fileModule.getName());
   }

   // discard whatever is still waiting in the file queue
   final IngestScheduler.FileScheduler fileQueue = scheduler.getFileScheduler();
   fileQueue.empty();
 }
Beispiel #3
0
    /**
     * Finishes the file ingest run once the background work has ended.
     *
     * <p>Fetches the background result so that any exception raised by {@code doInBackground()}
     * surfaces here. If the worker was not cancelled, every file module is told the run completed
     * and a COMPLETED module event is fired for it; memory usage is logged before and after a
     * garbage collection request. Cancellation, interruption or any other failure is routed to
     * {@code handleInterruption()}. In all cases the statistics are closed and the progress handle
     * is finished, and for a non-cancelled run a summary is logged and posted as a manager message.
     */
    @Override
    protected void done() {
      try {
        // fetching the result rethrows anything doInBackground() failed with
        super.get();

        if (!this.isCancelled()) {
          // let every file module know the pipeline ran to completion
          for (IngestModuleAbstractFile fileModule : abstractFileModules) {
            fileModule.complete();
            IngestManager.fireModuleEvent(
                IngestModuleEvent.COMPLETED.toString(), fileModule.getName());
          }
        }

        // memory snapshot before and after asking the JVM to collect garbage
        logger.log(Level.INFO, PlatformUtil.getAllMemUsageInfo());
        logger.log(Level.INFO, "Freeing jvm heap resources post file pipeline run");
        System.gc();
        logger.log(Level.INFO, PlatformUtil.getAllMemUsageInfo());
      } catch (InterruptedException interrupted) {
        handleInterruption();
      } catch (CancellationException cancelled) {
        // the task was cancelled before it could produce a result
        handleInterruption();
      } catch (Exception failure) {
        // covers ExecutionException from get() as well as anything thrown by the modules above
        handleInterruption();
        logger.log(Level.SEVERE, "Fatal error during ingest.", failure);
      } finally {
        stats.end();
        progress.finish();

        if (!this.isCancelled()) {
          final String summary = "Summary Report: " + stats.toString();
          logger.log(Level.INFO, summary);
          final String timings = "File module timings: " + stats.getFileModuleStats();
          logger.log(Level.INFO, timings);
          if (ui != null) {
            logger.log(Level.INFO, "Ingest messages count: " + ui.getMessagesCount());
          }

          final IngestMessage completionMessage =
              IngestMessage.createManagerMessage("File Ingest Complete", stats.toHtmlString());
          IngestManager.this.postMessage(completionMessage);
        }
      }
    }
Beispiel #4
0
    /**
     * Drains the file scheduler, running every scheduled file through the modules of its ingest
     * task.
     *
     * <p>Before processing starts, a STARTED module event is fired for each module in {@code
     * abstractFileModules} and a cancellable progress handle is created. For every file the
     * per-file module results are cleared, each module's {@code process} call is timed via the
     * stats object, and its result is stored under the module name for subsequent modules. An
     * exception or {@link OutOfMemoryError} from a module is logged and counted as an error for
     * that module; processing then continues with the next module.
     *
     * <p>The file is closed once its modules are done, including when the run is cancelled part
     * way through a file.
     *
     * @return always {@code null}
     * @throws Exception if anything outside the guarded module calls fails
     */
    @Override
    protected Object doInBackground() throws Exception {

      logger.log(Level.INFO, "Starting background ingest file processor");
      logger.log(Level.INFO, PlatformUtil.getAllMemUsageInfo());

      stats.start();

      // notify main thread modules started
      for (IngestModuleAbstractFile s : abstractFileModules) {
        IngestManager.fireModuleEvent(IngestModuleEvent.STARTED.toString(), s.getName());
      }

      final String displayName = "File Ingest";
      progress =
          ProgressHandleFactory.createHandle(
              displayName,
              new Cancellable() {
                @Override
                public boolean cancel() {
                  logger.log(Level.INFO, "Filed ingest cancelled by user.");
                  if (progress != null) {
                    progress.setDisplayName(displayName + " (Cancelling...)");
                  }
                  return IngestAbstractFileProcessor.this.cancel(true);
                }
              });

      final IngestScheduler.FileScheduler fileScheduler = scheduler.getFileScheduler();

      // initialize the progress bar
      progress.start();
      progress.switchToIndeterminate();
      // set initial totals and processed (to be updated as we process or new files are scheduled)
      int totalEnqueuedFiles = fileScheduler.getFilesEnqueuedEst();
      progress.switchToDeterminate(totalEnqueuedFiles);
      int processedFiles = 0;
      // process AbstractFiles queue
      while (fileScheduler.hasNext()) {
        final ProcessTask fileTask = fileScheduler.next();
        final PipelineContext<IngestModuleAbstractFile> filepipelineContext = fileTask.context;
        final ScheduledTask<IngestModuleAbstractFile> fileIngestTask =
            filepipelineContext.getScheduledTask();
        final AbstractFile fileToProcess = fileTask.file;

        // clear return values from modules for last file
        synchronized (abstractFileModulesRetValues) {
          abstractFileModulesRetValues.clear();
        }

        try {
          for (IngestModuleAbstractFile module : fileIngestTask.getModules()) {
            // process the file with every file module
            if (isCancelled()) {
              logger.log(Level.INFO, "Terminating file ingest due to cancellation.");
              return null;
            }
            progress.progress(
                fileToProcess.getName() + " (" + module.getName() + ")", processedFiles);

            try {
              stats.logFileModuleStartProcess(module);
              IngestModuleAbstractFile.ProcessResult result =
                  module.process(filepipelineContext, fileToProcess);
              stats.logFileModuleEndProcess(module);

              // store the result for subsequent modules for this file
              synchronized (abstractFileModulesRetValues) {
                abstractFileModulesRetValues.put(module.getName(), result);
              }

            } catch (Exception e) {
              logger.log(
                  Level.SEVERE, "Error: unexpected exception from module: " + module.getName(), e);
              stats.addError(module);
            } catch (OutOfMemoryError e) {
              logger.log(Level.SEVERE, "Error: out of memory from module: " + module.getName(), e);
              stats.addError(module);
            }
          } // end for every module
        } finally {
          // free the internal file resource after done with every module; the finally block
          // guarantees this also happens when the loop exits early because of cancellation
          fileToProcess.close();
        }

        int newTotalEnqueuedFiles = fileScheduler.getFilesEnqueuedEst();
        if (newTotalEnqueuedFiles > totalEnqueuedFiles) {
          // more files were enqueued since the last estimate: raise the total and reset the bar
          // NOTE(review): the extra 1 presumably accounts for the file just processed - confirm
          totalEnqueuedFiles = newTotalEnqueuedFiles + 1;
          progress.switchToIndeterminate();
          progress.switchToDeterminate(totalEnqueuedFiles);
        }
        // fix for now to handle the same datasource Content enqueued twice:
        // never let the processed count run past the current total
        if (processedFiles < totalEnqueuedFiles) {
          ++processedFiles;
        }

      } // end of for every AbstractFile
      logger.log(Level.INFO, "IngestManager: Finished processing files");
      return null;
    }
Beispiel #5
0
  /**
   * Stops all currently running ingest workers, if any (e.g. when changing a case).
   *
   * <p>In order: the queue worker is cancelled, the file and data source scheduler queues are
   * emptied, running file modules are asked to stop and the file ingest worker is cancelled, and
   * finally every data source worker is cancelled and its module asked to stop if it is still
   * reported as running. Failures while stopping a module are logged as warnings and do not abort
   * the shutdown.
   */
  synchronized void stopAll() {
    // the queue worker goes first
    if (queueWorker != null) {
      queueWorker.cancel(true);
      queueWorker = null;
    }

    // drop everything still waiting to be processed
    scheduler.getFileScheduler().empty();
    scheduler.getDataSourceScheduler().empty();

    // file pipeline: signal the modules, then cancel the worker itself
    if (abstractFileIngester != null) {
      for (IngestModuleAbstractFile fileModule : this.abstractFileModules) {
        if (!isModuleRunning(fileModule)) {
          continue;
        }
        try {
          fileModule.stop();
        } catch (Exception stopFailure) {
          logger.log(
              Level.WARNING,
              "Unexpected exception while stopping module: " + fileModule.getName(),
              stopFailure);
        }
      }

      if (!abstractFileIngester.cancel(true)) {
        logger.log(Level.INFO, "Unable to cancel file ingest worker, likely already stopped");
      }
      abstractFileIngester = null;
    }

    // data source pipeline: work on a copy of the worker collection
    final List<IngestDataSourceThread> workersToStop =
        new ArrayList<IngestDataSourceThread>(dataSourceIngesters);

    for (IngestDataSourceThread worker : workersToStop) {
      final IngestModuleDataSource workerModule = worker.getModule();

      // cancel the worker thread first
      if (!worker.cancel(true)) {
        logger.log(
            Level.INFO,
            "Unable to cancel data source ingest worker for module: "
                + worker.getModule().getName()
                + " data source: "
                + worker.getContent().getName());
      }

      // then give the module a chance to clean up its resources
      if (isModuleRunning(workerModule)) {
        try {
          worker.getModule().stop();
        } catch (Exception stopFailure) {
          logger.log(
              Level.WARNING,
              "Exception while stopping module: " + workerModule.getName(),
              stopFailure);
        }
      }
    }

    logger.log(Level.INFO, "stopped all");
  }
Beispiel #6
0
  /**
   * Starts the needed worker threads.
   *
   * <p>If the AbstractFile worker is still running, nothing is done for it and it is left to
   * consume the queue; otherwise the AbstractFile worker is started or restarted when files are
   * enqueued.
   *
   * <p>Data source ingest workers run per (module, content). For every scheduled data source task
   * this checks whether a worker for the same (module, content) pair already exists and starts a
   * new worker only when none does.
   *
   * <p>The ingest monitor is started first if it is not already running.
   */
  private synchronized void startAll() {
    final IngestScheduler.DataSourceScheduler dataSourceScheduler =
        scheduler.getDataSourceScheduler();
    final IngestScheduler.FileScheduler fileScheduler = scheduler.getFileScheduler();

    logger.log(Level.INFO, "DataSource queue: " + dataSourceScheduler.toString());
    logger.log(Level.INFO, "File queue: " + fileScheduler.toString());

    if (!ingestMonitor.isRunning()) {
      ingestMonitor.start();
    }

    // image ingesters
    // cycle through each data source content in the queue
    while (dataSourceScheduler.hasNext()) {
      // dequeue
      // get next data source content and set of modules
      final ScheduledTask<IngestModuleDataSource> dataSourceTask = dataSourceScheduler.next();

      // check if each module for this data source content is already running
      for (IngestModuleDataSource taskModule : dataSourceTask.getModules()) {
        boolean alreadyRunning = false;
        for (IngestDataSourceThread worker : dataSourceIngesters) {
          // ignore threads that are on different data sources
          if (!worker.getContent().equals(dataSourceTask.getContent())) {
            continue; // check next worker
          }
          // same data source, check module (by name, not id, since different instances)
          if (worker.getModule().getName().equals(taskModule.getName())) {
            alreadyRunning = true;
            logger.log(
                Level.INFO,
                "Data Source Ingester <"
                    + dataSourceTask.getContent()
                    + ", "
                    + taskModule.getName()
                    + "> is already running");
            break;
          }
        }
        // checked all workers
        if (!alreadyRunning) {
          logger.log(
              Level.INFO,
              "Starting new data source Ingester <"
                  + dataSourceTask.getContent()
                  + ", "
                  + taskModule.getName()
                  + ">");
          // data source modules are now initialized per instance

          IngestModuleInit moduleInit = new IngestModuleInit();

          PipelineContext<IngestModuleDataSource> dataSourcepipelineContext =
              new PipelineContext<IngestModuleDataSource>(dataSourceTask, getProcessUnallocSpace());
          final IngestDataSourceThread newDataSourceWorker =
              new IngestDataSourceThread(
                  this,
                  dataSourcepipelineContext,
                  dataSourceTask.getContent(),
                  taskModule,
                  moduleInit);

          dataSourceIngesters.add(newDataSourceWorker);

          // wrap the module in a worker, that will run init, process and complete on the module
          newDataSourceWorker.execute();
          IngestManager.fireModuleEvent(IngestModuleEvent.STARTED.toString(), taskModule.getName());
        }
      }
    }

    // AbstractFile ingester
    boolean startAbstractFileIngester = false;
    if (fileScheduler.hasNext()) {
      if (abstractFileIngester == null) {
        startAbstractFileIngester = true;
        logger.log(Level.INFO, "Starting initial AbstractFile ingester");
      }
      // if worker had completed, restart it in case data is still enqueued
      else if (abstractFileIngester.isDone()) {
        startAbstractFileIngester = true;
        logger.log(Level.INFO, "Restarting AbstractFile ingester");
      }
    } else {
      logger.log(Level.INFO, "no new AbstractFile enqueued, no ingester needed");
    }

    if (startAbstractFileIngester) {
      stats = new IngestManagerStats();
      abstractFileIngester = new IngestAbstractFileProcessor();
      // init all fs modules, everytime new worker starts
      /* @@@ I don't understand why we do an init on each module.  Should do only modules
       * that we are going to be using in the pipeline
       */
      for (IngestModuleAbstractFile s : abstractFileModules) {
        IngestModuleInit moduleInit = new IngestModuleInit();
        try {
          s.init(moduleInit);
        } catch (Exception e) {
          // pass the exception to the logger so the cause and stack trace are not lost;
          // the remaining modules are still initialized and the worker is still started
          logger.log(Level.SEVERE, "File ingest module failed init(): " + s.getName(), e);
        }
      }
      abstractFileIngester.execute();
    }
  }