Esempio n. 1
0
  /**
   * Builds the manager: obtains the ingest scheduler, creates the (initially empty) list of data
   * source ingest workers and then initializes the modules.
   */
  private IngestManager() {
    // scheduler comes from its own accessor
    this.scheduler = IngestScheduler.getInstance();

    // no data source workers exist yet
    this.dataSourceIngesters = new ArrayList<IngestDataSourceThread>();

    // set up the current modules and the listeners for module changes
    initModules();
  }
Esempio n. 2
0
  /**
   * Reports whether the given module is still running, i.e. it was started and has not yet
   * completed or been stopped.
   *
   * <p>A file-level module counts as running while the file scheduler still has it enqueued or,
   * failing that, while the module itself reports background jobs. For any other module type the
   * data source workers are searched for the first one wrapping an equal module, and the answer is
   * whether that worker is not done yet.
   *
   * @param module module to check
   * @return true if the module is considered running, false otherwise
   */
  public boolean isModuleRunning(final IngestModuleAbstract module) {
    if (module.getType() != IngestModuleAbstract.ModuleType.AbstractFile) {
      // data source module: inspect the worker list while holding the manager's monitor
      synchronized (this) {
        for (IngestDataSourceThread worker : dataSourceIngesters) {
          if (worker.getModule().equals(module)) {
            // only the first matching worker decides the outcome
            return !worker.isDone();
          }
        }
        // empty list, or no worker wraps this module
        return false;
      }
    }

    // file module: queued work means running; otherwise it may still have background work going
    final IngestScheduler.FileScheduler fileQueue = scheduler.getFileScheduler();
    return fileQueue.hasModuleEnqueued((IngestModuleAbstractFile) module)
        || module.hasBackgroundJobsRunning();
  }
Esempio n. 3
0
 /**
  * Tells whether the file scheduler still has files waiting in its queues.
  *
  * @return true if the file scheduler reports more files, false otherwise
  */
 public boolean getFileSchedulerHasNext() {
   final IngestScheduler.FileScheduler fileQueue = scheduler.getFileScheduler();
   return fileQueue.hasNext();
 }
Esempio n. 4
0
  /**
   * Stops whatever ingest work is currently in flight (e.g. when the case is being changed).
   *
   * <p>Steps, in order: cancel the queue worker, empty the file and data source scheduler queues,
   * ask every still-running file module to stop and cancel the file ingest worker, then cancel each
   * data source worker and notify its module if it is still reported as running.
   */
  synchronized void stopAll() {
    // queue worker goes first
    if (queueWorker != null) {
      queueWorker.cancel(true);
      queueWorker = null;
    }

    // discard everything that is still pending
    scheduler.getFileScheduler().empty();
    scheduler.getDataSourceScheduler().empty();

    // file-level pipeline
    if (abstractFileIngester != null) {
      // signal every file module that is still running
      for (IngestModuleAbstractFile fileModule : this.abstractFileModules) {
        if (!isModuleRunning(fileModule)) {
          continue;
        }
        try {
          fileModule.stop();
        } catch (Exception e) {
          // a misbehaving module must not prevent the remaining ones from being stopped
          logger.log(
              Level.WARNING,
              "Unexpected exception while stopping module: " + fileModule.getName(),
              e);
        }
      }

      // then cancel the file ingest worker itself
      if (!abstractFileIngester.cancel(true)) {
        logger.log(Level.INFO, "Unable to cancel file ingest worker, likely already stopped");
      }

      abstractFileIngester = null;
    }

    // Walk a copy of the worker list.
    // NOTE(review): presumably workers may be removed from dataSourceIngesters while they are
    // being cancelled - confirm against IngestDataSourceThread.
    final List<IngestDataSourceThread> snapshot =
        new ArrayList<IngestDataSourceThread>(dataSourceIngesters);

    for (IngestDataSourceThread worker : snapshot) {
      final IngestModuleDataSource module = worker.getModule();

      // interrupt the worker thread if it is running
      if (!worker.cancel(true)) {
        logger.log(
            Level.INFO,
            "Unable to cancel data source ingest worker for module: "
                + worker.getModule().getName()
                + " data source: "
                + worker.getContent().getName());
      }

      // let the module release its resources if it is still reported as running
      if (isModuleRunning(module)) {
        try {
          worker.getModule().stop();
        } catch (Exception e) {
          logger.log(Level.WARNING, "Exception while stopping module: " + module.getName(), e);
        }
      }
    }

    logger.log(Level.INFO, "stopped all");
  }
Esempio n. 5
0
  /**
   * Starts the worker threads needed for the work currently queued in the scheduler.
   *
   * <p>The ingest monitor is started first if it is not already running.
   *
   * <p>Data source ingest workers run per (module, content) pair. Every task is dequeued from the
   * data source scheduler and, for each of its modules, a new worker is created and executed
   * unless a worker for the same content and the same module name is already registered in
   * {@code dataSourceIngesters}.
   *
   * <p>The AbstractFile ingest worker is (re)started only when the file scheduler has files
   * enqueued and either no worker exists yet or the previous one is done; a worker that is still
   * running is left alone so it can keep consuming the queue.
   */
  private synchronized void startAll() {
    final IngestScheduler.DataSourceScheduler dataSourceScheduler =
        scheduler.getDataSourceScheduler();
    final IngestScheduler.FileScheduler fileScheduler = scheduler.getFileScheduler();

    logger.log(Level.INFO, "DataSource queue: " + dataSourceScheduler.toString());
    logger.log(Level.INFO, "File queue: " + fileScheduler.toString());

    if (!ingestMonitor.isRunning()) {
      ingestMonitor.start();
    }

    // data source ingesters
    // cycle through each data source content in the queue
    while (dataSourceScheduler.hasNext()) {
      // dequeue the next data source content together with its set of modules
      final ScheduledTask<IngestModuleDataSource> dataSourceTask = dataSourceScheduler.next();

      // check if each module for this data source content is already running
      for (IngestModuleDataSource taskModule : dataSourceTask.getModules()) {
        boolean alreadyRunning = false;
        for (IngestDataSourceThread worker : dataSourceIngesters) {
          // ignore threads that are on different data sources
          if (!worker.getContent().equals(dataSourceTask.getContent())) {
            continue; // check next worker
          }
          // same data source, check module (by name, not id, since different instances)
          if (worker.getModule().getName().equals(taskModule.getName())) {
            alreadyRunning = true;
            logger.log(
                Level.INFO,
                "Data Source Ingester <"
                    + dataSourceTask.getContent()
                    + ", "
                    + taskModule.getName()
                    + "> is already running");
            break;
          }
        }

        // checked all workers
        if (!alreadyRunning) {
          logger.log(
              Level.INFO,
              "Starting new data source Ingester <"
                  + dataSourceTask.getContent()
                  + ", "
                  + taskModule.getName()
                  + ">");

          // data source modules are initialized per instance, so each worker gets its own init
          IngestModuleInit moduleInit = new IngestModuleInit();

          PipelineContext<IngestModuleDataSource> dataSourcepipelineContext =
              new PipelineContext<IngestModuleDataSource>(dataSourceTask, getProcessUnallocSpace());
          final IngestDataSourceThread newDataSourceWorker =
              new IngestDataSourceThread(
                  this,
                  dataSourcepipelineContext,
                  dataSourceTask.getContent(),
                  taskModule,
                  moduleInit);

          // register before executing so later iterations see this worker as already running
          dataSourceIngesters.add(newDataSourceWorker);

          // the worker wraps the module and will run init, process and complete on it
          newDataSourceWorker.execute();
          IngestManager.fireModuleEvent(IngestModuleEvent.STARTED.toString(), taskModule.getName());
        }
      }
    }

    // AbstractFile ingester
    boolean startAbstractFileIngester = false;
    if (fileScheduler.hasNext()) {
      if (abstractFileIngester == null) {
        startAbstractFileIngester = true;
        logger.log(Level.INFO, "Starting initial AbstractFile ingester");
      }
      // if worker had completed, restart it in case data is still enqueued
      else if (abstractFileIngester.isDone()) {
        startAbstractFileIngester = true;
        logger.log(Level.INFO, "Restarting AbstractFile ingester");
      }
    } else {
      logger.log(Level.INFO, "no new AbstractFile enqueued, no ingester needed");
    }

    if (startAbstractFileIngester) {
      stats = new IngestManagerStats();
      abstractFileIngester = new IngestAbstractFileProcessor();
      // Init all file modules every time a new worker starts.
      // NOTE: this initializes every known module, not only the ones that will be used in the
      // pipeline; the original authors flagged this as questionable.
      for (IngestModuleAbstractFile s : abstractFileModules) {
        IngestModuleInit moduleInit = new IngestModuleInit();
        try {
          s.init(moduleInit);
        } catch (Exception e) {
          // keep going with the remaining modules, but record the cause and stack trace
          logger.log(Level.SEVERE, "File ingest module failed init(): " + s.getName(), e);
        }
      }
      abstractFileIngester.execute();
    }
  }
Esempio n. 6
0
 /**
  * Hands a file to the file scheduler so that it joins the ongoing file ingest on the same data
  * source. The scheduler updates the current progress.
  *
  * <p>Such a file is usually produced by an ingest that is currently running, and it should be
  * processed with that same ingest context.
  *
  * @param file file to be scheduled
  * @param pipelineContext ingest context used to ingest the parent of the file to be scheduled
  */
 void scheduleFile(AbstractFile file, PipelineContext pipelineContext) {
   final IngestScheduler.FileScheduler fileQueue = scheduler.getFileScheduler();
   fileQueue.schedule(file, pipelineContext);
 }