Exemplo n.º 1
0
 /**
  * Reports whether the file-level ingest worker is currently active.
  *
  * @return {@code true} if a file ingest worker exists and has not finished yet; {@code false}
  *     if no worker has been created or the existing one is done
  */
 public synchronized boolean isFileIngestRunning() {
   // a missing worker counts as "not running"; otherwise defer to the worker's own done flag
   return abstractFileIngester != null && !abstractFileIngester.isDone();
 }
Exemplo n.º 2
0
  /**
   * Starts whichever ingest workers are needed for the currently scheduled work.
   *
   * <p>Data source workers run per (module, content) pair. Every task is dequeued from the data
   * source scheduler and, for each of its modules, a new worker is created and executed unless a
   * worker in {@code dataSourceIngesters} already has equal content and the same module name.
   *
   * <p>The AbstractFile worker is started when the file scheduler has work and no worker exists
   * yet, or restarted when the previous worker is done. If the existing worker is still running,
   * nothing is done and it is left to consume the queue.
   *
   * <p>A file module whose {@code init} throws is logged (with the exception) and skipped; the
   * AbstractFile worker is still started afterwards.
   */
  private synchronized void startAll() {
    final IngestScheduler.DataSourceScheduler dataSourceScheduler =
        scheduler.getDataSourceScheduler();
    final IngestScheduler.FileScheduler fileScheduler = scheduler.getFileScheduler();

    logger.log(Level.INFO, "DataSource queue: " + dataSourceScheduler.toString());
    logger.log(Level.INFO, "File queue: " + fileScheduler.toString());

    if (!ingestMonitor.isRunning()) {
      ingestMonitor.start();
    }

    // data source ingesters:
    // drain the data source queue, one scheduled task (content + set of modules) at a time
    while (dataSourceScheduler.hasNext()) {
      final ScheduledTask<IngestModuleDataSource> dataSourceTask = dataSourceScheduler.next();

      // for every module of this task, check whether a worker for the same
      // (content, module) pair is already registered
      for (IngestModuleDataSource taskModule : dataSourceTask.getModules()) {
        boolean alreadyRunning = false;
        for (IngestDataSourceThread worker : dataSourceIngesters) {
          // ignore workers that are on different data sources
          if (!worker.getContent().equals(dataSourceTask.getContent())) {
            continue;
          }
          // same data source: compare modules by name, not identity, since the
          // worker and the task hold different module instances
          if (worker.getModule().getName().equals(taskModule.getName())) {
            alreadyRunning = true;
            logger.log(
                Level.INFO,
                "Data Source Ingester <"
                    + dataSourceTask.getContent()
                    + ", "
                    + taskModule.getName()
                    + "> is already running");
            break;
          }
        }

        // no matching worker found: create, register and launch a new one
        if (!alreadyRunning) {
          logger.log(
              Level.INFO,
              "Starting new data source Ingester <"
                  + dataSourceTask.getContent()
                  + ", "
                  + taskModule.getName()
                  + ">");

          // data source modules are initialized per instance, so each worker gets its own init
          IngestModuleInit moduleInit = new IngestModuleInit();

          PipelineContext<IngestModuleDataSource> dataSourcepipelineContext =
              new PipelineContext<IngestModuleDataSource>(dataSourceTask, getProcessUnallocSpace());
          final IngestDataSourceThread newDataSourceWorker =
              new IngestDataSourceThread(
                  this,
                  dataSourcepipelineContext,
                  dataSourceTask.getContent(),
                  taskModule,
                  moduleInit);

          // register before launching so later iterations see this worker as running
          dataSourceIngesters.add(newDataSourceWorker);

          // the worker wraps the module and runs init, process and complete on it
          newDataSourceWorker.execute();
          IngestManager.fireModuleEvent(IngestModuleEvent.STARTED.toString(), taskModule.getName());
        }
      }
    }

    // AbstractFile ingester
    boolean startAbstractFileIngester = false;
    if (fileScheduler.hasNext()) {
      if (abstractFileIngester == null) {
        startAbstractFileIngester = true;
        logger.log(Level.INFO, "Starting initial AbstractFile ingester");
      }
      // if the worker had completed, restart it in case data is still enqueued
      else if (abstractFileIngester.isDone()) {
        startAbstractFileIngester = true;
        logger.log(Level.INFO, "Restarting AbstractFile ingester");
      }
      // otherwise the existing worker is still running and will consume the queue itself
    } else {
      logger.log(Level.INFO, "no new AbstractFile enqueued, no ingester needed");
    }

    if (startAbstractFileIngester) {
      // a new worker run starts with fresh statistics
      stats = new IngestManagerStats();
      abstractFileIngester = new IngestAbstractFileProcessor();
      // init all file modules every time a new worker starts
      /* @@@ It is unclear why init is run on every module. It should probably only be done for
       * the modules that are going to be used in the pipeline.
       */
      for (IngestModuleAbstractFile s : abstractFileModules) {
        IngestModuleInit moduleInit = new IngestModuleInit();
        try {
          s.init(moduleInit);
        } catch (Exception e) {
          // best effort: one failing module must not prevent the worker from starting,
          // but keep the exception so the cause and stack trace reach the log
          logger.log(Level.SEVERE, "File ingest module failed init(): " + s.getName(), e);
        }
      }
      abstractFileIngester.execute();
    }
  }