/** Constructs a snapshot of ingest job progress. */ private ProgressSnapshot(boolean getIngestTasksSnapshot) { dataSourceModule = null; fileIngestRunning = false; fileIngestStartTime = null; dataSourceProcessingSnapshots = new ArrayList<>(); for (DataSourceIngestJob dataSourceJob : dataSourceJobs.values()) { DataSourceIngestJob.Snapshot snapshot = dataSourceJob.getSnapshot(getIngestTasksSnapshot); dataSourceProcessingSnapshots.add(new DataSourceProcessingSnapshot(snapshot)); if (null == dataSourceModule) { DataSourceIngestPipeline.PipelineModule module = snapshot.getDataSourceLevelIngestModule(); if (null != module) { dataSourceModule = new DataSourceIngestModuleHandle(dataSourceJobs.get(snapshot.getJobId()), module); } } if (snapshot.fileIngestIsRunning()) { fileIngestRunning = true; } Date childFileIngestStartTime = snapshot.fileIngestStartTime(); if (null != childFileIngestStartTime && (null == fileIngestStartTime || childFileIngestStartTime.before(fileIngestStartTime))) { fileIngestStartTime = childFileIngestStartTime; } } this.jobCancelled = cancelled; }
/**
 * Starts this ingest job by starting each child data source ingest job in turn, which starts
 * the ingest module pipelines and schedules the ingest tasks that make up the job.
 *
 * <p>A job can only be started once; a repeated call reports an {@link IllegalStateException}
 * wrapped in an ingest module error instead of throwing. Start up stops at the first child job
 * that reports errors, and in that case every child job is asked to cancel.
 *
 * @return A collection of ingest module start up errors, empty on success.
 */
synchronized List<IngestModuleError> start() {
    List<IngestModuleError> startupErrors = new ArrayList<>();
    if (started) {
        IllegalStateException alreadyStarted = new IllegalStateException("Job already started");
        startupErrors.add(new IngestModuleError("IngestJob", alreadyStarted));
        return startupErrors;
    }
    started = true;

    // Stop at the first child job whose start up reports any error.
    for (DataSourceIngestJob childJob : dataSourceJobs.values()) {
        startupErrors.addAll(childJob.start());
        if (!startupErrors.isEmpty()) {
            break;
        }
    }

    if (startupErrors.isEmpty()) {
        return startupErrors;
    }

    /*
     * TODO: Need to let successfully started data source ingest jobs know they should shut down.
     * This means that the start up of the ingest module pipelines and the submission of ingest
     * tasks should be separated. This cancellation is just a stop gap; fortunately, if startup
     * is going to fail, it will likely fail for the first child data source ingest job.
     */
    for (DataSourceIngestJob childJob : dataSourceJobs.values()) {
        childJob.cancel();
    }
    return startupErrors;
}
/**
 * Collects a progress snapshot from each of this ingest job's child data source ingest jobs.
 * Every child snapshot is requested with {@code getSnapshot(true)}.
 *
 * @return A list of data source ingest job progress snapshots, one per child job.
 */
List<DataSourceIngestJob.Snapshot> getDataSourceIngestJobSnapshots() {
    List<DataSourceIngestJob.Snapshot> jobSnapshots = new ArrayList<>();
    for (DataSourceIngestJob childJob : dataSourceJobs.values()) {
        DataSourceIngestJob.Snapshot childSnapshot = childJob.getSnapshot(true);
        jobSnapshots.add(childSnapshot);
    }
    return jobSnapshots;
}
/**
 * Checks whether at least one child data source ingest job of this ingest job has an ingest
 * pipeline when its settings are applied.
 *
 * @return True if any child job reports an ingest pipeline, false otherwise (including when
 *     there are no child jobs).
 */
boolean hasIngestPipeline() {
    boolean pipelineFound = false;
    for (DataSourceIngestJob childJob : dataSourceJobs.values()) {
        if (childJob.hasIngestPipeline()) {
            // One pipeline is enough; no need to query the remaining child jobs.
            pipelineFound = true;
            break;
        }
    }
    return pipelineFound;
}
/**
 * Constructs an ingest job that runs a collection of data sources through a set of ingest
 * modules specified via ingest job settings. One child data source ingest job is created per
 * data source and registered under the child's own id; the count of incomplete jobs starts at
 * the number of registered child jobs.
 *
 * @param dataSources The data sources to be ingested.
 * @param settings The ingest job settings.
 * @param runInteractively Whether or not this job should use progress bars, message boxes for
 *     errors, etc.
 */
IngestJob(Collection<Content> dataSources, IngestJobSettings settings, boolean runInteractively) {
    // The id is assigned first because each child job receives a reference to this job.
    id = IngestJob.nextId.getAndIncrement();
    dataSourceJobs = new ConcurrentHashMap<>();
    for (Content source : dataSources) {
        DataSourceIngestJob childJob =
                new DataSourceIngestJob(this, source, settings, runInteractively);
        dataSourceJobs.put(childJob.getId(), childJob);
    }
    incompleteJobsCount = new AtomicInteger(dataSourceJobs.size());
}
/**
 * Constructs a handle to a data source level ingest module that can be used to get basic
 * information about the module and to request cancellation of the module. The module's
 * cancellation state is read from the owning job once, at construction time.
 *
 * @param job The data source ingest job that owns the data source level ingest module.
 * @param module The data source level ingest module.
 */
private DataSourceIngestModuleHandle(
        DataSourceIngestJob job, DataSourceIngestPipeline.PipelineModule module) {
    boolean moduleCancelled = job.currentDataSourceIngestModuleIsCancelled();
    this.module = module;
    this.job = job;
    this.cancelled = moduleCancelled;
}
/**
 * Requests cancellation of this ingest job, which means discarding unfinished tasks and
 * stopping the ingest pipelines. Each child data source ingest job is asked to cancel first,
 * and only then is this job flagged as cancelled. Returns immediately, but there may be a delay
 * before all of the ingest modules in the pipelines respond by stopping processing.
 */
public void cancel() {
    for (DataSourceIngestJob childJob : dataSourceJobs.values()) {
        childJob.cancel();
    }
    cancelled = true;
}