/**
 * Collects the final state of every construct this task used and merges the combined result
 * into this task's {@link gobblin.runtime.TaskState}.
 *
 * <p>A {@code null} extractor, converter or row checker is skipped. Each present fork is
 * recorded under its position in {@code this.forks}; absent forks contribute nothing but still
 * advance the position counter.
 *
 * @param extractor the {@link gobblin.instrumented.extractor.InstrumentedExtractorBase} used by
 *     this task, or {@code null} if none was created.
 * @param converter the {@link gobblin.converter.Converter} used by this task, or {@code null}
 *     if none was created.
 * @param rowChecker the {@link RowLevelPolicyChecker} used by this task, or {@code null} if
 *     none was created.
 */
private void addConstructsFinalStateToTaskState(
    InstrumentedExtractorBase<?, ?> extractor,
    Converter<?, ?, ?, ?> converter,
    RowLevelPolicyChecker rowChecker) {
  ConstructState aggregate = new ConstructState();

  if (extractor != null) {
    ConstructState extractorState = new ConstructState(extractor.getFinalState());
    aggregate.addConstructState(Constructs.EXTRACTOR, extractorState);
  }

  if (converter != null) {
    ConstructState converterState = new ConstructState(converter.getFinalState());
    aggregate.addConstructState(Constructs.CONVERTER, converterState);
  }

  if (rowChecker != null) {
    ConstructState checkerState = new ConstructState(rowChecker.getFinalState());
    aggregate.addConstructState(Constructs.ROW_QUALITY_CHECKER, checkerState);
  }

  // The position is incremented for absent forks too, so the recorded id matches the fork's
  // slot in this.forks.
  int position = 0;
  for (Optional<Fork> maybeFork : this.forks) {
    if (maybeFork.isPresent()) {
      ConstructState forkState = new ConstructState(maybeFork.get().getFinalState());
      aggregate.addConstructState(
          Constructs.FORK_OPERATOR, forkState, Integer.toString(position));
    }
    position++;
  }

  aggregate.mergeIntoWorkUnitState(this.taskState);
}
@Override @SuppressWarnings("unchecked") public void run() { long startTime = System.currentTimeMillis(); this.taskState.setStartTime(startTime); this.taskState.setWorkingState(WorkUnitState.WorkingState.RUNNING); // Clear the list so it starts with a fresh list of forks for each run/retry this.forks.clear(); Closer closer = Closer.create(); Converter converter = null; InstrumentedExtractorBase extractor = null; RowLevelPolicyChecker rowChecker = null; try { extractor = closer.register( new InstrumentedExtractorDecorator(this.taskState, this.taskContext.getExtractor())); converter = closer.register(new MultiConverter(this.taskContext.getConverters())); // Get the fork operator. By default IdentityForkOperator is used with a single branch. ForkOperator forkOperator = closer.register(this.taskContext.getForkOperator()); forkOperator.init(this.taskState); int branches = forkOperator.getBranches(this.taskState); // Set fork.branches explicitly here so the rest task flow can pick it up this.taskState.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, branches); // Extract, convert, and fork the source schema. Object schema = converter.convertSchema(extractor.getSchema(), this.taskState); List<Boolean> forkedSchemas = forkOperator.forkSchema(this.taskState, schema); if (forkedSchemas.size() != branches) { throw new ForkBranchMismatchException( String.format( "Number of forked schemas [%d] is not equal to number of branches [%d]", forkedSchemas.size(), branches)); } if (inMultipleBranches(forkedSchemas) && !(schema instanceof Copyable)) { throw new CopyNotSupportedException(schema + " is not copyable"); } // Create one fork for each forked branch for (int i = 0; i < branches; i++) { if (forkedSchemas.get(i)) { Fork fork = closer.register( new Fork( this.taskContext, schema instanceof Copyable ? 
((Copyable) schema).copy() : schema, branches, i)); // Run the Fork this.forkCompletionService.submit(fork, fork); this.forks.add(Optional.of(fork)); } else { this.forks.add(Optional.<Fork>absent()); } } // Build the row-level quality checker rowChecker = closer.register(this.taskContext.getRowLevelPolicyChecker()); RowLevelPolicyCheckResults rowResults = new RowLevelPolicyCheckResults(); long recordsPulled = 0; Object record; // Extract, convert, and fork one source record at a time. while ((record = extractor.readRecord(null)) != null) { recordsPulled++; for (Object convertedRecord : converter.convertRecord(schema, record, this.taskState)) { processRecord(convertedRecord, forkOperator, rowChecker, rowResults, branches); } } LOG.info("Extracted " + recordsPulled + " data records"); LOG.info("Row quality checker finished with results: " + rowResults.getResults()); this.taskState.setProp(ConfigurationKeys.EXTRACTOR_ROWS_EXTRACTED, recordsPulled); this.taskState.setProp( ConfigurationKeys.EXTRACTOR_ROWS_EXPECTED, extractor.getExpectedRecordCount()); for (Optional<Fork> fork : this.forks) { if (fork.isPresent()) { // Tell the fork that the main branch is completed and no new incoming data records should // be expected fork.get().markParentTaskDone(); } } for (Optional<Fork> fork : this.forks) { if (fork.isPresent()) { try { this.forkCompletionService.take(); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } } } // Check if all forks succeeded boolean allForksSucceeded = true; for (Optional<Fork> fork : this.forks) { if (fork.isPresent()) { if (fork.get().isSucceeded()) { if (!fork.get().commit()) { allForksSucceeded = false; } } else { allForksSucceeded = false; } } } if (allForksSucceeded) { // Set the task state to SUCCESSFUL. The state is not set to COMMITTED // as the data publisher will do that upon successful data publishing. 
this.taskState.setWorkingState(WorkUnitState.WorkingState.SUCCESSFUL); } else { LOG.error(String.format("Not all forks of task %s succeeded", this.taskId)); this.taskState.setWorkingState(WorkUnitState.WorkingState.FAILED); } } catch (Throwable t) { failTask(t); } finally { addConstructsFinalStateToTaskState(extractor, converter, rowChecker); this.taskState.setProp(ConfigurationKeys.WRITER_RECORDS_WRITTEN, getRecordsWritten()); this.taskState.setProp(ConfigurationKeys.WRITER_BYTES_WRITTEN, getBytesWritten()); try { closer.close(); } catch (Throwable t) { LOG.error("Failed to close all open resources", t); } try { if (shouldPublishDataInTask()) { // If data should be published by the task, publish the data and set the task state to // COMMITTED. // Task data can only be published after all forks have been closed by closer.close(). publishTaskData(); this.taskState.setWorkingState(WorkUnitState.WorkingState.COMMITTED); } } catch (IOException ioe) { failTask(ioe); } finally { long endTime = System.currentTimeMillis(); this.taskState.setEndTime(endTime); this.taskState.setTaskDuration(endTime - startTime); this.taskStateTracker.onTaskCompletion(this); } } }