Esempio n. 1
0
  /**
   * Collects the final state reported by every construct this task used and merges the combined
   * result into the task's {@link gobblin.runtime.TaskState}.
   *
   * <p>Any of the three arguments may be {@code null}, in which case that construct is skipped.
   * Every fork present in {@code this.forks} contributes its final state as well, tagged with the
   * string form of its position in that collection (absent forks still consume a position).
   *
   * @param extractor the {@link gobblin.instrumented.extractor.InstrumentedExtractorBase} used by
   *     this task, or {@code null} if none was created.
   * @param converter the {@link gobblin.converter.Converter} used by this task, or {@code null} if
   *     none was created.
   * @param rowChecker the {@link RowLevelPolicyChecker} used by this task, or {@code null} if none
   *     was created.
   */
  private void addConstructsFinalStateToTaskState(
      InstrumentedExtractorBase<?, ?> extractor,
      Converter<?, ?, ?, ?> converter,
      RowLevelPolicyChecker rowChecker) {
    ConstructState aggregate = new ConstructState();

    if (extractor != null) {
      ConstructState extractorState = new ConstructState(extractor.getFinalState());
      aggregate.addConstructState(Constructs.EXTRACTOR, extractorState);
    }

    if (converter != null) {
      ConstructState converterState = new ConstructState(converter.getFinalState());
      aggregate.addConstructState(Constructs.CONVERTER, converterState);
    }

    if (rowChecker != null) {
      ConstructState checkerState = new ConstructState(rowChecker.getFinalState());
      aggregate.addConstructState(Constructs.ROW_QUALITY_CHECKER, checkerState);
    }

    // The position counter advances for every slot, present or absent, so the tag attached to a
    // fork's state always equals that fork's position in this.forks.
    int position = -1;
    for (Optional<Fork> slot : this.forks) {
      position++;
      if (!slot.isPresent()) {
        continue;
      }
      ConstructState forkState = new ConstructState(slot.get().getFinalState());
      aggregate.addConstructState(Constructs.FORK_OPERATOR, forkState, Integer.toString(position));
    }

    aggregate.mergeIntoWorkUnitState(this.taskState);
  }
Esempio n. 2
0
  /**
   * Executes one run of this task.
   *
   * <p>The method marks the task state as RUNNING, builds the extractor, converter, fork operator,
   * forks and row-level policy checker, then reads records from the extractor one at a time,
   * converts each of them and passes every converted record to {@code processRecord}. Once the
   * extractor returns {@code null} it signals all forks that the parent is done, waits for them
   * through {@code forkCompletionService}, commits the forks that succeeded and sets the working
   * state to SUCCESSFUL or FAILED accordingly.
   *
   * <p>Any {@link Throwable} raised by that flow is handed to {@code failTask}. Regardless of the
   * outcome, the cleanup phase records construct final states and writer counters, closes every
   * resource registered with the {@code Closer}, optionally publishes the task data, stamps the
   * end time and duration on the task state, and notifies {@code taskStateTracker}.
   */
  @Override
  @SuppressWarnings("unchecked")
  public void run() {
    long startTime = System.currentTimeMillis();
    this.taskState.setStartTime(startTime);
    this.taskState.setWorkingState(WorkUnitState.WorkingState.RUNNING);

    // Clear the list so it starts with a fresh list of forks for each run/retry
    this.forks.clear();

    // Everything registered with the closer is closed in the finally block below.
    Closer closer = Closer.create();
    // Declared outside the try so the finally block can read their final state; each stays null
    // if the flow fails before the construct is created.
    Converter converter = null;
    InstrumentedExtractorBase extractor = null;
    RowLevelPolicyChecker rowChecker = null;
    try {
      extractor =
          closer.register(
              new InstrumentedExtractorDecorator(this.taskState, this.taskContext.getExtractor()));

      converter = closer.register(new MultiConverter(this.taskContext.getConverters()));

      // Get the fork operator. By default IdentityForkOperator is used with a single branch.
      ForkOperator forkOperator = closer.register(this.taskContext.getForkOperator());
      forkOperator.init(this.taskState);
      int branches = forkOperator.getBranches(this.taskState);
      // Set fork.branches explicitly here so the rest task flow can pick it up
      this.taskState.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, branches);

      // Extract, convert, and fork the source schema.
      Object schema = converter.convertSchema(extractor.getSchema(), this.taskState);
      List<Boolean> forkedSchemas = forkOperator.forkSchema(this.taskState, schema);
      if (forkedSchemas.size() != branches) {
        throw new ForkBranchMismatchException(
            String.format(
                "Number of forked schemas [%d] is not equal to number of branches [%d]",
                forkedSchemas.size(), branches));
      }

      // A schema handed to more than one branch must be copyable.
      if (inMultipleBranches(forkedSchemas) && !(schema instanceof Copyable)) {
        throw new CopyNotSupportedException(schema + " is not copyable");
      }

      // Create one fork for each forked branch
      for (int i = 0; i < branches; i++) {
        if (forkedSchemas.get(i)) {
          Fork fork =
              closer.register(
                  new Fork(
                      this.taskContext,
                      schema instanceof Copyable ? ((Copyable) schema).copy() : schema,
                      branches,
                      i));
          // Run the Fork
          this.forkCompletionService.submit(fork, fork);
          this.forks.add(Optional.of(fork));
        } else {
          // Keep a placeholder so that list positions keep matching branch indices.
          this.forks.add(Optional.<Fork>absent());
        }
      }

      // Build the row-level quality checker
      rowChecker = closer.register(this.taskContext.getRowLevelPolicyChecker());
      RowLevelPolicyCheckResults rowResults = new RowLevelPolicyCheckResults();

      long recordsPulled = 0;
      Object record;
      // Extract, convert, and fork one source record at a time.
      while ((record = extractor.readRecord(null)) != null) {
        recordsPulled++;
        for (Object convertedRecord : converter.convertRecord(schema, record, this.taskState)) {
          processRecord(convertedRecord, forkOperator, rowChecker, rowResults, branches);
        }
      }

      LOG.info("Extracted " + recordsPulled + " data records");
      LOG.info("Row quality checker finished with results: " + rowResults.getResults());

      this.taskState.setProp(ConfigurationKeys.EXTRACTOR_ROWS_EXTRACTED, recordsPulled);
      this.taskState.setProp(
          ConfigurationKeys.EXTRACTOR_ROWS_EXPECTED, extractor.getExpectedRecordCount());

      for (Optional<Fork> fork : this.forks) {
        if (fork.isPresent()) {
          // Tell the fork that the main branch is completed and no new incoming data records should
          // be expected
          fork.get().markParentTaskDone();
        }
      }

      // Take one completion per submitted fork.
      for (Optional<Fork> fork : this.forks) {
        if (fork.isPresent()) {
          try {
            this.forkCompletionService.take();
          } catch (InterruptedException ie) {
            // Restore the interrupt flag for callers; the success check below decides the outcome.
            Thread.currentThread().interrupt();
          }
        }
      }

      // Check if all forks succeeded
      boolean allForksSucceeded = true;
      for (Optional<Fork> fork : this.forks) {
        if (fork.isPresent()) {
          if (fork.get().isSucceeded()) {
            // A fork that ran successfully but fails to commit still fails the task.
            if (!fork.get().commit()) {
              allForksSucceeded = false;
            }
          } else {
            allForksSucceeded = false;
          }
        }
      }

      if (allForksSucceeded) {
        // Set the task state to SUCCESSFUL. The state is not set to COMMITTED
        // as the data publisher will do that upon successful data publishing.
        this.taskState.setWorkingState(WorkUnitState.WorkingState.SUCCESSFUL);
      } else {
        LOG.error(String.format("Not all forks of task %s succeeded", this.taskId));
        this.taskState.setWorkingState(WorkUnitState.WorkingState.FAILED);
      }

    } catch (Throwable t) {
      failTask(t);
    } finally {

      // Guard the state bookkeeping: if it throws, the resources registered with the closer must
      // still be closed and the completion callback below must still run. A failure here is
      // reported through failTask, like failures of the main flow.
      try {
        addConstructsFinalStateToTaskState(extractor, converter, rowChecker);

        this.taskState.setProp(ConfigurationKeys.WRITER_RECORDS_WRITTEN, getRecordsWritten());
        this.taskState.setProp(ConfigurationKeys.WRITER_BYTES_WRITTEN, getBytesWritten());
      } catch (Throwable t) {
        failTask(t);
      }

      try {
        closer.close();
      } catch (Throwable t) {
        // Only logged: a close failure does not change the working state set above.
        LOG.error("Failed to close all open resources", t);
      }

      try {
        if (shouldPublishDataInTask()) {
          // If data should be published by the task, publish the data and set the task state to
          // COMMITTED.
          // Task data can only be published after all forks have been closed by closer.close().
          publishTaskData();
          this.taskState.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
        }
      } catch (IOException ioe) {
        failTask(ioe);
      } finally {
        // Always stamp timing information and report completion, whatever happened above.
        long endTime = System.currentTimeMillis();
        this.taskState.setEndTime(endTime);
        this.taskState.setTaskDuration(endTime - startTime);
        this.taskStateTracker.onTaskCompletion(this);
      }
    }
  }