Exemplo n.º 1
0
  /**
   * Quality-checks a single (possibly converted) record and hands it to the forks.
   *
   * <p>If the row-level policies reject the record, the method returns without forwarding it.
   * Otherwise the fork operator is asked, per branch, whether the record belongs to that branch,
   * and the record is put into the record queue of every fork that both exists and was selected.
   * A put that returns {@code false} is attempted again on the next pass, together with any other
   * put that is still outstanding; the method returns only when no put is outstanding.
   *
   * @param convertedRecord the record to check and fork
   * @param forkOperator operator that decides which branches receive the record
   * @param rowChecker row-level policy checker run before forking
   * @param rowResults results object handed to the policy checker
   * @param branches number of branches; the fork operator must return exactly this many decisions
   * @throws ForkBranchMismatchException if the number of fork decisions differs from {@code
   *     branches}
   * @throws CopyNotSupportedException if {@code inMultipleBranches} reports true for the fork
   *     decisions and the record is not {@link Copyable}
   * @throws Exception if any collaborator invoked here fails
   */
  @SuppressWarnings("unchecked")
  private void processRecord(
      Object convertedRecord,
      ForkOperator forkOperator,
      RowLevelPolicyChecker rowChecker,
      RowLevelPolicyCheckResults rowResults,
      int branches)
      throws Exception {
    if (!rowChecker.executePolicies(convertedRecord, rowResults)) {
      // The record failed quality checking, so nothing is forwarded.
      return;
    }

    List<Boolean> branchSelection = forkOperator.forkDataRecord(this.taskState, convertedRecord);
    int selectionCount = branchSelection.size();
    if (selectionCount != branches) {
      throw new ForkBranchMismatchException(
          String.format(
              "Number of forked data records [%d] is not equal to number of branches [%d]",
              selectionCount, branches));
    }

    boolean copyable = convertedRecord instanceof Copyable;
    if (inMultipleBranches(branchSelection) && !copyable) {
      throw new CopyNotSupportedException(convertedRecord + " is not copyable");
    }

    // Per-branch completion flags; a primitive array avoids boxing/unboxing.
    boolean[] settled = new boolean[branches];

    // Each pass attempts every branch that is not settled yet. A put may time out and return
    // false; such a branch stays unsettled and is retried on the next pass. Passes repeat until
    // a full pass leaves nothing outstanding.
    int outstanding;
    do {
      outstanding = 0;
      for (int branch = 0; branch < branches; branch++) {
        if (settled[branch]) {
          continue;
        }

        boolean wanted = this.forks.get(branch).isPresent() && branchSelection.get(branch);
        if (!wanted) {
          // No fork for this branch, or the record was not routed to it: nothing to put.
          settled[branch] = true;
          continue;
        }

        settled[branch] =
            this.forks
                .get(branch)
                .get()
                .putRecord(copyable ? ((Copyable) convertedRecord).copy() : convertedRecord);
        if (!settled[branch]) {
          outstanding++;
        }
      }
    } while (outstanding > 0);
  }
Exemplo n.º 2
0
  /**
   * Runs this task once, from extraction through fork commit.
   *
   * <p>The method marks the task state as RUNNING, builds the extractor, converter and fork
   * operator, forks the converted schema, creates and submits one {@code Fork} per selected
   * branch, and then pulls records from the extractor one at a time, converting each and passing
   * every converted record to {@code processRecord}. Once the extractor returns {@code null} it
   * tells each fork that the parent is done, waits for the forks, commits the ones that succeeded,
   * and sets the working state to SUCCESSFUL or FAILED.
   *
   * <p>Any {@link Throwable} thrown along the way is handed to {@code failTask}. Regardless of
   * outcome, the finally block records the final state of the constructs, stores the writer
   * metrics, closes all registered resources, optionally publishes the task data, sets the end
   * time and duration, and notifies the task state tracker.
   */
  @Override
  @SuppressWarnings("unchecked")
  public void run() {
    long startTime = System.currentTimeMillis();
    this.taskState.setStartTime(startTime);
    this.taskState.setWorkingState(WorkUnitState.WorkingState.RUNNING);

    // Clear the list so it starts with a fresh list of forks for each run/retry
    this.forks.clear();

    // Everything registered with the closer is closed together in the finally block below.
    Closer closer = Closer.create();
    // Declared outside the try so the finally block can reference them; any of them may still be
    // null there if construction failed early.
    Converter converter = null;
    InstrumentedExtractorBase extractor = null;
    RowLevelPolicyChecker rowChecker = null;
    try {
      extractor =
          closer.register(
              new InstrumentedExtractorDecorator(this.taskState, this.taskContext.getExtractor()));

      converter = closer.register(new MultiConverter(this.taskContext.getConverters()));

      // Get the fork operator. By default IdentityForkOperator is used with a single branch.
      ForkOperator forkOperator = closer.register(this.taskContext.getForkOperator());
      forkOperator.init(this.taskState);
      int branches = forkOperator.getBranches(this.taskState);
      // Set fork.branches explicitly here so the rest task flow can pick it up
      this.taskState.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, branches);

      // Extract, convert, and fork the source schema.
      Object schema = converter.convertSchema(extractor.getSchema(), this.taskState);
      List<Boolean> forkedSchemas = forkOperator.forkSchema(this.taskState, schema);
      // The fork operator must return exactly one decision per branch.
      if (forkedSchemas.size() != branches) {
        throw new ForkBranchMismatchException(
            String.format(
                "Number of forked schemas [%d] is not equal to number of branches [%d]",
                forkedSchemas.size(), branches));
      }

      // Forks receive a copy of the schema when it is Copyable (see below); a schema that
      // inMultipleBranches flags but that cannot be copied is rejected up front.
      if (inMultipleBranches(forkedSchemas) && !(schema instanceof Copyable)) {
        throw new CopyNotSupportedException(schema + " is not copyable");
      }

      // Create one fork for each forked branch
      for (int i = 0; i < branches; i++) {
        if (forkedSchemas.get(i)) {
          Fork fork =
              closer.register(
                  new Fork(
                      this.taskContext,
                      schema instanceof Copyable ? ((Copyable) schema).copy() : schema,
                      branches,
                      i));
          // Run the Fork
          this.forkCompletionService.submit(fork, fork);
          this.forks.add(Optional.of(fork));
        } else {
          // Keep an absent placeholder so that index i in this.forks always maps to branch i.
          this.forks.add(Optional.<Fork>absent());
        }
      }

      // Build the row-level quality checker
      rowChecker = closer.register(this.taskContext.getRowLevelPolicyChecker());
      RowLevelPolicyCheckResults rowResults = new RowLevelPolicyCheckResults();

      long recordsPulled = 0;
      Object record;
      // Extract, convert, and fork one source record at a time.
      // A null return from readRecord ends the loop.
      // NOTE(review): the null argument is presumably an optional reusable record instance —
      // confirm against the extractor API.
      while ((record = extractor.readRecord(null)) != null) {
        recordsPulled++;
        // One source record may convert to several records; each one is processed on its own.
        for (Object convertedRecord : converter.convertRecord(schema, record, this.taskState)) {
          processRecord(convertedRecord, forkOperator, rowChecker, rowResults, branches);
        }
      }

      LOG.info("Extracted " + recordsPulled + " data records");
      LOG.info("Row quality checker finished with results: " + rowResults.getResults());

      this.taskState.setProp(ConfigurationKeys.EXTRACTOR_ROWS_EXTRACTED, recordsPulled);
      this.taskState.setProp(
          ConfigurationKeys.EXTRACTOR_ROWS_EXPECTED, extractor.getExpectedRecordCount());

      for (Optional<Fork> fork : this.forks) {
        if (fork.isPresent()) {
          // Tell the fork that the main branch is completed and no new incoming data records should
          // be expected
          fork.get().markParentTaskDone();
        }
      }

      // Call take() once per present fork, i.e. once per fork submitted above.
      // NOTE(review): presumably each take() blocks until one submitted fork has finished —
      // confirm against the type of forkCompletionService.
      for (Optional<Fork> fork : this.forks) {
        if (fork.isPresent()) {
          try {
            this.forkCompletionService.take();
          } catch (InterruptedException ie) {
            // Restore the interrupt flag; the loop itself is not exited here.
            Thread.currentThread().interrupt();
          }
        }
      }

      // Check if all forks succeeded
      // Only forks that report success are committed. There is no early exit, so the remaining
      // forks are still examined (and committed if successful) after one has failed.
      boolean allForksSucceeded = true;
      for (Optional<Fork> fork : this.forks) {
        if (fork.isPresent()) {
          if (fork.get().isSucceeded()) {
            if (!fork.get().commit()) {
              allForksSucceeded = false;
            }
          } else {
            allForksSucceeded = false;
          }
        }
      }

      if (allForksSucceeded) {
        // Set the task state to SUCCESSFUL. The state is not set to COMMITTED
        // as the data publisher will do that upon successful data publishing.
        this.taskState.setWorkingState(WorkUnitState.WorkingState.SUCCESSFUL);
      } else {
        LOG.error(String.format("Not all forks of task %s succeeded", this.taskId));
        this.taskState.setWorkingState(WorkUnitState.WorkingState.FAILED);
      }

    } catch (Throwable t) {
      // Nothing is rethrown from run(); every failure is routed through failTask.
      failTask(t);
    } finally {

      // Runs on both success and failure; extractor, converter and rowChecker may be null here.
      addConstructsFinalStateToTaskState(extractor, converter, rowChecker);

      this.taskState.setProp(ConfigurationKeys.WRITER_RECORDS_WRITTEN, getRecordsWritten());
      this.taskState.setProp(ConfigurationKeys.WRITER_BYTES_WRITTEN, getBytesWritten());

      try {
        closer.close();
      } catch (Throwable t) {
        // A failure to close is logged only; it does not change the task state here.
        LOG.error("Failed to close all open resources", t);
      }

      try {
        if (shouldPublishDataInTask()) {
          // If data should be published by the task, publish the data and set the task state to
          // COMMITTED.
          // Task data can only be published after all forks have been closed by closer.close().
          publishTaskData();
          this.taskState.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
        }
      } catch (IOException ioe) {
        failTask(ioe);
      } finally {
        // Always record timing and notify the tracker, whatever happened above.
        long endTime = System.currentTimeMillis();
        this.taskState.setEndTime(endTime);
        this.taskState.setTaskDuration(endTime - startTime);
        this.taskStateTracker.onTaskCompletion(this);
      }
    }
  }