Example #1
0
  /**
   * Whether the task should directly publish its output data to the final publisher output
   * directory.
   *
   * <p>The task should publish its output data directly if {@link
   * ConfigurationKeys#PUBLISH_DATA_AT_JOB_LEVEL} is set to false AND any of the following
   * conditions is satisfied:
   *
   * <ul>
   *   <li>The {@link JobCommitPolicy#COMMIT_ON_PARTIAL_SUCCESS} policy is used.
   *   <li>The {@link JobCommitPolicy#COMMIT_SUCCESSFUL_TASKS} policy is used. and all {@link Fork}s
   *       of this {@link Task} succeeded.
   * </ul>
   */
  private boolean shouldPublishDataInTask() {
    boolean publishDataAtJobLevel =
        this.taskState.getPropAsBoolean(
            ConfigurationKeys.PUBLISH_DATA_AT_JOB_LEVEL,
            ConfigurationKeys.DEFAULT_PUBLISH_DATA_AT_JOB_LEVEL);
    if (publishDataAtJobLevel) {
      LOG.info(
          String.format(
              "%s is true. Will publish data at the job level.",
              ConfigurationKeys.PUBLISH_DATA_AT_JOB_LEVEL));
      return false;
    }

    JobCommitPolicy jobCommitPolicy = JobCommitPolicy.getCommitPolicy(this.taskState);

    if (jobCommitPolicy == JobCommitPolicy.COMMIT_SUCCESSFUL_TASKS) {
      return this.taskState.getWorkingState() == WorkUnitState.WorkingState.SUCCESSFUL;
    }

    if (jobCommitPolicy == JobCommitPolicy.COMMIT_ON_PARTIAL_SUCCESS) {
      return true;
    }

    LOG.info("Will publish data at the job level with job commit policy: " + jobCommitPolicy);
    return false;
  }
Example #2
0
  /**
   * Get latest water mark from previous work unit states.
   *
   * @param state Source state
   * @return latest water mark (high water mark)
   */
  private long getLatestWatermarkFromMetadata(SourceState state) {
    LOG.debug("Get latest watermark from the previous run");
    long latestWaterMark = ConfigurationKeys.DEFAULT_WATERMARK_VALUE;

    List<WorkUnitState> previousWorkUnitStates =
        Lists.newArrayList(state.getPreviousWorkUnitStates());
    List<Long> previousWorkUnitStateHighWatermarks = Lists.newArrayList();
    List<Long> previousWorkUnitLowWatermarks = Lists.newArrayList();

    if (previousWorkUnitStates.isEmpty()) {
      LOG.info(
          "No previous work unit states found; Latest watermark - Default watermark: "
              + latestWaterMark);
      return latestWaterMark;
    }

    boolean hasFailedRun = false;
    boolean isCommitOnFullSuccess = false;
    boolean isDataProcessedInPreviousRun = false;

    JobCommitPolicy commitPolicy =
        JobCommitPolicy.forName(
            state.getProp(
                ConfigurationKeys.JOB_COMMIT_POLICY_KEY,
                ConfigurationKeys.DEFAULT_JOB_COMMIT_POLICY));
    if (commitPolicy == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS) {
      isCommitOnFullSuccess = true;
    }

    for (WorkUnitState workUnitState : previousWorkUnitStates) {
      long processedRecordCount = 0;
      LOG.info(
          "State of the previous task: "
              + workUnitState.getId()
              + ":"
              + workUnitState.getWorkingState());
      if (workUnitState.getWorkingState() == WorkingState.FAILED
          || workUnitState.getWorkingState() == WorkingState.CANCELLED
          || workUnitState.getWorkingState() == WorkingState.RUNNING
          || workUnitState.getWorkingState() == WorkingState.PENDING) {
        hasFailedRun = true;
      } else {
        processedRecordCount =
            workUnitState.getPropAsLong(ConfigurationKeys.EXTRACTOR_ROWS_EXPECTED);
        if (processedRecordCount != 0) {
          isDataProcessedInPreviousRun = true;
        }
      }

      LOG.info(
          "Low watermark of the previous task: "
              + workUnitState.getId()
              + ":"
              + workUnitState.getWorkunit().getLowWaterMark());
      LOG.info(
          "High watermark of the previous task: "
              + workUnitState.getId()
              + ":"
              + workUnitState.getHighWaterMark());
      LOG.info("Record count of the previous task: " + processedRecordCount + "\n");

      // Consider high water mark of the previous work unit, if it is
      // extracted any data
      if (processedRecordCount != 0) {
        previousWorkUnitStateHighWatermarks.add(workUnitState.getHighWaterMark());
      }

      previousWorkUnitLowWatermarks.add(this.getLowWatermarkFromWorkUnit(workUnitState));
    }

    // If commit policy is full and it has failed run, get latest water mark
    // as
    // minimum of low water marks from previous states.
    if (isCommitOnFullSuccess && hasFailedRun) {
      long previousLowWatermark = Collections.min(previousWorkUnitLowWatermarks);

      WorkUnitState previousState = previousWorkUnitStates.get(0);
      ExtractType extractType =
          ExtractType.valueOf(
              previousState
                  .getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE)
                  .toUpperCase());

      // add backup seconds only for snapshot extracts but not for appends
      if (extractType == ExtractType.SNAPSHOT) {
        int backupSecs =
            previousState.getPropAsInt(
                ConfigurationKeys.SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS, 0);
        String watermarkType =
            previousState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE);
        latestWaterMark = this.addBackedUpSeconds(previousLowWatermark, backupSecs, watermarkType);
      } else {
        latestWaterMark = previousLowWatermark;
      }

      LOG.info(
          "Previous job was COMMIT_ON_FULL_SUCCESS but it was failed; Latest watermark - "
              + "Min watermark from WorkUnits: "
              + latestWaterMark);
    }

    // If commit policy is full and there are no failed tasks or commit
    // policy is partial,
    // get latest water mark as maximum of high water marks from previous
    // tasks.
    else {
      if (isDataProcessedInPreviousRun) {
        latestWaterMark = Collections.max(previousWorkUnitStateHighWatermarks);
        LOG.info(
            "Previous run was successful. Latest watermark - Max watermark from WorkUnitStates: "
                + latestWaterMark);
      } else {
        latestWaterMark = Collections.min(previousWorkUnitLowWatermarks);
        LOG.info(
            "Previous run was successful but no data found. Latest watermark - Min watermark from WorkUnitStates: "
                + latestWaterMark);
      }
    }

    return latestWaterMark;
  }