コード例 #1
0
ファイル: TaskState.java プロジェクト: xkrogen/gobblin
 public TaskState(WorkUnitState workUnitState) {
   // Since getWorkunit() returns an immutable WorkUnit object,
   // the WorkUnit object in this object is also immutable.
   super(workUnitState.getWorkunit());
   addAll(workUnitState);
   this.jobId = workUnitState.getProp(ConfigurationKeys.JOB_ID_KEY);
   this.taskId = workUnitState.getProp(ConfigurationKeys.TASK_ID_KEY);
   this.setId(this.taskId);
 }
コード例 #2
0
  /**
   * Get the low water mark of the given work unit state, moved back by one watermark delta.
   *
   * <p>If the work unit's low water mark equals
   * {@code ConfigurationKeys.DEFAULT_WATERMARK_VALUE} it is returned unchanged. Otherwise the
   * delta reported by {@code WatermarkPredicate.getDeltaNumForNextWatermark()} for the configured
   * watermark type is subtracted: numerically for {@code SIMPLE} watermarks, and as seconds on a
   * {@code yyyyMMddHHmmss} timestamp for every other watermark type.
   *
   * @param workUnitState Work unit state
   * @return the adjusted low water mark, or the default watermark value if that is what the work
   *     unit carries
   * @throws IllegalArgumentException if the configured watermark type does not name a
   *     {@code WatermarkType} constant
   */
  private long getLowWatermarkFromWorkUnit(WorkUnitState workUnitState) {
    final String timestampFormat = "yyyyMMddHHmmss";

    String watermarkType =
        workUnitState.getProp(
            ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE,
            ConfigurationKeys.DEFAULT_WATERMARK_TYPE);
    long lowWaterMark = workUnitState.getWorkunit().getLowWaterMark();

    // Nothing to adjust when the work unit only carries the default value.
    if (lowWaterMark == ConfigurationKeys.DEFAULT_WATERMARK_VALUE) {
      return lowWaterMark;
    }

    // Upper-case with Locale.ROOT so the enum lookup does not depend on the JVM's default
    // locale (e.g. under a Turkish locale "simple" would become "SİMPLE" and valueOf would fail).
    WatermarkType wmType =
        WatermarkType.valueOf(watermarkType.toUpperCase(java.util.Locale.ROOT));
    int deltaNum = new WatermarkPredicate(wmType).getDeltaNumForNextWatermark();

    switch (wmType) {
      case SIMPLE:
        // Simple watermarks are plain numbers: subtract the delta directly.
        return lowWaterMark - deltaNum;
      default:
        // All other types are handled as yyyyMMddHHmmss timestamps: go back deltaNum seconds.
        Date lowWaterMarkDate = Utils.toDate(lowWaterMark, timestampFormat);
        return Long.parseLong(
            Utils.dateToString(
                Utils.addSecondsToDate(lowWaterMarkDate, -deltaNum), timestampFormat));
    }
  }
コード例 #3
0
  /**
   * Get latest water mark from previous work unit states.
   *
   * <p>The result is chosen as follows:
   *
   * <ul>
   *   <li>No previous work unit states: {@code ConfigurationKeys.DEFAULT_WATERMARK_VALUE}.
   *   <li>Commit policy is {@code COMMIT_ON_FULL_SUCCESS} and at least one previous task is
   *       {@code FAILED}, {@code CANCELLED}, {@code RUNNING} or {@code PENDING}: the minimum of
   *       the low water marks returned by {@code getLowWatermarkFromWorkUnit}. For
   *       {@code SNAPSHOT} extracts that minimum is additionally passed through
   *       {@code addBackedUpSeconds} with the configured backup seconds (default 0).
   *   <li>Otherwise, if any task outside those four states reported a non-zero
   *       {@code EXTRACTOR_ROWS_EXPECTED}: the maximum high water mark among such tasks.
   *   <li>Otherwise: the minimum of the low water marks returned by
   *       {@code getLowWatermarkFromWorkUnit}.
   * </ul>
   *
   * @param state Source state
   * @return latest water mark (high water mark)
   * @throws IllegalArgumentException if the extract type of the first previous state does not
   *     name an {@code ExtractType} constant (only evaluated in the failed full-commit case)
   */
  private long getLatestWatermarkFromMetadata(SourceState state) {
    LOG.debug("Get latest watermark from the previous run");
    long latestWaterMark = ConfigurationKeys.DEFAULT_WATERMARK_VALUE;

    List<WorkUnitState> previousWorkUnitStates =
        Lists.newArrayList(state.getPreviousWorkUnitStates());
    List<Long> previousWorkUnitStateHighWatermarks = Lists.newArrayList();
    List<Long> previousWorkUnitLowWatermarks = Lists.newArrayList();

    if (previousWorkUnitStates.isEmpty()) {
      LOG.info(
          "No previous work unit states found; Latest watermark - Default watermark: "
              + latestWaterMark);
      return latestWaterMark;
    }

    boolean hasFailedRun = false;
    boolean isDataProcessedInPreviousRun = false;

    JobCommitPolicy commitPolicy =
        JobCommitPolicy.forName(
            state.getProp(
                ConfigurationKeys.JOB_COMMIT_POLICY_KEY,
                ConfigurationKeys.DEFAULT_JOB_COMMIT_POLICY));
    boolean isCommitOnFullSuccess = commitPolicy == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS;

    for (WorkUnitState workUnitState : previousWorkUnitStates) {
      long processedRecordCount = 0;
      // Read the working state once; it is used for logging and for the failure check.
      WorkingState workingState = workUnitState.getWorkingState();
      LOG.info("State of the previous task: " + workUnitState.getId() + ":" + workingState);

      // Any task that did not run to completion counts as a failed run.
      if (workingState == WorkingState.FAILED
          || workingState == WorkingState.CANCELLED
          || workingState == WorkingState.RUNNING
          || workingState == WorkingState.PENDING) {
        hasFailedRun = true;
      } else {
        processedRecordCount =
            workUnitState.getPropAsLong(ConfigurationKeys.EXTRACTOR_ROWS_EXPECTED);
        if (processedRecordCount != 0) {
          isDataProcessedInPreviousRun = true;
        }
      }

      LOG.info(
          "Low watermark of the previous task: "
              + workUnitState.getId()
              + ":"
              + workUnitState.getWorkunit().getLowWaterMark());
      LOG.info(
          "High watermark of the previous task: "
              + workUnitState.getId()
              + ":"
              + workUnitState.getHighWaterMark());
      LOG.info("Record count of the previous task: " + processedRecordCount + "\n");

      // Consider the high water mark of the previous work unit only if it extracted any data.
      if (processedRecordCount != 0) {
        previousWorkUnitStateHighWatermarks.add(workUnitState.getHighWaterMark());
      }

      previousWorkUnitLowWatermarks.add(this.getLowWatermarkFromWorkUnit(workUnitState));
    }

    if (isCommitOnFullSuccess && hasFailedRun) {
      // Commit policy is full and the previous run has a failed task: restart from the
      // minimum of the low water marks of the previous states.
      long previousLowWatermark = Collections.min(previousWorkUnitLowWatermarks);

      WorkUnitState previousState = previousWorkUnitStates.get(0);
      // Upper-case with Locale.ROOT so the enum lookup does not depend on the JVM's default
      // locale (a Turkish locale maps 'i' to a dotted capital 'İ', which breaks valueOf).
      ExtractType extractType =
          ExtractType.valueOf(
              previousState
                  .getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE)
                  .toUpperCase(java.util.Locale.ROOT));

      // add backup seconds only for snapshot extracts but not for appends
      if (extractType == ExtractType.SNAPSHOT) {
        int backupSecs =
            previousState.getPropAsInt(
                ConfigurationKeys.SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS, 0);
        String watermarkType =
            previousState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE);
        latestWaterMark = this.addBackedUpSeconds(previousLowWatermark, backupSecs, watermarkType);
      } else {
        latestWaterMark = previousLowWatermark;
      }

      LOG.info(
          "Previous job was COMMIT_ON_FULL_SUCCESS but it was failed; Latest watermark - "
              + "Min watermark from WorkUnits: "
              + latestWaterMark);
    } else {
      // Commit policy is full with no failed tasks, or commit policy is partial: use the
      // maximum of the high water marks from previous tasks when any data was processed.
      if (isDataProcessedInPreviousRun) {
        latestWaterMark = Collections.max(previousWorkUnitStateHighWatermarks);
        LOG.info(
            "Previous run was successful. Latest watermark - Max watermark from WorkUnitStates: "
                + latestWaterMark);
      } else {
        // No data was processed, so fall back to the minimum low water mark.
        latestWaterMark = Collections.min(previousWorkUnitLowWatermarks);
        LOG.info(
            "Previous run was successful but no data found. Latest watermark - Min watermark from WorkUnitStates: "
                + latestWaterMark);
      }
    }

    return latestWaterMark;
  }
コード例 #4
0
 /**
  * Sets the actual high watermark of {@code wus} to the expected high watermark of its work
  * unit, read as a {@code LongWatermark}.
  *
  * @param wus work unit state whose actual high watermark is set
  */
 @Override
 public void setActualHighWatermark(WorkUnitState wus) {
   final LongWatermark expectedHighWatermark =
       wus.getWorkunit().getExpectedHighWatermark(LongWatermark.class);
   wus.setActualHighWatermark(expectedHighWatermark);
 }