public TaskState(WorkUnitState workUnitState) { // Since getWorkunit() returns an immutable WorkUnit object, // the WorkUnit object in this object is also immutable. super(workUnitState.getWorkunit()); addAll(workUnitState); this.jobId = workUnitState.getProp(ConfigurationKeys.JOB_ID_KEY); this.taskId = workUnitState.getProp(ConfigurationKeys.TASK_ID_KEY); this.setId(this.taskId); }
/**
 * Get low water mark from the given work unit state.
 *
 * <p>If the work unit's low water mark equals {@code ConfigurationKeys.DEFAULT_WATERMARK_VALUE}
 * it is returned unchanged. Otherwise it is moved back by the delta reported by
 * {@code WatermarkPredicate.getDeltaNumForNextWatermark()} for the configured watermark type:
 * for {@code SIMPLE} the delta is subtracted directly; for every other type the value is
 * interpreted as a {@code yyyyMMddHHmmss} timestamp and the delta is subtracted in seconds.
 *
 * @param workUnitState Work unit state
 * @return latest low water mark
 * @throws IllegalArgumentException if the configured watermark type does not name a
 *     {@code WatermarkType} constant (thrown by {@code valueOf})
 */
private long getLowWatermarkFromWorkUnit(WorkUnitState workUnitState) {
  String watermarkType =
      workUnitState.getProp(
          ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE,
          ConfigurationKeys.DEFAULT_WATERMARK_TYPE);
  long lowWaterMark = workUnitState.getWorkunit().getLowWaterMark();

  // The default value is a sentinel and must not be shifted.
  if (lowWaterMark == ConfigurationKeys.DEFAULT_WATERMARK_VALUE) {
    return lowWaterMark;
  }

  // Upper-case with Locale.ROOT: the default-locale variant can yield non-ASCII letters
  // (e.g. dotted capital I under a Turkish locale), which would make valueOf() fail.
  WatermarkType wmType =
      WatermarkType.valueOf(watermarkType.toUpperCase(java.util.Locale.ROOT));
  int deltaNum = new WatermarkPredicate(wmType).getDeltaNumForNextWatermark();

  if (wmType == WatermarkType.SIMPLE) {
    return lowWaterMark - deltaNum;
  }

  // All remaining watermark types are handled as yyyyMMddHHmmss timestamps.
  final String timestampFormat = "yyyyMMddHHmmss";
  Date lowWaterMarkDate = Utils.toDate(lowWaterMark, timestampFormat);
  return Long.parseLong(
      Utils.dateToString(
          Utils.addSecondsToDate(lowWaterMarkDate, -deltaNum), timestampFormat));
}
/** * Get latest water mark from previous work unit states. * * @param state Source state * @return latest water mark (high water mark) */ private long getLatestWatermarkFromMetadata(SourceState state) { LOG.debug("Get latest watermark from the previous run"); long latestWaterMark = ConfigurationKeys.DEFAULT_WATERMARK_VALUE; List<WorkUnitState> previousWorkUnitStates = Lists.newArrayList(state.getPreviousWorkUnitStates()); List<Long> previousWorkUnitStateHighWatermarks = Lists.newArrayList(); List<Long> previousWorkUnitLowWatermarks = Lists.newArrayList(); if (previousWorkUnitStates.isEmpty()) { LOG.info( "No previous work unit states found; Latest watermark - Default watermark: " + latestWaterMark); return latestWaterMark; } boolean hasFailedRun = false; boolean isCommitOnFullSuccess = false; boolean isDataProcessedInPreviousRun = false; JobCommitPolicy commitPolicy = JobCommitPolicy.forName( state.getProp( ConfigurationKeys.JOB_COMMIT_POLICY_KEY, ConfigurationKeys.DEFAULT_JOB_COMMIT_POLICY)); if (commitPolicy == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS) { isCommitOnFullSuccess = true; } for (WorkUnitState workUnitState : previousWorkUnitStates) { long processedRecordCount = 0; LOG.info( "State of the previous task: " + workUnitState.getId() + ":" + workUnitState.getWorkingState()); if (workUnitState.getWorkingState() == WorkingState.FAILED || workUnitState.getWorkingState() == WorkingState.CANCELLED || workUnitState.getWorkingState() == WorkingState.RUNNING || workUnitState.getWorkingState() == WorkingState.PENDING) { hasFailedRun = true; } else { processedRecordCount = workUnitState.getPropAsLong(ConfigurationKeys.EXTRACTOR_ROWS_EXPECTED); if (processedRecordCount != 0) { isDataProcessedInPreviousRun = true; } } LOG.info( "Low watermark of the previous task: " + workUnitState.getId() + ":" + workUnitState.getWorkunit().getLowWaterMark()); LOG.info( "High watermark of the previous task: " + workUnitState.getId() + ":" + workUnitState.getHighWaterMark()); 
LOG.info("Record count of the previous task: " + processedRecordCount + "\n"); // Consider high water mark of the previous work unit, if it is // extracted any data if (processedRecordCount != 0) { previousWorkUnitStateHighWatermarks.add(workUnitState.getHighWaterMark()); } previousWorkUnitLowWatermarks.add(this.getLowWatermarkFromWorkUnit(workUnitState)); } // If commit policy is full and it has failed run, get latest water mark // as // minimum of low water marks from previous states. if (isCommitOnFullSuccess && hasFailedRun) { long previousLowWatermark = Collections.min(previousWorkUnitLowWatermarks); WorkUnitState previousState = previousWorkUnitStates.get(0); ExtractType extractType = ExtractType.valueOf( previousState .getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE) .toUpperCase()); // add backup seconds only for snapshot extracts but not for appends if (extractType == ExtractType.SNAPSHOT) { int backupSecs = previousState.getPropAsInt( ConfigurationKeys.SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS, 0); String watermarkType = previousState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE); latestWaterMark = this.addBackedUpSeconds(previousLowWatermark, backupSecs, watermarkType); } else { latestWaterMark = previousLowWatermark; } LOG.info( "Previous job was COMMIT_ON_FULL_SUCCESS but it was failed; Latest watermark - " + "Min watermark from WorkUnits: " + latestWaterMark); } // If commit policy is full and there are no failed tasks or commit // policy is partial, // get latest water mark as maximum of high water marks from previous // tasks. else { if (isDataProcessedInPreviousRun) { latestWaterMark = Collections.max(previousWorkUnitStateHighWatermarks); LOG.info( "Previous run was successful. Latest watermark - Max watermark from WorkUnitStates: " + latestWaterMark); } else { latestWaterMark = Collections.min(previousWorkUnitLowWatermarks); LOG.info( "Previous run was successful but no data found. 
Latest watermark - Min watermark from WorkUnitStates: " + latestWaterMark); } } return latestWaterMark; }
/**
 * Sets the actual high watermark of the given work unit state to the expected high watermark
 * of its work unit, read as a {@code LongWatermark}.
 *
 * @param wus work unit state whose actual high watermark is updated
 */
@Override
public void setActualHighWatermark(WorkUnitState wus) {
  final LongWatermark expectedHighWatermark =
      wus.getWorkunit().getExpectedHighWatermark(LongWatermark.class);
  wus.setActualHighWatermark(expectedHighWatermark);
}