/**
 * Decides whether this task publishes its own output data straight into the final publisher
 * output directory, rather than leaving publishing to the job.
 *
 * <p>Task-level publishing happens only when {@link ConfigurationKeys#PUBLISH_DATA_AT_JOB_LEVEL}
 * is false AND one of the following holds:
 *
 * <ul>
 *   <li>The {@link JobCommitPolicy#COMMIT_ON_PARTIAL_SUCCESS} policy is used.
 *   <li>The {@link JobCommitPolicy#COMMIT_SUCCESSFUL_TASKS} policy is used and the working state
 *       of this task is {@code SUCCESSFUL}.
 * </ul>
 *
 * @return {@code true} if the task should publish its data itself, {@code false} if publishing
 *     is left to the job level
 */
private boolean shouldPublishDataInTask() {
  // Job-level publishing, when enabled, always wins over the commit policy.
  if (this.taskState.getPropAsBoolean(
      ConfigurationKeys.PUBLISH_DATA_AT_JOB_LEVEL,
      ConfigurationKeys.DEFAULT_PUBLISH_DATA_AT_JOB_LEVEL)) {
    LOG.info(
        String.format(
            "%s is true. Will publish data at the job level.",
            ConfigurationKeys.PUBLISH_DATA_AT_JOB_LEVEL));
    return false;
  }

  JobCommitPolicy policy = JobCommitPolicy.getCommitPolicy(this.taskState);

  // Any policy other than the two task-level ones defers publishing to the job.
  if (policy != JobCommitPolicy.COMMIT_SUCCESSFUL_TASKS
      && policy != JobCommitPolicy.COMMIT_ON_PARTIAL_SUCCESS) {
    LOG.info("Will publish data at the job level with job commit policy: " + policy);
    return false;
  }

  // Partial-success commits publish unconditionally; successful-task commits publish only when
  // this task itself ended up SUCCESSFUL.
  return policy == JobCommitPolicy.COMMIT_ON_PARTIAL_SUCCESS
      || this.taskState.getWorkingState() == WorkUnitState.WorkingState.SUCCESSFUL;
}
/** * Get latest water mark from previous work unit states. * * @param state Source state * @return latest water mark (high water mark) */ private long getLatestWatermarkFromMetadata(SourceState state) { LOG.debug("Get latest watermark from the previous run"); long latestWaterMark = ConfigurationKeys.DEFAULT_WATERMARK_VALUE; List<WorkUnitState> previousWorkUnitStates = Lists.newArrayList(state.getPreviousWorkUnitStates()); List<Long> previousWorkUnitStateHighWatermarks = Lists.newArrayList(); List<Long> previousWorkUnitLowWatermarks = Lists.newArrayList(); if (previousWorkUnitStates.isEmpty()) { LOG.info( "No previous work unit states found; Latest watermark - Default watermark: " + latestWaterMark); return latestWaterMark; } boolean hasFailedRun = false; boolean isCommitOnFullSuccess = false; boolean isDataProcessedInPreviousRun = false; JobCommitPolicy commitPolicy = JobCommitPolicy.forName( state.getProp( ConfigurationKeys.JOB_COMMIT_POLICY_KEY, ConfigurationKeys.DEFAULT_JOB_COMMIT_POLICY)); if (commitPolicy == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS) { isCommitOnFullSuccess = true; } for (WorkUnitState workUnitState : previousWorkUnitStates) { long processedRecordCount = 0; LOG.info( "State of the previous task: " + workUnitState.getId() + ":" + workUnitState.getWorkingState()); if (workUnitState.getWorkingState() == WorkingState.FAILED || workUnitState.getWorkingState() == WorkingState.CANCELLED || workUnitState.getWorkingState() == WorkingState.RUNNING || workUnitState.getWorkingState() == WorkingState.PENDING) { hasFailedRun = true; } else { processedRecordCount = workUnitState.getPropAsLong(ConfigurationKeys.EXTRACTOR_ROWS_EXPECTED); if (processedRecordCount != 0) { isDataProcessedInPreviousRun = true; } } LOG.info( "Low watermark of the previous task: " + workUnitState.getId() + ":" + workUnitState.getWorkunit().getLowWaterMark()); LOG.info( "High watermark of the previous task: " + workUnitState.getId() + ":" + workUnitState.getHighWaterMark()); 
LOG.info("Record count of the previous task: " + processedRecordCount + "\n"); // Consider high water mark of the previous work unit, if it is // extracted any data if (processedRecordCount != 0) { previousWorkUnitStateHighWatermarks.add(workUnitState.getHighWaterMark()); } previousWorkUnitLowWatermarks.add(this.getLowWatermarkFromWorkUnit(workUnitState)); } // If commit policy is full and it has failed run, get latest water mark // as // minimum of low water marks from previous states. if (isCommitOnFullSuccess && hasFailedRun) { long previousLowWatermark = Collections.min(previousWorkUnitLowWatermarks); WorkUnitState previousState = previousWorkUnitStates.get(0); ExtractType extractType = ExtractType.valueOf( previousState .getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE) .toUpperCase()); // add backup seconds only for snapshot extracts but not for appends if (extractType == ExtractType.SNAPSHOT) { int backupSecs = previousState.getPropAsInt( ConfigurationKeys.SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS, 0); String watermarkType = previousState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE); latestWaterMark = this.addBackedUpSeconds(previousLowWatermark, backupSecs, watermarkType); } else { latestWaterMark = previousLowWatermark; } LOG.info( "Previous job was COMMIT_ON_FULL_SUCCESS but it was failed; Latest watermark - " + "Min watermark from WorkUnits: " + latestWaterMark); } // If commit policy is full and there are no failed tasks or commit // policy is partial, // get latest water mark as maximum of high water marks from previous // tasks. else { if (isDataProcessedInPreviousRun) { latestWaterMark = Collections.max(previousWorkUnitStateHighWatermarks); LOG.info( "Previous run was successful. Latest watermark - Max watermark from WorkUnitStates: " + latestWaterMark); } else { latestWaterMark = Collections.min(previousWorkUnitLowWatermarks); LOG.info( "Previous run was successful but no data found. 
Latest watermark - Min watermark from WorkUnitStates: " + latestWaterMark); } } return latestWaterMark; }