/**
 * Report whether every task of this job is in a state that lets the job count as finished
 * (idle/errored/rebalancing, i.e. a member of {@code taskStatesToFinishJob}).
 *
 * <p>If a job is kicked and this method evaluates to true, it can be assumed that every task
 * ran at least once, regardless of whether any rebalancing was started in the meantime.
 *
 * @return True if the job is finished
 */
public boolean isFinished() {
    boolean allFinished = true;
    for (JobTask candidate : getCopyOfTasks()) {
        boolean finishedState = taskStatesToFinishJob.contains(candidate.getState());
        if (!finishedState) {
            // One unfinished task is enough to decide; no need to look further.
            allFinished = false;
            break;
        }
    }
    return allFinished;
}
/**
 * Recompute {@code countActiveTasks} from scratch by counting the tasks in {@code nodes}
 * whose state reports {@code isActiveState()}.
 *
 * <p>A null task list is treated as "no tasks" (count of zero), matching how {@code getTask}
 * and {@code addTask} handle a list that has not been created yet. The total is accumulated
 * in a local variable and stored with a single assignment, so the field never holds a
 * partially computed count.
 */
private synchronized void recountActiveTasks() {
    int active = 0;
    if (nodes != null) {
        for (JobTask t : nodes) {
            if (t.getState().isActiveState()) {
                active++;
            }
        }
    }
    this.countActiveTasks = active;
}
/** * Decide whether a task should be migrated based on the time of last migration and the size of * the task * * @param task The task to be migrated * @param targetHostId The host ID being considered for migration * @return True if the task should be migrated there */ public boolean shouldMigrateTaskToHost(JobTask task, String targetHostId) { String taskHost; if (task == null || targetHostId == null || task.getByteCount() == 0 || (taskHost = task.getHostUUID()) == null) { return false; // Suspicious tasks should not be migrated } return shouldKickTaskOnHost(targetHostId) && migrateHosts.getIfPresent(taskHost) == null && migrateHosts.getIfPresent(targetHostId) == null; }
/**
 * Compute the mean byte count over the job's tasks using integer division.
 *
 * <p>Null entries contribute nothing to the sum but are still included in the divisor,
 * because the divisor is the size of the task list.
 *
 * @return The average task size in bytes, or 0 when there are no tasks
 */
public long calcAverageTaskSizeBytes() {
    List<JobTask> snapshot = getCopyOfTasks();
    if (snapshot == null || snapshot.isEmpty()) {
        return 0;
    }
    long totalBytes = 0;
    for (JobTask entry : snapshot) {
        if (entry == null) {
            continue;
        }
        totalBytes += entry.getByteCount();
    }
    int taskCount = snapshot.size();
    return totalBytes / taskCount;
}
/**
 * Count the tasks whose state is {@code JobTaskState.ERROR}.
 *
 * @return The number of errored tasks; 0 when the task list is null or empty
 */
private int countErrorTasks() {
    List<JobTask> snapshot = getCopyOfTasks();
    int errored = 0;
    if (snapshot != null) {
        for (JobTask candidate : snapshot) {
            if (candidate == null) {
                continue; // null entries are ignored
            }
            if (candidate.getState() == JobTaskState.ERROR) {
                errored++;
            }
        }
    }
    return errored;
}
/**
 * Look up a task by its task ID.
 *
 * <p>As a side effect, the returned task has its job UUID set to this job's {@code id}
 * before it is handed back.
 *
 * @param id The task ID to search for
 * @return The first task whose ID matches, or null if the task list is null or holds no match
 */
@Override
public synchronized JobTask getTask(int id) {
    JobTask match = null;
    if (nodes != null) {
        for (JobTask candidate : nodes) {
            if (candidate.getTaskID() == id) {
                // The parameter shadows the field, so the job's own id needs "this.".
                candidate.setJobUUID(this.id);
                match = candidate;
                break;
            }
        }
    }
    return match;
}
/**
 * Change a task's state, and update the job's bookkeeping and state to match.
 *
 * <p>If the task accepts the transition, the active-task counter is adjusted when the task
 * crosses the active/inactive boundary, the job is marked disabled when the new state is
 * {@code ERROR}, and the job state is recalculated. The outcome of that recalculation does
 * not affect the return value.
 *
 * @param task The task to modify
 * @param newState The new state to set
 * @param force Whether to force the state transition regardless of the expected transition map
 * @return True on success; false if the task rejected the transition (nothing else is changed)
 */
public synchronized boolean setTaskState(JobTask task, JobTaskState newState, boolean force) {
    JobTaskState previous = task.getState();
    boolean accepted = task.setState(newState, force);
    if (!accepted) {
        return false;
    }

    boolean wasActive = previous.isActiveState();
    boolean nowActive = newState.isActiveState();
    if (wasActive != nowActive) {
        // The task crossed the active/inactive boundary; move the counter accordingly.
        if (nowActive) {
            this.countActiveTasks++;
        } else {
            this.countActiveTasks--;
        }
    }

    if (JobTaskState.ERROR == newState) {
        this.disabled = true;
    }
    calculateJobState(force);
    return true;
}
/**
 * Append a task to this job, creating the task list on first use, and count the task as
 * active if its current state is an active state.
 *
 * @param task The task to add
 */
@Override
public synchronized void addTask(JobTask task) {
    if (nodes == null) {
        nodes = new ArrayList<>();
    }
    nodes.add(task);

    boolean startsActive = task.getState().isActiveState();
    if (startsActive) {
        this.countActiveTasks++;
    }
}
public void setTaskFinished(JobTask task) { int preFailErrorCode = task.getPreFailErrorCode(); int oldErrorCode = task.getErrorCode(); if (task.getState() == JobTaskState.REPLICATE || task.getState() == JobTaskState.BACKUP) { if (preFailErrorCode > 0) { // Restore the old job error if it existed errorTask(task, preFailErrorCode); return; } } task.setErrorCode(0); setTaskState(task, JobTaskState.IDLE, true); if (getState() == JobState.IDLE) { setEndTime(JitterClock.globalTime()); } if (countErrorTasks() == 0 && oldErrorCode == JobTaskErrorCode.EXIT_REPLICATE_FAILURE || oldErrorCode == JobTaskErrorCode.EXIT_BACKUP_FAILURE) { // If the job is disabled because this task failed to replicate, enable it. log.warn("Enabling job " + getId() + " because the last replicate/backup error was resolved"); disabled = false; } }
@Override public boolean setEnabled(boolean enabled) { if (enabled == disabled) { disabled = !enabled; // Determine new states if (enabled && state == JobState.ERROR.getValue()) { for (JobTask task : getCopyOfTasks()) { JobTaskState state = task.getState(); task.setErrorCode(0); task.setPreFailErrorCode(0); if (state == JobTaskState.ERROR) { setTaskState(task, JobTaskState.IDLE, true); } } calculateJobState(true); } else if (enabled && state == JobState.DEGRADED.getValue()) { // Clear degraded state by recalculating calculateJobState(true); } return true; } return false; }
/** Calculate the job state based on the state of its tasks */ private boolean calculateJobState(boolean force) { boolean err = false, sched = false, run = false, reb = false, stopped = false; for (JobTask t : nodes) { if (t.getWasStopped()) { stopped = true; } if (t.getState() == JobTaskState.REBALANCE) { reb = true; } else if (t.isRunning()) { run = true; } else if (t.getState() == JobTaskState.ALLOCATED || t.getState().isQueuedState()) { sched = true; } else if (t.getState() == JobTaskState.ERROR) { err = true; break; } } JobState oldJobState = getState(); JobState nextState = (err) ? JobState.ERROR : (reb) ? JobState.REBALANCE : (run) ? JobState.RUNNING : (sched) ? JobState.SCHEDULED : JobState.IDLE; if (setState(nextState, force)) { // If transitioning from error to non-error state, enable job as long as it has run recently. if (oldJobState == JobState.ERROR && nextState != JobState.ERROR && getSubmitTime() != null && System.currentTimeMillis() - getSubmitTime() < AUTO_ENABLE_CUTOFF) { setEnabled(true); } wasStopped = stopped; return true; } else { return false; } }
/**
 * Put a task into the {@code ERROR} state and record the error code on it.
 *
 * <p>The state change is forced and happens before the error code is stored on the task.
 *
 * @param task The task to mark as errored
 * @param errorCode The error code to store on the task
 */
public void errorTask(JobTask task, int errorCode) {
    final boolean forceTransition = true;
    setTaskState(task, JobTaskState.ERROR, forceTransition);
    task.setErrorCode(errorCode);
}