コード例 #1
0
/**
 * Task queues keyed by priority, iterated from the highest priority key to the lowest, together
 * with a local estimate of how many task slots each host currently has free.
 *
 * <p>Two independent guards are used:
 *
 * <ul>
 *   <li>queue contents are protected by an explicit lock, exposed through {@link #lock()}, {@link
 *       #unlock()} and {@link #tryLock()} so callers can hold it across several operations;
 *   <li>the host-slot bookkeeping ({@code hostAvailSlots} and {@code lastAvailSlotsUpdate}) is
 *       protected by the monitor of the {@code hostAvailSlots} map.
 * </ul>
 */
public class SpawnQueuesByPriority extends TreeMap<Integer, LinkedList<SpawnQueueItem>> {

  private static final Logger log = LoggerFactory.getLogger(SpawnQueuesByPriority.class);
  private final Lock queueLock = new ReentrantLock();

  /* Internal map used to record outgoing task kicks that will not immediately be visible in the HostState */
  private final HashMap<String, Integer> hostAvailSlots = new HashMap<>();

  // Periodically refresh hostAvailSlots to the actual availableSlots count
  private static final int SPAWN_QUEUE_AVAIL_REFRESH =
      Parameter.intValue("spawn.queue.avail.refresh", 60_000);
  // New tasks can't take the last slot of a host unless they wait this long
  private static final int SPAWN_QUEUE_NEW_TASK_LAST_SLOT_DELAY =
      Parameter.intValue("spawn.queue.new.task.last.slot.delay", 90_000);

  /* Time of the last full refresh of hostAvailSlots. Guarded by the hostAvailSlots monitor. */
  private long lastAvailSlotsUpdate = 0;

  // Whether tasks can migrate at all
  private static final boolean ENABLE_TASK_MIGRATION =
      Parameter.boolValue("task.migration.enable", true);
  // Tasks this small can always migrate
  private static final long TASK_MIGRATION_MIN_BYTES =
      Parameter.longValue("task.migration.min.bytes", 50_000_000);
  // Tasks up to this big can migrate if they stay in the queue long enough
  private static final long TASK_MIGRATION_MAX_BYTES =
      Parameter.longValue("task.migration.max.bytes", 10_000_000_000L);
  // The byte limit raises to the max value if tasks are queued this long (20 minutes)
  private static final long TASK_MIGRATION_LIMIT_GROWTH_INTERVAL =
      Parameter.longValue("task.migration.limit.growth.interval", 1_200_000);
  // Only migrate a task to a particular host once per interval
  private static final long TASK_MIGRATION_INTERVAL_PER_HOST =
      Parameter.longValue("task.migration.interval", 240_000);

  // Use cache ttl to mark hosts that have recently performed or received a migration
  private final Cache<String, Boolean> migrateHosts;
  // When tasks are stopped, track this behavior so that the queue can be modified as soon as
  // possible
  private final AtomicBoolean stoppedJob = new AtomicBoolean(false);

  /* This comparator should only be used within a block that is synchronized on hostAvailSlots.
  It does not internally synchronize to save a bunch of extra lock operations.*/
  private final Comparator<HostState> hostStateComparator =
      new Comparator<HostState>() {
        @Override
        public int compare(HostState o1, HostState o2) {
          int slots1 = availableSlots(o1.getHostUuid());
          int slots2 = availableSlots(o2.getHostUuid());
          if (slots1 != slots2) {
            // Arguments are swapped on purpose: hosts with more free slots sort first.
            return Integer.compare(slots2, slots1);
          }
          // Tie-break: hosts with a smaller meanActiveTasks value sort first.
          return Double.compare(o1.getMeanActiveTasks(), o2.getMeanActiveTasks());
        }
      };

  /** Creates an empty queue set whose keys are ordered from highest priority to lowest. */
  public SpawnQueuesByPriority() {
    super(Collections.<Integer>reverseOrder());
    migrateHosts =
        CacheBuilder.newBuilder()
            .expireAfterWrite(TASK_MIGRATION_INTERVAL_PER_HOST, TimeUnit.MILLISECONDS)
            .build();
  }

  /**
   * Look up the recorded free-slot count for a host. Callers must hold the {@code hostAvailSlots}
   * monitor.
   *
   * @param hostID The host UUID to look up
   * @return The recorded count, or 0 when nothing is recorded for the host
   */
  private int availableSlots(String hostID) {
    Integer slots = hostAvailSlots.get(hostID);
    return slots == null ? 0 : slots;
  }

  /** Acquire the queue lock, blocking until it is available. */
  public void lock() {
    queueLock.lock();
  }

  /** Release the queue lock. */
  public void unlock() {
    queueLock.unlock();
  }

  /**
   * Try to acquire the queue lock without blocking.
   *
   * @return True if the lock was acquired
   */
  public boolean tryLock() {
    return queueLock.tryLock();
  }

  /**
   * Add a task to the queue for the given priority, creating that queue if necessary.
   *
   * @param priority The priority queue to add to
   * @param task The task key to enqueue
   * @param canIgnoreQuiesce Passed through to the created {@link SpawnQueueItem}
   * @param toHead Whether to insert at the front of the queue instead of the back
   * @return True if the task was added
   */
  public boolean addTaskToQueue(
      int priority, JobKey task, boolean canIgnoreQuiesce, boolean toHead) {
    queueLock.lock();
    try {
      LinkedList<SpawnQueueItem> queue = this.get(priority);
      if (queue == null) {
        queue = new LinkedList<>();
        this.put(priority, queue);
      }
      SpawnQueueItem item = new SpawnQueueItem(task, canIgnoreQuiesce);
      if (toHead) {
        queue.addFirst(item);
        return true;
      }
      return queue.add(item);
    } finally {
      queueLock.unlock();
    }
  }

  /**
   * Remove the first queued item at the given priority that matches the task key.
   *
   * @param priority The priority queue to search
   * @param task The task key to match against
   * @return True if an item was removed
   */
  public boolean remove(int priority, JobKey task) {
    queueLock.lock();
    try {
      LinkedList<SpawnQueueItem> queue = get(priority);
      if (queue == null) {
        return false;
      }
      ListIterator<SpawnQueueItem> iter = queue.listIterator();
      while (iter.hasNext()) {
        JobKey nextKey = iter.next();
        if (nextKey != null && nextKey.matches(task)) {
          iter.remove();
          return true;
        }
      }
      return false;
    } finally {
      queueLock.unlock();
    }
  }

  /**
   * Count the tasks queued at a priority.
   *
   * @param priority The priority queue to inspect
   * @return The queue size, or 0 if there is no queue for that priority
   */
  public int getTaskQueuedCount(int priority) {
    queueLock.lock();
    try {
      LinkedList<SpawnQueueItem> queueForPriority = this.get(priority);
      return queueForPriority == null ? 0 : queueForPriority.size();
    } finally {
      queueLock.unlock();
    }
  }

  /**
   * Add an open slot to a host, probably in response to a task finishing
   *
   * @param hostID The host UUID to update; null is ignored
   */
  public void markHostAvailable(String hostID) {
    if (hostID == null) {
      return;
    }
    synchronized (hostAvailSlots) {
      hostAvailSlots.put(hostID, availableSlots(hostID) + 1);
    }
  }

  /**
   * Out of a list of possible hosts to run a task, find the best one.
   *
   * @param inputHosts The legal hosts for a task
   * @param requireAvailableSlot Whether to require at least one available slot
   * @return One of the hosts, if one with free slots is found; null otherwise
   */
  public HostState findBestHostToRunTask(List<HostState> inputHosts, boolean requireAvailableSlot) {
    if (inputHosts == null || inputHosts.isEmpty()) {
      return null;
    }
    synchronized (hostAvailSlots) {
      HostState bestHost = Collections.min(inputHosts, hostStateComparator);
      if (bestHost == null) {
        return null;
      }
      if (!requireAvailableSlot || availableSlots(bestHost.getHostUuid()) > 0) {
        return bestHost;
      }
      return null;
    }
  }

  /**
   * Update the available slots for each host if it has been sufficiently long since the last
   * update.
   *
   * <p>The timestamp update and the trace logging happen while the {@code hostAvailSlots} monitor
   * is held, so that neither races with concurrent slot bookkeeping.
   *
   * @param hosts The hosts to input; null is ignored
   */
  public void updateAllHostAvailSlots(List<HostState> hosts) {
    if (hosts == null) {
      return;
    }
    synchronized (hostAvailSlots) {
      long now = JitterClock.globalTime();
      if (now - lastAvailSlotsUpdate < SPAWN_QUEUE_AVAIL_REFRESH) {
        return;
      }
      hostAvailSlots.clear();
      for (HostState host : hosts) {
        String hostID = host.getHostUuid();
        if (hostID != null) {
          hostAvailSlots.put(hostID, host.getAvailableTaskSlots());
        }
      }
      lastAvailSlotsUpdate = now;
      if (log.isTraceEnabled()) {
        log.trace("[SpawnQueuesByPriority] Host Avail Slots: " + hostAvailSlots);
      }
    }
  }

  /**
   * Use the record of which hosts have pending task kicks to decide if a task should be sent to a
   * host
   *
   * @param hostID The host UUID to check
   * @return True if a new task should kick
   */
  public boolean shouldKickTaskOnHost(String hostID) {
    synchronized (hostAvailSlots) {
      return availableSlots(hostID) > 0;
    }
  }

  /**
   * Inform the queue that a task command is being sent to a host. The recorded slot count never
   * drops below zero.
   *
   * @param hostID The host UUID to update; null is ignored, as in {@link #markHostAvailable}
   */
  public void markHostTaskActive(String hostID) {
    if (hostID == null) {
      return;
    }
    synchronized (hostAvailSlots) {
      hostAvailSlots.put(hostID, Math.max(availableSlots(hostID) - 1, 0));
    }
  }

  /**
   * @return Whether task migration is enabled by configuration
   */
  public boolean isMigrationEnabled() {
    return ENABLE_TASK_MIGRATION;
  }

  /**
   * Decide whether a task should be migrated based on the time of last migration and the size of
   * the task
   *
   * @param task The task to be migrated
   * @param targetHostId The host ID being considered for migration
   * @return True if the task should be migrated there
   */
  public boolean shouldMigrateTaskToHost(JobTask task, String targetHostId) {
    if (task == null || targetHostId == null || task.getByteCount() == 0) {
      return false; // Suspicious tasks should not be migrated
    }
    String taskHost = task.getHostUUID();
    if (taskHost == null) {
      return false; // Suspicious tasks should not be migrated
    }
    return shouldKickTaskOnHost(targetHostId)
        && migrateHosts.getIfPresent(taskHost) == null
        && migrateHosts.getIfPresent(targetHostId) == null;
  }

  /**
   * Record the fact that a migration happened between two hosts, preventing additional migrations
   * on either host for a period of time
   *
   * @param sourceHostId The host that the task is migrating from
   * @param targetHostId The host that the task is migrating to
   */
  public void markMigrationBetweenHosts(String sourceHostId, String targetHostId) {
    migrateHosts.put(sourceHostId, true);
    migrateHosts.put(targetHostId, true);
  }

  /**
   * Decide whether a task of the given size should be migrated, given how long it has been queued
   *
   * @param byteCount The size of the task in bytes
   * @param timeOnQueue How long the task has been queued in millis; negative values are treated as
   *     zero so the limit never falls below the configured minimum
   * @return True if the task should be allowed to migrate
   */
  public boolean checkSizeAgeForMigration(long byteCount, long timeOnQueue) {
    double intervalPercentage =
        Math.max(0, Math.min(1, (double) timeOnQueue / TASK_MIGRATION_LIMIT_GROWTH_INTERVAL));
    // The limit is TASK_MIGRATION_MIN_BYTES for recently-queued tasks, then slowly grows to
    // TASK_MIGRATION_MAX_BYTES
    long byteLimit =
        (long)
            (TASK_MIGRATION_MIN_BYTES
                + intervalPercentage * (TASK_MIGRATION_MAX_BYTES - TASK_MIGRATION_MIN_BYTES));
    return byteCount < byteLimit;
  }

  /**
   * @return The configured maximum task size, in bytes, that may ever migrate
   */
  public static long getTaskMigrationMaxBytes() {
    return TASK_MIGRATION_MAX_BYTES;
  }

  /**
   * @return The configured queue time, in millis, after which the migration byte limit reaches its
   *     maximum
   */
  public static long getTaskMigrationLimitGrowthInterval() {
    return TASK_MIGRATION_LIMIT_GROWTH_INTERVAL;
  }

  /**
   * When a job is stopped, we need to release the queue lock as quickly as possible to ensure that
   * we can remove tasks from the job as soon as possible. The stoppedJob variable enables this
   * behavior.
   *
   * @return True if a job was stopped since the last queue iteration
   */
  public boolean getStoppedJob() {
    return stoppedJob.get();
  }

  /**
   * Set or clear the stopped-job flag.
   *
   * @param stopped The new flag value
   */
  public void setStoppedJob(boolean stopped) {
    stoppedJob.set(stopped);
  }

  /**
   * Decide whether a new task may kick on a host, holding back the host's last recorded free slot
   * until the task has waited long enough.
   *
   * @param timeOnQueue How long the task has been queued in millis
   * @param host The candidate host
   * @return True if the task may kick there. Hosts with no recorded slot count are always allowed.
   */
  public boolean shouldKickNewTaskOnHost(long timeOnQueue, HostState host) {
    synchronized (hostAvailSlots) {
      Integer slots = hostAvailSlots.get(host.getHostUuid());
      if (slots == null || slots > 1) {
        return true;
      }
      if (host.getMaxTaskSlots() == 1) {
        // If a host has only one slot to begin with, allow tasks to kick there.
        return true;
      }
      // Otherwise, don't let new tasks take the last slot for a set period
      return timeOnQueue > SPAWN_QUEUE_NEW_TASK_LAST_SLOT_DELAY;
    }
  }
}
コード例 #2
0
ファイル: Job.java プロジェクト: hancy2013/hydra
/**
 * Job submission and tracking: an {@link IJob} implementation that keeps everything in one Codable
 * object graph.
 *
 * <p>Methods that read or modify the task list are {@code synchronized} on this instance; the
 * simple property accessors are not.
 */
@JsonAutoDetect(
    getterVisibility = JsonAutoDetect.Visibility.NONE,
    isGetterVisibility = JsonAutoDetect.Visibility.NONE,
    setterVisibility = JsonAutoDetect.Visibility.NONE)
public final class Job implements IJob {

  private static final Logger log = LoggerFactory.getLogger(Job.class);
  private static final Comparator<JobTask> taskNodeComparator =
      (t1, t2) -> Integer.compare(t1.getTaskID(), t2.getTaskID());

  /* integer form of the JobState, as produced by JobState.getValue() */
  @FieldConfig private int state;
  @FieldConfig private int countActiveTasks;
  /* creator of the job */
  @FieldConfig private String creator;
  /* owner of the job */
  @FieldConfig private String owner;
  /* group of the job */
  @FieldConfig private String group;
  /* can the owner modify the job */
  @FieldConfig private boolean ownerWritable;
  /* can the group modify the job */
  @FieldConfig private boolean groupWritable;
  /* can the world modify the job */
  @FieldConfig private boolean worldWritable;
  /* can the owner start/stop the job */
  @FieldConfig private boolean ownerExecutable;
  /* can the group start/stop the job */
  @FieldConfig private boolean groupExecutable;
  /* can the world start/stop the job */
  @FieldConfig private boolean worldExecutable;
  /* user who last modified the job */
  @FieldConfig private String lastModifiedBy;
  /* last modification time */
  @FieldConfig private long lastModifiedAt;
  /* purely ornamental description of this job */
  @FieldConfig private String description;
  /* key used for storing / retrieving this job */
  @FieldConfig private String id;
  /* higher means more important */
  @FieldConfig private int priority;
  /* Unix epoch offset of time job was created */
  @FieldConfig private Long createTime;
  /* Unix epoch offset of time job was last submitted */
  @FieldConfig private Long submitTime;
  /* Unix epoch offset of time first job node was assigned */
  @FieldConfig private Long startTime;
  /* Unix epoch offset of time last job node completed */
  @FieldConfig private Long endTime;
  /* minutes between re-kicking */
  @FieldConfig private Long rekickTimeout;
  /* minutes max time to allocate to job before it's interrupted */
  @FieldConfig private Long maxRunTime;
  /* list of nodes and their state */
  @FieldConfig private ArrayList<JobTask> nodes;
  /* JSON configuration url -- only read at submit time if conf empty */
  @FieldConfig private String config;
  /* URL for spawn to call on job complete. for automating workflows */
  @FieldConfig private String onComplete;
  @FieldConfig private String onError;
  /* timeout in seconds */
  @FieldConfig private int onCompleteTimeout;
  @FieldConfig private int onErrorTimeout;
  @FieldConfig private int runCount;
  @FieldConfig private long runTime;
  @FieldConfig private String command;
  @FieldConfig private boolean disabled;
  @FieldConfig private ArrayList<JobParameter> parameters;
  @FieldConfig private int hourlyBackups;
  @FieldConfig private int dailyBackups;
  @FieldConfig private int weeklyBackups;
  @FieldConfig private int monthlyBackups;
  @FieldConfig private int replicas;
  @FieldConfig private int readOnlyReplicas;
  @FieldConfig private boolean dontAutoBalanceMe;
  @FieldConfig private boolean dontDeleteMe;
  @FieldConfig private boolean dontCloneMe;
  @FieldConfig private boolean wasStopped;
  @FieldConfig private int maxSimulRunning;
  @FieldConfig private String minionType;
  @FieldConfig private boolean autoRetry;
  @FieldConfig private JobQueryConfig queryConfig;

  /* If all errored tasks from an errored job are resolved and the job has started within this cutoff, automatically
  enable the job. Default is 3 days. */
  private static final long AUTO_ENABLE_CUTOFF =
      Parameter.longValue("job.enable.cutoff", 3L * 24 * 60 * 60 * 1000);

  /* Task states that indicate that a job can be considered done. Rebalance/host-failure replications are included so
  these long-running operations will not delay the job rekick. */
  private static final Set<JobTaskState> taskStatesToFinishJob =
      ImmutableSet.of(
          JobTaskState.IDLE,
          JobTaskState.ERROR,
          JobTaskState.REBALANCE,
          JobTaskState.FULL_REPLICATE);

  // For codec only
  public Job() {}

  /**
   * Create a job with the given id and no creator.
   *
   * @param id The job id
   */
  public Job(String id) {
    this(id, null);
  }

  /**
   * Create a job with the given id and creator. Create time and end time are both set to the
   * current global time; the task list is left unset.
   *
   * @param id The job id
   * @param creator The creator of the job
   */
  public Job(String id, String creator) {
    this.id = id;
    this.creator = creator;
    this.createTime = JitterClock.globalTime();
    this.endTime = createTime;
    this.dontAutoBalanceMe = false;
    this.dontDeleteMe = false;
    this.dontCloneMe = false;
    this.config = "";
    this.queryConfig = new JobQueryConfig();
  }

  /**
   * Copy constructor: populate this job from the properties exposed by another {@link IJob}.
   *
   * @param job The job to copy from
   */
  public Job(IJob job) {
    this.id = job.getId();
    this.setState(job.getState());
    this.creator = job.getCreator();
    this.owner = job.getOwner();
    this.group = job.getGroup();
    this.ownerWritable = job.isOwnerWritable();
    this.groupWritable = job.isGroupWritable();
    this.worldWritable = job.isWorldWritable();
    this.ownerExecutable = job.isOwnerExecutable();
    this.groupExecutable = job.isGroupExecutable();
    this.worldExecutable = job.isWorldExecutable();
    this.lastModifiedBy = job.lastModifiedBy();
    this.lastModifiedAt = job.lastModifiedAt();
    this.description = job.getDescription();
    this.priority = job.getPriority();
    this.createTime = job.getCreateTime();
    this.submitTime = job.getSubmitTime();
    this.startTime = job.getStartTime();
    this.endTime = job.getEndTime();
    this.rekickTimeout = job.getRekickTimeout();
    this.maxRunTime = job.getMaxRunTime();
    // setTasks also recounts the active tasks
    this.setTasks(job.getCopyOfTasks());
    this.config = job.getConfig();
    this.onComplete = job.getOnCompleteURL();
    this.onError = job.getOnErrorURL();
    this.onCompleteTimeout = job.getOnCompleteTimeout();
    this.onErrorTimeout = job.getOnErrorTimeout();
    this.runCount = job.getRunCount();
    this.runTime = job.getRunTime();
    this.command = job.getCommand();
    this.parameters = job.getParameters() != null ? Lists.newArrayList(job.getParameters()) : null;
    this.hourlyBackups = job.getHourlyBackups();
    this.dailyBackups = job.getDailyBackups();
    this.weeklyBackups = job.getWeeklyBackups();
    this.monthlyBackups = job.getMonthlyBackups();
    this.autoRetry = job.getAutoRetry();
    this.replicas = job.getReplicas();
    this.queryConfig = job.getQueryConfig();
    this.dontAutoBalanceMe = job.getDontAutoBalanceMe();
    this.dontDeleteMe = job.getDontDeleteMe();
    this.dontCloneMe = job.getDontCloneMe();
    this.maxSimulRunning = job.getMaxSimulRunning();
    this.minionType = job.getMinionType();
    this.wasStopped = job.getWasStopped();
    setEnabled(job.isEnabled());
  }

  @Override
  public String getId() {
    return id;
  }

  @Override
  public String getCreator() {
    return creator;
  }

  @Override
  public void setCreator(String creator) {
    this.creator = creator;
  }

  @Override
  public String getOwner() {
    return owner;
  }

  @Override
  public void setOwner(String owner) {
    this.owner = owner;
  }

  @Override
  public String getGroup() {
    return group;
  }

  @Override
  public void setGroup(String group) {
    this.group = group;
  }

  @Override
  public boolean isOwnerWritable() {
    return ownerWritable;
  }

  @Override
  public void setOwnerWritable(boolean ownerWritable) {
    this.ownerWritable = ownerWritable;
  }

  @Override
  public boolean isGroupWritable() {
    return groupWritable;
  }

  @Override
  public void setGroupWritable(boolean groupWritable) {
    this.groupWritable = groupWritable;
  }

  @Override
  public boolean isWorldWritable() {
    return worldWritable;
  }

  @Override
  public void setWorldWritable(boolean worldWritable) {
    this.worldWritable = worldWritable;
  }

  @Override
  public boolean isOwnerExecutable() {
    return ownerExecutable;
  }

  @Override
  public void setOwnerExecutable(boolean ownerExecutable) {
    this.ownerExecutable = ownerExecutable;
  }

  @Override
  public boolean isGroupExecutable() {
    return groupExecutable;
  }

  @Override
  public void setGroupExecutable(boolean groupExecutable) {
    this.groupExecutable = groupExecutable;
  }

  @Override
  public boolean isWorldExecutable() {
    return worldExecutable;
  }

  @Override
  public void setWorldExecutable(boolean worldExecutable) {
    this.worldExecutable = worldExecutable;
  }

  @Override
  public String lastModifiedBy() {
    return lastModifiedBy;
  }

  @Override
  public void setLastModifiedBy(String user) {
    this.lastModifiedBy = user;
  }

  @Override
  public long lastModifiedAt() {
    return lastModifiedAt;
  }

  @Override
  public void setLastModifiedAt(long time) {
    this.lastModifiedAt = time;
  }

  // NOTE(review): createTime is a nullable Long (unset after the codec-only constructor), so this
  // unboxing can throw NullPointerException. Left unchanged because the right default is unclear.
  @Override
  public long getCreateTime() {
    return createTime;
  }

  @Override
  public String getDescription() {
    return description;
  }

  @Override
  public void setDescription(String description) {
    this.description = description;
  }

  @Override
  public String getCommand() {
    return command;
  }

  @Override
  public void setCommand(String command) {
    this.command = command;
  }

  @Override
  public int getPriority() {
    return priority;
  }

  @Override
  public void setPriority(int priority) {
    this.priority = priority;
  }

  @Override
  public Long getSubmitTime() {
    return submitTime;
  }

  @Override
  public void setSubmitTime(long submitTime) {
    this.submitTime = submitTime;
  }

  @Override
  public Long getStartTime() {
    return startTime;
  }

  @Override
  public void setStartTime(Long startTime) {
    this.startTime = startTime;
  }

  @Override
  public Long getEndTime() {
    return endTime;
  }

  @Override
  public void setEndTime(Long endTime) {
    this.endTime = endTime;
  }

  /**
   * Record a finish time: adds the elapsed time since {@code startTime} (when set) to the
   * accumulated run time and stores the finish time as the end time.
   *
   * @param finishTime The time the job finished
   */
  public void setFinishTime(long finishTime) {
    if (startTime != null) {
      runTime += finishTime - startTime;
    }
    endTime = finishTime;
  }

  @Override
  public Long getRekickTimeout() {
    return rekickTimeout;
  }

  /**
   * Set the rekick timeout. Null and non-positive values are stored as null.
   *
   * @param rekick The new timeout
   */
  @Override
  public void setRekickTimeout(Long rekick) {
    rekickTimeout = rekick != null && rekick > 0 ? rekick : null;
  }

  @Override
  public Long getMaxRunTime() {
    return maxRunTime;
  }

  @Override
  public void setMaxRunTime(Long maxRunTime) {
    this.maxRunTime = maxRunTime;
  }

  @Override
  public boolean isEnabled() {
    return !disabled;
  }

  /**
   * Enable or disable the job. Enabling a job in the ERROR state clears the error codes of all
   * tasks, moves errored tasks to IDLE and recalculates the job state; enabling a DEGRADED job
   * recalculates the job state.
   *
   * @param enabled The desired enabled flag
   * @return True if the enabled flag changed, false if the job was already in the requested state
   */
  @Override
  public boolean setEnabled(boolean enabled) {
    // "enabled == disabled" holds exactly when the requested flag differs from the current one
    if (enabled != disabled) {
      return false;
    }
    disabled = !enabled;
    // Determine new states
    if (enabled && state == JobState.ERROR.getValue()) {
      for (JobTask task : getCopyOfTasks()) {
        JobTaskState taskState = task.getState();
        task.setErrorCode(0);
        task.setPreFailErrorCode(0);
        if (taskState == JobTaskState.ERROR) {
          setTaskState(task, JobTaskState.IDLE, true);
        }
      }
      calculateJobState(true);
    } else if (enabled && state == JobState.DEGRADED.getValue()) {
      // Clear degraded state by recalculating
      calculateJobState(true);
    }
    return true;
  }

  @Override
  public Collection<JobParameter> getParameters() {
    return parameters;
  }

  /**
   * Replace the job parameters with a copy of the given collection, or clear them when null.
   *
   * @param parameters The new parameters, may be null
   */
  @Override
  public void setParameters(Collection<JobParameter> parameters) {
    this.parameters = parameters == null ? null : new ArrayList<>(parameters);
  }

  @Override
  public String getConfig() {
    return config;
  }

  @Override
  public void setConfig(String config) {
    this.config = config;
  }

  @Override
  public String getOnCompleteURL() {
    return onComplete;
  }

  @Override
  public void setOnCompleteURL(String url) {
    this.onComplete = url;
  }

  @Override
  public String getOnErrorURL() {
    return onError;
  }

  @Override
  public void setOnErrorURL(String url) {
    this.onError = url;
  }

  @Override
  public int getOnCompleteTimeout() {
    return onCompleteTimeout;
  }

  @Override
  public void setOnCompleteTimeout(int timeout) {
    this.onCompleteTimeout = timeout;
  }

  @Override
  public int getOnErrorTimeout() {
    return onErrorTimeout;
  }

  @Override
  public void setOnErrorTimeout(int timeout) {
    this.onErrorTimeout = timeout;
  }

  @Override
  public int getReplicas() {
    return replicas;
  }

  @Override
  public void setReplicas(int replicas) {
    this.replicas = replicas;
  }

  @Override
  public int getRunCount() {
    return runCount;
  }

  @Override
  public int incrementRunCount() {
    return ++runCount;
  }

  @Override
  public long getRunTime() {
    return runTime;
  }

  /**
   * @return The current job state, or {@link JobState#UNKNOWN} when the stored integer does not
   *     decode to a state
   */
  @Override
  public JobState getState() {
    JobState jobState = JobState.makeState(state);
    return jobState == null ? JobState.UNKNOWN : jobState;
  }

  @Override
  public boolean setState(JobState state) {
    return setState(state, false);
  }

  /**
   * Set the job state. The change is applied when forced, when the job is enabled and the current
   * state can transition to the new one, or when the job is disabled and the new state is IDLE or
   * ERROR.
   *
   * @param state The new state
   * @param force Whether to skip the transition check
   * @return False if a change to a different state was rejected; true otherwise
   */
  public boolean setState(JobState state, boolean force) {
    JobState curr = getState();
    if (force
        || (isEnabled() && curr.canTransition(state))
        || (!isEnabled() && (state == JobState.IDLE))
        || (!isEnabled() && (state == JobState.ERROR))) {
      // Store getValue() rather than ordinal(): the field is compared against getValue() in
      // setEnabled. NOTE(review): this assumes JobState.makeState decodes by value -- confirm.
      this.state = state.getValue();
      return true;
    } else if (state != curr) {
      log.warn(
          "[job.setstate] {}job {} cannot transition {} -> {}",
          (disabled) ? "disabled " : "",
          getId(),
          curr,
          state);
      for (StackTraceElement elt : Thread.currentThread().getStackTrace()) {
        log.warn(elt.toString());
      }
      return false;
    }
    return true;
  }

  /**
   * @return The number of tasks in this job; 0 when the task list has not been set
   */
  public synchronized int getTaskCount() {
    return nodes == null ? 0 : nodes.size();
  }

  /**
   * Find a task by id. The returned task has its job UUID set to this job's id.
   *
   * @param id The task id
   * @return The matching task, or null if there is none
   */
  @Override
  public synchronized JobTask getTask(int id) {
    if (nodes == null) {
      return null;
    }
    for (JobTask node : nodes) {
      if (node.getTaskID() == id) {
        node.setJobUUID(this.id);
        return node;
      }
    }
    return null;
  }

  /**
   * @return An immutable snapshot of the task list (empty if no tasks have been set)
   */
  @Override
  public synchronized List<JobTask> getCopyOfTasks() {
    if (nodes == null) {
      nodes = new ArrayList<>();
    }
    return ImmutableList.copyOf(nodes);
  }

  /**
   * @return A mutable copy of the task list sorted by task id
   */
  public synchronized List<JobTask> getCopyOfTasksSorted() {
    if (nodes == null) {
      nodes = new ArrayList<>();
    }
    List<JobTask> tasksCopy = Lists.newArrayList(nodes);
    Collections.sort(tasksCopy, taskNodeComparator);
    return tasksCopy;
  }

  /**
   * Append a task, counting it as active if its state is an active state.
   *
   * @param task The task to add
   */
  @Override
  public synchronized void addTask(JobTask task) {
    if (nodes == null) {
      nodes = new ArrayList<>();
    }
    nodes.add(task);
    if (task.getState().isActiveState()) {
      this.countActiveTasks++;
    }
  }

  /** Recompute countActiveTasks from the task list; an unset task list counts as zero. */
  private synchronized void recountActiveTasks() {
    int active = 0;
    if (nodes != null) {
      for (JobTask t : nodes) {
        if (t.getState().isActiveState()) {
          active++;
        }
      }
    }
    this.countActiveTasks = active;
  }

  /**
   * Replace the task list with a copy of the given list and recount the active tasks.
   *
   * @param tasks The new tasks; null is treated as an empty list
   */
  @Override
  public synchronized void setTasks(List<JobTask> tasks) {
    this.nodes = tasks == null ? new ArrayList<JobTask>() : Lists.newArrayList(tasks);
    recountActiveTasks();
  }

  public synchronized int getCountActiveTasks() {
    return countActiveTasks;
  }

  @Override
  public JobQueryConfig getQueryConfig() {
    return queryConfig;
  }

  @Override
  public void setQueryConfig(JobQueryConfig queryConfig) {
    this.queryConfig = queryConfig;
  }

  /** Encode this job as JSON after refreshing the active-task count. */
  @Override
  public JSONObject toJSON() throws Exception {
    recountActiveTasks();
    return CodecJSON.encodeJSON(this);
  }

  @Override
  public String toString() {
    try {
      return CodecJSON.encodeString(this);
    } catch (Exception e) {
      // Fall back to the default representation if encoding fails
      return super.toString();
    }
  }

  /**
   * Order jobs by submit time, earliest first. Jobs with no submit time sort before jobs that have
   * one. Ties are broken by job id, so distinct jobs submitted at the same instant do not compare
   * as equal, while a job always compares as equal to itself.
   *
   * @param o The job to compare against
   * @return A negative, zero or positive value per the {@link Comparable} contract
   */
  @Override
  public int compareTo(IJob o) {
    Long mine = getSubmitTime();
    Long theirs = o.getSubmitTime();
    if (mine != null && theirs != null) {
      int bySubmitTime = Long.compare(mine, theirs);
      if (bySubmitTime != 0) {
        return bySubmitTime;
      }
    } else if (mine != null) {
      return 1;
    } else if (theirs != null) {
      return -1;
    }
    String myId = getId();
    String otherId = o.getId();
    if (myId == null) {
      return otherId == null ? 0 : -1;
    }
    return otherId == null ? 1 : myId.compareTo(otherId);
  }

  public synchronized boolean setTaskState(JobTask task, JobTaskState newState) {
    return setTaskState(task, newState, false);
  }

  /**
   * Change a task's state, and update the job's state if appropriate
   *
   * @param task The task to modify
   * @param newState The new state to set
   * @param force Whether to force the state transition regardless of the expected transition map
   * @return True on success
   */
  public synchronized boolean setTaskState(JobTask task, JobTaskState newState, boolean force) {
    JobTaskState prevState = task.getState();

    if (!task.setState(newState, force)) {
      return false;
    }
    boolean wasActive = prevState.isActiveState();
    boolean nowActive = newState.isActiveState();
    if (wasActive && !nowActive) {
      this.countActiveTasks--;
    } else if (!wasActive && nowActive) {
      this.countActiveTasks++;
    }
    if (newState == JobTaskState.ERROR) {
      // An errored task disables the whole job
      this.disabled = true;
    }
    calculateJobState(force);
    return true;
  }

  /**
   * Calculate the job state based on the state of its tasks. Precedence is ERROR, REBALANCE,
   * RUNNING, SCHEDULED, then IDLE.
   *
   * @param force Whether to force the job state transition
   * @return True if the job state was applied
   */
  private boolean calculateJobState(boolean force) {
    boolean err = false, sched = false, run = false, reb = false, stopped = false;
    if (nodes != null) {
      for (JobTask t : nodes) {
        if (t.getWasStopped()) {
          stopped = true;
        }
        if (t.getState() == JobTaskState.REBALANCE) {
          reb = true;
        } else if (t.isRunning()) {
          run = true;
        } else if (t.getState() == JobTaskState.ALLOCATED || t.getState().isQueuedState()) {
          sched = true;
        } else if (t.getState() == JobTaskState.ERROR) {
          err = true;
          // NOTE(review): stopping here means tasks after the first errored one are not
          // consulted for the "stopped" flag -- confirm this is intended.
          break;
        }
      }
    }
    JobState oldJobState = getState();
    JobState nextState;
    if (err) {
      nextState = JobState.ERROR;
    } else if (reb) {
      nextState = JobState.REBALANCE;
    } else if (run) {
      nextState = JobState.RUNNING;
    } else if (sched) {
      nextState = JobState.SCHEDULED;
    } else {
      nextState = JobState.IDLE;
    }
    if (!setState(nextState, force)) {
      return false;
    }
    // If transitioning from error to non-error state, enable job as long as it has run recently.
    if (oldJobState == JobState.ERROR
        && nextState != JobState.ERROR
        && getSubmitTime() != null
        && System.currentTimeMillis() - getSubmitTime() < AUTO_ENABLE_CUTOFF) {
      setEnabled(true);
    }
    wasStopped = stopped;
    return true;
  }

  /**
   * Force a task into the ERROR state and record its error code.
   *
   * @param task The task to mark as errored
   * @param errorCode The error code to store on the task
   */
  public void errorTask(JobTask task, int errorCode) {
    setTaskState(task, JobTaskState.ERROR, true);
    task.setErrorCode(errorCode);
  }

  /**
   * Check whether all tasks of a job are idle/errored/rebalancing. If a job is kicked, and
   * isFinished evaluates to true, then it can be assumed that every task ran at least once,
   * regardless of whether any rebalancing was started in the mean time.
   *
   * @return True if the job is finished
   */
  public boolean isFinished() {
    for (JobTask jobTask : getCopyOfTasks()) {
      if (!taskStatesToFinishJob.contains(jobTask.getState())) {
        return false;
      }
    }
    return true;
  }

  @Override
  public boolean getDontAutoBalanceMe() {
    return dontAutoBalanceMe;
  }

  @Override
  public void setDontDeleteMe(boolean dontDeleteMe) {
    this.dontDeleteMe = dontDeleteMe;
  }

  @Override
  public boolean getDontDeleteMe() {
    return dontDeleteMe;
  }

  @Override
  public void setDontCloneMe(boolean dontCloneMe) {
    this.dontCloneMe = dontCloneMe;
  }

  @Override
  public boolean getDontCloneMe() {
    return dontCloneMe;
  }

  @Override
  public void setDontAutoBalanceMe(boolean dontAutoBalanceMe) {
    this.dontAutoBalanceMe = dontAutoBalanceMe;
  }

  @Override
  public int getHourlyBackups() {
    return hourlyBackups;
  }

  @Override
  public int getDailyBackups() {
    return dailyBackups;
  }

  @Override
  public int getWeeklyBackups() {
    return weeklyBackups;
  }

  @Override
  public int getMonthlyBackups() {
    return monthlyBackups;
  }

  @Override
  public void setHourlyBackups(int hourlyBackups) {
    this.hourlyBackups = hourlyBackups;
  }

  @Override
  public void setDailyBackups(int dailyBackups) {
    this.dailyBackups = dailyBackups;
  }

  @Override
  public void setWeeklyBackups(int weeklyBackups) {
    this.weeklyBackups = weeklyBackups;
  }

  @Override
  public void setMonthlyBackups(int monthlyBackups) {
    this.monthlyBackups = monthlyBackups;
  }

  @Override
  public boolean getWasStopped() {
    return wasStopped;
  }

  @Override
  public void setWasStopped(boolean wasStopped) {
    this.wasStopped = wasStopped;
  }

  /**
   * Mark a task as finished. A task finishing a REPLICATE or BACKUP step with a positive
   * pre-fail error code is put back into the error state with that code. Otherwise the task's
   * error code is cleared and it is forced to IDLE; if the job is then IDLE its end time is set.
   * When the task's previous error was a replicate/backup failure and no errored tasks remain,
   * the job is re-enabled.
   *
   * @param task The task that finished
   */
  public void setTaskFinished(JobTask task) {
    int preFailErrorCode = task.getPreFailErrorCode();
    int oldErrorCode = task.getErrorCode();
    boolean finishingReplicaStep =
        task.getState() == JobTaskState.REPLICATE || task.getState() == JobTaskState.BACKUP;
    if (finishingReplicaStep && preFailErrorCode > 0) {
      // Restore the old job error if it existed
      errorTask(task, preFailErrorCode);
      return;
    }
    task.setErrorCode(0);
    setTaskState(task, JobTaskState.IDLE, true);
    if (getState() == JobState.IDLE) {
      setEndTime(JitterClock.globalTime());
    }
    boolean wasReplicateOrBackupFailure =
        oldErrorCode == JobTaskErrorCode.EXIT_REPLICATE_FAILURE
            || oldErrorCode == JobTaskErrorCode.EXIT_BACKUP_FAILURE;
    // Both failure kinds require that no errored tasks remain before the job is re-enabled.
    if (wasReplicateOrBackupFailure && countErrorTasks() == 0) {
      // If the job is disabled because this task failed to replicate, enable it.
      log.warn("Enabling job {} because the last replicate/backup error was resolved", getId());
      disabled = false;
    }
  }

  @Override
  public int getMaxSimulRunning() {
    return maxSimulRunning;
  }

  @Override
  public void setMaxSimulRunning(int maxSimulRunning) {
    this.maxSimulRunning = maxSimulRunning;
  }

  @Override
  public boolean getAutoRetry() {
    return autoRetry;
  }

  @Override
  public void setAutoRetry(boolean autoRetry) {
    this.autoRetry = autoRetry;
  }

  /** Count the tasks currently in the ERROR state. */
  private int countErrorTasks() {
    int count = 0;
    List<JobTask> tasks = getCopyOfTasks();
    if (tasks == null) {
      return count;
    }
    for (JobTask task : tasks) {
      if (task != null && task.getState() == JobTaskState.ERROR) {
        count++;
      }
    }
    return count;
  }

  /**
   * @return The mean byte count over all tasks (integer division), or 0 when there are no tasks
   */
  public long calcAverageTaskSizeBytes() {
    List<JobTask> tasks = getCopyOfTasks();
    if (tasks == null || tasks.isEmpty()) {
      return 0;
    }
    long totalBytes = 0;
    for (JobTask task : tasks) {
      if (task != null) {
        totalBytes += task.getByteCount();
      }
    }
    return totalBytes / tasks.size();
  }

  /**
   * @return The minion type; when unset it is first initialised to {@code
   *     Minion.defaultMinionType}
   */
  @Override
  public String getMinionType() {
    if (minionType == null) {
      minionType = Minion.defaultMinionType;
    }
    return minionType;
  }

  /**
   * Get an estimate for the last time this job was run.
   *
   * @return The start time when the end time is unset and the job is IDLE; otherwise the end time,
   *     which may be null
   */
  public Long getCanonicalTime() {
    return (endTime == null && getState() == JobState.IDLE) ? startTime : endTime;
  }

  @Override
  public void setMinionType(String minionType) {
    this.minionType = minionType;
  }

  /**
   * Decide whether the job is due for an automatic rekick: it must be enabled, have run at least
   * once, have a positive rekick timeout, and that timeout (in minutes) must have elapsed since
   * the canonical time.
   *
   * @param clock The current time in millis
   * @return True if the job should be rekicked
   */
  public boolean shouldAutoRekick(long clock) {
    Long canonicalTime = getCanonicalTime();
    Long rekick = getRekickTimeout();
    return isEnabled()
        && canonicalTime != null
        && getRunCount() > 0
        && rekick != null
        && rekick > 0
        && clock - canonicalTime >= (rekick * 60000L);
  }

  /** Log a job event to a rolling log file */
  public static void logJobEvent(Job job, JobEvent event, RollingLog eventLog) {
    LogUtil.log(
        eventLog,
        log,
        new StringMapHelper()
            .put("event", event)
            .put("time", System.currentTimeMillis())
            .put("jobid", job.getId())
            .put("creator", job.getCreator())
            .put("owner", job.getOwner())
            .put("createTime", job.getCreateTime())
            .put("priority", job.getPriority())
            .put("replicas", job.getReplicas())
            .put("runCount", job.getRunCount())
            .put("state", job.getState())
            .put("taskCount", job.getTaskCount())
            .put("avgTaskSize", job.calcAverageTaskSizeBytes())
            .put("startTime", job.getStartTime())
            .put("endTime", job.getEndTime())
            .put("submitTime", job.getSubmitTime())
            .put("command", job.getCommand()));
  }
}