public class WorkerTracker { // should be 'arbitrarily high' -- high enough that two frame-readers are almost certainly going // to // refer to the same WorkerData on request public static final int DEFAULT_CACHE_SIZE = Parameter.intValue("hydra.query.loadbalance.cache.size", 500); // maximum query tasks allowed per discovered query worker for this query master public static final int DEFAULT_WORKER_LEASES = Parameter.intValue("hydra.query.tasks.max", 3); final LoadingCache<String, WorkerData> workerCache; public WorkerTracker() { this.workerCache = CacheBuilder.newBuilder() .maximumSize(DEFAULT_CACHE_SIZE) .build( new CacheLoader<String, WorkerData>() { @Override public WorkerData load(String key) throws Exception { return new WorkerData(key, DEFAULT_WORKER_LEASES); } }); } public Collection<WorkerData> values() { return workerCache.asMap().values(); } public WorkerData get(String key) { return workerCache.getUnchecked(key); } }
public class SpawnQueuesByPriority extends TreeMap<Integer, LinkedList<SpawnQueueItem>> { private static Logger log = LoggerFactory.getLogger(SpawnQueuesByPriority.class); private final Lock queueLock = new ReentrantLock(); /* Internal map used to record outgoing task kicks that will not immediately be visible in the HostState */ private final HashMap<String, Integer> hostAvailSlots = new HashMap<>(); private static final int SPAWN_QUEUE_AVAIL_REFRESH = Parameter.intValue( "spawn.queue.avail.refresh", 60_000); // Periodically refresh hostAvailSlots to the actual availableSlots count private static final int SPAWN_QUEUE_NEW_TASK_LAST_SLOT_DELAY = Parameter.intValue( "spawn.queue.new.task.last.slot.delay", 90_000); // New tasks can't take the last slot of a host unless they wait this long private long lastAvailSlotsUpdate = 0; private static final boolean ENABLE_TASK_MIGRATION = Parameter.boolValue("task.migration.enable", true); // Whether tasks can migrate at all private static final long TASK_MIGRATION_MIN_BYTES = Parameter.longValue( "task.migration.min.bytes", 50_000_000); // Tasks this small can always migrate private static final long TASK_MIGRATION_MAX_BYTES = Parameter.longValue( "task.migration.max.bytes", 10_000_000_000L); // Tasks up to this big can migrate if they stay in the queue long // enough private static final long TASK_MIGRATION_LIMIT_GROWTH_INTERVAL = Parameter.longValue( "task.migration.limit.growth.interval", 1_200_000); // The byte limit raises to the max value if tasks are queued this long (20 // minutes) private static final long TASK_MIGRATION_INTERVAL_PER_HOST = Parameter.longValue( "task.migration.interval", 240_000); // Only migrate a task to a particular host once per interval private final Cache<String, Boolean> migrateHosts; // Use cache ttl to mark hosts that have recently performed or received a // migration private final AtomicBoolean stoppedJob = new AtomicBoolean( false); // When tasks are stopped, track this behavior so that 
the queue can be modified // as soon as possible /* This comparator should only be used within a block that is synchronized on hostAvailSlots. It does not internally synchronize to save a bunch of extra lock operations.*/ private final Comparator<HostState> hostStateComparator = new Comparator<HostState>() { @Override public int compare(HostState o1, HostState o2) { int hostAvailSlots1 = hostAvailSlots.containsKey(o1.getHostUuid()) ? hostAvailSlots.get(o1.getHostUuid()) : 0; int hostAvailSlots2 = hostAvailSlots.containsKey(o2.getHostUuid()) ? hostAvailSlots.get(o2.getHostUuid()) : 0; if (hostAvailSlots1 != hostAvailSlots2) { return Integer.compare( -hostAvailSlots1, -hostAvailSlots2); // Return hosts with large number of slots first } else { return Double.compare( o1.getMeanActiveTasks(), o2.getMeanActiveTasks()); // Return hosts with small meanActiveTask value first } } }; public SpawnQueuesByPriority() { super( new Comparator<Integer>() { public int compare(Integer int1, Integer int2) { return -int1.compareTo(int2); } }); migrateHosts = CacheBuilder.newBuilder() .expireAfterWrite(TASK_MIGRATION_INTERVAL_PER_HOST, TimeUnit.MILLISECONDS) .build(); } public void lock() { queueLock.lock(); } public void unlock() { queueLock.unlock(); } public boolean tryLock() { return queueLock.tryLock(); } public boolean addTaskToQueue( int priority, JobKey task, boolean canIgnoreQuiesce, boolean toHead) { queueLock.lock(); try { LinkedList<SpawnQueueItem> queue = this.get(priority); if (queue == null) { queue = new LinkedList<>(); this.put(priority, queue); } if (toHead) { queue.add(0, new SpawnQueueItem(task, canIgnoreQuiesce)); return true; } return queue.add(new SpawnQueueItem(task, canIgnoreQuiesce)); } finally { queueLock.unlock(); } } public boolean remove(int priority, JobKey task) { queueLock.lock(); try { LinkedList<SpawnQueueItem> queue = get(priority); if (queue != null) { ListIterator<SpawnQueueItem> iter = queue.listIterator(); while (iter.hasNext()) { JobKey nextKey 
= iter.next(); if (nextKey != null && nextKey.matches(task)) { iter.remove(); return true; } } } return false; } finally { queueLock.unlock(); } } public int getTaskQueuedCount(int priority) { queueLock.lock(); try { LinkedList<SpawnQueueItem> queueForPriority = this.get(priority); if (queueForPriority != null) { return queueForPriority.size(); } return 0; } finally { queueLock.unlock(); } } /** * Add an open slot to a host, probably in response to a task finishing * * @param hostID The host UUID to update */ public void markHostAvailable(String hostID) { if (hostID == null) { return; } synchronized (hostAvailSlots) { if (hostAvailSlots.containsKey(hostID)) { hostAvailSlots.put(hostID, hostAvailSlots.get(hostID) + 1); } else { hostAvailSlots.put(hostID, 1); } } } /** * Out of a list of possible hosts to run a task, find the best one. * * @param inputHosts The legal hosts for a task * @param requireAvailableSlot Whether to require at least one available slot * @return One of the hosts, if one with free slots is found; null otherwise */ public HostState findBestHostToRunTask(List<HostState> inputHosts, boolean requireAvailableSlot) { if (inputHosts == null || inputHosts.isEmpty()) { return null; } synchronized (hostAvailSlots) { HostState bestHost = Collections.min(inputHosts, hostStateComparator); if (bestHost != null) { if (!requireAvailableSlot || hostAvailSlots.containsKey(bestHost.getHostUuid()) && hostAvailSlots.get(bestHost.getHostUuid()) > 0) { return bestHost; } } return null; } } /** * Update the available slots for each host if it has been sufficiently long since the last * update. 
* * @param hosts The hosts to input */ public void updateAllHostAvailSlots(List<HostState> hosts) { synchronized (hostAvailSlots) { if (JitterClock.globalTime() - lastAvailSlotsUpdate < SPAWN_QUEUE_AVAIL_REFRESH) { return; } hostAvailSlots.clear(); for (HostState host : hosts) { String hostID = host.getHostUuid(); if (hostID != null) { hostAvailSlots.put(hostID, host.getAvailableTaskSlots()); } } } lastAvailSlotsUpdate = JitterClock.globalTime(); if (log.isTraceEnabled()) { log.trace("[SpawnQueuesByPriority] Host Avail Slots: " + hostAvailSlots); } } /** * Use the record of which hosts have pending task kicks to decide if a task should be sent to a * host * * @param hostID The host UUID to check * @return True if a new task should kick */ public boolean shouldKickTaskOnHost(String hostID) { synchronized (hostAvailSlots) { return hostAvailSlots.containsKey(hostID) && hostAvailSlots.get(hostID) > 0; } } /** * Inform the queue that a task command is being sent to a host * * @param hostID The host UUID to update */ public void markHostTaskActive(String hostID) { synchronized (hostAvailSlots) { int curr = hostAvailSlots.containsKey(hostID) ? 
hostAvailSlots.get(hostID) : 0; hostAvailSlots.put(hostID, Math.max(curr - 1, 0)); } } public boolean isMigrationEnabled() { return ENABLE_TASK_MIGRATION; } /** * Decide whether a task should be migrated based on the time of last migration and the size of * the task * * @param task The task to be migrated * @param targetHostId The host ID being considered for migration * @return True if the task should be migrated there */ public boolean shouldMigrateTaskToHost(JobTask task, String targetHostId) { String taskHost; if (task == null || targetHostId == null || task.getByteCount() == 0 || (taskHost = task.getHostUUID()) == null) { return false; // Suspicious tasks should not be migrated } return shouldKickTaskOnHost(targetHostId) && migrateHosts.getIfPresent(taskHost) == null && migrateHosts.getIfPresent(targetHostId) == null; } /** * Record the fact that a migration happened between two hosts, preventing additional migrations * on either host for a period of time * * @param sourceHostId The host that the task is migrating from * @param targetHostId The host that the task is migrating to */ public void markMigrationBetweenHosts(String sourceHostId, String targetHostId) { migrateHosts.put(sourceHostId, true); migrateHosts.put(targetHostId, true); } /** * Decide whether a task of the given size should be migrated, given how long it has been queued * * @param byteCount The size of the task in bytes * @param timeOnQueue How long the task has been queued in millis * @return True if the task should be allowed to migrate */ public boolean checkSizeAgeForMigration(long byteCount, long timeOnQueue) { double intervalPercentage = Math.min(1, (double) (timeOnQueue) / TASK_MIGRATION_LIMIT_GROWTH_INTERVAL); // The limit is TASK_MIGRATION_MIN_BYTES for recently-queued tasks, then slowly grows to // TASK_MIGRATION_MAX_BYTES return byteCount < (long) (TASK_MIGRATION_MIN_BYTES + intervalPercentage * (TASK_MIGRATION_MAX_BYTES - TASK_MIGRATION_MIN_BYTES)); } public static long 
getTaskMigrationMaxBytes() { return TASK_MIGRATION_MAX_BYTES; } public static long getTaskMigrationLimitGrowthInterval() { return TASK_MIGRATION_LIMIT_GROWTH_INTERVAL; } /** * When a job is stopped, we need to release the queue lock as quickly as possible to ensure that * we can remove tasks from the job as soon as possible. The stoppedJob variable enables this * behavior. * * @return True if a job was stopped since the last queue iteration */ public boolean getStoppedJob() { return stoppedJob.get(); } public void setStoppedJob(boolean stopped) { stoppedJob.set(stopped); } public boolean shouldKickNewTaskOnHost(long timeOnQueue, HostState host) { synchronized (hostAvailSlots) { if (hostAvailSlots.containsKey(host.getHostUuid()) && hostAvailSlots.get(host.getHostUuid()) <= 1) { if (host.getMaxTaskSlots() == 1) { // If a host has only one slot to begin with, allow tasks to kick there. return true; } // Otherwise, don't let new tasks take the last slot for a set period return timeOnQueue > SPAWN_QUEUE_NEW_TASK_LAST_SLOT_DELAY; } return true; } } }
public class JobExpand { private static final int maxDepth = Parameter.intValue("spawn.macro.expand.depth", 256); private static final Logger log = LoggerFactory.getLogger(JobExpand.class); private static class MacroTokenReplacer extends TokenReplacer { private final Spawn spawn; private static final Logger log = LoggerFactory.getLogger(MacroTokenReplacer.class); private static final Joiner joiner = Joiner.on(',').skipNulls(); private static final Pattern macroPattern = Pattern.compile("%\\{(.+?)\\}%"); MacroTokenReplacer(Spawn spawn) { super("%{", "}%"); this.spawn = spawn; } @Override public long getMaxDepth() { return maxDepth; } @Override public String replace(Region region, String label) { if (label.startsWith("http://")) { try { return new String(HttpUtil.httpGet(label, 0).getBody(), "UTF-8"); } catch (Exception ex) { throw new RuntimeException(ex); } } JobMacro macro = spawn.getJobMacroManager().getEntity(label); String target = null; if (macro != null) { target = macro.getMacro(); } else { List<String> aliases = spawn.getAliasManager().aliasToJobs(label); if (aliases != null) { target = joiner.join(aliases); } } if (target != null) { List<String> contents = new ArrayList<>(); List<String> delimiters = new ArrayList<>(); CommentTokenizer commentTokenizer = new CommentTokenizer(target); commentTokenizer.tokenize(contents, delimiters); StringBuilder builder = new StringBuilder(); int length = contents.size(); builder.append(contents.get(0)); builder.append(delimiters.get(0)); for (int i = 1; i < length; i++) { String delimiter = delimiters.get(i - 1); if (delimiter.equals("//") || delimiter.equals("/*")) { /* disable any macros inside comments so they don't get expanded */ builder.append(macroPattern.matcher(contents.get(i)).replaceAll("%_{$1}_%")); } else { builder.append(contents.get(i)); } builder.append(delimiters.get(i)); } return builder.toString(); } else { String msg = "non-existent macro referenced : " + label; log.warn(msg); throw new 
RuntimeException(msg); } } } // initialize optional/3rd party job config expanders private static final Map<String, JobConfigExpander> expanders = new HashMap<>(); static { PluginMap expanderMap = PluginRegistry.defaultRegistry().asMap().get("job expander"); if (expanderMap != null) { for (Map.Entry<String, Class<?>> expanderPlugin : expanderMap.asBiMap().entrySet()) { registerExpander( expanderPlugin.getKey(), (Class<? extends JobConfigExpander>) expanderPlugin.getValue()); } } } static void registerExpander(String macroName, Class<? extends JobConfigExpander> clazz) { Object o = null; try { o = clazz.newInstance(); expanders.put(macroName, (JobConfigExpander) o); } catch (InstantiationException | IllegalAccessException e) { log.warn( "Class '" + clazz + "' registered for '" + macroName + "' cannot be initialized: " + e, e); } catch (ClassCastException e) { log.warn( "Class '" + clazz + "' registered for '" + macroName + "' is not JobConfigExpander but '" + o.getClass() + "'"); } } private static String macroTemplateParamsHelper(String input, final HashMap<String, String> map) throws TokenReplacerOverflowException { return new TokenReplacer("%[", "]%") { @Override public String replace(Region region, String label) { return map.get(LessStrings.splitArray(label, ":")[0]); } @Override public long getMaxDepth() { return maxDepth; } }.process(input); } public static String macroTemplateParams(String expandedJob, Collection<JobParameter> params) throws TokenReplacerOverflowException { if (params != null && expandedJob != null) { final HashMap<String, String> map = new HashMap<>(); for (JobParameter param : params) { String name = param.getName(); String value = param.getValue(); String defaultValue = param.getDefaultValue(); map.put(name, LessStrings.isEmpty(value) ? 
defaultValue : value); } StringBuilder builder = new StringBuilder(); List<String> contents = new ArrayList<>(); List<String> delimiters = new ArrayList<>(); CommentTokenizer commentTokenizer = new CommentTokenizer(expandedJob); commentTokenizer.tokenize(contents, delimiters); int length = contents.size(); builder.append(macroTemplateParamsHelper(contents.get(0), map)); String firstDelimiter = delimiters.get(0); if (firstDelimiter != "%[" && firstDelimiter != "]%") { builder.append(firstDelimiter); } for (int i = 1; i < length; i++) { String prevDelimiter = delimiters.get(i - 1); String nextDelimiter = delimiters.get(i); // Ignore parameters inside of comments if (prevDelimiter.equals("//") || prevDelimiter.equals("/*")) { builder.append(contents.get(i)); } else if (prevDelimiter.equals("%[") && nextDelimiter.equals("]%")) { String value = map.get(LessStrings.splitArray(contents.get(i), ":")[0]); if (value != null) { builder.append(value); } } else { // Delimiters such as double-quotes may contain parameters inside them builder.append(macroTemplateParamsHelper(contents.get(i), map)); } if (nextDelimiter != "%[" && nextDelimiter != "]%") { builder.append(nextDelimiter); } } return builder.toString(); } return expandedJob; } private static void addParameter(String paramString, Map<String, JobParameter> params) { JobParameter param = new JobParameter(); String[] tokens = paramString.split(":", 2); param.setName(tokens[0]); if (tokens.length > 1) { param.setDefaultValue(tokens[1]); } /** re-declarations not allowed -- iow, first instance wins (for defaulting values) */ if (params.get(param.getName()) == null) { params.put(param.getName(), param); } } private static void macroFindParametersHelper( String jobFragment, Map<String, JobParameter> params) { int index = 0; while (true) { int next = jobFragment.indexOf("%[", index); if (next >= 0) { int end = jobFragment.indexOf("]%", next + 2); if (end > 0) { addParameter(jobFragment.substring(next + 2, end), params); index = 
end + 2; } else { index = next + 2; } } else { break; } } } /** find parameters in the expanded job */ public static Map<String, JobParameter> macroFindParameters(String expandedJob) { LinkedHashMap<String, JobParameter> params = new LinkedHashMap<>(); if (expandedJob == null) { return params; } List<String> contents = new ArrayList<>(); List<String> delimiters = new ArrayList<>(); CommentTokenizer commentTokenizer = new CommentTokenizer(expandedJob); commentTokenizer.tokenize(contents, delimiters); int length = contents.size(); macroFindParametersHelper(contents.get(0), params); for (int i = 1; i < length; i++) { String delimiter = delimiters.get(i - 1); // Ignore parameters inside of comments if (delimiter.equals("//") || delimiter.equals("/*")) { // do nothing } else if (delimiter.equals("%[") && delimiters.get(i).equals("]%")) { addParameter(contents.get(i), params); } else { // Delimiters such as double-quotes may contain parameters inside them macroFindParametersHelper(contents.get(i), params); } } return params; } /** * recursively expand macros * * @throws IllegalStateException if expanded config exceeds the max length allowed. 
*/ public static String macroExpand(Spawn spawn, String rawtext) throws TokenReplacerOverflowException, IllegalStateException { MacroTokenReplacer replacer = new MacroTokenReplacer(spawn); List<String> contents = new ArrayList<>(); List<String> delimiters = new ArrayList<>(); CommentTokenizer commentTokenizer = new CommentTokenizer(rawtext); commentTokenizer.tokenize(contents, delimiters); StringBuilder builder = new StringBuilder(); int length = contents.size(); builder.append(replacer.process(contents.get(0))); builder.append(delimiters.get(0)); for (int i = 1; i < length; i++) { if (builder.length() > Spawn.inputMaxNumberOfCharacters) { throw new IllegalStateException( "Expanded job config length of at least " + builder.length() + " characters is greater than max length of " + Spawn.inputMaxNumberOfCharacters); } String delimiter = delimiters.get(i - 1); if (delimiter.equals("//") || delimiter.equals("/*")) { builder.append(contents.get(i)); } else { builder.append(replacer.process(contents.get(i))); } builder.append(delimiters.get(i)); } if (builder.length() > Spawn.inputMaxNumberOfCharacters) { throw new IllegalStateException( "Expanded job config length of " + builder.length() + " characters is greater than max length of " + Spawn.inputMaxNumberOfCharacters); } return builder.toString(); } /* special pass that injects spawn metadata and specific tokens * TODO - expand to include job shards option */ public static String magicMacroExpand(final Spawn spawn, String rawtext, final String jobId) throws TokenReplacerOverflowException { return new TokenReplacer("%(", ")%") { @Override public String replace(Region region, String label) { List<String> mfn = Lists.newArrayList(Splitter.on(' ').split(label)); String macroName = mfn.get(0); List<String> tokens = mfn.subList(1, mfn.size()); if (macroName.equals("jobhosts")) { JobMacro macro = spawn.createJobHostMacro(tokens.get(0), Integer.parseInt(tokens.get(1))); return macro.getMacro(); } else if 
(expanders.containsKey(macroName)) { return expanders.get(macroName).expand(spawn.getSpawnDataStore(), jobId, tokens); } else { String msg = "non-existent magic macro referenced : " + label; log.warn(msg); throw new RuntimeException(msg); } } @Override public long getMaxDepth() { return maxDepth; } }.process(rawtext); } }
/** for job submission and tracking IJob that keeps everything in gone Codable Object graph */ @JsonAutoDetect( getterVisibility = JsonAutoDetect.Visibility.NONE, isGetterVisibility = JsonAutoDetect.Visibility.NONE, setterVisibility = JsonAutoDetect.Visibility.NONE) public final class Job implements IJob { private static final Logger log = LoggerFactory.getLogger(Job.class); private static final Comparator<JobTask> taskNodeComparator = (t1, t2) -> Integer.compare(t1.getTaskID(), t2.getTaskID()); @FieldConfig private int state; @FieldConfig private int countActiveTasks; /* creator of the job */ @FieldConfig private String creator; /* owner of the job */ @FieldConfig private String owner; /* group of the job */ @FieldConfig private String group; /* can the owner modify the job */ @FieldConfig private boolean ownerWritable; /* can the group modify the job */ @FieldConfig private boolean groupWritable; /* can the world modify the job */ @FieldConfig private boolean worldWritable; /* can the owner start/stop the job */ @FieldConfig private boolean ownerExecutable; /* can the group start/stop the job */ @FieldConfig private boolean groupExecutable; /* can the world start/stop the job */ @FieldConfig private boolean worldExecutable; /* user who last modified the job */ @FieldConfig private String lastModifiedBy; /* last modification time */ @FieldConfig private long lastModifiedAt; /* purely ornamental description of this job */ @FieldConfig private String description; /* key used for storing / retrieving this job */ @FieldConfig private String id; /* higher means more important */ @FieldConfig private int priority; /* Unix epoch offset of time job was created */ @FieldConfig private Long createTime; /* Unix epoch offset of time job was last submitted */ @FieldConfig private Long submitTime; /* Unix epoch offset of time first job node was assigned */ @FieldConfig private Long startTime; /* Unix epoch offset of time last job node completed */ @FieldConfig private Long 
endTime; /* minutes between re-kicking */ @FieldConfig private Long rekickTimeout; /* minutes max time to allocate to job before it's interrupted */ @FieldConfig private Long maxRunTime; /* list of nodes and their state */ @FieldConfig private ArrayList<JobTask> nodes; /* JSON configuration url -- only read at submit time if conf empty */ @FieldConfig private String config; /* URL for spawn to call on job complete. for automating workflows */ @FieldConfig private String onComplete; @FieldConfig private String onError; /* timeout in seconds */ @FieldConfig private int onCompleteTimeout; @FieldConfig private int onErrorTimeout; @FieldConfig private int runCount; @FieldConfig private long runTime; @FieldConfig private String command; @FieldConfig private boolean disabled; @FieldConfig private ArrayList<JobParameter> parameters; @FieldConfig private int hourlyBackups; @FieldConfig private int dailyBackups; @FieldConfig private int weeklyBackups; @FieldConfig private int monthlyBackups; @FieldConfig private int replicas; @FieldConfig private int readOnlyReplicas; @FieldConfig private boolean dontAutoBalanceMe; @FieldConfig private boolean dontDeleteMe; @FieldConfig private boolean dontCloneMe; @FieldConfig private boolean wasStopped; @FieldConfig private int maxSimulRunning; @FieldConfig private String minionType; @FieldConfig private boolean autoRetry; @FieldConfig private JobQueryConfig queryConfig; /* If all errored tasks from an errored job are resolved and the job has started within this cutoff, automatically enable the job. Default is 3 days. */ private static final long AUTO_ENABLE_CUTOFF = Parameter.longValue("job.enable.cutoff", 1000 * 60 * 60 * 24 * 3); /* Task states that indicate that a job can be considered done. Rebalance/host-failure replications are included so these long-running operations will not delay the job rekick. 
*/ private static final Set<JobTaskState> taskStatesToFinishJob = ImmutableSet.of( JobTaskState.IDLE, JobTaskState.ERROR, JobTaskState.REBALANCE, JobTaskState.FULL_REPLICATE); // For codec only public Job() {} public Job(String id) { this(id, null); } public Job(String id, String creator) { this.id = id; this.creator = creator; this.createTime = JitterClock.globalTime(); this.endTime = createTime; this.dontAutoBalanceMe = false; this.dontDeleteMe = false; this.dontCloneMe = false; this.config = ""; this.queryConfig = new JobQueryConfig(); } public Job(IJob job) { this.id = job.getId(); this.setState(job.getState()); this.creator = job.getCreator(); this.owner = job.getOwner(); this.group = job.getGroup(); this.ownerWritable = job.isOwnerWritable(); this.groupWritable = job.isGroupWritable(); this.worldWritable = job.isWorldWritable(); this.ownerExecutable = job.isOwnerExecutable(); this.groupExecutable = job.isGroupExecutable(); this.worldExecutable = job.isWorldExecutable(); this.lastModifiedBy = job.lastModifiedBy(); this.lastModifiedAt = job.lastModifiedAt(); this.description = job.getDescription(); this.priority = job.getPriority(); this.createTime = job.getCreateTime(); this.submitTime = job.getSubmitTime(); this.startTime = job.getStartTime(); this.endTime = job.getEndTime(); this.rekickTimeout = job.getRekickTimeout(); this.maxRunTime = job.getMaxRunTime(); this.setTasks(job.getCopyOfTasks()); recountActiveTasks(); this.config = job.getConfig(); this.onComplete = job.getOnCompleteURL(); this.onError = job.getOnErrorURL(); this.onCompleteTimeout = job.getOnCompleteTimeout(); this.onErrorTimeout = job.getOnErrorTimeout(); this.runCount = job.getRunCount(); this.runTime = job.getRunTime(); this.command = job.getCommand(); this.parameters = job.getParameters() != null ? 
Lists.newArrayList(job.getParameters()) : null; this.hourlyBackups = job.getHourlyBackups(); this.dailyBackups = job.getDailyBackups(); this.weeklyBackups = job.getWeeklyBackups(); this.monthlyBackups = job.getMonthlyBackups(); this.autoRetry = job.getAutoRetry(); this.replicas = job.getReplicas(); this.queryConfig = job.getQueryConfig(); this.dontAutoBalanceMe = job.getDontAutoBalanceMe(); this.dontDeleteMe = job.getDontDeleteMe(); this.dontCloneMe = job.getDontCloneMe(); this.maxSimulRunning = job.getMaxSimulRunning(); this.minionType = job.getMinionType(); this.wasStopped = job.getWasStopped(); setEnabled(job.isEnabled()); } @Override public String getId() { return id; } @Override public String getCreator() { return creator; } @Override public void setCreator(String creator) { this.creator = creator; } @Override public String getOwner() { return owner; } @Override public void setOwner(String owner) { this.owner = owner; } @Override public String getGroup() { return group; } @Override public void setGroup(String group) { this.group = group; } @Override public boolean isOwnerWritable() { return ownerWritable; } @Override public void setOwnerWritable(boolean ownerWritable) { this.ownerWritable = ownerWritable; } @Override public boolean isGroupWritable() { return groupWritable; } @Override public void setGroupWritable(boolean groupWritable) { this.groupWritable = groupWritable; } @Override public boolean isWorldWritable() { return worldWritable; } @Override public void setWorldWritable(boolean worldWritable) { this.worldWritable = worldWritable; } @Override public boolean isOwnerExecutable() { return ownerExecutable; } @Override public void setOwnerExecutable(boolean ownerExecutable) { this.ownerExecutable = ownerExecutable; } @Override public boolean isGroupExecutable() { return groupExecutable; } @Override public void setGroupExecutable(boolean groupExecutable) { this.groupExecutable = groupExecutable; } @Override public boolean isWorldExecutable() { return 
worldExecutable; } @Override public void setWorldExecutable(boolean worldExecutable) { this.worldExecutable = worldExecutable; } @Override public String lastModifiedBy() { return lastModifiedBy; } @Override public void setLastModifiedBy(String user) { this.lastModifiedBy = user; } @Override public long lastModifiedAt() { return lastModifiedAt; } @Override public void setLastModifiedAt(long time) { this.lastModifiedAt = time; } @Override public long getCreateTime() { return createTime; } @Override public String getDescription() { return description; } @Override public void setDescription(String description) { this.description = description; } @Override public String getCommand() { return command; } @Override public void setCommand(String command) { this.command = command; } @Override public int getPriority() { return priority; } @Override public void setPriority(int priority) { this.priority = priority; } @Override public Long getSubmitTime() { return submitTime; } @Override public void setSubmitTime(long submitTime) { this.submitTime = submitTime; } @Override public Long getStartTime() { return startTime; } @Override public void setStartTime(Long startTime) { this.startTime = startTime; } @Override public Long getEndTime() { return endTime; } @Override public void setEndTime(Long endTime) { this.endTime = endTime; } public void setFinishTime(long finishTime) { if (startTime != null) { runTime += finishTime - startTime; } endTime = finishTime; } @Override public Long getRekickTimeout() { return rekickTimeout; } @Override public void setRekickTimeout(Long rekick) { rekickTimeout = rekick != null && rekick > 0 ? 
rekick : null; } @Override public Long getMaxRunTime() { return maxRunTime; } @Override public void setMaxRunTime(Long maxRunTime) { this.maxRunTime = maxRunTime; } @Override public boolean isEnabled() { return !disabled; } @Override public boolean setEnabled(boolean enabled) { if (enabled == disabled) { disabled = !enabled; // Determine new states if (enabled && state == JobState.ERROR.getValue()) { for (JobTask task : getCopyOfTasks()) { JobTaskState state = task.getState(); task.setErrorCode(0); task.setPreFailErrorCode(0); if (state == JobTaskState.ERROR) { setTaskState(task, JobTaskState.IDLE, true); } } calculateJobState(true); } else if (enabled && state == JobState.DEGRADED.getValue()) { // Clear degraded state by recalculating calculateJobState(true); } return true; } return false; } @Override public Collection<JobParameter> getParameters() { return parameters; } @Override public void setParameters(Collection<JobParameter> parameters) { if (parameters != null) { this.parameters = new ArrayList<>(parameters.size()); this.parameters.addAll(parameters); } else { this.parameters = null; } } @Override public String getConfig() { return config; } @Override public void setConfig(String config) { this.config = config; } @Override public String getOnCompleteURL() { return onComplete; } @Override public void setOnCompleteURL(String url) { this.onComplete = url; } @Override public String getOnErrorURL() { return onError; } @Override public void setOnErrorURL(String url) { this.onError = url; } @Override public int getOnCompleteTimeout() { return onCompleteTimeout; } @Override public void setOnCompleteTimeout(int timeout) { this.onCompleteTimeout = timeout; } @Override public int getOnErrorTimeout() { return onErrorTimeout; } @Override public void setOnErrorTimeout(int timeout) { this.onErrorTimeout = timeout; } @Override public int getReplicas() { return replicas; } @Override public void setReplicas(int replicas) { this.replicas = replicas; } @Override public int 
getRunCount() { return runCount; } @Override public int incrementRunCount() { return ++runCount; } @Override public long getRunTime() { return runTime; } @Override public JobState getState() { JobState jobState = JobState.makeState(state); return jobState == null ? JobState.UNKNOWN : jobState; } @Override public boolean setState(JobState state) { return setState(state, false); } public boolean setState(JobState state, boolean force) { JobState curr = getState(); if (force || (isEnabled() && curr.canTransition(state)) || (!isEnabled() && (state == JobState.IDLE)) || (!isEnabled() && (state == JobState.ERROR))) { // Note dependence on ordering! this.state = state.ordinal(); return true; } else if (state != curr) { log.warn( "[job.setstate] {}job {} cannot transition {} -> {}", (disabled) ? "disabled " : "", getId(), curr, state); for (StackTraceElement elt : Thread.currentThread().getStackTrace()) { log.warn(elt.toString()); } return false; } return true; } public int getTaskCount() { return nodes.size(); } @Override public synchronized JobTask getTask(int id) { if (nodes == null) { return null; } for (JobTask node : nodes) { if (node.getTaskID() == id) { node.setJobUUID(this.id); return node; } } return null; } @Override public synchronized List<JobTask> getCopyOfTasks() { if (nodes == null) { nodes = new ArrayList<>(); } return ImmutableList.copyOf(nodes); } public List<JobTask> getCopyOfTasksSorted() { if (nodes == null) { nodes = new ArrayList<>(); } List<JobTask> tasksCopy = Lists.newArrayList(nodes); Collections.sort(tasksCopy, taskNodeComparator); return tasksCopy; } @Override public synchronized void addTask(JobTask task) { if (nodes == null) { nodes = new ArrayList<>(); } nodes.add(task); if (task.getState().isActiveState()) { this.countActiveTasks++; } } private synchronized void recountActiveTasks() { this.countActiveTasks = 0; for (JobTask t : nodes) { if (t.getState().isActiveState()) { this.countActiveTasks++; } } } @Override public synchronized void 
setTasks(List<JobTask> tasks) { this.nodes = Lists.newArrayList(tasks); recountActiveTasks(); } public synchronized int getCountActiveTasks() { return countActiveTasks; } @Override public JobQueryConfig getQueryConfig() { return queryConfig; } @Override public void setQueryConfig(JobQueryConfig queryConfig) { this.queryConfig = queryConfig; } @Override public JSONObject toJSON() throws Exception { recountActiveTasks(); return CodecJSON.encodeJSON(this); } @Override public String toString() { try { return CodecJSON.encodeString(this); } catch (Exception e) { return super.toString(); } } @Override public int compareTo(IJob o) { return getSubmitTime() > o.getSubmitTime() ? 1 : -1; } public synchronized boolean setTaskState(JobTask task, JobTaskState newState) { return setTaskState(task, newState, false); } /** * Change a task's state, and update the job's state if appropriate * * @param task The task to modify * @param newState The new state to set * @param force Whether to force the state transition regardless of the expected transition map * @return True on success */ public synchronized boolean setTaskState(JobTask task, JobTaskState newState, boolean force) { JobTaskState prevState = task.getState(); if (!task.setState(newState, force)) { return false; } if (prevState.isActiveState() && !newState.isActiveState()) { this.countActiveTasks--; } else if (!prevState.isActiveState() && newState.isActiveState()) { this.countActiveTasks++; } if (newState == JobTaskState.ERROR) { this.disabled = true; } calculateJobState(force); return true; } /** Calculate the job state based on the state of its tasks */ private boolean calculateJobState(boolean force) { boolean err = false, sched = false, run = false, reb = false, stopped = false; for (JobTask t : nodes) { if (t.getWasStopped()) { stopped = true; } if (t.getState() == JobTaskState.REBALANCE) { reb = true; } else if (t.isRunning()) { run = true; } else if (t.getState() == JobTaskState.ALLOCATED || 
t.getState().isQueuedState()) { sched = true; } else if (t.getState() == JobTaskState.ERROR) { err = true; break; } } JobState oldJobState = getState(); JobState nextState = (err) ? JobState.ERROR : (reb) ? JobState.REBALANCE : (run) ? JobState.RUNNING : (sched) ? JobState.SCHEDULED : JobState.IDLE; if (setState(nextState, force)) { // If transitioning from error to non-error state, enable job as long as it has run recently. if (oldJobState == JobState.ERROR && nextState != JobState.ERROR && getSubmitTime() != null && System.currentTimeMillis() - getSubmitTime() < AUTO_ENABLE_CUTOFF) { setEnabled(true); } wasStopped = stopped; return true; } else { return false; } } public void errorTask(JobTask task, int errorCode) { setTaskState(task, JobTaskState.ERROR, true); task.setErrorCode(errorCode); } /** * Check whether all tasks of a job are idle/errored/rebalancing. If a job is kicked, and * isFinished evaluates to true, then it can be assumed that every task ran at least once, * regardless of whether any rebalancing was started in the mean time. 
*
   * @return True if the job is finished
   */
  public boolean isFinished() {
    for (JobTask jobTask : getCopyOfTasks()) {
      if (!taskStatesToFinishJob.contains(jobTask.getState())) {
        return false;
      }
    }
    return true;
  }

  /** @return the stored dontAutoBalanceMe flag */
  @Override
  public boolean getDontAutoBalanceMe() {
    return dontAutoBalanceMe;
  }

  /** @param dontDeleteMe the dontDeleteMe flag to store */
  @Override
  public void setDontDeleteMe(boolean dontDeleteMe) {
    this.dontDeleteMe = dontDeleteMe;
  }

  /** @return the stored dontDeleteMe flag */
  @Override
  public boolean getDontDeleteMe() {
    return dontDeleteMe;
  }

  /** @param dontCloneMe the dontCloneMe flag to store */
  @Override
  public void setDontCloneMe(boolean dontCloneMe) {
    this.dontCloneMe = dontCloneMe;
  }

  /** @return the stored dontCloneMe flag */
  @Override
  public boolean getDontCloneMe() {
    return dontCloneMe;
  }

  /** @param dontAutoBalanceMe the dontAutoBalanceMe flag to store */
  @Override
  public void setDontAutoBalanceMe(boolean dontAutoBalanceMe) {
    this.dontAutoBalanceMe = dontAutoBalanceMe;
  }

  /** @return the stored hourly backup count */
  @Override
  public int getHourlyBackups() {
    return hourlyBackups;
  }

  /** @return the stored daily backup count */
  @Override
  public int getDailyBackups() {
    return dailyBackups;
  }

  /** @return the stored weekly backup count */
  @Override
  public int getWeeklyBackups() {
    return weeklyBackups;
  }

  /** @return the stored monthly backup count */
  @Override
  public int getMonthlyBackups() {
    return monthlyBackups;
  }

  /** @param hourlyBackups the hourly backup count to store */
  @Override
  public void setHourlyBackups(int hourlyBackups) {
    this.hourlyBackups = hourlyBackups;
  }

  /** @param dailyBackups the daily backup count to store */
  @Override
  public void setDailyBackups(int dailyBackups) {
    this.dailyBackups = dailyBackups;
  }

  /** @param weeklyBackups the weekly backup count to store */
  @Override
  public void setWeeklyBackups(int weeklyBackups) {
    this.weeklyBackups = weeklyBackups;
  }

  /** @param monthlyBackups the monthly backup count to store */
  @Override
  public void setMonthlyBackups(int monthlyBackups) {
    this.monthlyBackups = monthlyBackups;
  }

  /** @return the stored wasStopped flag */
  @Override
  public boolean getWasStopped() {
    return wasStopped;
  }

  /** @param wasStopped the wasStopped flag to store */
  @Override
  public void setWasStopped(boolean wasStopped) {
    this.wasStopped = wasStopped;
  }

  /**
   * Mark a task as finished.
   *
   * <p>If the task is in REPLICATE or BACKUP and carries a positive pre-fail error code, the task
   * is put back into ERROR with that code and nothing else happens. Otherwise the task's error
   * code is cleared and the task is forced to IDLE; if that leaves the job IDLE the job end time
   * is set. Finally, if the task's previous error code was a replicate or backup failure and no
   * task is left in ERROR, the job's {@code disabled} flag is cleared.
   *
   * @param task the task that finished
   */
  public void setTaskFinished(JobTask task) {
    int preFailErrorCode = task.getPreFailErrorCode();
    int oldErrorCode = task.getErrorCode();
    if (task.getState() == JobTaskState.REPLICATE || task.getState() == JobTaskState.BACKUP) {
      if (preFailErrorCode > 0) {
        // Restore the old job error if it existed
        errorTask(task, preFailErrorCode);
        return;
      }
    }
    task.setErrorCode(0);
    setTaskState(task, JobTaskState.IDLE, true);
    if (getState() == JobState.IDLE) {
      setEndTime(JitterClock.globalTime());
    }
    // Group the two failure codes explicitly. Written as "a && b || c", the check parsed as
    // "(a && b) || c" and re-enabled the job after any resolved backup failure even while other
    // tasks were still in ERROR.
    boolean resolvedReplicateOrBackupFailure =
        oldErrorCode == JobTaskErrorCode.EXIT_REPLICATE_FAILURE
            || oldErrorCode == JobTaskErrorCode.EXIT_BACKUP_FAILURE;
    if (resolvedReplicateOrBackupFailure && countErrorTasks() == 0) {
      // If the job is disabled because this task failed to replicate, enable it.
      log.warn("Enabling job " + getId() + " because the last replicate/backup error was resolved");
      disabled = false;
    }
  }

  /** @return the stored maxSimulRunning value */
  @Override
  public int getMaxSimulRunning() {
    return maxSimulRunning;
  }

  /** @param maxSimulRunning the maxSimulRunning value to store */
  @Override
  public void setMaxSimulRunning(int maxSimulRunning) {
    this.maxSimulRunning = maxSimulRunning;
  }

  /** @return the stored autoRetry flag */
  @Override
  public boolean getAutoRetry() {
    return autoRetry;
  }

  /** @param autoRetry the autoRetry flag to store */
  @Override
  public void setAutoRetry(boolean autoRetry) {
    this.autoRetry = autoRetry;
  }

  /** @return the number of tasks whose state is ERROR */
  private int countErrorTasks() {
    int count = 0;
    List<JobTask> tasks = getCopyOfTasks();
    if (tasks == null) {
      return count;
    }
    for (JobTask task : tasks) {
      if (task != null && task.getState() == JobTaskState.ERROR) {
        count++;
      }
    }
    return count;
  }

  /**
   * @return the sum of the tasks' byte counts divided by the number of tasks (integer division),
   *     or 0 when the job has no tasks
   */
  public long calcAverageTaskSizeBytes() {
    List<JobTask> tasks = getCopyOfTasks();
    if (tasks == null || tasks.size() <= 0) {
      return 0;
    }
    long rv = 0;
    for (JobTask task : tasks) {
      if (task != null) {
        rv += task.getByteCount();
      }
    }
    return rv / (tasks.size());
  }

  /**
   * @return the minion type; when none has been set, the field is first assigned {@code
   *     Minion.defaultMinionType}
   */
  @Override
  public String getMinionType() {
    if (minionType == null) {
      minionType = Minion.defaultMinionType;
    }
    return minionType;
  }

  /**
   * Get an estimate for the last time this job was run.
   *
   * @return the start time when the end time is null and the job is IDLE; otherwise the end time,
   *     which is {@code null} when no end time is set and the job is not IDLE
   */
  public Long getCanonicalTime() {
    // Get an estimate for the last time this job was run. Use end-time if non-null; otherwise,
    // startTime.
    return (endTime == null && getState() == JobState.IDLE) ? startTime : endTime;
  }

  /** @param minionType the minion type to store */
  @Override
  public void setMinionType(String minionType) {
    this.minionType = minionType;
  }

  /**
   * Decide whether the job is due for an automatic rekick.
   *
   * <p>All of the following must hold: the job is enabled, it has a canonical time, it has run at
   * least once, it has a positive rekick timeout, and at least that timeout (multiplied by 60000,
   * i.e. taken as minutes and converted to milliseconds) has passed since the canonical time.
   *
   * @param clock the current time, in the same units as the canonical time
   * @return whether the job should be rekicked now
   */
  public boolean shouldAutoRekick(long clock) {
    Long canonicalTime = getCanonicalTime();
    return isEnabled()
        && canonicalTime != null
        && getRunCount() > 0
        && getRekickTimeout() != null
        && getRekickTimeout() > 0
        && clock - canonicalTime >= (getRekickTimeout() * 60000L);
  }

  /**
   * Log a job event to a rolling log file
   *
   * @param job the job whose fields are recorded
   * @param event the event being logged
   * @param eventLog the rolling log passed to {@code LogUtil.log}
   */
  public static void logJobEvent(Job job, JobEvent event, RollingLog eventLog) {
    LogUtil.log(
        eventLog,
        log,
        new StringMapHelper()
            .put("event", event)
            .put("time", System.currentTimeMillis())
            .put("jobid", job.getId())
            .put("creator", job.getCreator())
            .put("owner", job.getOwner())
            .put("createTime", job.getCreateTime())
            .put("priority", job.getPriority())
            .put("replicas", job.getReplicas())
            .put("runCount", job.getRunCount())
            .put("state", job.getState())
            .put("taskCount", job.getTaskCount())
            .put("avgTaskSize", job.calcAverageTaskSizeBytes())
            .put("startTime", job.getStartTime())
            .put("endTime", job.getEndTime())
            .put("submitTime", job.getSubmitTime())
            .put("command", job.getCommand()));
  }
}