@Override public void start() { try { Configuration conf = getConf(); eagerInitListener.start(); taskTrackerManager.addJobInProgressListener(eagerInitListener); taskTrackerManager.addJobInProgressListener(jobListener); poolMgr = new PoolManager(conf); loadMgr = (LoadManager) ReflectionUtils.newInstance( conf.getClass( "mapred.fairscheduler.loadmanager", CapBasedLoadManager.class, LoadManager.class), conf); loadMgr.setTaskTrackerManager(taskTrackerManager); loadMgr.start(); taskSelector = (TaskSelector) ReflectionUtils.newInstance( conf.getClass( "mapred.fairscheduler.taskselector", DefaultTaskSelector.class, TaskSelector.class), conf); taskSelector.setTaskTrackerManager(taskTrackerManager); taskSelector.start(); Class<?> weightAdjClass = conf.getClass("mapred.fairscheduler.weightadjuster", null); if (weightAdjClass != null) { weightAdjuster = (WeightAdjuster) ReflectionUtils.newInstance(weightAdjClass, conf); } assignMultiple = conf.getBoolean("mapred.fairscheduler.assignmultiple", false); sizeBasedWeight = conf.getBoolean("mapred.fairscheduler.sizebasedweight", false); initialized = true; running = true; lastUpdateTime = clock.getTime(); // Start a thread to update deficits every UPDATE_INTERVAL if (runBackgroundUpdates) new UpdateThread().start(); // Register servlet with JobTracker's Jetty server if (taskTrackerManager instanceof JobTracker) { JobTracker jobTracker = (JobTracker) taskTrackerManager; StatusHttpServer infoServer = jobTracker.infoServer; infoServer.setAttribute("scheduler", this); infoServer.addServlet("scheduler", "/scheduler", FairSchedulerServlet.class); } } catch (Exception e) { // Can't load one of the managers - crash the JobTracker now while it is // starting up so that the user notices. throw new RuntimeException("Failed to start FairScheduler", e); } LOG.info("Successfully configured FairScheduler"); }
/**
 * Cancels a grant previously given on a node and, if that frees enough
 * capacity for the node to accept a unit request of the same type again,
 * puts the node back into the runnable index for that type (unless the
 * fault manager reports the node as blacklisted for the type).
 *
 * <p>The call only logs a warning and returns when the node is unknown,
 * marked deleted, or its host is rejected by {@code canAllowNode}. It
 * returns silently when the node has no matching grant.
 *
 * @param nodeName the node the grant is on
 * @param sessionId the session the grant was given to
 * @param requestId the request this grant satisfied
 */
public void cancelGrant(String nodeName, String sessionId, int requestId) {
  ClusterNode node = nameToNode.get(nodeName);
  if (node == null) {
    LOG.warn("Canceling grant for non-existent node: " + nodeName);
    return;
  }

  synchronized (node) {
    if (node.deleted) {
      LOG.warn("Canceling grant for deleted node: " + nodeName);
      return;
    }

    String hoststr = node.getClusterNodeInfo().getAddress().getHost();
    if (!canAllowNode(hoststr)) {
      LOG.warn("Canceling grant for excluded node: " + hoststr);
      return;
    }

    ResourceRequestInfo req = node.getRequestForGrant(sessionId, requestId);
    if (req == null) {
      // Nothing was granted for this session/request pair on this node.
      return;
    }

    ResourceRequest unitReq =
        Utilities.getUnitResourceRequest(req.getType());

    // Remember whether the node could already take one more unit request
    // before the grant is released.
    boolean wasRunnable = node.checkForGrant(unitReq, resourceLimit);

    node.cancelGrant(sessionId, requestId);
    loadManager.decrementLoad(req.getType());

    if (wasRunnable) {
      // The runnable state did not change, so the index is left alone.
      return;
    }
    if (!node.checkForGrant(unitReq, resourceLimit)) {
      // Still no room for a unit request after the cancellation.
      return;
    }

    // The node just became runnable for this type; re-index it unless it
    // is blacklisted.
    RunnableIndices indices = typeToIndices.get(req.getType());
    if (!faultManager.isBlacklisted(node.getName(), req.getType())) {
      indices.addRunnable(node);
    }
  }
}
/**
 * Records a grant on a node, provided the node is not deleted and can
 * still accept a unit request of the grant's resource type. After the
 * grant is recorded, the node is removed from the runnable index for that
 * type if it can no longer accept another unit request.
 *
 * @param node the node the grant is on
 * @param sessionId the session the grant is given to
 * @param req the request this grant satisfies
 * @return true if the grant can be added to the node, false otherwise
 */
public boolean addGrant(
    ClusterNode node, String sessionId, ResourceRequestInfo req) {
  synchronized (node) {
    // Reject when the node is gone or has no room for one more unit request.
    if (node.deleted
        || !node.checkForGrant(
            Utilities.getUnitResourceRequest(req.getType()), resourceLimit)) {
      return false;
    }

    node.addGrant(sessionId, req);
    loadManager.incrementLoad(req.getType());
    hostsToSessions.get(node).add(sessionId);

    // Re-check capacity now that the grant has been counted.
    boolean stillRunnable = node.checkForGrant(
        Utilities.getUnitResourceRequest(req.getType()), resourceLimit);
    if (!stillRunnable) {
      typeToIndices.get(req.getType()).deleteRunnable(node);
    }
  }
  return true;
}
/** * This method rebuilds members related to the NodeManager instance, which were not directly * persisted themselves. * * @throws IOException */ public void restoreAfterSafeModeRestart() throws IOException { if (!clusterManager.safeMode) { throw new IOException( "restoreAfterSafeModeRestart() called while the " + "Cluster Manager was not in Safe Mode"); } // Restoring all the ClusterNode(s) for (ClusterNode clusterNode : nameToNode.values()) { restoreClusterNode(clusterNode); } // Restoring all the RequestedNodes(s) for (ClusterNode clusterNode : nameToNode.values()) { for (ResourceRequestInfo resourceRequestInfo : clusterNode.grants.values()) { // Fix the RequestedNode(s) restoreResourceRequestInfo(resourceRequestInfo); loadManager.incrementLoad(resourceRequestInfo.getType()); } } }
@Override public synchronized List<Task> assignTasks(TaskTrackerStatus tracker) throws IOException { if (!initialized) // Don't try to assign tasks if we haven't yet started up return null; // Reload allocations file if it hasn't been loaded in a while poolMgr.reloadAllocsIfNecessary(); // Compute total runnable maps and reduces int runnableMaps = 0; int runnableReduces = 0; for (JobInProgress job : infos.keySet()) { runnableMaps += runnableTasks(job, TaskType.MAP); runnableReduces += runnableTasks(job, TaskType.REDUCE); } // Compute total map/reduce slots // In the future we can precompute this if the Scheduler becomes a // listener of tracker join/leave events. int totalMapSlots = getTotalSlots(TaskType.MAP); int totalReduceSlots = getTotalSlots(TaskType.REDUCE); // Scan to see whether any job needs to run a map, then a reduce ArrayList<Task> tasks = new ArrayList<Task>(); TaskType[] types = new TaskType[] {TaskType.MAP, TaskType.REDUCE}; for (TaskType taskType : types) { boolean canAssign = (taskType == TaskType.MAP) ? loadMgr.canAssignMap(tracker, runnableMaps, totalMapSlots) : loadMgr.canAssignReduce(tracker, runnableReduces, totalReduceSlots); if (canAssign) { // Figure out the jobs that need this type of task List<JobInProgress> candidates = new ArrayList<JobInProgress>(); for (JobInProgress job : infos.keySet()) { if (job.getStatus().getRunState() == JobStatus.RUNNING && neededTasks(job, taskType) > 0) { candidates.add(job); } } // Sort jobs by deficit (for Fair Sharing) or submit time (for FIFO) Comparator<JobInProgress> comparator = useFifo ? new FifoJobComparator() : new DeficitComparator(taskType); Collections.sort(candidates, comparator); for (JobInProgress job : candidates) { Task task = (taskType == TaskType.MAP ? 
taskSelector.obtainNewMapTask(tracker, job) : taskSelector.obtainNewReduceTask(tracker, job)); if (task != null) { // Update the JobInfo for this job so we account for the launched // tasks during this update interval and don't try to launch more // tasks than the job needed on future heartbeats JobInfo info = infos.get(job); if (taskType == TaskType.MAP) { info.runningMaps++; info.neededMaps--; } else { info.runningReduces++; info.neededReduces--; } tasks.add(task); if (!assignMultiple) return tasks; break; } } } } // If no tasks were found, return null return tasks.isEmpty() ? null : tasks; }