/**
 * @@@ Do we only need to handle the case whose assign type is ASSIGN_TYPE_NEW?
 *
 * @return map from supervisor id to hostname
 */
private Map<String, String> generateSidToHost() {
    Map<String, String> sidToHostname = new HashMap<String, String>();
    if (oldAssignment != null) {
        sidToHostname.putAll(oldAssignment.getNodeHost());
    }

    // entries from live supervisors overwrite the ones kept from the old assignment
    for (Entry<String, SupervisorInfo> entry : cluster.entrySet()) {
        String supervisorId = entry.getKey();
        SupervisorInfo supervisorInfo = entry.getValue();

        sidToHostname.put(supervisorId, supervisorInfo.getHostName());
    }

    return sidToHostname;
}
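// Tiny illustrative example, not part of the original code (the supervisor id and hostnames are
// invented): putAll()/put() overwrite earlier entries, so a hostname taken from the live cluster
// map wins over a stale hostname kept from the old assignment.
private static Map<String, String> sidToHostMergeOrderExample() {
    Map<String, String> sidToHostname = new HashMap<String, String>();
    sidToHostname.put("supervisor-1", "stale-host"); // as if copied from oldAssignment.getNodeHost()
    sidToHostname.put("supervisor-1", "live-host");  // as if copied from the live cluster map
    return sidToHostname;                            // {supervisor-1=live-host}
}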
private static Set<ResourceWorkerSlot> mkLocalAssignment(TopologyAssignContext context) {
    Set<ResourceWorkerSlot> result = new HashSet<ResourceWorkerSlot>();

    Map<String, SupervisorInfo> cluster = context.getCluster();
    if (cluster.size() != 1) {
        throw new RuntimeException(
                "Local assignment expects exactly one supervisor, but found " + cluster.size());
    }

    SupervisorInfo localSupervisor = null;
    String supervisorId = null;
    for (Entry<String, SupervisorInfo> entry : cluster.entrySet()) {
        supervisorId = entry.getKey();
        localSupervisor = entry.getValue();
    }

    // in local mode all tasks run in a single worker on the single local supervisor
    int port = localSupervisor.getWorkerPorts().iterator().next();
    ResourceWorkerSlot worker = new ResourceWorkerSlot(supervisorId, port);
    worker.setTasks(new HashSet<Integer>(context.getAllTaskIds()));
    worker.setHostname(localSupervisor.getHostName());

    result.add(worker);
    return result;
}
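// Illustrative sketch, not part of the original scheduler: what a caller can expect from
// mkLocalAssignment() when the context has been populated for local mode. getTasks() is assumed
// to be the getter paired with the setTasks() call above.
private static void checkLocalAssignmentSketch(TopologyAssignContext context) {
    Set<ResourceWorkerSlot> workers = mkLocalAssignment(context);

    // local mode always yields exactly one worker that owns every task id
    assert workers.size() == 1;
    ResourceWorkerSlot worker = workers.iterator().next();
    assert worker.getTasks().containsAll(context.getAllTaskIds());
}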
/**
 * Get free slots (i.e. all idle resources): remove every worker port that is already occupied
 * by an assignment, so that only the free ports remain in each SupervisorInfo.
 *
 * @param supervisorInfos
 * @param stormClusterState
 * @throws Exception
 */
public static void getFreeSlots(Map<String, SupervisorInfo> supervisorInfos,
        StormClusterState stormClusterState) throws Exception {

    Map<String, Assignment> assignments = Cluster.get_all_assignment(stormClusterState, null);

    for (Entry<String, Assignment> entry : assignments.entrySet()) {
        String topologyId = entry.getKey();
        Assignment assignment = entry.getValue();

        Set<ResourceWorkerSlot> workers = assignment.getWorkers();

        for (ResourceWorkerSlot worker : workers) {
            SupervisorInfo supervisorInfo = supervisorInfos.get(worker.getNodeId());
            if (supervisorInfo == null) {
                // the supervisor is dead
                continue;
            }
            supervisorInfo.getWorkerPorts().remove(worker.getPort());
        }
    }
}
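// Illustrative usage sketch, not part of the original code (assumption: the caller already holds
// a StormClusterState handle and the full supervisor map). After getFreeSlots() returns, whatever
// is left in getWorkerPorts() are the ports no assignment currently occupies.
public static void logFreeSlotsSketch(Map<String, SupervisorInfo> supervisorInfos,
        StormClusterState stormClusterState) throws Exception {
    getFreeSlots(supervisorInfos, stormClusterState);

    for (Entry<String, SupervisorInfo> entry : supervisorInfos.entrySet()) {
        LOG.info("Supervisor " + entry.getKey()
                + " free ports: " + entry.getValue().getWorkerPorts());
    }
}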
/**
 * @@@ There may be a problem here: some dead slots have already been freed.
 *
 * @param context
 */
protected void freeUsed(TopologyAssignContext context) {
    // every task except the unstopped ones can have its resources freed
    Set<Integer> canFree = new HashSet<Integer>();
    canFree.addAll(context.getAllTaskIds());
    canFree.removeAll(context.getUnstoppedTaskIds());

    Map<String, SupervisorInfo> cluster = context.getCluster();
    Map<Integer, ResourceAssignment> oldAssigns = context.getOldAssignment().getTaskToResource();
    for (Integer task : canFree) {
        ResourceAssignment oldAssign = oldAssigns.get(task);
        if (oldAssign == null) {
            LOG.warn("When freeing rebalance resources, no ResourceAssignment of task " + task);
            continue;
        }

        SupervisorInfo supervisorInfo = cluster.get(oldAssign.getSupervisorId());
        if (supervisorInfo == null) {
            continue;
        }
        supervisorInfo.getCpuPool().free(oldAssign.getCpuSlotNum(), context);
        supervisorInfo.getMemPool().free(oldAssign.getMemSlotNum(), context);
        supervisorInfo.getDiskPool().free(oldAssign.getDiskSlot(), context);
        supervisorInfo.getNetPool().free(oldAssign.getPort(), context);
    }
}
public static Map<String, String> getTopologyNodeHost(
        Map<String, SupervisorInfo> supervisorMap,
        Assignment existingAssignment,
        Set<ResourceWorkerSlot> workers) {

    // collect the used nodes so that unused nodes can be dropped from allNodeHost
    Set<String> usedNodes = new HashSet<String>();
    for (ResourceWorkerSlot worker : workers) {
        usedNodes.add(worker.getNodeId());
    }

    // Map<supervisorId, hostname> kept from the existing assignment
    Map<String, String> allNodeHost = new HashMap<String, String>();
    if (existingAssignment != null) {
        allNodeHost.putAll(existingAssignment.getNodeHost());
    }

    // overwrite with the alive supervisors: Map<supervisorId, hostname>
    Map<String, String> nodeHost = SupervisorInfo.getNodeHost(supervisorMap);
    if (nodeHost != null) {
        allNodeHost.putAll(nodeHost);
    }

    Map<String, String> ret = new HashMap<String, String>();
    for (String supervisorId : usedNodes) {
        if (allNodeHost.containsKey(supervisorId)) {
            ret.put(supervisorId, allNodeHost.get(supervisorId));
        } else {
            LOG.warn("Node " + supervisorId + " is not in the supervisor list");
        }
    }

    return ret;
}
@Override
public SupervisorWorkers getSupervisorWorkers(String host) throws NotAliveException, TException {
    try {
        StormClusterState stormClusterState = data.getStormClusterState();

        String supervisorId = null;
        SupervisorInfo supervisorInfo = null;

        String ip = NetWorkUtils.host2Ip(host);
        String hostName = NetWorkUtils.ip2Host(host);

        // all supervisors
        Map<String, SupervisorInfo> supervisorInfos = Cluster.allSupervisorInfo(stormClusterState, null);

        for (Entry<String, SupervisorInfo> entry : supervisorInfos.entrySet()) {
            SupervisorInfo info = entry.getValue();
            if (info.getHostName().equals(hostName) || info.getHostName().equals(ip)) {
                supervisorId = entry.getKey();
                supervisorInfo = info;
                break;
            }
        }

        if (supervisorId == null) {
            throw new TException("No supervisor of " + host);
        }

        Map<String, Assignment> assignments = new HashMap<String, Assignment>();

        // get the StormBase of every active topology
        Map<String, StormBase> bases = Cluster.topology_bases(stormClusterState);
        for (Entry<String, StormBase> entry : bases.entrySet()) {
            String topologyId = entry.getKey();
            StormBase base = entry.getValue();

            Assignment assignment = stormClusterState.assignment_info(topologyId, null);
            if (assignment == null) {
                LOG.error("Failed to get assignment of " + topologyId);
                continue;
            }
            assignments.put(topologyId, assignment);
        }

        Map<Integer, WorkerSummary> portWorkerSummarys = new TreeMap<Integer, WorkerSummary>();
        for (Entry<String, Assignment> entry : assignments.entrySet()) {
            String topologyId = entry.getKey();
            Assignment assignment = entry.getValue();

            Map<Integer, String> taskToComponent =
                    Cluster.topology_task_info(stormClusterState, topologyId);

            Map<Integer, ResourceAssignment> taskToResource = assignment.getTaskToResource();

            for (Entry<Integer, ResourceAssignment> resourceEntry : taskToResource.entrySet()) {
                Integer taskId = resourceEntry.getKey();
                ResourceAssignment resourceAssignment = resourceEntry.getValue();

                // skip tasks that are not on this supervisor
                if (supervisorId.equals(resourceAssignment.getSupervisorId()) == false) {
                    continue;
                }

                supervisorInfo.allocResource(resourceAssignment);

                Integer port = resourceAssignment.getPort();
                WorkerSummary workerSummary = portWorkerSummarys.get(port);
                if (workerSummary == null) {
                    workerSummary = new WorkerSummary();
                    workerSummary.set_port(port);
                    workerSummary.set_topology(topologyId);
                    workerSummary.set_tasks(new ArrayList<TaskSummary>());

                    portWorkerSummarys.put(port, workerSummary);
                }

                String componentName = taskToComponent.get(taskId);
                int uptime = TimeUtils.time_delta(assignment.getTaskStartTimeSecs().get(taskId));
                List<TaskSummary> tasks = workerSummary.get_tasks();

                TaskSummary taskSummary = NimbusUtils.mkSimpleTaskSummary(
                        resourceAssignment, taskId, componentName, host, uptime);

                tasks.add(taskSummary);
            }
        }

        List<WorkerSummary> workersList = new ArrayList<WorkerSummary>();
        workersList.addAll(portWorkerSummarys.values());

        SupervisorSummary supervisorSummary =
                NimbusUtils.mkSupervisorSummary(supervisorInfo, supervisorId);

        return new SupervisorWorkers(supervisorSummary, workersList);

    } catch (TException e) {
        LOG.error("Failed to get SupervisorWorkers of " + host, e);
        throw e;
    } catch (Exception e) {
        LOG.error("Failed to get SupervisorWorkers of " + host, e);
        throw new TException(e);
    }
}
@Override
public Map<Integer, ResourceAssignment> assignTasks(TopologyAssignContext context)
        throws FailedAssignTopologyException {

    int assignType = context.getAssignType();
    if (TopologyAssignContext.isAssignTypeValid(assignType) == false) {
        throw new FailedAssignTopologyException("Invalid assign type " + assignType);
    }

    DefaultTopologyAssignContext defaultContext = new DefaultTopologyAssignContext(context);
    if (assignType == TopologyAssignContext.ASSIGN_TYPE_REBALANCE) {
        freeUsed(defaultContext);
    }
    LOG.info("Dead tasks:" + defaultContext.getDeadTaskIds());
    LOG.info("Unstopped tasks:" + defaultContext.getUnstoppedTaskIds());

    Set<Integer> needAssignTasks = getNeedAssignTasks(defaultContext);

    Map<Integer, ResourceAssignment> keepAssigns = getKeepAssign(defaultContext, needAssignTasks);

    // use a TreeMap so the result keeps tasks in ascending task-id order
    Map<Integer, ResourceAssignment> ret = new TreeMap<Integer, ResourceAssignment>();
    ret.putAll(keepAssigns);
    ret.putAll(defaultContext.getUnstoppedAssignments());

    Map<WorkerSlot, List<Integer>> keepAssignWorkers = Assignment.getWorkerTasks(keepAssigns);

    int allocWorkerNum = defaultContext.getTotalWorkerNum()
            - defaultContext.getUnstoppedWorkerNum() - keepAssignWorkers.size();
    if (allocWorkerNum <= 0) {
        LOG.warn("No need to assign workers, all workers are fine "
                + defaultContext.toDetailString());
        throw new FailedAssignTopologyException("No need to assign workers, all workers are fine");
    }

    Set<String> outputConfigComponents = new HashSet<String>();

    Map<ComponentAssignType, Pair<Set<Integer>, IPreassignTask>> typeHandler =
            registerPreAssignHandler(defaultContext, needAssignTasks);

    Map<Integer, ResourceAssignment> newAssigns = new HashMap<Integer, ResourceAssignment>();
    Set<String> usedSupervisorIds = new HashSet<String>();
    List<Integer> lastFailed = new ArrayList<Integer>();

    for (Entry<ComponentAssignType, Pair<Set<Integer>, IPreassignTask>> entry : typeHandler.entrySet()) {
        ComponentAssignType type = entry.getKey();
        Set<Integer> tasks = entry.getValue().getFirst();
        IPreassignTask handler = entry.getValue().getSecond();

        // retry the tasks that the previous handler failed to place
        tasks.addAll(lastFailed);
        lastFailed.clear();

        List<Integer> sortedTasks = sortAssignTasks(defaultContext, tasks);

        StormTopology sysTopology = defaultContext.getSysTopology();

        for (Integer task : sortedTasks) {
            Set<String> canUsedSupervisorIds =
                    getCanUsedSupervisors(defaultContext, usedSupervisorIds, allocWorkerNum);

            String componentName = defaultContext.getTaskToComponent().get(task);
            ComponentCommon componentCommon =
                    ThriftTopologyUtils.getComponentCommon(sysTopology, componentName);

            Map componentMap = (Map) JStormUtils.from_json(componentCommon.get_json_conf());
            if (componentMap == null) {
                componentMap = Maps.newHashMap();
            }

            // only log each component's config once
            if (outputConfigComponents.contains(componentName) == false) {
                LOG.info("Component map of " + componentName + "\n" + componentMap);
                outputConfigComponents.add(componentName);
            }

            ResourceAssignment preAssignment = handler.preAssign(task, defaultContext,
                    componentMap, componentName, canUsedSupervisorIds, ret, newAssigns);
            if (preAssignment == null) {
                // preAssign failed; retry this task with the next handler
                lastFailed.add(task);
            } else {
                // preAssign succeeded
                SupervisorInfo supervisorInfo =
                        defaultContext.getCluster().get(preAssignment.getSupervisorId());
                LOG.info("Task " + task + " has been assigned to " + supervisorInfo.getHostName());

                newAssigns.put(task, preAssignment);
                ret.put(task, preAssignment);
                usedSupervisorIds.add(preAssignment.getSupervisorId());
            }
        }
    }

    if (lastFailed.isEmpty() == false) {
        throw new FailedAssignTopologyException("Failed to assign tasks " + lastFailed);
    }

    // Here the post-assign handler is just hardcoded
    IPostAssignTask postAssignHandler = new PostAssignTaskPort();
    postAssignHandler.postAssign(defaultContext, newAssigns, allocWorkerNum);

    LOG.info("Keep alive slots:" + keepAssigns);
    LOG.info("Unstopped slots:" + defaultContext.getUnstoppedAssignments());
    LOG.info("New assign slots:" + newAssigns);

    return ret;
}
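// Illustrative driver sketch, not part of the original scheduler (assumption: "context" is a
// TopologyAssignContext that the caller has already populated). It only demonstrates the contract
// of assignTasks(): the returned TreeMap covers every task, in ascending task-id order, and each
// value carries the supervisor id and port the task was placed on.
protected void logAssignmentSketch(TopologyAssignContext context) throws FailedAssignTopologyException {
    Map<Integer, ResourceAssignment> taskToResource = assignTasks(context);

    for (Entry<Integer, ResourceAssignment> entry : taskToResource.entrySet()) {
        ResourceAssignment assignment = entry.getValue();
        LOG.info("Task " + entry.getKey() + " -> supervisor " + assignment.getSupervisorId()
                + ", port " + assignment.getPort());
    }
}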