/**
 * Get the map of tasks that are still alive and whose old assignments will be kept.
 * This is only meaningful when the assign type is ASSIGN_TYPE_MONITOR.
 *
 * @param defaultContext assignment context of the topology
 * @param needAssigns    tasks that must be (re)assigned and therefore cannot be kept
 * @return map of taskId to the ResourceAssignment kept from the old assignment
 */
public Map<Integer, ResourceAssignment> getKeepAssign(
        DefaultTopologyAssignContext defaultContext, Set<Integer> needAssigns) {
    // keep = all tasks - unstopped tasks - tasks that need a new assignment
    Set<Integer> keepAssignIds = new HashSet<Integer>();
    keepAssignIds.addAll(defaultContext.getAllTaskIds());
    keepAssignIds.removeAll(defaultContext.getUnstoppedTaskIds());
    keepAssignIds.removeAll(needAssigns);

    Map<Integer, ResourceAssignment> keeps = new HashMap<Integer, ResourceAssignment>();
    if (keepAssignIds.isEmpty()) {
        return keeps;
    }

    Assignment oldAssignment = defaultContext.getOldAssignment();
    if (oldAssignment == null) {
        return keeps;
    }

    Map<Integer, ResourceAssignment> olds = oldAssignment.getTaskToResource();
    for (Integer task : keepAssignIds) {
        ResourceAssignment oldResource = olds.get(task);
        if (oldResource == null) {
            LOG.warn("No old assignment of " + task + ", " + defaultContext.toDetailString());
            continue;
        }
        keeps.put(task, oldResource);
    }
    return keeps;
}
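// Illustrative sketch (not part of the original code): the keep-assign selection above is just
// set arithmetic over task ids, shown here with plain JDK collections. The parameter names are
// assumptions that mirror the context fields used by getKeepAssign().
private static Set<Integer> sketchKeepAssignIds(
        Set<Integer> allTaskIds, Set<Integer> unstoppedTaskIds, Set<Integer> needAssigns) {
    Set<Integer> keep = new HashSet<Integer>(allTaskIds);
    keep.removeAll(unstoppedTaskIds); // tasks whose old workers keep running untouched
    keep.removeAll(needAssigns);      // tasks that will receive a brand-new assignment
    return keep;
}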
/**
 * Get the "unstopped" tasks from the alive task list: tasks that are still alive
 * even though the supervisor they were assigned to is already dead.
 */
public Set<Integer> getUnstoppedSlots(
        Set<Integer> aliveTasks, Map<String, SupervisorInfo> supInfos, Assignment existAssignment) {
    Set<Integer> ret = new HashSet<Integer>();

    Set<ResourceWorkerSlot> oldWorkers = existAssignment.getWorkers();
    Set<String> aliveSupervisors = supInfos.keySet();

    for (ResourceWorkerSlot worker : oldWorkers) {
        for (Integer taskId : worker.getTasks()) {
            if (!aliveTasks.contains(taskId)) {
                // task is dead
                continue;
            }

            String oldTaskSupervisorId = worker.getNodeId();
            if (!aliveSupervisors.contains(oldTaskSupervisorId)) {
                // task is alive, but its supervisor is dead
                ret.add(taskId);
            }
        }
    }

    return ret;
}
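// Illustrative sketch (not part of the original code): the selection above keeps a task only if
// the task itself is alive while its supervisor is not. Shown with plain JDK collections; the
// Map<Integer, String> taskToSupervisor parameter is a simplification of the worker slots.
private static Set<Integer> sketchUnstoppedTasks(
        Set<Integer> aliveTasks, Set<String> aliveSupervisors, Map<Integer, String> taskToSupervisor) {
    Set<Integer> ret = new HashSet<Integer>();
    for (Map.Entry<Integer, String> e : taskToSupervisor.entrySet()) {
        if (aliveTasks.contains(e.getKey()) && !aliveSupervisors.contains(e.getValue())) {
            ret.add(e.getKey());
        }
    }
    return ret;
}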
/**
 * Compute the start time (in seconds) of every task in the new assignment.
 * For a brand-new assignment every task starts now; otherwise only tasks whose worker
 * changed get a fresh start time, and their old heartbeats are cleared from ZK.
 *
 * @param context            assignment context (used for the assign type)
 * @param nimbusData         nimbus runtime data (used to reach the ZK cluster state)
 * @param topologyId         id of the topology being assigned
 * @param existingAssignment previous assignment, may be null
 * @param workers            workers of the new assignment
 * @return map of taskId to start time in seconds
 * @throws Exception if the ZK cluster state cannot be updated
 */
public static Map<Integer, Integer> getTaskStartTimes(
        TopologyAssignContext context,
        NimbusData nimbusData,
        String topologyId,
        Assignment existingAssignment,
        Set<ResourceWorkerSlot> workers) throws Exception {
    Map<Integer, Integer> startTimes = new TreeMap<Integer, Integer>();

    if (context.getAssignType() == TopologyAssignContext.ASSIGN_TYPE_NEW) {
        int nowSecs = TimeUtils.current_time_secs();
        for (ResourceWorkerSlot worker : workers) {
            for (Integer changedTaskId : worker.getTasks()) {
                startTimes.put(changedTaskId, nowSecs);
            }
        }
        return startTimes;
    }

    Set<ResourceWorkerSlot> oldWorkers = new HashSet<ResourceWorkerSlot>();

    if (existingAssignment != null) {
        Map<Integer, Integer> taskStartTimeSecs = existingAssignment.getTaskStartTimeSecs();
        if (taskStartTimeSecs != null) {
            startTimes.putAll(taskStartTimeSecs);
        }

        if (existingAssignment.getWorkers() != null) {
            oldWorkers = existingAssignment.getWorkers();
        }
    }

    StormClusterState zkClusterState = nimbusData.getStormClusterState();
    Set<Integer> changeTaskIds = getChangeTaskIds(oldWorkers, workers);
    int nowSecs = TimeUtils.current_time_secs();
    for (Integer changedTaskId : changeTaskIds) {
        startTimes.put(changedTaskId, nowSecs);
        zkClusterState.remove_task_heartbeat(topologyId, changedTaskId);
    }

    LOG.info("Task assignment has been changed: " + changeTaskIds);
    return startTimes;
}
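// Illustrative sketch (not part of the original code): the resulting start times are the old ones
// with every task whose worker changed stamped with "now". Parameter names are assumptions.
private static Map<Integer, Integer> sketchStartTimes(
        Map<Integer, Integer> oldStartTimes, Set<Integer> changedTasks, int nowSecs) {
    Map<Integer, Integer> startTimes = new TreeMap<Integer, Integer>(oldStartTimes);
    for (Integer task : changedTasks) {
        startTimes.put(task, nowSecs); // moved or restarted tasks start counting from now
    }
    return startTimes;
}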
/**
 * Compute the free slots: remove every port already used by an assignment from the
 * corresponding supervisor's worker port list, so that only free ports remain.
 *
 * @param supervisorInfos   map of supervisorId to SupervisorInfo, updated in place
 * @param stormClusterState ZK-backed cluster state used to read all assignments
 * @throws Exception if the assignments cannot be read from the cluster state
 */
public static void getFreeSlots(
        Map<String, SupervisorInfo> supervisorInfos, StormClusterState stormClusterState)
        throws Exception {
    Map<String, Assignment> assignments = Cluster.get_all_assignment(stormClusterState, null);

    for (Entry<String, Assignment> entry : assignments.entrySet()) {
        String topologyId = entry.getKey();
        Assignment assignment = entry.getValue();

        Set<ResourceWorkerSlot> workers = assignment.getWorkers();

        for (ResourceWorkerSlot worker : workers) {
            SupervisorInfo supervisorInfo = supervisorInfos.get(worker.getNodeId());
            if (supervisorInfo == null) {
                // the supervisor is dead
                continue;
            }
            supervisorInfo.getWorkerPorts().remove(worker.getPort());
        }
    }
}
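// Illustrative sketch (not part of the original code): free ports are simply the supervisor's
// configured worker ports minus the ports already taken by assignments. The real method above
// removes the used ports in place; this version is non-destructive for clarity.
private static Set<Integer> sketchFreePorts(Set<Integer> workerPorts, Set<Integer> usedPorts) {
    Set<Integer> free = new HashSet<Integer>(workerPorts);
    free.removeAll(usedPorts);
    return free;
}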
/**
 * Get the workers that can keep running: a worker is "unstopped" only if every task
 * assigned to it is still in the alive task set.
 */
private Set<ResourceWorkerSlot> getUnstoppedWorkers(
        Set<Integer> aliveTasks, Assignment existAssignment) {
    Set<ResourceWorkerSlot> ret = new HashSet<ResourceWorkerSlot>();
    for (ResourceWorkerSlot worker : existAssignment.getWorkers()) {
        boolean alive = true;
        for (Integer task : worker.getTasks()) {
            if (!aliveTasks.contains(task)) {
                alive = false;
                break;
            }
        }
        if (alive) {
            ret.add(worker);
        }
    }
    return ret;
}
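// Illustrative sketch (not part of the original code): the per-worker "all tasks alive" check
// above is equivalent to a single containsAll() call on plain JDK collections.
private static boolean sketchWorkerIsUnstopped(Set<Integer> aliveTasks, Set<Integer> workerTasks) {
    return aliveTasks.containsAll(workerTasks);
}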
public static Map<String, String> getTopologyNodeHost(
        Map<String, SupervisorInfo> supervisorMap,
        Assignment existingAssignment,
        Set<ResourceWorkerSlot> workers) {

    // collect the nodes actually used by the new workers, so unused nodes can be dropped
    Set<String> usedNodes = new HashSet<String>();
    for (ResourceWorkerSlot worker : workers) {
        usedNodes.add(worker.getNodeId());
    }

    // Map<supervisorId, hostname> from the existing assignment
    Map<String, String> allNodeHost = new HashMap<String, String>();
    if (existingAssignment != null) {
        allNodeHost.putAll(existingAssignment.getNodeHost());
    }

    // overwrite with the alive supervisors' Map<supervisorId, hostname>
    Map<String, String> nodeHost = SupervisorInfo.getNodeHost(supervisorMap);
    if (nodeHost != null) {
        allNodeHost.putAll(nodeHost);
    }

    Map<String, String> ret = new HashMap<String, String>();

    for (String supervisorId : usedNodes) {
        if (allNodeHost.containsKey(supervisorId)) {
            ret.put(supervisorId, allNodeHost.get(supervisorId));
        } else {
            LOG.warn("Node " + supervisorId + " isn't in the supervisor list");
        }
    }

    return ret;
}
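// Illustrative sketch (not part of the original code): the node -> host resolution above is
// "old map overwritten by the live map, filtered to the nodes actually in use". JDK only;
// the parameter names are assumptions.
private static Map<String, String> sketchNodeHost(
        Map<String, String> oldNodeHost, Map<String, String> aliveNodeHost, Set<String> usedNodes) {
    Map<String, String> all = new HashMap<String, String>(oldNodeHost);
    all.putAll(aliveNodeHost); // live supervisors win over stale entries
    Map<String, String> ret = new HashMap<String, String>();
    for (String node : usedNodes) {
        if (all.containsKey(node)) {
            ret.put(node, all.get(node));
        }
    }
    return ret;
}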
@SuppressWarnings({"rawtypes", "unchecked"}) public WorkerData( Map conf, IContext context, String topology_id, String supervisor_id, int port, String worker_id, String jar_path) throws Exception { this.conf = conf; this.context = context; this.topologyId = topology_id; this.supervisorId = supervisor_id; this.port = port; this.workerId = worker_id; this.active = new AtomicBoolean(true); this.topologyStatus = StatusType.active; if (StormConfig.cluster_mode(conf).equals("distributed")) { String pidDir = StormConfig.worker_pids_root(conf, worker_id); JStormServerUtils.createPid(pidDir); } // create zk interface this.zkClusterstate = ZkTool.mk_distributed_cluster_state(conf); this.zkCluster = Cluster.mk_storm_cluster_state(zkClusterstate); Map rawConf = StormConfig.read_supervisor_topology_conf(conf, topology_id); this.stormConf = new HashMap<Object, Object>(); this.stormConf.putAll(conf); this.stormConf.putAll(rawConf); LOG.info("Worker Configuration " + stormConf); try { boolean enableClassloader = ConfigExtension.isEnableTopologyClassLoader(stormConf); boolean enableDebugClassloader = ConfigExtension.isEnableClassloaderDebug(stormConf); if (jar_path == null && enableClassloader == true) { LOG.error("enable classloader, but not app jar"); throw new InvalidParameterException(); } URL[] urlArray = new URL[0]; if (jar_path != null) { String[] paths = jar_path.split(":"); Set<URL> urls = new HashSet<URL>(); for (String path : paths) { if (StringUtils.isBlank(path)) continue; URL url = new URL("File:" + path); urls.add(url); } urlArray = urls.toArray(new URL[0]); } WorkerClassLoader.mkInstance( urlArray, ClassLoader.getSystemClassLoader(), ClassLoader.getSystemClassLoader().getParent(), enableClassloader, enableDebugClassloader); } catch (Exception e) { // TODO Auto-generated catch block LOG.error("init jarClassLoader error!", e); throw new InvalidParameterException(); } if (this.context == null) { this.context = TransportFactory.makeContext(stormConf); } boolean disruptorUseSleep = ConfigExtension.isDisruptorUseSleep(stormConf); DisruptorQueue.setUseSleep(disruptorUseSleep); boolean isLimited = ConfigExtension.getTopologyBufferSizeLimited(stormConf); DisruptorQueue.setLimited(isLimited); LOG.info("Disruptor use sleep:" + disruptorUseSleep + ", limited size:" + isLimited); // this.transferQueue = new LinkedBlockingQueue<TransferData>(); int buffer_size = Utils.getInt(conf.get(Config.TOPOLOGY_TRANSFER_BUFFER_SIZE)); WaitStrategy waitStrategy = (WaitStrategy) Utils.newInstance((String) conf.get(Config.TOPOLOGY_DISRUPTOR_WAIT_STRATEGY)); this.transferQueue = DisruptorQueue.mkInstance("TotalTransfer", ProducerType.MULTI, buffer_size, waitStrategy); this.transferQueue.consumerStarted(); this.sendingQueue = DisruptorQueue.mkInstance("TotalSending", ProducerType.MULTI, buffer_size, waitStrategy); this.sendingQueue.consumerStarted(); this.nodeportSocket = new ConcurrentHashMap<WorkerSlot, IConnection>(); this.taskNodeport = new ConcurrentHashMap<Integer, WorkerSlot>(); this.workerToResource = new ConcurrentSkipListSet<ResourceWorkerSlot>(); this.innerTaskTransfer = new ConcurrentHashMap<Integer, DisruptorQueue>(); this.deserializeQueues = new ConcurrentHashMap<Integer, DisruptorQueue>(); Assignment assignment = zkCluster.assignment_info(topologyId, null); if (assignment == null) { String errMsg = "Failed to get Assignment of " + topologyId; LOG.error(errMsg); throw new RuntimeException(errMsg); } workerToResource.addAll(assignment.getWorkers()); // get current worker's task list this.taskids = 
assignment.getCurrentWorkerTasks(supervisorId, port); if (taskids.size() == 0) { throw new RuntimeException("No tasks running current workers"); } LOG.info("Current worker taskList:" + taskids); // deserialize topology code from local dir rawTopology = StormConfig.read_supervisor_topology_code(conf, topology_id); sysTopology = Common.system_topology(stormConf, rawTopology); generateMaps(); contextMaker = new ContextMaker(this); metricReporter = new MetricReporter(this); outTaskStatus = new HashMap<Integer, Boolean>(); threadPool = Executors.newScheduledThreadPool(THREAD_POOL_NUM); TimerTrigger.setScheduledExecutorService(threadPool); LOG.info("Successfully create WorkerData"); }
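// Illustrative sketch (not part of the original code): mirrors how the constructor turns a
// colon-separated jar path into a URL[] for a classloader. It uses the plain JDK
// URLClassLoader instead of JStorm's WorkerClassLoader, purely for demonstration.
private static ClassLoader sketchBuildJarClassLoader(String jarPath)
        throws java.net.MalformedURLException {
    Set<java.net.URL> urls = new HashSet<java.net.URL>();
    if (jarPath != null) {
        for (String path : jarPath.split(":")) {
            if (path == null || path.trim().isEmpty()) continue; // skip blank entries
            urls.add(new java.net.URL("file:" + path));
        }
    }
    // parent-first delegation here; isolation/debug behavior is what WorkerClassLoader adds
    return new java.net.URLClassLoader(urls.toArray(new java.net.URL[0]),
                                       ClassLoader.getSystemClassLoader());
}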
@Override
public SupervisorWorkers getSupervisorWorkers(String host) throws NotAliveException, TException {
    try {
        StormClusterState stormClusterState = data.getStormClusterState();

        String supervisorId = null;
        SupervisorInfo supervisorInfo = null;

        String ip = NetWorkUtils.host2Ip(host);
        String hostName = NetWorkUtils.ip2Host(host);

        // find the supervisor matching the requested host (by hostname or ip)
        Map<String, SupervisorInfo> supervisorInfos =
            Cluster.allSupervisorInfo(stormClusterState, null);

        for (Entry<String, SupervisorInfo> entry : supervisorInfos.entrySet()) {
            SupervisorInfo info = entry.getValue();
            if (info.getHostName().equals(hostName) || info.getHostName().equals(ip)) {
                supervisorId = entry.getKey();
                supervisorInfo = info;
                break;
            }
        }

        if (supervisorId == null) {
            throw new TException("No supervisor of " + host);
        }

        Map<String, Assignment> assignments = new HashMap<String, Assignment>();

        // get the StormBase of all active topologies
        Map<String, StormBase> bases = Cluster.topology_bases(stormClusterState);

        for (Entry<String, StormBase> entry : bases.entrySet()) {
            String topologyId = entry.getKey();
            StormBase base = entry.getValue();

            Assignment assignment = stormClusterState.assignment_info(topologyId, null);
            if (assignment == null) {
                LOG.error("Failed to get assignment of " + topologyId);
                continue;
            }
            assignments.put(topologyId, assignment);
        }

        Map<Integer, WorkerSummary> portWorkerSummarys = new TreeMap<Integer, WorkerSummary>();

        for (Entry<String, Assignment> entry : assignments.entrySet()) {
            String topologyId = entry.getKey();
            Assignment assignment = entry.getValue();

            Map<Integer, String> taskToComponent =
                Cluster.topology_task_info(stormClusterState, topologyId);

            Map<Integer, ResourceAssignment> taskToResource = assignment.getTaskToResource();

            for (Entry<Integer, ResourceAssignment> resourceEntry : taskToResource.entrySet()) {
                Integer taskId = resourceEntry.getKey();
                ResourceAssignment resourceAssignment = resourceEntry.getValue();

                if (!supervisorId.equals(resourceAssignment.getSupervisorId())) {
                    continue;
                }

                supervisorInfo.allocResource(resourceAssignment);

                Integer port = resourceAssignment.getPort();
                WorkerSummary workerSummary = portWorkerSummarys.get(port);
                if (workerSummary == null) {
                    workerSummary = new WorkerSummary();
                    workerSummary.set_port(port);
                    workerSummary.set_topology(topologyId);
                    workerSummary.set_tasks(new ArrayList<TaskSummary>());
                    portWorkerSummarys.put(port, workerSummary);
                }

                String componentName = taskToComponent.get(taskId);
                int uptime = TimeUtils.time_delta(assignment.getTaskStartTimeSecs().get(taskId));
                List<TaskSummary> tasks = workerSummary.get_tasks();

                TaskSummary taskSummary =
                    NimbusUtils.mkSimpleTaskSummary(
                        resourceAssignment, taskId, componentName, host, uptime);

                tasks.add(taskSummary);
            }
        }

        List<WorkerSummary> workersList = new ArrayList<WorkerSummary>();
        workersList.addAll(portWorkerSummarys.values());

        SupervisorSummary supervisorSummary =
            NimbusUtils.mkSupervisorSummary(supervisorInfo, supervisorId);

        return new SupervisorWorkers(supervisorSummary, workersList);
    } catch (TException e) {
        LOG.info("Failed to get SupervisorWorkers of " + host, e);
        throw e;
    } catch (Exception e) {
        LOG.info("Failed to get SupervisorWorkers of " + host, e);
        throw new TException(e);
    }
}
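// Illustrative sketch (not part of the original code): the port -> WorkerSummary grouping above
// is an ordinary "group by key" over a TreeMap so ports come out sorted. Integer task ids stand
// in for TaskSummary objects; the taskToPort parameter is an assumption of this example.
private static Map<Integer, List<Integer>> sketchGroupTasksByPort(Map<Integer, Integer> taskToPort) {
    Map<Integer, List<Integer>> byPort = new TreeMap<Integer, List<Integer>>();
    for (Map.Entry<Integer, Integer> e : taskToPort.entrySet()) {
        Integer taskId = e.getKey();
        Integer port = e.getValue();
        List<Integer> tasks = byPort.get(port);
        if (tasks == null) {
            tasks = new ArrayList<Integer>();
            byPort.put(port, tasks);
        }
        tasks.add(taskId);
    }
    return byPort;
}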
@Override
public Map<Integer, ResourceAssignment> assignTasks(TopologyAssignContext context)
        throws FailedAssignTopologyException {

    int assignType = context.getAssignType();
    if (!TopologyAssignContext.isAssignTypeValid(assignType)) {
        throw new FailedAssignTopologyException("Invalid assign type " + assignType);
    }

    DefaultTopologyAssignContext defaultContext = new DefaultTopologyAssignContext(context);
    if (assignType == TopologyAssignContext.ASSIGN_TYPE_REBALANCE) {
        freeUsed(defaultContext);
    }

    LOG.info("Dead tasks:" + defaultContext.getDeadTaskIds());
    LOG.info("Unstopped tasks:" + defaultContext.getUnstoppedTaskIds());

    Set<Integer> needAssignTasks = getNeedAssignTasks(defaultContext);

    Map<Integer, ResourceAssignment> keepAssigns = getKeepAssign(defaultContext, needAssignTasks);

    // use a TreeMap so tasks stay in ascending taskId order
    Map<Integer, ResourceAssignment> ret = new TreeMap<Integer, ResourceAssignment>();
    ret.putAll(keepAssigns);
    ret.putAll(defaultContext.getUnstoppedAssignments());

    Map<WorkerSlot, List<Integer>> keepAssignWorkers = Assignment.getWorkerTasks(keepAssigns);

    int allocWorkerNum =
        defaultContext.getTotalWorkerNum()
            - defaultContext.getUnstoppedWorkerNum()
            - keepAssignWorkers.size();
    if (allocWorkerNum <= 0) {
        LOG.warn(
            "No need to assign workers, all workers are fine " + defaultContext.toDetailString());
        throw new FailedAssignTopologyException("No need to assign workers, all workers are fine");
    }

    Set<String> outputConfigComponents = new HashSet<String>();

    Map<ComponentAssignType, Pair<Set<Integer>, IPreassignTask>> typeHandler =
        registerPreAssignHandler(defaultContext, needAssignTasks);

    Map<Integer, ResourceAssignment> newAssigns = new HashMap<Integer, ResourceAssignment>();
    Set<String> usedSupervisorIds = new HashSet<String>();
    List<Integer> lastFailed = new ArrayList<Integer>();

    for (Entry<ComponentAssignType, Pair<Set<Integer>, IPreassignTask>> entry :
            typeHandler.entrySet()) {
        ComponentAssignType type = entry.getKey();
        Set<Integer> tasks = entry.getValue().getFirst();
        IPreassignTask handler = entry.getValue().getSecond();

        // retry the tasks that failed in the previous handler
        tasks.addAll(lastFailed);
        lastFailed.clear();

        List<Integer> sortedTasks = sortAssignTasks(defaultContext, tasks);

        StormTopology sysTopology = defaultContext.getSysTopology();

        for (Integer task : sortedTasks) {
            Set<String> canUsedSupervisorIds =
                getCanUsedSupervisors(defaultContext, usedSupervisorIds, allocWorkerNum);

            String componentName = defaultContext.getTaskToComponent().get(task);
            ComponentCommon componentCommon =
                ThriftTopologyUtils.getComponentCommon(sysTopology, componentName);

            Map componentMap = (Map) JStormUtils.from_json(componentCommon.get_json_conf());
            if (componentMap == null) {
                componentMap = Maps.newHashMap();
            }

            if (!outputConfigComponents.contains(componentName)) {
                LOG.info("Component map of " + componentName + "\n" + componentMap);
                outputConfigComponents.add(componentName);
            }

            ResourceAssignment preAssignment =
                handler.preAssign(
                    task,
                    defaultContext,
                    componentMap,
                    componentName,
                    canUsedSupervisorIds,
                    ret,
                    newAssigns);
            if (preAssignment == null) {
                // pre-assign failed; retry this task with the next handler
                lastFailed.add(task);
            } else {
                // pre-assign succeeded
                SupervisorInfo supervisorInfo =
                    defaultContext.getCluster().get(preAssignment.getSupervisorId());
                LOG.info("Task " + task + " has been assigned to " + supervisorInfo.getHostName());

                newAssigns.put(task, preAssignment);
                ret.put(task, preAssignment);
                usedSupervisorIds.add(preAssignment.getSupervisorId());
            }
        }
    }

    if (!lastFailed.isEmpty()) {
        throw new FailedAssignTopologyException("Failed to assign tasks " + lastFailed);
    }

    // the post-assign handler is hard-coded here
    IPostAssignTask postAssignHandler = new PostAssignTaskPort();
    postAssignHandler.postAssign(defaultContext, newAssigns, allocWorkerNum);

    LOG.info("Keep Alive slots:" + keepAssigns);
    LOG.info("Unstopped slots:" + defaultContext.getUnstoppedAssignments());
    LOG.info("New assign slots:" + newAssigns);

    return ret;
}
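// Illustrative sketch (not part of the original code): shows the fallback pattern used in
// assignTasks(), where tasks a handler fails to place are carried into the next handler's
// batch, and only the tasks still unplaced after the last handler cause a failure.
// SketchHandler is a hypothetical stand-in for IPreassignTask; task sorting is omitted.
interface SketchHandler {
    boolean tryPlace(int task);
}

static List<Integer> sketchAssignWithFallback(Map<SketchHandler, Set<Integer>> handlerTasks) {
    List<Integer> lastFailed = new ArrayList<Integer>();
    for (Entry<SketchHandler, Set<Integer>> entry : handlerTasks.entrySet()) {
        Set<Integer> tasks = new HashSet<Integer>(entry.getValue());
        tasks.addAll(lastFailed); // retry whatever the previous handler could not place
        lastFailed.clear();
        for (Integer task : tasks) {
            if (!entry.getKey().tryPlace(task)) {
                lastFailed.add(task);
            }
        }
    }
    return lastFailed; // tasks no handler could place
}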