/**
 * Clean up topologies that are no longer in ZK /topology but still leave data
 * elsewhere (task/heartbeat/error/assignment/monitor nodes and the local
 * stormdist directory).
 *
 * @throws Exception
 */
public void cleanupDisappearedTopology() throws Exception {
    StormClusterState clusterState = nimbusData.getStormClusterState();

    List<String> active_topologys = clusterState.active_storms();
    if (active_topologys == null) {
        return;
    }

    Set<String> cleanupIds = get_cleanup_ids(clusterState, active_topologys);

    for (String topologyId : cleanupIds) {
        LOG.info("Cleaning up " + topologyId);

        clusterState.try_remove_storm(topologyId);
        // nimbusData.getTaskHeartbeatsCache().remove(topologyId);

        // get /nimbus/stormdist/topologyId
        String master_stormdist_root =
                StormConfig.masterStormdistRoot(nimbusData.getConf(), topologyId);
        try {
            // delete the topology's local dir
            PathUtils.rmr(master_stormdist_root);
        } catch (IOException e) {
            LOG.warn("Failed to delete " + master_stormdist_root + ",", e);
        }
    }
}
public void metricsMonitor(TopologyAssignEvent event) {
    String topologyId = event.getTopologyId();
    try {
        Map<Object, Object> conf = nimbusData.getConf();
        boolean isEnable = ConfigExtension.isEnablePerformanceMetrics(conf);
        StormClusterState zkClusterState = nimbusData.getStormClusterState();
        StormMonitor monitor = new StormMonitor(isEnable);
        zkClusterState.set_storm_monitor(topologyId, monitor);
    } catch (Exception e) {
        LOG.warn("Failed to update metrics monitor status of " + topologyId, e);
    }
}
/**
 * Make assignments for a topology. This is the core nimbus function; it has
 * been completely rewritten.
 *
 * @param event TopologyAssignEvent; its scratch flag is false unless the
 *              topology is being rebalanced
 * @throws Exception
 */
public Assignment mkAssignment(TopologyAssignEvent event) throws Exception {
    String topologyId = event.getTopologyId();
    LOG.info("Determining assignment for " + topologyId);

    TopologyAssignContext context = prepareTopologyAssign(event);

    Set<ResourceWorkerSlot> assignments = null;

    if (!StormConfig.local_mode(nimbusData.getConf())) {
        IToplogyScheduler scheduler = schedulers.get(DEFAULT_SCHEDULER_NAME);
        assignments = scheduler.assignTasks(context);
    } else {
        assignments = mkLocalAssignment(context);
    }

    Assignment assignment = null;

    Map<String, String> nodeHost =
            getTopologyNodeHost(context.getCluster(), context.getOldAssignment(), assignments);
    Map<Integer, Integer> startTimes =
            getTaskStartTimes(context, nimbusData, topologyId, context.getOldAssignment(), assignments);

    String codeDir = StormConfig.masterStormdistRoot(nimbusData.getConf(), topologyId);

    assignment = new Assignment(codeDir, assignments, nodeHost, startTimes);

    StormClusterState stormClusterState = nimbusData.getStormClusterState();
    stormClusterState.set_assignment(topologyId, assignment);

    // update the start time of each task's heartbeat
    NimbusUtils.updateTaskHbStartTime(nimbusData, assignment, topologyId);

    // Update metrics information in ZK on rebalance or reassignment;
    // only update the metrics monitor status when creating a topology.
    if (context.getAssignType() == TopologyAssignContext.ASSIGN_TYPE_REBALANCE
            || context.getAssignType() == TopologyAssignContext.ASSIGN_TYPE_MONITOR) {
        NimbusUtils.updateMetricsInfo(nimbusData, topologyId, assignment);
    } else {
        metricsMonitor(event);
    }

    LOG.info("Successfully made assignment for topology " + topologyId + ": " + assignment);

    return assignment;
}
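// Illustrative sketch (not part of the original class): callers do not invoke
// mkAssignment directly; they enqueue a TopologyAssignEvent and wait for the
// TopologyAssign background thread (started in init below) to process it, as
// submitTopologyWithOpts and the rebalance callback later in this section do.
// The helper name is hypothetical; the calls are taken from those call sites.
private static boolean requestAssignment(String topologyId) {
    TopologyAssignEvent event = new TopologyAssignEvent();
    event.setTopologyId(topologyId);
    event.setScratch(false); // true only when rebalancing
    TopologyAssign.push(event);
    // blocks until mkAssignment has run; false means failure or timeout
    return event.waitFinish();
}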
@Override
public ByteBuffer downloadChunk(String id) throws TException {
    TimeCacheMap<Object, Object> downloaders = data.getDownloaders();
    Object obj = downloaders.get(id);
    if (obj == null) {
        throw new TException("Could not find input stream for that id");
    }

    try {
        if (obj instanceof BufferFileInputStream) {
            BufferFileInputStream is = (BufferFileInputStream) obj;
            byte[] ret = is.read();
            if (ret != null) {
                // re-put to refresh the entry's expiration in the TimeCacheMap
                downloaders.put(id, is);
                return ByteBuffer.wrap(ret);
            }
        } else {
            throw new TException("Object isn't BufferFileInputStream for " + id);
        }
    } catch (IOException e) {
        LOG.error("BufferFileInputStream read failed when downloadChunk ", e);
        throw new TException(e);
    }

    // end of file: return an empty buffer
    byte[] empty = {};
    return ByteBuffer.wrap(empty);
}
/**
 * Start a topology: set its active status.
 *
 * @param event
 * @throws Exception
 */
public void setTopologyStatus(TopologyAssignEvent event) throws Exception {
    StormClusterState stormClusterState = nimbusData.getStormClusterState();

    String topologyId = event.getTopologyId();
    String topologyName = event.getTopologyName();
    String group = event.getGroup();

    StormStatus status = new StormStatus(StatusType.active);
    if (event.getOldStatus() != null) {
        status = event.getOldStatus();
    }

    StormBase stormBase = stormClusterState.storm_base(topologyId, null);
    if (stormBase == null) {
        stormBase = new StormBase(topologyName, TimeUtils.current_time_secs(), status, group);
        stormClusterState.activate_storm(topologyId, stormBase);
    } else {
        stormClusterState.update_storm(topologyId, status);

        // A small hack: when the event comes from monitor/rebalance/startup,
        // topologyName is null, so fill it in from the stored StormBase.
        if (topologyName == null) {
            event.setTopologyName(stormBase.getStormName());
        }
    }

    LOG.info("Update " + topologyId + " " + status);
}
private void setTaskInfo(StormTopology oldTopology, StormTopology newTopology) throws Exception {
    StormClusterState clusterState = data.getStormClusterState();
    // Retrieve the max task id currently in use
    TreeSet<Integer> taskIds = new TreeSet<Integer>(clusterState.task_ids(topologyid));
    int cnt = taskIds.descendingIterator().next();

    cnt = setBoltInfo(oldTopology, newTopology, cnt, clusterState);
    cnt = setSpoutInfo(oldTopology, newTopology, cnt, clusterState);
}
private void removeTask(String topologyId, int taskId, StormClusterState clusterState)
        throws Exception {
    // Note: new HashSet<Integer>(taskId) would set the initial capacity rather
    // than add the task id, so add it explicitly.
    Set<Integer> taskIds = new HashSet<Integer>();
    taskIds.add(taskId);
    clusterState.remove_task(topologyId, taskIds);

    Map<Integer, TkHbCacheTime> taskHbs = data.getTaskHeartbeatsCache(topologyId, false);
    if (taskHbs != null) {
        taskHbs.remove(taskId);
    }
}
/**
 * Check that the topology's actual state matches the expected one; throw
 * otherwise.
 *
 * @param nimbus
 * @param topologyName
 * @param bActive the expected state: true means the topology should be active
 * @throws Exception
 */
public void checkTopologyActive(NimbusData nimbus, String topologyName, boolean bActive)
        throws Exception {
    if (isTopologyActive(nimbus.getStormClusterState(), topologyName) != bActive) {
        if (bActive) {
            throw new NotAliveException(topologyName + " is not alive");
        } else {
            throw new AlreadyAliveException(topologyName + " is already active");
        }
    }
}
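// Illustrative call sites (hedged sketch): bActive expresses the *expected*
// state, so the same helper guards both directions.
//
//     // on submit, the name must not belong to a live topology:
//     checkTopologyActive(data, topologyName, false); // AlreadyAliveException if it does
//
//     // before operating on a running topology, it must be alive:
//     checkTopologyActive(data, topologyName, true);  // NotAliveException if it is not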
/** Topology assignment thread */
public void init(NimbusData nimbusData) {
    this.nimbusData = nimbusData;
    this.schedulers = new HashMap<String, IToplogyScheduler>();

    IToplogyScheduler defaultScheduler = new DefaultTopologyScheduler();
    defaultScheduler.prepare(nimbusData.getConf());

    schedulers.put(DEFAULT_SCHEDULER_NAME, defaultScheduler);

    thread = new Thread(this);
    thread.setName("TopologyAssign");
    thread.setDaemon(true);
    thread.start();
}
@Override
public String beginFileDownload(String file) throws TException {
    BufferFileInputStream is = null;
    String id = null;
    try {
        is = new BufferFileInputStream(file);
        id = UUID.randomUUID().toString();
        data.getDownloaders().put(id, is);
    } catch (FileNotFoundException e) {
        LOG.error("File " + file + " not found", e);
        throw new TException(e);
    }
    return id;
}
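// Illustrative client-side counterpart (not in the original source): together,
// beginFileDownload and downloadChunk form a pull-style chunk protocol. The
// client calls beginFileDownload once, then polls downloadChunk with the
// returned id until an empty buffer signals end of file. "Nimbus.Client" is
// the thrift-generated client; the helper name is an assumption of this sketch.
static void downloadFromNimbus(Nimbus.Client client, String remoteFile, String localFile)
        throws Exception {
    String id = client.beginFileDownload(remoteFile);
    try (FileOutputStream out = new FileOutputStream(localFile)) {
        while (true) {
            ByteBuffer chunk = client.downloadChunk(id);
            if (!chunk.hasRemaining()) {
                break; // downloadChunk returns an empty buffer at end of file
            }
            out.getChannel().write(chunk);
        }
    }
}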
/**
 * Get topology ids that need to be cleaned up: every id that still owns
 * task/heartbeat/error/assignment/monitor nodes or a stormdist directory,
 * minus the ids that are still active.
 *
 * @param clusterState
 * @param active_topologys
 * @return
 * @throws Exception
 */
private Set<String> get_cleanup_ids(StormClusterState clusterState, List<String> active_topologys)
        throws Exception {
    List<String> task_ids = clusterState.task_storms();
    List<String> heartbeat_ids = clusterState.heartbeat_storms();
    List<String> error_ids = clusterState.task_error_storms();
    List<String> assignment_ids = clusterState.assignments(null);
    List<String> monitor_ids = clusterState.monitors();

    String master_stormdist_root = StormConfig.masterStormdistRoot(nimbusData.getConf());
    // listdir /local-dir/nimbus/stormdist
    List<String> code_ids = PathUtils.read_dir_contents(master_stormdist_root);

    // Set<String> assigned_ids =
    //         JStormUtils.listToSet(clusterState.active_storms());

    Set<String> to_cleanup_ids = new HashSet<String>();
    if (task_ids != null) {
        to_cleanup_ids.addAll(task_ids);
    }
    if (heartbeat_ids != null) {
        to_cleanup_ids.addAll(heartbeat_ids);
    }
    if (error_ids != null) {
        to_cleanup_ids.addAll(error_ids);
    }
    if (assignment_ids != null) {
        to_cleanup_ids.addAll(assignment_ids);
    }
    if (monitor_ids != null) {
        to_cleanup_ids.addAll(monitor_ids);
    }
    if (code_ids != null) {
        to_cleanup_ids.addAll(code_ids);
    }
    if (active_topologys != null) {
        to_cleanup_ids.removeAll(active_topologys);
    }
    return to_cleanup_ids;
}
/**
 * Prepare to upload a topology jar; return the target file location.
 *
 * @throws TException
 */
@Override
public String beginFileUpload() throws TException {
    String fileLoc = null;
    try {
        fileLoc = StormConfig.masterInbox(conf) + "/stormjar-" + UUID.randomUUID() + ".jar";
        data.getUploaders().put(fileLoc, Channels.newChannel(new FileOutputStream(fileLoc)));
        LOG.info("Uploading file from client to " + fileLoc);
    } catch (FileNotFoundException e) {
        LOG.error("File not found: " + fileLoc, e);
        throw new TException(e);
    } catch (IOException e) {
        LOG.error("IOException: " + fileLoc, e);
        throw new TException(e);
    }
    return fileLoc;
}
/**
 * Compute the start time (in seconds) of every task. A new topology gets
 * "now" for all tasks; otherwise old start times are kept and only tasks
 * whose worker changed are reset.
 *
 * @param context
 * @param nimbusData
 * @param topologyId
 * @param existingAssignment
 * @param workers
 * @return
 * @throws Exception
 */
public static Map<Integer, Integer> getTaskStartTimes(TopologyAssignContext context,
        NimbusData nimbusData, String topologyId, Assignment existingAssignment,
        Set<ResourceWorkerSlot> workers) throws Exception {

    Map<Integer, Integer> startTimes = new TreeMap<Integer, Integer>();

    if (context.getAssignType() == TopologyAssignContext.ASSIGN_TYPE_NEW) {
        int nowSecs = TimeUtils.current_time_secs();
        for (ResourceWorkerSlot worker : workers) {
            for (Integer changedTaskId : worker.getTasks()) {
                startTimes.put(changedTaskId, nowSecs);
            }
        }
        return startTimes;
    }

    Set<ResourceWorkerSlot> oldWorkers = new HashSet<ResourceWorkerSlot>();

    if (existingAssignment != null) {
        Map<Integer, Integer> taskStartTimeSecs = existingAssignment.getTaskStartTimeSecs();
        if (taskStartTimeSecs != null) {
            startTimes.putAll(taskStartTimeSecs);
        }

        if (existingAssignment.getWorkers() != null) {
            oldWorkers = existingAssignment.getWorkers();
        }
    }

    StormClusterState zkClusterState = nimbusData.getStormClusterState();
    Set<Integer> changeTaskIds = getChangeTaskIds(oldWorkers, workers);
    int nowSecs = TimeUtils.current_time_secs();
    for (Integer changedTaskId : changeTaskIds) {
        startTimes.put(changedTaskId, nowSecs);
        zkClusterState.remove_task_heartbeat(topologyId, changedTaskId);
    }

    LOG.info("Task assignment has been changed: " + changeTaskIds);
    return startTimes;
}
/**
 * Get TopologyInfo; it contains all data on the topology's running status.
 *
 * @return TopologyInfo
 */
@Override
public TopologyInfo getTopologyInfo(String topologyId) throws NotAliveException, TException {

    TopologyInfo topologyInfo = new TopologyInfo();

    StormClusterState stormClusterState = data.getStormClusterState();

    try {
        // get the topology's StormBase
        StormBase base = stormClusterState.storm_base(topologyId, null);
        if (base == null) {
            throw new NotAliveException("No topology of " + topologyId);
        }
        topologyInfo.set_id(topologyId);
        topologyInfo.set_name(base.getStormName());
        topologyInfo.set_uptime_secs(TimeUtils.time_delta(base.getLanchTimeSecs()));
        topologyInfo.set_status(base.getStatusString());

        // get the topology's Assignment
        Assignment assignment = stormClusterState.assignment_info(topologyId, null);
        if (assignment == null) {
            throw new TException("Failed to get Assignment from ZK of " + topologyId);
        }

        // get the topology's map<taskId, componentId>
        Map<Integer, String> taskInfo = Cluster.topology_task_info(stormClusterState, topologyId);

        List<TaskSummary> tasks =
                NimbusUtils.mkTaskSummary(stormClusterState, assignment, taskInfo, topologyId);
        topologyInfo.set_tasks(tasks);

        return topologyInfo;
    } catch (TException e) {
        LOG.info("Failed to get topologyInfo " + topologyId, e);
        throw e;
    } catch (Exception e) {
        LOG.info("Failed to get topologyInfo " + topologyId, e);
        throw new TException("Failed to get topologyInfo " + topologyId);
    }
}
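// Illustrative client-side usage (hedged sketch): "Nimbus.Client" is the
// thrift-generated client, and the get_xxx getters mirror the set_xxx calls
// above; the helper name is an assumption of this sketch.
static void printTopologyInfo(Nimbus.Client client, String topologyId) throws Exception {
    TopologyInfo info = client.getTopologyInfo(topologyId);
    System.out.println(info.get_name() + " [" + info.get_status() + "] up "
            + info.get_uptime_secs() + "s, tasks=" + info.get_tasks().size());
}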
@Override
public void finishFileUpload(String location) throws TException {
    TimeCacheMap<Object, Object> uploaders = data.getUploaders();
    Object obj = uploaders.get(location);
    if (obj == null) {
        throw new TException("File for that location does not exist (or timed out)");
    }
    try {
        if (obj instanceof WritableByteChannel) {
            WritableByteChannel channel = (WritableByteChannel) obj;
            channel.close();
            uploaders.remove(location);
            LOG.info("Finished uploading file from client: " + location);
        } else {
            throw new TException("Object isn't WritableByteChannel for " + location);
        }
    } catch (IOException e) {
        LOG.error("WritableByteChannel close failed when finishFileUpload " + location, e);
    }
}
/** Upload a chunk of topology jar data. */
@Override
public void uploadChunk(String location, ByteBuffer chunk) throws TException {
    TimeCacheMap<Object, Object> uploaders = data.getUploaders();
    Object obj = uploaders.get(location);
    if (obj == null) {
        throw new TException("File for that location does not exist (or timed out) " + location);
    }
    try {
        if (obj instanceof WritableByteChannel) {
            WritableByteChannel channel = (WritableByteChannel) obj;
            channel.write(chunk);
            // re-put to refresh the entry's expiration in the TimeCacheMap
            uploaders.put(location, channel);
        } else {
            throw new TException("Object isn't WritableByteChannel for " + location);
        }
    } catch (IOException e) {
        String errMsg = "WritableByteChannel write failed when uploadChunk " + location;
        LOG.error(errMsg, e);
        throw new TException(e);
    }
}
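// Illustrative client-side counterpart (not in the original source): the three
// upload RPCs are used in sequence: beginFileUpload reserves a server-side
// location, uploadChunk streams the jar, and finishFileUpload closes the
// channel. "Nimbus.Client" is the thrift-generated client; the helper name and
// the 1 MB chunk size are assumptions of this sketch.
static String uploadJarToNimbus(Nimbus.Client client, String localJar) throws Exception {
    String location = client.beginFileUpload();
    try (FileInputStream in = new FileInputStream(localJar)) {
        byte[] buf = new byte[1024 * 1024];
        int n;
        while ((n = in.read(buf)) > 0) {
            client.uploadChunk(location, ByteBuffer.wrap(buf, 0, n));
        }
    }
    client.finishFileUpload(location);
    // the returned location is what submitTopologyWithOpts expects as uploadedJarLocation
    return location;
}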
/**
 * Back up the topology's Assignment to ZK.
 *
 * @@@ Question: do we need to do the backup operation every time?
 *
 * @param assignment
 * @param event
 */
public void backupAssignment(Assignment assignment, TopologyAssignEvent event) {
    String topologyId = event.getTopologyId();
    String topologyName = event.getTopologyName();
    try {
        StormClusterState zkClusterState = nimbusData.getStormClusterState();
        // one small inefficiency: tasks are fetched twice when assigning one topology
        HashMap<Integer, String> tasks = Cluster.topology_task_info(zkClusterState, topologyId);

        Map<String, List<Integer>> componentTasks = JStormUtils.reverse_map(tasks);

        for (Entry<String, List<Integer>> entry : componentTasks.entrySet()) {
            List<Integer> keys = entry.getValue();
            Collections.sort(keys);
        }

        AssignmentBak assignmentBak = new AssignmentBak(componentTasks, assignment);
        zkClusterState.backup_assignment(topologyName, assignmentBak);
    } catch (Exception e) {
        LOG.warn("Failed to backup " + topologyId + " assignment " + assignment, e);
    }
}
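// For reference, a hedged sketch of the reversal used above: reverse_map turns
// the taskId -> componentId map into componentId -> list-of-taskIds (the task
// ids shown are made up for illustration).
//
//     tasks          = {1: "spout", 2: "bolt", 3: "bolt"}
//     componentTasks = reverse_map(tasks)   // {"spout": [1], "bolt": [2, 3]}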
@Override
public <T> Object execute(T... args) {
    boolean isSetTaskInfo = false;
    try {
        // args[0]: delay, args[1]: reassign_flag, args[2]: conf
        Boolean reassign = (Boolean) args[1];
        Map<Object, Object> conf = (Map<Object, Object>) args[2];
        if (conf != null) {
            boolean isConfUpdate = false;
            Map stormConf = data.getConf();

            // Update topology code
            Map topoConf = StormConfig.read_nimbus_topology_conf(stormConf, topologyid);
            StormTopology rawOldTopology = StormConfig.read_nimbus_topology_code(stormConf, topologyid);
            StormTopology rawNewTopology = NimbusUtils.normalizeTopology(conf, rawOldTopology, true);
            StormTopology sysOldTopology = rawOldTopology.deepCopy();
            StormTopology sysNewTopology = rawNewTopology.deepCopy();

            if (conf.get(Config.TOPOLOGY_ACKER_EXECUTORS) != null) {
                Common.add_acker(topoConf, sysOldTopology);
                Common.add_acker(conf, sysNewTopology);
                int ackerNum = JStormUtils.parseInt(conf.get(Config.TOPOLOGY_ACKER_EXECUTORS));
                int oldAckerNum = JStormUtils.parseInt(topoConf.get(Config.TOPOLOGY_ACKER_EXECUTORS));
                LOG.info("Update acker from oldAckerNum=" + oldAckerNum + " to ackerNum=" + ackerNum);
                topoConf.put(Config.TOPOLOGY_ACKER_EXECUTORS, ackerNum);
                isConfUpdate = true;
            }

            // If scaling out, set up task info for the newly added tasks
            setTaskInfo(sysOldTopology, sysNewTopology);
            isSetTaskInfo = true;

            // If everything is OK, write the topology code to disk
            StormConfig.write_nimbus_topology_code(stormConf, topologyid, Utils.serialize(rawNewTopology));

            // Update the topology conf if the worker num has been updated
            Set<Object> keys = conf.keySet();
            Integer workerNum = JStormUtils.parseInt(conf.get(Config.TOPOLOGY_WORKERS));
            if (workerNum != null) {
                Integer oldWorkerNum = JStormUtils.parseInt(topoConf.get(Config.TOPOLOGY_WORKERS));
                topoConf.put(Config.TOPOLOGY_WORKERS, workerNum);
                isConfUpdate = true;

                LOG.info("Update worker num from " + oldWorkerNum + " to " + workerNum);
            }

            if (keys.contains(Config.ISOLATION_SCHEDULER_MACHINES)) {
                topoConf.put(Config.ISOLATION_SCHEDULER_MACHINES,
                        conf.get(Config.ISOLATION_SCHEDULER_MACHINES));
            }

            if (isConfUpdate) {
                StormConfig.write_nimbus_topology_conf(stormConf, topologyid, topoConf);
            }
        }

        TopologyAssignEvent event = new TopologyAssignEvent();

        event.setTopologyId(topologyid);
        event.setScratch(true);
        event.setOldStatus(oldStatus);
        event.setReassign(reassign);
        if (conf != null) {
            event.setScaleTopology(true);
        }
        TopologyAssign.push(event);
        event.waitFinish();
    } catch (Exception e) {
        LOG.error("do-rebalance error!", e);
        // Roll back the changes on ZK
        if (isSetTaskInfo) {
            try {
                StormClusterState clusterState = data.getStormClusterState();
                clusterState.remove_task(topologyid, newTasks);
            } catch (Exception e1) {
                // note: log the rollback failure (e1), not the original exception
                LOG.error("Failed to rollback the changes on ZK for task-" + newTasks, e1);
            }
        }
    }

    DelayStatusTransitionCallback delayCallback = new DelayStatusTransitionCallback(
            data, topologyid, oldStatus, StatusType.rebalancing, StatusType.done_rebalance);

    return delayCallback.execute();
}
protected TopologyAssignContext prepareTopologyAssign(TopologyAssignEvent event) throws Exception {
    TopologyAssignContext ret = new TopologyAssignContext();

    String topologyId = event.getTopologyId();

    // Read stormconf.ser and stormcode.ser from the local directory
    Map<Object, Object> nimbusConf = nimbusData.getConf();
    Map<Object, Object> topologyConf = StormConfig.read_nimbus_topology_conf(nimbusConf, topologyId);

    StormTopology rawTopology = StormConfig.read_nimbus_topology_code(nimbusConf, topologyId);
    ret.setRawTopology(rawTopology);

    Map stormConf = new HashMap();
    stormConf.putAll(nimbusConf);
    stormConf.putAll(topologyConf);
    ret.setStormConf(stormConf);

    StormClusterState stormClusterState = nimbusData.getStormClusterState();

    // Get all running supervisors and their SupervisorInfo;
    // no callback is needed to watch them
    Map<String, SupervisorInfo> supInfos = Cluster.allSupervisorInfo(stormClusterState, null);
    if (supInfos.size() == 0) {
        throw new FailedAssignTopologyException(
                "Failed to make assignment " + topologyId + ", due to no alive supervisor");
    }

    // Get all tasks under this topologyId
    Map<Integer, String> taskToComponent = Cluster.topology_task_info(stormClusterState, topologyId);
    ret.setTaskToComponent(taskToComponent);

    // get task ids from /ZK/tasks/topologyId
    Set<Integer> allTaskIds = taskToComponent.keySet();
    if (allTaskIds == null || allTaskIds.size() == 0) {
        String errMsg = "Failed to get all task ID list from /ZK-dir/tasks/" + topologyId;
        LOG.warn(errMsg);
        throw new IOException(errMsg);
    }
    ret.setAllTaskIds(allTaskIds);

    Set<Integer> aliveTasks = new HashSet<Integer>();
    // unstoppedTasks: tasks that are still alive although their supervisor is dead
    Set<Integer> unstoppedTasks = new HashSet<Integer>();
    Set<Integer> deadTasks = new HashSet<Integer>();
    Set<ResourceWorkerSlot> unstoppedWorkers = new HashSet<ResourceWorkerSlot>();

    Assignment existingAssignment = stormClusterState.assignment_info(topologyId, null);
    if (existingAssignment != null) {
        aliveTasks = getAliveTasks(topologyId, allTaskIds);
        unstoppedTasks = getUnstoppedSlots(aliveTasks, supInfos, existingAssignment);

        deadTasks.addAll(allTaskIds);
        deadTasks.removeAll(aliveTasks);
    }

    ret.setDeadTaskIds(deadTasks);
    ret.setUnstoppedTaskIds(unstoppedTasks);

    // Step 2: get all slot resources: free slots / alive slots / unstopped slots
    getFreeSlots(supInfos, stormClusterState);
    ret.setCluster(supInfos);

    if (existingAssignment == null) {
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_NEW);

        try {
            AssignmentBak lastAssignment = stormClusterState.assignment_bak(event.getTopologyName());
            if (lastAssignment != null) {
                ret.setOldAssignment(lastAssignment.getAssignment());
            }
        } catch (Exception e) {
            LOG.warn("Fail to get old assignment", e);
        }
    } else {
        ret.setOldAssignment(existingAssignment);
        if (event.isScratch()) {
            ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_REBALANCE);
            unstoppedWorkers = getUnstoppedWorkers(unstoppedTasks, existingAssignment);
            ret.setUnstoppedWorkers(unstoppedWorkers);
        } else {
            ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_MONITOR);
            unstoppedWorkers = getUnstoppedWorkers(aliveTasks, existingAssignment);
            ret.setUnstoppedWorkers(unstoppedWorkers);
        }
    }

    return ret;
}
public ServiceHandler(NimbusData data) {
    this.data = data;
    conf = data.getConf();
}
/**
 * Submit a topology.
 *
 * @param topologyname String: topology name
 * @param uploadedJarLocation String: path of the already uploaded jar
 * @param jsonConf String: the full topology configuration, serialized as JSON
 * @param topology StormTopology: topology object
 */
@SuppressWarnings("unchecked")
@Override
public void submitTopologyWithOpts(String topologyname, String uploadedJarLocation,
        String jsonConf, StormTopology topology, SubmitOptions options)
        throws AlreadyAliveException, InvalidTopologyException, TopologyAssignException, TException {
    LOG.info("Receive " + topologyname + ", uploadedJarLocation:" + uploadedJarLocation);
    // @@@ Move the topology name validation into client code
    try {
        checkTopologyActive(data, topologyname, false);
    } catch (AlreadyAliveException e) {
        LOG.info(topologyname + " already exists");
        throw e;
    } catch (Exception e) {
        LOG.info("Failed to check whether topology is alive or not", e);
        throw new TException(e);
    }

    int counter = data.getSubmittedCount().incrementAndGet();
    String topologyId = topologyname + "-" + counter + "-" + TimeUtils.current_time_secs();

    Map<Object, Object> serializedConf = (Map<Object, Object>) JStormUtils.from_json(jsonConf);
    if (serializedConf == null) {
        LOG.warn("Failed to parse the configuration JSON");
        throw new InvalidTopologyException("Failed to parse the topology configuration JSON");
    }
    serializedConf.put(Config.TOPOLOGY_ID, topologyId);
    serializedConf.put(Config.TOPOLOGY_NAME, topologyname);

    try {
        Map<Object, Object> stormConf;

        stormConf = NimbusUtils.normalizeConf(conf, serializedConf, topology);

        Map<Object, Object> totalStormConf = new HashMap<Object, Object>(conf);
        totalStormConf.putAll(stormConf);

        StormTopology normalizedTopology = NimbusUtils.normalizeTopology(stormConf, topology);

        // this validates the structure of the topology
        Common.validate_basic(normalizedTopology, totalStormConf, topologyId);
        // we don't need to generate the real topology, so skip Common.system_topology
        // Common.system_topology(totalStormConf, topology);

        StormClusterState stormClusterState = data.getStormClusterState();

        // create /local-dir/nimbus/topologyId/xxxx files
        setupStormCode(conf, topologyId, uploadedJarLocation, stormConf, normalizedTopology);

        // generate TaskInfo for every bolt or spout in ZK
        // /ZK/tasks/topologyId/xxx
        setupZkTaskInfo(conf, topologyId, stormClusterState);

        // make assignments for the topology
        TopologyAssignEvent assignEvent = new TopologyAssignEvent();
        assignEvent.setTopologyId(topologyId);
        assignEvent.setScratch(false);
        assignEvent.setTopologyName(topologyname);
        assignEvent.setOldStatus(
                Thrift.topologyInitialStatusToStormStatus(options.get_initial_status()));

        TopologyAssign.push(assignEvent);
        LOG.info("Submit for " + topologyname + " with conf " + serializedConf);

        boolean isSuccess = assignEvent.waitFinish();
        if (isSuccess) {
            LOG.info("Finish submit for " + topologyname);
        } else {
            throw new FailedAssignTopologyException(assignEvent.getErrorMsg());
        }
    } catch (FailedAssignTopologyException e) {
        StringBuilder sb = new StringBuilder();
        sb.append("Fail to submit topology, root cause:");
        if (e.getMessage() == null) {
            sb.append("submit timeout");
        } else {
            sb.append(e.getMessage());
        }

        sb.append("\n\n");
        sb.append("topologyId:" + topologyId);
        sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n");
        LOG.error(sb.toString(), e);
        throw new TopologyAssignException(sb.toString());
    } catch (InvalidParameterException e) {
        StringBuilder sb = new StringBuilder();
        sb.append("Fail to submit topology ");
        sb.append(e.getMessage());
        sb.append(", cause:" + e.getCause());
        sb.append("\n\n");
        sb.append("topologyId:" + topologyId);
        sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n");
        LOG.error(sb.toString(), e);
        throw new InvalidParameterException(sb.toString());
    } catch (Throwable e) {
        StringBuilder sb = new StringBuilder();
        sb.append("Fail to submit topology ");
        sb.append(e.getMessage());
        sb.append(", cause:" + e.getCause());
        sb.append("\n\n");
        sb.append("topologyId:" + topologyId);
        sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n");
        LOG.error(sb.toString(), e);
        throw new TopologyAssignException(sb.toString());
    }
}
/**
 * Get the cluster summary, containing SupervisorSummary and TopologySummary.
 *
 * @return ClusterSummary
 */
@Override
public ClusterSummary getClusterInfo() throws TException {

    try {
        StormClusterState stormClusterState = data.getStormClusterState();

        Map<String, Assignment> assignments = new HashMap<String, Assignment>();

        // get nimbus running time
        int uptime = data.uptime();

        // get TopologySummary
        List<TopologySummary> topologySummaries = new ArrayList<TopologySummary>();

        // get the StormBase of every active topology
        Map<String, StormBase> bases = Cluster.topology_bases(stormClusterState);
        for (Entry<String, StormBase> entry : bases.entrySet()) {

            String topologyId = entry.getKey();
            StormBase base = entry.getValue();

            Assignment assignment = stormClusterState.assignment_info(topologyId, null);
            if (assignment == null) {
                LOG.error("Failed to get assignment of " + topologyId);
                continue;
            }
            assignments.put(topologyId, assignment);

            String group = "default";
            if (data.isGroupMode()) {
                group = base.getGroup();
            }
            if (group == null) {
                group = "default";
            }

            TopologySummary topology = NimbusUtils.mkTopologySummary(assignment, topologyId,
                    base.getStormName(), base.getStatusString(),
                    TimeUtils.time_delta(base.getLanchTimeSecs()), group);

            topologySummaries.add(topology);
        }

        // all supervisors
        Map<String, SupervisorInfo> supervisorInfos =
                Cluster.allSupervisorInfo(stormClusterState, null);

        // generate SupervisorSummaries
        List<SupervisorSummary> supervisorSummaries =
                NimbusUtils.mkSupervisorSummaries(supervisorInfos, assignments);

        return new ClusterSummary(supervisorSummaries, uptime, topologySummaries,
                data.getGroupToTopology(), data.getGroupToResource(),
                data.getGroupToUsedResource(), data.isGroupMode());

    } catch (TException e) {
        LOG.info("Failed to get ClusterSummary ", e);
        throw e;
    } catch (Exception e) {
        LOG.info("Failed to get ClusterSummary ", e);
        throw new TException(e);
    }
}
@Override
public SupervisorWorkers getSupervisorWorkers(String host) throws NotAliveException, TException {
    try {
        StormClusterState stormClusterState = data.getStormClusterState();

        String supervisorId = null;
        SupervisorInfo supervisorInfo = null;

        String ip = NetWorkUtils.host2Ip(host);
        String hostName = NetWorkUtils.ip2Host(host);

        // all supervisors
        Map<String, SupervisorInfo> supervisorInfos =
                Cluster.allSupervisorInfo(stormClusterState, null);

        for (Entry<String, SupervisorInfo> entry : supervisorInfos.entrySet()) {
            SupervisorInfo info = entry.getValue();
            if (info.getHostName().equals(hostName) || info.getHostName().equals(ip)) {
                supervisorId = entry.getKey();
                supervisorInfo = info;
                break;
            }
        }

        if (supervisorId == null) {
            throw new TException("No supervisor of " + host);
        }

        Map<String, Assignment> assignments = new HashMap<String, Assignment>();

        // get the StormBase of every active topology
        Map<String, StormBase> bases = Cluster.topology_bases(stormClusterState);
        for (Entry<String, StormBase> entry : bases.entrySet()) {

            String topologyId = entry.getKey();

            Assignment assignment = stormClusterState.assignment_info(topologyId, null);
            if (assignment == null) {
                LOG.error("Failed to get assignment of " + topologyId);
                continue;
            }
            assignments.put(topologyId, assignment);
        }

        Map<Integer, WorkerSummary> portWorkerSummarys = new TreeMap<Integer, WorkerSummary>();
        for (Entry<String, Assignment> entry : assignments.entrySet()) {
            String topologyId = entry.getKey();
            Assignment assignment = entry.getValue();

            Map<Integer, String> taskToComponent =
                    Cluster.topology_task_info(stormClusterState, topologyId);

            Map<Integer, ResourceAssignment> taskToResource = assignment.getTaskToResource();

            for (Entry<Integer, ResourceAssignment> resourceEntry : taskToResource.entrySet()) {
                Integer taskId = resourceEntry.getKey();
                ResourceAssignment resourceAssignment = resourceEntry.getValue();
                if (!supervisorId.equals(resourceAssignment.getSupervisorId())) {
                    continue;
                }

                supervisorInfo.allocResource(resourceAssignment);

                Integer port = resourceAssignment.getPort();
                WorkerSummary workerSummary = portWorkerSummarys.get(port);
                if (workerSummary == null) {
                    workerSummary = new WorkerSummary();
                    workerSummary.set_port(port);
                    workerSummary.set_topology(topologyId);
                    workerSummary.set_tasks(new ArrayList<TaskSummary>());

                    portWorkerSummarys.put(port, workerSummary);
                }

                String componentName = taskToComponent.get(taskId);
                int uptime = TimeUtils.time_delta(assignment.getTaskStartTimeSecs().get(taskId));
                List<TaskSummary> tasks = workerSummary.get_tasks();

                TaskSummary taskSummary = NimbusUtils.mkSimpleTaskSummary(resourceAssignment,
                        taskId, componentName, host, uptime);

                tasks.add(taskSummary);
            }
        }

        List<WorkerSummary> workersList = new ArrayList<WorkerSummary>();
        workersList.addAll(portWorkerSummarys.values());

        SupervisorSummary supervisorSummary =
                NimbusUtils.mkSupervisorSummary(supervisorInfo, supervisorId);

        return new SupervisorWorkers(supervisorSummary, workersList);

    } catch (TException e) {
        LOG.info("Failed to get SupervisorWorkers ", e);
        throw e;
    } catch (Exception e) {
        LOG.info("Failed to get SupervisorWorkers ", e);
        throw new TException(e);
    }
}