/** * get topology configuration * * @param id String: topology id * @return String */ @Override public String getTopologyConf(String id) throws NotAliveException, TException { String rtn; try { Map<Object, Object> topologyConf = StormConfig.read_nimbus_topology_conf(conf, id); rtn = JStormUtils.to_json(topologyConf); } catch (IOException e) { // TODO Auto-generated catch block LOG.info("Failed to get configuration of " + id, e); throw new TException(e); } return rtn; }
/**
 * Returns the system topology of a topology, built from the locally stored topology code and
 * topology configuration.
 *
 * <p>The raw topology is read with {@code StormConfig.read_nimbus_topology_code}, the
 * configuration with {@code StormConfig.read_nimbus_topology_conf}, and both are passed to
 * {@code Common.system_topology}.
 *
 * @param id topology id
 * @return the topology produced by {@code Common.system_topology}
 * @throws NotAliveException declared in the signature; any exception raised inside the body is
 *     rewrapped as a {@link TException}
 * @throws TException if the stored topology is {@code null} or any step fails; the underlying
 *     exception is attached as the cause
 */
@Override
public StormTopology getTopology(String id) throws NotAliveException, TException {
    try {
        StormTopology stormtopology = StormConfig.read_nimbus_topology_code(conf, id);
        if (stormtopology == null) {
            throw new TException("topology:" + id + "is null");
        }
        Map<Object, Object> topologyConf = StormConfig.read_nimbus_topology_conf(conf, id);
        return Common.system_topology(topologyConf, stormtopology);
    } catch (Exception e) {
        LOG.error("Failed to get topology " + id + ",", e);
        // Keep the root cause so callers can see why the lookup failed.
        throw new TException("Failed to get system_topology", e);
    }
}
/** * generate a taskid(Integer) for every task * * @param conf * @param topologyid * @return Map<Integer, String>: from taskid to componentid * @throws IOException * @throws InvalidTopologyException */ public Map<Integer, String> mkTaskComponentAssignments( Map<Object, Object> conf, String topologyid) throws IOException, InvalidTopologyException { // @@@ here exist a little problem, // we can directly pass stormConf from Submit method Map<Object, Object> stormConf = StormConfig.read_nimbus_topology_conf(conf, topologyid); StormTopology stopology = StormConfig.read_nimbus_topology_code(conf, topologyid); // use TreeMap to make task as sequence Map<Integer, String> rtn = new TreeMap<Integer, String>(); StormTopology topology = Common.system_topology(stormConf, stopology); Integer count = 0; count = mkTaskMaker(stormConf, topology.get_bolts(), rtn, count); count = mkTaskMaker(stormConf, topology.get_spouts(), rtn, count); count = mkTaskMaker(stormConf, topology.get_state_spouts(), rtn, count); return rtn; }
@Override public <T> Object execute(T... args) { boolean isSetTaskInfo = false; try { Boolean reassign = (Boolean) args[1]; Map<Object, Object> conf = (Map<Object, Object>) args[2]; // args[0]: // delay, // args[1]: // reassign_flag, // args[2]: // conf if (conf != null) { boolean isConfUpdate = false; Map stormConf = data.getConf(); // Update topology code Map topoConf = StormConfig.read_nimbus_topology_conf(stormConf, topologyid); StormTopology rawOldTopology = StormConfig.read_nimbus_topology_code(stormConf, topologyid); StormTopology rawNewTopology = NimbusUtils.normalizeTopology(conf, rawOldTopology, true); StormTopology sysOldTopology = rawOldTopology.deepCopy(); StormTopology sysNewTopology = rawNewTopology.deepCopy(); if (conf.get(Config.TOPOLOGY_ACKER_EXECUTORS) != null) { Common.add_acker(topoConf, sysOldTopology); Common.add_acker(conf, sysNewTopology); int ackerNum = JStormUtils.parseInt(conf.get(Config.TOPOLOGY_ACKER_EXECUTORS)); int oldAckerNum = JStormUtils.parseInt(topoConf.get(Config.TOPOLOGY_ACKER_EXECUTORS)); LOG.info("Update acker from oldAckerNum=" + oldAckerNum + " to ackerNum=" + ackerNum); topoConf.put(Config.TOPOLOGY_ACKER_EXECUTORS, ackerNum); isConfUpdate = true; } // If scale-out, setup task info for new added tasks setTaskInfo(sysOldTopology, sysNewTopology); isSetTaskInfo = true; // If everything is OK, write topology code into disk StormConfig.write_nimbus_topology_code( stormConf, topologyid, Utils.serialize(rawNewTopology)); // Update topology conf if worker num has been updated Set<Object> keys = conf.keySet(); Integer workerNum = JStormUtils.parseInt(conf.get(Config.TOPOLOGY_WORKERS)); if (workerNum != null) { Integer oldWorkerNum = JStormUtils.parseInt(topoConf.get(Config.TOPOLOGY_WORKERS)); topoConf.put(Config.TOPOLOGY_WORKERS, workerNum); isConfUpdate = true; LOG.info("Update worker num from " + oldWorkerNum + " to " + workerNum); } if (keys.contains(Config.ISOLATION_SCHEDULER_MACHINES)) { topoConf.put( 
Config.ISOLATION_SCHEDULER_MACHINES, conf.get(Config.ISOLATION_SCHEDULER_MACHINES)); } if (isConfUpdate) { StormConfig.write_nimbus_topology_conf(stormConf, topologyid, topoConf); } } TopologyAssignEvent event = new TopologyAssignEvent(); event.setTopologyId(topologyid); event.setScratch(true); event.setOldStatus(oldStatus); event.setReassign(reassign); if (conf != null) event.setScaleTopology(true); TopologyAssign.push(event); event.waitFinish(); } catch (Exception e) { LOG.error("do-rebalance error!", e); // Rollback the changes on ZK if (isSetTaskInfo) { try { StormClusterState clusterState = data.getStormClusterState(); clusterState.remove_task(topologyid, newTasks); } catch (Exception e1) { LOG.error("Failed to rollback the changes on ZK for task-" + newTasks, e); } } } DelayStatusTransitionCallback delayCallback = new DelayStatusTransitionCallback( data, topologyid, oldStatus, StatusType.rebalancing, StatusType.done_rebalance); return delayCallback.execute(); }
protected TopologyAssignContext prepareTopologyAssign(TopologyAssignEvent event) throws Exception { TopologyAssignContext ret = new TopologyAssignContext(); String topologyId = event.getTopologyId(); /** 读取本地目录下的stormconf.ser和stormcode.ser */ Map<Object, Object> nimbusConf = nimbusData.getConf(); Map<Object, Object> topologyConf = StormConfig.read_nimbus_topology_conf(nimbusConf, topologyId); StormTopology rawTopology = StormConfig.read_nimbus_topology_code(nimbusConf, topologyId); ret.setRawTopology(rawTopology); Map stormConf = new HashMap(); stormConf.putAll(nimbusConf); stormConf.putAll(topologyConf); ret.setStormConf(stormConf); StormClusterState stormClusterState = nimbusData.getStormClusterState(); // get all running supervisor, don't need callback to watch supervisor /** 获取所有的运行的supervisor,以及supervisorInfo */ Map<String, SupervisorInfo> supInfos = Cluster.allSupervisorInfo(stormClusterState, null); if (supInfos.size() == 0) { throw new FailedAssignTopologyException( "Failed to make assignment " + topologyId + ", due to no alive supervisor"); } /** 获取topologyId下的所有tasks */ Map<Integer, String> taskToComponent = Cluster.topology_task_info(stormClusterState, topologyId); ret.setTaskToComponent(taskToComponent); // get taskids /ZK/tasks/topologyId Set<Integer> allTaskIds = taskToComponent.keySet(); if (allTaskIds == null || allTaskIds.size() == 0) { String errMsg = "Failed to get all task ID list from /ZK-dir/tasks/" + topologyId; LOG.warn(errMsg); throw new IOException(errMsg); } ret.setAllTaskIds(allTaskIds); Set<Integer> aliveTasks = new HashSet<Integer>(); // unstoppedTasks are tasks which are alive on no supervisor's(dead) // machine /** 未完成的任务,supervisor已死,任务没完成 */ Set<Integer> unstoppedTasks = new HashSet<Integer>(); Set<Integer> deadTasks = new HashSet<Integer>(); Set<ResourceWorkerSlot> unstoppedWorkers = new HashSet<ResourceWorkerSlot>(); Assignment existingAssignment = stormClusterState.assignment_info(topologyId, null); if (existingAssignment != 
null) { aliveTasks = getAliveTasks(topologyId, allTaskIds); unstoppedTasks = getUnstoppedSlots(aliveTasks, supInfos, existingAssignment); deadTasks.addAll(allTaskIds); deadTasks.removeAll(aliveTasks); } ret.setDeadTaskIds(deadTasks); ret.setUnstoppedTaskIds(unstoppedTasks); // Step 2: get all slots resource, free slots/ alive slots/ unstopped // slots getFreeSlots(supInfos, stormClusterState); ret.setCluster(supInfos); if (existingAssignment == null) { ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_NEW); try { AssignmentBak lastAssignment = stormClusterState.assignment_bak(event.getTopologyName()); if (lastAssignment != null) { ret.setOldAssignment(lastAssignment.getAssignment()); } } catch (Exception e) { LOG.warn("Fail to get old assignment", e); } } else { ret.setOldAssignment(existingAssignment); if (event.isScratch()) { ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_REBALANCE); unstoppedWorkers = getUnstoppedWorkers(unstoppedTasks, existingAssignment); ret.setUnstoppedWorkers(unstoppedWorkers); } else { ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_MONITOR); unstoppedWorkers = getUnstoppedWorkers(aliveTasks, existingAssignment); ret.setUnstoppedWorkers(unstoppedWorkers); } } return ret; }