public DefaultTopologyAssignContext(TopologyAssignContext context) { super(context); try { sysTopology = Common.system_topology(stormConf, rawTopology); } catch (Exception e) { throw new FailedAssignTopologyException("Failed to generate system topology"); } sidToHostname = generateSidToHost(); hostToSid = JStormUtils.reverse_map(sidToHostname); if (oldAssignment != null && oldAssignment.getWorkers() != null) { oldWorkers = oldAssignment.getWorkers(); } else { oldWorkers = new HashSet<ResourceWorkerSlot>(); } refineDeadTasks(); componentTasks = JStormUtils.reverse_map(context.getTaskToComponent()); for (Entry<String, List<Integer>> entry : componentTasks.entrySet()) { List<Integer> componentTaskList = entry.getValue(); Collections.sort(componentTaskList); } totalWorkerNum = computeWorkerNum(); unstoppedWorkerNum = computeUnstoppedAssignments(); }
/** * get StormTopology throw deserialize local files * * @param id String: topology id * @return StormTopology */ @Override public StormTopology getTopology(String id) throws NotAliveException, TException { StormTopology topology = null; try { StormTopology stormtopology = StormConfig.read_nimbus_topology_code(conf, id); if (stormtopology == null) { throw new TException("topology:" + id + "is null"); } Map<Object, Object> topologyConf = (Map<Object, Object>) StormConfig.read_nimbus_topology_conf(conf, id); topology = Common.system_topology(topologyConf, stormtopology); } catch (Exception e) { LOG.error("Failed to get topology " + id + ",", e); throw new TException("Failed to get system_topology"); } return topology; }
/** * generate a taskid(Integer) for every task * * @param conf * @param topologyid * @return Map<Integer, String>: from taskid to componentid * @throws IOException * @throws InvalidTopologyException */ public Map<Integer, String> mkTaskComponentAssignments( Map<Object, Object> conf, String topologyid) throws IOException, InvalidTopologyException { // @@@ here exist a little problem, // we can directly pass stormConf from Submit method Map<Object, Object> stormConf = StormConfig.read_nimbus_topology_conf(conf, topologyid); StormTopology stopology = StormConfig.read_nimbus_topology_code(conf, topologyid); // use TreeMap to make task as sequence Map<Integer, String> rtn = new TreeMap<Integer, String>(); StormTopology topology = Common.system_topology(stormConf, stopology); Integer count = 0; count = mkTaskMaker(stormConf, topology.get_bolts(), rtn, count); count = mkTaskMaker(stormConf, topology.get_spouts(), rtn, count); count = mkTaskMaker(stormConf, topology.get_state_spouts(), rtn, count); return rtn; }
@Override public <T> Object execute(T... args) { boolean isSetTaskInfo = false; try { Boolean reassign = (Boolean) args[1]; Map<Object, Object> conf = (Map<Object, Object>) args[2]; // args[0]: // delay, // args[1]: // reassign_flag, // args[2]: // conf if (conf != null) { boolean isConfUpdate = false; Map stormConf = data.getConf(); // Update topology code Map topoConf = StormConfig.read_nimbus_topology_conf(stormConf, topologyid); StormTopology rawOldTopology = StormConfig.read_nimbus_topology_code(stormConf, topologyid); StormTopology rawNewTopology = NimbusUtils.normalizeTopology(conf, rawOldTopology, true); StormTopology sysOldTopology = rawOldTopology.deepCopy(); StormTopology sysNewTopology = rawNewTopology.deepCopy(); if (conf.get(Config.TOPOLOGY_ACKER_EXECUTORS) != null) { Common.add_acker(topoConf, sysOldTopology); Common.add_acker(conf, sysNewTopology); int ackerNum = JStormUtils.parseInt(conf.get(Config.TOPOLOGY_ACKER_EXECUTORS)); int oldAckerNum = JStormUtils.parseInt(topoConf.get(Config.TOPOLOGY_ACKER_EXECUTORS)); LOG.info("Update acker from oldAckerNum=" + oldAckerNum + " to ackerNum=" + ackerNum); topoConf.put(Config.TOPOLOGY_ACKER_EXECUTORS, ackerNum); isConfUpdate = true; } // If scale-out, setup task info for new added tasks setTaskInfo(sysOldTopology, sysNewTopology); isSetTaskInfo = true; // If everything is OK, write topology code into disk StormConfig.write_nimbus_topology_code( stormConf, topologyid, Utils.serialize(rawNewTopology)); // Update topology conf if worker num has been updated Set<Object> keys = conf.keySet(); Integer workerNum = JStormUtils.parseInt(conf.get(Config.TOPOLOGY_WORKERS)); if (workerNum != null) { Integer oldWorkerNum = JStormUtils.parseInt(topoConf.get(Config.TOPOLOGY_WORKERS)); topoConf.put(Config.TOPOLOGY_WORKERS, workerNum); isConfUpdate = true; LOG.info("Update worker num from " + oldWorkerNum + " to " + workerNum); } if (keys.contains(Config.ISOLATION_SCHEDULER_MACHINES)) { topoConf.put( Config.ISOLATION_SCHEDULER_MACHINES, conf.get(Config.ISOLATION_SCHEDULER_MACHINES)); } if (isConfUpdate) { StormConfig.write_nimbus_topology_conf(stormConf, topologyid, topoConf); } } TopologyAssignEvent event = new TopologyAssignEvent(); event.setTopologyId(topologyid); event.setScratch(true); event.setOldStatus(oldStatus); event.setReassign(reassign); if (conf != null) event.setScaleTopology(true); TopologyAssign.push(event); event.waitFinish(); } catch (Exception e) { LOG.error("do-rebalance error!", e); // Rollback the changes on ZK if (isSetTaskInfo) { try { StormClusterState clusterState = data.getStormClusterState(); clusterState.remove_task(topologyid, newTasks); } catch (Exception e1) { LOG.error("Failed to rollback the changes on ZK for task-" + newTasks, e); } } } DelayStatusTransitionCallback delayCallback = new DelayStatusTransitionCallback( data, topologyid, oldStatus, StatusType.rebalancing, StatusType.done_rebalance); return delayCallback.execute(); }
@SuppressWarnings({"rawtypes", "unchecked"}) public WorkerData( Map conf, IContext context, String topology_id, String supervisor_id, int port, String worker_id, String jar_path) throws Exception { this.conf = conf; this.context = context; this.topologyId = topology_id; this.supervisorId = supervisor_id; this.port = port; this.workerId = worker_id; this.active = new AtomicBoolean(true); this.topologyStatus = StatusType.active; if (StormConfig.cluster_mode(conf).equals("distributed")) { String pidDir = StormConfig.worker_pids_root(conf, worker_id); JStormServerUtils.createPid(pidDir); } // create zk interface this.zkClusterstate = ZkTool.mk_distributed_cluster_state(conf); this.zkCluster = Cluster.mk_storm_cluster_state(zkClusterstate); Map rawConf = StormConfig.read_supervisor_topology_conf(conf, topology_id); this.stormConf = new HashMap<Object, Object>(); this.stormConf.putAll(conf); this.stormConf.putAll(rawConf); LOG.info("Worker Configuration " + stormConf); try { boolean enableClassloader = ConfigExtension.isEnableTopologyClassLoader(stormConf); boolean enableDebugClassloader = ConfigExtension.isEnableClassloaderDebug(stormConf); if (jar_path == null && enableClassloader == true) { LOG.error("enable classloader, but not app jar"); throw new InvalidParameterException(); } URL[] urlArray = new URL[0]; if (jar_path != null) { String[] paths = jar_path.split(":"); Set<URL> urls = new HashSet<URL>(); for (String path : paths) { if (StringUtils.isBlank(path)) continue; URL url = new URL("File:" + path); urls.add(url); } urlArray = urls.toArray(new URL[0]); } WorkerClassLoader.mkInstance( urlArray, ClassLoader.getSystemClassLoader(), ClassLoader.getSystemClassLoader().getParent(), enableClassloader, enableDebugClassloader); } catch (Exception e) { // TODO Auto-generated catch block LOG.error("init jarClassLoader error!", e); throw new InvalidParameterException(); } if (this.context == null) { this.context = TransportFactory.makeContext(stormConf); } boolean disruptorUseSleep = ConfigExtension.isDisruptorUseSleep(stormConf); DisruptorQueue.setUseSleep(disruptorUseSleep); boolean isLimited = ConfigExtension.getTopologyBufferSizeLimited(stormConf); DisruptorQueue.setLimited(isLimited); LOG.info("Disruptor use sleep:" + disruptorUseSleep + ", limited size:" + isLimited); // this.transferQueue = new LinkedBlockingQueue<TransferData>(); int buffer_size = Utils.getInt(conf.get(Config.TOPOLOGY_TRANSFER_BUFFER_SIZE)); WaitStrategy waitStrategy = (WaitStrategy) Utils.newInstance((String) conf.get(Config.TOPOLOGY_DISRUPTOR_WAIT_STRATEGY)); this.transferQueue = DisruptorQueue.mkInstance("TotalTransfer", ProducerType.MULTI, buffer_size, waitStrategy); this.transferQueue.consumerStarted(); this.sendingQueue = DisruptorQueue.mkInstance("TotalSending", ProducerType.MULTI, buffer_size, waitStrategy); this.sendingQueue.consumerStarted(); this.nodeportSocket = new ConcurrentHashMap<WorkerSlot, IConnection>(); this.taskNodeport = new ConcurrentHashMap<Integer, WorkerSlot>(); this.workerToResource = new ConcurrentSkipListSet<ResourceWorkerSlot>(); this.innerTaskTransfer = new ConcurrentHashMap<Integer, DisruptorQueue>(); this.deserializeQueues = new ConcurrentHashMap<Integer, DisruptorQueue>(); Assignment assignment = zkCluster.assignment_info(topologyId, null); if (assignment == null) { String errMsg = "Failed to get Assignment of " + topologyId; LOG.error(errMsg); throw new RuntimeException(errMsg); } workerToResource.addAll(assignment.getWorkers()); // get current worker's task list this.taskids = assignment.getCurrentWorkerTasks(supervisorId, port); if (taskids.size() == 0) { throw new RuntimeException("No tasks running current workers"); } LOG.info("Current worker taskList:" + taskids); // deserialize topology code from local dir rawTopology = StormConfig.read_supervisor_topology_code(conf, topology_id); sysTopology = Common.system_topology(stormConf, rawTopology); generateMaps(); contextMaker = new ContextMaker(this); metricReporter = new MetricReporter(this); outTaskStatus = new HashMap<Integer, Boolean>(); threadPool = Executors.newScheduledThreadPool(THREAD_POOL_NUM); TimerTrigger.setScheduledExecutorService(threadPool); LOG.info("Successfully create WorkerData"); }
/** * Submit one Topology * * @param topologyname String: topology name * @param uploadedJarLocation String: already uploaded jar path * @param jsonConf String: jsonConf serialize all toplogy configuration to Json * @param topology StormTopology: topology Object */ @SuppressWarnings("unchecked") @Override public void submitTopologyWithOpts( String topologyname, String uploadedJarLocation, String jsonConf, StormTopology topology, SubmitOptions options) throws AlreadyAliveException, InvalidTopologyException, TopologyAssignException, TException { LOG.info("Receive " + topologyname + ", uploadedJarLocation:" + uploadedJarLocation); // @@@ Move validate topologyname in client code try { checkTopologyActive(data, topologyname, false); } catch (AlreadyAliveException e) { LOG.info(topologyname + " is already exist "); throw e; } catch (Exception e) { LOG.info("Failed to check whether topology is alive or not", e); throw new TException(e); } int counter = data.getSubmittedCount().incrementAndGet(); String topologyId = topologyname + "-" + counter + "-" + TimeUtils.current_time_secs(); Map<Object, Object> serializedConf = (Map<Object, Object>) JStormUtils.from_json(jsonConf); if (serializedConf == null) { LOG.warn("Failed to serialized Configuration"); throw new InvalidTopologyException("Failed to serilaze topology configuration"); } serializedConf.put(Config.TOPOLOGY_ID, topologyId); serializedConf.put(Config.TOPOLOGY_NAME, topologyname); try { Map<Object, Object> stormConf; stormConf = NimbusUtils.normalizeConf(conf, serializedConf, topology); Map<Object, Object> totalStormConf = new HashMap<Object, Object>(conf); totalStormConf.putAll(stormConf); StormTopology normalizedTopology = NimbusUtils.normalizeTopology(stormConf, topology); // this validates the structure of the topology Common.validate_basic(normalizedTopology, totalStormConf, topologyId); // don't need generate real topology, so skip Common.system_topology // Common.system_topology(totalStormConf, topology); StormClusterState stormClusterState = data.getStormClusterState(); // create /local-dir/nimbus/topologyId/xxxx files setupStormCode(conf, topologyId, uploadedJarLocation, stormConf, normalizedTopology); // generate TaskInfo for every bolt or spout in ZK // /ZK/tasks/topoologyId/xxx setupZkTaskInfo(conf, topologyId, stormClusterState); // make assignments for a topology TopologyAssignEvent assignEvent = new TopologyAssignEvent(); assignEvent.setTopologyId(topologyId); assignEvent.setScratch(false); assignEvent.setTopologyName(topologyname); assignEvent.setOldStatus( Thrift.topologyInitialStatusToStormStatus(options.get_initial_status())); TopologyAssign.push(assignEvent); LOG.info("Submit for " + topologyname + " with conf " + serializedConf); boolean isSuccess = assignEvent.waitFinish(); if (isSuccess == true) { LOG.info("Finish submit for " + topologyname); } else { throw new FailedAssignTopologyException(assignEvent.getErrorMsg()); } } catch (FailedAssignTopologyException e) { StringBuilder sb = new StringBuilder(); sb.append("Fail to sumbit topology, Root cause:"); if (e.getMessage() == null) { sb.append("submit timeout"); } else { sb.append(e.getMessage()); } sb.append("\n\n"); sb.append("topologyId:" + topologyId); sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n"); LOG.error(sb.toString(), e); throw new TopologyAssignException(sb.toString()); } catch (InvalidParameterException e) { StringBuilder sb = new StringBuilder(); sb.append("Fail to sumbit topology "); sb.append(e.getMessage()); sb.append(", cause:" + e.getCause()); sb.append("\n\n"); sb.append("topologyId:" + topologyId); sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n"); LOG.error(sb.toString(), e); throw new InvalidParameterException(sb.toString()); } catch (Throwable e) { StringBuilder sb = new StringBuilder(); sb.append("Fail to sumbit topology "); sb.append(e.getMessage()); sb.append(", cause:" + e.getCause()); sb.append("\n\n"); sb.append("topologyId:" + topologyId); sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n"); LOG.error(sb.toString(), e); throw new TopologyAssignException(sb.toString()); } }