/** * create and start one supervisor * * @param conf : configurationdefault.yaml storm.yaml * @param sharedContext : null (right now) * @return SupervisorManger: which is used to shutdown all workers and supervisor */ @SuppressWarnings("rawtypes") public SupervisorManger mkSupervisor(Map conf, IContext sharedContext) throws Exception { LOG.info("Starting Supervisor with conf " + conf); active = new AtomicBoolean(true); /** Step 1: cleanup all files in /storm-local-dir/supervisor/tmp */ String path = StormConfig.supervisorTmpDir(conf); FileUtils.cleanDirectory(new File(path)); /* * Step 2: create ZK operation instance StromClusterState */ StormClusterState stormClusterState = Cluster.mk_storm_cluster_state(conf); /* * Step 3, create LocalStat LocalStat is one KV database 4.1 create * LocalState instance 4.2 get supervisorId, if no supervisorId, create * one */ LocalState localState = StormConfig.supervisorState(conf); String supervisorId = (String) localState.get(Common.LS_ID); if (supervisorId == null) { supervisorId = UUID.randomUUID().toString(); localState.put(Common.LS_ID, supervisorId); } Vector<SmartThread> threads = new Vector<SmartThread>(); // Step 5 create HeartBeat // every supervisor.heartbeat.frequency.secs, write SupervisorInfo to ZK String myHostName = null; myHostName = ConfigExtension.getSupervisorHost(conf); if (myHostName == null) { myHostName = NetWorkUtils.hostname(); } Heartbeat hb = new Heartbeat(conf, stormClusterState, supervisorId, myHostName, active); hb.update(); AsyncLoopThread heartbeat = new AsyncLoopThread(hb, false, null, Thread.MIN_PRIORITY, true); threads.add(heartbeat); // Step 6 create and start sync Supervisor thread // every supervisor.monitor.frequency.secs second run SyncSupervisor EventManager processEventManager = new EventManagerImp(false); ConcurrentHashMap<String, String> workerThreadPids = new ConcurrentHashMap<String, String>(); SyncProcessEvent syncProcessEvent = new SyncProcessEvent(supervisorId, conf, localState, 
workerThreadPids, sharedContext); EventManager syncSupEventManager = new EventManagerImp(false); SyncSupervisorEvent syncSupervisorEvent = new SyncSupervisorEvent( supervisorId, conf, processEventManager, syncSupEventManager, stormClusterState, localState, syncProcessEvent); int syncFrequence = JStormUtils.parseInt(conf.get(Config.SUPERVISOR_MONITOR_FREQUENCY_SECS)); EventManagerPusher syncSupervisorPusher = new EventManagerPusher(syncSupEventManager, syncSupervisorEvent, active, syncFrequence); AsyncLoopThread syncSupervisorThread = new AsyncLoopThread(syncSupervisorPusher); threads.add(syncSupervisorThread); // Step 7 start sync process thread // every supervisor.monitor.frequency.secs run SyncProcesses // skip thread to do syncProcess, due to nimbus will check whether // worker is dead or not, if dead, it will reassign a new worker // // int syncProcessFrequence = syncFrequence/2; // EventManagerPusher syncProcessPusher = new EventManagerPusher( // processEventManager, syncProcessEvent, active, // syncProcessFrequence); // AsyncLoopThread syncProcessThread = new // AsyncLoopThread(syncProcessPusher); // threads.add(syncProcessThread); // Step 7 start httpserver Httpserver httpserver = new Httpserver(conf); httpserver.start(); LOG.info("Starting supervisor with id " + supervisorId + " at host " + myHostName); // SupervisorManger which can shutdown all supervisor and workers return new SupervisorManger( conf, supervisorId, active, threads, syncSupEventManager, processEventManager, httpserver, stormClusterState, workerThreadPids); }
@Override public SupervisorWorkers getSupervisorWorkers(String host) throws NotAliveException, TException { try { StormClusterState stormClusterState = data.getStormClusterState(); String supervisorId = null; SupervisorInfo supervisorInfo = null; String ip = NetWorkUtils.host2Ip(host); String hostName = NetWorkUtils.ip2Host(host); // all supervisors Map<String, SupervisorInfo> supervisorInfos = Cluster.allSupervisorInfo(stormClusterState, null); for (Entry<String, SupervisorInfo> entry : supervisorInfos.entrySet()) { SupervisorInfo info = entry.getValue(); if (info.getHostName().equals(hostName) || info.getHostName().equals(ip)) { supervisorId = entry.getKey(); supervisorInfo = info; break; } } if (supervisorId == null) { throw new TException("No supervisor of " + host); } Map<String, Assignment> assignments = new HashMap<String, Assignment>(); // get all active topology's StormBase Map<String, StormBase> bases = Cluster.topology_bases(stormClusterState); for (Entry<String, StormBase> entry : bases.entrySet()) { String topologyId = entry.getKey(); StormBase base = entry.getValue(); Assignment assignment = stormClusterState.assignment_info(topologyId, null); if (assignment == null) { LOG.error("Failed to get assignment of " + topologyId); continue; } assignments.put(topologyId, assignment); } Map<Integer, WorkerSummary> portWorkerSummarys = new TreeMap<Integer, WorkerSummary>(); for (Entry<String, Assignment> entry : assignments.entrySet()) { String topologyId = entry.getKey(); Assignment assignment = entry.getValue(); Map<Integer, String> taskToComponent = Cluster.topology_task_info(stormClusterState, topologyId); Map<Integer, ResourceAssignment> taskToResource = assignment.getTaskToResource(); for (Entry<Integer, ResourceAssignment> resourceEntry : taskToResource.entrySet()) { Integer taskId = resourceEntry.getKey(); ResourceAssignment resourceAssignment = resourceEntry.getValue(); if (supervisorId.equals(resourceAssignment.getSupervisorId()) == false) { continue; 
} supervisorInfo.allocResource(resourceAssignment); Integer port = resourceAssignment.getPort(); WorkerSummary workerSummary = portWorkerSummarys.get(port); if (workerSummary == null) { workerSummary = new WorkerSummary(); workerSummary.set_port(port); workerSummary.set_topology(topologyId); workerSummary.set_tasks(new ArrayList<TaskSummary>()); portWorkerSummarys.put(port, workerSummary); } String componentName = taskToComponent.get(taskId); int uptime = TimeUtils.time_delta(assignment.getTaskStartTimeSecs().get(taskId)); List<TaskSummary> tasks = workerSummary.get_tasks(); TaskSummary taskSummary = NimbusUtils.mkSimpleTaskSummary( resourceAssignment, taskId, componentName, host, uptime); tasks.add(taskSummary); } } List<WorkerSummary> wokersList = new ArrayList<WorkerSummary>(); wokersList.addAll(portWorkerSummarys.values()); SupervisorSummary supervisorSummary = NimbusUtils.mkSupervisorSummary(supervisorInfo, supervisorId); return new SupervisorWorkers(supervisorSummary, wokersList); } catch (TException e) { LOG.info("Failed to get ClusterSummary ", e); throw e; } catch (Exception e) { LOG.info("Failed to get ClusterSummary ", e); throw new TException(e); } }
/**
 * Creates a decoder and records the local endpoint it belongs to.
 *
 * @param isServer stored unchanged in the {@code isServer} field
 * @param conf configuration map from which the local worker port is read through
 *        {@code ConfigExtension.getLocalWorkerPort}
 */
public MessageDecoder(boolean isServer, Map conf) {
    // Local endpoint: port from the configuration, IP from the network utilities.
    this.localPort = ConfigExtension.getLocalWorkerPort(conf);
    this.localIp = NetWorkUtils.ip();

    this.isServer = isServer;
}