/** * start a topology: set active status of the topology * * @param topologyName * @param stormClusterState * @param topologyId * @throws Exception */ public void setTopologyStatus(TopologyAssignEvent event) throws Exception { StormClusterState stormClusterState = nimbusData.getStormClusterState(); String topologyId = event.getTopologyId(); String topologyName = event.getTopologyName(); String group = event.getGroup(); StormStatus status = new StormStatus(StatusType.active); if (event.getOldStatus() != null) { status = event.getOldStatus(); } StormBase stormBase = stormClusterState.storm_base(topologyId, null); if (stormBase == null) { stormBase = new StormBase(topologyName, TimeUtils.current_time_secs(), status, group); stormClusterState.activate_storm(topologyId, stormBase); } else { stormClusterState.update_storm(topologyId, status); // here exist one hack operation // when monitor/rebalance/startup topologyName is null if (topologyName == null) { event.setTopologyName(stormBase.getStormName()); } } LOG.info("Update " + topologyId + " " + status); }
// Utility functions, just used here public static Long tuple_time_delta(RotatingMap<Tuple, Long> start_times, Tuple tuple) { Long start_time = (Long) start_times.remove(tuple); if (start_time != null) { return TimeUtils.time_delta_ms(start_time); } return null; }
public TaskHeartbeatTrigger( Map conf, String name, DisruptorQueue queue, BlockingQueue<Object> controlQueue, int taskId, String componentId, TopologyContext sysTopologyCtx, ITaskReportErr reportError) { this.name = name; this.queue = queue; this.controlQueue = controlQueue; this.opCode = TimerConstants.TASK_HEARTBEAT; this.taskId = taskId; this.componentId = componentId; this.sysTopologyCtx = sysTopologyCtx; this.frequence = JStormUtils.parseInt(conf.get(Config.TASK_HEARTBEAT_FREQUENCY_SECS), 10); this.firstTime = frequence; this.executeThreadHbTime = TimeUtils.current_time_secs(); this.taskHbTimeout = JStormUtils.parseInt(conf.get(Config.NIMBUS_TASK_TIMEOUT_SECS), 180); this.intervalCheck = new IntervalCheck(); this.intervalCheck.setInterval(taskHbTimeout); this.intervalCheck.start(); this.reportError = reportError; this.uptime = new UptimeComputer(); }
/** * @param existingAssignment * @param taskWorkerSlot * @return * @throws Exception */ public static Map<Integer, Integer> getTaskStartTimes( TopologyAssignContext context, NimbusData nimbusData, String topologyId, Assignment existingAssignment, Set<ResourceWorkerSlot> workers) throws Exception { Map<Integer, Integer> startTimes = new TreeMap<Integer, Integer>(); if (context.getAssignType() == TopologyAssignContext.ASSIGN_TYPE_NEW) { int nowSecs = TimeUtils.current_time_secs(); for (ResourceWorkerSlot worker : workers) { for (Integer changedTaskId : worker.getTasks()) { startTimes.put(changedTaskId, nowSecs); } } return startTimes; } Set<ResourceWorkerSlot> oldWorkers = new HashSet<ResourceWorkerSlot>(); if (existingAssignment != null) { Map<Integer, Integer> taskStartTimeSecs = existingAssignment.getTaskStartTimeSecs(); if (taskStartTimeSecs != null) { startTimes.putAll(taskStartTimeSecs); } if (existingAssignment.getWorkers() != null) { oldWorkers = existingAssignment.getWorkers(); } } StormClusterState zkClusterState = nimbusData.getStormClusterState(); Set<Integer> changeTaskIds = getChangeTaskIds(oldWorkers, workers); int nowSecs = TimeUtils.current_time_secs(); for (Integer changedTaskId : changeTaskIds) { startTimes.put(changedTaskId, nowSecs); zkClusterState.remove_task_heartbeat(topologyId, changedTaskId); } LOG.info("Task assignment has been changed " + changeTaskIds); return startTimes; }
private void checkExecuteThreadHb() { long currentTime = TimeUtils.current_time_secs(); if (currentTime - executeThreadHbTime > taskHbTimeout) { String error = "No response from Task-" + taskId + ", last report time(sec) is " + executeThreadHbTime; reportError.report(error); } }
@Override public void onEvent(Object event, long sequence, boolean endOfBatch) throws Exception { if (event == null) { return; } boltExeTimer.start(); try { if (event instanceof RotatingMapTrigger.Tick) { // don't check the timetick name to improve performance Map<Tuple, Long> timeoutMap = tuple_start_times.rotate(); if (ackerNum > 0) { // only when acker is enable for (Entry<Tuple, Long> entry : timeoutMap.entrySet()) { Tuple input = entry.getKey(); task_stats.bolt_failed_tuple(input.getSourceComponent(), input.getSourceStreamId()); } } return; } Tuple tuple = (Tuple) event; task_stats.recv_tuple(tuple.getSourceComponent(), tuple.getSourceStreamId()); tuple_start_times.put(tuple, System.currentTimeMillis()); try { bolt.execute(tuple); } catch (Throwable e) { error = e; LOG.error("bolt execute error ", e); report_error.report(e); } if (ackerNum == 0) { // only when acker is disable // get tuple process latency Long start_time = (Long) tuple_start_times.remove(tuple); if (start_time != null) { Long delta = TimeUtils.time_delta_ms(start_time); task_stats.bolt_acked_tuple(tuple.getSourceComponent(), tuple.getSourceStreamId(), delta); } } } finally { boltExeTimer.stop(); } }
/** * Get TopologyInfo, it contain all data of the topology running status * * @return TopologyInfo */ @Override public TopologyInfo getTopologyInfo(String topologyId) throws NotAliveException, TException { TopologyInfo topologyInfo = new TopologyInfo(); StormClusterState stormClusterState = data.getStormClusterState(); try { // get topology's StormBase StormBase base = stormClusterState.storm_base(topologyId, null); if (base == null) { throw new NotAliveException("No topology of " + topologyId); } topologyInfo.set_id(topologyId); topologyInfo.set_name(base.getStormName()); topologyInfo.set_uptime_secs(TimeUtils.time_delta(base.getLanchTimeSecs())); topologyInfo.set_status(base.getStatusString()); // get topology's Assignment Assignment assignment = stormClusterState.assignment_info(topologyId, null); if (assignment == null) { throw new TException("Failed to get StormBase from ZK of " + topologyId); } // get topology's map<taskId, componentId> Map<Integer, String> taskInfo = Cluster.topology_task_info(stormClusterState, topologyId); List<TaskSummary> tasks = NimbusUtils.mkTaskSummary(stormClusterState, assignment, taskInfo, topologyId); topologyInfo.set_tasks(tasks); return topologyInfo; } catch (TException e) { LOG.info("Failed to get topologyInfo " + topologyId, e); throw e; } catch (Exception e) { LOG.info("Failed to get topologyInfo " + topologyId, e); throw new TException("Failed to get topologyInfo" + topologyId); } }
@Override public SupervisorWorkers getSupervisorWorkers(String host) throws NotAliveException, TException { try { StormClusterState stormClusterState = data.getStormClusterState(); String supervisorId = null; SupervisorInfo supervisorInfo = null; String ip = NetWorkUtils.host2Ip(host); String hostName = NetWorkUtils.ip2Host(host); // all supervisors Map<String, SupervisorInfo> supervisorInfos = Cluster.allSupervisorInfo(stormClusterState, null); for (Entry<String, SupervisorInfo> entry : supervisorInfos.entrySet()) { SupervisorInfo info = entry.getValue(); if (info.getHostName().equals(hostName) || info.getHostName().equals(ip)) { supervisorId = entry.getKey(); supervisorInfo = info; break; } } if (supervisorId == null) { throw new TException("No supervisor of " + host); } Map<String, Assignment> assignments = new HashMap<String, Assignment>(); // get all active topology's StormBase Map<String, StormBase> bases = Cluster.topology_bases(stormClusterState); for (Entry<String, StormBase> entry : bases.entrySet()) { String topologyId = entry.getKey(); StormBase base = entry.getValue(); Assignment assignment = stormClusterState.assignment_info(topologyId, null); if (assignment == null) { LOG.error("Failed to get assignment of " + topologyId); continue; } assignments.put(topologyId, assignment); } Map<Integer, WorkerSummary> portWorkerSummarys = new TreeMap<Integer, WorkerSummary>(); for (Entry<String, Assignment> entry : assignments.entrySet()) { String topologyId = entry.getKey(); Assignment assignment = entry.getValue(); Map<Integer, String> taskToComponent = Cluster.topology_task_info(stormClusterState, topologyId); Map<Integer, ResourceAssignment> taskToResource = assignment.getTaskToResource(); for (Entry<Integer, ResourceAssignment> resourceEntry : taskToResource.entrySet()) { Integer taskId = resourceEntry.getKey(); ResourceAssignment resourceAssignment = resourceEntry.getValue(); if (supervisorId.equals(resourceAssignment.getSupervisorId()) == false) { continue; } supervisorInfo.allocResource(resourceAssignment); Integer port = resourceAssignment.getPort(); WorkerSummary workerSummary = portWorkerSummarys.get(port); if (workerSummary == null) { workerSummary = new WorkerSummary(); workerSummary.set_port(port); workerSummary.set_topology(topologyId); workerSummary.set_tasks(new ArrayList<TaskSummary>()); portWorkerSummarys.put(port, workerSummary); } String componentName = taskToComponent.get(taskId); int uptime = TimeUtils.time_delta(assignment.getTaskStartTimeSecs().get(taskId)); List<TaskSummary> tasks = workerSummary.get_tasks(); TaskSummary taskSummary = NimbusUtils.mkSimpleTaskSummary( resourceAssignment, taskId, componentName, host, uptime); tasks.add(taskSummary); } } List<WorkerSummary> wokersList = new ArrayList<WorkerSummary>(); wokersList.addAll(portWorkerSummarys.values()); SupervisorSummary supervisorSummary = NimbusUtils.mkSupervisorSummary(supervisorInfo, supervisorId); return new SupervisorWorkers(supervisorSummary, wokersList); } catch (TException e) { LOG.info("Failed to get ClusterSummary ", e); throw e; } catch (Exception e) { LOG.info("Failed to get ClusterSummary ", e); throw new TException(e); } }
/** * get cluster's summary, it will contain SupervisorSummary and TopologySummary * * @return ClusterSummary */ @Override public ClusterSummary getClusterInfo() throws TException { try { StormClusterState stormClusterState = data.getStormClusterState(); Map<String, Assignment> assignments = new HashMap<String, Assignment>(); // get nimbus running time int uptime = data.uptime(); // get TopologySummary List<TopologySummary> topologySummaries = new ArrayList<TopologySummary>(); // get all active topology's StormBase Map<String, StormBase> bases = Cluster.topology_bases(stormClusterState); for (Entry<String, StormBase> entry : bases.entrySet()) { String topologyId = entry.getKey(); StormBase base = entry.getValue(); Assignment assignment = stormClusterState.assignment_info(topologyId, null); if (assignment == null) { LOG.error("Failed to get assignment of " + topologyId); continue; } assignments.put(topologyId, assignment); String group = "default"; if (data.isGroupMode()) group = base.getGroup(); if (group == null) group = "default"; TopologySummary topology = NimbusUtils.mkTopologySummary( assignment, topologyId, base.getStormName(), base.getStatusString(), TimeUtils.time_delta(base.getLanchTimeSecs()), group); topologySummaries.add(topology); } // all supervisors Map<String, SupervisorInfo> supervisorInfos = Cluster.allSupervisorInfo(stormClusterState, null); // generate SupervisorSummaries List<SupervisorSummary> supervisorSummaries = NimbusUtils.mkSupervisorSummaries(supervisorInfos, assignments); return new ClusterSummary( supervisorSummaries, uptime, topologySummaries, data.getGroupToTopology(), data.getGroupToResource(), data.getGroupToUsedResource(), data.isGroupMode()); } catch (TException e) { LOG.info("Failed to get ClusterSummary ", e); throw e; } catch (Exception e) { LOG.info("Failed to get ClusterSummary ", e); throw new TException(e); } }
/** * Submit one Topology * * @param topologyname String: topology name * @param uploadedJarLocation String: already uploaded jar path * @param jsonConf String: jsonConf serialize all toplogy configuration to Json * @param topology StormTopology: topology Object */ @SuppressWarnings("unchecked") @Override public void submitTopologyWithOpts( String topologyname, String uploadedJarLocation, String jsonConf, StormTopology topology, SubmitOptions options) throws AlreadyAliveException, InvalidTopologyException, TopologyAssignException, TException { LOG.info("Receive " + topologyname + ", uploadedJarLocation:" + uploadedJarLocation); // @@@ Move validate topologyname in client code try { checkTopologyActive(data, topologyname, false); } catch (AlreadyAliveException e) { LOG.info(topologyname + " is already exist "); throw e; } catch (Exception e) { LOG.info("Failed to check whether topology is alive or not", e); throw new TException(e); } int counter = data.getSubmittedCount().incrementAndGet(); String topologyId = topologyname + "-" + counter + "-" + TimeUtils.current_time_secs(); Map<Object, Object> serializedConf = (Map<Object, Object>) JStormUtils.from_json(jsonConf); if (serializedConf == null) { LOG.warn("Failed to serialized Configuration"); throw new InvalidTopologyException("Failed to serilaze topology configuration"); } serializedConf.put(Config.TOPOLOGY_ID, topologyId); serializedConf.put(Config.TOPOLOGY_NAME, topologyname); try { Map<Object, Object> stormConf; stormConf = NimbusUtils.normalizeConf(conf, serializedConf, topology); Map<Object, Object> totalStormConf = new HashMap<Object, Object>(conf); totalStormConf.putAll(stormConf); StormTopology normalizedTopology = NimbusUtils.normalizeTopology(stormConf, topology); // this validates the structure of the topology Common.validate_basic(normalizedTopology, totalStormConf, topologyId); // don't need generate real topology, so skip Common.system_topology // Common.system_topology(totalStormConf, topology); StormClusterState stormClusterState = data.getStormClusterState(); // create /local-dir/nimbus/topologyId/xxxx files setupStormCode(conf, topologyId, uploadedJarLocation, stormConf, normalizedTopology); // generate TaskInfo for every bolt or spout in ZK // /ZK/tasks/topoologyId/xxx setupZkTaskInfo(conf, topologyId, stormClusterState); // make assignments for a topology TopologyAssignEvent assignEvent = new TopologyAssignEvent(); assignEvent.setTopologyId(topologyId); assignEvent.setScratch(false); assignEvent.setTopologyName(topologyname); assignEvent.setOldStatus( Thrift.topologyInitialStatusToStormStatus(options.get_initial_status())); TopologyAssign.push(assignEvent); LOG.info("Submit for " + topologyname + " with conf " + serializedConf); boolean isSuccess = assignEvent.waitFinish(); if (isSuccess == true) { LOG.info("Finish submit for " + topologyname); } else { throw new FailedAssignTopologyException(assignEvent.getErrorMsg()); } } catch (FailedAssignTopologyException e) { StringBuilder sb = new StringBuilder(); sb.append("Fail to sumbit topology, Root cause:"); if (e.getMessage() == null) { sb.append("submit timeout"); } else { sb.append(e.getMessage()); } sb.append("\n\n"); sb.append("topologyId:" + topologyId); sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n"); LOG.error(sb.toString(), e); throw new TopologyAssignException(sb.toString()); } catch (InvalidParameterException e) { StringBuilder sb = new StringBuilder(); sb.append("Fail to sumbit topology "); sb.append(e.getMessage()); sb.append(", cause:" + e.getCause()); sb.append("\n\n"); sb.append("topologyId:" + topologyId); sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n"); LOG.error(sb.toString(), e); throw new InvalidParameterException(sb.toString()); } catch (Throwable e) { StringBuilder sb = new StringBuilder(); sb.append("Fail to sumbit topology "); sb.append(e.getMessage()); sb.append(", cause:" + e.getCause()); sb.append("\n\n"); sb.append("topologyId:" + topologyId); sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n"); LOG.error(sb.toString(), e); throw new TopologyAssignException(sb.toString()); } }