示例#1
0
  /**
   * start a topology: set active status of the topology
   *
   * @param topologyName
   * @param stormClusterState
   * @param topologyId
   * @throws Exception
   */
  public void setTopologyStatus(TopologyAssignEvent event) throws Exception {
    StormClusterState stormClusterState = nimbusData.getStormClusterState();

    String topologyId = event.getTopologyId();
    String topologyName = event.getTopologyName();
    String group = event.getGroup();

    StormStatus status = new StormStatus(StatusType.active);
    if (event.getOldStatus() != null) {
      status = event.getOldStatus();
    }

    StormBase stormBase = stormClusterState.storm_base(topologyId, null);
    if (stormBase == null) {
      stormBase = new StormBase(topologyName, TimeUtils.current_time_secs(), status, group);
      stormClusterState.activate_storm(topologyId, stormBase);

    } else {

      stormClusterState.update_storm(topologyId, status);

      // here exist one hack operation
      // when monitor/rebalance/startup topologyName is null
      if (topologyName == null) {
        event.setTopologyName(stormBase.getStormName());
      }
    }

    LOG.info("Update " + topologyId + " " + status);
  }
示例#2
0
 // Utility functions, just used here
 public static Long tuple_time_delta(RotatingMap<Tuple, Long> start_times, Tuple tuple) {
   Long start_time = (Long) start_times.remove(tuple);
   if (start_time != null) {
     return TimeUtils.time_delta_ms(start_time);
   }
   return null;
 }
示例#3
0
  public TaskHeartbeatTrigger(
      Map conf,
      String name,
      DisruptorQueue queue,
      BlockingQueue<Object> controlQueue,
      int taskId,
      String componentId,
      TopologyContext sysTopologyCtx,
      ITaskReportErr reportError) {
    this.name = name;
    this.queue = queue;
    this.controlQueue = controlQueue;
    this.opCode = TimerConstants.TASK_HEARTBEAT;

    this.taskId = taskId;
    this.componentId = componentId;
    this.sysTopologyCtx = sysTopologyCtx;

    this.frequence = JStormUtils.parseInt(conf.get(Config.TASK_HEARTBEAT_FREQUENCY_SECS), 10);
    this.firstTime = frequence;

    this.executeThreadHbTime = TimeUtils.current_time_secs();
    this.taskHbTimeout = JStormUtils.parseInt(conf.get(Config.NIMBUS_TASK_TIMEOUT_SECS), 180);
    this.intervalCheck = new IntervalCheck();
    this.intervalCheck.setInterval(taskHbTimeout);
    this.intervalCheck.start();

    this.reportError = reportError;

    this.uptime = new UptimeComputer();
  }
示例#4
0
  /**
   * @param existingAssignment
   * @param taskWorkerSlot
   * @return
   * @throws Exception
   */
  public static Map<Integer, Integer> getTaskStartTimes(
      TopologyAssignContext context,
      NimbusData nimbusData,
      String topologyId,
      Assignment existingAssignment,
      Set<ResourceWorkerSlot> workers)
      throws Exception {

    Map<Integer, Integer> startTimes = new TreeMap<Integer, Integer>();

    if (context.getAssignType() == TopologyAssignContext.ASSIGN_TYPE_NEW) {
      int nowSecs = TimeUtils.current_time_secs();
      for (ResourceWorkerSlot worker : workers) {
        for (Integer changedTaskId : worker.getTasks()) {
          startTimes.put(changedTaskId, nowSecs);
        }
      }

      return startTimes;
    }

    Set<ResourceWorkerSlot> oldWorkers = new HashSet<ResourceWorkerSlot>();

    if (existingAssignment != null) {
      Map<Integer, Integer> taskStartTimeSecs = existingAssignment.getTaskStartTimeSecs();
      if (taskStartTimeSecs != null) {
        startTimes.putAll(taskStartTimeSecs);
      }

      if (existingAssignment.getWorkers() != null) {
        oldWorkers = existingAssignment.getWorkers();
      }
    }

    StormClusterState zkClusterState = nimbusData.getStormClusterState();
    Set<Integer> changeTaskIds = getChangeTaskIds(oldWorkers, workers);
    int nowSecs = TimeUtils.current_time_secs();
    for (Integer changedTaskId : changeTaskIds) {
      startTimes.put(changedTaskId, nowSecs);

      zkClusterState.remove_task_heartbeat(topologyId, changedTaskId);
    }

    LOG.info("Task assignment has been changed " + changeTaskIds);
    return startTimes;
  }
示例#5
0
 private void checkExecuteThreadHb() {
   long currentTime = TimeUtils.current_time_secs();
   if (currentTime - executeThreadHbTime > taskHbTimeout) {
     String error =
         "No response from Task-" + taskId + ", last report time(sec) is " + executeThreadHbTime;
     reportError.report(error);
   }
 }
示例#6
0
  @Override
  public void onEvent(Object event, long sequence, boolean endOfBatch) throws Exception {

    if (event == null) {
      return;
    }

    boltExeTimer.start();

    try {

      if (event instanceof RotatingMapTrigger.Tick) {
        // don't check the timetick name to improve performance

        Map<Tuple, Long> timeoutMap = tuple_start_times.rotate();

        if (ackerNum > 0) {
          // only when acker is enable
          for (Entry<Tuple, Long> entry : timeoutMap.entrySet()) {
            Tuple input = entry.getKey();
            task_stats.bolt_failed_tuple(input.getSourceComponent(), input.getSourceStreamId());
          }
        }

        return;
      }

      Tuple tuple = (Tuple) event;

      task_stats.recv_tuple(tuple.getSourceComponent(), tuple.getSourceStreamId());

      tuple_start_times.put(tuple, System.currentTimeMillis());

      try {
        bolt.execute(tuple);
      } catch (Throwable e) {
        error = e;
        LOG.error("bolt execute error ", e);
        report_error.report(e);
      }

      if (ackerNum == 0) {
        // only when acker is disable
        // get tuple process latency
        Long start_time = (Long) tuple_start_times.remove(tuple);
        if (start_time != null) {
          Long delta = TimeUtils.time_delta_ms(start_time);
          task_stats.bolt_acked_tuple(tuple.getSourceComponent(), tuple.getSourceStreamId(), delta);
        }
      }
    } finally {
      boltExeTimer.stop();
    }
  }
示例#7
0
  /**
   * Get TopologyInfo, it contain all data of the topology running status
   *
   * @return TopologyInfo
   */
  @Override
  public TopologyInfo getTopologyInfo(String topologyId) throws NotAliveException, TException {

    TopologyInfo topologyInfo = new TopologyInfo();

    StormClusterState stormClusterState = data.getStormClusterState();

    try {

      // get topology's StormBase
      StormBase base = stormClusterState.storm_base(topologyId, null);
      if (base == null) {
        throw new NotAliveException("No topology of " + topologyId);
      }
      topologyInfo.set_id(topologyId);
      topologyInfo.set_name(base.getStormName());
      topologyInfo.set_uptime_secs(TimeUtils.time_delta(base.getLanchTimeSecs()));
      topologyInfo.set_status(base.getStatusString());

      // get topology's Assignment
      Assignment assignment = stormClusterState.assignment_info(topologyId, null);
      if (assignment == null) {
        throw new TException("Failed to get StormBase from ZK of " + topologyId);
      }

      // get topology's map<taskId, componentId>
      Map<Integer, String> taskInfo = Cluster.topology_task_info(stormClusterState, topologyId);

      List<TaskSummary> tasks =
          NimbusUtils.mkTaskSummary(stormClusterState, assignment, taskInfo, topologyId);
      topologyInfo.set_tasks(tasks);

      return topologyInfo;
    } catch (TException e) {
      LOG.info("Failed to get topologyInfo " + topologyId, e);
      throw e;
    } catch (Exception e) {
      LOG.info("Failed to get topologyInfo " + topologyId, e);
      throw new TException("Failed to get topologyInfo" + topologyId);
    }
  }
示例#8
0
  @Override
  public SupervisorWorkers getSupervisorWorkers(String host) throws NotAliveException, TException {
    try {
      StormClusterState stormClusterState = data.getStormClusterState();

      String supervisorId = null;
      SupervisorInfo supervisorInfo = null;

      String ip = NetWorkUtils.host2Ip(host);
      String hostName = NetWorkUtils.ip2Host(host);

      // all supervisors
      Map<String, SupervisorInfo> supervisorInfos =
          Cluster.allSupervisorInfo(stormClusterState, null);

      for (Entry<String, SupervisorInfo> entry : supervisorInfos.entrySet()) {

        SupervisorInfo info = entry.getValue();
        if (info.getHostName().equals(hostName) || info.getHostName().equals(ip)) {
          supervisorId = entry.getKey();
          supervisorInfo = info;
          break;
        }
      }

      if (supervisorId == null) {
        throw new TException("No supervisor of " + host);
      }

      Map<String, Assignment> assignments = new HashMap<String, Assignment>();

      // get all active topology's StormBase
      Map<String, StormBase> bases = Cluster.topology_bases(stormClusterState);
      for (Entry<String, StormBase> entry : bases.entrySet()) {

        String topologyId = entry.getKey();
        StormBase base = entry.getValue();

        Assignment assignment = stormClusterState.assignment_info(topologyId, null);
        if (assignment == null) {
          LOG.error("Failed to get assignment of " + topologyId);
          continue;
        }
        assignments.put(topologyId, assignment);
      }

      Map<Integer, WorkerSummary> portWorkerSummarys = new TreeMap<Integer, WorkerSummary>();
      for (Entry<String, Assignment> entry : assignments.entrySet()) {
        String topologyId = entry.getKey();
        Assignment assignment = entry.getValue();

        Map<Integer, String> taskToComponent =
            Cluster.topology_task_info(stormClusterState, topologyId);

        Map<Integer, ResourceAssignment> taskToResource = assignment.getTaskToResource();

        for (Entry<Integer, ResourceAssignment> resourceEntry : taskToResource.entrySet()) {
          Integer taskId = resourceEntry.getKey();
          ResourceAssignment resourceAssignment = resourceEntry.getValue();

          if (supervisorId.equals(resourceAssignment.getSupervisorId()) == false) {
            continue;
          }

          supervisorInfo.allocResource(resourceAssignment);

          Integer port = resourceAssignment.getPort();
          WorkerSummary workerSummary = portWorkerSummarys.get(port);
          if (workerSummary == null) {
            workerSummary = new WorkerSummary();
            workerSummary.set_port(port);
            workerSummary.set_topology(topologyId);
            workerSummary.set_tasks(new ArrayList<TaskSummary>());

            portWorkerSummarys.put(port, workerSummary);
          }

          String componentName = taskToComponent.get(taskId);
          int uptime = TimeUtils.time_delta(assignment.getTaskStartTimeSecs().get(taskId));
          List<TaskSummary> tasks = workerSummary.get_tasks();

          TaskSummary taskSummary =
              NimbusUtils.mkSimpleTaskSummary(
                  resourceAssignment, taskId, componentName, host, uptime);

          tasks.add(taskSummary);
        }
      }

      List<WorkerSummary> wokersList = new ArrayList<WorkerSummary>();
      wokersList.addAll(portWorkerSummarys.values());

      SupervisorSummary supervisorSummary =
          NimbusUtils.mkSupervisorSummary(supervisorInfo, supervisorId);
      return new SupervisorWorkers(supervisorSummary, wokersList);

    } catch (TException e) {
      LOG.info("Failed to get ClusterSummary ", e);
      throw e;
    } catch (Exception e) {
      LOG.info("Failed to get ClusterSummary ", e);
      throw new TException(e);
    }
  }
示例#9
0
  /**
   * get cluster's summary, it will contain SupervisorSummary and TopologySummary
   *
   * @return ClusterSummary
   */
  @Override
  public ClusterSummary getClusterInfo() throws TException {

    try {

      StormClusterState stormClusterState = data.getStormClusterState();

      Map<String, Assignment> assignments = new HashMap<String, Assignment>();

      // get nimbus running time
      int uptime = data.uptime();

      // get TopologySummary
      List<TopologySummary> topologySummaries = new ArrayList<TopologySummary>();

      // get all active topology's StormBase
      Map<String, StormBase> bases = Cluster.topology_bases(stormClusterState);
      for (Entry<String, StormBase> entry : bases.entrySet()) {

        String topologyId = entry.getKey();
        StormBase base = entry.getValue();

        Assignment assignment = stormClusterState.assignment_info(topologyId, null);
        if (assignment == null) {
          LOG.error("Failed to get assignment of " + topologyId);
          continue;
        }
        assignments.put(topologyId, assignment);
        String group = "default";
        if (data.isGroupMode()) group = base.getGroup();
        if (group == null) group = "default";

        TopologySummary topology =
            NimbusUtils.mkTopologySummary(
                assignment,
                topologyId,
                base.getStormName(),
                base.getStatusString(),
                TimeUtils.time_delta(base.getLanchTimeSecs()),
                group);

        topologySummaries.add(topology);
      }

      // all supervisors
      Map<String, SupervisorInfo> supervisorInfos =
          Cluster.allSupervisorInfo(stormClusterState, null);

      // generate SupervisorSummaries
      List<SupervisorSummary> supervisorSummaries =
          NimbusUtils.mkSupervisorSummaries(supervisorInfos, assignments);

      return new ClusterSummary(
          supervisorSummaries,
          uptime,
          topologySummaries,
          data.getGroupToTopology(),
          data.getGroupToResource(),
          data.getGroupToUsedResource(),
          data.isGroupMode());

    } catch (TException e) {
      LOG.info("Failed to get ClusterSummary ", e);
      throw e;
    } catch (Exception e) {
      LOG.info("Failed to get ClusterSummary ", e);
      throw new TException(e);
    }
  }
示例#10
0
  /**
   * Submit one Topology
   *
   * @param topologyname String: topology name
   * @param uploadedJarLocation String: already uploaded jar path
   * @param jsonConf String: jsonConf serialize all toplogy configuration to Json
   * @param topology StormTopology: topology Object
   */
  @SuppressWarnings("unchecked")
  @Override
  public void submitTopologyWithOpts(
      String topologyname,
      String uploadedJarLocation,
      String jsonConf,
      StormTopology topology,
      SubmitOptions options)
      throws AlreadyAliveException, InvalidTopologyException, TopologyAssignException, TException {
    LOG.info("Receive " + topologyname + ", uploadedJarLocation:" + uploadedJarLocation);
    // @@@ Move validate topologyname in client code
    try {
      checkTopologyActive(data, topologyname, false);
    } catch (AlreadyAliveException e) {
      LOG.info(topologyname + " is already exist ");
      throw e;
    } catch (Exception e) {
      LOG.info("Failed to check whether topology is alive or not", e);
      throw new TException(e);
    }

    int counter = data.getSubmittedCount().incrementAndGet();
    String topologyId = topologyname + "-" + counter + "-" + TimeUtils.current_time_secs();

    Map<Object, Object> serializedConf = (Map<Object, Object>) JStormUtils.from_json(jsonConf);
    if (serializedConf == null) {
      LOG.warn("Failed to serialized Configuration");
      throw new InvalidTopologyException("Failed to serilaze topology configuration");
    }

    serializedConf.put(Config.TOPOLOGY_ID, topologyId);
    serializedConf.put(Config.TOPOLOGY_NAME, topologyname);

    try {
      Map<Object, Object> stormConf;

      stormConf = NimbusUtils.normalizeConf(conf, serializedConf, topology);

      Map<Object, Object> totalStormConf = new HashMap<Object, Object>(conf);
      totalStormConf.putAll(stormConf);

      StormTopology normalizedTopology = NimbusUtils.normalizeTopology(stormConf, topology);

      // this validates the structure of the topology
      Common.validate_basic(normalizedTopology, totalStormConf, topologyId);
      // don't need generate real topology, so skip Common.system_topology
      // Common.system_topology(totalStormConf, topology);

      StormClusterState stormClusterState = data.getStormClusterState();

      // create /local-dir/nimbus/topologyId/xxxx files
      setupStormCode(conf, topologyId, uploadedJarLocation, stormConf, normalizedTopology);

      // generate TaskInfo for every bolt or spout in ZK
      // /ZK/tasks/topoologyId/xxx
      setupZkTaskInfo(conf, topologyId, stormClusterState);

      // make assignments for a topology
      TopologyAssignEvent assignEvent = new TopologyAssignEvent();
      assignEvent.setTopologyId(topologyId);
      assignEvent.setScratch(false);
      assignEvent.setTopologyName(topologyname);
      assignEvent.setOldStatus(
          Thrift.topologyInitialStatusToStormStatus(options.get_initial_status()));

      TopologyAssign.push(assignEvent);
      LOG.info("Submit for " + topologyname + " with conf " + serializedConf);

      boolean isSuccess = assignEvent.waitFinish();
      if (isSuccess == true) {
        LOG.info("Finish submit for " + topologyname);
      } else {
        throw new FailedAssignTopologyException(assignEvent.getErrorMsg());
      }

    } catch (FailedAssignTopologyException e) {
      StringBuilder sb = new StringBuilder();
      sb.append("Fail to sumbit topology, Root cause:");
      if (e.getMessage() == null) {
        sb.append("submit timeout");
      } else {
        sb.append(e.getMessage());
      }

      sb.append("\n\n");
      sb.append("topologyId:" + topologyId);
      sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n");
      LOG.error(sb.toString(), e);
      throw new TopologyAssignException(sb.toString());
    } catch (InvalidParameterException e) {
      StringBuilder sb = new StringBuilder();
      sb.append("Fail to sumbit topology ");
      sb.append(e.getMessage());
      sb.append(", cause:" + e.getCause());
      sb.append("\n\n");
      sb.append("topologyId:" + topologyId);
      sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n");
      LOG.error(sb.toString(), e);
      throw new InvalidParameterException(sb.toString());
    } catch (Throwable e) {
      StringBuilder sb = new StringBuilder();
      sb.append("Fail to sumbit topology ");
      sb.append(e.getMessage());
      sb.append(", cause:" + e.getCause());
      sb.append("\n\n");
      sb.append("topologyId:" + topologyId);
      sb.append(", uploadedJarLocation:" + uploadedJarLocation + "\n");
      LOG.error(sb.toString(), e);
      throw new TopologyAssignException(sb.toString());
    }
  }