Пример #1
0
  /**
   * get cluster's summary, it will contain SupervisorSummary and TopologySummary
   *
   * @return ClusterSummary
   */
  @Override
  public ClusterSummary getClusterInfo() throws TException {

    try {

      StormClusterState stormClusterState = data.getStormClusterState();

      Map<String, Assignment> assignments = new HashMap<String, Assignment>();

      // get nimbus running time
      int uptime = data.uptime();

      // get TopologySummary
      List<TopologySummary> topologySummaries = new ArrayList<TopologySummary>();

      // get all active topology's StormBase
      Map<String, StormBase> bases = Cluster.topology_bases(stormClusterState);
      for (Entry<String, StormBase> entry : bases.entrySet()) {

        String topologyId = entry.getKey();
        StormBase base = entry.getValue();

        Assignment assignment = stormClusterState.assignment_info(topologyId, null);
        if (assignment == null) {
          LOG.error("Failed to get assignment of " + topologyId);
          continue;
        }
        assignments.put(topologyId, assignment);
        String group = "default";
        if (data.isGroupMode()) group = base.getGroup();
        if (group == null) group = "default";

        TopologySummary topology =
            NimbusUtils.mkTopologySummary(
                assignment,
                topologyId,
                base.getStormName(),
                base.getStatusString(),
                TimeUtils.time_delta(base.getLanchTimeSecs()),
                group);

        topologySummaries.add(topology);
      }

      // all supervisors
      Map<String, SupervisorInfo> supervisorInfos =
          Cluster.allSupervisorInfo(stormClusterState, null);

      // generate SupervisorSummaries
      List<SupervisorSummary> supervisorSummaries =
          NimbusUtils.mkSupervisorSummaries(supervisorInfos, assignments);

      return new ClusterSummary(
          supervisorSummaries,
          uptime,
          topologySummaries,
          data.getGroupToTopology(),
          data.getGroupToResource(),
          data.getGroupToUsedResource(),
          data.isGroupMode());

    } catch (TException e) {
      LOG.info("Failed to get ClusterSummary ", e);
      throw e;
    } catch (Exception e) {
      LOG.info("Failed to get ClusterSummary ", e);
      throw new TException(e);
    }
  }
Пример #2
0
  @Override
  public SupervisorWorkers getSupervisorWorkers(String host) throws NotAliveException, TException {
    try {
      StormClusterState stormClusterState = data.getStormClusterState();

      String supervisorId = null;
      SupervisorInfo supervisorInfo = null;

      String ip = NetWorkUtils.host2Ip(host);
      String hostName = NetWorkUtils.ip2Host(host);

      // all supervisors
      Map<String, SupervisorInfo> supervisorInfos =
          Cluster.allSupervisorInfo(stormClusterState, null);

      for (Entry<String, SupervisorInfo> entry : supervisorInfos.entrySet()) {

        SupervisorInfo info = entry.getValue();
        if (info.getHostName().equals(hostName) || info.getHostName().equals(ip)) {
          supervisorId = entry.getKey();
          supervisorInfo = info;
          break;
        }
      }

      if (supervisorId == null) {
        throw new TException("No supervisor of " + host);
      }

      Map<String, Assignment> assignments = new HashMap<String, Assignment>();

      // get all active topology's StormBase
      Map<String, StormBase> bases = Cluster.topology_bases(stormClusterState);
      for (Entry<String, StormBase> entry : bases.entrySet()) {

        String topologyId = entry.getKey();
        StormBase base = entry.getValue();

        Assignment assignment = stormClusterState.assignment_info(topologyId, null);
        if (assignment == null) {
          LOG.error("Failed to get assignment of " + topologyId);
          continue;
        }
        assignments.put(topologyId, assignment);
      }

      Map<Integer, WorkerSummary> portWorkerSummarys = new TreeMap<Integer, WorkerSummary>();
      for (Entry<String, Assignment> entry : assignments.entrySet()) {
        String topologyId = entry.getKey();
        Assignment assignment = entry.getValue();

        Map<Integer, String> taskToComponent =
            Cluster.topology_task_info(stormClusterState, topologyId);

        Map<Integer, ResourceAssignment> taskToResource = assignment.getTaskToResource();

        for (Entry<Integer, ResourceAssignment> resourceEntry : taskToResource.entrySet()) {
          Integer taskId = resourceEntry.getKey();
          ResourceAssignment resourceAssignment = resourceEntry.getValue();

          if (supervisorId.equals(resourceAssignment.getSupervisorId()) == false) {
            continue;
          }

          supervisorInfo.allocResource(resourceAssignment);

          Integer port = resourceAssignment.getPort();
          WorkerSummary workerSummary = portWorkerSummarys.get(port);
          if (workerSummary == null) {
            workerSummary = new WorkerSummary();
            workerSummary.set_port(port);
            workerSummary.set_topology(topologyId);
            workerSummary.set_tasks(new ArrayList<TaskSummary>());

            portWorkerSummarys.put(port, workerSummary);
          }

          String componentName = taskToComponent.get(taskId);
          int uptime = TimeUtils.time_delta(assignment.getTaskStartTimeSecs().get(taskId));
          List<TaskSummary> tasks = workerSummary.get_tasks();

          TaskSummary taskSummary =
              NimbusUtils.mkSimpleTaskSummary(
                  resourceAssignment, taskId, componentName, host, uptime);

          tasks.add(taskSummary);
        }
      }

      List<WorkerSummary> wokersList = new ArrayList<WorkerSummary>();
      wokersList.addAll(portWorkerSummarys.values());

      SupervisorSummary supervisorSummary =
          NimbusUtils.mkSupervisorSummary(supervisorInfo, supervisorId);
      return new SupervisorWorkers(supervisorSummary, wokersList);

    } catch (TException e) {
      LOG.info("Failed to get ClusterSummary ", e);
      throw e;
    } catch (Exception e) {
      LOG.info("Failed to get ClusterSummary ", e);
      throw new TException(e);
    }
  }
Пример #3
0
  protected TopologyAssignContext prepareTopologyAssign(TopologyAssignEvent event)
      throws Exception {
    TopologyAssignContext ret = new TopologyAssignContext();

    String topologyId = event.getTopologyId();

    /** 读取本地目录下的stormconf.ser和stormcode.ser */
    Map<Object, Object> nimbusConf = nimbusData.getConf();
    Map<Object, Object> topologyConf =
        StormConfig.read_nimbus_topology_conf(nimbusConf, topologyId);

    StormTopology rawTopology = StormConfig.read_nimbus_topology_code(nimbusConf, topologyId);
    ret.setRawTopology(rawTopology);

    Map stormConf = new HashMap();
    stormConf.putAll(nimbusConf);
    stormConf.putAll(topologyConf);
    ret.setStormConf(stormConf);

    StormClusterState stormClusterState = nimbusData.getStormClusterState();

    // get all running supervisor, don't need callback to watch supervisor
    /** 获取所有的运行的supervisor,以及supervisorInfo */
    Map<String, SupervisorInfo> supInfos = Cluster.allSupervisorInfo(stormClusterState, null);
    if (supInfos.size() == 0) {
      throw new FailedAssignTopologyException(
          "Failed to make assignment " + topologyId + ", due to no alive supervisor");
    }

    /** 获取topologyId下的所有tasks */
    Map<Integer, String> taskToComponent =
        Cluster.topology_task_info(stormClusterState, topologyId);
    ret.setTaskToComponent(taskToComponent);

    // get taskids /ZK/tasks/topologyId
    Set<Integer> allTaskIds = taskToComponent.keySet();
    if (allTaskIds == null || allTaskIds.size() == 0) {
      String errMsg = "Failed to get all task ID list from /ZK-dir/tasks/" + topologyId;
      LOG.warn(errMsg);
      throw new IOException(errMsg);
    }
    ret.setAllTaskIds(allTaskIds);

    Set<Integer> aliveTasks = new HashSet<Integer>();
    // unstoppedTasks are tasks which are alive on no supervisor's(dead)
    // machine
    /** 未完成的任务,supervisor已死,任务没完成 */
    Set<Integer> unstoppedTasks = new HashSet<Integer>();
    Set<Integer> deadTasks = new HashSet<Integer>();
    Set<ResourceWorkerSlot> unstoppedWorkers = new HashSet<ResourceWorkerSlot>();

    Assignment existingAssignment = stormClusterState.assignment_info(topologyId, null);
    if (existingAssignment != null) {
      aliveTasks = getAliveTasks(topologyId, allTaskIds);
      unstoppedTasks = getUnstoppedSlots(aliveTasks, supInfos, existingAssignment);

      deadTasks.addAll(allTaskIds);
      deadTasks.removeAll(aliveTasks);
    }

    ret.setDeadTaskIds(deadTasks);
    ret.setUnstoppedTaskIds(unstoppedTasks);

    // Step 2: get all slots resource, free slots/ alive slots/ unstopped
    // slots
    getFreeSlots(supInfos, stormClusterState);
    ret.setCluster(supInfos);

    if (existingAssignment == null) {
      ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_NEW);

      try {
        AssignmentBak lastAssignment = stormClusterState.assignment_bak(event.getTopologyName());
        if (lastAssignment != null) {
          ret.setOldAssignment(lastAssignment.getAssignment());
        }
      } catch (Exception e) {
        LOG.warn("Fail to get old assignment", e);
      }
    } else {
      ret.setOldAssignment(existingAssignment);
      if (event.isScratch()) {
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_REBALANCE);
        unstoppedWorkers = getUnstoppedWorkers(unstoppedTasks, existingAssignment);
        ret.setUnstoppedWorkers(unstoppedWorkers);
      } else {
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_MONITOR);
        unstoppedWorkers = getUnstoppedWorkers(aliveTasks, existingAssignment);
        ret.setUnstoppedWorkers(unstoppedWorkers);
      }
    }

    return ret;
  }