Ejemplo n.º 1
0
  /**
   * Records the status of a topology in the cluster state.
   *
   * <p>The status written is the event's old status when one is present, otherwise a fresh
   * {@code StatusType.active} status. When no {@code StormBase} is stored for the topology yet, a
   * new one is created and activated; otherwise only the stored status is updated.
   *
   * @param event assignment event supplying the topology id, name, group and optional old status;
   *     when its topology name is {@code null} and a {@code StormBase} already exists, the name is
   *     filled in from that {@code StormBase}
   * @throws Exception propagated from the cluster-state calls
   */
  public void setTopologyStatus(TopologyAssignEvent event) throws Exception {
    StormClusterState zkState = nimbusData.getStormClusterState();

    String topologyId = event.getTopologyId();
    String topologyName = event.getTopologyName();
    String group = event.getGroup();

    // Prefer the status carried by the event; fall back to "active".
    StormStatus previous = event.getOldStatus();
    StormStatus status = (previous != null) ? previous : new StormStatus(StatusType.active);

    StormBase existing = zkState.storm_base(topologyId, null);
    if (existing != null) {
      zkState.update_storm(topologyId, status);

      // Hack: during monitor/rebalance/startup the event has no topology name,
      // so recover it from the stored StormBase.
      if (topologyName == null) {
        event.setTopologyName(existing.getStormName());
      }
    } else {
      StormBase created =
          new StormBase(topologyName, TimeUtils.current_time_secs(), status, group);
      zkState.activate_storm(topologyId, created);
    }

    LOG.info("Update " + topologyId + " " + status);
  }
Ejemplo n.º 2
0
  /**
   * Cleans up the topologies selected by {@code get_cleanup_ids} for the current list of active
   * topologies.
   *
   * <p>For each selected id this removes the storm entry from the cluster state, drops its entry
   * from the task heartbeat cache and deletes its local stormdist directory. A failure to delete
   * the directory is logged and does not stop the cleanup of the remaining ids.
   *
   * @throws Exception propagated from the cluster-state calls
   */
  public void cleanupDisappearedTopology() throws Exception {
    StormClusterState zkState = nimbusData.getStormClusterState();

    List<String> activeIds = zkState.active_storms();
    if (activeIds != null) {
      for (String staleId : get_cleanup_ids(zkState, activeIds)) {
        LOG.info("Cleaning up " + staleId);

        zkState.try_remove_storm(staleId);
        nimbusData.getTaskHeartbeatsCache().remove(staleId);

        // Local directory: /nimbus/stormdist/<topologyId>
        String localDistDir = StormConfig.masterStormdistRoot(nimbusData.getConf(), staleId);
        try {
          PathUtils.rmr(localDistDir);
        } catch (IOException e) {
          LOG.warn("Failed to delete " + localDistDir + ",", e);
        }
      }
    }
  }
  /**
   * Passes the highest existing task id of this topology through {@code setBoltInfo} and then
   * {@code setSpoutInfo}, each of which returns an updated counter.
   *
   * @param oldTopology topology before the change
   * @param newTopology topology after the change
   * @throws Exception propagated from the cluster-state calls or the two helpers; a
   *     {@code NoSuchElementException} is raised when the topology has no task ids
   */
  private void setTaskInfo(StormTopology oldTopology, StormTopology newTopology) throws Exception {
    StormClusterState zkState = data.getStormClusterState();

    // Sorted copy of the existing ids so the largest one is directly available.
    TreeSet<Integer> existingIds = new TreeSet<Integer>(zkState.task_ids(topologyid));
    int nextId = existingIds.last();

    // Bolts first, then spouts; each step continues from the counter the previous one returned.
    nextId = setBoltInfo(oldTopology, newTopology, nextId, zkState);
    nextId = setSpoutInfo(oldTopology, newTopology, nextId, zkState);
  }
Ejemplo n.º 4
0
 /**
  * Verifies that the liveness of a topology matches the expected state.
  *
  * @param nimbus source of the cluster state used for the check
  * @param topologyName name of the topology to check
  * @param bActive the state the caller expects: {@code true} for active, {@code false} for not
  *     active
  * @throws NotAliveException if the topology is expected to be active but is not
  * @throws AlreadyAliveException if the topology is expected to be inactive but is active
  * @throws Exception propagated from the liveness lookup
  */
 public void checkTopologyActive(NimbusData nimbus, String topologyName, boolean bActive)
     throws Exception {
   boolean active = isTopologyActive(nimbus.getStormClusterState(), topologyName);
   if (active == bActive) {
     return;
   }

   if (bActive) {
     throw new NotAliveException(topologyName + " is not alive");
   }
   throw new AlreadyAliveException(topologyName + " is already active");
 }
Ejemplo n.º 5
0
 /**
  * Stores a {@code StormMonitor} for the event's topology in the cluster state, enabled or
  * disabled according to the performance-metrics setting in the nimbus configuration.
  *
  * <p>Best effort: any failure is logged as a warning and not rethrown.
  *
  * @param event assignment event identifying the topology
  */
 public void metricsMonitor(TopologyAssignEvent event) {
   String topologyId = event.getTopologyId();
   try {
     boolean metricsEnabled = ConfigExtension.isEnablePerformanceMetrics(nimbusData.getConf());
     nimbusData
         .getStormClusterState()
         .set_storm_monitor(topologyId, new StormMonitor(metricsEnabled));
   } catch (Exception e) {
     LOG.warn("Failed to update metrics monitor status of " + topologyId, e);
   }
 }
Ejemplo n.º 6
0
  /**
   * Computes and stores the assignment for one topology.
   *
   * <p>Worker slots come from {@code mkLocalAssignment} in local mode and from the default
   * scheduler otherwise. The resulting {@code Assignment} is written to the cluster state, task
   * heartbeat start times are refreshed, and metrics state is updated: metrics info for
   * rebalance/monitor assignments, the metrics monitor flag for any other assignment type.
   *
   * @param event assignment event identifying the topology
   * @return the assignment that was stored
   * @throws Exception propagated from context preparation, scheduling or the cluster-state calls
   */
  public Assignment mkAssignment(TopologyAssignEvent event) throws Exception {
    String topologyId = event.getTopologyId();
    LOG.info("Determining assignment for " + topologyId);

    TopologyAssignContext context = prepareTopologyAssign(event);

    Set<ResourceWorkerSlot> slots;
    if (StormConfig.local_mode(nimbusData.getConf())) {
      slots = mkLocalAssignment(context);
    } else {
      IToplogyScheduler defaultScheduler = schedulers.get(DEFAULT_SCHEDULER_NAME);
      slots = defaultScheduler.assignTasks(context);
    }

    Map<String, String> nodeHost =
        getTopologyNodeHost(context.getCluster(), context.getOldAssignment(), slots);
    Map<Integer, Integer> startTimes =
        getTaskStartTimes(context, nimbusData, topologyId, context.getOldAssignment(), slots);
    String codeDir = StormConfig.masterStormdistRoot(nimbusData.getConf(), topologyId);

    Assignment result = new Assignment(codeDir, slots, nodeHost, startTimes);
    nimbusData.getStormClusterState().set_assignment(topologyId, result);

    // Refresh the start time held with each task heartbeat.
    NimbusUtils.updateTaskHbStartTime(nimbusData, result, topologyId);

    // Rebalance/monitor: refresh the metrics info in ZK.
    // Any other assignment type: only set the metrics monitor status.
    boolean isReassignment =
        context.getAssignType() == TopologyAssignContext.ASSIGN_TYPE_REBALANCE
            || context.getAssignType() == TopologyAssignContext.ASSIGN_TYPE_MONITOR;
    if (isReassignment) {
      NimbusUtils.updateMetricsInfo(nimbusData, topologyId, result);
    } else {
      metricsMonitor(event);
    }

    LOG.info("Successfully make assignment for topology id " + topologyId + ": " + result);
    return result;
  }
Ejemplo n.º 7
0
  /**
   * Computes the start time (in seconds) of every task for a new set of workers.
   *
   * <p>For a brand-new assignment every task in {@code workers} starts now. Otherwise the start
   * times of the existing assignment are carried over, and each task reported by
   * {@code getChangeTaskIds} is given the current time and has its heartbeat removed from the
   * cluster state.
   *
   * @param context assignment context; only its assign type is read here
   * @param nimbusData source of the cluster state
   * @param topologyId topology whose task heartbeats are removed for changed tasks
   * @param existingAssignment previous assignment, may be {@code null}
   * @param workers worker slots of the new assignment
   * @return map from task id to start time in seconds, sorted by task id
   * @throws Exception propagated from the cluster-state calls
   */
  public static Map<Integer, Integer> getTaskStartTimes(
      TopologyAssignContext context,
      NimbusData nimbusData,
      String topologyId,
      Assignment existingAssignment,
      Set<ResourceWorkerSlot> workers)
      throws Exception {

    Map<Integer, Integer> result = new TreeMap<Integer, Integer>();

    if (context.getAssignType() == TopologyAssignContext.ASSIGN_TYPE_NEW) {
      // Fresh topology: everything starts at the same instant.
      int launchSecs = TimeUtils.current_time_secs();
      for (ResourceWorkerSlot slot : workers) {
        for (Integer taskId : slot.getTasks()) {
          result.put(taskId, launchSecs);
        }
      }
      return result;
    }

    // Seed with whatever the previous assignment recorded.
    Set<ResourceWorkerSlot> previousWorkers = null;
    if (existingAssignment != null) {
      Map<Integer, Integer> previousTimes = existingAssignment.getTaskStartTimeSecs();
      if (previousTimes != null) {
        result.putAll(previousTimes);
      }
      previousWorkers = existingAssignment.getWorkers();
    }
    if (previousWorkers == null) {
      previousWorkers = new HashSet<ResourceWorkerSlot>();
    }

    StormClusterState zkState = nimbusData.getStormClusterState();
    Set<Integer> movedTaskIds = getChangeTaskIds(previousWorkers, workers);
    int restartSecs = TimeUtils.current_time_secs();
    for (Integer taskId : movedTaskIds) {
      result.put(taskId, restartSecs);
      zkState.remove_task_heartbeat(topologyId, taskId);
    }

    LOG.info("Task assignment has been changed " + movedTaskIds);
    return result;
  }
Ejemplo n.º 8
0
  /**
   * Builds the {@code TopologyInfo} describing the running state of one topology.
   *
   * <p>The result carries the topology id, name, uptime and status string taken from its
   * {@code StormBase}, plus the task summaries derived from its assignment.
   *
   * @param topologyId id of the topology to describe
   * @return the populated {@code TopologyInfo}
   * @throws NotAliveException raised when no {@code StormBase} exists for {@code topologyId}
   * @throws TException if the assignment is missing or any other failure occurs; the original
   *     failure is attached as the cause
   */
  @Override
  public TopologyInfo getTopologyInfo(String topologyId) throws NotAliveException, TException {

    TopologyInfo topologyInfo = new TopologyInfo();

    StormClusterState stormClusterState = data.getStormClusterState();

    try {

      // get topology's StormBase
      StormBase base = stormClusterState.storm_base(topologyId, null);
      if (base == null) {
        // NOTE(review): whether this reaches the caller unwrapped depends on whether
        // NotAliveException is a TException subtype — confirm against the generated class.
        throw new NotAliveException("No topology of " + topologyId);
      }
      topologyInfo.set_id(topologyId);
      topologyInfo.set_name(base.getStormName());
      topologyInfo.set_uptime_secs(TimeUtils.time_delta(base.getLanchTimeSecs()));
      topologyInfo.set_status(base.getStatusString());

      // get topology's Assignment
      Assignment assignment = stormClusterState.assignment_info(topologyId, null);
      if (assignment == null) {
        // The missing object here is the Assignment, not the StormBase.
        throw new TException("Failed to get Assignment from ZK of " + topologyId);
      }

      // get topology's map<taskId, componentId>
      Map<Integer, String> taskInfo = Cluster.topology_task_info(stormClusterState, topologyId);

      List<TaskSummary> tasks =
          NimbusUtils.mkTaskSummary(stormClusterState, assignment, taskInfo, topologyId);
      topologyInfo.set_tasks(tasks);

      return topologyInfo;
    } catch (TException e) {
      LOG.info("Failed to get topologyInfo " + topologyId, e);
      throw e;
    } catch (Exception e) {
      LOG.info("Failed to get topologyInfo " + topologyId, e);
      // Keep the root cause so the failure can be diagnosed from the rethrown exception.
      throw new TException("Failed to get topologyInfo " + topologyId, e);
    }
  }
Ejemplo n.º 9
0
  /**
   * Backs up a topology's assignment, together with its component-to-task mapping, in the
   * cluster state under the topology name.
   *
   * <p>Best effort: any failure is logged as a warning and not rethrown. Open question kept from
   * the original author: does the backup need to happen on every assignment?
   *
   * @param assignment assignment to back up
   * @param event assignment event supplying the topology id and name
   */
  public void backupAssignment(Assignment assignment, TopologyAssignEvent event) {
    String topologyId = event.getTopologyId();
    String topologyName = event.getTopologyName();
    try {
      StormClusterState zkState = nimbusData.getStormClusterState();

      // Known inefficiency: the task info is fetched a second time during one assignment.
      HashMap<Integer, String> taskToComponent =
          Cluster.topology_task_info(zkState, topologyId);

      // Invert to component -> task ids and keep every id list sorted.
      Map<String, List<Integer>> componentToTasks = JStormUtils.reverse_map(taskToComponent);
      for (List<Integer> taskIds : componentToTasks.values()) {
        Collections.sort(taskIds);
      }

      zkState.backup_assignment(topologyName, new AssignmentBak(componentToTasks, assignment));
    } catch (Exception e) {
      LOG.warn("Failed to backup " + topologyId + " assignment " + assignment, e);
    }
  }
  /**
   * Performs the rebalance: optionally applies an updated topology configuration, then pushes a
   * from-scratch assignment event and waits for it to finish.
   *
   * <p>Positional arguments: {@code args[0]} delay, {@code args[1]} reassign flag
   * ({@code Boolean}), {@code args[2]} new topology conf ({@code Map}, may be {@code null}).
   *
   * <p>Any failure is logged and not rethrown. If the failure happens after the task info was
   * set, the tasks in {@code newTasks} are removed from the cluster state again. The status
   * transition callback runs in every case.
   *
   * @param args positional arguments as described above
   * @return the result of the {@code DelayStatusTransitionCallback} created with
   *     {@code StatusType.rebalancing} and {@code StatusType.done_rebalance}
   */
  @Override
  public <T> Object execute(T... args) {
    boolean isSetTaskInfo = false;
    try {
      Boolean reassign = (Boolean) args[1];
      // args[0]: delay, args[1]: reassign_flag, args[2]: conf
      Map<Object, Object> conf = (Map<Object, Object>) args[2];
      if (conf != null) {
        boolean isConfUpdate = false;
        Map stormConf = data.getConf();

        // Update topology code
        Map topoConf = StormConfig.read_nimbus_topology_conf(stormConf, topologyid);
        StormTopology rawOldTopology = StormConfig.read_nimbus_topology_code(stormConf, topologyid);
        StormTopology rawNewTopology = NimbusUtils.normalizeTopology(conf, rawOldTopology, true);
        StormTopology sysOldTopology = rawOldTopology.deepCopy();
        StormTopology sysNewTopology = rawNewTopology.deepCopy();
        if (conf.get(Config.TOPOLOGY_ACKER_EXECUTORS) != null) {
          Common.add_acker(topoConf, sysOldTopology);
          Common.add_acker(conf, sysNewTopology);
          int ackerNum = JStormUtils.parseInt(conf.get(Config.TOPOLOGY_ACKER_EXECUTORS));
          // Boxed on purpose: the old conf may carry no acker setting, and this value is only
          // logged, so a missing entry must not fail the rebalance on unboxing.
          Integer oldAckerNum = JStormUtils.parseInt(topoConf.get(Config.TOPOLOGY_ACKER_EXECUTORS));
          LOG.info("Update acker from oldAckerNum=" + oldAckerNum + " to ackerNum=" + ackerNum);
          topoConf.put(Config.TOPOLOGY_ACKER_EXECUTORS, ackerNum);
          isConfUpdate = true;
        }

        // If scale-out, setup task info for new added tasks
        setTaskInfo(sysOldTopology, sysNewTopology);
        isSetTaskInfo = true;

        // If everything is OK, write topology code into disk
        StormConfig.write_nimbus_topology_code(
            stormConf, topologyid, Utils.serialize(rawNewTopology));

        // Update topology conf if worker num has been updated
        Set<Object> keys = conf.keySet();
        Integer workerNum = JStormUtils.parseInt(conf.get(Config.TOPOLOGY_WORKERS));
        if (workerNum != null) {
          Integer oldWorkerNum = JStormUtils.parseInt(topoConf.get(Config.TOPOLOGY_WORKERS));
          topoConf.put(Config.TOPOLOGY_WORKERS, workerNum);
          isConfUpdate = true;

          LOG.info("Update worker num from " + oldWorkerNum + " to " + workerNum);
        }

        if (keys.contains(Config.ISOLATION_SCHEDULER_MACHINES)) {
          topoConf.put(
              Config.ISOLATION_SCHEDULER_MACHINES, conf.get(Config.ISOLATION_SCHEDULER_MACHINES));
        }

        if (isConfUpdate) {
          StormConfig.write_nimbus_topology_conf(stormConf, topologyid, topoConf);
        }
      }

      TopologyAssignEvent event = new TopologyAssignEvent();

      event.setTopologyId(topologyid);
      event.setScratch(true);
      event.setOldStatus(oldStatus);
      event.setReassign(reassign);
      if (conf != null) {
        event.setScaleTopology(true);
      }
      TopologyAssign.push(event);
      event.waitFinish();
    } catch (Exception e) {
      LOG.error("do-rebalance error!", e);
      // Rollback the changes on ZK
      if (isSetTaskInfo) {
        try {
          StormClusterState clusterState = data.getStormClusterState();
          clusterState.remove_task(topologyid, newTasks);
        } catch (Exception e1) {
          // Log the rollback failure itself (e1), not the exception that triggered the rollback.
          LOG.error("Failed to rollback the changes on ZK for task-" + newTasks, e1);
        }
      }
    }

    DelayStatusTransitionCallback delayCallback =
        new DelayStatusTransitionCallback(
            data, topologyid, oldStatus, StatusType.rebalancing, StatusType.done_rebalance);
    return delayCallback.execute();
  }
Ejemplo n.º 11
0
  /**
   * Describes one supervisor and the workers running on it.
   *
   * <p>The supervisor is the first one whose host name equals either the resolved host name or
   * the resolved IP of {@code host}. Every task assigned to that supervisor is grouped by worker
   * port into a {@code WorkerSummary}; the result lists the workers in ascending port order.
   *
   * @param host host name or IP of the supervisor
   * @return the supervisor summary together with its worker summaries
   * @throws NotAliveException declared by the interface; not thrown directly by this method
   * @throws TException if no supervisor matches {@code host} or any other failure occurs
   */
  @Override
  public SupervisorWorkers getSupervisorWorkers(String host) throws NotAliveException, TException {
    try {
      StormClusterState stormClusterState = data.getStormClusterState();

      String supervisorId = null;
      SupervisorInfo supervisorInfo = null;

      String ip = NetWorkUtils.host2Ip(host);
      String hostName = NetWorkUtils.ip2Host(host);

      // all supervisors
      Map<String, SupervisorInfo> supervisorInfos =
          Cluster.allSupervisorInfo(stormClusterState, null);

      for (Entry<String, SupervisorInfo> entry : supervisorInfos.entrySet()) {

        SupervisorInfo info = entry.getValue();
        if (info.getHostName().equals(hostName) || info.getHostName().equals(ip)) {
          supervisorId = entry.getKey();
          supervisorInfo = info;
          break;
        }
      }

      if (supervisorId == null) {
        throw new TException("No supervisor of " + host);
      }

      Map<String, Assignment> assignments = new HashMap<String, Assignment>();

      // collect the assignment of every active topology; only the ids are needed here
      Map<String, StormBase> bases = Cluster.topology_bases(stormClusterState);
      for (String topologyId : bases.keySet()) {

        Assignment assignment = stormClusterState.assignment_info(topologyId, null);
        if (assignment == null) {
          LOG.error("Failed to get assignment of " + topologyId);
          continue;
        }
        assignments.put(topologyId, assignment);
      }

      // keyed by port so the resulting worker list is sorted by port
      Map<Integer, WorkerSummary> portWorkerSummarys = new TreeMap<Integer, WorkerSummary>();
      for (Entry<String, Assignment> entry : assignments.entrySet()) {
        String topologyId = entry.getKey();
        Assignment assignment = entry.getValue();

        Map<Integer, String> taskToComponent =
            Cluster.topology_task_info(stormClusterState, topologyId);

        Map<Integer, ResourceAssignment> taskToResource = assignment.getTaskToResource();

        for (Entry<Integer, ResourceAssignment> resourceEntry : taskToResource.entrySet()) {
          Integer taskId = resourceEntry.getKey();
          ResourceAssignment resourceAssignment = resourceEntry.getValue();

          // skip tasks that run on other supervisors
          if (!supervisorId.equals(resourceAssignment.getSupervisorId())) {
            continue;
          }

          supervisorInfo.allocResource(resourceAssignment);

          Integer port = resourceAssignment.getPort();
          WorkerSummary workerSummary = portWorkerSummarys.get(port);
          if (workerSummary == null) {
            workerSummary = new WorkerSummary();
            workerSummary.set_port(port);
            workerSummary.set_topology(topologyId);
            workerSummary.set_tasks(new ArrayList<TaskSummary>());

            portWorkerSummarys.put(port, workerSummary);
          }

          String componentName = taskToComponent.get(taskId);
          int uptime = TimeUtils.time_delta(assignment.getTaskStartTimeSecs().get(taskId));
          List<TaskSummary> tasks = workerSummary.get_tasks();

          TaskSummary taskSummary =
              NimbusUtils.mkSimpleTaskSummary(
                  resourceAssignment, taskId, componentName, host, uptime);

          tasks.add(taskSummary);
        }
      }

      List<WorkerSummary> wokersList = new ArrayList<WorkerSummary>();
      wokersList.addAll(portWorkerSummarys.values());

      SupervisorSummary supervisorSummary =
          NimbusUtils.mkSupervisorSummary(supervisorInfo, supervisorId);
      return new SupervisorWorkers(supervisorSummary, wokersList);

    } catch (TException e) {
      LOG.info("Failed to get SupervisorWorkers of " + host, e);
      throw e;
    } catch (Exception e) {
      LOG.info("Failed to get SupervisorWorkers of " + host, e);
      throw new TException(e);
    }
  }
Ejemplo n.º 12
0
  /**
   * Builds the cluster summary: supervisor summaries, nimbus uptime, one summary per active
   * topology that has an assignment, and the group bookkeeping held by {@code data}.
   *
   * <p>Topologies without an assignment are logged and left out. A topology's group is
   * {@code "default"} unless group mode is on and its {@code StormBase} names a group.
   *
   * @return the populated {@code ClusterSummary}
   * @throws TException if any step fails; non-Thrift failures are wrapped
   */
  @Override
  public ClusterSummary getClusterInfo() throws TException {
    try {
      StormClusterState zkState = data.getStormClusterState();

      // nimbus running time
      int uptime = data.uptime();

      Map<String, Assignment> assignmentById = new HashMap<String, Assignment>();
      List<TopologySummary> topologySummaries = new ArrayList<TopologySummary>();

      // walk every active topology's StormBase
      Map<String, StormBase> bases = Cluster.topology_bases(zkState);
      for (Entry<String, StormBase> baseEntry : bases.entrySet()) {
        String topologyId = baseEntry.getKey();
        StormBase base = baseEntry.getValue();

        Assignment assignment = zkState.assignment_info(topologyId, null);
        if (assignment == null) {
          LOG.error("Failed to get assignment of " + topologyId);
          continue;
        }
        assignmentById.put(topologyId, assignment);

        String group = data.isGroupMode() ? base.getGroup() : "default";
        if (group == null) {
          group = "default";
        }

        topologySummaries.add(
            NimbusUtils.mkTopologySummary(
                assignment,
                topologyId,
                base.getStormName(),
                base.getStatusString(),
                TimeUtils.time_delta(base.getLanchTimeSecs()),
                group));
      }

      // all supervisors, then their summaries
      Map<String, SupervisorInfo> supervisorInfos = Cluster.allSupervisorInfo(zkState, null);
      List<SupervisorSummary> supervisorSummaries =
          NimbusUtils.mkSupervisorSummaries(supervisorInfos, assignmentById);

      return new ClusterSummary(
          supervisorSummaries,
          uptime,
          topologySummaries,
          data.getGroupToTopology(),
          data.getGroupToResource(),
          data.getGroupToUsedResource(),
          data.isGroupMode());

    } catch (TException e) {
      LOG.info("Failed to get ClusterSummary ", e);
      throw e;
    } catch (Exception e) {
      LOG.info("Failed to get ClusterSummary ", e);
      throw new TException(e);
    }
  }
Ejemplo n.º 13
0
  /**
   * Submits one topology: checks the name is not already active, derives a topology id, writes
   * the topology code and task info, then pushes an assignment event and waits for its result.
   *
   * @param topologyname topology name
   * @param uploadedJarLocation path of the already uploaded jar
   * @param jsonConf the topology configuration serialized as JSON
   * @param topology the topology object
   * @param options submit options; the initial status is read from here
   * @throws AlreadyAliveException if a topology with this name is already active
   * @throws InvalidTopologyException if {@code jsonConf} cannot be parsed
   * @throws TopologyAssignException if the assignment fails or any other error occurs during
   *     submission
   * @throws TException if the liveness check itself fails
   */
  @SuppressWarnings("unchecked")
  @Override
  public void submitTopologyWithOpts(
      String topologyname,
      String uploadedJarLocation,
      String jsonConf,
      StormTopology topology,
      SubmitOptions options)
      throws AlreadyAliveException, InvalidTopologyException, TopologyAssignException, TException {
    LOG.info("Receive " + topologyname + ", uploadedJarLocation:" + uploadedJarLocation);
    // @@@ Move validate topologyname in client code
    try {
      checkTopologyActive(data, topologyname, false);
    } catch (AlreadyAliveException e) {
      LOG.info(topologyname + " is already exist ");
      throw e;
    } catch (Exception e) {
      LOG.info("Failed to check whether topology is alive or not", e);
      throw new TException(e);
    }

    // Id format: <name>-<submit counter>-<current time in seconds>
    int submitSeq = data.getSubmittedCount().incrementAndGet();
    String topologyId = topologyname + "-" + submitSeq + "-" + TimeUtils.current_time_secs();

    Map<Object, Object> serializedConf = (Map<Object, Object>) JStormUtils.from_json(jsonConf);
    if (serializedConf == null) {
      LOG.warn("Failed to serialized Configuration");
      throw new InvalidTopologyException("Failed to serilaze topology configuration");
    }

    serializedConf.put(Config.TOPOLOGY_ID, topologyId);
    serializedConf.put(Config.TOPOLOGY_NAME, topologyname);

    // Identical tail of every failure message below.
    String failureSuffix =
        "\n\n" + "topologyId:" + topologyId + ", uploadedJarLocation:" + uploadedJarLocation + "\n";

    try {
      Map<Object, Object> stormConf = NimbusUtils.normalizeConf(conf, serializedConf, topology);

      Map<Object, Object> totalStormConf = new HashMap<Object, Object>(conf);
      totalStormConf.putAll(stormConf);

      StormTopology normalizedTopology = NimbusUtils.normalizeTopology(stormConf, topology);

      // this validates the structure of the topology
      // (the real system topology is not needed here, so Common.system_topology is skipped)
      Common.validate_basic(normalizedTopology, totalStormConf, topologyId);

      StormClusterState zkState = data.getStormClusterState();

      // create /local-dir/nimbus/topologyId/xxxx files
      setupStormCode(conf, topologyId, uploadedJarLocation, stormConf, normalizedTopology);

      // generate TaskInfo for every bolt or spout in ZK: /ZK/tasks/topologyId/xxx
      setupZkTaskInfo(conf, topologyId, zkState);

      // make assignments for the topology
      TopologyAssignEvent assignEvent = new TopologyAssignEvent();
      assignEvent.setTopologyId(topologyId);
      assignEvent.setScratch(false);
      assignEvent.setTopologyName(topologyname);
      assignEvent.setOldStatus(
          Thrift.topologyInitialStatusToStormStatus(options.get_initial_status()));

      TopologyAssign.push(assignEvent);
      LOG.info("Submit for " + topologyname + " with conf " + serializedConf);

      if (!assignEvent.waitFinish()) {
        throw new FailedAssignTopologyException(assignEvent.getErrorMsg());
      }
      LOG.info("Finish submit for " + topologyname);

    } catch (FailedAssignTopologyException e) {
      // A missing message is reported as a submit timeout.
      String rootCause = (e.getMessage() == null) ? "submit timeout" : e.getMessage();
      String message = "Fail to sumbit topology, Root cause:" + rootCause + failureSuffix;
      LOG.error(message, e);
      throw new TopologyAssignException(message);
    } catch (InvalidParameterException e) {
      String message =
          "Fail to sumbit topology " + e.getMessage() + ", cause:" + e.getCause() + failureSuffix;
      LOG.error(message, e);
      throw new InvalidParameterException(message);
    } catch (Throwable e) {
      String message =
          "Fail to sumbit topology " + e.getMessage() + ", cause:" + e.getCause() + failureSuffix;
      LOG.error(message, e);
      throw new TopologyAssignException(message);
    }
  }
Ejemplo n.º 14
0
  /**
   * Collects everything needed to assign a topology into a {@code TopologyAssignContext}: the raw
   * topology, the merged configuration, the supervisors, the task-to-component map, the
   * dead/unstopped task sets and the assign type.
   *
   * <p>The assign type is NEW when the topology has no assignment in the cluster state,
   * REBALANCE when it has one and the event is a scratch event, and MONITOR otherwise.
   *
   * @param event assignment event identifying the topology
   * @return the populated context
   * @throws FailedAssignTopologyException if no supervisor info is available
   * @throws IOException if the topology has no task ids
   * @throws Exception propagated from reading the local files or the cluster state
   */
  protected TopologyAssignContext prepareTopologyAssign(TopologyAssignEvent event)
      throws Exception {
    TopologyAssignContext ret = new TopologyAssignContext();

    String topologyId = event.getTopologyId();

    // Read stormconf.ser and stormcode.ser from the local directory.
    Map<Object, Object> nimbusConf = nimbusData.getConf();
    Map<Object, Object> topologyConf =
        StormConfig.read_nimbus_topology_conf(nimbusConf, topologyId);

    ret.setRawTopology(StormConfig.read_nimbus_topology_code(nimbusConf, topologyId));

    // Topology settings override the nimbus-wide ones.
    Map mergedConf = new HashMap(nimbusConf);
    mergedConf.putAll(topologyConf);
    ret.setStormConf(mergedConf);

    StormClusterState zkState = nimbusData.getStormClusterState();

    // Fetch every running supervisor with its SupervisorInfo; no watch callback is needed.
    Map<String, SupervisorInfo> supInfos = Cluster.allSupervisorInfo(zkState, null);
    if (supInfos.isEmpty()) {
      throw new FailedAssignTopologyException(
          "Failed to make assignment " + topologyId + ", due to no alive supervisor");
    }

    // Fetch all tasks of the topology (/ZK/tasks/topologyId).
    Map<Integer, String> taskToComponent = Cluster.topology_task_info(zkState, topologyId);
    ret.setTaskToComponent(taskToComponent);

    Set<Integer> allTaskIds = taskToComponent.keySet();
    if (allTaskIds == null || allTaskIds.isEmpty()) {
      String errMsg = "Failed to get all task ID list from /ZK-dir/tasks/" + topologyId;
      LOG.warn(errMsg);
      throw new IOException(errMsg);
    }
    ret.setAllTaskIds(allTaskIds);

    Set<Integer> aliveTasks = new HashSet<Integer>();
    // Unstopped tasks: tasks still alive on a machine whose supervisor is dead.
    Set<Integer> unstoppedTasks = new HashSet<Integer>();
    Set<Integer> deadTasks = new HashSet<Integer>();

    Assignment existingAssignment = zkState.assignment_info(topologyId, null);
    boolean hasAssignment = existingAssignment != null;
    if (hasAssignment) {
      aliveTasks = getAliveTasks(topologyId, allTaskIds);
      unstoppedTasks = getUnstoppedSlots(aliveTasks, supInfos, existingAssignment);

      // dead = all - alive
      deadTasks.addAll(allTaskIds);
      deadTasks.removeAll(aliveTasks);
    }

    ret.setDeadTaskIds(deadTasks);
    ret.setUnstoppedTaskIds(unstoppedTasks);

    // Step 2: get all slots resource, free slots / alive slots / unstopped slots
    getFreeSlots(supInfos, zkState);
    ret.setCluster(supInfos);

    if (!hasAssignment) {
      ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_NEW);

      // Best effort: fall back to the backup stored under the topology name, if any.
      try {
        AssignmentBak backup = zkState.assignment_bak(event.getTopologyName());
        if (backup != null) {
          ret.setOldAssignment(backup.getAssignment());
        }
      } catch (Exception e) {
        LOG.warn("Fail to get old assignment", e);
      }
      return ret;
    }

    ret.setOldAssignment(existingAssignment);
    if (event.isScratch()) {
      // Rebalance keeps only the workers of unstopped tasks.
      ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_REBALANCE);
      ret.setUnstoppedWorkers(getUnstoppedWorkers(unstoppedTasks, existingAssignment));
    } else {
      // Monitor keeps the workers of every alive task.
      ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_MONITOR);
      ret.setUnstoppedWorkers(getUnstoppedWorkers(aliveTasks, existingAssignment));
    }

    return ret;
  }