Example #1
 /**
  * Copy the topology jar to /local-dir/nimbus/topologyId/stormjar.jar.
  *
  * @param conf storm configuration
  * @param tmpJarLocation path of the uploaded jar to copy from
  * @param stormroot local stormdist root directory of the topology
  * @throws IOException
  */
 private void setupJar(Map<Object, Object> conf, String tmpJarLocation, String stormroot)
     throws IOException {
   if (!StormConfig.local_mode(conf)) {
     File srcFile = new File(tmpJarLocation);
     if (!srcFile.exists()) {
       throw new IllegalArgumentException(
           tmpJarLocation + " to copy to " + stormroot + " does not exist!");
     }
     String path = StormConfig.stormjar_path(stormroot);
     File destFile = new File(path);
     FileUtils.copyFile(srcFile, destFile);
   }
 }
Example #2
  /**
   * Make an assignment for a topology. This is the nimbus core function; it has been
   * completely rewritten.
   *
   * @param event TopologyAssignEvent; event.isScratch() is false unless the topology is
   *     being rebalanced
   * @throws Exception
   */
  public Assignment mkAssignment(TopologyAssignEvent event) throws Exception {
    String topologyId = event.getTopologyId();

    LOG.info("Determining assignment for " + topologyId);

    TopologyAssignContext context = prepareTopologyAssign(event);

    Set<ResourceWorkerSlot> assignments = null;

    if (!StormConfig.local_mode(nimbusData.getConf())) {

      IToplogyScheduler scheduler = schedulers.get(DEFAULT_SCHEDULER_NAME);

      assignments = scheduler.assignTasks(context);

    } else {
      assignments = mkLocalAssignment(context);
    }
    Assignment assignment = null;

    Map<String, String> nodeHost =
        getTopologyNodeHost(context.getCluster(), context.getOldAssignment(), assignments);

    Map<Integer, Integer> startTimes =
        getTaskStartTimes(context, nimbusData, topologyId, context.getOldAssignment(), assignments);

    String codeDir = StormConfig.masterStormdistRoot(nimbusData.getConf(), topologyId);

    assignment = new Assignment(codeDir, assignments, nodeHost, startTimes);

    StormClusterState stormClusterState = nimbusData.getStormClusterState();

    stormClusterState.set_assignment(topologyId, assignment);

    // update task heartbeat's start time
    NimbusUtils.updateTaskHbStartTime(nimbusData, assignment, topologyId);

    // Update metrics information in ZK when rebalancing or reassigning;
    // only update the metrics monitor status when creating a topology
    if (context.getAssignType() == TopologyAssignContext.ASSIGN_TYPE_REBALANCE
        || context.getAssignType() == TopologyAssignContext.ASSIGN_TYPE_MONITOR) {
      NimbusUtils.updateMetricsInfo(nimbusData, topologyId, assignment);
    } else {
      metricsMonitor(event);
    }

    LOG.info("Successfully make assignment for topology id " + topologyId + ": " + assignment);

    return assignment;
  }
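The assignment above is normally triggered through a TopologyAssignEvent. A minimal sketch of the calling side, using only the event fields that appear in Example #11 (the topology id is hypothetical, and the real JStorm call sites may differ):

  TopologyAssignEvent event = new TopologyAssignEvent();
  event.setTopologyId("demo-topology-1"); // hypothetical id
  event.setScratch(false); // false: fresh assignment, not a rebalance
  TopologyAssign.push(event); // enqueue for the assignment loop
  event.waitFinish(); // block until mkAssignment has completed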
Example #3
  /**
   * Clean up topologies that are no longer in ZK /topology but still leave traces elsewhere.
   *
   * @throws Exception
   */
  public void cleanupDisappearedTopology() throws Exception {
    StormClusterState clusterState = nimbusData.getStormClusterState();

    List<String> active_topologys = clusterState.active_storms();
    if (active_topologys == null) {
      return;
    }

    Set<String> cleanupIds = get_cleanup_ids(clusterState, active_topologys);

    for (String topologyId : cleanupIds) {

      LOG.info("Cleaning up " + topologyId);

      clusterState.try_remove_storm(topologyId);
      // drop the cached task heartbeats of this topology
      nimbusData.getTaskHeartbeatsCache().remove(topologyId);

      // get /nimbus/stormdist/topologyId
      String master_stormdist_root =
          StormConfig.masterStormdistRoot(nimbusData.getConf(), topologyId);
      try {
        // delete topologyId local dir
        PathUtils.rmr(master_stormdist_root);
      } catch (IOException e) {
        LOG.warn("Failed to delete " + master_stormdist_root + ",", e);
      }
    }
  }
Example #4
  /**
   * Get the StormTopology by deserializing local files.
   *
   * @param id String: topology id
   * @return StormTopology
   */
  @Override
  public StormTopology getTopology(String id) throws NotAliveException, TException {
    StormTopology topology = null;
    try {
      StormTopology stormtopology = StormConfig.read_nimbus_topology_code(conf, id);
      if (stormtopology == null) {
        throw new TException("topology:" + id + "is null");
      }

      Map<Object, Object> topologyConf =
          (Map<Object, Object>) StormConfig.read_nimbus_topology_conf(conf, id);

      topology = Common.system_topology(topologyConf, stormtopology);
    } catch (Exception e) {
      LOG.error("Failed to get topology " + id + ",", e);
      throw new TException("Failed to get system_topology");
    }
    return topology;
  }
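On the client side this method is reached over Thrift. A hedged usage sketch, assuming the standard Storm NimbusClient helper (the topology id is hypothetical):

  Map conf = Utils.readStormConfig();
  NimbusClient client = NimbusClient.getConfiguredClient(conf);
  try {
    StormTopology topology = client.getClient().getTopology("demo-topology-1");
    System.out.println("bolts: " + topology.get_bolts().keySet());
  } finally {
    client.close();
  }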
Example #5
  /**
   * Generate a task id (Integer) for every task.
   *
   * @param conf nimbus configuration
   * @param topologyid topology id
   * @return Map<Integer, String>: from taskid to componentid
   * @throws IOException
   * @throws InvalidTopologyException
   */
  public Map<Integer, String> mkTaskComponentAssignments(
      Map<Object, Object> conf, String topologyid) throws IOException, InvalidTopologyException {

    // TODO: stormConf could be passed in directly from the submit method
    // instead of being re-read from disk here
    Map<Object, Object> stormConf = StormConfig.read_nimbus_topology_conf(conf, topologyid);

    StormTopology stopology = StormConfig.read_nimbus_topology_code(conf, topologyid);

    // use TreeMap to keep task ids in ascending order
    Map<Integer, String> rtn = new TreeMap<Integer, String>();

    StormTopology topology = Common.system_topology(stormConf, stopology);

    Integer count = 0;
    count = mkTaskMaker(stormConf, topology.get_bolts(), rtn, count);
    count = mkTaskMaker(stormConf, topology.get_spouts(), rtn, count);
    count = mkTaskMaker(stormConf, topology.get_state_spouts(), rtn, count);

    return rtn;
  }
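mkTaskMaker itself is not shown in these examples, but the counting pattern above implies a sequential allocation: each component receives the next block of consecutive task ids, so the iteration order over bolts, spouts, and state spouts fixes the id ranges. A sketch of that allocation under this assumption (assignIds is a hypothetical helper, not the real mkTaskMaker):

  private Integer assignIds(
      Map<Integer, String> rtn, String componentId, int parallelism, Integer count) {
    for (int i = 0; i < parallelism; i++) {
      count++;
      rtn.put(count, componentId); // taskid -> componentid
    }
    return count;
  }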
Example #6
 /**
  * Get the topology configuration as a JSON string.
  *
  * @param id String: topology id
  * @return String: the configuration rendered as JSON
  */
 @Override
 public String getTopologyConf(String id) throws NotAliveException, TException {
   String rtn;
   try {
     Map<Object, Object> topologyConf = StormConfig.read_nimbus_topology_conf(conf, id);
     rtn = JStormUtils.to_json(topologyConf);
   } catch (IOException e) {
      LOG.error("Failed to get configuration of " + id, e);
     throw new TException(e);
   }
   return rtn;
 }
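A hedged client-side sketch of fetching and parsing this JSON, assuming the standard NimbusClient helper and the json-simple library bundled with Storm (the topology id is hypothetical):

  NimbusClient client = NimbusClient.getConfiguredClient(Utils.readStormConfig());
  String jsonConf = client.getClient().getTopologyConf("demo-topology-1");
  Map topologyConf = (Map) JSONValue.parse(jsonConf);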
Example #7
  /**
   * Create the local topology files:
   * /local-dir/nimbus/topologyId/stormjar.jar
   * /local-dir/nimbus/topologyId/stormcode.ser
   * /local-dir/nimbus/topologyId/stormconf.ser
   *
   * @param conf nimbus configuration
   * @param topologyId topology id
   * @param tmpJarLocation path of the uploaded jar
   * @param stormConf topology configuration to serialize
   * @param topology topology code to serialize
   * @throws IOException
   */
  private void setupStormCode(
      Map<Object, Object> conf,
      String topologyId,
      String tmpJarLocation,
      Map<Object, Object> stormConf,
      StormTopology topology)
      throws IOException {
    // local-dir/nimbus/stormdist/topologyId
    String stormroot = StormConfig.masterStormdistRoot(conf, topologyId);

    FileUtils.forceMkdir(new File(stormroot));
    FileUtils.cleanDirectory(new File(stormroot));

    // copy jar to /local-dir/nimbus/topologyId/stormjar.jar
    setupJar(conf, tmpJarLocation, stormroot);

    // serialize to file /local-dir/nimbus/topologyId/stormcode.ser
    FileUtils.writeByteArrayToFile(
        new File(StormConfig.stormcode_path(stormroot)), Utils.serialize(topology));

    // serialize to file /local-dir/nimbus/topologyId/stormconf.ser
    FileUtils.writeByteArrayToFile(
        new File(StormConfig.sotrmconf_path(stormroot)), Utils.serialize(stormConf));
  }
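The files written here are the same ones the readers in Examples #4 and #5 consume, so the round trip looks like this (a sketch; conf and topologyId as above):

  StormTopology topology = StormConfig.read_nimbus_topology_code(conf, topologyId);
  Map<Object, Object> stormConf = StormConfig.read_nimbus_topology_conf(conf, topologyId);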
Example #8
  /**
   * Prepare to upload a topology jar; return the file location on nimbus.
   *
   * @throws TException
   */
  @Override
  public String beginFileUpload() throws TException {
    String fileLoc = null;
    try {
      fileLoc = StormConfig.masterInbox(conf) + "/stormjar-" + UUID.randomUUID() + ".jar";

      data.getUploaders().put(fileLoc, Channels.newChannel(new FileOutputStream(fileLoc)));
      LOG.info("Uploading file from client to " + fileLoc);
    } catch (FileNotFoundException e) {
      LOG.error(" file not found " + fileLoc);
      throw new TException(e);
    } catch (IOException e) {
      LOG.error(" IOException  " + fileLoc, e);
      throw new TException(e);
    }
    return fileLoc;
  }
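beginFileUpload is the first step of the Nimbus upload protocol (beginFileUpload / uploadChunk / finishFileUpload). A hedged client-side sketch, assuming the standard Storm Thrift interface and the BufferFileInputStream helper (the jar path is hypothetical):

  Map conf = Utils.readStormConfig();
  NimbusClient client = NimbusClient.getConfiguredClient(conf);
  String location = client.getClient().beginFileUpload();
  BufferFileInputStream is = new BufferFileInputStream("/tmp/topology.jar");
  while (true) {
    byte[] chunk = is.read();
    if (chunk.length == 0) break; // EOF: every chunk has been sent
    client.getClient().uploadChunk(location, ByteBuffer.wrap(chunk));
  }
  client.getClient().finishFileUpload(location);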
Example #9
  /**
   * Get topology ids which need to be cleaned up.
   *
   * @param clusterState ZK cluster state
   * @param active_topologys ids of currently active topologies
   * @return ids that still leave traces in ZK or on disk but are no longer active
   * @throws Exception
   */
  private Set<String> get_cleanup_ids(StormClusterState clusterState, List<String> active_topologys)
      throws Exception {

    List<String> task_ids = clusterState.task_storms();
    List<String> heartbeat_ids = clusterState.heartbeat_storms();
    List<String> error_ids = clusterState.task_error_storms();
    List<String> assignment_ids = clusterState.assignments(null);
    List<String> monitor_ids = clusterState.monitors();

    String master_stormdist_root = StormConfig.masterStormdistRoot(nimbusData.getConf());
    // listdir /local-dir/nimbus/stormdist
    List<String> code_ids = PathUtils.read_dir_contents(master_stormdist_root);

    // Set<String> assigned_ids =
    // JStormUtils.listToSet(clusterState.active_storms());
    Set<String> to_cleanup_ids = new HashSet<String>();

    if (task_ids != null) {
      to_cleanup_ids.addAll(task_ids);
    }

    if (heartbeat_ids != null) {
      to_cleanup_ids.addAll(heartbeat_ids);
    }

    if (error_ids != null) {
      to_cleanup_ids.addAll(error_ids);
    }

    if (assignment_ids != null) {
      to_cleanup_ids.addAll(assignment_ids);
    }

    if (monitor_ids != null) {
      to_cleanup_ids.addAll(monitor_ids);
    }

    if (code_ids != null) {
      to_cleanup_ids.addAll(code_ids);
    }

    if (active_topologys != null) {
      to_cleanup_ids.removeAll(active_topologys);
    }
    return to_cleanup_ids;
  }
Example #10
  /** Start the supervisor and block until shutdown completes. */
  public void run() {

    SupervisorManger supervisorManager = null;
    try {
      Map<Object, Object> conf = Utils.readStormConfig();

      StormConfig.validate_distributed_mode(conf);

      supervisorManager = mkSupervisor(conf, null);

    } catch (Exception e) {
      LOG.error("Failed to start supervisor\n", e);
      System.exit(1);
    }

    while (!supervisorManager.isFinishShutdown()) {
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
        // ignore interrupts and keep waiting for shutdown to complete
      }
    }
  }
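A minimal launcher sketch for this run loop (the main method here is hypothetical; JStorm's actual entry point may differ):

  public static void main(String[] args) {
    Supervisor supervisor = new Supervisor(); // assumed enclosing class
    supervisor.run(); // blocks until shutdown has finished
  }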
Example #11
  @Override
  public <T> Object execute(T... args) {
    boolean isSetTaskInfo = false;
    try {
      // args[0]: delay, args[1]: reassign_flag, args[2]: conf
      Boolean reassign = (Boolean) args[1];
      Map<Object, Object> conf = (Map<Object, Object>) args[2];
      if (conf != null) {
        boolean isConfUpdate = false;
        Map stormConf = data.getConf();

        // Update topology code
        Map topoConf = StormConfig.read_nimbus_topology_conf(stormConf, topologyid);
        StormTopology rawOldTopology = StormConfig.read_nimbus_topology_code(stormConf, topologyid);
        StormTopology rawNewTopology = NimbusUtils.normalizeTopology(conf, rawOldTopology, true);
        StormTopology sysOldTopology = rawOldTopology.deepCopy();
        StormTopology sysNewTopology = rawNewTopology.deepCopy();
        if (conf.get(Config.TOPOLOGY_ACKER_EXECUTORS) != null) {
          Common.add_acker(topoConf, sysOldTopology);
          Common.add_acker(conf, sysNewTopology);
          int ackerNum = JStormUtils.parseInt(conf.get(Config.TOPOLOGY_ACKER_EXECUTORS));
          int oldAckerNum = JStormUtils.parseInt(topoConf.get(Config.TOPOLOGY_ACKER_EXECUTORS));
          LOG.info("Update acker from oldAckerNum=" + oldAckerNum + " to ackerNum=" + ackerNum);
          topoConf.put(Config.TOPOLOGY_ACKER_EXECUTORS, ackerNum);
          isConfUpdate = true;
        }

        // If scale-out, setup task info for new added tasks
        setTaskInfo(sysOldTopology, sysNewTopology);
        isSetTaskInfo = true;

        // If everything is OK, write topology code into disk
        StormConfig.write_nimbus_topology_code(
            stormConf, topologyid, Utils.serialize(rawNewTopology));

        // Update topology conf if worker num has been updated
        Set<Object> keys = conf.keySet();
        Integer workerNum = JStormUtils.parseInt(conf.get(Config.TOPOLOGY_WORKERS));
        if (workerNum != null) {
          Integer oldWorkerNum = JStormUtils.parseInt(topoConf.get(Config.TOPOLOGY_WORKERS));
          topoConf.put(Config.TOPOLOGY_WORKERS, workerNum);
          isConfUpdate = true;

          LOG.info("Update worker num from " + oldWorkerNum + " to " + workerNum);
        }

        if (keys.contains(Config.ISOLATION_SCHEDULER_MACHINES)) {
          topoConf.put(
              Config.ISOLATION_SCHEDULER_MACHINES, conf.get(Config.ISOLATION_SCHEDULER_MACHINES));
        }

        if (isConfUpdate) {
          StormConfig.write_nimbus_topology_conf(stormConf, topologyid, topoConf);
        }
      }

      TopologyAssignEvent event = new TopologyAssignEvent();

      event.setTopologyId(topologyid);
      event.setScratch(true);
      event.setOldStatus(oldStatus);
      event.setReassign(reassign);
      if (conf != null) event.setScaleTopology(true);
      TopologyAssign.push(event);
      event.waitFinish();
    } catch (Exception e) {
      LOG.error("do-rebalance error!", e);
      // Rollback the changes on ZK
      if (isSetTaskInfo) {
        try {
          StormClusterState clusterState = data.getStormClusterState();
          clusterState.remove_task(topologyid, newTasks);
        } catch (Exception e1) {
          LOG.error("Failed to rollback the changes on ZK for task-" + newTasks, e);
        }
      }
    }

    DelayStatusTransitionCallback delayCallback =
        new DelayStatusTransitionCallback(
            data, topologyid, oldStatus, StatusType.rebalancing, StatusType.done_rebalance);
    return delayCallback.execute();
  }
Example #12
  @SuppressWarnings({"rawtypes", "unchecked"})
  public WorkerData(
      Map conf,
      IContext context,
      String topology_id,
      String supervisor_id,
      int port,
      String worker_id,
      String jar_path)
      throws Exception {

    this.conf = conf;
    this.context = context;
    this.topologyId = topology_id;
    this.supervisorId = supervisor_id;
    this.port = port;
    this.workerId = worker_id;

    this.active = new AtomicBoolean(true);
    this.topologyStatus = StatusType.active;

    if (StormConfig.cluster_mode(conf).equals("distributed")) {
      String pidDir = StormConfig.worker_pids_root(conf, worker_id);
      JStormServerUtils.createPid(pidDir);
    }

    // create zk interface
    this.zkClusterstate = ZkTool.mk_distributed_cluster_state(conf);
    this.zkCluster = Cluster.mk_storm_cluster_state(zkClusterstate);

    Map rawConf = StormConfig.read_supervisor_topology_conf(conf, topology_id);
    this.stormConf = new HashMap<Object, Object>();
    this.stormConf.putAll(conf);
    this.stormConf.putAll(rawConf);

    LOG.info("Worker Configuration " + stormConf);

    try {

      boolean enableClassloader = ConfigExtension.isEnableTopologyClassLoader(stormConf);
      boolean enableDebugClassloader = ConfigExtension.isEnableClassloaderDebug(stormConf);

      if (jar_path == null && enableClassloader == true) {
        LOG.error("enable classloader, but not app jar");
        throw new InvalidParameterException();
      }

      URL[] urlArray = new URL[0];
      if (jar_path != null) {
        String[] paths = jar_path.split(":");
        Set<URL> urls = new HashSet<URL>();
        for (String path : paths) {
          if (StringUtils.isBlank(path)) continue;
          URL url = new URL("File:" + path);
          urls.add(url);
        }
        urlArray = urls.toArray(new URL[0]);
      }

      WorkerClassLoader.mkInstance(
          urlArray,
          ClassLoader.getSystemClassLoader(),
          ClassLoader.getSystemClassLoader().getParent(),
          enableClassloader,
          enableDebugClassloader);
    } catch (Exception e) {
      LOG.error("init jarClassLoader error!", e);
      throw new InvalidParameterException();
    }

    if (this.context == null) {
      this.context = TransportFactory.makeContext(stormConf);
    }

    boolean disruptorUseSleep = ConfigExtension.isDisruptorUseSleep(stormConf);
    DisruptorQueue.setUseSleep(disruptorUseSleep);
    boolean isLimited = ConfigExtension.getTopologyBufferSizeLimited(stormConf);
    DisruptorQueue.setLimited(isLimited);
    LOG.info("Disruptor use sleep:" + disruptorUseSleep + ", limited size:" + isLimited);

    int buffer_size = Utils.getInt(conf.get(Config.TOPOLOGY_TRANSFER_BUFFER_SIZE));
    WaitStrategy waitStrategy =
        (WaitStrategy)
            Utils.newInstance((String) conf.get(Config.TOPOLOGY_DISRUPTOR_WAIT_STRATEGY));
    this.transferQueue =
        DisruptorQueue.mkInstance("TotalTransfer", ProducerType.MULTI, buffer_size, waitStrategy);
    this.transferQueue.consumerStarted();
    this.sendingQueue =
        DisruptorQueue.mkInstance("TotalSending", ProducerType.MULTI, buffer_size, waitStrategy);
    this.sendingQueue.consumerStarted();

    this.nodeportSocket = new ConcurrentHashMap<WorkerSlot, IConnection>();
    this.taskNodeport = new ConcurrentHashMap<Integer, WorkerSlot>();
    this.workerToResource = new ConcurrentSkipListSet<ResourceWorkerSlot>();
    this.innerTaskTransfer = new ConcurrentHashMap<Integer, DisruptorQueue>();
    this.deserializeQueues = new ConcurrentHashMap<Integer, DisruptorQueue>();

    Assignment assignment = zkCluster.assignment_info(topologyId, null);
    if (assignment == null) {
      String errMsg = "Failed to get Assignment of " + topologyId;
      LOG.error(errMsg);
      throw new RuntimeException(errMsg);
    }
    workerToResource.addAll(assignment.getWorkers());

    // get current worker's task list

    this.taskids = assignment.getCurrentWorkerTasks(supervisorId, port);
    if (taskids.isEmpty()) {
      throw new RuntimeException("No tasks running on the current worker");
    }
    LOG.info("Current worker taskList:" + taskids);

    // deserialize topology code from local dir
    rawTopology = StormConfig.read_supervisor_topology_code(conf, topology_id);
    sysTopology = Common.system_topology(stormConf, rawTopology);

    generateMaps();

    contextMaker = new ContextMaker(this);

    metricReporter = new MetricReporter(this);

    outTaskStatus = new HashMap<Integer, Boolean>();

    threadPool = Executors.newScheduledThreadPool(THREAD_POOL_NUM);
    TimerTrigger.setScheduledExecutorService(threadPool);

    LOG.info("Successfully create WorkerData");
  }
Example #13
  protected TopologyAssignContext prepareTopologyAssign(TopologyAssignEvent event)
      throws Exception {
    TopologyAssignContext ret = new TopologyAssignContext();

    String topologyId = event.getTopologyId();

    /** Read stormconf.ser and stormcode.ser from the local directory */
    Map<Object, Object> nimbusConf = nimbusData.getConf();
    Map<Object, Object> topologyConf =
        StormConfig.read_nimbus_topology_conf(nimbusConf, topologyId);

    StormTopology rawTopology = StormConfig.read_nimbus_topology_code(nimbusConf, topologyId);
    ret.setRawTopology(rawTopology);

    Map stormConf = new HashMap();
    stormConf.putAll(nimbusConf);
    stormConf.putAll(topologyConf);
    ret.setStormConf(stormConf);

    StormClusterState stormClusterState = nimbusData.getStormClusterState();

    // get all running supervisors and their SupervisorInfo; no callback is needed to watch them
    Map<String, SupervisorInfo> supInfos = Cluster.allSupervisorInfo(stormClusterState, null);
    if (supInfos.size() == 0) {
      throw new FailedAssignTopologyException(
          "Failed to make assignment " + topologyId + ", due to no alive supervisor");
    }

    /** Get all tasks under this topologyId */
    Map<Integer, String> taskToComponent =
        Cluster.topology_task_info(stormClusterState, topologyId);
    ret.setTaskToComponent(taskToComponent);

    // get taskids /ZK/tasks/topologyId
    Set<Integer> allTaskIds = taskToComponent.keySet();
    if (allTaskIds == null || allTaskIds.size() == 0) {
      String errMsg = "Failed to get all task ID list from /ZK-dir/tasks/" + topologyId;
      LOG.warn(errMsg);
      throw new IOException(errMsg);
    }
    ret.setAllTaskIds(allTaskIds);

    Set<Integer> aliveTasks = new HashSet<Integer>();
    // unstoppedTasks are tasks that are still alive but whose supervisor is dead
    Set<Integer> unstoppedTasks = new HashSet<Integer>();
    Set<Integer> deadTasks = new HashSet<Integer>();
    Set<ResourceWorkerSlot> unstoppedWorkers = new HashSet<ResourceWorkerSlot>();

    Assignment existingAssignment = stormClusterState.assignment_info(topologyId, null);
    if (existingAssignment != null) {
      aliveTasks = getAliveTasks(topologyId, allTaskIds);
      unstoppedTasks = getUnstoppedSlots(aliveTasks, supInfos, existingAssignment);

      deadTasks.addAll(allTaskIds);
      deadTasks.removeAll(aliveTasks);
    }

    ret.setDeadTaskIds(deadTasks);
    ret.setUnstoppedTaskIds(unstoppedTasks);

    // get all slot resources: free slots / alive slots / unstopped slots
    getFreeSlots(supInfos, stormClusterState);
    ret.setCluster(supInfos);

    if (existingAssignment == null) {
      ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_NEW);

      try {
        AssignmentBak lastAssignment = stormClusterState.assignment_bak(event.getTopologyName());
        if (lastAssignment != null) {
          ret.setOldAssignment(lastAssignment.getAssignment());
        }
      } catch (Exception e) {
        LOG.warn("Fail to get old assignment", e);
      }
    } else {
      ret.setOldAssignment(existingAssignment);
      if (event.isScratch()) {
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_REBALANCE);
        unstoppedWorkers = getUnstoppedWorkers(unstoppedTasks, existingAssignment);
        ret.setUnstoppedWorkers(unstoppedWorkers);
      } else {
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_MONITOR);
        unstoppedWorkers = getUnstoppedWorkers(aliveTasks, existingAssignment);
        ret.setUnstoppedWorkers(unstoppedWorkers);
      }
    }

    return ret;
  }
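The assign-type decision above can be condensed into a small pure function (a sketch; the constants mirror those used in the code):

  static int decideAssignType(boolean hasExistingAssignment, boolean isScratch) {
    if (!hasExistingAssignment) {
      return TopologyAssignContext.ASSIGN_TYPE_NEW; // old backup assignment used only as a hint
    }
    return isScratch
        ? TopologyAssignContext.ASSIGN_TYPE_REBALANCE
        : TopologyAssignContext.ASSIGN_TYPE_MONITOR;
  }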
Example #14
  /**
   * Create and start one supervisor.
   *
   * @param conf configuration (merged defaults.yaml and storm.yaml)
   * @param sharedContext IContext, null right now
   * @return SupervisorManger: used to shut down all workers and the supervisor
   */
  @SuppressWarnings("rawtypes")
  public SupervisorManger mkSupervisor(Map conf, IContext sharedContext) throws Exception {

    LOG.info("Starting Supervisor with conf " + conf);

    active = new AtomicBoolean(true);

    /** Step 1: cleanup all files in /storm-local-dir/supervisor/tmp */
    String path = StormConfig.supervisorTmpDir(conf);
    FileUtils.cleanDirectory(new File(path));

    /*
     * Step 2: create the ZK operation instance StormClusterState
     */

    StormClusterState stormClusterState = Cluster.mk_storm_cluster_state(conf);

    /*
     * Step 3: create LocalState, a local KV store: (a) create the LocalState
     * instance; (b) get supervisorId, creating one if absent
     */

    LocalState localState = StormConfig.supervisorState(conf);

    String supervisorId = (String) localState.get(Common.LS_ID);
    if (supervisorId == null) {
      supervisorId = UUID.randomUUID().toString();
      localState.put(Common.LS_ID, supervisorId);
    }

    Vector<SmartThread> threads = new Vector<SmartThread>();

    // Step 4: create the heartbeat thread;
    // every supervisor.heartbeat.frequency.secs, write SupervisorInfo to ZK
    String myHostName = ConfigExtension.getSupervisorHost(conf);
    if (myHostName == null) {
      myHostName = NetWorkUtils.hostname();
    }
    Heartbeat hb = new Heartbeat(conf, stormClusterState, supervisorId, myHostName, active);
    hb.update();
    AsyncLoopThread heartbeat = new AsyncLoopThread(hb, false, null, Thread.MIN_PRIORITY, true);
    threads.add(heartbeat);

    // Step 5: create and start the SyncSupervisor thread;
    // every supervisor.monitor.frequency.secs seconds, run SyncSupervisorEvent
    EventManager processEventManager = new EventManagerImp(false);
    ConcurrentHashMap<String, String> workerThreadPids = new ConcurrentHashMap<String, String>();
    SyncProcessEvent syncProcessEvent =
        new SyncProcessEvent(supervisorId, conf, localState, workerThreadPids, sharedContext);

    EventManager syncSupEventManager = new EventManagerImp(false);
    SyncSupervisorEvent syncSupervisorEvent =
        new SyncSupervisorEvent(
            supervisorId,
            conf,
            processEventManager,
            syncSupEventManager,
            stormClusterState,
            localState,
            syncProcessEvent);

    int syncFrequence = JStormUtils.parseInt(conf.get(Config.SUPERVISOR_MONITOR_FREQUENCY_SECS));
    EventManagerPusher syncSupervisorPusher =
        new EventManagerPusher(syncSupEventManager, syncSupervisorEvent, active, syncFrequence);
    AsyncLoopThread syncSupervisorThread = new AsyncLoopThread(syncSupervisorPusher);
    threads.add(syncSupervisorThread);

    // Step 6: the sync-process thread is skipped, because nimbus checks
    // whether each worker is dead or not; if dead, it reassigns a new worker
    //
    // int syncProcessFrequence = syncFrequence/2;
    // EventManagerPusher syncProcessPusher = new EventManagerPusher(
    // processEventManager, syncProcessEvent, active,
    // syncProcessFrequence);
    // AsyncLoopThread syncProcessThread = new
    // AsyncLoopThread(syncProcessPusher);
    // threads.add(syncProcessThread);

    // Step 7: start the http server
    Httpserver httpserver = new Httpserver(conf);
    httpserver.start();

    LOG.info("Starting supervisor with id " + supervisorId + " at host " + myHostName);

    // SupervisorManger can shut down the supervisor and all workers
    return new SupervisorManger(
        conf,
        supervisorId,
        active,
        threads,
        syncSupEventManager,
        processEventManager,
        httpserver,
        stormClusterState,
        workerThreadPids);
  }