Example #1
 /**
  * Check whether the topology with the given name is active.
  *
  * @param stormClusterState ZK-backed cluster state (see Cluster_clj)
  * @param topologyName the topology name to look up
  * @return true if the topology is active, otherwise false
  * @throws Exception if the ZK lookup fails
  */
 public boolean isTopologyActive(StormClusterState stormClusterState, String topologyName)
     throws Exception {
    // active iff the topology name resolves to a topology id in ZK
    return Cluster.get_topology_id(stormClusterState, topologyName) != null;
 }
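
A hypothetical call site (the data.getStormClusterState() accessor and NotAliveException appear in Example #3; the topology name here is made up):

  // hypothetical guard before operating on a topology by name
  StormClusterState stormClusterState = data.getStormClusterState();
  if (!isTopologyActive(stormClusterState, "word-count")) {
    throw new NotAliveException("word-count is not active");
  }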
Example #2
  /**
   * Initialize the worker's lookup maps and resource containers: tasksToComponent,
   * componentToSortedTasks, defaultResources, userResources, executorData and
   * registeredMetrics (see the field declarations, e.g. taskNodeport and
   * componentToStreamToFields, for the full set).
   *
   * @throws Exception if the task info cannot be read from ZK
   */
  private void generateMaps() throws Exception {
    this.tasksToComponent = Cluster.topology_task_info(zkCluster, topologyId);
    LOG.info("Map<taskId, component>:" + tasksToComponent);

    this.componentToSortedTasks = JStormUtils.reverse_map(tasksToComponent);
    // sort each component's task id list in place
    for (java.util.Map.Entry<String, List<Integer>> entry : componentToSortedTasks.entrySet()) {
      List<Integer> tasks = entry.getValue();
      Collections.sort(tasks);
    }

    this.defaultResources = new HashMap<String, Object>();
    this.userResources = new HashMap<String, Object>();
    this.executorData = new HashMap<String, Object>();
    this.registeredMetrics = new HashMap();
  }
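
JStormUtils.reverse_map itself is not shown in these examples; a minimal standalone sketch of the inversion it presumably performs (Map<taskId, component> to Map<component, List<taskId>>), including the per-component sort used above:

  import java.util.*;

  public class ReverseMapSketch {
    // invert Map<taskId, component> into Map<component, sorted List<taskId>>
    static Map<String, List<Integer>> reverseMap(Map<Integer, String> tasksToComponent) {
      Map<String, List<Integer>> componentToTasks = new HashMap<String, List<Integer>>();
      for (Map.Entry<Integer, String> e : tasksToComponent.entrySet()) {
        List<Integer> tasks = componentToTasks.get(e.getValue());
        if (tasks == null) {
          tasks = new ArrayList<Integer>();
          componentToTasks.put(e.getValue(), tasks);
        }
        tasks.add(e.getKey());
      }
      for (List<Integer> tasks : componentToTasks.values()) {
        Collections.sort(tasks); // same in-place sort as generateMaps()
      }
      return componentToTasks;
    }
  }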
Example #3
  /**
   * Get the TopologyInfo; it contains all data about the topology's running status.
   *
   * @return TopologyInfo
   */
  @Override
  public TopologyInfo getTopologyInfo(String topologyId) throws NotAliveException, TException {

    TopologyInfo topologyInfo = new TopologyInfo();

    StormClusterState stormClusterState = data.getStormClusterState();

    try {

      // get topology's StormBase
      StormBase base = stormClusterState.storm_base(topologyId, null);
      if (base == null) {
        throw new NotAliveException("No topology of " + topologyId);
      }
      topologyInfo.set_id(topologyId);
      topologyInfo.set_name(base.getStormName());
      topologyInfo.set_uptime_secs(TimeUtils.time_delta(base.getLanchTimeSecs()));
      topologyInfo.set_status(base.getStatusString());

      // get topology's Assignment
      Assignment assignment = stormClusterState.assignment_info(topologyId, null);
      if (assignment == null) {
        throw new TException("Failed to get StormBase from ZK of " + topologyId);
      }

      // get topology's map<taskId, componentId>
      Map<Integer, String> taskInfo = Cluster.topology_task_info(stormClusterState, topologyId);

      List<TaskSummary> tasks =
          NimbusUtils.mkTaskSummary(stormClusterState, assignment, taskInfo, topologyId);
      topologyInfo.set_tasks(tasks);

      return topologyInfo;
    } catch (TException e) {
      LOG.info("Failed to get topologyInfo " + topologyId, e);
      throw e;
    } catch (Exception e) {
      LOG.info("Failed to get topologyInfo " + topologyId, e);
      throw new TException("Failed to get topologyInfo" + topologyId);
    }
  }
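
TimeUtils.time_delta is used for uptime here and in later examples; a standalone sketch of what it presumably computes, namely the seconds elapsed since a launch timestamp:

  // hypothetical stand-in for TimeUtils.time_delta(launchTimeSecs)
  static int timeDelta(int launchTimeSecs) {
    int nowSecs = (int) (System.currentTimeMillis() / 1000L);
    return nowSecs - launchTimeSecs; // uptime in seconds
  }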
Example #4
  /**
   * Get all free slots: remove every assigned worker port from the SupervisorInfo
   * entries, so that only free ports remain.
   *
   * @param supervisorInfos map of supervisorId to SupervisorInfo, mutated in place
   * @param stormClusterState ZK-backed cluster state
   * @throws Exception if assignments cannot be read from ZK
   */
  public static void getFreeSlots(
      Map<String, SupervisorInfo> supervisorInfos, StormClusterState stormClusterState)
      throws Exception {

    Map<String, Assignment> assignments = Cluster.get_all_assignment(stormClusterState, null);

    for (Entry<String, Assignment> entry : assignments.entrySet()) {
      String topologyId = entry.getKey();
      Assignment assignment = entry.getValue();

      Set<ResourceWorkerSlot> workers = assignment.getWorkers();

      for (ResourceWorkerSlot worker : workers) {

        SupervisorInfo supervisorInfo = supervisorInfos.get(worker.getNodeId());
        if (supervisorInfo == null) {
          // the supervisor is dead
          continue;
        }
        supervisorInfo.getWorkerPorts().remove(worker.getPort());
      }
    }
  }
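
On plain collections, the effect is a per-supervisor set difference; a standalone sketch with made-up port numbers:

  import java.util.*;

  public class FreeSlotsSketch {
    public static void main(String[] args) {
      // hypothetical supervisor offering worker ports 6800..6803
      Set<Integer> workerPorts = new TreeSet<Integer>(Arrays.asList(6800, 6801, 6802, 6803));
      // ports taken by existing assignments (read from ZK in the real code)
      Set<Integer> assignedPorts = new TreeSet<Integer>(Arrays.asList(6800, 6802));
      // what the per-worker remove() loop in getFreeSlots amounts to
      workerPorts.removeAll(assignedPorts);
      System.out.println("free ports: " + workerPorts); // free ports: [6801, 6803]
    }
  }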
Example #5
  /**
   * Back up the topology's Assignment to ZK.
   *
   * <p>Open question: do we need to do the backup operation every time?
   *
   * @param assignment the assignment to back up
   * @param event the assign event carrying the topology id and name
   */
  public void backupAssignment(Assignment assignment, TopologyAssignEvent event) {
    String topologyId = event.getTopologyId();
    String topologyName = event.getTopologyName();
    try {

      StormClusterState zkClusterState = nimbusData.getStormClusterState();
      // minor inefficiency: the task map is fetched a second time during one assignment
      HashMap<Integer, String> tasks = Cluster.topology_task_info(zkClusterState, topologyId);

      Map<String, List<Integer>> componentTasks = JStormUtils.reverse_map(tasks);

      for (Entry<String, List<Integer>> entry : componentTasks.entrySet()) {
        List<Integer> keys = entry.getValue();

        Collections.sort(keys);
      }

      AssignmentBak assignmentBak = new AssignmentBak(componentTasks, assignment);
      zkClusterState.backup_assignment(topologyName, assignmentBak);

    } catch (Exception e) {
      LOG.warn("Failed to backup " + topologyId + " assignment " + assignment, e);
    }
  }
Example #6
  @SuppressWarnings({"rawtypes", "unchecked"})
  public WorkerData(
      Map conf,
      IContext context,
      String topology_id,
      String supervisor_id,
      int port,
      String worker_id,
      String jar_path)
      throws Exception {

    this.conf = conf;
    this.context = context;
    this.topologyId = topology_id;
    this.supervisorId = supervisor_id;
    this.port = port;
    this.workerId = worker_id;

    this.active = new AtomicBoolean(true);
    this.topologyStatus = StatusType.active;

    if (StormConfig.cluster_mode(conf).equals("distributed")) {
      String pidDir = StormConfig.worker_pids_root(conf, worker_id);
      JStormServerUtils.createPid(pidDir);
    }

    // create zk interface
    this.zkClusterstate = ZkTool.mk_distributed_cluster_state(conf);
    this.zkCluster = Cluster.mk_storm_cluster_state(zkClusterstate);

    Map rawConf = StormConfig.read_supervisor_topology_conf(conf, topology_id);
    this.stormConf = new HashMap<Object, Object>();
    this.stormConf.putAll(conf);
    this.stormConf.putAll(rawConf);

    LOG.info("Worker Configuration " + stormConf);

    try {

      boolean enableClassloader = ConfigExtension.isEnableTopologyClassLoader(stormConf);
      boolean enableDebugClassloader = ConfigExtension.isEnableClassloaderDebug(stormConf);

      if (jar_path == null && enableClassloader) {
        LOG.error("Topology classloader is enabled, but no app jar was supplied");
        throw new InvalidParameterException();
      }

      URL[] urlArray = new URL[0];
      if (jar_path != null) {
        String[] paths = jar_path.split(":");
        Set<URL> urls = new HashSet<URL>();
        for (String path : paths) {
          if (StringUtils.isBlank(path)) continue;
          URL url = new URL("File:" + path);
          urls.add(url);
        }
        urlArray = urls.toArray(new URL[0]);
      }

      WorkerClassLoader.mkInstance(
          urlArray,
          ClassLoader.getSystemClassLoader(),
          ClassLoader.getSystemClassLoader().getParent(),
          enableClassloader,
          enableDebugClassloader);
    } catch (Exception e) {
      LOG.error("Failed to init the jar classloader", e);
      throw new InvalidParameterException();
    }

    if (this.context == null) {
      this.context = TransportFactory.makeContext(stormConf);
    }

    boolean disruptorUseSleep = ConfigExtension.isDisruptorUseSleep(stormConf);
    DisruptorQueue.setUseSleep(disruptorUseSleep);
    boolean isLimited = ConfigExtension.getTopologyBufferSizeLimited(stormConf);
    DisruptorQueue.setLimited(isLimited);
    LOG.info("Disruptor use sleep:" + disruptorUseSleep + ", limited size:" + isLimited);

    // this.transferQueue = new LinkedBlockingQueue<TransferData>();
    int buffer_size = Utils.getInt(conf.get(Config.TOPOLOGY_TRANSFER_BUFFER_SIZE));
    WaitStrategy waitStrategy =
        (WaitStrategy)
            Utils.newInstance((String) conf.get(Config.TOPOLOGY_DISRUPTOR_WAIT_STRATEGY));
    this.transferQueue =
        DisruptorQueue.mkInstance("TotalTransfer", ProducerType.MULTI, buffer_size, waitStrategy);
    this.transferQueue.consumerStarted();
    this.sendingQueue =
        DisruptorQueue.mkInstance("TotalSending", ProducerType.MULTI, buffer_size, waitStrategy);
    this.sendingQueue.consumerStarted();

    this.nodeportSocket = new ConcurrentHashMap<WorkerSlot, IConnection>();
    this.taskNodeport = new ConcurrentHashMap<Integer, WorkerSlot>();
    this.workerToResource = new ConcurrentSkipListSet<ResourceWorkerSlot>();
    this.innerTaskTransfer = new ConcurrentHashMap<Integer, DisruptorQueue>();
    this.deserializeQueues = new ConcurrentHashMap<Integer, DisruptorQueue>();

    Assignment assignment = zkCluster.assignment_info(topologyId, null);
    if (assignment == null) {
      String errMsg = "Failed to get Assignment of " + topologyId;
      LOG.error(errMsg);
      throw new RuntimeException(errMsg);
    }
    workerToResource.addAll(assignment.getWorkers());

    // get current worker's task list

    this.taskids = assignment.getCurrentWorkerTasks(supervisorId, port);
    if (taskids.size() == 0) {
      throw new RuntimeException("No tasks running current workers");
    }
    LOG.info("Current worker taskList:" + taskids);

    // deserialize topology code from local dir
    rawTopology = StormConfig.read_supervisor_topology_code(conf, topology_id);
    sysTopology = Common.system_topology(stormConf, rawTopology);

    generateMaps();

    contextMaker = new ContextMaker(this);

    metricReporter = new MetricReporter(this);

    outTaskStatus = new HashMap<Integer, Boolean>();

    threadPool = Executors.newScheduledThreadPool(THREAD_POOL_NUM);
    TimerTrigger.setScheduledExecutorService(threadPool);

    LOG.info("Successfully create WorkerData");
  }
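
The jar-path-to-URL[] step in the constructor is self-contained enough to sketch on its own; note the lowercase file: scheme, and the trim-based blank check standing in for StringUtils.isBlank (paths are made up):

  import java.net.URL;
  import java.util.HashSet;
  import java.util.Set;

  public class JarUrlSketch {
    public static void main(String[] args) throws Exception {
      String jarPath = "/opt/app/a.jar:/opt/app/b.jar:"; // trailing ':' yields a blank segment
      Set<URL> urls = new HashSet<URL>();
      for (String path : jarPath.split(":")) {
        if (path == null || path.trim().isEmpty()) continue; // skip blank segments
        urls.add(new URL("file:" + path));
      }
      URL[] urlArray = urls.toArray(new URL[0]);
      System.out.println(urlArray.length + " jar URLs"); // 2 jar URLs
    }
  }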
Example #7
  @Override
  public SupervisorWorkers getSupervisorWorkers(String host) throws NotAliveException, TException {
    try {
      StormClusterState stormClusterState = data.getStormClusterState();

      String supervisorId = null;
      SupervisorInfo supervisorInfo = null;

      String ip = NetWorkUtils.host2Ip(host);
      String hostName = NetWorkUtils.ip2Host(host);

      // all supervisors
      Map<String, SupervisorInfo> supervisorInfos =
          Cluster.allSupervisorInfo(stormClusterState, null);

      for (Entry<String, SupervisorInfo> entry : supervisorInfos.entrySet()) {

        SupervisorInfo info = entry.getValue();
        if (info.getHostName().equals(hostName) || info.getHostName().equals(ip)) {
          supervisorId = entry.getKey();
          supervisorInfo = info;
          break;
        }
      }

      if (supervisorId == null) {
        throw new TException("No supervisor of " + host);
      }

      Map<String, Assignment> assignments = new HashMap<String, Assignment>();

      // get all active topology's StormBase
      Map<String, StormBase> bases = Cluster.topology_bases(stormClusterState);
      for (Entry<String, StormBase> entry : bases.entrySet()) {

        String topologyId = entry.getKey();
        StormBase base = entry.getValue();

        Assignment assignment = stormClusterState.assignment_info(topologyId, null);
        if (assignment == null) {
          LOG.error("Failed to get assignment of " + topologyId);
          continue;
        }
        assignments.put(topologyId, assignment);
      }

      Map<Integer, WorkerSummary> portWorkerSummarys = new TreeMap<Integer, WorkerSummary>();
      for (Entry<String, Assignment> entry : assignments.entrySet()) {
        String topologyId = entry.getKey();
        Assignment assignment = entry.getValue();

        Map<Integer, String> taskToComponent =
            Cluster.topology_task_info(stormClusterState, topologyId);

        Map<Integer, ResourceAssignment> taskToResource = assignment.getTaskToResource();

        for (Entry<Integer, ResourceAssignment> resourceEntry : taskToResource.entrySet()) {
          Integer taskId = resourceEntry.getKey();
          ResourceAssignment resourceAssignment = resourceEntry.getValue();

          if (!supervisorId.equals(resourceAssignment.getSupervisorId())) {
            continue;
          }

          supervisorInfo.allocResource(resourceAssignment);

          Integer port = resourceAssignment.getPort();
          WorkerSummary workerSummary = portWorkerSummarys.get(port);
          if (workerSummary == null) {
            workerSummary = new WorkerSummary();
            workerSummary.set_port(port);
            workerSummary.set_topology(topologyId);
            workerSummary.set_tasks(new ArrayList<TaskSummary>());

            portWorkerSummarys.put(port, workerSummary);
          }

          String componentName = taskToComponent.get(taskId);
          int uptime = TimeUtils.time_delta(assignment.getTaskStartTimeSecs().get(taskId));
          List<TaskSummary> tasks = workerSummary.get_tasks();

          TaskSummary taskSummary =
              NimbusUtils.mkSimpleTaskSummary(
                  resourceAssignment, taskId, componentName, host, uptime);

          tasks.add(taskSummary);
        }
      }

      List<WorkerSummary> workersList = new ArrayList<WorkerSummary>();
      workersList.addAll(portWorkerSummarys.values());

      SupervisorSummary supervisorSummary =
          NimbusUtils.mkSupervisorSummary(supervisorInfo, supervisorId);
      return new SupervisorWorkers(supervisorSummary, workersList);

    } catch (TException e) {
      LOG.info("Failed to get ClusterSummary ", e);
      throw e;
    } catch (Exception e) {
      LOG.info("Failed to get ClusterSummary ", e);
      throw new TException(e);
    }
  }
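
Because portWorkerSummarys is a TreeMap, the returned worker list comes out ordered by port; a standalone sketch of that group-by-port pattern with made-up tasks:

  import java.util.*;

  public class PortGroupingSketch {
    public static void main(String[] args) {
      // group task names by worker port, keeping ports sorted
      Map<Integer, List<String>> byPort = new TreeMap<Integer, List<String>>();
      int[] ports = {6802, 6800, 6802};
      String[] tasks = {"bolt-3", "spout-1", "bolt-4"};
      for (int i = 0; i < ports.length; i++) {
        List<String> list = byPort.get(ports[i]);
        if (list == null) {
          list = new ArrayList<String>();
          byPort.put(ports[i], list);
        }
        list.add(tasks[i]);
      }
      System.out.println(byPort); // {6800=[spout-1], 6802=[bolt-3, bolt-4]}
    }
  }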
Example #8
  /**
   * Get the cluster summary; it contains the SupervisorSummary and TopologySummary lists.
   *
   * @return ClusterSummary
   */
  @Override
  public ClusterSummary getClusterInfo() throws TException {

    try {

      StormClusterState stormClusterState = data.getStormClusterState();

      Map<String, Assignment> assignments = new HashMap<String, Assignment>();

      // get nimbus running time
      int uptime = data.uptime();

      // get TopologySummary
      List<TopologySummary> topologySummaries = new ArrayList<TopologySummary>();

      // get all active topology's StormBase
      Map<String, StormBase> bases = Cluster.topology_bases(stormClusterState);
      for (Entry<String, StormBase> entry : bases.entrySet()) {

        String topologyId = entry.getKey();
        StormBase base = entry.getValue();

        Assignment assignment = stormClusterState.assignment_info(topologyId, null);
        if (assignment == null) {
          LOG.error("Failed to get assignment of " + topologyId);
          continue;
        }
        assignments.put(topologyId, assignment);
        String group = "default";
        if (data.isGroupMode()) group = base.getGroup();
        if (group == null) group = "default";

        TopologySummary topology =
            NimbusUtils.mkTopologySummary(
                assignment,
                topologyId,
                base.getStormName(),
                base.getStatusString(),
                TimeUtils.time_delta(base.getLanchTimeSecs()),
                group);

        topologySummaries.add(topology);
      }

      // all supervisors
      Map<String, SupervisorInfo> supervisorInfos =
          Cluster.allSupervisorInfo(stormClusterState, null);

      // generate SupervisorSummaries
      List<SupervisorSummary> supervisorSummaries =
          NimbusUtils.mkSupervisorSummaries(supervisorInfos, assignments);

      return new ClusterSummary(
          supervisorSummaries,
          uptime,
          topologySummaries,
          data.getGroupToTopology(),
          data.getGroupToResource(),
          data.getGroupToUsedResource(),
          data.isGroupMode());

    } catch (TException e) {
      LOG.info("Failed to get ClusterSummary ", e);
      throw e;
    } catch (Exception e) {
      LOG.info("Failed to get ClusterSummary ", e);
      throw new TException(e);
    }
  }
Example #9
  protected TopologyAssignContext prepareTopologyAssign(TopologyAssignEvent event)
      throws Exception {
    TopologyAssignContext ret = new TopologyAssignContext();

    String topologyId = event.getTopologyId();

    /** Read stormconf.ser and stormcode.ser from the local directory */
    Map<Object, Object> nimbusConf = nimbusData.getConf();
    Map<Object, Object> topologyConf =
        StormConfig.read_nimbus_topology_conf(nimbusConf, topologyId);

    StormTopology rawTopology = StormConfig.read_nimbus_topology_code(nimbusConf, topologyId);
    ret.setRawTopology(rawTopology);

    Map stormConf = new HashMap();
    stormConf.putAll(nimbusConf);
    stormConf.putAll(topologyConf);
    ret.setStormConf(stormConf);

    StormClusterState stormClusterState = nimbusData.getStormClusterState();

    // get all running supervisors and their SupervisorInfo
    // (no callback needed to watch supervisors)
    Map<String, SupervisorInfo> supInfos = Cluster.allSupervisorInfo(stormClusterState, null);
    if (supInfos.size() == 0) {
      throw new FailedAssignTopologyException(
          "Failed to make assignment " + topologyId + ", due to no alive supervisor");
    }

    /** Get all tasks under this topologyId */
    Map<Integer, String> taskToComponent =
        Cluster.topology_task_info(stormClusterState, topologyId);
    ret.setTaskToComponent(taskToComponent);

    // get taskids /ZK/tasks/topologyId
    Set<Integer> allTaskIds = taskToComponent.keySet();
    if (allTaskIds.isEmpty()) {
      String errMsg = "Failed to get all task ID list from /ZK-dir/tasks/" + topologyId;
      LOG.warn(errMsg);
      throw new IOException(errMsg);
    }
    ret.setAllTaskIds(allTaskIds);

    Set<Integer> aliveTasks = new HashSet<Integer>();
    // unstoppedTasks are tasks that are still alive on a dead supervisor's
    // machine: the supervisor has died, but its tasks have not stopped
    Set<Integer> unstoppedTasks = new HashSet<Integer>();
    Set<Integer> deadTasks = new HashSet<Integer>();
    Set<ResourceWorkerSlot> unstoppedWorkers = new HashSet<ResourceWorkerSlot>();

    Assignment existingAssignment = stormClusterState.assignment_info(topologyId, null);
    if (existingAssignment != null) {
      aliveTasks = getAliveTasks(topologyId, allTaskIds);
      unstoppedTasks = getUnstoppedSlots(aliveTasks, supInfos, existingAssignment);

      deadTasks.addAll(allTaskIds);
      deadTasks.removeAll(aliveTasks);
    }

    ret.setDeadTaskIds(deadTasks);
    ret.setUnstoppedTaskIds(unstoppedTasks);

    // Step 2: compute slot resources: free slots / alive slots / unstopped slots
    getFreeSlots(supInfos, stormClusterState);
    ret.setCluster(supInfos);

    if (existingAssignment == null) {
      ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_NEW);

      try {
        AssignmentBak lastAssignment = stormClusterState.assignment_bak(event.getTopologyName());
        if (lastAssignment != null) {
          ret.setOldAssignment(lastAssignment.getAssignment());
        }
      } catch (Exception e) {
        LOG.warn("Fail to get old assignment", e);
      }
    } else {
      ret.setOldAssignment(existingAssignment);
      if (event.isScratch()) {
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_REBALANCE);
        unstoppedWorkers = getUnstoppedWorkers(unstoppedTasks, existingAssignment);
        ret.setUnstoppedWorkers(unstoppedWorkers);
      } else {
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_MONITOR);
        unstoppedWorkers = getUnstoppedWorkers(aliveTasks, existingAssignment);
        ret.setUnstoppedWorkers(unstoppedWorkers);
      }
    }

    return ret;
  }
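
The dead-task computation above is plain set difference; a standalone sketch with made-up task ids:

  import java.util.*;

  public class DeadTasksSketch {
    public static void main(String[] args) {
      Set<Integer> allTaskIds = new TreeSet<Integer>(Arrays.asList(1, 2, 3, 4, 5));
      Set<Integer> aliveTasks = new TreeSet<Integer>(Arrays.asList(1, 2, 4));
      Set<Integer> deadTasks = new TreeSet<Integer>();
      deadTasks.addAll(allTaskIds);    // start from every task
      deadTasks.removeAll(aliveTasks); // subtract the alive ones
      System.out.println("dead tasks: " + deadTasks); // dead tasks: [3, 5]
    }
  }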
Example #10
  /**
   * Create and start one supervisor.
   *
   * @param conf configuration (default.yaml merged with storm.yaml)
   * @param sharedContext shared IContext (null right now)
   * @return SupervisorManger, which is used to shut down all workers and the supervisor
   */
  @SuppressWarnings("rawtypes")
  public SupervisorManger mkSupervisor(Map conf, IContext sharedContext) throws Exception {

    LOG.info("Starting Supervisor with conf " + conf);

    active = new AtomicBoolean(true);

    /** Step 1: clean up all files in /storm-local-dir/supervisor/tmp */
    String path = StormConfig.supervisorTmpDir(conf);
    FileUtils.cleanDirectory(new File(path));

    /*
     * Step 2: create the ZK operation instance StormClusterState
     */

    StormClusterState stormClusterState = Cluster.mk_storm_cluster_state(conf);

    /*
     * Step 3: create LocalState, a local KV store:
     * 3.1 create the LocalState instance;
     * 3.2 get the supervisorId, creating one if it does not exist
     */

    LocalState localState = StormConfig.supervisorState(conf);

    String supervisorId = (String) localState.get(Common.LS_ID);
    if (supervisorId == null) {
      supervisorId = UUID.randomUUID().toString();
      localState.put(Common.LS_ID, supervisorId);
    }

    Vector<SmartThread> threads = new Vector<SmartThread>();

    // Step 4: create the heartbeat thread;
    // every supervisor.heartbeat.frequency.secs, write SupervisorInfo to ZK
    String myHostName = ConfigExtension.getSupervisorHost(conf);
    if (myHostName == null) {
      myHostName = NetWorkUtils.hostname();
    }
    Heartbeat hb = new Heartbeat(conf, stormClusterState, supervisorId, myHostName, active);
    hb.update();
    AsyncLoopThread heartbeat = new AsyncLoopThread(hb, false, null, Thread.MIN_PRIORITY, true);
    threads.add(heartbeat);

    // Step 5: create and start the sync-supervisor thread;
    // every supervisor.monitor.frequency.secs, run SyncSupervisorEvent
    EventManager processEventManager = new EventManagerImp(false);
    ConcurrentHashMap<String, String> workerThreadPids = new ConcurrentHashMap<String, String>();
    SyncProcessEvent syncProcessEvent =
        new SyncProcessEvent(supervisorId, conf, localState, workerThreadPids, sharedContext);

    EventManager syncSupEventManager = new EventManagerImp(false);
    SyncSupervisorEvent syncSupervisorEvent =
        new SyncSupervisorEvent(
            supervisorId,
            conf,
            processEventManager,
            syncSupEventManager,
            stormClusterState,
            localState,
            syncProcessEvent);

    int syncFrequency = JStormUtils.parseInt(conf.get(Config.SUPERVISOR_MONITOR_FREQUENCY_SECS));
    EventManagerPusher syncSupervisorPusher =
        new EventManagerPusher(syncSupEventManager, syncSupervisorEvent, active, syncFrequency);
    AsyncLoopThread syncSupervisorThread = new AsyncLoopThread(syncSupervisorPusher);
    threads.add(syncSupervisorThread);

    // Step 6: the sync-process thread (which would run SyncProcesses every
    // supervisor.monitor.frequency.secs) is skipped: nimbus already checks
    // whether each worker is dead and, if so, reassigns it to a new worker
    //
    // int syncProcessFrequence = syncFrequence/2;
    // EventManagerPusher syncProcessPusher = new EventManagerPusher(
    // processEventManager, syncProcessEvent, active,
    // syncProcessFrequence);
    // AsyncLoopThread syncProcessThread = new
    // AsyncLoopThread(syncProcessPusher);
    // threads.add(syncProcessThread);

    // Step 7: start the http server
    Httpserver httpserver = new Httpserver(conf);
    httpserver.start();

    LOG.info("Starting supervisor with id " + supervisorId + " at host " + myHostName);

    // return the SupervisorManger, which can shut down the supervisor and all workers
    return new SupervisorManger(
        conf,
        supervisorId,
        active,
        threads,
        syncSupEventManager,
        processEventManager,
        httpserver,
        stormClusterState,
        workerThreadPids);
  }
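
LocalState is JStorm's local KV store; its get-or-create use for the supervisor id (a stable id that survives restarts) can be sketched standalone with a properties file (path and key are made up):

  import java.io.*;
  import java.util.Properties;
  import java.util.UUID;

  public class SupervisorIdSketch {
    // mirror of: id = localState.get(LS_ID); if null, create and persist one
    static String getOrCreateId(File stateFile) throws IOException {
      Properties props = new Properties();
      if (stateFile.exists()) {
        try (FileInputStream in = new FileInputStream(stateFile)) {
          props.load(in);
        }
      }
      String id = props.getProperty("supervisor.id");
      if (id == null) {
        id = UUID.randomUUID().toString();
        props.setProperty("supervisor.id", id);
        try (FileOutputStream out = new FileOutputStream(stateFile)) {
          props.store(out, "supervisor local state (sketch)");
        }
      }
      return id;
    }
  }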