/**
   * Get the map of tasks that are alive and whose old assignments should be kept. This is only
   * valid when the assign type is ASSIGN_TYPE_MONITOR.
   *
   * @param defaultContext assignment context holding all task ids, unstopped task ids, and the
   *     old assignment
   * @param needAssigns task ids that must be (re)assigned and therefore must not be kept
   * @return map from task id to the ResourceAssignment it keeps
   */
  public Map<Integer, ResourceAssignment> getKeepAssign(
      DefaultTopologyAssignContext defaultContext, Set<Integer> needAssigns) {

    Set<Integer> keepAssignIds = new HashSet<Integer>();
    keepAssignIds.addAll(defaultContext.getAllTaskIds());
    keepAssignIds.removeAll(defaultContext.getUnstoppedTaskIds());
    keepAssignIds.removeAll(needAssigns);

    Map<Integer, ResourceAssignment> keeps = new HashMap<Integer, ResourceAssignment>();
    if (keepAssignIds.isEmpty()) {
      return keeps;
    }

    Assignment oldAssignment = defaultContext.getOldAssignment();
    if (oldAssignment == null) {
      return keeps;
    }
    Map<Integer, ResourceAssignment> olds = oldAssignment.getTaskToResource();

    for (Integer task : keepAssignIds) {
      ResourceAssignment oldResource = olds.get(task);
      if (oldResource == null) {
        LOG.warn("No old assignment of " + task + ", " + defaultContext.toDetailString());
        continue;
      }

      keeps.put(task, oldResource);
    }

    return keeps;
  }
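/*
 * A minimal, standalone sketch (not part of the original class) of the set arithmetic
 * behind getKeepAssign above: keep = allTaskIds - unstoppedTaskIds - needAssigns.
 * All names and values here are illustrative assumptions.
 */
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class KeepAssignSketch {
  public static void main(String[] args) {
    Set<Integer> allTaskIds = new HashSet<Integer>(Arrays.asList(1, 2, 3, 4, 5));
    Set<Integer> unstoppedTaskIds = new HashSet<Integer>(Arrays.asList(4)); // supervisor dead, task alive
    Set<Integer> needAssigns = new HashSet<Integer>(Arrays.asList(2, 3)); // must be re-assigned

    Set<Integer> keepAssignIds = new HashSet<Integer>(allTaskIds);
    keepAssignIds.removeAll(unstoppedTaskIds);
    keepAssignIds.removeAll(needAssigns);

    // Only these tasks retain their old ResourceAssignment.
    System.out.println(keepAssignIds); // [1, 5]
  }
}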
  /**
   * Get unstopped slots from the alive task list: all tasks whose supervisor is already dead but
   * which are themselves still alive.
   */
  public Set<Integer> getUnstoppedSlots(
      Set<Integer> aliveTasks, Map<String, SupervisorInfo> supInfos, Assignment existAssignment) {
    Set<Integer> ret = new HashSet<Integer>();

    Set<ResourceWorkerSlot> oldWorkers = existAssignment.getWorkers();

    Set<String> aliveSupervisors = supInfos.keySet();

    for (ResourceWorkerSlot worker : oldWorkers) {
      for (Integer taskId : worker.getTasks()) {
        if (!aliveTasks.contains(taskId)) {
          // task is dead
          continue;
        }

        String oldTaskSupervisorId = worker.getNodeId();

        if (!aliveSupervisors.contains(oldTaskSupervisorId)) {
          // task is alive, but its supervisor is dead
          ret.add(taskId);
        }
      }
    }

    return ret;
  }
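/*
 * Standalone sketch (simplified types, illustrative names) of the rule in
 * getUnstoppedSlots: a task is "unstopped" when it is still alive but its
 * supervisor no longer appears among the alive supervisors.
 */
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class UnstoppedSlotsSketch {
  public static void main(String[] args) {
    Map<Integer, String> taskToSupervisor = new HashMap<Integer, String>();
    taskToSupervisor.put(1, "sup-a");
    taskToSupervisor.put(2, "sup-b");

    Set<Integer> aliveTasks = new HashSet<Integer>(Arrays.asList(1, 2));
    Set<String> aliveSupervisors = new HashSet<String>(Arrays.asList("sup-a")); // sup-b is dead

    Set<Integer> unstopped = new HashSet<Integer>();
    for (Map.Entry<Integer, String> e : taskToSupervisor.entrySet()) {
      if (aliveTasks.contains(e.getKey()) && !aliveSupervisors.contains(e.getValue())) {
        unstopped.add(e.getKey()); // task alive, supervisor dead
      }
    }
    System.out.println(unstopped); // [2]
  }
}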
  /**
   * Compute the start time (in seconds) of every task in the new assignment. For a brand-new
   * assignment all tasks start now; otherwise old start times are kept and only moved tasks are
   * re-stamped (their ZK heartbeats are also cleared).
   *
   * @param context assignment context carrying the assign type
   * @param nimbusData nimbus runtime data, used to reach the ZK cluster state
   * @param topologyId id of the topology being assigned
   * @param existingAssignment previous assignment, or null if there is none
   * @param workers worker slots of the new assignment
   * @return map from task id to start time in seconds
   * @throws Exception if clearing task heartbeats in ZK fails
   */
  public static Map<Integer, Integer> getTaskStartTimes(
      TopologyAssignContext context,
      NimbusData nimbusData,
      String topologyId,
      Assignment existingAssignment,
      Set<ResourceWorkerSlot> workers)
      throws Exception {

    Map<Integer, Integer> startTimes = new TreeMap<Integer, Integer>();

    if (context.getAssignType() == TopologyAssignContext.ASSIGN_TYPE_NEW) {
      int nowSecs = TimeUtils.current_time_secs();
      for (ResourceWorkerSlot worker : workers) {
        for (Integer changedTaskId : worker.getTasks()) {
          startTimes.put(changedTaskId, nowSecs);
        }
      }

      return startTimes;
    }

    Set<ResourceWorkerSlot> oldWorkers = new HashSet<ResourceWorkerSlot>();

    if (existingAssignment != null) {
      Map<Integer, Integer> taskStartTimeSecs = existingAssignment.getTaskStartTimeSecs();
      if (taskStartTimeSecs != null) {
        startTimes.putAll(taskStartTimeSecs);
      }

      if (existingAssignment.getWorkers() != null) {
        oldWorkers = existingAssignment.getWorkers();
      }
    }

    StormClusterState zkClusterState = nimbusData.getStormClusterState();
    Set<Integer> changeTaskIds = getChangeTaskIds(oldWorkers, workers);
    int nowSecs = TimeUtils.current_time_secs();
    for (Integer changedTaskId : changeTaskIds) {
      startTimes.put(changedTaskId, nowSecs);

      zkClusterState.remove_task_heartbeat(topologyId, changedTaskId);
    }

    LOG.info("Task assignment has been changed " + changeTaskIds);
    return startTimes;
  }
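/*
 * Standalone sketch (illustrative values) of the start-time rule in getTaskStartTimes:
 * carry over the old start times, then stamp "now" on every task whose worker slot
 * changed; a fresh ASSIGN_TYPE_NEW assignment stamps all tasks instead.
 */
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

public class StartTimeSketch {
  public static void main(String[] args) {
    Map<Integer, Integer> startTimes = new TreeMap<Integer, Integer>();
    startTimes.put(1, 100); // old start times carried over
    startTimes.put(2, 100);

    Set<Integer> changeTaskIds = new HashSet<Integer>(Arrays.asList(2)); // task 2 moved
    int nowSecs = 200;
    for (Integer changedTaskId : changeTaskIds) {
      startTimes.put(changedTaskId, nowSecs); // a moved task restarts its uptime clock
    }
    System.out.println(startTimes); // {1=100, 2=200}
  }
}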
  /**
   * Get all free slots: for every port occupied by an existing assignment, remove that port from
   * its supervisor's port list, so only free ports remain (mutates supervisorInfos in place).
   *
   * @param supervisorInfos alive supervisors; their worker port lists are pruned in place
   * @param stormClusterState cluster state used to read all existing assignments
   * @throws Exception if reading assignments fails
   */
  public static void getFreeSlots(
      Map<String, SupervisorInfo> supervisorInfos, StormClusterState stormClusterState)
      throws Exception {

    Map<String, Assignment> assignments = Cluster.get_all_assignment(stormClusterState, null);

    for (Entry<String, Assignment> entry : assignments.entrySet()) {
      String topologyId = entry.getKey();
      Assignment assignment = entry.getValue();

      Set<ResourceWorkerSlot> workers = assignment.getWorkers();

      for (ResourceWorkerSlot worker : workers) {

        SupervisorInfo supervisorInfo = supervisorInfos.get(worker.getNodeId());
        if (supervisorInfo == null) {
          // the supervisor is dead
          continue;
        }
        supervisorInfo.getWorkerPorts().remove(worker.getPort());
      }
    }
  }
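/*
 * Standalone sketch (simplified types) of the subtraction in getFreeSlots: each
 * supervisor starts with its full port list, every port occupied by an existing
 * assignment is removed, and the ports that remain are free.
 */
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class FreeSlotsSketch {
  public static void main(String[] args) {
    Map<String, Set<Integer>> supervisorPorts = new HashMap<String, Set<Integer>>();
    supervisorPorts.put("sup-a", new HashSet<Integer>(Arrays.asList(6800, 6801, 6802)));

    // (nodeId, port) slots referenced by existing assignments
    Map<String, Integer> usedSlots = new HashMap<String, Integer>();
    usedSlots.put("sup-a", 6801);

    for (Map.Entry<String, Integer> slot : usedSlots.entrySet()) {
      Set<Integer> ports = supervisorPorts.get(slot.getKey());
      if (ports == null) {
        continue; // the supervisor is dead; nothing to prune
      }
      ports.remove(slot.getValue());
    }
    System.out.println(supervisorPorts); // {sup-a=[6800, 6802]}
  }
}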
  private Set<ResourceWorkerSlot> getUnstoppedWorkers(
      Set<Integer> aliveTasks, Assignment existAssignment) {
    Set<ResourceWorkerSlot> ret = new HashSet<ResourceWorkerSlot>();
    for (ResourceWorkerSlot worker : existAssignment.getWorkers()) {
      // a worker is kept only if every task on it is still alive
      boolean alive = true;
      for (Integer task : worker.getTasks()) {
        if (!aliveTasks.contains(task)) {
          alive = false;
          break;
        }
      }
      if (alive) {
        ret.add(worker);
      }
    }
    return ret;
  }
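/*
 * Standalone sketch contrasting getUnstoppedWorkers with getUnstoppedSlots: a whole
 * worker is "unstopped" only when every one of its tasks is still alive; a single
 * dead task disqualifies the worker. Values are illustrative.
 */
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class UnstoppedWorkersSketch {
  public static void main(String[] args) {
    Set<Integer> aliveTasks = new HashSet<Integer>(Arrays.asList(1, 2, 3));

    List<Set<Integer>> workerTaskSets = new ArrayList<Set<Integer>>();
    workerTaskSets.add(new HashSet<Integer>(Arrays.asList(1, 2))); // all tasks alive
    workerTaskSets.add(new HashSet<Integer>(Arrays.asList(3, 4))); // task 4 is dead

    for (Set<Integer> tasks : workerTaskSets) {
      boolean alive = aliveTasks.containsAll(tasks); // same check as the inner loop above
      System.out.println(tasks + " unstopped=" + alive); // [1, 2] true, then [3, 4] false
    }
  }
}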
  public static Map<String, String> getTopologyNodeHost(
      Map<String, SupervisorInfo> supervisorMap,
      Assignment existingAssignment,
      Set<ResourceWorkerSlot> workers) {

    // collect the nodes actually used by the new workers, so that unused nodes
    // can be dropped from allNodeHost below
    Set<String> usedNodes = new HashSet<String>();
    for (ResourceWorkerSlot worker : workers) {
      usedNodes.add(worker.getNodeId());
    }

    // map<supervisorId, hostname>
    Map<String, String> allNodeHost = new HashMap<String, String>();

    if (existingAssignment != null) {
      allNodeHost.putAll(existingAssignment.getNodeHost());
    }

    // node -> host map of alive supervisors: Map<supervisorId, hostname>
    Map<String, String> nodeHost = SupervisorInfo.getNodeHost(supervisorMap);
    if (nodeHost != null) {
      allNodeHost.putAll(nodeHost);
    }

    Map<String, String> ret = new HashMap<String, String>();

    for (String supervisorId : usedNodes) {
      if (allNodeHost.containsKey(supervisorId)) {
        ret.put(supervisorId, allNodeHost.get(supervisorId));
      } else {
        LOG.warn("Node " + supervisorId + " doesn't in the supervisor list");
      }
    }

    return ret;
  }
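/*
 * Standalone sketch of the merge order in getTopologyNodeHost: entries from the old
 * assignment go in first, alive-supervisor entries are put on top (so live data wins
 * on conflict), then the map is filtered to the nodes actually used. Names and hosts
 * are illustrative.
 */
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

public class NodeHostSketch {
  public static void main(String[] args) {
    Map<String, String> fromOldAssignment = new HashMap<String, String>();
    fromOldAssignment.put("sup-a", "host-stale");

    Map<String, String> fromAliveSupervisors = new HashMap<String, String>();
    fromAliveSupervisors.put("sup-a", "host-current");

    Map<String, String> allNodeHost = new HashMap<String, String>(fromOldAssignment);
    allNodeHost.putAll(fromAliveSupervisors); // alive supervisor info overrides stale entries

    Set<String> usedNodes = Collections.singleton("sup-a");
    Map<String, String> ret = new HashMap<String, String>();
    for (String node : usedNodes) {
      if (allNodeHost.containsKey(node)) {
        ret.put(node, allNodeHost.get(node));
      }
    }
    System.out.println(ret); // {sup-a=host-current}
  }
}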
  @SuppressWarnings({"rawtypes", "unchecked"})
  public WorkerData(
      Map conf,
      IContext context,
      String topology_id,
      String supervisor_id,
      int port,
      String worker_id,
      String jar_path)
      throws Exception {

    this.conf = conf;
    this.context = context;
    this.topologyId = topology_id;
    this.supervisorId = supervisor_id;
    this.port = port;
    this.workerId = worker_id;

    this.active = new AtomicBoolean(true);
    this.topologyStatus = StatusType.active;

    if (StormConfig.cluster_mode(conf).equals("distributed")) {
      String pidDir = StormConfig.worker_pids_root(conf, worker_id);
      JStormServerUtils.createPid(pidDir);
    }

    // create zk interface
    this.zkClusterstate = ZkTool.mk_distributed_cluster_state(conf);
    this.zkCluster = Cluster.mk_storm_cluster_state(zkClusterstate);

    Map rawConf = StormConfig.read_supervisor_topology_conf(conf, topology_id);
    this.stormConf = new HashMap<Object, Object>();
    this.stormConf.putAll(conf);
    this.stormConf.putAll(rawConf);

    LOG.info("Worker Configuration " + stormConf);

    try {

      boolean enableClassloader = ConfigExtension.isEnableTopologyClassLoader(stormConf);
      boolean enableDebugClassloader = ConfigExtension.isEnableClassloaderDebug(stormConf);

      if (jar_path == null && enableClassloader) {
        LOG.error("Topology classloader is enabled, but no app jar was provided");
        throw new InvalidParameterException();
      }

      URL[] urlArray = new URL[0];
      if (jar_path != null) {
        String[] paths = jar_path.split(":");
        Set<URL> urls = new HashSet<URL>();
        for (String path : paths) {
          if (StringUtils.isBlank(path)) continue;
          URL url = new URL("File:" + path);
          urls.add(url);
        }
        urlArray = urls.toArray(new URL[0]);
      }

      WorkerClassLoader.mkInstance(
          urlArray,
          ClassLoader.getSystemClassLoader(),
          ClassLoader.getSystemClassLoader().getParent(),
          enableClassloader,
          enableDebugClassloader);
    } catch (Exception e) {
      LOG.error("init jarClassLoader error!", e);
      throw new InvalidParameterException();
    }

    if (this.context == null) {
      this.context = TransportFactory.makeContext(stormConf);
    }

    boolean disruptorUseSleep = ConfigExtension.isDisruptorUseSleep(stormConf);
    DisruptorQueue.setUseSleep(disruptorUseSleep);
    boolean isLimited = ConfigExtension.getTopologyBufferSizeLimited(stormConf);
    DisruptorQueue.setLimited(isLimited);
    LOG.info("Disruptor use sleep:" + disruptorUseSleep + ", limited size:" + isLimited);

    // this.transferQueue = new LinkedBlockingQueue<TransferData>();
    int buffer_size = Utils.getInt(conf.get(Config.TOPOLOGY_TRANSFER_BUFFER_SIZE));
    WaitStrategy waitStrategy =
        (WaitStrategy)
            Utils.newInstance((String) conf.get(Config.TOPOLOGY_DISRUPTOR_WAIT_STRATEGY));
    this.transferQueue =
        DisruptorQueue.mkInstance("TotalTransfer", ProducerType.MULTI, buffer_size, waitStrategy);
    this.transferQueue.consumerStarted();
    this.sendingQueue =
        DisruptorQueue.mkInstance("TotalSending", ProducerType.MULTI, buffer_size, waitStrategy);
    this.sendingQueue.consumerStarted();

    this.nodeportSocket = new ConcurrentHashMap<WorkerSlot, IConnection>();
    this.taskNodeport = new ConcurrentHashMap<Integer, WorkerSlot>();
    this.workerToResource = new ConcurrentSkipListSet<ResourceWorkerSlot>();
    this.innerTaskTransfer = new ConcurrentHashMap<Integer, DisruptorQueue>();
    this.deserializeQueues = new ConcurrentHashMap<Integer, DisruptorQueue>();

    Assignment assignment = zkCluster.assignment_info(topologyId, null);
    if (assignment == null) {
      String errMsg = "Failed to get Assignment of " + topologyId;
      LOG.error(errMsg);
      throw new RuntimeException(errMsg);
    }
    workerToResource.addAll(assignment.getWorkers());

    // get current worker's task list

    this.taskids = assignment.getCurrentWorkerTasks(supervisorId, port);
    if (taskids.size() == 0) {
      throw new RuntimeException("No tasks running current workers");
    }
    LOG.info("Current worker taskList:" + taskids);

    // deserialize topology code from local dir
    rawTopology = StormConfig.read_supervisor_topology_code(conf, topology_id);
    sysTopology = Common.system_topology(stormConf, rawTopology);

    generateMaps();

    contextMaker = new ContextMaker(this);

    metricReporter = new MetricReporter(this);

    outTaskStatus = new HashMap<Integer, Boolean>();

    threadPool = Executors.newScheduledThreadPool(THREAD_POOL_NUM);
    TimerTrigger.setScheduledExecutorService(threadPool);

    LOG.info("Successfully create WorkerData");
  }
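/*
 * Standalone sketch of the jar_path handling inside the constructor above: split a
 * colon-separated path list into file: URLs of the kind a URLClassLoader accepts.
 * The helper name is an assumption for illustration, not the original
 * WorkerClassLoader API.
 */
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Set;

public class JarUrlSketch {
  static URL[] toUrls(String jarPath) throws MalformedURLException {
    if (jarPath == null) {
      return new URL[0]; // no app jar; the classloader must not be enabled in this case
    }
    Set<URL> urls = new LinkedHashSet<URL>();
    for (String path : jarPath.split(":")) {
      if (path.trim().isEmpty()) {
        continue; // skip empty segments, e.g. "a.jar::b.jar"
      }
      urls.add(new URL("file:" + path)); // java.net.URL lower-cases the protocol anyway
    }
    return urls.toArray(new URL[0]);
  }

  public static void main(String[] args) throws MalformedURLException {
    System.out.println(Arrays.toString(toUrls("/a/app.jar::/b/dep.jar")));
    // [file:/a/app.jar, file:/b/dep.jar]
  }
}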
  @Override
  public SupervisorWorkers getSupervisorWorkers(String host) throws NotAliveException, TException {
    try {
      StormClusterState stormClusterState = data.getStormClusterState();

      String supervisorId = null;
      SupervisorInfo supervisorInfo = null;

      String ip = NetWorkUtils.host2Ip(host);
      String hostName = NetWorkUtils.ip2Host(host);

      // all supervisors
      Map<String, SupervisorInfo> supervisorInfos =
          Cluster.allSupervisorInfo(stormClusterState, null);

      for (Entry<String, SupervisorInfo> entry : supervisorInfos.entrySet()) {

        SupervisorInfo info = entry.getValue();
        if (info.getHostName().equals(hostName) || info.getHostName().equals(ip)) {
          supervisorId = entry.getKey();
          supervisorInfo = info;
          break;
        }
      }

      if (supervisorId == null) {
        throw new TException("No supervisor of " + host);
      }

      Map<String, Assignment> assignments = new HashMap<String, Assignment>();

      // get the StormBase of every active topology
      Map<String, StormBase> bases = Cluster.topology_bases(stormClusterState);
      for (String topologyId : bases.keySet()) {

        Assignment assignment = stormClusterState.assignment_info(topologyId, null);
        if (assignment == null) {
          LOG.error("Failed to get assignment of " + topologyId);
          continue;
        }
        assignments.put(topologyId, assignment);
      }

      Map<Integer, WorkerSummary> portWorkerSummarys = new TreeMap<Integer, WorkerSummary>();
      for (Entry<String, Assignment> entry : assignments.entrySet()) {
        String topologyId = entry.getKey();
        Assignment assignment = entry.getValue();

        Map<Integer, String> taskToComponent =
            Cluster.topology_task_info(stormClusterState, topologyId);

        Map<Integer, ResourceAssignment> taskToResource = assignment.getTaskToResource();

        for (Entry<Integer, ResourceAssignment> resourceEntry : taskToResource.entrySet()) {
          Integer taskId = resourceEntry.getKey();
          ResourceAssignment resourceAssignment = resourceEntry.getValue();

          if (!supervisorId.equals(resourceAssignment.getSupervisorId())) {
            continue;
          }

          supervisorInfo.allocResource(resourceAssignment);

          Integer port = resourceAssignment.getPort();
          WorkerSummary workerSummary = portWorkerSummarys.get(port);
          if (workerSummary == null) {
            workerSummary = new WorkerSummary();
            workerSummary.set_port(port);
            workerSummary.set_topology(topologyId);
            workerSummary.set_tasks(new ArrayList<TaskSummary>());

            portWorkerSummarys.put(port, workerSummary);
          }

          String componentName = taskToComponent.get(taskId);
          int uptime = TimeUtils.time_delta(assignment.getTaskStartTimeSecs().get(taskId));
          List<TaskSummary> tasks = workerSummary.get_tasks();

          TaskSummary taskSummary =
              NimbusUtils.mkSimpleTaskSummary(
                  resourceAssignment, taskId, componentName, host, uptime);

          tasks.add(taskSummary);
        }
      }

      List<WorkerSummary> workersList = new ArrayList<WorkerSummary>();
      workersList.addAll(portWorkerSummarys.values());

      SupervisorSummary supervisorSummary =
          NimbusUtils.mkSupervisorSummary(supervisorInfo, supervisorId);
      return new SupervisorWorkers(supervisorSummary, workersList);

    } catch (TException e) {
      LOG.info("Failed to get SupervisorWorkers ", e);
      throw e;
    } catch (Exception e) {
      LOG.info("Failed to get SupervisorWorkers ", e);
      throw new TException(e);
    }
  }
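/*
 * Standalone sketch (simplified types) of the get-or-create grouping used in
 * getSupervisorWorkers: tasks are bucketed into one summary per port via a TreeMap,
 * so ports come out sorted. Values are illustrative.
 */
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class PortGroupingSketch {
  public static void main(String[] args) {
    Map<Integer, Integer> taskToPort = new HashMap<Integer, Integer>();
    taskToPort.put(1, 6800);
    taskToPort.put(2, 6800);
    taskToPort.put(3, 6801);

    Map<Integer, List<Integer>> portToTasks = new TreeMap<Integer, List<Integer>>();
    for (Map.Entry<Integer, Integer> e : taskToPort.entrySet()) {
      List<Integer> tasks = portToTasks.get(e.getValue());
      if (tasks == null) {
        tasks = new ArrayList<Integer>();
        portToTasks.put(e.getValue(), tasks); // get-or-create, as with WorkerSummary above
      }
      tasks.add(e.getKey());
    }
    System.out.println(portToTasks); // {6800=[1, 2], 6801=[3]}
  }
}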
  @Override
  public Map<Integer, ResourceAssignment> assignTasks(TopologyAssignContext context)
      throws FailedAssignTopologyException {

    int assignType = context.getAssignType();
    if (!TopologyAssignContext.isAssignTypeValid(assignType)) {
      throw new FailedAssignTopologyException("Invalid assign type " + assignType);
    }

    DefaultTopologyAssignContext defaultContext = new DefaultTopologyAssignContext(context);
    if (assignType == TopologyAssignContext.ASSIGN_TYPE_REBALANCE) {
      freeUsed(defaultContext);
    }
    LOG.info("Dead tasks:" + defaultContext.getDeadTaskIds());
    LOG.info("Unstopped tasks:" + defaultContext.getUnstoppedTaskIds());

    Set<Integer> needAssignTasks = getNeedAssignTasks(defaultContext);

    Map<Integer, ResourceAssignment> keepAssigns = getKeepAssign(defaultContext, needAssignTasks);

    // use a TreeMap so tasks stay in ascending id order
    Map<Integer, ResourceAssignment> ret = new TreeMap<Integer, ResourceAssignment>();
    ret.putAll(keepAssigns);
    ret.putAll(defaultContext.getUnstoppedAssignments());

    Map<WorkerSlot, List<Integer>> keepAssignWorkers = Assignment.getWorkerTasks(keepAssigns);

    int allocWorkerNum =
        defaultContext.getTotalWorkerNum()
            - defaultContext.getUnstoppedWorkerNum()
            - keepAssignWorkers.size();
    if (allocWorkerNum <= 0) {
      LOG.warn(
          "No need to assign workers, all workers are fine " + defaultContext.toDetailString());
      throw new FailedAssignTopologyException("No need to assign workers, all workers are fine");
    }

    Set<String> outputConfigComponents = new HashSet<String>();

    Map<ComponentAssignType, Pair<Set<Integer>, IPreassignTask>> typeHandler =
        registerPreAssignHandler(defaultContext, needAssignTasks);

    Map<Integer, ResourceAssignment> newAssigns = new HashMap<Integer, ResourceAssignment>();
    Set<String> usedSupervisorIds = new HashSet<String>();
    List<Integer> lastFailed = new ArrayList<Integer>();

    for (Entry<ComponentAssignType, Pair<Set<Integer>, IPreassignTask>> entry :
        typeHandler.entrySet()) {
      ComponentAssignType type = entry.getKey();

      Set<Integer> tasks = entry.getValue().getFirst();
      IPreassignTask handler = entry.getValue().getSecond();

      tasks.addAll(lastFailed);
      lastFailed.clear();

      List<Integer> sortedTasks = sortAssignTasks(defaultContext, tasks);

      StormTopology sysTopology = defaultContext.getSysTopology();

      for (Integer task : sortedTasks) {
        Set<String> canUsedSupervisorIds =
            getCanUsedSupervisors(defaultContext, usedSupervisorIds, allocWorkerNum);

        String componentName = defaultContext.getTaskToComponent().get(task);
        ComponentCommon componentCommon =
            ThriftTopologyUtils.getComponentCommon(sysTopology, componentName);

        Map componentMap = (Map) JStormUtils.from_json(componentCommon.get_json_conf());
        if (componentMap == null) {
          componentMap = Maps.newHashMap();
        }

        if (!outputConfigComponents.contains(componentName)) {
          LOG.info("Component map of " + componentName + "\n" + componentMap);
          outputConfigComponents.add(componentName);
        }

        ResourceAssignment preAssignment =
            handler.preAssign(
                task,
                defaultContext,
                componentMap,
                componentName,
                canUsedSupervisorIds,
                ret,
                newAssigns);
        if (preAssignment == null) {
          // pre-assignment failed; retry this task in the next round
          lastFailed.add(task);
        } else {
          // pre-assignment succeeded
          SupervisorInfo supervisorInfo =
              defaultContext.getCluster().get(preAssignment.getSupervisorId());
          LOG.info("Task " + task + " had been assigned to " + supervisorInfo.getHostName());
          newAssigns.put(task, preAssignment);
          ret.put(task, preAssignment);
          usedSupervisorIds.add(preAssignment.getSupervisorId());
        }
      }
    }

    if (!lastFailed.isEmpty()) {
      throw new FailedAssignTopologyException("Failed to assign tasks " + lastFailed);
    }

    // the post-assign handler is hardcoded for now
    IPostAssignTask postAssignHandler = new PostAssignTaskPort();
    postAssignHandler.postAssign(defaultContext, newAssigns, allocWorkerNum);

    LOG.info("Keep Alive slots:" + keepAssigns);
    LOG.info("Unstopped slots:" + defaultContext.getUnstoppedAssignments());
    LOG.info("New assign slots:" + newAssigns);

    return ret;
  }
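/*
 * Standalone sketch of the worker-budget arithmetic in assignTasks:
 * workers still to allocate = total - unstopped - kept. Numbers are
 * illustrative assumptions.
 */
public class WorkerBudgetSketch {
  public static void main(String[] args) {
    int totalWorkerNum = 10;
    int unstoppedWorkerNum = 2; // workers whose supervisor died but whose tasks live on
    int keepAssignWorkerNum = 5; // workers fully covered by kept assignments

    int allocWorkerNum = totalWorkerNum - unstoppedWorkerNum - keepAssignWorkerNum;
    if (allocWorkerNum <= 0) {
      // mirrors the FailedAssignTopologyException path in assignTasks above
      throw new IllegalStateException("No need to assign workers; all workers are fine");
    }
    System.out.println("Workers to allocate: " + allocWorkerNum); // 3
  }
}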