/**
 * Builds a scheduler-side context on top of an existing {@link TopologyAssignContext}.
 *
 * <p>Besides copying the base context (via {@code super(context)}), this constructor derives:
 * the system topology, the supervisor-id/hostname maps in both directions, the set of workers of
 * the old assignment (empty when there is none), the component-to-sorted-task-list map, the total
 * worker number and the unstopped worker number.
 *
 * @param context the base context to copy from
 * @throws FailedAssignTopologyException if the system topology cannot be generated; the
 *     underlying exception is attached as the cause
 */
public DefaultTopologyAssignContext(TopologyAssignContext context) {
    super(context);

    try {
      sysTopology = Common.system_topology(stormConf, rawTopology);
    } catch (Exception e) {
      // Keep the root cause: without it the failure cannot be diagnosed from the stack trace.
      // initCause is used because only the (String) constructor is known to exist from here.
      FailedAssignTopologyException failure =
          new FailedAssignTopologyException("Failed to generate system topology");
      failure.initCause(e);
      throw failure;
    }

    sidToHostname = generateSidToHost();
    hostToSid = JStormUtils.reverse_map(sidToHostname);

    if (oldAssignment != null && oldAssignment.getWorkers() != null) {
      oldWorkers = oldAssignment.getWorkers();
    } else {
      oldWorkers = new HashSet<ResourceWorkerSlot>();
    }

    refineDeadTasks();

    componentTasks = JStormUtils.reverse_map(context.getTaskToComponent());

    // Sort each component's task list in place so task ids are in ascending order.
    for (List<Integer> componentTaskList : componentTasks.values()) {
      Collections.sort(componentTaskList);
    }

    totalWorkerNum = computeWorkerNum();

    unstoppedWorkerNum = computeUnstoppedAssignments();
  }
Example #2
0
 /**
  * Builds the local-mode assignment: all tasks of the topology are placed into one worker on
  * the cluster's single supervisor, using the first port returned by that supervisor's worker
  * port collection.
  *
  * @param context assignment context providing the cluster map and the full task id list
  * @return a set holding exactly one worker slot that contains every task id
  * @throws RuntimeException if the cluster does not contain exactly one supervisor
  */
 private static Set<ResourceWorkerSlot> mkLocalAssignment(TopologyAssignContext context) {
   Map<String, SupervisorInfo> cluster = context.getCluster();
   if (cluster.size() != 1) {
     throw new RuntimeException(
         "Local mode requires exactly one supervisor, but the cluster has " + cluster.size());
   }

   // The size check above guarantees this is the only entry.
   Entry<String, SupervisorInfo> onlyEntry = cluster.entrySet().iterator().next();
   String supervisorId = onlyEntry.getKey();
   SupervisorInfo localSupervisor = onlyEntry.getValue();

   // NOTE(review): next() fails if the supervisor reports no worker ports - confirm that
   // cannot happen in local mode.
   int port = localSupervisor.getWorkerPorts().iterator().next();

   ResourceWorkerSlot worker = new ResourceWorkerSlot(supervisorId, port);
   worker.setTasks(new HashSet<Integer>(context.getAllTaskIds()));
   worker.setHostname(localSupervisor.getHostName());

   Set<ResourceWorkerSlot> result = new HashSet<ResourceWorkerSlot>();
   result.add(worker);
   return result;
 }
  /**
   * Gives back to the supervisors' resource pools the cpu, memory, disk and port slots recorded
   * in the old assignment for every task that is not in the unstopped task set.
   *
   * <p>Tasks without an entry in the old assignment are logged and skipped; tasks whose
   * supervisor is no longer part of the cluster map are skipped silently.
   *
   * <p>Known caveat carried over from the original note: some slots of dead tasks may already
   * have been freed before this method runs.
   *
   * @param context assignment context supplying task ids, the cluster and the old assignment
   */
  protected void freeUsed(TopologyAssignContext context) {
    // Candidates = all tasks minus the unstopped ones.
    Set<Integer> releasable = new HashSet<Integer>();
    releasable.addAll(context.getAllTaskIds());
    releasable.removeAll(context.getUnstoppedTaskIds());

    Map<String, SupervisorInfo> supervisors = context.getCluster();
    Map<Integer, ResourceAssignment> previous = context.getOldAssignment().getTaskToResource();

    for (Integer taskId : releasable) {
      ResourceAssignment held = previous.get(taskId);
      if (held == null) {
        LOG.warn("When free rebalance resource, no ResourceAssignment of task " + taskId);
      } else {
        SupervisorInfo owner = supervisors.get(held.getSupervisorId());
        if (owner != null) {
          owner.getCpuPool().free(held.getCpuSlotNum(), context);
          owner.getMemPool().free(held.getMemSlotNum(), context);
          owner.getDiskPool().free(held.getDiskSlot(), context);
          owner.getNetPool().free(held.getPort(), context);
        }
      }
    }
  }
Example #4
0
  /**
   * Computes the start time, in seconds, of every task covered by the new worker set.
   *
   * <p>For a new assignment ({@code ASSIGN_TYPE_NEW}) every task of {@code workers} is stamped
   * with the current time. Otherwise the start times stored in the existing assignment (if any)
   * are carried over, and each task id returned by {@code getChangeTaskIds} is stamped with the
   * current time and has its task heartbeat removed through the cluster state.
   *
   * @param context assignment context; only its assign type is read here
   * @param nimbusData provides the {@code StormClusterState} used to remove heartbeats
   * @param topologyId id of the topology the tasks belong to
   * @param existingAssignment the previous assignment, may be {@code null}
   * @param workers the newly computed worker slots
   * @return task id to start time (seconds), ordered by task id
   * @throws Exception propagated from the helper and cluster-state calls
   */
  public static Map<Integer, Integer> getTaskStartTimes(
      TopologyAssignContext context,
      NimbusData nimbusData,
      String topologyId,
      Assignment existingAssignment,
      Set<ResourceWorkerSlot> workers)
      throws Exception {

    Map<Integer, Integer> result = new TreeMap<Integer, Integer>();

    boolean brandNew = context.getAssignType() == TopologyAssignContext.ASSIGN_TYPE_NEW;
    if (brandNew) {
      // Nothing to compare against: every task starts now.
      int now = TimeUtils.current_time_secs();
      for (ResourceWorkerSlot slot : workers) {
        for (Integer taskId : slot.getTasks()) {
          result.put(taskId, now);
        }
      }
      return result;
    }

    Set<ResourceWorkerSlot> previousWorkers = new HashSet<ResourceWorkerSlot>();
    if (existingAssignment != null) {
      Map<Integer, Integer> previousStartTimes = existingAssignment.getTaskStartTimeSecs();
      if (previousStartTimes != null) {
        result.putAll(previousStartTimes);
      }

      if (existingAssignment.getWorkers() != null) {
        previousWorkers = existingAssignment.getWorkers();
      }
    }

    StormClusterState clusterState = nimbusData.getStormClusterState();
    Set<Integer> changedTasks = getChangeTaskIds(previousWorkers, workers);
    int now = TimeUtils.current_time_secs();

    // Changed tasks get a fresh start time, overriding any carried-over value.
    for (Integer taskId : changedTasks) {
      result.put(taskId, now);
      clusterState.remove_task_heartbeat(topologyId, taskId);
    }

    LOG.info("Task assignment has been changed " + changedTasks);
    return result;
  }
Example #5
0
  /**
   * Makes the assignment for one topology and stores it in the cluster state.
   *
   * <p>Steps: prepare the assign context, obtain the worker slots (from the default scheduler,
   * or from {@code mkLocalAssignment} in local mode), build the {@code Assignment} together with
   * the node/host map and task start times, write it to the cluster state, refresh the task
   * heartbeat start times, and finally update metrics information.
   *
   * @param event the assign event; supplies the topology id
   * @return the assignment that was written to the cluster state
   * @throws Exception propagated from context preparation, scheduling or cluster-state access
   */
  public Assignment mkAssignment(TopologyAssignEvent event) throws Exception {
    final String topologyId = event.getTopologyId();
    LOG.info("Determining assignment for " + topologyId);

    final TopologyAssignContext context = prepareTopologyAssign(event);

    // Local mode bypasses the scheduler entirely.
    final Set<ResourceWorkerSlot> workers;
    if (StormConfig.local_mode(nimbusData.getConf())) {
      workers = mkLocalAssignment(context);
    } else {
      IToplogyScheduler defaultScheduler = schedulers.get(DEFAULT_SCHEDULER_NAME);
      workers = defaultScheduler.assignTasks(context);
    }

    Map<String, String> nodeHost =
        getTopologyNodeHost(context.getCluster(), context.getOldAssignment(), workers);
    Map<Integer, Integer> startTimes =
        getTaskStartTimes(context, nimbusData, topologyId, context.getOldAssignment(), workers);
    String codeDir = StormConfig.masterStormdistRoot(nimbusData.getConf(), topologyId);

    Assignment assignment = new Assignment(codeDir, workers, nodeHost, startTimes);

    nimbusData.getStormClusterState().set_assignment(topologyId, assignment);

    // update task heartbeat's start time
    NimbusUtils.updateTaskHbStartTime(nimbusData, assignment, topologyId);

    // Rebalance and monitor reassignments update the metrics information in ZK;
    // any other assign type (i.e. creating the topology) updates the metrics monitor status.
    if (context.getAssignType() != TopologyAssignContext.ASSIGN_TYPE_REBALANCE
        && context.getAssignType() != TopologyAssignContext.ASSIGN_TYPE_MONITOR) {
      metricsMonitor(event);
    } else {
      NimbusUtils.updateMetricsInfo(nimbusData, topologyId, assignment);
    }

    LOG.info("Successfully make assignment for topology id " + topologyId + ": " + assignment);

    return assignment;
  }
Example #6
0
  /**
   * Gathers everything needed to assign a topology into a {@link TopologyAssignContext}: the raw
   * topology and merged configuration, the task-to-component map, the dead and unstopped task
   * sets, the supervisor cluster map, the old assignment and the assign type.
   *
   * <p>The assign type is {@code ASSIGN_TYPE_NEW} when the cluster state holds no assignment for
   * the topology, {@code ASSIGN_TYPE_REBALANCE} when one exists and the event is a scratch event,
   * and {@code ASSIGN_TYPE_MONITOR} otherwise.
   *
   * @param event the assign event; supplies topology id, topology name and the scratch flag
   * @return the populated context
   * @throws FailedAssignTopologyException if no supervisor is alive
   * @throws IOException if no task information is found for the topology
   * @throws Exception propagated from configuration or cluster-state access
   */
  protected TopologyAssignContext prepareTopologyAssign(TopologyAssignEvent event)
      throws Exception {
    TopologyAssignContext ret = new TopologyAssignContext();

    String topologyId = event.getTopologyId();

    // Read the topology's stormconf.ser and stormcode.ser from the local directory.
    Map<Object, Object> nimbusConf = nimbusData.getConf();
    Map<Object, Object> topologyConf =
        StormConfig.read_nimbus_topology_conf(nimbusConf, topologyId);

    StormTopology rawTopology = StormConfig.read_nimbus_topology_code(nimbusConf, topologyId);
    ret.setRawTopology(rawTopology);

    // Topology settings are applied last so they override the nimbus defaults.
    Map stormConf = new HashMap();
    stormConf.putAll(nimbusConf);
    stormConf.putAll(topologyConf);
    ret.setStormConf(stormConf);

    StormClusterState stormClusterState = nimbusData.getStormClusterState();

    // Get all running supervisors and their SupervisorInfo; no callback is needed to watch them.
    Map<String, SupervisorInfo> supInfos = Cluster.allSupervisorInfo(stormClusterState, null);
    if (supInfos == null || supInfos.isEmpty()) {
      throw new FailedAssignTopologyException(
          "Failed to make assignment " + topologyId + ", due to no alive supervisor");
    }

    // Get all tasks registered under this topology id (/ZK/tasks/topologyId).
    Map<Integer, String> taskToComponent =
        Cluster.topology_task_info(stormClusterState, topologyId);
    // Guard the map itself: keySet() never returns null, so checking the key set for null
    // cannot catch a missing task map.
    if (taskToComponent == null || taskToComponent.isEmpty()) {
      String errMsg = "Failed to get all task ID list from /ZK-dir/tasks/" + topologyId;
      LOG.warn(errMsg);
      throw new IOException(errMsg);
    }
    ret.setTaskToComponent(taskToComponent);

    Set<Integer> allTaskIds = taskToComponent.keySet();
    ret.setAllTaskIds(allTaskIds);

    Set<Integer> aliveTasks = new HashSet<Integer>();
    // Unstopped tasks: tasks that are still alive although their supervisor (machine) is dead.
    Set<Integer> unstoppedTasks = new HashSet<Integer>();
    Set<Integer> deadTasks = new HashSet<Integer>();

    Assignment existingAssignment = stormClusterState.assignment_info(topologyId, null);
    if (existingAssignment != null) {
      aliveTasks = getAliveTasks(topologyId, allTaskIds);
      unstoppedTasks = getUnstoppedSlots(aliveTasks, supInfos, existingAssignment);

      // Dead tasks = all tasks minus the alive ones.
      deadTasks.addAll(allTaskIds);
      deadTasks.removeAll(aliveTasks);
    }

    ret.setDeadTaskIds(deadTasks);
    ret.setUnstoppedTaskIds(unstoppedTasks);

    // Step 2: get all slots resource, free slots / alive slots / unstopped slots
    getFreeSlots(supInfos, stormClusterState);
    ret.setCluster(supInfos);

    if (existingAssignment == null) {
      ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_NEW);

      // Best effort: a backup of the last assignment (looked up by topology name) is used as
      // the old assignment when available; failure to read it is only logged.
      try {
        AssignmentBak lastAssignment = stormClusterState.assignment_bak(event.getTopologyName());
        if (lastAssignment != null) {
          ret.setOldAssignment(lastAssignment.getAssignment());
        }
      } catch (Exception e) {
        LOG.warn("Fail to get old assignment", e);
      }
    } else {
      ret.setOldAssignment(existingAssignment);
      if (event.isScratch()) {
        // Rebalance: only workers of unstopped tasks are kept.
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_REBALANCE);
        ret.setUnstoppedWorkers(getUnstoppedWorkers(unstoppedTasks, existingAssignment));
      } else {
        // Monitor: workers of all alive tasks are kept.
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_MONITOR);
        ret.setUnstoppedWorkers(getUnstoppedWorkers(aliveTasks, existingAssignment));
      }
    }

    return ret;
  }
  /**
   * Assigns resources to the tasks of a topology.
   *
   * <p>Flow: validate the assign type, wrap the context in a {@code DefaultTopologyAssignContext}
   * (freeing previously used resources first when rebalancing), seed the result with the kept and
   * unstopped assignments, then run every registered pre-assign handler over its tasks. Tasks a
   * handler cannot place are handed to the next handler; anything still unplaced after the last
   * handler fails the whole assignment. A port post-assign step runs on the new assignments.
   *
   * @param context the prepared assignment context
   * @return task id to resource assignment, ordered by task id
   * @throws FailedAssignTopologyException if the assign type is invalid, no worker needs to be
   *     allocated, or some tasks could not be assigned
   */
  @Override
  public Map<Integer, ResourceAssignment> assignTasks(TopologyAssignContext context)
      throws FailedAssignTopologyException {

    int assignType = context.getAssignType();
    if (!TopologyAssignContext.isAssignTypeValid(assignType)) {
      throw new FailedAssignTopologyException("Invalide Assign Type " + assignType);
    }

    DefaultTopologyAssignContext defaultContext = new DefaultTopologyAssignContext(context);
    if (assignType == TopologyAssignContext.ASSIGN_TYPE_REBALANCE) {
      freeUsed(defaultContext);
    }
    LOG.info("Dead tasks:" + defaultContext.getDeadTaskIds());
    LOG.info("Unstopped tasks:" + defaultContext.getUnstoppedTaskIds());

    Set<Integer> needAssignTasks = getNeedAssignTasks(defaultContext);
    Map<Integer, ResourceAssignment> keepAssigns = getKeepAssign(defaultContext, needAssignTasks);

    // A tree map keeps the result ordered by task id.
    Map<Integer, ResourceAssignment> allAssigns = new TreeMap<Integer, ResourceAssignment>();
    allAssigns.putAll(keepAssigns);
    allAssigns.putAll(defaultContext.getUnstoppedAssignments());

    Map<WorkerSlot, List<Integer>> keepAssignWorkers = Assignment.getWorkerTasks(keepAssigns);

    // Workers still to allocate = total - unstopped - kept.
    int allocWorkerNum =
        defaultContext.getTotalWorkerNum()
            - defaultContext.getUnstoppedWorkerNum()
            - keepAssignWorkers.size();
    if (allocWorkerNum <= 0) {
      LOG.warn(
          "Don't need assign workers, all workers are fine " + defaultContext.toDetailString());
      throw new FailedAssignTopologyException("Don't need assign worker, all workers are fine ");
    }

    Map<ComponentAssignType, Pair<Set<Integer>, IPreassignTask>> typeHandler =
        registerPreAssignHandler(defaultContext, needAssignTasks);

    // Components whose configuration has already been written to the log.
    Set<String> loggedComponents = new HashSet<String>();
    Map<Integer, ResourceAssignment> newAssigns = new HashMap<Integer, ResourceAssignment>();
    Set<String> usedSupervisorIds = new HashSet<String>();
    // Tasks the previous handler could not place; retried by the next handler.
    List<Integer> pendingFailures = new ArrayList<Integer>();

    for (Entry<ComponentAssignType, Pair<Set<Integer>, IPreassignTask>> handlerEntry :
        typeHandler.entrySet()) {
      Pair<Set<Integer>, IPreassignTask> tasksAndHandler = handlerEntry.getValue();
      Set<Integer> tasks = tasksAndHandler.getFirst();
      IPreassignTask handler = tasksAndHandler.getSecond();

      tasks.addAll(pendingFailures);
      pendingFailures.clear();

      List<Integer> sortedTasks = sortAssignTasks(defaultContext, tasks);
      StormTopology sysTopology = defaultContext.getSysTopology();

      for (Integer task : sortedTasks) {
        Set<String> canUsedSupervisorIds =
            getCanUsedSupervisors(defaultContext, usedSupervisorIds, allocWorkerNum);

        String componentName = defaultContext.getTaskToComponent().get(task);
        ComponentCommon componentCommon =
            ThriftTopologyUtils.getComponentCommon(sysTopology, componentName);

        Map componentMap = (Map) JStormUtils.from_json(componentCommon.get_json_conf());
        if (componentMap == null) {
          componentMap = Maps.newHashMap();
        }

        // add() returns true only the first time a component is seen, so each is logged once.
        if (loggedComponents.add(componentName)) {
          LOG.info("Component map of " + componentName + "\n" + componentMap);
        }

        ResourceAssignment preAssignment =
            handler.preAssign(
                task,
                defaultContext,
                componentMap,
                componentName,
                canUsedSupervisorIds,
                allAssigns,
                newAssigns);

        if (preAssignment != null) {
          // pre-assign succeeded: record it and mark the supervisor as used
          String supervisorId = preAssignment.getSupervisorId();
          SupervisorInfo supervisorInfo = defaultContext.getCluster().get(supervisorId);
          LOG.info("Task " + task + " had been assigned to " + supervisorInfo.getHostName());
          newAssigns.put(task, preAssignment);
          allAssigns.put(task, preAssignment);
          usedSupervisorIds.add(preAssignment.getSupervisorId());
        } else {
          // pre-assign failed: let the next handler try
          pendingFailures.add(task);
        }
      }
    }

    if (!pendingFailures.isEmpty()) {
      throw new FailedAssignTopologyException("Failed to assign tasks " + pendingFailures);
    }

    // The post-assign handler is hardcoded to the port handler.
    IPostAssignTask postAssignHandler = new PostAssignTaskPort();
    postAssignHandler.postAssign(defaultContext, newAssigns, allocWorkerNum);

    LOG.info("Keep Alive slots:" + keepAssigns);
    LOG.info("Unstopped slots:" + defaultContext.getUnstoppedAssignments());
    LOG.info("New assign slots:" + newAssigns);

    return allAssigns;
  }