Example #1
0
 /**
  * get topology configuration
  *
  * @param id String: topology id
  * @return String
  */
 @Override
 public String getTopologyConf(String id) throws NotAliveException, TException {
   String rtn;
   try {
     Map<Object, Object> topologyConf = StormConfig.read_nimbus_topology_conf(conf, id);
     rtn = JStormUtils.to_json(topologyConf);
   } catch (IOException e) {
     // TODO Auto-generated catch block
     LOG.info("Failed to get configuration of " + id, e);
     throw new TException(e);
   }
   return rtn;
 }
Example #2
0
  /**
   * get StormTopology throw deserialize local files
   *
   * @param id String: topology id
   * @return StormTopology
   */
  @Override
  public StormTopology getTopology(String id) throws NotAliveException, TException {
    StormTopology topology = null;
    try {
      StormTopology stormtopology = StormConfig.read_nimbus_topology_code(conf, id);
      if (stormtopology == null) {
        throw new TException("topology:" + id + "is null");
      }

      Map<Object, Object> topologyConf =
          (Map<Object, Object>) StormConfig.read_nimbus_topology_conf(conf, id);

      topology = Common.system_topology(topologyConf, stormtopology);
    } catch (Exception e) {
      LOG.error("Failed to get topology " + id + ",", e);
      throw new TException("Failed to get system_topology");
    }
    return topology;
  }
Example #3
0
  /**
   * generate a taskid(Integer) for every task
   *
   * @param conf
   * @param topologyid
   * @return Map<Integer, String>: from taskid to componentid
   * @throws IOException
   * @throws InvalidTopologyException
   */
  public Map<Integer, String> mkTaskComponentAssignments(
      Map<Object, Object> conf, String topologyid) throws IOException, InvalidTopologyException {

    // @@@ here exist a little problem,
    // we can directly pass stormConf from Submit method
    Map<Object, Object> stormConf = StormConfig.read_nimbus_topology_conf(conf, topologyid);

    StormTopology stopology = StormConfig.read_nimbus_topology_code(conf, topologyid);

    // use TreeMap to make task as sequence
    Map<Integer, String> rtn = new TreeMap<Integer, String>();

    StormTopology topology = Common.system_topology(stormConf, stopology);

    Integer count = 0;
    count = mkTaskMaker(stormConf, topology.get_bolts(), rtn, count);
    count = mkTaskMaker(stormConf, topology.get_spouts(), rtn, count);
    count = mkTaskMaker(stormConf, topology.get_state_spouts(), rtn, count);

    return rtn;
  }
  @Override
  public <T> Object execute(T... args) {
    boolean isSetTaskInfo = false;
    try {
      Boolean reassign = (Boolean) args[1];
      Map<Object, Object> conf = (Map<Object, Object>) args[2]; // args[0]:
      // delay,
      // args[1]:
      // reassign_flag,
      // args[2]:
      // conf
      if (conf != null) {
        boolean isConfUpdate = false;
        Map stormConf = data.getConf();

        // Update topology code
        Map topoConf = StormConfig.read_nimbus_topology_conf(stormConf, topologyid);
        StormTopology rawOldTopology = StormConfig.read_nimbus_topology_code(stormConf, topologyid);
        StormTopology rawNewTopology = NimbusUtils.normalizeTopology(conf, rawOldTopology, true);
        StormTopology sysOldTopology = rawOldTopology.deepCopy();
        StormTopology sysNewTopology = rawNewTopology.deepCopy();
        if (conf.get(Config.TOPOLOGY_ACKER_EXECUTORS) != null) {
          Common.add_acker(topoConf, sysOldTopology);
          Common.add_acker(conf, sysNewTopology);
          int ackerNum = JStormUtils.parseInt(conf.get(Config.TOPOLOGY_ACKER_EXECUTORS));
          int oldAckerNum = JStormUtils.parseInt(topoConf.get(Config.TOPOLOGY_ACKER_EXECUTORS));
          LOG.info("Update acker from oldAckerNum=" + oldAckerNum + " to ackerNum=" + ackerNum);
          topoConf.put(Config.TOPOLOGY_ACKER_EXECUTORS, ackerNum);
          isConfUpdate = true;
        }

        // If scale-out, setup task info for new added tasks
        setTaskInfo(sysOldTopology, sysNewTopology);
        isSetTaskInfo = true;

        // If everything is OK, write topology code into disk
        StormConfig.write_nimbus_topology_code(
            stormConf, topologyid, Utils.serialize(rawNewTopology));

        // Update topology conf if worker num has been updated
        Set<Object> keys = conf.keySet();
        Integer workerNum = JStormUtils.parseInt(conf.get(Config.TOPOLOGY_WORKERS));
        if (workerNum != null) {
          Integer oldWorkerNum = JStormUtils.parseInt(topoConf.get(Config.TOPOLOGY_WORKERS));
          topoConf.put(Config.TOPOLOGY_WORKERS, workerNum);
          isConfUpdate = true;

          LOG.info("Update worker num from " + oldWorkerNum + " to " + workerNum);
        }

        if (keys.contains(Config.ISOLATION_SCHEDULER_MACHINES)) {
          topoConf.put(
              Config.ISOLATION_SCHEDULER_MACHINES, conf.get(Config.ISOLATION_SCHEDULER_MACHINES));
        }

        if (isConfUpdate) {
          StormConfig.write_nimbus_topology_conf(stormConf, topologyid, topoConf);
        }
      }

      TopologyAssignEvent event = new TopologyAssignEvent();

      event.setTopologyId(topologyid);
      event.setScratch(true);
      event.setOldStatus(oldStatus);
      event.setReassign(reassign);
      if (conf != null) event.setScaleTopology(true);
      TopologyAssign.push(event);
      event.waitFinish();
    } catch (Exception e) {
      LOG.error("do-rebalance error!", e);
      // Rollback the changes on ZK
      if (isSetTaskInfo) {
        try {
          StormClusterState clusterState = data.getStormClusterState();
          clusterState.remove_task(topologyid, newTasks);
        } catch (Exception e1) {
          LOG.error("Failed to rollback the changes on ZK for task-" + newTasks, e);
        }
      }
    }

    DelayStatusTransitionCallback delayCallback =
        new DelayStatusTransitionCallback(
            data, topologyid, oldStatus, StatusType.rebalancing, StatusType.done_rebalance);
    return delayCallback.execute();
  }
Example #5
0
  protected TopologyAssignContext prepareTopologyAssign(TopologyAssignEvent event)
      throws Exception {
    TopologyAssignContext ret = new TopologyAssignContext();

    String topologyId = event.getTopologyId();

    /** 读取本地目录下的stormconf.ser和stormcode.ser */
    Map<Object, Object> nimbusConf = nimbusData.getConf();
    Map<Object, Object> topologyConf =
        StormConfig.read_nimbus_topology_conf(nimbusConf, topologyId);

    StormTopology rawTopology = StormConfig.read_nimbus_topology_code(nimbusConf, topologyId);
    ret.setRawTopology(rawTopology);

    Map stormConf = new HashMap();
    stormConf.putAll(nimbusConf);
    stormConf.putAll(topologyConf);
    ret.setStormConf(stormConf);

    StormClusterState stormClusterState = nimbusData.getStormClusterState();

    // get all running supervisor, don't need callback to watch supervisor
    /** 获取所有的运行的supervisor,以及supervisorInfo */
    Map<String, SupervisorInfo> supInfos = Cluster.allSupervisorInfo(stormClusterState, null);
    if (supInfos.size() == 0) {
      throw new FailedAssignTopologyException(
          "Failed to make assignment " + topologyId + ", due to no alive supervisor");
    }

    /** 获取topologyId下的所有tasks */
    Map<Integer, String> taskToComponent =
        Cluster.topology_task_info(stormClusterState, topologyId);
    ret.setTaskToComponent(taskToComponent);

    // get taskids /ZK/tasks/topologyId
    Set<Integer> allTaskIds = taskToComponent.keySet();
    if (allTaskIds == null || allTaskIds.size() == 0) {
      String errMsg = "Failed to get all task ID list from /ZK-dir/tasks/" + topologyId;
      LOG.warn(errMsg);
      throw new IOException(errMsg);
    }
    ret.setAllTaskIds(allTaskIds);

    Set<Integer> aliveTasks = new HashSet<Integer>();
    // unstoppedTasks are tasks which are alive on no supervisor's(dead)
    // machine
    /** 未完成的任务,supervisor已死,任务没完成 */
    Set<Integer> unstoppedTasks = new HashSet<Integer>();
    Set<Integer> deadTasks = new HashSet<Integer>();
    Set<ResourceWorkerSlot> unstoppedWorkers = new HashSet<ResourceWorkerSlot>();

    Assignment existingAssignment = stormClusterState.assignment_info(topologyId, null);
    if (existingAssignment != null) {
      aliveTasks = getAliveTasks(topologyId, allTaskIds);
      unstoppedTasks = getUnstoppedSlots(aliveTasks, supInfos, existingAssignment);

      deadTasks.addAll(allTaskIds);
      deadTasks.removeAll(aliveTasks);
    }

    ret.setDeadTaskIds(deadTasks);
    ret.setUnstoppedTaskIds(unstoppedTasks);

    // Step 2: get all slots resource, free slots/ alive slots/ unstopped
    // slots
    getFreeSlots(supInfos, stormClusterState);
    ret.setCluster(supInfos);

    if (existingAssignment == null) {
      ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_NEW);

      try {
        AssignmentBak lastAssignment = stormClusterState.assignment_bak(event.getTopologyName());
        if (lastAssignment != null) {
          ret.setOldAssignment(lastAssignment.getAssignment());
        }
      } catch (Exception e) {
        LOG.warn("Fail to get old assignment", e);
      }
    } else {
      ret.setOldAssignment(existingAssignment);
      if (event.isScratch()) {
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_REBALANCE);
        unstoppedWorkers = getUnstoppedWorkers(unstoppedTasks, existingAssignment);
        ret.setUnstoppedWorkers(unstoppedWorkers);
      } else {
        ret.setAssignType(TopologyAssignContext.ASSIGN_TYPE_MONITOR);
        unstoppedWorkers = getUnstoppedWorkers(aliveTasks, existingAssignment);
        ret.setUnstoppedWorkers(unstoppedWorkers);
      }
    }

    return ret;
  }