// @wjw_note: Assign the task to one randomly chosen server from taskServerList.
  /**
   * Hands the task at {@code taskPath} to one server picked at random from
   * {@code taskServerList} by creating a persistent child node named after that server.
   *
   * <p>Before assigning, the task node's mtime is compared against {@code TASK_EXPIRE_TIME};
   * a task that is older than that is deleted (with its subtree) and nothing is assigned.
   *
   * @param taskServerList candidate server ids; one of them is chosen via {@code random}
   * @param taskPath ZooKeeper path of the task node
   * @throws Exception whatever the underlying ZooKeeper calls throw
   */
  private void assignServer2Task(List<String> taskServerList, String taskPath) throws Exception {
    // @wjw_note: purge an expired task that has no runner.
    Stat taskStat = new Stat();
    this.getZooKeeper().getData(taskPath, null, taskStat);
    if (getSystemTime() - taskStat.getMtime() > TASK_EXPIRE_TIME) {
      ZKTools.deleteTree(this.getZooKeeper(), taskPath);
      LOG.warn("清除过期的无Runner的Task[" + taskPath + "]");
      return;
    }

    // Pick the owner at random and record it as a child of the task node.
    String chosenServer = taskServerList.get(random.nextInt(taskServerList.size()));
    String ownerPath = taskPath + "/" + chosenServer;
    this.getZooKeeper().create(ownerPath, null, this.zkManager.getAcl(), CreateMode.PERSISTENT);

    if (LOG.isDebugEnabled()) {
      LOG.debug("Assign server [" + chosenServer + "]" + " to task [" + taskPath + "]");
    }
  }
示例#2
0
 /**
  * Copies the eleven fields listed below from one {@link Stat} into another.
  *
  * @param from the stat to read from
  * @param to the stat that receives the values
  */
 public static void copyStat(Stat from, Stat to) {
   // Transaction ids.
   to.setCzxid(from.getCzxid());
   to.setMzxid(from.getMzxid());
   to.setPzxid(from.getPzxid());

   // Timestamps.
   to.setCtime(from.getCtime());
   to.setMtime(from.getMtime());

   // Version counters.
   to.setVersion(from.getVersion());
   to.setCversion(from.getCversion());
   to.setAversion(from.getAversion());

   // Remaining node attributes.
   to.setEphemeralOwner(from.getEphemeralOwner());
   to.setDataLength(from.getDataLength());
   to.setNumChildren(from.getNumChildren());
 }
示例#3
0
        /**
         * Handles a watch event for the watched node.
         *
         * <p>Everything runs while holding the {@code WatchedNode.this} monitor. Nothing happens
         * when the node has been cancelled or the event state is not {@code SyncConnected}.
         * Otherwise {@code watchForData()} is called for created/data-changed events and
         * {@code watchForCreation()} for deleted events, after which every registered listener is
         * notified with {@code value} if the sum of {@code stat}'s ctime and mtime differs from the
         * one recorded at the previous notification (or none was recorded).
         *
         * @param event the event delivered by ZooKeeper
         */
        @Override
        public void process(WatchedEvent event) {
          // This lock matters: once changes start arriving, handling them one at a time keeps
          // us clear of concurrency issues.
          synchronized (WatchedNode.this) {
            if (cancelled) {
              return;
            }
            if (event.getState() != KeeperState.SyncConnected) {
              // Not sync connected, do nothing.
              if (LOG.isDebugEnabled()) {
                LOG.debug("Not sync connected anymore for watched node " + nodePath);
              }
              return;
            }

            // Connected: refresh according to the kind of event.
            try {
              switch (event.getType()) {
                case NodeCreated:
                case NodeDataChanged:
                  watchForData();
                  break;
                case NodeDeleted:
                  // Forget the previously notified version; listeners will be notified again.
                  previousVersion = null;
                  watchForCreation();
                  break;
                default:
                  break;
              }
            } catch (KeeperException e) {
              LOG.error("Exception while trying to update our cached value for " + nodePath, e);
            } catch (InterruptedException e) {
              // NOTE(review): the interrupt is only traced, the thread's interrupt flag is not
              // restored -- confirm this is intended for the event thread.
              if (LOG.isTraceEnabled()) {
                LOG.trace("Interrupted while trying to update our cached value for " + nodePath, e);
              }
            }

            // Skip the notification when the last notified version equals the current one.
            long currentVersion = stat.getCtime() + stat.getMtime();
            boolean alreadyNotified =
                previousVersion != null && previousVersion.equals(currentVersion);
            if (alreadyNotified) {
              return;
            }
            try {
              synchronized (listeners) {
                for (WatchedNodeListener<T> listener : listeners) {
                  listener.onWatchedNodeChange(value);
                }
              }
            } finally {
              // Record the version even if a listener threw.
              previousVersion = currentVersion;
            }
          }
        }
 /**
  * Reads the cursor node for the given destination and client and wraps it in a
  * {@link PositionEventData}.
  *
  * <p>The node's ctime and mtime become the create and modified times, and the node data,
  * decoded as UTF-8, becomes the position.
  *
  * @param destination first argument substituted into {@code CANAL_CURSOR_PATH}
  * @param clientId second argument substituted into {@code CANAL_CURSOR_PATH}
  * @return the populated event data, or {@code null} if any exception occurs along the way
  */
 public PositionEventData getCanalCursor(String destination, short clientId) {
   String cursorPath = String.format(CANAL_CURSOR_PATH, destination, String.valueOf(clientId));
   try {
     // zkclient keeps stat retrieval separate from regular operations, so go through the
     // native ZooKeeper handle as an optimization.
     ZooKeeper nativeZk = ((ZooKeeperx) zookeeper.getConnection()).getZookeeper();
     Stat nodeStat = new Stat();
     byte[] payload = nativeZk.getData(cursorPath, false, nodeStat);

     PositionEventData cursor = new PositionEventData();
     cursor.setCreateTime(new Date(nodeStat.getCtime()));
     cursor.setModifiedTime(new Date(nodeStat.getMtime()));
     cursor.setPosition(new String(payload, "UTF-8"));
     return cursor;
   } catch (Exception ignored) {
     return null;
   }
 }
 /**
  * Deletes every child of {@code pathServer} whose mtime is more than
  * {@code SERVER_EXPIRE_TIME} behind {@code getSystemTime()}.
  *
  * <p>The {@code pathServer} node is created first if it does not exist. Failures while
  * inspecting or deleting an individual child are ignored.
  *
  * @throws Exception if checking/creating the root node or listing its children fails
  */
 public void clearExpireScheduleServer() throws Exception {
   String serverRoot = this.pathServer;
   if (this.getZooKeeper().exists(serverRoot, false) == null) {
     this.getZooKeeper().create(serverRoot, null, this.zkManager.getAcl(), CreateMode.PERSISTENT);
   }

   for (String serverName : this.zkManager.getZooKeeper().getChildren(serverRoot, false)) {
     String serverPath = serverRoot + "/" + serverName;
     try {
       Stat serverStat = new Stat();
       this.getZooKeeper().getData(serverPath, null, serverStat);
       boolean expired = getSystemTime() - serverStat.getMtime() > SERVER_EXPIRE_TIME;
       if (expired) {
         ZKTools.deleteTree(this.getZooKeeper(), serverPath);
         LOG.warn("清除过期ScheduleServer[" + serverPath + "]");
       }
     } catch (Exception ignored) {
       // With several servers running, concurrent clean-up is possible; ignore the exception.
     }
   }
 }
示例#6
0
  /**
   * Get metrics data for this service node (self) for current interval.
   *
   * <p>If an export entry exists for this node but its {@code dataPath} has not been set yet,
   * the call blocks on the entry's monitor until the path becomes available. The check and the
   * wait both happen while holding the monitor and the wait is repeated in a loop, so a
   * notification cannot be missed between check and wait and a spurious wakeup does not let
   * the method continue with a {@code null} path.
   *
   * @param clusterId cluster the service belongs to
   * @param serviceId service whose metrics are requested
   * @return the metrics read from ZooKeeper with cluster id, service id and last-updated
   *     timestamp (the node's mtime) filled in; {@code null} if nothing has been exported for
   *     this node, if the data node does not exist, or if the thread was interrupted before the
   *     data path became available
   * @throws ReignException if reading or deserializing the data fails for any other reason
   */
  public MetricsData getMyMetrics(String clusterId, String serviceId) {
    String key = clusterId + "/" + serviceId + "/" + getContext().getZkNodeId().getPathToken();
    ExportMeta exportMeta = exportPathMap.get(key);
    if (exportMeta == null) {
      logger.trace(
          "MetricsData not found:  data has not been exported:  clusterId={}; serviceId={}; exportMeta={}",
          clusterId,
          serviceId,
          exportMeta);
      return null;
    }

    boolean dataPathReady;
    synchronized (exportMeta) {
      if (exportMeta.dataPath == null) {
        logger.trace(
            "MetricsData not found:  waiting for data to be reported in ZK:  clusterId={}; serviceId={}; exportMeta.dataPath={}",
            clusterId,
            serviceId,
            exportMeta.dataPath);
      }
      try {
        // Re-test the condition after every wakeup: Object.wait() may return spuriously.
        while (exportMeta.dataPath == null) {
          exportMeta.wait();
        }
      } catch (InterruptedException e) {
        logger.warn("Interrupted while waiting:  " + e, e);
        // Keep the interruption visible to the caller instead of silently dropping it.
        Thread.currentThread().interrupt();
      }
      dataPathReady = exportMeta.dataPath != null;
    }
    if (!dataPathReady) {
      // Interrupted before any data was reported: there is nothing to read yet.
      return null;
    }

    try {
      logger.debug("Retrieving metrics:  path={}", exportMeta.dataPath);
      Stat stat = new Stat();
      byte[] bytes = getContext().getZkClient().getData(exportMeta.dataPath, true, stat);
      MetricsData metricsData = JacksonUtil.getObjectMapper().readValue(bytes, MetricsData.class);
      metricsData.setClusterId(clusterId);
      metricsData.setServiceId(serviceId);
      metricsData.setLastUpdatedTimestamp(stat.getMtime());
      return metricsData;
    } catch (KeeperException e) {
      // A missing data node simply means there are no metrics to return.
      if (e.code() == KeeperException.Code.NONODE) {
        return null;
      }
      throw new ReignException(e);
    } catch (Exception e) {
      throw new ReignException(e);
    }
  }
  /**
   * Makes sure every task node under {@code pathTask} has a live runner.
   *
   * <p>Returns without doing anything when ZooKeeper is not connected, when
   * {@code isLeader(currentUuid, taskServerList)} is false, when {@code taskServerList} is
   * empty, or when there are no task nodes. Otherwise, for each task, every runner child that
   * is not in {@code taskServerList}, or whose mtime is not within {@code TASK_EXPIRE_TIME},
   * is deleted; a task left without any live runner is passed to {@code assignServer2Task}.
   *
   * @param currentUuid id of the calling server
   * @param taskServerList ids of the servers that may run tasks
   * @throws Exception whatever the underlying ZooKeeper calls throw
   */
  @Override
  // @wjw_note: very important -- this is the task assignment logic!
  public void assignTask(String currentUuid, List<String> taskServerList) throws Exception {
    if (!this.zkManager.isZookeeperConnected()) {
      return;
    }

    if (!this.isLeader(currentUuid, taskServerList)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(currentUuid + ":不是负责任务分配的Leader,直接返回");
      }
      return;
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug(currentUuid + ":开始重新分配任务......");
    }

    if (taskServerList.size() <= 0) {
      // While servers are being adjusted dynamically the server list may be empty.
      return;
    }

    String taskRoot = this.pathTask;
    if (this.getZooKeeper().exists(taskRoot, false) == null) {
      this.getZooKeeper().create(taskRoot, null, this.zkManager.getAcl(), CreateMode.PERSISTENT);
    }
    List<String> taskNames = this.getZooKeeper().getChildren(taskRoot, false);
    if (taskNames == null || taskNames.isEmpty()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(currentUuid + ":没有集群任务");
      }
      return;
    }

    for (String taskName : taskNames) {
      String taskPath = taskRoot + "/" + taskName;
      if (this.getZooKeeper().exists(taskPath, false) == null) {
        this.getZooKeeper().create(taskPath, null, this.zkManager.getAcl(), CreateMode.PERSISTENT);
      }

      // Children of a task node are the ids of the servers recorded as its runners.
      List<String> runnerIds = this.getZooKeeper().getChildren(taskPath, false);
      boolean hasLiveRunner = false;
      if (runnerIds != null) {
        for (String runnerId : runnerIds) {
          String runnerPath = taskPath + "/" + runnerId;

          boolean alive = false;
          if (taskServerList.contains(runnerId)) {
            Stat runnerStat = new Stat();
            this.getZooKeeper().getData(runnerPath, null, runnerStat);
            // @wjw_note: check whether the task owner node has expired.
            alive = getSystemTime() - runnerStat.getMtime() < TASK_EXPIRE_TIME;
          }

          if (alive) {
            hasLiveRunner = true;
          } else {
            // @wjw_note: remove the zombie entry left behind by a server that has died.
            LOG.warn("删除僵尸Task Runner: " + runnerPath);
            ZKTools.deleteTree(this.getZooKeeper(), runnerPath);
          }
        }
      }

      if (!hasLiveRunner) {
        // No runner at all, or none survived the check above.
        // @wjw_note: assign the task to one random server from taskServerList.
        assignServer2Task(taskServerList, taskPath);
      }
    }
  }
  /**
   * Builds one {@link ProcessStat} per process node found under the process root of the given
   * channel/pipeline, in ascending process-id order.
   *
   * <p>For each process the stage child nodes are read (sorted with {@code StageComparator})
   * and turned into {@link StageStat} entries; an extra entry for the stage that is currently
   * in progress is derived from the last completed stage. Processes that hit one of the
   * {@code continue} statements below (empty/unused process node, or last stage already LOAD)
   * are left out of the result.
   *
   * <p>{@code NoNodeException}, {@code InterruptedException} and
   * {@code UnsupportedEncodingException} raised while reading a process are ignored; the
   * process is still added to the result with whatever stages were collected so far.
   *
   * @param channelId channel the pipeline belongs to
   * @param pipelineId pipeline whose processes are listed
   * @return the collected process statistics
   * @throws ArbitrateException if any other {@code KeeperException} occurs
   */
  public List<ProcessStat> listProcesses(Long channelId, Long pipelineId) {
    List<ProcessStat> processStats = new ArrayList<ProcessStat>();
    String processRoot = ManagePathUtils.getProcessRoot(channelId, pipelineId);
    IZkConnection connection = zookeeper.getConnection();
    // zkclient separates fetching stat info from regular operations, so use the native
    // ZooKeeper client as an optimization
    ZooKeeper orginZk = ((ZooKeeperx) connection).getZookeeper();

    // Fetch the list of all processes
    List<String> processNodes = zookeeper.getChildren(processRoot);
    List<Long> processIds = new ArrayList<Long>();
    for (String processNode : processNodes) {
      processIds.add(ManagePathUtils.getProcessId(processNode));
    }

    Collections.sort(processIds);

    for (int i = 0; i < processIds.size(); i++) {
      Long processId = processIds.get(i);
      // The current process may change while we are reading it
      ProcessStat processStat = new ProcessStat();
      processStat.setPipelineId(pipelineId);
      processStat.setProcessId(processId);

      List<StageStat> stageStats = new ArrayList<StageStat>();
      processStat.setStageStats(stageStats);
      try {
        String processPath = ManagePathUtils.getProcess(channelId, pipelineId, processId);
        Stat zkProcessStat = new Stat();
        List<String> stages = orginZk.getChildren(processPath, false, zkProcessStat);
        Collections.sort(stages, new StageComparator());

        StageStat prev = null;
        for (String stage : stages) { // iterate over each stage under the process
          String stagePath = processPath + "/" + stage;
          Stat zkStat = new Stat();

          StageStat stageStat = new StageStat();
          stageStat.setPipelineId(pipelineId);
          stageStat.setProcessId(processId);

          byte[] bytes = orginZk.getData(stagePath, false, zkStat);
          if (bytes != null && bytes.length > 0) {
            // Special handling of the data stored in ZooKeeper: the manager has no class
            // matching the node-side PipeKey object, so deserialization would fail; work
            // around it by stripping the '@' characters
            String json = StringUtils.remove(new String(bytes, "UTF-8"), '@');
            EtlEventData data = JsonUtils.unmarshalFromString(json, EtlEventData.class);
            stageStat.setNumber(data.getNumber());
            stageStat.setSize(data.getSize());

            Map exts = new HashMap();
            if (!CollectionUtils.isEmpty(data.getExts())) {
              exts.putAll(data.getExts());
            }
            exts.put("currNid", data.getCurrNid());
            exts.put("nextNid", data.getNextNid());
            exts.put("desc", data.getDesc());
            stageStat.setExts(exts);
          }
          if (prev != null) { // start time is the end time of the previous stage
            stageStat.setStartTime(prev.getEndTime());
          } else {
            stageStat.setStartTime(zkProcessStat.getMtime()); // last-modified time of the process node;
            // the USED flag is set once select's await succeeds
          }
          stageStat.setEndTime(zkStat.getMtime());
          if (ArbitrateConstants.NODE_SELECTED.equals(stage)) {
            stageStat.setStage(StageType.SELECT);
          } else if (ArbitrateConstants.NODE_EXTRACTED.equals(stage)) {
            stageStat.setStage(StageType.EXTRACT);
          } else if (ArbitrateConstants.NODE_TRANSFORMED.equals(stage)) {
            stageStat.setStage(StageType.TRANSFORM);
            // } else if
            // (ArbitrateConstants.NODE_LOADED.equals(stage)) {
            // stageStat.setStage(StageType.LOAD);
          }

          prev = stageStat;
          stageStats.add(stageStat);
        }

        // Append an entry for the stage that is currently being processed
        StageStat currentStageStat = new StageStat();
        currentStageStat.setPipelineId(pipelineId);
        currentStageStat.setProcessId(processId);
        if (prev == null) {
          byte[] bytes = orginZk.getData(processPath, false, zkProcessStat);
          if (bytes == null || bytes.length == 0) {
            continue; // treat as unused and skip it
          }

          ProcessNodeEventData nodeData =
              JsonUtils.unmarshalFromByte(bytes, ProcessNodeEventData.class);
          if (nodeData.getStatus().isUnUsed()) { // process is unused, ignore it
            continue; // skip this process
          } else {
            currentStageStat.setStage(StageType.SELECT); // select operation
            currentStageStat.setStartTime(zkProcessStat.getMtime());
          }
        } else {
          // Look at the previous stage to determine the current one
          // NOTE(review): setStage is not called above for a stage node whose name is not one
          // of the three recognised constants, so prev.getStage() is presumably null in that
          // case and the calls below would throw -- confirm.
          StageType stage = prev.getStage();
          if (stage.isSelect()) {
            currentStageStat.setStage(StageType.EXTRACT);
          } else if (stage.isExtract()) {
            currentStageStat.setStage(StageType.TRANSFORM);
          } else if (stage.isTransform()) {
            currentStageStat.setStage(StageType.LOAD);
          } else if (stage.isLoad()) { // already the last stage
            continue;
          }

          currentStageStat.setStartTime(prev.getEndTime()); // start time is the end time of the previous stage
        }

        if (currentStageStat.getStage().isLoad()) { // a LOAD entry is only added for the first process
          if (i == 0) {
            stageStats.add(currentStageStat);
          }
        } else {
          stageStats.add(currentStageStat); // add it in every other case
        }

      } catch (NoNodeException e) {
        // ignore
      } catch (KeeperException e) {
        throw new ArbitrateException(e);
      } catch (InterruptedException e) {
        // ignore
      } catch (UnsupportedEncodingException e) {
        // ignore
      }

      processStats.add(processStat);
    }

    return processStats;
  }
示例#9
0
 /**
  * Reads the metrics stored at a metrics data node and deserializes them.
  *
  * <p>The path is built from {@code clusterId}, {@code serviceId} and, when not {@code null},
  * {@code dataNode}. The node's mtime is stored as the last-updated timestamp of the result.
  *
  * @param clusterId cluster part of the metrics path
  * @param serviceId service part of the metrics path
  * @param dataNode optional trailing path token; {@code null} to address the service node itself
  * @return the deserialized metrics, or {@code null} if the node does not exist or its content
  *     contains a property that {@code MetricsData} does not recognize
  * @throws ReignException if reading or deserializing fails for any other reason
  */
 MetricsData getMetricsFromDataNode(String clusterId, String serviceId, String dataNode) {
   PathScheme pathScheme = getContext().getPathScheme();
   String dataPath = null;
   if (dataNode != null) {
     dataPath =
         pathScheme.getAbsolutePath(
             PathType.METRICS, pathScheme.joinTokens(clusterId, serviceId, dataNode));
   } else {
     dataPath =
         pathScheme.getAbsolutePath(PathType.METRICS, pathScheme.joinTokens(clusterId, serviceId));
   }
   // Stays null if getData itself fails; the error paths below must cope with that.
   byte[] bytes = null;
   try {
     Stat stat = new Stat();
     bytes = getContext().getZkClient().getData(dataPath, true, stat);
     MetricsData metricsData = JacksonUtil.getObjectMapper().readValue(bytes, MetricsData.class);
     metricsData.setLastUpdatedTimestamp(stat.getMtime());
     return metricsData;
   } catch (KeeperException e) {
     // A missing node is not an error: there are simply no metrics.
     if (e.code() == KeeperException.Code.NONODE) {
       return null;
     }
     String message = describeDataNodeError(clusterId, serviceId, dataPath, bytes);
     logger.warn(message + ":  " + e, e);
     throw new ReignException(message, e);
   } catch (UnrecognizedPropertyException e) {
     // Content with unknown properties is logged and treated as "no metrics".
     logger.warn(describeDataNodeError(clusterId, serviceId, dataPath, bytes) + ":  " + e, e);
     return null;
   } catch (Exception e) {
     if (e instanceof InterruptedException) {
       // Preserve the interrupt for callers; the failure is still reported below.
       Thread.currentThread().interrupt();
     }
     String message = describeDataNodeError(clusterId, serviceId, dataPath, bytes);
     logger.warn(message + ":  " + e, e);
     throw new ReignException(message, e);
   }
 }

 /**
  * Builds the diagnostic message shared by the error paths of
  * {@code getMetricsFromDataNode}.
  *
  * <p>{@code bytes} may be {@code null} (nothing was read before the failure); in that case
  * the message shows {@code dataAsString=null} instead of failing while decoding.
  */
 private String describeDataNodeError(
     String clusterId, String serviceId, String dataPath, byte[] bytes) {
   String dataAsString = (bytes == null) ? null : new String(bytes, UTF_8);
   return "Error retrieving data node:  clusterId="
       + clusterId
       + "; serviceId="
       + serviceId
       + "; dataPath="
       + dataPath
       + "; dataAsString="
       + dataAsString;
 }