// @wjw_note: 把任务分配给taskServerList里随机的一个server! private void assignServer2Task(List<String> taskServerList, String taskPath) throws Exception { // @wjw_note: 清除过期的无Runner的Task { Stat stat = new Stat(); this.getZooKeeper().getData(taskPath, null, stat); if (getSystemTime() - stat.getMtime() > TASK_EXPIRE_TIME) { ZKTools.deleteTree(this.getZooKeeper(), taskPath); LOG.warn("清除过期的无Runner的Task[" + taskPath + "]"); return; } } int index = random.nextInt(taskServerList.size()); String serverId = taskServerList.get(index); this.getZooKeeper() .create(taskPath + "/" + serverId, null, this.zkManager.getAcl(), CreateMode.PERSISTENT); if (LOG.isDebugEnabled()) { StringBuilder buffer = new StringBuilder(); buffer .append("Assign server [") .append(serverId) .append("]") .append(" to task [") .append(taskPath) .append("]"); LOG.debug(buffer.toString()); } }
/**
 * Copies every field of one {@code Stat} into another.
 *
 * @param from the stat to read from
 * @param to the stat to overwrite
 */
public static void copyStat(Stat from, Stat to) {
  // Transaction ids.
  to.setCzxid(from.getCzxid());
  to.setMzxid(from.getMzxid());
  to.setPzxid(from.getPzxid());

  // Timestamps.
  to.setCtime(from.getCtime());
  to.setMtime(from.getMtime());

  // Version counters.
  to.setVersion(from.getVersion());
  to.setCversion(from.getCversion());
  to.setAversion(from.getAversion());

  // Ownership and size information.
  to.setEphemeralOwner(from.getEphemeralOwner());
  to.setDataLength(from.getDataLength());
  to.setNumChildren(from.getNumChildren());
}
@Override public void process(WatchedEvent event) { // this lock is important so that when changes start happening, we // won't run into any concurrency issues synchronized (WatchedNode.this) { if (!cancelled) { if (event.getState() == KeeperState.SyncConnected) { // If connected update data and notify listeners try { if (event.getType().equals(Event.EventType.NodeCreated)) { watchForData(); } else if (event.getType().equals(Event.EventType.NodeDeleted)) { // Previous version notified is null, and we will notify with null previousVersion = null; watchForCreation(); } else if (event.getType().equals(Event.EventType.NodeDataChanged)) { watchForData(); } } catch (KeeperException e) { LOG.error("Exception while trying to update our cached value for " + nodePath, e); } catch (InterruptedException e) { if (LOG.isTraceEnabled()) { LOG.trace( "Interrupted while trying to update our cached value for " + nodePath, e); } } // Notify of new value if either we didn't notify of any value, or the node has // changed long currentVersion = stat.getCtime() + stat.getMtime(); if (previousVersion == null || !previousVersion.equals(currentVersion)) { try { synchronized (listeners) { for (WatchedNodeListener<T> listener : listeners) { listener.onWatchedNodeChange(value); } } } finally { previousVersion = currentVersion; } } } else { // Not sync connected, do nothing if (LOG.isDebugEnabled()) { LOG.debug("Not sync connected anymore for watched node " + nodePath); } } } } }
public PositionEventData getCanalCursor(String destination, short clientId) { String path = String.format(CANAL_CURSOR_PATH, destination, String.valueOf(clientId)); try { IZkConnection connection = zookeeper.getConnection(); // zkclient会将获取stat信息和正常的操作分开,使用原生的zk进行优化 ZooKeeper orginZk = ((ZooKeeperx) connection).getZookeeper(); Stat stat = new Stat(); byte[] bytes = orginZk.getData(path, false, stat); PositionEventData eventData = new PositionEventData(); eventData.setCreateTime(new Date(stat.getCtime())); eventData.setModifiedTime(new Date(stat.getMtime())); eventData.setPosition(new String(bytes, "UTF-8")); return eventData; } catch (Exception e) { return null; } }
public void clearExpireScheduleServer() throws Exception { String zkPath = this.pathServer; if (this.getZooKeeper().exists(zkPath, false) == null) { this.getZooKeeper().create(zkPath, null, this.zkManager.getAcl(), CreateMode.PERSISTENT); } for (String name : this.zkManager.getZooKeeper().getChildren(zkPath, false)) { try { Stat stat = new Stat(); this.getZooKeeper().getData(zkPath + "/" + name, null, stat); if (getSystemTime() - stat.getMtime() > SERVER_EXPIRE_TIME) { ZKTools.deleteTree(this.getZooKeeper(), zkPath + "/" + name); LOG.warn("清除过期ScheduleServer[" + zkPath + "/" + name + "]"); } } catch (Exception e) { // 当有多台服务器时,存在并发清理的可能,忽略异常 } } }
/**
 * Get metrics data for this service node (self) for current interval.
 *
 * <p>If the export entry exists but its data path has not been set yet, this call blocks on the
 * entry's monitor until the path becomes non-null. The condition is re-checked under the monitor
 * in a loop, so a notification sent between the initial check and the wait is not lost and a
 * spurious wakeup does not proceed with a null path.
 * NOTE(review): this assumes the exporter sets {@code dataPath} before (or while) notifying on
 * the same {@code ExportMeta} instance; confirm against the exporter code.
 *
 * @param clusterId cluster the service belongs to
 * @param serviceId service whose metrics are requested
 * @return the metrics read from ZooKeeper; {@code null} if nothing has been exported for this
 *     node, if the data node does not exist, or if the thread is interrupted while waiting for
 *     the data path
 * @throws ReignException if reading or deserializing the data fails for any other reason
 */
public MetricsData getMyMetrics(String clusterId, String serviceId) {
  String key = clusterId + "/" + serviceId + "/" + getContext().getZkNodeId().getPathToken();
  ExportMeta exportMeta = exportPathMap.get(key);
  if (exportMeta == null) {
    logger.trace(
        "MetricsData not found: data has not been exported: clusterId={}; serviceId={}; exportMeta={}",
        clusterId,
        serviceId,
        exportMeta);
    return null;
  }

  if (exportMeta.dataPath == null) {
    logger.trace(
        "MetricsData not found: waiting for data to be reported in ZK: clusterId={}; serviceId={}; exportMeta.dataPath={}",
        clusterId,
        serviceId,
        exportMeta.dataPath);
    synchronized (exportMeta) {
      // Re-check under the lock and loop: guards against lost notifications and spurious wakeups.
      while (exportMeta.dataPath == null) {
        try {
          exportMeta.wait();
        } catch (InterruptedException e) {
          logger.warn("Interrupted while waiting: " + e, e);
          // Preserve the interrupt for callers and stop waiting.
          Thread.currentThread().interrupt();
          break;
        }
      }
    }
    if (exportMeta.dataPath == null) {
      // Interrupted before a path was published; there is nothing to read.
      return null;
    }
  }

  try {
    logger.debug("Retrieving metrics: path={}", exportMeta.dataPath);
    Stat stat = new Stat();
    byte[] bytes = getContext().getZkClient().getData(exportMeta.dataPath, true, stat);
    MetricsData metricsData = JacksonUtil.getObjectMapper().readValue(bytes, MetricsData.class);
    metricsData.setClusterId(clusterId);
    metricsData.setServiceId(serviceId);
    metricsData.setLastUpdatedTimestamp(stat.getMtime());
    return metricsData;
  } catch (KeeperException e) {
    if (e.code() == KeeperException.Code.NONODE) {
      // The data node is gone; report "no metrics" rather than an error.
      return null;
    }
    throw new ReignException(e);
  } catch (Exception e) {
    throw new ReignException(e);
  }
}
@Override // @wjw_note: 非常重要的,分配任务的逻辑! public void assignTask(String currentUuid, List<String> taskServerList) throws Exception { if (this.zkManager.isZookeeperConnected() == false) { return; } if (this.isLeader(currentUuid, taskServerList) == false) { if (LOG.isDebugEnabled()) { LOG.debug(currentUuid + ":不是负责任务分配的Leader,直接返回"); } return; } if (LOG.isDebugEnabled()) { LOG.debug(currentUuid + ":开始重新分配任务......"); } if (taskServerList.size() <= 0) { // 在服务器动态调整的时候,可能出现服务器列表为空的清空 return; } String zkPath = this.pathTask; if (this.getZooKeeper().exists(zkPath, false) == null) { this.getZooKeeper().create(zkPath, null, this.zkManager.getAcl(), CreateMode.PERSISTENT); } List<String> taskChildren = this.getZooKeeper().getChildren(zkPath, false); if (null == taskChildren || taskChildren.size() == 0) { if (LOG.isDebugEnabled()) { LOG.debug(currentUuid + ":没有集群任务"); } return; } for (int i = 0; i < taskChildren.size(); i++) { String taskName = taskChildren.get(i); String taskPath = zkPath + "/" + taskName; if (this.getZooKeeper().exists(taskPath, false) == null) { this.getZooKeeper().create(taskPath, null, this.zkManager.getAcl(), CreateMode.PERSISTENT); } List<String> taskServerIds = this.getZooKeeper().getChildren(taskPath, false); if (null == taskServerIds || taskServerIds.size() == 0) { // 执行task的节点是空的 assignServer2Task(taskServerList, taskPath); } else { boolean hasAssignSuccess = false; for (String serverId : taskServerIds) { if (taskServerList.contains(serverId)) { Stat stat = new Stat(); this.getZooKeeper().getData(taskPath + "/" + serverId, null, stat); if (getSystemTime() - stat.getMtime() < TASK_EXPIRE_TIME) { // @wjw_note: 判断task Owner节点是否过期 hasAssignSuccess = true; continue; } } LOG.warn("删除僵尸Task Runner: " + taskPath + "/" + serverId); ZKTools.deleteTree( this.getZooKeeper(), taskPath + "/" + serverId); // @wjw_note: 删除某一节点已经死掉的残留下来的僵尸task! 
} if (hasAssignSuccess == false) { assignServer2Task( taskServerList, taskPath); // @wjw_note: 把任务分配给taskServerList里随机的一个server! } } } }
/**
 * Builds a per-process view of stage progress for a pipeline from the ZooKeeper process tree.
 *
 * <p>Process ids are read from the children of the process root and handled in ascending order.
 * For each process, one {@code StageStat} is created per stage child node (timed from the node
 * mtimes), followed by one synthetic entry for the stage currently in progress. Note that the
 * {@code continue} statements inside the try block skip the final {@code processStats.add(...)},
 * so such processes are omitted from the result entirely.
 *
 * <p>NOTE(review): a stage node whose name matches none of SELECTED/EXTRACTED/TRANSFORMED leaves
 * {@code stageStat}'s stage unset; if that is the last stage, {@code prev.getStage()} below would
 * presumably be null and dereferencing it would throw. Confirm such nodes cannot occur.
 *
 * @param channelId channel the pipeline belongs to
 * @param pipelineId pipeline whose processes are listed
 * @return stats for every process that was not skipped, in ascending process-id order
 * @throws ArbitrateException on ZooKeeper errors other than a missing node
 */
public List<ProcessStat> listProcesses(Long channelId, Long pipelineId) {
    List<ProcessStat> processStats = new ArrayList<ProcessStat>();
    String processRoot = ManagePathUtils.getProcessRoot(channelId, pipelineId);
    IZkConnection connection = zookeeper.getConnection();
    // zkclient separates fetching stat information from normal operations, so use the native
    // ZooKeeper client as an optimization.
    ZooKeeper orginZk = ((ZooKeeperx) connection).getZookeeper();
    // Fetch the list of all processes.
    List<String> processNodes = zookeeper.getChildren(processRoot);
    List<Long> processIds = new ArrayList<Long>();
    for (String processNode : processNodes) {
        processIds.add(ManagePathUtils.getProcessId(processNode));
    }
    Collections.sort(processIds);
    for (int i = 0; i < processIds.size(); i++) {
        Long processId = processIds.get(i);
        // The current process may change while we are reading it.
        ProcessStat processStat = new ProcessStat();
        processStat.setPipelineId(pipelineId);
        processStat.setProcessId(processId);
        List<StageStat> stageStats = new ArrayList<StageStat>();
        processStat.setStageStats(stageStats);
        try {
            String processPath = ManagePathUtils.getProcess(channelId, pipelineId, processId);
            Stat zkProcessStat = new Stat();
            List<String> stages = orginZk.getChildren(processPath, false, zkProcessStat);
            Collections.sort(stages, new StageComparator());
            StageStat prev = null;
            for (String stage : stages) { // Iterate over each stage under the process.
                String stagePath = processPath + "/" + stage;
                Stat zkStat = new Stat();
                StageStat stageStat = new StageStat();
                stageStat.setPipelineId(pipelineId);
                stageStat.setProcessId(processId);
                byte[] bytes = orginZk.getData(stagePath, false, zkStat);
                if (bytes != null && bytes.length > 0) {
                    // Special handling of the data stored in ZooKeeper: the manager has no class
                    // matching the node-side PipeKey object, so deserialization would fail;
                    // strip the '@' characters as a workaround.
                    String json = StringUtils.remove(new String(bytes, "UTF-8"), '@');
                    EtlEventData data = JsonUtils.unmarshalFromString(json, EtlEventData.class);
                    stageStat.setNumber(data.getNumber());
                    stageStat.setSize(data.getSize());
                    Map exts = new HashMap();
                    if (!CollectionUtils.isEmpty(data.getExts())) {
                        exts.putAll(data.getExts());
                    }
                    exts.put("currNid", data.getCurrNid());
                    exts.put("nextNid", data.getNextNid());
                    exts.put("desc", data.getDesc());
                    stageStat.setExts(exts);
                }
                if (prev != null) { // Start time is the end time of the previous stage node.
                    stageStat.setStartTime(prev.getEndTime());
                } else {
                    // Last-modified time of the process node; the USED flag is set after the
                    // select await succeeds.
                    stageStat.setStartTime(zkProcessStat.getMtime());
                }
                stageStat.setEndTime(zkStat.getMtime());
                if (ArbitrateConstants.NODE_SELECTED.equals(stage)) {
                    stageStat.setStage(StageType.SELECT);
                } else if (ArbitrateConstants.NODE_EXTRACTED.equals(stage)) {
                    stageStat.setStage(StageType.EXTRACT);
                } else if (ArbitrateConstants.NODE_TRANSFORMED.equals(stage)) {
                    stageStat.setStage(StageType.TRANSFORM);
                    // } else if (ArbitrateConstants.NODE_LOADED.equals(stage)) {
                    // stageStat.setStage(StageType.LOAD);
                }
                prev = stageStat;
                stageStats.add(stageStat);
            }
            // Add an entry for the stage that is currently being processed.
            StageStat currentStageStat = new StageStat();
            currentStageStat.setPipelineId(pipelineId);
            currentStageStat.setProcessId(processId);
            if (prev == null) {
                // No stage nodes yet: inspect the process node itself (this also refreshes
                // zkProcessStat).
                byte[] bytes = orginZk.getData(processPath, false, zkProcessStat);
                if (bytes == null || bytes.length == 0) {
                    continue; // Treat as unused and ignore it.
                }
                ProcessNodeEventData nodeData = JsonUtils.unmarshalFromByte(bytes, ProcessNodeEventData.class);
                if (nodeData.getStatus().isUnUsed()) { // Process is unused; ignore it.
                    continue; // Skip this process.
                } else {
                    currentStageStat.setStage(StageType.SELECT); // select operation
                    currentStageStat.setStartTime(zkProcessStat.getMtime());
                }
            } else {
                // Determine the current stage from the previous (last completed) node.
                StageType stage = prev.getStage();
                if (stage.isSelect()) {
                    currentStageStat.setStage(StageType.EXTRACT);
                } else if (stage.isExtract()) {
                    currentStageStat.setStage(StageType.TRANSFORM);
                } else if (stage.isTransform()) {
                    currentStageStat.setStage(StageType.LOAD);
                } else if (stage.isLoad()) { // Already the last node.
                    continue;
                }
                currentStageStat.setStartTime(prev.getEndTime()); // Start time is the previous node's end time.
            }
            if (currentStageStat.getStage().isLoad()) { // LOAD is only added for the first process node.
                if (i == 0) {
                    stageStats.add(currentStageStat);
                }
            } else {
                stageStats.add(currentStageStat); // Add in all other cases.
            }
        } catch (NoNodeException e) {
            // ignore
        } catch (KeeperException e) {
            throw new ArbitrateException(e);
        } catch (InterruptedException e) {
            // ignore
        } catch (UnsupportedEncodingException e) {
            // ignore
        }
        processStats.add(processStat);
    }
    return processStats;
}
/**
 * Reads and deserializes the metrics stored at a metrics data node.
 *
 * <p>The path is built from {@code clusterId}, {@code serviceId} and, when non-null,
 * {@code dataNode}. The node's last-modified time is stored on the returned object.
 *
 * <p>Error messages include the raw node data when it was read. The data is rendered
 * null-safely: if the read itself failed, {@code bytes} is still {@code null}, and converting it
 * unconditionally would raise a {@code NullPointerException} inside the handler and hide the
 * original failure.
 *
 * @param clusterId cluster the service belongs to
 * @param serviceId service whose metrics are requested
 * @param dataNode data node name under the service, or {@code null} to read the service node
 * @return the metrics, or {@code null} if the node does not exist or its content has
 *     unrecognized properties
 * @throws ReignException if reading or deserializing fails for any other reason
 */
MetricsData getMetricsFromDataNode(String clusterId, String serviceId, String dataNode) {
  PathScheme pathScheme = getContext().getPathScheme();
  String dataPath = null;
  if (dataNode != null) {
    dataPath =
        pathScheme.getAbsolutePath(
            PathType.METRICS, pathScheme.joinTokens(clusterId, serviceId, dataNode));
  } else {
    dataPath =
        pathScheme.getAbsolutePath(PathType.METRICS, pathScheme.joinTokens(clusterId, serviceId));
  }

  byte[] bytes = null;
  try {
    Stat stat = new Stat();
    bytes = getContext().getZkClient().getData(dataPath, true, stat);
    MetricsData metricsData = JacksonUtil.getObjectMapper().readValue(bytes, MetricsData.class);
    metricsData.setLastUpdatedTimestamp(stat.getMtime());
    return metricsData;
  } catch (KeeperException e) {
    if (e.code() == KeeperException.Code.NONODE) {
      // Missing node means "no metrics", not an error.
      return null;
    }
    String message = dataNodeErrorMessage(clusterId, serviceId, dataPath, bytes);
    logger.warn(message + ": " + e, e);
    throw new ReignException(message, e);
  } catch (UnrecognizedPropertyException e) {
    // Content with unknown properties is logged and reported as "no metrics".
    logger.warn(dataNodeErrorMessage(clusterId, serviceId, dataPath, bytes) + ": " + e, e);
    return null;
  } catch (Exception e) {
    String message = dataNodeErrorMessage(clusterId, serviceId, dataPath, bytes);
    logger.warn(message + ": " + e, e);
    throw new ReignException(message, e);
  }
}

/** Builds the shared error message; renders the node data as "null" when nothing was read. */
private String dataNodeErrorMessage(
    String clusterId, String serviceId, String dataPath, byte[] bytes) {
  String dataAsString = (bytes == null) ? null : new String(bytes, UTF_8);
  return "Error retrieving data node: clusterId="
      + clusterId
      + "; serviceId="
      + serviceId
      + "; dataPath="
      + dataPath
      + "; dataAsString="
      + dataAsString;
}