private synchronized void getZkRunning() throws Exception { LOG.debug("Reading " + parentZnode + Constants.DEFAULT_ZOOKEEPER_ZNODE_SERVERS_RUNNING); List<String> children = getChildren( parentZnode + Constants.DEFAULT_ZOOKEEPER_ZNODE_SERVERS_RUNNING, new RunningWatcher()); if (!children.isEmpty()) { for (String child : children) { // If stop-wms.sh is executed and WMS_MANAGES_ZK then zookeeper // is stopped abruptly. // Second scenario is when ZooKeeper fails for some reason // regardless of whether WMS // manages it. When either happens the WmsServer running znodes // still exist in ZooKeeper // and we see them at next startup. When they eventually timeout // we get node deleted events for a server that no longer // exists. So, only recognize // WmsServer running znodes that have timestamps after last // WmsMaster startup. Scanner scn = new Scanner(child); scn.useDelimiter(":"); String hostName = scn.next(); String instance = scn.next(); int infoPort = Integer.parseInt(scn.next()); long serverStartTimestamp = Long.parseLong(scn.next()); scn.close(); if (serverStartTimestamp < startupTimestamp) continue; if (!runningServers.contains(child)) { LOG.debug("Watching running [" + child + "]"); zkc.exists( parentZnode + Constants.DEFAULT_ZOOKEEPER_ZNODE_SERVERS_RUNNING + "/" + child, new RunningWatcher()); runningServers.add(child); } } metrics.setTotalRunning(runningServers.size()); } else { metrics.setTotalRunning(0); } }
/**
 * Handles a failed WmsServer identified by the path of its running znode.
 *
 * <p>The running-znode prefix is stripped from {@code znodePath} to obtain the
 * server's child name, which is parsed as
 * {@code hostName:instance:infoPort:serverStartTimestamp}. The failure is
 * logged, the child name is removed from {@code runningServers} if present
 * (updating the running total in {@code metrics}), and a
 * {@code RestartHandler} for the child name is added to {@code restartQueue}.
 *
 * @param znodePath path of the server's znode under the running-servers znode
 * @throws Exception if the child name does not have four fields or has a
 *         non-numeric port or timestamp
 */
private synchronized void restartServer(String znodePath) throws Exception {
    final String runningPrefix = parentZnode
            + Constants.DEFAULT_ZOOKEEPER_ZNODE_SERVERS_RUNNING + "/";
    final String serverKey = znodePath.replace(runningPrefix, "");

    // Child name layout: hostName:instance:infoPort:serverStartTimestamp
    Scanner fields = new Scanner(serverKey);
    fields.useDelimiter(":");
    String host = fields.next();
    String instanceId = fields.next();
    // Port and timestamp are not used below, but parsing them rejects malformed names.
    int port = Integer.parseInt(fields.next());
    long startedAt = Long.parseLong(fields.next());
    fields.close();

    LOG.error("WmsServer [" + host + ":" + instanceId + "] failed.");

    boolean tracked = runningServers.contains(serverKey);
    if (tracked) {
        LOG.debug("Found [" + serverKey + "], deleting from running servers list");
        runningServers.remove(serverKey);
        metrics.setTotalRunning(runningServers.size());
    }

    restartQueue.add(new RestartHandler(serverKey));
}