@Override public void run() { while (!shutdown) { try { Thread.sleep(nodeExpiryInterval / 2); if (clusterManager.safeMode) { // Do nothing but sleep continue; } long now = ClusterManager.clock.getTime(); for (ClusterNode node : nameToNode.values()) { if (now - node.lastHeartbeatTime > nodeExpiryInterval) { LOG.warn("Timing out node: " + node.getName()); clusterManager.nodeTimeout(node.getName()); } } } catch (InterruptedException iex) { // ignore. if shutting down, while cond. will catch it continue; } } }
/**
 * Cancels a grant held on a node and, if that frees enough resources, puts the node back into
 * the runnable index for the grant's resource type.
 *
 * <p>The call is a logged no-op when the node is unknown, already deleted, or its host is not
 * allowed. It is a silent no-op when the node has no request recorded for the given grant.
 *
 * @param nodeName the node the grant is on
 * @param sessionId the session the grant was given to
 * @param requestId the request this grant satisfied
 */
public void cancelGrant(String nodeName, String sessionId, int requestId) {
  ClusterNode target = nameToNode.get(nodeName);
  if (target == null) {
    LOG.warn("Canceling grant for non-existent node: " + nodeName);
    return;
  }
  synchronized (target) {
    if (target.deleted) {
      LOG.warn("Canceling grant for deleted node: " + nodeName);
      return;
    }
    String hostName = target.getClusterNodeInfo().getAddress().getHost();
    if (!canAllowNode(hostName)) {
      LOG.warn("Canceling grant for excluded node: " + hostName);
      return;
    }
    ResourceRequestInfo granted = target.getRequestForGrant(sessionId, requestId);
    if (granted == null) {
      return;
    }

    ResourceRequest unit = Utilities.getUnitResourceRequest(granted.getType());
    // Remember whether the node could take a unit request before the grant is released.
    boolean wasRunnable = target.checkForGrant(unit, resourceLimit);
    target.cancelGrant(sessionId, requestId);
    loadManager.decrementLoad(granted.getType());

    // Only a transition from "not runnable" to "runnable" requires an index update.
    if (wasRunnable) {
      return;
    }
    if (!target.checkForGrant(unit, resourceLimit)) {
      return;
    }
    RunnableIndices indices = typeToIndices.get(granted.getType());
    boolean blacklisted = faultManager.isBlacklisted(target.getName(), granted.getType());
    if (!blacklisted) {
      indices.addRunnable(target);
    }
  }
}
/**
 * Removes a node from both the per-host and the per-rack runnable containers. The count of
 * hosts with runnable nodes is decremented only when the host container reports that it
 * actually removed the node.
 *
 * @param node node to remove
 */
public void deleteRunnable(ClusterNode node) {
  String hostName = node.getHost();
  if (LOG.isDebugEnabled()) {
    LOG.debug(node.getName() + " deleted from runnable list for type: " + type);
  }

  // The containers themselves are never removed from the maps below, because operations on
  // the runnable indices synchronize on them.
  NodeContainer hostContainer = hostToRunnableNodes.get(hostName);
  if (hostContainer != null) {
    synchronized (hostContainer) {
      boolean removed = hostContainer.removeNode(node);
      if (removed) {
        hostsWithRunnableNodes.decrementAndGet();
      }
    }
  }

  NodeContainer rackContainer = rackToRunnableNodes.get(node.hostNode.getParent());
  if (rackContainer != null) {
    synchronized (rackContainer) {
      rackContainer.removeNode(node);
    }
  }
}
/** * Refresh the includes/excludes information. * * @throws IOException */ public synchronized void refreshNodes() throws IOException { hostsReader.refresh(); LOG.info( "After refresh Included hosts: " + hostsReader.getHostNames().size() + " Excluded hosts: " + hostsReader.getExcludedHosts().size()); Set<String> newHosts = hostsReader.getHostNames(); Set<String> newExcludes = hostsReader.getExcludedHosts(); Set<ClusterNode> hostsToExclude = new HashSet<ClusterNode>(); for (ClusterNode tmpNode : nameToNode.values()) { String host = tmpNode.getHost(); // Check if not included or explicitly excluded. if (!newHosts.contains(host) || newExcludes.contains(host)) { hostsToExclude.add(tmpNode); } } for (ClusterNode node : hostsToExclude) { synchronized (node) { for (Map.Entry<ResourceType, RunnableIndices> entry : typeToIndices.entrySet()) { ResourceType type = entry.getKey(); RunnableIndices r = entry.getValue(); if (r.hasRunnable(node)) { LOG.info( "Node " + node.getName() + " is no longer " + type + " runnable because it is excluded"); r.deleteRunnable(node); } } } } }
/**
 * Looks up the application-specific information registered for a resource type on a node.
 *
 * @param node The node.
 * @param type The type of resources.
 * @return The application-specific information, or {@code null} when nothing is registered
 *     for the node or for that type on the node
 */
public String getAppInfo(ClusterNode node, ResourceType type) {
  Map<ResourceType, String> appsOnNode = nameToApps.get(node.getName());
  return appsOnNode == null ? null : appsOnNode.get(type);
}
/**
 * Brings the runnable indices in line with what the node can currently accept. For every
 * resource type, the node is added to or removed from that type's index whenever its presence
 * there disagrees with the result of checking a unit request against the node.
 *
 * @param node The node
 */
private void updateRunnability(ClusterNode node) {
  synchronized (node) {
    for (Map.Entry<ResourceType, RunnableIndices> indexEntry : typeToIndices.entrySet()) {
      ResourceType type = indexEntry.getKey();
      RunnableIndices indices = indexEntry.getValue();
      ResourceRequest unit = Utilities.getUnitResourceRequest(type);

      boolean listed = indices.hasRunnable(node);
      boolean eligible = node.checkForGrant(unit, resourceLimit);
      if (listed == eligible) {
        // Index already reflects reality for this type.
        continue;
      }

      if (eligible) {
        LOG.info("Node " + node.getName() + " is now " + type + " runnable");
        indices.addRunnable(node);
      } else {
        LOG.info("Node " + node.getName() + " is no longer " + type + " runnable");
        indices.deleteRunnable(node);
      }
    }
  }
}
/** * Delete the node from the cluster. This happens when the node times out or is being * decommissioned. * * @param node the node to remove * @return the list of grants that are running on the node */ protected Set<ClusterNode.GrantId> deleteNode(ClusterNode node) { synchronized (node) { if (node.deleted) { return null; } node.deleted = true; // 1: primary nameToNode.remove(node.getName()); faultManager.deleteNode(node.getName()); nameToApps.remove(node.getName()); hostsToSessions.remove(node); setAliveDeadMetrics(); // 2: update runnable index for (RunnableIndices r : typeToIndices.values()) { r.deleteRunnable(node); } return node.getGrants(); } }
/** * Add a node to be managed. * * @param node Node to be managed * @param resourceInfos Mapping of the resource type to runnable indices */ protected void addNode(ClusterNode node, Map<ResourceType, String> resourceInfos) { synchronized (node) { // 1: primary nameToNode.put(node.getName(), node); faultManager.addNode(node.getName(), resourceInfos.keySet()); nameToApps.put(node.getName(), resourceInfos); hostsToSessions.put(node, new HashSet<String>()); setAliveDeadMetrics(); // 2: update runnable indices for (Map.Entry<ResourceType, RunnableIndices> entry : typeToIndices.entrySet()) { ResourceType type = entry.getKey(); if (resourceInfos.containsKey(type)) { if (node.checkForGrant(Utilities.getUnitResourceRequest(type), resourceLimit)) { RunnableIndices r = entry.getValue(); r.addRunnable(node); } } } } }
/**
 * Remove one application type from the node. Happens when the daemon responsible for handling
 * this application type on the node goes down.
 *
 * <p>Only the entry for {@code type} is removed from the node's application map. The map
 * itself stays registered in {@code nameToApps}, so the other application types on the node
 * remain visible and a later {@code addAppToNode} call finds a map to update.
 *
 * @param node the node
 * @param type the type of the resource
 * @return the grants that belonged to the application on this node, or {@code null} if the
 *     node has already been deleted
 */
protected Set<ClusterNode.GrantId> deleteAppFromNode(ClusterNode node, ResourceType type) {
  synchronized (node) {
    if (node.deleted) {
      return null;
    }
    // Drop just this application type. Removing the node's whole entry would also discard
    // the info of every other application still running on the node.
    Map<ResourceType, String> apps = nameToApps.get(node.getName());
    if (apps != null) {
      apps.remove(type);
    }
    RunnableIndices r = typeToIndices.get(type);
    r.deleteRunnable(node);
    return node.getGrants(type);
  }
}
/** * Register a new application on the node * * @param node the node to register on * @param type the type of an application * @param appInfo the appInfo string for the application */ protected void addAppToNode(ClusterNode node, ResourceType type, String appInfo) { synchronized (node) { // Update primary index. Map<ResourceType, String> apps = nameToApps.get(node.getName()); apps.put(type, appInfo); // Update runnable indices. for (Map.Entry<ResourceType, RunnableIndices> entry : typeToIndices.entrySet()) { if (type.equals(entry.getKey())) { if (node.checkForGrant(Utilities.getUnitResourceRequest(type), resourceLimit)) { RunnableIndices r = entry.getValue(); r.addRunnable(node); } } } } }
/**
 * Registers a node in both the per-host and the per-rack runnable containers, creating the
 * containers on demand, and bumps the count of hosts with runnable nodes.
 *
 * @param clusterNode the node to add
 */
public void addRunnable(ClusterNode clusterNode) {
  String hostName = clusterNode.getHost();
  if (LOG.isDebugEnabled()) {
    LOG.debug(clusterNode.getName() + " added to runnable list for type: " + type);
  }

  // Host-level container; the counter is updated under the same lock as the insertion.
  NodeContainer hostContainer = getOrCreateHostRunnableNode(hostName);
  synchronized (hostContainer) {
    hostContainer.addNode(clusterNode);
    hostsWithRunnableNodes.incrementAndGet();
  }

  // Rack-level container, keyed by the parent of the node's host in the topology.
  NodeContainer rackContainer = getOrCreateRackRunnableNode(clusterNode.hostNode.getParent());
  synchronized (rackContainer) {
    rackContainer.addNode(clusterNode);
  }
}