/**
 * Cancels a grant previously given out on a node and releases the load
 * accounted for it.
 *
 * <p>Nothing is done (beyond a warning) when the node is unknown, already
 * marked deleted, or its host is rejected by {@code canAllowNode}. If the
 * node could not take a unit request of the grant's type before the
 * cancellation but can afterwards, it is put back into the runnable index
 * for that type, unless the fault manager reports it as blacklisted for
 * that type.
 *
 * @param nodeName the node the grant is on
 * @param sessionId the session the grant was given to
 * @param requestId the request this grant satisfied
 */
public void cancelGrant(String nodeName, String sessionId, int requestId) {
  ClusterNode node = nameToNode.get(nodeName);
  if (node == null) {
    LOG.warn("Canceling grant for non-existent node: " + nodeName);
    return;
  }

  // All per-node state is read and mutated while holding the node's monitor.
  synchronized (node) {
    if (node.deleted) {
      LOG.warn("Canceling grant for deleted node: " + nodeName);
      return;
    }

    String host = node.getClusterNodeInfo().getAddress().getHost();
    if (!canAllowNode(host)) {
      LOG.warn("Canceling grant for excluded node: " + host);
      return;
    }

    ResourceRequestInfo grantedRequest =
        node.getRequestForGrant(sessionId, requestId);
    if (grantedRequest == null) {
      // No such grant recorded on this node; nothing to cancel.
      return;
    }

    ResourceType type = grantedRequest.getType();
    ResourceRequest unitRequest = Utilities.getUnitResourceRequest(type);

    // Remember whether the node could already accept one more unit request
    // so that we only touch the runnable index on a real state change.
    boolean wasRunnable = node.checkForGrant(unitRequest, resourceLimit);

    node.cancelGrant(sessionId, requestId);
    loadManager.decrementLoad(type);

    if (wasRunnable || !node.checkForGrant(unitRequest, resourceLimit)) {
      return;
    }

    RunnableIndices indices = typeToIndices.get(type);
    boolean blacklisted = faultManager.isBlacklisted(node.getName(), type);
    if (!blacklisted) {
      indices.addRunnable(node);
    }
  }
}
/** * Process feedback about nodes. * * @param handle The session handle. * @param resourceTypes The types of resource this feedback is about. * @param reportList The list of reports. */ public void nodeFeedback( String handle, List<ResourceType> resourceTypes, List<NodeUsageReport> reportList) { // Iterate over each report. for (NodeUsageReport usageReport : reportList) { faultManager.nodeFeedback(usageReport.getNodeName(), resourceTypes, usageReport); } }
/** * Delete the node from the cluster. This happens when the node times out or is being * decommissioned. * * @param node the node to remove * @return the list of grants that are running on the node */ protected Set<ClusterNode.GrantId> deleteNode(ClusterNode node) { synchronized (node) { if (node.deleted) { return null; } node.deleted = true; // 1: primary nameToNode.remove(node.getName()); faultManager.deleteNode(node.getName()); nameToApps.remove(node.getName()); hostsToSessions.remove(node); setAliveDeadMetrics(); // 2: update runnable index for (RunnableIndices r : typeToIndices.values()) { r.deleteRunnable(node); } return node.getGrants(); } }
/** * Add a node to be managed. * * @param node Node to be managed * @param resourceInfos Mapping of the resource type to runnable indices */ protected void addNode(ClusterNode node, Map<ResourceType, String> resourceInfos) { synchronized (node) { // 1: primary nameToNode.put(node.getName(), node); faultManager.addNode(node.getName(), resourceInfos.keySet()); nameToApps.put(node.getName(), resourceInfos); hostsToSessions.put(node, new HashSet<String>()); setAliveDeadMetrics(); // 2: update runnable indices for (Map.Entry<ResourceType, RunnableIndices> entry : typeToIndices.entrySet()) { ResourceType type = entry.getKey(); if (resourceInfos.containsKey(type)) { if (node.checkForGrant(Utilities.getUnitResourceRequest(type), resourceLimit)) { RunnableIndices r = entry.getValue(); r.addRunnable(node); } } } } }
/**
 * Applies a new configuration.
 *
 * <p>Re-reads the node expiry interval, interrupts the expiry thread if one
 * exists, rebuilds the load manager and topology cache, makes sure a
 * runnable index exists for every resource type mentioned in the
 * CPU-to-resource partitioning, and passes the configuration on to the
 * resource limit and the fault manager.
 *
 * @param newConf the new configuration; it is cast to {@code CoronaConf}
 */
@Override
public void setConf(Configuration newConf) {
  this.conf = (CoronaConf) newConf;
  nodeExpiryInterval = conf.getNodeExpiryInterval();

  // NOTE(review): presumably this wakes the expiry thread so it picks up
  // the new interval -- confirm against the thread's run loop.
  if (this.expireNodesThread != null) {
    this.expireNodesThread.interrupt();
  }

  loadManager = new LoadManager(this);
  topologyCache = new TopologyCache(conf);
  cpuToResourcePartitioning = conf.getCpuToResourcePartitioning();

  // Only the resource types matter here, not the CPU counts they are
  // keyed by; existing indices are kept, missing ones are created.
  for (Map<ResourceType, Integer> partitioning
      : cpuToResourcePartitioning.values()) {
    for (ResourceType type : partitioning.keySet()) {
      if (typeToIndices.containsKey(type)) {
        continue;
      }
      typeToIndices.put(type, new RunnableIndices(type));
    }
  }

  resourceLimit.setConf(conf);
  faultManager.setConf(conf);
}
/**
 * Handles a node being blacklisted for one resource type: logs the event,
 * publishes the fault manager's current blacklisted-node count to the
 * cluster manager metrics, and calls {@code deleteAppFromNode} for the
 * node and resource type.
 *
 * @param nodeName The node name
 * @param resourceType The resource type.
 */
void blacklistNode(String nodeName, ResourceType resourceType) {
  String message =
      "Node " + nodeName + " has been blacklisted for resource " + resourceType;
  LOG.info(message);

  clusterManager.getMetrics().setBlacklistedNodes(
      faultManager.getBlacklistedNodeCount());

  deleteAppFromNode(nodeName, resourceType);
}