/** * Refresh the includes/excludes information. * * @throws IOException */ public synchronized void refreshNodes() throws IOException { hostsReader.refresh(); LOG.info( "After refresh Included hosts: " + hostsReader.getHostNames().size() + " Excluded hosts: " + hostsReader.getExcludedHosts().size()); Set<String> newHosts = hostsReader.getHostNames(); Set<String> newExcludes = hostsReader.getExcludedHosts(); Set<ClusterNode> hostsToExclude = new HashSet<ClusterNode>(); for (ClusterNode tmpNode : nameToNode.values()) { String host = tmpNode.getHost(); // Check if not included or explicitly excluded. if (!newHosts.contains(host) || newExcludes.contains(host)) { hostsToExclude.add(tmpNode); } } for (ClusterNode node : hostsToExclude) { synchronized (node) { for (Map.Entry<ResourceType, RunnableIndices> entry : typeToIndices.entrySet()) { ResourceType type = entry.getKey(); RunnableIndices r = entry.getValue(); if (r.hasRunnable(node)) { LOG.info( "Node " + node.getName() + " is no longer " + type + " runnable because it is excluded"); r.deleteRunnable(node); } } } } }
private void sendKillTwoAndReceive(ClusterNode node1, ClusterNode node2, ClusterNode node3) { log.info("Sending messages on node 1"); HazelcastMQProducer mqProducer = node1.getMqContext().createProducer(); mqProducer.send(destination, "Hello " + msgCounter++); mqProducer.send(destination, "Hello " + msgCounter++); // Kill the first two nodes. Again, this may not prove too much because we // don't know where the original data landed in the cluster. There's a // chance the "master" data isn't sitting on node1 or node2 anyway. log.info("Killing node 1"); node1.kill(); log.info("Killing node 2"); node2.kill(); log.info("Attempting receive from node 3"); HazelcastMQConsumer mqConsumer = node3.getMqContext().createConsumer(destination); String msg = new String(mqConsumer.receiveBody(1, TimeUnit.SECONDS)); log.info("Got message on node 3: " + msg); mqConsumer.close(); mqConsumer = node3.getMqContext().createConsumer(destination); msg = new String(mqConsumer.receiveBody(1, TimeUnit.SECONDS)); log.info("Got message on node 3: " + msg); mqConsumer.close(); }
/**
 * Sends four messages through node 1 and then receives one message at a time on nodes 2, 1, 3
 * and 2 (in that order), opening and closing a consumer for each receive.
 *
 * @param node1 node used for sending and for the second receive
 * @param node2 node used for the first and last receive
 * @param node3 node used for the third receive
 */
private void sendAndReceiveOnMultipleNodes(
    ClusterNode node1, ClusterNode node2, ClusterNode node3) {
  HazelcastMQProducer producer = node1.getMqContext().createProducer();
  for (int sent = 0; sent < 4; sent++) {
    producer.send(destination, "Hello " + msgCounter++);
  }

  // Receive order and the matching log prefix for each step.
  ClusterNode[] receivers = {node2, node1, node3, node2};
  String[] logPrefixes = {
    "Got message on node 2: ",
    "Got message on node 1: ",
    "Got message on node 3: ",
    "Got message on node 2: "
  };

  for (int step = 0; step < receivers.length; step++) {
    HazelcastMQConsumer consumer = receivers[step].getMqContext().createConsumer(destination);
    String body = new String(consumer.receiveBody(1, TimeUnit.SECONDS));
    log.info(logPrefixes[step] + body);
    consumer.close();
  }
}
private void restoreClusterNode(ClusterNode clusterNode) { clusterNode.hostNode = topologyCache.getNode(clusterNode.getHost()); // This will reset the lastHeartbeatTime clusterNode.heartbeat(clusterNode.getClusterNodeInfo()); clusterNode.initResourceTypeToMaxCpuMap(cpuToResourcePartitioning); updateRunnability(clusterNode); }
/**
 * Removes a node from both the per-host and the per-rack runnable containers.
 *
 * <p>The containers themselves are never removed from their maps, because operations on the
 * runnable indices synchronize on them.
 *
 * @param node node to remove
 */
public void deleteRunnable(ClusterNode node) {
  String host = node.getHost();
  if (LOG.isDebugEnabled()) {
    LOG.debug(node.getName() + " deleted from runnable list for type: " + type);
  }

  // Host-level index: the counter is only decremented when the node was actually removed.
  NodeContainer hostNodes = hostToRunnableNodes.get(host);
  if (hostNodes != null) {
    synchronized (hostNodes) {
      boolean removed = hostNodes.removeNode(node);
      if (removed) {
        hostsWithRunnableNodes.decrementAndGet();
      }
    }
  }

  // Rack-level index, keyed by the parent of the node's topology node.
  Node rack = node.hostNode.getParent();
  NodeContainer rackNodes = rackToRunnableNodes.get(rack);
  if (rackNodes != null) {
    synchronized (rackNodes) {
      rackNodes.removeNode(node);
    }
  }
}
/** * Get any runnable node that is not one of the excluded nodes * * @param excluded the list of nodes to ignore * @return the runnable node, null if no runnable node can be found */ public ClusterNode getRunnableNodeForAny(Set<String> excluded) { double avgLoad = loadManager.getAverageLoad(type); // Make two passes over the nodes. In the first pass, try to find a // node that has lower than average number of grants on it. If that does // not find a node, try looking at all nodes. for (int pass = 0; pass < 2; pass++) { for (Map.Entry<String, NodeContainer> e : hostToRunnableNodes.entrySet()) { NodeContainer nodeContainer = e.getValue(); if (nodeContainer == null) { continue; } synchronized (nodeContainer) { if (nodeContainer.isEmpty()) { continue; } for (ClusterNode node : nodeContainer) { if (excluded == null || !excluded.contains(node.getHost())) { if (resourceLimit.hasEnoughResource(node)) { // When pass == 0, try to average out the load. if (pass == 0) { if (node.getGrantCount(type) < avgLoad) { return node; } } else { return node; } } } } } } } return null; }
/**
 * Builds and initializes a cluster node backed by an in-memory journal.
 *
 * <p>The journal is overridden to always sync again on new records and is handed the
 * {@code records} list held by this class.
 *
 * @param id cluster node id
 * @param disableAutoSync if <code>true</code> background synchronization is disabled
 * @return the initialized (not yet started) cluster node
 * @throws Exception if setting up the journal or the node fails
 */
private ClusterNode createClusterNode(String id, boolean disableAutoSync) throws Exception {
  final MemoryJournal memoryJournal =
      new MemoryJournal() {
        protected boolean syncAgainOnNewRecords() {
          return true;
        }
      };

  // Factory that always hands out the single journal created above.
  JournalFactory journalFactory =
      new JournalFactory() {
        public Journal getJournal(NamespaceResolver resolver) throws RepositoryException {
          return memoryJournal;
        }
      };

  ClusterConfig config = new ClusterConfig(id, SYNC_DELAY, journalFactory);
  SimpleClusterContext context = new SimpleClusterContext(config);

  memoryJournal.setRepositoryHome(context.getRepositoryHome());
  memoryJournal.init(id, context.getNamespaceResolver());
  memoryJournal.setRecords(records);

  ClusterNode created = new ClusterNode();
  created.init(context);
  if (disableAutoSync) {
    created.disableAutoSync();
  }
  return created;
}
/**
 * Sends the local partitions to the current oldest node.
 *
 * <p>A topology failure is only logged at debug level; any other checked failure schedules a
 * recheck and is logged as an error.
 */
private void sendPartitions() {
  ClusterNode oldest = this.oldestNode.get();

  try {
    sendLocalPartitions(oldest, exchId);
  } catch (ClusterTopologyCheckedException ignore) {
    // The oldest node left the topology; nothing to retry from here.
    if (log.isDebugEnabled()) {
      log.debug(
          "Oldest node left during partition exchange [nodeId="
              + oldest.id()
              + ", exchId="
              + exchId
              + ']');
    }
  } catch (IgniteCheckedException e) {
    scheduleRecheck();

    U.error(
        log,
        "Failed to send local partitions to oldest node (will retry after timeout) [oldestNodeId="
            + oldest.id()
            + ", exchId="
            + exchId
            + ']',
        e);
  }
}
@Override public void run() { while (!shutdown) { try { Thread.sleep(nodeExpiryInterval / 2); if (clusterManager.safeMode) { // Do nothing but sleep continue; } long now = ClusterManager.clock.getTime(); for (ClusterNode node : nameToNode.values()) { if (now - node.lastHeartbeatTime > nodeExpiryInterval) { LOG.warn("Timing out node: " + node.getName()); clusterManager.nodeTimeout(node.getName()); } } } catch (InterruptedException iex) { // ignore. if shutting down, while cond. will catch it continue; } } }
/** * Get a runnable node in the given rack that is not present in the excluded list * * @param requestedNode the node to look up rack locality for * @param excluded the list of nodes to ignore * @return the runnable node from the rack satisfying conditions, null if the node was not found */ public ClusterNode getRunnableNodeForRack(RequestedNode requestedNode, Set<String> excluded) { NodeContainer nodeContainer = requestedNode.getRackNodes(); getRunnableNodeForRackCounter += 1; if (nodeContainer == null) { return null; } synchronized (nodeContainer) { if (nodeContainer.isEmpty()) { return null; } if (getRunnableNodeForRackCounter % RACK_SHUFFLE_PERIOD == 0) { // This balances more evenly across nodes in a rack nodeContainer.shuffle(); } for (ClusterNode node : nodeContainer) { if (excluded == null || !excluded.contains(node.getHost())) { if (resourceLimit.hasEnoughResource(node)) { return node; } } } } return null; }
/** * Sends query request. * * @param fut Distributed future. * @param req Request. * @param nodes Nodes. * @throws IgniteCheckedException In case of error. */ @SuppressWarnings("unchecked") private void sendRequest( final GridCacheDistributedQueryFuture<?, ?, ?> fut, final GridCacheQueryRequest req, Collection<ClusterNode> nodes) throws IgniteCheckedException { assert fut != null; assert req != null; assert nodes != null; final UUID locNodeId = cctx.localNodeId(); ClusterNode locNode = null; Collection<ClusterNode> rmtNodes = null; for (ClusterNode n : nodes) { if (n.id().equals(locNodeId)) locNode = n; else { if (rmtNodes == null) rmtNodes = new ArrayList<>(nodes.size()); rmtNodes.add(n); } } // Request should be sent to remote nodes before the query is processed on the local node. // For example, a remote reducer has a state, we should not serialize and then send // the reducer changed by the local node. if (!F.isEmpty(rmtNodes)) { cctx.io() .safeSend( rmtNodes, req, cctx.ioPolicy(), new P1<ClusterNode>() { @Override public boolean apply(ClusterNode node) { fut.onNodeLeft(node.id()); return !fut.isDone(); } }); } if (locNode != null) { cctx.closures() .callLocalSafe( new Callable<Object>() { @Override public Object call() throws Exception { req.beforeLocalExecution(cctx); processQueryRequest(locNodeId, req); return null; } }); } }
/**
 * {@inheritDoc}
 *
 * <p>Stops the slave node, then the master node, then delegates to the superclass. Every step is
 * attempted even when an earlier one throws, so a failure while stopping the slave no longer
 * leaves the master running or skips the superclass cleanup. If several steps throw, the
 * exception of the last failing step is the one that propagates.
 */
@Override
protected void tearDown() throws Exception {
  try {
    if (slave != null) {
      slave.stop();
    }
  } finally {
    try {
      if (master != null) {
        master.stop();
      }
    } finally {
      super.tearDown();
    }
  }
}
/**
 * {@inheritDoc}
 *
 * <p>Creates and starts the master node, then the slave node (created with auto-sync disabled),
 * and only then delegates to the superclass.
 */
@Override
protected void setUp() throws Exception {
  // Each field is assigned before start() is called on the node.
  ClusterNode masterNode = createClusterNode("master", false);
  master = masterNode;
  masterNode.start();

  ClusterNode slaveNode = createClusterNode("slave", true);
  slave = slaveNode;
  slaveNode.start();

  super.setUp();
}
/**
 * Lists the non-deleted nodes whose maximum CPU for a resource type exceeds what is currently
 * allocated for that type.
 *
 * @param type the resource type to check
 * @return one entry per matching node, formatted as {@code "<name>: <free resources>"}
 */
public List<String> getFreeNodesForType(ResourceType type) {
  List<String> result = new ArrayList<String>();
  for (Map.Entry<String, ClusterNode> entry : nameToNode.entrySet()) {
    ClusterNode node = entry.getValue();
    synchronized (node) {
      if (node.deleted) {
        continue;
      }
      boolean hasFreeCpu = node.getMaxCpuForType(type) > node.getAllocatedCpuForType(type);
      if (!hasFreeCpu) {
        continue;
      }
      result.add(entry.getKey() + ": " + node.getFree().toString());
    }
  }
  return result;
}
/**
 * Sums the CPU allocated for a resource type over all nodes that are not marked deleted.
 *
 * @param type The resource type.
 * @return The total allocation across non-deleted nodes.
 */
public int getAllocatedCpuForType(ResourceType type) {
  int allocated = 0;
  for (ClusterNode node : nameToNode.values()) {
    synchronized (node) {
      if (!node.deleted) {
        allocated += node.getAllocatedCpuForType(type);
      }
    }
  }
  return allocated;
}
/**
 * Removes an application type from a node. Happens when the daemon responsible for handling this
 * application type on the node goes down.
 *
 * @param node the node
 * @param type the type of the resource
 * @return the grants of the given type on this node, or {@code null} if the node is marked
 *     deleted
 */
protected Set<ClusterNode.GrantId> deleteAppFromNode(ClusterNode node, ResourceType type) {
  synchronized (node) {
    if (!node.deleted) {
      // NOTE(review): this drops the node's whole nameToApps entry, not only the entry for
      // `type` — confirm that this is intended.
      nameToApps.remove(node.getName());

      RunnableIndices indices = typeToIndices.get(type);
      indices.deleteRunnable(node);

      return node.getGrants(type);
    }
    return null;
  }
}
/**
 * Checks whether the local node is the one with the smallest order among {@code nodes()}.
 *
 * @return {@code True} if the local node ID equals the ID of the node with the minimum order.
 */
public boolean jobUpdateLeader() {
  ClusterNode leader = null;
  long leaderOrder = Long.MAX_VALUE;

  for (ClusterNode candidate : nodes()) {
    long order = candidate.order();

    // Strict comparison: on equal orders the first node seen stays the leader.
    if (order < leaderOrder) {
      leaderOrder = order;
      leader = candidate;
    }
  }

  assert leader != null;

  return localNodeId().equals(leader.id());
}
/**
 * Applies every per-cache full partition map from the message.
 *
 * <p>If the cache context exists, its topology is updated. Otherwise the client topology is
 * updated, but only when the oldest alive cache server node exists and is the local node.
 *
 * @param msg Partitions full messages.
 */
private void updatePartitionFullMap(GridDhtPartitionsFullMessage msg) {
  for (Map.Entry<Integer, GridDhtPartitionFullMap> e : msg.partitions().entrySet()) {
    Integer cacheId = e.getKey();

    GridCacheContext cacheCtx = cctx.cacheContext(cacheId);

    if (cacheCtx != null) {
      cacheCtx.topology().update(exchId, e.getValue());

      continue;
    }

    // No context for this cache ID on this node.
    ClusterNode oldest = CU.oldestAliveCacheServerNode(cctx, AffinityTopologyVersion.NONE);

    if (oldest != null && oldest.isLocal()) {
      cctx.exchange().clientTopology(cacheId, this).update(exchId, e.getValue());
    }
  }
}
/**
 * Performs the clustering: scans every cell of the data grid and, for each cell not yet
 * clustered, creates a new cluster node seeded with that cell and grows it via
 * {@code clusterLoop}.
 *
 * @return the cluster populated by this run
 * @throws ClusterException declared by this method; the throwing call is not visible here
 */
public Cluster perform() throws ClusterException {
  for (int col = 0; col < data.getWidth(); col++) {
    for (int row = 0; row < data.getHeight(); row++) {
      if (cluster.isClustered(col, row)) {
        continue;
      }
      // Unclustered cell: start a new cluster node here.
      ClusterNode seed = new SimpleClusterNode(count++);
      seed.addCell(col, row, data.getCellAt(col, row));
      cluster.setClusterNode(col, row, seed);
      clusterLoop(col, row, seed);
    }
  }
  return cluster;
}
/**
 * {@inheritDoc}
 *
 * <p>Starts from the affinity nodes for the partition and appends every other node mapped to the
 * partition that is in state OWNING, MOVING or RENTING, is still known to discovery, and whose
 * order fits the requested topology version.
 */
@Override
public Collection<ClusterNode> nodes(int p, AffinityTopologyVersion topVer) {
  Collection<ClusterNode> affNodes = cctx.affinity().nodes(p, topVer);

  lock.readLock().lock();

  try {
    assert node2part != null && node2part.valid()
        : "Invalid node-to-partitions map [topVer1="
            + topVer
            + ", topVer2="
            + this.topVer
            + ", cache="
            + cctx.name()
            + ", node2part="
            + node2part
            + ']';

    Collection<UUID> nodeIds = part2node.get(p);

    if (F.isEmpty(nodeIds)) {
      return affNodes;
    }

    Collection<UUID> affIds = new HashSet<>(F.viewReadOnly(affNodes, F.node2id()));

    // Created lazily: stays null while there is nothing to add to the affinity nodes.
    Collection<ClusterNode> extended = null;

    for (UUID nodeId : nodeIds) {
      if (affIds.contains(nodeId) || !hasState(p, nodeId, OWNING, MOVING, RENTING)) {
        continue;
      }

      ClusterNode n = cctx.discovery().node(nodeId);

      if (n == null) {
        continue;
      }

      boolean fitsVersion =
          topVer.topologyVersion() < 0 || n.order() <= topVer.topologyVersion();

      if (!fitsVersion) {
        continue;
      }

      if (extended == null) {
        extended = new ArrayList<>(affNodes.size() + 2);

        extended.addAll(affNodes);
      }

      extended.add(n);
    }

    return extended != null ? extended : affNodes;
  } finally {
    lock.readLock().unlock();
  }
}
/** * @param p Partition. * @param topVer Topology version ({@code -1} for all nodes). * @param state Partition state. * @param states Additional partition states. * @return List of nodes for the partition. */ private List<ClusterNode> nodes( int p, AffinityTopologyVersion topVer, GridDhtPartitionState state, GridDhtPartitionState... states) { Collection<UUID> allIds = topVer.topologyVersion() > 0 ? F.nodeIds(CU.affinityNodes(cctx, topVer)) : null; lock.readLock().lock(); try { assert node2part != null && node2part.valid() : "Invalid node-to-partitions map [topVer=" + topVer + ", allIds=" + allIds + ", node2part=" + node2part + ", cache=" + cctx.name() + ']'; Collection<UUID> nodeIds = part2node.get(p); // Node IDs can be null if both, primary and backup, nodes disappear. int size = nodeIds == null ? 0 : nodeIds.size(); if (size == 0) return Collections.emptyList(); List<ClusterNode> nodes = new ArrayList<>(size); for (UUID id : nodeIds) { if (topVer.topologyVersion() > 0 && !allIds.contains(id)) continue; if (hasState(p, id, state, states)) { ClusterNode n = cctx.discovery().node(id); if (n != null && (topVer.topologyVersion() < 0 || n.order() <= topVer.topologyVersion())) nodes.add(n); } } return nodes; } finally { lock.readLock().unlock(); } }
/** * Register a new application on the node * * @param node the node to register on * @param type the type of an application * @param appInfo the appInfo string for the application */ protected void addAppToNode(ClusterNode node, ResourceType type, String appInfo) { synchronized (node) { // Update primary index. Map<ResourceType, String> apps = nameToApps.get(node.getName()); apps.put(type, appInfo); // Update runnable indices. for (Map.Entry<ResourceType, RunnableIndices> entry : typeToIndices.entrySet()) { if (type.equals(entry.getKey())) { if (node.checkForGrant(Utilities.getUnitResourceRequest(type), resourceLimit)) { RunnableIndices r = entry.getValue(); r.addRunnable(node); } } } } }
/**
 * Looks up the application info registered on a node for a resource type.
 *
 * @param node The node.
 * @param type The type of resources.
 * @return The application-specific information, or {@code null} if the node has no application
 *     map or no value for the type
 */
public String getAppInfo(ClusterNode node, ResourceType type) {
  Map<ResourceType, String> appsOnNode = nameToApps.get(node.getName());
  return appsOnNode == null ? null : appsOnNode.get(type);
}
private void sendKillTwoRestartOneKillOneAndReceive( ClusterNode node1, ClusterNode node2, ClusterNode node3) throws InterruptedException { HazelcastMQProducer mqProducer = node1.getMqContext().createProducer(); mqProducer.send(destination, "Hello " + msgCounter++); mqProducer.send(destination, "Hello " + msgCounter++); // Kill the first two nodes. Again, this may not prove too much because we // don't know where the original data landed in the cluster. There's a // chance the "master" data isn't sitting on node1 or node2 anyway. node1.kill(); node2.kill(); HazelcastMQConsumer mqConsumer = node3.getMqContext().createConsumer(destination); String msg = new String(mqConsumer.receiveBody(1, TimeUnit.SECONDS)); log.info("Got message on node 3: " + msg); mqConsumer.close(); // Now restart node 2 and give it some time to join the cluster and migrate // data. node2.restart(); Thread.sleep(10000); // Now kill node 3. In theory the remaining queued message should have // migrated to node 2. node3.kill(); mqConsumer = node2.getMqContext().createConsumer(destination); msg = new String(mqConsumer.receiveBody(1, TimeUnit.SECONDS)); log.info("Got message on node 2: " + msg); mqConsumer.close(); }
/**
 * Cancels a grant on a node and, if that makes the node able to take a unit grant of the
 * request's type again, re-adds it to the runnable indices unless it is blacklisted for that
 * type.
 *
 * <p>Nothing happens (beyond a warning) when the node is unknown, deleted or excluded, and
 * nothing at all when no request matches the grant.
 *
 * @param nodeName the node the grant is on
 * @param sessionId the session the grant was given to
 * @param requestId the request this grant satisfied
 */
public void cancelGrant(String nodeName, String sessionId, int requestId) {
  ClusterNode node = nameToNode.get(nodeName);
  if (node == null) {
    LOG.warn("Canceling grant for non-existent node: " + nodeName);
    return;
  }

  synchronized (node) {
    if (node.deleted) {
      LOG.warn("Canceling grant for deleted node: " + nodeName);
      return;
    }

    String hoststr = node.getClusterNodeInfo().getAddress().getHost();
    if (!canAllowNode(hoststr)) {
      LOG.warn("Canceling grant for excluded node: " + hoststr);
      return;
    }

    ResourceRequestInfo req = node.getRequestForGrant(sessionId, requestId);
    if (req == null) {
      return;
    }

    // Capture runnability before the cancel so the transition can be detected afterwards.
    ResourceRequest unitReq = Utilities.getUnitResourceRequest(req.getType());
    boolean wasRunnable = node.checkForGrant(unitReq, resourceLimit);

    node.cancelGrant(sessionId, requestId);
    loadManager.decrementLoad(req.getType());

    boolean becameRunnable = !wasRunnable && node.checkForGrant(unitReq, resourceLimit);
    if (!becameRunnable) {
      return;
    }

    RunnableIndices indices = typeToIndices.get(req.getType());
    boolean blacklisted = faultManager.isBlacklisted(node.getName(), req.getType());
    if (!blacklisted) {
      indices.addRunnable(node);
    }
  }
}
/**
 * Points the links, nodes and both children of a new cluster at that cluster.
 *
 * <p>Incoming links get the cluster as their "to" end and outgoing links as their "from" end.
 * Incoming and outgoing links call {@code setNewRoot} with {@code false}, inter links with
 * {@code true}.
 *
 * @param newCluster the cluster whose members are re-rooted
 */
private void updateLinksAndNodes(NodeCluster newCluster) {
  for (ClusterLink incoming : newCluster.getInLinks().values()) {
    incoming.setNewRoot(newCluster, false);
    incoming.setToCluster(newCluster);
  }

  for (ClusterLink outgoing : newCluster.getOutLinks().values()) {
    outgoing.setNewRoot(newCluster, false);
    outgoing.setFromCluster(newCluster);
  }

  for (ClusterLink inter : newCluster.getInterLinks().values()) {
    inter.setNewRoot(newCluster, true);
  }

  for (ClusterNode member : newCluster.getNodes().values()) {
    member.setNewRoot(newCluster);
  }

  // Finally re-parent the two child clusters.
  newCluster.getChild1().setParent(newCluster);
  newCluster.getChild2().setParent(newCluster);
}
/**
 * {@inheritDoc}
 *
 * <p>Includes the oldest node's ID and order, the event latch count, the remaining set and the
 * superclass representation; absent values are rendered as the string {@code "null"}.
 */
@Override
public String toString() {
  ClusterNode oldest = this.oldestNode.get();

  // Resolve the nullable parts up front, in the same order they are printed.
  Object oldestId = oldest == null ? "null" : oldest.id();
  Object oldestOrder = oldest == null ? "null" : oldest.order();
  Object latchCount = evtLatch == null ? "null" : evtLatch.getCount();

  return S.toString(
      GridDhtPartitionsExchangeFuture.class,
      this,
      "oldest",
      oldestId,
      "oldestOrder",
      oldestOrder,
      "evtLatch",
      latchCount,
      "remaining",
      remaining(),
      "super",
      super.toString());
}
/**
 * Brings the runnable indices of every resource type in line with whether the node can currently
 * take a unit grant of that type.
 *
 * @param node The node
 */
private void updateRunnability(ClusterNode node) {
  synchronized (node) {
    for (Map.Entry<ResourceType, RunnableIndices> entry : typeToIndices.entrySet()) {
      ResourceType type = entry.getKey();
      RunnableIndices indices = entry.getValue();
      ResourceRequest unitReq = Utilities.getUnitResourceRequest(type);

      boolean listed = indices.hasRunnable(node);
      boolean eligible = node.checkForGrant(unitReq, resourceLimit);

      // Nothing to do when the index already reflects the node's state.
      if (listed == eligible) {
        continue;
      }

      if (eligible) {
        LOG.info("Node " + node.getName() + " is now " + type + " runnable");
        indices.addRunnable(node);
      } else {
        LOG.info("Node " + node.getName() + " is no longer " + type + " runnable");
        indices.deleteRunnable(node);
      }
    }
  }
}
/**
 * Adds a node to both the per-host and the per-rack runnable containers, creating the
 * containers when needed.
 *
 * @param clusterNode the node to add
 */
public void addRunnable(ClusterNode clusterNode) {
  String host = clusterNode.getHost();
  if (LOG.isDebugEnabled()) {
    LOG.debug(clusterNode.getName() + " added to runnable list for type: " + type);
  }

  // Host-level index.
  // NOTE(review): the counter is bumped on every call, while deleteRunnable only decrements it
  // when a node was actually removed — confirm that duplicate adds cannot happen.
  NodeContainer hostNodes = getOrCreateHostRunnableNode(host);
  synchronized (hostNodes) {
    hostNodes.addNode(clusterNode);
    hostsWithRunnableNodes.incrementAndGet();
  }

  // Rack-level index, keyed by the parent of the node's topology node.
  Node rack = clusterNode.hostNode.getParent();
  NodeContainer rackNodes = getOrCreateRackRunnableNode(rack);
  synchronized (rackNodes) {
    rackNodes.addNode(clusterNode);
  }
}
/**
 * Adds a grant to a node, provided the node is not deleted and can take a unit grant of the
 * request's type. If the node can no longer take another unit grant afterwards, it is removed
 * from the runnable indices of that type.
 *
 * @param node the node the grant is on
 * @param sessionId the session the grant is given to
 * @param req the request this grant satisfies
 * @return true if the grant can be added to the node, false otherwise
 */
public boolean addGrant(ClusterNode node, String sessionId, ResourceRequestInfo req) {
  synchronized (node) {
    if (node.deleted) {
      return false;
    }

    ResourceRequest unitReq = Utilities.getUnitResourceRequest(req.getType());
    boolean fitsNow = node.checkForGrant(unitReq, resourceLimit);
    if (!fitsNow) {
      return false;
    }

    node.addGrant(sessionId, req);
    loadManager.incrementLoad(req.getType());
    hostsToSessions.get(node).add(sessionId);

    // The grant may have used up the last unit of this type on the node.
    boolean stillFits = node.checkForGrant(unitReq, resourceLimit);
    if (!stillFits) {
      typeToIndices.get(req.getType()).deleteRunnable(node);
    }
  }
  return true;
}