protected void fetchRebalancingStatusFromCoordinator() {
  if (!transport.isCoordinator()) {
    ReplicableCommand command =
        new CacheTopologyControlCommand(
            null,
            CacheTopologyControlCommand.Type.POLICY_GET_STATUS,
            transport.getAddress(),
            transport.getViewId());
    Address coordinator = transport.getCoordinator();
    try {
      Map<Address, Response> responseMap =
          transport.invokeRemotely(
              Collections.singleton(coordinator),
              command,
              ResponseMode.SYNCHRONOUS,
              getGlobalTimeout(),
              null,
              DeliverOrder.NONE,
              false);
      Response response = responseMap.get(coordinator);
      if (response instanceof SuccessfulResponse) {
        globalRebalancingEnabled = ((Boolean) ((SuccessfulResponse) response).getResponseValue());
      } else {
        log.errorReadingRebalancingStatus(coordinator, null);
      }
    } catch (Exception e) {
      log.errorReadingRebalancingStatus(coordinator, e);
    }
  }
}

@Override
public void handleClusterView(boolean mergeView, int newViewId) {
  synchronized (viewHandlingLock) {
    // check to ensure this is not an older view
    if (newViewId <= viewId) {
      log.tracef("Ignoring old cluster view notification: %s", newViewId);
      return;
    }
    boolean becameCoordinator = !isCoordinator && transport.isCoordinator();
    isCoordinator = transport.isCoordinator();
    if (trace) {
      log.tracef(
          "Received new cluster view: %d, isCoordinator = %s, becameCoordinator = %s",
          (Object) newViewId, isCoordinator, becameCoordinator);
    }
    mustRecoverClusterStatus |= mergeView || becameCoordinator;
    if (!isCoordinator) return;

    if (mustRecoverClusterStatus) {
      // Clean up leftover cache status information from the last time we were coordinator.
      // E.g. if the local node was coordinator, started a rebalance, and then lost coordinator
      // status because of a merge, the existing cache statuses may have a rebalance in progress.
      cacheStatusMap.clear();
      try {
        recoverClusterStatus(newViewId, mergeView, transport.getMembers());
        mustRecoverClusterStatus = false;
      } catch (InterruptedException e) {
        log.tracef("Cluster state recovery interrupted because the coordinator is shutting down");
        // the CTMI has already stopped, no need to update the view id or notify waiters
        return;
      } catch (SuspectException e) {
        // We will retry when we receive the new view and then we'll reset the
        // mustRecoverClusterStatus flag
        return;
      } catch (Exception e) {
        if (!isShuttingDown) {
          log.failedToRecoverClusterState(e);
        } else {
          log.tracef("Cluster state recovery failed because the coordinator is shutting down");
        }
      }
    }

    // update the view id last, so join requests from other nodes wait until we recovered
    // existing members' info
    synchronized (viewUpdateLock) {
      viewId = newViewId;
      viewUpdateLock.notifyAll();
    }
  }

  if (!mustRecoverClusterStatus) {
    try {
      updateCacheMembers(transport.getMembers());
    } catch (Exception e) {
      log.errorUpdatingMembersList(e);
    }
  }
}

protected Cache<String, String> startCache() throws IOException {
  CacheBuilder cb = new CacheBuilder(cfgFile);
  EmbeddedCacheManager cacheManager = cb.getCacheManager();
  Configuration dcc = cacheManager.getDefaultCacheConfiguration();
  cacheManager.defineConfiguration(
      "wordcount",
      new ConfigurationBuilder()
          .read(dcc)
          .clustering()
          .l1()
          .disable()
          .clustering()
          .cacheMode(CacheMode.DIST_SYNC)
          .hash()
          .numOwners(1)
          .build());
  // Retrieve the cache by the name configured above; calling getCache() with no name would
  // return the default cache and the "wordcount" configuration would never be used.
  Cache<String, String> cache = cacheManager.getCache("wordcount");
  Transport transport = cache.getAdvancedCache().getRpcManager().getTransport();
  if (isMaster)
    System.out.printf(
        "Node %s joined as master. View is %s.%n",
        transport.getAddress(), transport.getMembers());
  else
    System.out.printf(
        "Node %s joined as slave. View is %s.%n",
        transport.getAddress(), transport.getMembers());
  return cache;
}

@Start(priority = 100)
public void start() {
  isShuttingDown = false;
  isCoordinator = transport.isCoordinator();

  viewListener = new ClusterViewListener();
  cacheManagerNotifier.addListener(viewListener);
  // The listener already missed the initial view
  handleClusterView(false, transport.getViewId());
}

private void confirmMembersAvailable() throws Exception {
  // Send a synchronous status command to every member as a heartbeat; an unresponsive member
  // makes the invocation fail and the exception propagates to the caller.
  ReplicableCommand heartbeatCommand =
      new CacheTopologyControlCommand(
          null, CacheTopologyControlCommand.Type.POLICY_GET_STATUS, transport.getAddress(), -1);
  transport.invokeRemotely(
      null,
      heartbeatCommand,
      ResponseMode.SYNCHRONOUS,
      getGlobalTimeout(),
      null,
      DeliverOrder.NONE,
      false);
}

@Override
public void broadcastRebalanceStart(
    String cacheName, CacheTopology cacheTopology, boolean totalOrder, boolean distributed) {
  CLUSTER.startRebalance(cacheName, cacheTopology);
  ReplicableCommand command =
      new CacheTopologyControlCommand(
          cacheName,
          CacheTopologyControlCommand.Type.REBALANCE_START,
          transport.getAddress(),
          cacheTopology,
          null,
          transport.getViewId());
  executeOnClusterAsync(command, getGlobalTimeout(), totalOrder, distributed);
}

@Override
public void broadcastStableTopologyUpdate(
    String cacheName, CacheTopology cacheTopology, boolean totalOrder, boolean distributed) {
  log.debugf(
      "Updating cluster-wide stable topology for cache %s, topology = %s",
      cacheName, cacheTopology);
  ReplicableCommand command =
      new CacheTopologyControlCommand(
          cacheName,
          CacheTopologyControlCommand.Type.STABLE_TOPOLOGY_UPDATE,
          transport.getAddress(),
          cacheTopology,
          null,
          transport.getViewId());
  executeOnClusterAsync(command, getGlobalTimeout(), totalOrder, distributed);
}

@Start(priority = 25) // after the distribution manager!
@SuppressWarnings("unused")
private void setAddress() {
  this.address = transport.getAddress();
  this.isUsingLockDelegation =
      !cacheConfiguration.transaction().transactionMode().isTransactional();
}

private void recoverClusterStatus(int newViewId, boolean isMergeView, List<Address> clusterMembers)
    throws Exception {
  // Ask every member for its local topology information for all caches
  ReplicableCommand command =
      new CacheTopologyControlCommand(
          null, CacheTopologyControlCommand.Type.GET_STATUS, transport.getAddress(), newViewId);
  Map<Address, Object> statusResponses =
      executeOnClusterSync(command, getGlobalTimeout(), false, false);

  log.debugf("Got %d status responses. members are %s", statusResponses.size(), clusterMembers);
  // Group the per-node responses by cache name
  Map<String, Map<Address, CacheStatusResponse>> responsesByCache = new HashMap<>();
  for (Map.Entry<Address, Object> responseEntry : statusResponses.entrySet()) {
    Address sender = responseEntry.getKey();
    Map<String, CacheStatusResponse> nodeStatus =
        (Map<String, CacheStatusResponse>) responseEntry.getValue();
    for (Map.Entry<String, CacheStatusResponse> statusEntry : nodeStatus.entrySet()) {
      String cacheName = statusEntry.getKey();
      Map<Address, CacheStatusResponse> cacheResponses = responsesByCache.get(cacheName);
      if (cacheResponses == null) {
        cacheResponses = new HashMap<>();
        responsesByCache.put(cacheName, cacheResponses);
      }
      cacheResponses.put(sender, statusEntry.getValue());
    }
  }

  // Merge the collected partition information for each cache
  for (Map.Entry<String, Map<Address, CacheStatusResponse>> e : responsesByCache.entrySet()) {
    ClusterCacheStatus cacheStatus = initCacheStatusIfAbsent(e.getKey());
    cacheStatus.doMergePartitions(e.getValue(), clusterMembers, isMergeView);
  }
}

public void executeOnClusterAsync(
    final ReplicableCommand command, final int timeout, boolean totalOrder, boolean distributed) {
  if (!totalOrder) {
    // invoke the command on the local node
    asyncTransportExecutor.submit(
        new Runnable() {
          @Override
          public void run() {
            gcr.wireDependencies(command);
            try {
              if (log.isTraceEnabled())
                log.tracef("Attempting to execute command on self: %s", command);
              command.perform(null);
            } catch (Throwable throwable) {
              // The command already logs any exception in perform()
            }
          }
        });
  }

  // invoke remotely
  try {
    transport.invokeRemotely(
        null,
        command,
        ResponseMode.ASYNCHRONOUS_WITH_SYNC_MARSHALLING,
        timeout,
        true,
        null,
        totalOrder,
        distributed);
  } catch (Exception e) {
    throw new CacheException("Failed to broadcast asynchronous command: " + command, e);
  }
}

@Start(priority = 11) // after Transport
public void start() {
  SecurityActions.addCacheManagerListener(cache.getCacheManager(), new RankCalculator());
  isClustered =
      SecurityActions.getCacheConfiguration(cache.getAdvancedCache())
          .clustering()
          .cacheMode()
          .isClustered();
  if (isClustered) {
    // Use the component registry to avoid keeping a reference to a component that is only
    // needed at start time
    ComponentRegistry componentRegistry =
        SecurityActions.getCacheComponentRegistry(cache.getAdvancedCache());
    Transport transport =
        componentRegistry.getGlobalComponentRegistry().getComponent(Transport.class);
    calculateRank(transport.getAddress(), transport.getMembers(), transport.getViewId());
  }
}

@Override
public void handleClusterView(boolean mergeView, int newViewId) {
  synchronized (viewHandlingLock) {
    // check to ensure this is not an older view
    if (newViewId <= viewId) {
      log.tracef("Ignoring old cluster view notification: %s", newViewId);
      return;
    }
    boolean becameCoordinator = !isCoordinator && transport.isCoordinator();
    isCoordinator = transport.isCoordinator();
    log.tracef(
        "Received new cluster view: %s, isCoordinator = %s, becameCoordinator = %s",
        newViewId, isCoordinator, becameCoordinator);
    if (!isCoordinator) return;

    if (mergeView || becameCoordinator) {
      try {
        recoverClusterStatus(newViewId, mergeView, transport.getMembers());
      } catch (InterruptedException e) {
        log.tracef("Cluster state recovery interrupted because the coordinator is shutting down");
        // the CTMI has already stopped, no need to update the view id or notify waiters
        return;
      } catch (Exception e) {
        // TODO Retry?
        log.failedToRecoverClusterState(e);
      }
    } else {
      try {
        updateCacheMembers(transport.getMembers());
      } catch (Exception e) {
        log.errorUpdatingMembersList(e);
      }
    }

    // update the view id last, so join requests from other nodes wait until we recovered
    // existing members' info
    synchronized (viewUpdateLock) {
      viewId = newViewId;
      viewUpdateLock.notifyAll();
    }
  }
}

@Override
public void broadcastTopologyUpdate(
    String cacheName,
    CacheTopology cacheTopology,
    AvailabilityMode availabilityMode,
    boolean totalOrder,
    boolean distributed) {
  log.debugf(
      "Updating cluster-wide current topology for cache %s, topology = %s, availability mode = %s",
      cacheName, cacheTopology, availabilityMode);
  ReplicableCommand command =
      new CacheTopologyControlCommand(
          cacheName,
          CacheTopologyControlCommand.Type.CH_UPDATE,
          transport.getAddress(),
          cacheTopology,
          availabilityMode,
          transport.getViewId());
  executeOnClusterAsync(command, getGlobalTimeout(), totalOrder, distributed);
}

public void testInvokeRemotelyWhenSingleMember() throws Exception {
  Cache cache1 = cache(0, "replSync");
  Transport mockTransport = createMock(Transport.class);
  RpcManagerImpl rpcManager =
      (RpcManagerImpl) TestingUtil.extractComponent(cache1, RpcManager.class);
  Transport originalTransport = TestingUtil.extractComponent(cache1, Transport.class);
  try {
    Address mockAddress1 = createNiceMock(Address.class);
    List<Address> memberList = new ArrayList<Address>(1);
    memberList.add(mockAddress1);
    expect(mockTransport.getMembers()).andReturn(memberList).anyTimes();
    expect(mockTransport.getAddress()).andReturn(null).anyTimes();
    rpcManager.setTransport(mockTransport);
    // Transport.invokeRemotely(..) should not be called, so no expectation is recorded for it.
    replay(mockAddress1, mockTransport);
    // Now try a simple replication. Since the transport is a mock object it will not actually
    // replicate anything.
    cache1.put(key, value);
    verify(mockTransport);
  } finally {
    if (rpcManager != null) rpcManager.setTransport(originalTransport);
  }
}

private void recoverClusterStatus(
    int newViewId, final boolean isMergeView, final List<Address> clusterMembers)
    throws Exception {
  log.debugf("Recovering cluster status for view %d", newViewId);
  ReplicableCommand command =
      new CacheTopologyControlCommand(
          null, CacheTopologyControlCommand.Type.GET_STATUS, transport.getAddress(), newViewId);
  Map<Address, Object> statusResponses =
      executeOnClusterSync(
          command, getGlobalTimeout(), false, false, new CacheTopologyFilterReuser());

  log.debugf("Got %d status responses. members are %s", statusResponses.size(), clusterMembers);
  Map<String, Map<Address, CacheStatusResponse>> responsesByCache = new HashMap<>();
  boolean recoveredRebalancingStatus = true;
  for (Map.Entry<Address, Object> responseEntry : statusResponses.entrySet()) {
    Address sender = responseEntry.getKey();
    ManagerStatusResponse nodeStatus = (ManagerStatusResponse) responseEntry.getValue();
    recoveredRebalancingStatus &= nodeStatus.isRebalancingEnabled();
    for (Map.Entry<String, CacheStatusResponse> statusEntry : nodeStatus.getCaches().entrySet()) {
      String cacheName = statusEntry.getKey();
      Map<Address, CacheStatusResponse> cacheResponses = responsesByCache.get(cacheName);
      if (cacheResponses == null) {
        cacheResponses = new HashMap<>();
        responsesByCache.put(cacheName, cacheResponses);
      }
      cacheResponses.put(sender, statusEntry.getValue());
    }
  }

  globalRebalancingEnabled = recoveredRebalancingStatus;

  // Compute the new consistent hashes on separate threads
  int maxThreads = Runtime.getRuntime().availableProcessors() / 2 + 1;
  CompletionService<Void> cs =
      new SemaphoreCompletionService<>(asyncTransportExecutor, maxThreads);
  for (final Map.Entry<String, Map<Address, CacheStatusResponse>> e :
      responsesByCache.entrySet()) {
    final ClusterCacheStatus cacheStatus = initCacheStatusIfAbsent(e.getKey());
    cs.submit(
        new Callable<Void>() {
          @Override
          public Void call() throws Exception {
            cacheStatus.doMergePartitions(e.getValue(), clusterMembers, isMergeView);
            return null;
          }
        });
  }
  // Wait for all the merge tasks to finish
  for (int i = 0; i < responsesByCache.size(); i++) {
    cs.take();
  }
}

public void notifyCoordinatorPushCompleted(int viewId) throws Exception {
  Transport t = rpcManager.getTransport();

  if (t.isCoordinator()) {
    if (trace)
      log.tracef(
          "Node %s is the coordinator, marking push for %d as complete directly", self, viewId);

    markNodePushCompleted(viewId, self);
  } else {
    final RehashControlCommand cmd =
        cf.buildRehashControlCommand(RehashControlCommand.Type.NODE_PUSH_COMPLETED, self, viewId);
    Address coordinator = rpcManager.getTransport().getCoordinator();

    if (trace)
      log.tracef(
          "Node %s is not the coordinator, sending request to mark push for %d as complete to %s",
          self, viewId, coordinator);

    rpcManager.invokeRemotely(
        Collections.singleton(coordinator),
        cmd,
        ResponseMode.SYNCHRONOUS,
        configuration.getRehashRpcTimeout());
  }
}

@Start(priority = 100)
public void start() {
  isShuttingDown = false;
  isCoordinator = transport.isCoordinator();

  viewListener = new ClusterViewListener();
  cacheManagerNotifier.addListener(viewListener);
  // The listener already missed the initial view
  asyncTransportExecutor.submit(
      new Runnable() {
        @Override
        public void run() {
          handleClusterView(false, transport.getViewId());
        }
      });

  fetchRebalancingStatusFromCoordinator();
}

// needs to be AFTER the RpcManager
// The DMI is cache-scoped, so it will always start after the RMI, which is global-scoped
@Start(priority = 20)
private void join() throws Exception {
  if (trace) log.trace("starting distribution manager on " + getMyAddress());
  notifier.addListener(listener);

  Transport t = rpcManager.getTransport();
  List<Address> members = t.getMembers();
  self = t.getAddress();
  lastViewId = t.getViewId();
  consistentHash = ConsistentHashHelper.createConsistentHash(configuration, members);
  lastSuccessfulCH = ConsistentHashHelper.createConsistentHash(configuration, members);

  // in case we are/become the coordinator, make sure we're in the push confirmations map
  // before anyone else
  synchronized (pushConfirmations) {
    pushConfirmations.put(t.getAddress(), -1);
  }

  // allow incoming requests
  joinStartedLatch.countDown();

  // nothing to push, but we need to inform the coordinator that we have finished our push
  notifyCoordinatorPushCompleted(t.getViewId());
}

private Map<Address, Object> executeOnClusterSync(
    final ReplicableCommand command, final int timeout, boolean totalOrder, boolean distributed)
    throws Exception {
  // first invoke remotely
  if (totalOrder) {
    Map<Address, Response> responseMap =
        transport.invokeRemotely(
            transport.getMembers(),
            command,
            ResponseMode.SYNCHRONOUS_IGNORE_LEAVERS,
            timeout,
            false,
            null,
            totalOrder,
            distributed);
    Map<Address, Object> responseValues =
        new HashMap<Address, Object>(transport.getMembers().size());
    for (Map.Entry<Address, Response> entry : responseMap.entrySet()) {
      Address address = entry.getKey();
      Response response = entry.getValue();
      if (!response.isSuccessful()) {
        Throwable cause =
            response instanceof ExceptionResponse
                ? ((ExceptionResponse) response).getException()
                : null;
        throw new CacheException(
            "Unsuccessful response received from node " + address + ": " + response, cause);
      }
      responseValues.put(address, ((SuccessfulResponse) response).getResponseValue());
    }
    return responseValues;
  }

  Future<Map<Address, Response>> remoteFuture =
      asyncTransportExecutor.submit(
          new Callable<Map<Address, Response>>() {
            @Override
            public Map<Address, Response> call() throws Exception {
              return transport.invokeRemotely(
                  null,
                  command,
                  ResponseMode.SYNCHRONOUS_IGNORE_LEAVERS,
                  timeout,
                  true,
                  null,
                  false,
                  false);
            }
          });

  // invoke the command on the local node
  gcr.wireDependencies(command);
  Response localResponse;
  try {
    if (log.isTraceEnabled()) log.tracef("Attempting to execute command on self: %s", command);
    localResponse = (Response) command.perform(null);
  } catch (Throwable throwable) {
    throw new Exception(throwable);
  }
  if (!localResponse.isSuccessful()) {
    throw new CacheException("Unsuccessful local response: " + localResponse);
  }

  // wait for the remote commands to finish
  Map<Address, Response> responseMap = remoteFuture.get(timeout, TimeUnit.MILLISECONDS);

  // parse the responses
  Map<Address, Object> responseValues =
      new HashMap<Address, Object>(transport.getMembers().size());
  for (Map.Entry<Address, Response> entry : responseMap.entrySet()) {
    Address address = entry.getKey();
    Response response = entry.getValue();
    if (!response.isSuccessful()) {
      Throwable cause =
          response instanceof ExceptionResponse
              ? ((ExceptionResponse) response).getException()
              : null;
      throw new CacheException(
          "Unsuccessful response received from node " + address + ": " + response, cause);
    }
    responseValues.put(address, ((SuccessfulResponse) response).getResponseValue());
  }
  responseValues.put(
      transport.getAddress(), ((SuccessfulResponse) localResponse).getResponseValue());
  return responseValues;
}