private void updateTopologyId(TopologyAffectedCommand command) throws InterruptedException {
   // set the topology id if it was not set before (i.e. this is a local command)
   // TODO Make tx commands extend FlagAffectedCommand so we can use CACHE_MODE_LOCAL in TransactionTable.cleanupStaleTransactions
   if (command.getTopologyId() == -1) {
      CacheTopology cacheTopology = stateTransferManager.getCacheTopology();
      if (cacheTopology != null) {
         command.setTopologyId(cacheTopology.getTopologyId());
      }
   }
}
private void waitForStateTransfer(int expectedTopologyId, Cache... caches) {
   waitForRehashToComplete(caches);
   for (Cache c : caches) {
      CacheTopology cacheTopology = extractComponent(c, StateTransferManager.class).getCacheTopology();
      assertEquals(String.format("Wrong topology on cache %s, expected %d and got %s", c, expectedTopologyId, cacheTopology),
            expectedTopologyId, cacheTopology.getTopologyId());
   }
}
@Override
public boolean isAcceptable(Response response, Address sender) {
   if (response.isSuccessful() && response.isValid()) {
      ManagerStatusResponse value = (ManagerStatusResponse) ((SuccessfulResponse) response).getResponseValue();
      for (Entry<String, CacheStatusResponse> entry : value.getCaches().entrySet()) {
         CacheStatusResponse csr = entry.getValue();
         CacheTopology cacheTopology = csr.getCacheTopology();
         CacheTopology stableTopology = csr.getStableTopology();

         CacheTopology replaceCacheTopology = seenTopologies.get(cacheTopology);
         if (replaceCacheTopology == null) {
            seenTopologies.put(cacheTopology, cacheTopology);
            replaceCacheTopology = cacheTopology;
         }

         CacheTopology replaceStableTopology = null;
         // If they don't match, check whether we already have a replacement - note stableTopology can be null
         if (!cacheTopology.equals(stableTopology)) {
            replaceStableTopology = seenTopologies.get(stableTopology);
            if (replaceStableTopology == null) {
               seenTopologies.put(stableTopology, stableTopology);
            }
         } else {
            // Since they were equal, replace it with the cache topology we are going to use
            replaceStableTopology = replaceCacheTopology != null ? replaceCacheTopology : cacheTopology;
         }

         CacheJoinInfo info = csr.getCacheJoinInfo();
         CacheJoinInfo replaceInfo = seenInfos.get(info);
         if (replaceInfo == null) {
            seenInfos.put(info, info);
         }

         if (replaceCacheTopology != null || replaceStableTopology != null || replaceInfo != null) {
            entry.setValue(new CacheStatusResponse(
                  replaceInfo != null ? replaceInfo : info,
                  replaceCacheTopology != null ? replaceCacheTopology : cacheTopology,
                  replaceStableTopology != null ? replaceStableTopology : stableTopology,
                  csr.getAvailabilityMode()));
         }
      }
   }
   return true;
}
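A minimal, self-contained sketch of the interning pattern used above (the Interner class and canonicalize() helper are hypothetical, not Infinispan API): a Map keyed by the value itself keeps one canonical instance for equal CacheTopology and CacheJoinInfo objects received from different nodes, so duplicates can be replaced with it.

import java.util.HashMap;
import java.util.Map;

// Hypothetical illustration of the "seenTopologies"/"seenInfos" pattern above.
class Interner<T> {
   private final Map<T, T> seen = new HashMap<>();

   // Returns the first instance seen that is equal to value, storing value if it is new.
   T canonicalize(T value) {
      T existing = seen.get(value);
      if (existing != null) {
         return existing; // reuse the instance we already hold
      }
      seen.put(value, value);
      return value; // first occurrence becomes the canonical instance
   }
}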
@Override
public void notifyTopologyChanged(CacheTopology oldTopology, CacheTopology newTopology, int newTopologyId, boolean pre) {
   if (!topologyChangedListeners.isEmpty()) {
      EventImpl<K, V> e = EventImpl.createEvent(cache, TOPOLOGY_CHANGED);
      e.setPre(pre);
      if (oldTopology != null) {
         e.setConsistentHashAtStart(oldTopology.getReadConsistentHash());
      }
      e.setConsistentHashAtEnd(newTopology.getWriteConsistentHash());
      e.setNewTopologyId(newTopologyId);
      for (CacheEntryListenerInvocation<K, V> listener : topologyChangedListeners)
         listener.invoke(e);
   }
}
private InboundTransferTask addTransfer(Address source, Set<Integer> segmentsFromSource) {
   synchronized (this) {
      segmentsFromSource.removeAll(transfersBySegment.keySet());  // already in progress segments are excluded
      if (!segmentsFromSource.isEmpty()) {
         InboundTransferTask inboundTransfer = new InboundTransferTask(segmentsFromSource, source,
               cacheTopology.getTopologyId(), this, rpcManager, commandsFactory, timeout, cacheName);
         for (int segmentId : segmentsFromSource) {
            transfersBySegment.put(segmentId, inboundTransfer);
         }
         List<InboundTransferTask> inboundTransfers = transfersBySource.get(inboundTransfer.getSource());
         if (inboundTransfers == null) {
            inboundTransfers = new ArrayList<InboundTransferTask>();
            transfersBySource.put(inboundTransfer.getSource(), inboundTransfers);
         }
         inboundTransfers.add(inboundTransfer);
         taskQueue.add(inboundTransfer);
         return inboundTransfer;
      } else {
         return null;
      }
   }
}
void onTaskCompletion(InboundTransferTask inboundTransfer) {
   log.tracef("Completion of inbound transfer task: %s ", inboundTransfer);
   removeTransfer(inboundTransfer);

   if (activeTopologyUpdates.get() == 0) {
      notifyEndOfTopologyUpdate(cacheTopology.getTopologyId());
   }
}
/**
 * First moves the prepared transactions that originated on the leavers into the recovery cache,
 * then cleans up the transactions that are not yet prepared.
 *
 * @param cacheTopology
 */
@Override
public void cleanupLeaverTransactions(CacheTopology cacheTopology) {
   // We only care about transactions originated before this topology update
   if (getMinTopologyId() >= cacheTopology.getTopologyId())
      return;

   Iterator<RemoteTransaction> it = getRemoteTransactions().iterator();
   while (it.hasNext()) {
      RecoveryAwareRemoteTransaction recTx = (RecoveryAwareRemoteTransaction) it.next();
      if (recTx.getTopologyId() < cacheTopology.getTopologyId()) {
         recTx.computeOrphan(cacheTopology.getMembers());
         if (recTx.isInDoubt()) {
            recoveryManager.registerInDoubtTransaction(recTx);
            it.remove();
         }
      }
   }
   // this cleans up the transactions that are not yet prepared
   super.cleanupLeaverTransactions(cacheTopology);
}
public void cleanupStaleTransactions(CacheTopology cacheTopology) {
   int topologyId = cacheTopology.getTopologyId();
   List<Address> members = cacheTopology.getMembers();

   // We only care about transactions originated before this topology update
   if (getMinTopologyId() >= topologyId)
      return;

   log.tracef("Checking for transactions originated on leavers. Current members are %s, remote transactions: %d",
         members, remoteTransactions.size());
   Set<GlobalTransaction> toKill = new HashSet<GlobalTransaction>();
   for (Map.Entry<GlobalTransaction, RemoteTransaction> e : remoteTransactions.entrySet()) {
      GlobalTransaction gt = e.getKey();
      RemoteTransaction remoteTx = e.getValue();
      log.tracef("Checking transaction %s", gt);
      // The topology id check is needed for joiners
      if (remoteTx.getTopologyId() < topologyId && !members.contains(gt.getAddress())) {
         toKill.add(gt);
      }
   }

   if (toKill.isEmpty()) {
      log.tracef("No global transactions pertain to originator(s) who have left the cluster.");
   } else {
      log.tracef("%s global transactions pertain to leavers and need to be killed", toKill.size());
   }

   for (GlobalTransaction gtx : toKill) {
      log.tracef("Killing remote transaction originating on leaver %s", gtx);
      RollbackCommand rc = new RollbackCommand(cacheName, gtx);
      rc.init(invoker, icc, TransactionTable.this);
      try {
         rc.perform(null);
         log.tracef("Rollback of transaction %s complete.", gtx);
      } catch (Throwable e) {
         log.unableToRollbackGlobalTx(gtx, e);
      }
   }

   log.tracef("Completed cleaning transactions originating on leavers. Remote transactions remaining: %d",
         remoteTransactions.size());
}
/**
 * Receive notification of updated keys right before they are committed in DataContainer.
 *
 * @param key the key that is being modified
 */
@Override
public void addUpdatedKey(Object key) {
   // grab a copy of the reference to prevent issues if another thread calls stopApplyingState()
   // between null check and actual usage
   final Set<Object> localUpdatedKeys = updatedKeys;
   if (localUpdatedKeys != null) {
      if (cacheTopology.getWriteConsistentHash().isKeyLocalToNode(rpcManager.getAddress(), key)) {
         localUpdatedKeys.add(key);
      }
   }
}
public void applyState(Address sender, int topologyId, Collection<StateChunk> stateChunks) {
   if (trace) {
      log.tracef("Before applying the received state the data container of cache %s has %d keys",
            cacheName, dataContainer.size());
   }

   for (StateChunk stateChunk : stateChunks) {
      // it's possible to receive a late message so we must be prepared to ignore segments we no longer own
      // todo [anistor] this check should be based on topologyId
      if (!cacheTopology.getWriteConsistentHash().getSegmentsForOwner(rpcManager.getAddress()).contains(stateChunk.getSegmentId())) {
         log.warnf("Discarding received cache entries for segment %d of cache %s because they do not belong to this node.",
               stateChunk.getSegmentId(), cacheName);
         continue;
      }

      // notify the inbound task that a chunk of cache entries was received
      InboundTransferTask inboundTransfer;
      synchronized (this) {
         inboundTransfer = transfersBySegment.get(stateChunk.getSegmentId());
      }
      if (inboundTransfer != null) {
         if (stateChunk.getCacheEntries() != null) {
            doApplyState(sender, stateChunk.getSegmentId(), stateChunk.getCacheEntries());
         }
         inboundTransfer.onStateReceived(stateChunk.getSegmentId(), stateChunk.isLastChunk());
      } else {
         log.warnf("Received unsolicited state from node %s for segment %d of cache %s",
               sender, stateChunk.getSegmentId(), cacheName);
      }
   }

   if (trace) {
      log.tracef("After applying the received state the data container of cache %s has %d keys",
            cacheName, dataContainer.size());
      synchronized (this) {
         log.tracef("Segments not received yet for cache %s: %s", cacheName, transfersBySource);
      }
   }
}
private Address findSource(int segmentId, Set<Address> excludedSources) {
   List<Address> owners = cacheTopology.getReadConsistentHash().locateOwnersForSegment(segmentId);
   if (owners.size() == 1 && owners.get(0).equals(rpcManager.getAddress())) {
      return null;
   }

   // iterate backwards because we prefer to fetch from newer nodes
   for (int i = owners.size() - 1; i >= 0; i--) {
      Address o = owners.get(i);
      if (!o.equals(rpcManager.getAddress()) && !excludedSources.contains(o)) {
         return o;
      }
   }
   log.noLiveOwnersFoundForSegment(segmentId, cacheName, owners, excludedSources);
   return null;
}
private void requestTransactions(Set<Integer> segments, Map<Address, Set<Integer>> sources, Set<Address> excludedSources) {
   findSources(segments, sources, excludedSources);

   boolean seenFailures = false;
   while (true) {
      Set<Integer> failedSegments = new HashSet<Integer>();
      for (Map.Entry<Address, Set<Integer>> e : sources.entrySet()) {
         Address source = e.getKey();
         Set<Integer> segmentsFromSource = e.getValue();
         List<TransactionInfo> transactions = getTransactions(source, segmentsFromSource, cacheTopology.getTopologyId());
         if (transactions != null) {
            applyTransactions(source, transactions);
         } else {
            // if requesting the transactions failed we need to retry from another source
            failedSegments.addAll(segmentsFromSource);
            excludedSources.add(source);
         }
      }

      if (failedSegments.isEmpty()) {
         break;
      }

      // look for other sources for all failed segments
      seenFailures = true;
      sources.clear();
      findSources(failedSegments, sources, excludedSources);
   }

   if (seenFailures) {
      // start fresh when next step starts (fetching segments)
      sources.clear();
   }
}
private int getSegment(Object key) {
   // here we can use any CH version because the routing table is not involved
   return cacheTopology.getReadConsistentHash().getSegment(key);
}
@Override
public void onTopologyUpdate(CacheTopology cacheTopology, boolean isRebalance) {
   if (trace) log.tracef("Received new CH %s for cache %s", cacheTopology.getWriteConsistentHash(), cacheName);

   int numStartedTopologyUpdates = activeTopologyUpdates.incrementAndGet();
   if (isRebalance) {
      rebalanceInProgress.set(true);
   }
   final ConsistentHash previousCh = this.cacheTopology != null ? this.cacheTopology.getWriteConsistentHash() : null;
   // Ensures writes to the data container use the right consistent hash
   // No need for a try/finally block, since it's just an assignment
   stateTransferLock.acquireExclusiveTopologyLock();
   this.cacheTopology = cacheTopology;
   if (numStartedTopologyUpdates == 1) {
      updatedKeys = new ConcurrentHashSet<Object>();
   }
   stateTransferLock.releaseExclusiveTopologyLock();
   stateTransferLock.notifyTopologyInstalled(cacheTopology.getTopologyId());

   try {
      // fetch transactions and data segments from other owners if this is enabled
      if (isTransactional || isFetchEnabled) {
         Set<Integer> addedSegments;
         if (previousCh == null) {
            // we start fresh, without any data, so we need to pull everything we own according to writeCh
            addedSegments = getOwnedSegments(cacheTopology.getWriteConsistentHash());

            if (trace) {
               log.tracef("On cache %s we have: added segments: %s", cacheName, addedSegments);
            }
         } else {
            Set<Integer> previousSegments = getOwnedSegments(previousCh);
            Set<Integer> newSegments = getOwnedSegments(cacheTopology.getWriteConsistentHash());

            Set<Integer> removedSegments = new HashSet<Integer>(previousSegments);
            removedSegments.removeAll(newSegments);

            // This is a rebalance, we need to request the segments we own in the new CH.
            addedSegments = new HashSet<Integer>(newSegments);
            addedSegments.removeAll(previousSegments);

            if (trace) {
               log.tracef("On cache %s we have: removed segments: %s; new segments: %s; old segments: %s; added segments: %s",
                     cacheName, removedSegments, newSegments, previousSegments, addedSegments);
            }

            // remove inbound transfers and any data for segments we no longer own
            cancelTransfers(removedSegments);

            // If L1.onRehash is enabled, "removed" segments are actually moved to L1. The new (and old) owners
            // will automatically add the nodes that no longer own a key to that key's requestors list.
            invalidateSegments(newSegments, removedSegments);

            // check if any of the existing transfers should be restarted from a different source
            // because the initial source is no longer a member
            Set<Address> members = new HashSet<Address>(cacheTopology.getReadConsistentHash().getMembers());
            synchronized (this) {
               for (Iterator<Address> it = transfersBySource.keySet().iterator(); it.hasNext(); ) {
                  Address source = it.next();
                  if (!members.contains(source)) {
                     if (trace) {
                        log.tracef("Removing inbound transfers from source %s for cache %s", source, cacheName);
                     }
                     List<InboundTransferTask> inboundTransfers = transfersBySource.get(source);
                     it.remove();
                     for (InboundTransferTask inboundTransfer : inboundTransfers) {
                        // these segments will be restarted if they are still in new write CH
                        if (trace) {
                           log.tracef("Removing inbound transfers for segments %s from source %s for cache %s",
                                 inboundTransfer.getSegments(), source, cacheName);
                        }
                        transfersBySegment.keySet().removeAll(inboundTransfer.getSegments());
                        addedSegments.addAll(inboundTransfer.getUnfinishedSegments());
                     }
                  }
               }

               // exclude those that are already in progress from a valid source
               addedSegments.removeAll(transfersBySegment.keySet());
            }
         }

         if (!addedSegments.isEmpty()) {
            addTransfers(addedSegments);  // add transfers for new or restarted segments
         }
      }
   } finally {
      stateTransferLock.notifyTransactionDataReceived(cacheTopology.getTopologyId());

      if (activeTopologyUpdates.decrementAndGet() == 0) {
         notifyEndOfTopologyUpdate(cacheTopology.getTopologyId());
      }
   }
}
public void testReplace() throws Exception {
   cache(0).put("myKey", "myValue");

   // add an interceptor on second node that will block REPLACE commands right after
   // EntryWrappingInterceptor until we are ready
   final CountDownLatch replaceStartedLatch = new CountDownLatch(1);
   final CountDownLatch replaceProceedLatch = new CountDownLatch(1);
   boolean isVersioningEnabled = cache(0).getCacheConfiguration().versioning().enabled();
   cacheConfigBuilder.customInterceptors().addInterceptor()
         .after(isVersioningEnabled ? VersionedEntryWrappingInterceptor.class : EntryWrappingInterceptor.class)
         .interceptor(new CommandInterceptor() {
            @Override
            protected Object handleDefault(InvocationContext ctx, VisitableCommand cmd) throws Throwable {
               if (cmd instanceof ReplaceCommand) {
                  // signal we encounter a REPLACE
                  replaceStartedLatch.countDown();
                  // wait until it is ok to continue with REPLACE
                  if (!replaceProceedLatch.await(15, TimeUnit.SECONDS)) {
                     throw new TimeoutException();
                  }
               }
               return super.handleDefault(ctx, cmd);
            }
         });

   // do not allow coordinator to send topology updates to node B
   final ClusterTopologyManager ctm0 = TestingUtil.extractGlobalComponent(manager(0), ClusterTopologyManager.class);
   ctm0.setRebalancingEnabled(false);

   log.info("Adding a new node ..");
   addClusterEnabledCacheManager(cacheConfigBuilder);
   log.info("Added a new node");

   // node B is not a member yet and rebalance has not started yet
   CacheTopology cacheTopology = advancedCache(1).getComponentRegistry().getStateTransferManager().getCacheTopology();
   assertNull(cacheTopology.getPendingCH());
   assertTrue(cacheTopology.getMembers().contains(address(0)));
   assertFalse(cacheTopology.getMembers().contains(address(1)));
   assertFalse(cacheTopology.getCurrentCH().getMembers().contains(address(1)));

   // no keys should be present on node B yet because state transfer is blocked
   assertTrue(cache(1).keySet().isEmpty());

   // initiate a REPLACE
   Future<Object> getFuture = fork(new Callable<Object>() {
      @Override
      public Object call() throws Exception {
         try {
            return cache(1).replace("myKey", "newValue");
         } catch (Exception e) {
            log.errorf(e, "REPLACE failed: %s", e.getMessage());
            throw e;
         }
      }
   });

   // wait for REPLACE command on node B to reach beyond *EntryWrappingInterceptor, where it will block.
   // the value seen so far is null
   if (!replaceStartedLatch.await(15, TimeUnit.SECONDS)) {
      throw new TimeoutException();
   }

   // paranoia, yes the value is still missing from data container
   assertTrue(cache(1).keySet().isEmpty());

   // allow rebalance to start
   ctm0.setRebalancingEnabled(true);

   // wait for state transfer to end
   TestingUtil.waitForRehashToComplete(cache(0), cache(1));

   // the state should be already transferred now
   assertEquals(1, cache(1).keySet().size());

   // allow REPLACE to continue
   replaceProceedLatch.countDown();

   Object oldVal = getFuture.get(15, TimeUnit.SECONDS);
   assertNotNull(oldVal);
   assertEquals("myValue", oldVal);

   assertEquals("newValue", cache(0).get("myKey"));
   assertEquals("newValue", cache(1).get("myKey"));
}