private void sendPartitions() { ClusterNode oldestNode = this.oldestNode.get(); try { sendLocalPartitions(oldestNode, exchId); } catch (ClusterTopologyCheckedException ignore) { if (log.isDebugEnabled()) log.debug( "Oldest node left during partition exchange [nodeId=" + oldestNode.id() + ", exchId=" + exchId + ']'); } catch (IgniteCheckedException e) { scheduleRecheck(); U.error( log, "Failed to send local partitions to oldest node (will retry after timeout) [oldestNodeId=" + oldestNode.id() + ", exchId=" + exchId + ']', e); } }
/** * @param cctx Cache context. * @param busyLock Busy lock. * @param exchId Exchange ID. * @param reqs Cache change requests. */ public GridDhtPartitionsExchangeFuture( GridCacheSharedContext cctx, ReadWriteLock busyLock, GridDhtPartitionExchangeId exchId, Collection<DynamicCacheChangeRequest> reqs) { assert busyLock != null; assert exchId != null; dummy = false; forcePreload = false; reassign = false; this.cctx = cctx; this.busyLock = busyLock; this.exchId = exchId; this.reqs = reqs; log = cctx.logger(getClass()); initFut = new GridFutureAdapter<>(); if (log.isDebugEnabled()) log.debug( "Creating exchange future [localNode=" + cctx.localNodeId() + ", fut=" + this + ']'); }
/** * @param nodes Nodes. * @param id ID. * @throws IgniteCheckedException If failed. */ private void sendAllPartitions( Collection<? extends ClusterNode> nodes, GridDhtPartitionExchangeId id) throws IgniteCheckedException { GridDhtPartitionsFullMessage m = new GridDhtPartitionsFullMessage(id, lastVer.get(), id.topologyVersion()); for (GridCacheContext cacheCtx : cctx.cacheContexts()) { if (!cacheCtx.isLocal()) { AffinityTopologyVersion startTopVer = cacheCtx.startTopologyVersion(); boolean ready = startTopVer == null || startTopVer.compareTo(id.topologyVersion()) <= 0; if (ready) m.addFullPartitionsMap(cacheCtx.cacheId(), cacheCtx.topology().partitionMap(true)); } } // It is important that client topologies be added after contexts. for (GridClientPartitionTopology top : cctx.exchange().clientTopologies()) m.addFullPartitionsMap(top.cacheId(), top.partitionMap(true)); if (log.isDebugEnabled()) log.debug( "Sending full partition map [nodeIds=" + F.viewReadOnly(nodes, F.node2id()) + ", exchId=" + exchId + ", msg=" + m + ']'); cctx.io().safeSend(nodes, m, SYSTEM_POOL, null); }
/** * @param updateSeq Update sequence. * @return {@code True} if entry has been transitioned to state EVICTED. */ boolean tryEvict(boolean updateSeq) { if (state.getReference() != RENTING || state.getStamp() != 0 || groupReserved()) return false; // Attempt to evict partition entries from cache. clearAll(); if (map.isEmpty() && state.compareAndSet(RENTING, EVICTED, 0, 0)) { if (log.isDebugEnabled()) log.debug("Evicted partition: " + this); if (!GridQueryProcessor.isEnabled(cctx.config())) clearSwap(); if (cctx.isDrEnabled()) cctx.dr().partitionEvicted(id); cctx.dataStructures().onPartitionEvicted(id); rent.onDone(); ((GridDhtPreloader) cctx.preloader()).onPartitionEvicted(this, updateSeq); clearDeferredDeletes(); return true; } return false; }
/** * @param updateSeq Update sequence. * @return Future for evict attempt. */ IgniteInternalFuture<Boolean> tryEvictAsync(boolean updateSeq) { if (map.isEmpty() && !GridQueryProcessor.isEnabled(cctx.config()) && state.compareAndSet(RENTING, EVICTED, 0, 0)) { if (log.isDebugEnabled()) log.debug("Evicted partition: " + this); clearSwap(); if (cctx.isDrEnabled()) cctx.dr().partitionEvicted(id); cctx.dataStructures().onPartitionEvicted(id); rent.onDone(); ((GridDhtPreloader) cctx.preloader()).onPartitionEvicted(this, updateSeq); clearDeferredDeletes(); return new GridFinishedFuture<>(true); } return cctx.closures() .callLocalSafe( new GPC<Boolean>() { @Override public Boolean call() { return tryEvict(true); } }, /*system pool*/ true); }
/** @return {@code true} if entered to busy state. */ private boolean enterBusy() { if (busyLock.readLock().tryLock()) return true; if (log.isDebugEnabled()) log.debug("Failed to enter busy state (exchanger is stopping): " + this); return false; }
/** {@inheritDoc} */ @Override public boolean onDone(AffinityTopologyVersion res, Throwable err) { Map<Integer, Boolean> m = null; for (GridCacheContext cacheCtx : cctx.cacheContexts()) { if (cacheCtx.config().getTopologyValidator() != null && !CU.isSystemCache(cacheCtx.name())) { if (m == null) m = new HashMap<>(); m.put( cacheCtx.cacheId(), cacheCtx.config().getTopologyValidator().validate(discoEvt.topologyNodes())); } } cacheValidRes = m != null ? m : Collections.<Integer, Boolean>emptyMap(); cctx.cache().onExchangeDone(exchId.topologyVersion(), reqs, err); cctx.exchange().onExchangeDone(this, err); if (super.onDone(res, err) && !dummy && !forcePreload) { if (log.isDebugEnabled()) log.debug( "Completed partition exchange [localNode=" + cctx.localNodeId() + ", exchange= " + this + ']'); initFut.onDone(err == null); GridTimeoutObject timeoutObj = this.timeoutObj; // Deschedule timeout object. if (timeoutObj != null) cctx.kernalContext().timeout().removeTimeoutObject(timeoutObj); if (exchId.isLeft()) { for (GridCacheContext cacheCtx : cctx.cacheContexts()) cacheCtx.config().getAffinity().removeNode(exchId.nodeId()); } return true; } return dummy; }
/** * @param updateSeq Update sequence. * @return Future to signal that this node is no longer an owner or backup. */ IgniteInternalFuture<?> rent(boolean updateSeq) { while (true) { int reservations = state.getStamp(); GridDhtPartitionState s = state.getReference(); if (s == RENTING || s == EVICTED) return rent; if (state.compareAndSet(s, RENTING, reservations, reservations)) { if (log.isDebugEnabled()) log.debug("Moved partition to RENTING state: " + this); // Evict asynchronously, as the 'rent' method may be called // from within write locks on local partition. tryEvictAsync(updateSeq); break; } } return rent; }
/** @return {@code True} if transitioned to OWNING state. */ boolean own() { while (true) { int reservations = state.getStamp(); GridDhtPartitionState s = state.getReference(); if (s == RENTING || s == EVICTED) return false; if (s == OWNING) return true; assert s == MOVING; if (state.compareAndSet(MOVING, OWNING, reservations, reservations)) { if (log.isDebugEnabled()) log.debug("Owned partition: " + this); // No need to keep history any more. evictHist = null; return true; } } }
/** * @param node Node. * @param id ID. * @throws IgniteCheckedException If failed. */ private void sendLocalPartitions(ClusterNode node, @Nullable GridDhtPartitionExchangeId id) throws IgniteCheckedException { GridDhtPartitionsSingleMessage m = new GridDhtPartitionsSingleMessage( id, cctx.kernalContext().clientNode(), cctx.versions().last()); for (GridCacheContext cacheCtx : cctx.cacheContexts()) { if (!cacheCtx.isLocal()) m.addLocalPartitionMap(cacheCtx.cacheId(), cacheCtx.topology().localPartitionMap()); } if (log.isDebugEnabled()) log.debug( "Sending local partitions [nodeId=" + node.id() + ", exchId=" + exchId + ", msg=" + m + ']'); cctx.io().send(node, m, SYSTEM_POOL); }
/** * @param nodeId Node ID. * @param retryCnt Number of retries. */ private void sendAllPartitions(final UUID nodeId, final int retryCnt) { ClusterNode n = cctx.node(nodeId); try { if (n != null) sendAllPartitions(F.asList(n), exchId); } catch (IgniteCheckedException e) { if (e instanceof ClusterTopologyCheckedException || !cctx.discovery().alive(n)) { log.debug( "Failed to send full partition map to node, node left grid " + "[rmtNode=" + nodeId + ", exchangeId=" + exchId + ']'); return; } if (retryCnt > 0) { long timeout = cctx.gridConfig().getNetworkSendRetryDelay(); LT.error( log, e, "Failed to send full partition map to node (will retry after timeout) " + "[node=" + nodeId + ", exchangeId=" + exchId + ", timeout=" + timeout + ']'); cctx.time() .addTimeoutObject( new GridTimeoutObjectAdapter(timeout) { @Override public void onTimeout() { sendAllPartitions(nodeId, retryCnt - 1); } }); } else U.error( log, "Failed to send full partition map [node=" + n + ", exchangeId=" + exchId + ']', e); } }
/** * @param cacheCtx Cache context. * @throws IgniteCheckedException If failed. */ private void initTopology(GridCacheContext cacheCtx) throws IgniteCheckedException { if (stopping(cacheCtx.cacheId())) return; if (canCalculateAffinity(cacheCtx)) { if (log.isDebugEnabled()) log.debug( "Will recalculate affinity [locNodeId=" + cctx.localNodeId() + ", exchId=" + exchId + ']'); cacheCtx.affinity().calculateAffinity(exchId.topologyVersion(), discoEvt); } else { if (log.isDebugEnabled()) log.debug( "Will request affinity from remote node [locNodeId=" + cctx.localNodeId() + ", exchId=" + exchId + ']'); // Fetch affinity assignment from remote node. GridDhtAssignmentFetchFuture fetchFut = new GridDhtAssignmentFetchFuture( cacheCtx, exchId.topologyVersion(), CU.affinityNodes(cacheCtx, exchId.topologyVersion())); fetchFut.init(); List<List<ClusterNode>> affAssignment = fetchFut.get(); if (log.isDebugEnabled()) log.debug( "Fetched affinity from remote node, initializing affinity assignment [locNodeId=" + cctx.localNodeId() + ", topVer=" + exchId.topologyVersion() + ']'); if (affAssignment == null) { affAssignment = new ArrayList<>(cacheCtx.affinity().partitions()); List<ClusterNode> empty = Collections.emptyList(); for (int i = 0; i < cacheCtx.affinity().partitions(); i++) affAssignment.add(empty); } cacheCtx.affinity().initializeAffinity(exchId.topologyVersion(), affAssignment); } }
/** * Starts activity. * * @throws IgniteInterruptedCheckedException If interrupted. */ public void init() throws IgniteInterruptedCheckedException { if (isDone()) return; if (init.compareAndSet(false, true)) { if (isDone()) return; try { // Wait for event to occur to make sure that discovery // will return corresponding nodes. U.await(evtLatch); assert discoEvt != null : this; assert !dummy && !forcePreload : this; ClusterNode oldest = CU.oldestAliveCacheServerNode(cctx, exchId.topologyVersion()); oldestNode.set(oldest); startCaches(); // True if client node joined or failed. boolean clientNodeEvt; if (F.isEmpty(reqs)) { int type = discoEvt.type(); assert type == EVT_NODE_JOINED || type == EVT_NODE_LEFT || type == EVT_NODE_FAILED : discoEvt; clientNodeEvt = CU.clientNode(discoEvt.eventNode()); } else { assert discoEvt.type() == EVT_DISCOVERY_CUSTOM_EVT : discoEvt; boolean clientOnlyStart = true; for (DynamicCacheChangeRequest req : reqs) { if (!req.clientStartOnly()) { clientOnlyStart = false; break; } } clientNodeEvt = clientOnlyStart; } if (clientNodeEvt) { ClusterNode node = discoEvt.eventNode(); // Client need to initialize affinity for local join event or for stated client caches. if (!node.isLocal()) { for (GridCacheContext cacheCtx : cctx.cacheContexts()) { if (cacheCtx.isLocal()) continue; GridDhtPartitionTopology top = cacheCtx.topology(); top.updateTopologyVersion(exchId, this, -1, stopping(cacheCtx.cacheId())); if (cacheCtx.affinity().affinityTopologyVersion() == AffinityTopologyVersion.NONE) { initTopology(cacheCtx); top.beforeExchange(this); } else cacheCtx.affinity().clientEventTopologyChange(discoEvt, exchId.topologyVersion()); } if (exchId.isLeft()) cctx.mvcc().removeExplicitNodeLocks(exchId.nodeId(), exchId.topologyVersion()); onDone(exchId.topologyVersion()); skipPreload = cctx.kernalContext().clientNode(); return; } } if (cctx.kernalContext().clientNode()) { skipPreload = true; for (GridCacheContext cacheCtx : cctx.cacheContexts()) { if (cacheCtx.isLocal()) continue; GridDhtPartitionTopology top = cacheCtx.topology(); top.updateTopologyVersion(exchId, this, -1, stopping(cacheCtx.cacheId())); } for (GridCacheContext cacheCtx : cctx.cacheContexts()) { if (cacheCtx.isLocal()) continue; initTopology(cacheCtx); } if (oldestNode.get() != null) { rmtNodes = new ConcurrentLinkedQueue<>( CU.aliveRemoteServerNodesWithCaches(cctx, exchId.topologyVersion())); rmtIds = Collections.unmodifiableSet(new HashSet<>(F.nodeIds(rmtNodes))); ready.set(true); initFut.onDone(true); if (log.isDebugEnabled()) log.debug("Initialized future: " + this); sendPartitions(); } else onDone(exchId.topologyVersion()); return; } assert oldestNode.get() != null; for (GridCacheContext cacheCtx : cctx.cacheContexts()) { if (isCacheAdded(cacheCtx.cacheId(), exchId.topologyVersion())) { if (cacheCtx .discovery() .cacheAffinityNodes(cacheCtx.name(), topologyVersion()) .isEmpty()) U.quietAndWarn(log, "No server nodes found for cache client: " + cacheCtx.namex()); } cacheCtx.preloader().onExchangeFutureAdded(); } List<String> cachesWithoutNodes = null; if (exchId.isLeft()) { for (String name : cctx.cache().cacheNames()) { if (cctx.discovery().cacheAffinityNodes(name, topologyVersion()).isEmpty()) { if (cachesWithoutNodes == null) cachesWithoutNodes = new ArrayList<>(); cachesWithoutNodes.add(name); // Fire event even if there is no client cache started. if (cctx.gridEvents().isRecordable(EventType.EVT_CACHE_NODES_LEFT)) { Event evt = new CacheEvent( name, cctx.localNode(), cctx.localNode(), "All server nodes have left the cluster.", EventType.EVT_CACHE_NODES_LEFT, 0, false, null, null, null, null, false, null, false, null, null, null); cctx.gridEvents().record(evt); } } } } if (cachesWithoutNodes != null) { StringBuilder sb = new StringBuilder( "All server nodes for the following caches have left the cluster: "); for (int i = 0; i < cachesWithoutNodes.size(); i++) { String cache = cachesWithoutNodes.get(i); sb.append('\'').append(cache).append('\''); if (i != cachesWithoutNodes.size() - 1) sb.append(", "); } U.quietAndWarn(log, sb.toString()); U.quietAndWarn(log, "Must have server nodes for caches to operate."); } assert discoEvt != null; assert exchId.nodeId().equals(discoEvt.eventNode().id()); for (GridCacheContext cacheCtx : cctx.cacheContexts()) { GridClientPartitionTopology clientTop = cctx.exchange().clearClientTopology(cacheCtx.cacheId()); long updSeq = clientTop == null ? -1 : clientTop.lastUpdateSequence(); // Update before waiting for locks. if (!cacheCtx.isLocal()) cacheCtx .topology() .updateTopologyVersion(exchId, this, updSeq, stopping(cacheCtx.cacheId())); } // Grab all alive remote nodes with order of equal or less than last joined node. rmtNodes = new ConcurrentLinkedQueue<>( CU.aliveRemoteServerNodesWithCaches(cctx, exchId.topologyVersion())); rmtIds = Collections.unmodifiableSet(new HashSet<>(F.nodeIds(rmtNodes))); for (Map.Entry<UUID, GridDhtPartitionsSingleMessage> m : singleMsgs.entrySet()) // If received any messages, process them. onReceive(m.getKey(), m.getValue()); for (Map.Entry<UUID, GridDhtPartitionsFullMessage> m : fullMsgs.entrySet()) // If received any messages, process them. onReceive(m.getKey(), m.getValue()); AffinityTopologyVersion topVer = exchId.topologyVersion(); for (GridCacheContext cacheCtx : cctx.cacheContexts()) { if (cacheCtx.isLocal()) continue; // Must initialize topology after we get discovery event. initTopology(cacheCtx); cacheCtx.preloader().updateLastExchangeFuture(this); } IgniteInternalFuture<?> partReleaseFut = cctx.partitionReleaseFuture(topVer); // Assign to class variable so it will be included into toString() method. this.partReleaseFut = partReleaseFut; if (log.isDebugEnabled()) log.debug("Before waiting for partition release future: " + this); while (true) { try { partReleaseFut.get(2 * cctx.gridConfig().getNetworkTimeout(), TimeUnit.MILLISECONDS); break; } catch (IgniteFutureTimeoutCheckedException ignored) { // Print pending transactions and locks that might have led to hang. dumpPendingObjects(); } } if (log.isDebugEnabled()) log.debug("After waiting for partition release future: " + this); if (!F.isEmpty(reqs)) blockGateways(); if (exchId.isLeft()) cctx.mvcc().removeExplicitNodeLocks(exchId.nodeId(), exchId.topologyVersion()); IgniteInternalFuture<?> locksFut = cctx.mvcc().finishLocks(exchId.topologyVersion()); while (true) { try { locksFut.get(2 * cctx.gridConfig().getNetworkTimeout(), TimeUnit.MILLISECONDS); break; } catch (IgniteFutureTimeoutCheckedException ignored) { U.warn( log, "Failed to wait for locks release future. " + "Dumping pending objects that might be the cause: " + cctx.localNodeId()); U.warn(log, "Locked entries:"); Map<IgniteTxKey, Collection<GridCacheMvccCandidate>> locks = cctx.mvcc().unfinishedLocks(exchId.topologyVersion()); for (Map.Entry<IgniteTxKey, Collection<GridCacheMvccCandidate>> e : locks.entrySet()) U.warn(log, "Locked entry [key=" + e.getKey() + ", mvcc=" + e.getValue() + ']'); } } for (GridCacheContext cacheCtx : cctx.cacheContexts()) { if (cacheCtx.isLocal()) continue; // Notify replication manager. GridCacheContext drCacheCtx = cacheCtx.isNear() ? cacheCtx.near().dht().context() : cacheCtx; if (drCacheCtx.isDrEnabled()) drCacheCtx.dr().beforeExchange(topVer, exchId.isLeft()); // Partition release future is done so we can flush the write-behind store. cacheCtx.store().forceFlush(); // Process queued undeploys prior to sending/spreading map. cacheCtx.preloader().unwindUndeploys(); GridDhtPartitionTopology top = cacheCtx.topology(); assert topVer.equals(top.topologyVersion()) : "Topology version is updated only in this class instances inside single ExchangeWorker thread."; top.beforeExchange(this); } for (GridClientPartitionTopology top : cctx.exchange().clientTopologies()) { top.updateTopologyVersion(exchId, this, -1, stopping(top.cacheId())); top.beforeExchange(this); } } catch (IgniteInterruptedCheckedException e) { onDone(e); throw e; } catch (Throwable e) { U.error( log, "Failed to reinitialize local partitions (preloading will be stopped): " + exchId, e); onDone(e); if (e instanceof Error) throw (Error) e; return; } if (F.isEmpty(rmtIds)) { onDone(exchId.topologyVersion()); return; } ready.set(true); initFut.onDone(true); if (log.isDebugEnabled()) log.debug("Initialized future: " + this); // If this node is not oldest. if (!oldestNode.get().id().equals(cctx.localNodeId())) sendPartitions(); else { boolean allReceived = allReceived(); if (allReceived && replied.compareAndSet(false, true)) { if (spreadPartitions()) onDone(exchId.topologyVersion()); } } scheduleRecheck(); } else assert false : "Skipped init future: " + this; }
/** * @param nodeId Sender node ID. * @param msg Full partition info. */ public void onReceive(final UUID nodeId, final GridDhtPartitionsFullMessage msg) { assert msg != null; if (isDone()) { if (log.isDebugEnabled()) log.debug("Received message for finished future [msg=" + msg + ", fut=" + this + ']'); return; } if (log.isDebugEnabled()) log.debug("Received full partition map from node [nodeId=" + nodeId + ", msg=" + msg + ']'); assert exchId.topologyVersion().equals(msg.topologyVersion()); initFut.listen( new CI1<IgniteInternalFuture<Boolean>>() { @Override public void apply(IgniteInternalFuture<Boolean> t) { ClusterNode curOldest = oldestNode.get(); if (!nodeId.equals(curOldest.id())) { if (log.isDebugEnabled()) log.debug( "Received full partition map from unexpected node [oldest=" + curOldest.id() + ", unexpectedNodeId=" + nodeId + ']'); ClusterNode snd = cctx.discovery().node(nodeId); if (snd == null) { if (log.isDebugEnabled()) log.debug( "Sender node left grid, will ignore message from unexpected node [nodeId=" + nodeId + ", exchId=" + msg.exchangeId() + ']'); return; } // Will process message later if sender node becomes oldest node. if (snd.order() > curOldest.order()) fullMsgs.put(nodeId, msg); return; } assert msg.exchangeId().equals(exchId); assert msg.lastVersion() != null; cctx.versions().onReceived(nodeId, msg.lastVersion()); updatePartitionFullMap(msg); onDone(exchId.topologyVersion()); } }); }
/** * @param nodeId Sender node id. * @param msg Single partition info. */ public void onReceive(final UUID nodeId, final GridDhtPartitionsSingleMessage msg) { assert msg != null; assert msg.exchangeId().equals(exchId); // Update last seen version. while (true) { GridCacheVersion old = lastVer.get(); if (old == null || old.compareTo(msg.lastVersion()) < 0) { if (lastVer.compareAndSet(old, msg.lastVersion())) break; } else break; } if (isDone()) { if (log.isDebugEnabled()) log.debug( "Received message for finished future (will reply only to sender) [msg=" + msg + ", fut=" + this + ']'); sendAllPartitions(nodeId, cctx.gridConfig().getNetworkSendRetryCount()); } else { initFut.listen( new CI1<IgniteInternalFuture<Boolean>>() { @Override public void apply(IgniteInternalFuture<Boolean> t) { try { if (!t.get()) // Just to check if there was an error. return; ClusterNode loc = cctx.localNode(); singleMsgs.put(nodeId, msg); boolean match = true; // Check if oldest node has changed. if (!oldestNode.get().equals(loc)) { match = false; synchronized (mux) { // Double check. if (oldestNode.get().equals(loc)) match = true; } } if (match) { boolean allReceived; synchronized (rcvdIds) { if (rcvdIds.add(nodeId)) updatePartitionSingleMap(msg); allReceived = allReceived(); } // If got all replies, and initialization finished, and reply has not been sent // yet. if (allReceived && ready.get() && replied.compareAndSet(false, true)) { spreadPartitions(); onDone(exchId.topologyVersion()); } else if (log.isDebugEnabled()) log.debug( "Exchange future full map is not sent [allReceived=" + allReceived() + ", ready=" + ready + ", replied=" + replied.get() + ", init=" + init.get() + ", fut=" + this + ']'); } } catch (IgniteCheckedException e) { U.error(log, "Failed to initialize exchange future: " + this, e); } } }); } }