/** {@inheritDoc} */ @Override public GridDhtPartitionFullMap partitionMap(boolean onlyActive) { lock.readLock().lock(); try { assert node2part != null && node2part.valid() : "Invalid node2part [node2part: " + node2part + ", cache=" + cctx.name() + ", started=" + cctx.started() + ", stopping=" + stopping + ", locNodeId=" + cctx.localNode().id() + ", locName=" + cctx.gridName() + ']'; GridDhtPartitionFullMap m = node2part; return new GridDhtPartitionFullMap( m.nodeId(), m.nodeOrder(), m.updateSequence(), m, onlyActive); } finally { lock.readLock().unlock(); } }
/** Checks consistency after all operations. */ private void consistencyCheck() { if (CONSISTENCY_CHECK) { assert lock.writeLock().isHeldByCurrentThread(); if (node2part == null) return; for (Map.Entry<UUID, GridDhtPartitionMap> e : node2part.entrySet()) { for (Integer p : e.getValue().keySet()) { Set<UUID> nodeIds = part2node.get(p); assert nodeIds != null : "Failed consistency check [part=" + p + ", nodeId=" + e.getKey() + ']'; assert nodeIds.contains(e.getKey()) : "Failed consistency check [part=" + p + ", nodeId=" + e.getKey() + ", nodeIds=" + nodeIds + ']'; } } for (Map.Entry<Integer, Set<UUID>> e : part2node.entrySet()) { for (UUID nodeId : e.getValue()) { GridDhtPartitionMap map = node2part.get(nodeId); assert map != null : "Failed consistency check [part=" + e.getKey() + ", nodeId=" + nodeId + ']'; assert map.containsKey(e.getKey()) : "Failed consistency check [part=" + e.getKey() + ", nodeId=" + nodeId + ']'; } } } }
/** {@inheritDoc} */ @Nullable @Override public GridDhtPartitionMap partitions(UUID nodeId) { lock.readLock().lock(); try { return node2part.get(nodeId); } finally { lock.readLock().unlock(); } }
/** @param nodeId Node to remove. */ private void removeNode(UUID nodeId) { assert nodeId != null; assert lock.writeLock().isHeldByCurrentThread(); ClusterNode oldest = CU.oldestAliveCacheServerNode(cctx.shared(), topVer); assert oldest != null; ClusterNode loc = cctx.localNode(); if (node2part != null) { if (oldest.equals(loc) && !node2part.nodeId().equals(loc.id())) { updateSeq.setIfGreater(node2part.updateSequence()); node2part = new GridDhtPartitionFullMap( loc.id(), loc.order(), updateSeq.incrementAndGet(), node2part, false); } else node2part = new GridDhtPartitionFullMap(node2part, node2part.updateSequence()); part2node = new HashMap<>(part2node); GridDhtPartitionMap parts = node2part.remove(nodeId); if (parts != null) { for (Integer p : parts.keySet()) { Set<UUID> nodeIds = part2node.get(p); if (nodeIds != null) { nodeIds.remove(nodeId); if (nodeIds.isEmpty()) part2node.remove(p); } } } consistencyCheck(); } }
/** {@inheritDoc} */ @Override public Collection<ClusterNode> nodes(int p, AffinityTopologyVersion topVer) { Collection<ClusterNode> affNodes = cctx.affinity().nodes(p, topVer); lock.readLock().lock(); try { assert node2part != null && node2part.valid() : "Invalid node-to-partitions map [topVer1=" + topVer + ", topVer2=" + this.topVer + ", cache=" + cctx.name() + ", node2part=" + node2part + ']'; Collection<ClusterNode> nodes = null; Collection<UUID> nodeIds = part2node.get(p); if (!F.isEmpty(nodeIds)) { Collection<UUID> affIds = new HashSet<>(F.viewReadOnly(affNodes, F.node2id())); for (UUID nodeId : nodeIds) { if (!affIds.contains(nodeId) && hasState(p, nodeId, OWNING, MOVING, RENTING)) { ClusterNode n = cctx.discovery().node(nodeId); if (n != null && (topVer.topologyVersion() < 0 || n.order() <= topVer.topologyVersion())) { if (nodes == null) { nodes = new ArrayList<>(affNodes.size() + 2); nodes.addAll(affNodes); } nodes.add(n); } } } } return nodes != null ? nodes : affNodes; } finally { lock.readLock().unlock(); } }
/** * @param p Partition. * @param topVer Topology version ({@code -1} for all nodes). * @param state Partition state. * @param states Additional partition states. * @return List of nodes for the partition. */ private List<ClusterNode> nodes( int p, AffinityTopologyVersion topVer, GridDhtPartitionState state, GridDhtPartitionState... states) { Collection<UUID> allIds = topVer.topologyVersion() > 0 ? F.nodeIds(CU.affinityNodes(cctx, topVer)) : null; lock.readLock().lock(); try { assert node2part != null && node2part.valid() : "Invalid node-to-partitions map [topVer=" + topVer + ", allIds=" + allIds + ", node2part=" + node2part + ", cache=" + cctx.name() + ']'; Collection<UUID> nodeIds = part2node.get(p); // Node IDs can be null if both, primary and backup, nodes disappear. int size = nodeIds == null ? 0 : nodeIds.size(); if (size == 0) return Collections.emptyList(); List<ClusterNode> nodes = new ArrayList<>(size); for (UUID id : nodeIds) { if (topVer.topologyVersion() > 0 && !allIds.contains(id)) continue; if (hasState(p, id, state, states)) { ClusterNode n = cctx.discovery().node(id); if (n != null && (topVer.topologyVersion() < 0 || n.order() <= topVer.topologyVersion())) nodes.add(n); } } return nodes; } finally { lock.readLock().unlock(); } }
/** {@inheritDoc} */ @Override public GridDhtPartitionState partitionState(UUID nodeId, int part) { lock.readLock().lock(); try { GridDhtPartitionMap partMap = node2part.get(nodeId); if (partMap != null) { GridDhtPartitionState state = partMap.get(part); return state == null ? EVICTED : state; } return EVICTED; } finally { lock.readLock().unlock(); } }
/** * @param p Partition. * @param nodeId Node ID. * @param match State to match. * @param matches Additional states. * @return Filter for owners of this partition. */ private boolean hasState( final int p, @Nullable UUID nodeId, final GridDhtPartitionState match, final GridDhtPartitionState... matches) { if (nodeId == null) return false; GridDhtPartitionMap parts = node2part.get(nodeId); // Set can be null if node has been removed. if (parts != null) { GridDhtPartitionState state = parts.get(p); if (state == match) return true; if (matches != null && matches.length > 0) for (GridDhtPartitionState s : matches) if (state == s) return true; } return false; }
/** {@inheritDoc} */ @SuppressWarnings({"MismatchedQueryAndUpdateOfCollection"}) @Nullable @Override public GridDhtPartitionMap update( @Nullable GridDhtPartitionExchangeId exchId, GridDhtPartitionFullMap partMap) { if (log.isDebugEnabled()) log.debug( "Updating full partition map [exchId=" + exchId + ", parts=" + fullMapString() + ']'); assert partMap != null; lock.writeLock().lock(); try { if (stopping) return null; if (exchId != null && lastExchangeId != null && lastExchangeId.compareTo(exchId) >= 0) { if (log.isDebugEnabled()) log.debug( "Stale exchange id for full partition map update (will ignore) [lastExchId=" + lastExchangeId + ", exchId=" + exchId + ']'); return null; } if (node2part != null && node2part.compareTo(partMap) >= 0) { if (log.isDebugEnabled()) log.debug( "Stale partition map for full partition map update (will ignore) [lastExchId=" + lastExchangeId + ", exchId=" + exchId + ", curMap=" + node2part + ", newMap=" + partMap + ']'); return null; } long updateSeq = this.updateSeq.incrementAndGet(); if (exchId != null) lastExchangeId = exchId; if (node2part != null) { for (GridDhtPartitionMap part : node2part.values()) { GridDhtPartitionMap newPart = partMap.get(part.nodeId()); // If for some nodes current partition has a newer map, // then we keep the newer value. if (newPart != null && newPart.updateSequence() < part.updateSequence()) { if (log.isDebugEnabled()) log.debug( "Overriding partition map in full update map [exchId=" + exchId + ", curPart=" + mapString(part) + ", newPart=" + mapString(newPart) + ']'); partMap.put(part.nodeId(), part); } } for (Iterator<UUID> it = partMap.keySet().iterator(); it.hasNext(); ) { UUID nodeId = it.next(); if (!cctx.discovery().alive(nodeId)) { if (log.isDebugEnabled()) log.debug( "Removing left node from full map update [nodeId=" + nodeId + ", partMap=" + partMap + ']'); it.remove(); } } } node2part = partMap; Map<Integer, Set<UUID>> p2n = new HashMap<>(cctx.affinity().partitions(), 1.0f); for (Map.Entry<UUID, GridDhtPartitionMap> e : partMap.entrySet()) { for (Integer p : e.getValue().keySet()) { Set<UUID> ids = p2n.get(p); if (ids == null) // Initialize HashSet to size 3 in anticipation that there won't be // more than 3 nodes per partitions. p2n.put(p, ids = U.newHashSet(3)); ids.add(e.getKey()); } } part2node = p2n; boolean changed = checkEvictions(updateSeq); consistencyCheck(); if (log.isDebugEnabled()) log.debug("Partition map after full update: " + fullMapString()); return changed ? localPartitionMap() : null; } finally { lock.writeLock().unlock(); } }
/** {@inheritDoc} */ @Override public void beforeExchange(GridDhtPartitionsExchangeFuture exchFut) throws IgniteCheckedException { waitForRent(); ClusterNode loc = cctx.localNode(); int num = cctx.affinity().partitions(); lock.writeLock().lock(); try { GridDhtPartitionExchangeId exchId = exchFut.exchangeId(); if (stopping) return; assert topVer.equals(exchId.topologyVersion()) : "Invalid topology version [topVer=" + topVer + ", exchId=" + exchId + ']'; if (exchId.isLeft()) removeNode(exchId.nodeId()); // In case if node joins, get topology at the time of joining node. ClusterNode oldest = CU.oldestAliveCacheServerNode(cctx.shared(), topVer); assert oldest != null; if (log.isDebugEnabled()) log.debug( "Partition map beforeExchange [exchId=" + exchId + ", fullMap=" + fullMapString() + ']'); long updateSeq = this.updateSeq.incrementAndGet(); // If this is the oldest node. if (oldest.id().equals(loc.id()) || exchFut.isCacheAdded(cctx.cacheId(), exchId.topologyVersion())) { if (node2part == null) { node2part = new GridDhtPartitionFullMap(oldest.id(), oldest.order(), updateSeq); if (log.isDebugEnabled()) log.debug( "Created brand new full topology map on oldest node [exchId=" + exchId + ", fullMap=" + fullMapString() + ']'); } else if (!node2part.valid()) { node2part = new GridDhtPartitionFullMap(oldest.id(), oldest.order(), updateSeq, node2part, false); if (log.isDebugEnabled()) log.debug( "Created new full topology map on oldest node [exchId=" + exchId + ", fullMap=" + node2part + ']'); } else if (!node2part.nodeId().equals(loc.id())) { node2part = new GridDhtPartitionFullMap(oldest.id(), oldest.order(), updateSeq, node2part, false); if (log.isDebugEnabled()) log.debug( "Copied old map into new map on oldest node (previous oldest node left) [exchId=" + exchId + ", fullMap=" + fullMapString() + ']'); } } if (cctx.rebalanceEnabled()) { for (int p = 0; p < num; p++) { // If this is the first node in grid. boolean added = exchFut.isCacheAdded(cctx.cacheId(), exchId.topologyVersion()); if ((oldest.id().equals(loc.id()) && oldest.id().equals(exchId.nodeId()) && exchId.isJoined()) || added) { assert exchId.isJoined() || added; try { GridDhtLocalPartition locPart = localPartition(p, topVer, true, false); assert locPart != null; boolean owned = locPart.own(); assert owned : "Failed to own partition for oldest node [cacheName" + cctx.name() + ", part=" + locPart + ']'; if (log.isDebugEnabled()) log.debug("Owned partition for oldest node: " + locPart); updateLocal(p, loc.id(), locPart.state(), updateSeq); } catch (GridDhtInvalidPartitionException e) { if (log.isDebugEnabled()) log.debug( "Ignoring invalid partition on oldest node (no need to create a partition " + "if it no longer belongs to local node: " + e.partition()); } } // If this is not the first node in grid. else { if (node2part != null && node2part.valid()) { if (cctx.affinity().localNode(p, topVer)) { try { // This will make sure that all non-existing partitions // will be created in MOVING state. GridDhtLocalPartition locPart = localPartition(p, topVer, true, false); updateLocal(p, loc.id(), locPart.state(), updateSeq); } catch (GridDhtInvalidPartitionException e) { if (log.isDebugEnabled()) log.debug( "Ignoring invalid partition (no need to create a partition if it " + "no longer belongs to local node: " + e.partition()); } } } // If this node's map is empty, we pre-create local partitions, // so local map will be sent correctly during exchange. else if (cctx.affinity().localNode(p, topVer)) { try { localPartition(p, topVer, true, false); } catch (GridDhtInvalidPartitionException e) { if (log.isDebugEnabled()) log.debug( "Ignoring invalid partition (no need to pre-create a partition if it " + "no longer belongs to local node: " + e.partition()); } } } } } else { // If preloader is disabled, then we simply clear out // the partitions this node is not responsible for. for (int p = 0; p < num; p++) { GridDhtLocalPartition locPart = localPartition(p, topVer, false, false); boolean belongs = cctx.affinity().localNode(p, topVer); if (locPart != null) { if (!belongs) { GridDhtPartitionState state = locPart.state(); if (state.active()) { locPart.rent(false); updateLocal(p, loc.id(), locPart.state(), updateSeq); if (log.isDebugEnabled()) log.debug( "Evicting partition with rebalancing disabled " + "(it does not belong to affinity): " + locPart); } } } else if (belongs) { try { // Pre-create partitions. localPartition(p, topVer, true, false); } catch (GridDhtInvalidPartitionException e) { if (log.isDebugEnabled()) log.debug( "Ignoring invalid partition with disabled rebalancer (no need to " + "pre-create a partition if it no longer belongs to local node: " + e.partition()); } } } } if (node2part != null && node2part.valid()) checkEvictions(updateSeq); consistencyCheck(); if (log.isDebugEnabled()) log.debug( "Partition map after beforeExchange [exchId=" + exchId + ", fullMap=" + fullMapString() + ']'); } finally { lock.writeLock().unlock(); } // Wait for evictions. waitForRent(); }
/** * Updates value for single partition. * * @param p Partition. * @param nodeId Node ID. * @param state State. * @param updateSeq Update sequence. */ @SuppressWarnings({"MismatchedQueryAndUpdateOfCollection"}) private void updateLocal(int p, UUID nodeId, GridDhtPartitionState state, long updateSeq) { assert lock.isWriteLockedByCurrentThread(); assert nodeId.equals(cctx.nodeId()); // In case if node joins, get topology at the time of joining node. ClusterNode oldest = CU.oldestAliveCacheServerNode(cctx.shared(), topVer); assert oldest != null; // If this node became the oldest node. if (oldest.id().equals(cctx.nodeId())) { long seq = node2part.updateSequence(); if (seq != updateSeq) { if (seq > updateSeq) { if (this.updateSeq.get() < seq) { // Update global counter if necessary. boolean b = this.updateSeq.compareAndSet(this.updateSeq.get(), seq + 1); assert b : "Invalid update sequence [updateSeq=" + updateSeq + ", seq=" + seq + ", curUpdateSeq=" + this.updateSeq.get() + ", node2part=" + node2part.toFullString() + ']'; updateSeq = seq + 1; } else updateSeq = seq; } node2part.updateSequence(updateSeq); } } GridDhtPartitionMap map = node2part.get(nodeId); if (map == null) node2part.put( nodeId, map = new GridDhtPartitionMap( nodeId, updateSeq, Collections.<Integer, GridDhtPartitionState>emptyMap(), false)); map.updateSequence(updateSeq); map.put(p, state); Set<UUID> ids = part2node.get(p); if (ids == null) part2node.put(p, ids = U.newHashSet(3)); ids.add(nodeId); }
/** @return Full map string representation. */ @SuppressWarnings({"ConstantConditions"}) private String fullMapString() { return node2part == null ? "null" : FULL_MAP_DEBUG ? node2part.toFullString() : node2part.toString(); }
/** {@inheritDoc} */ @SuppressWarnings({"MismatchedQueryAndUpdateOfCollection"}) @Nullable @Override public GridDhtPartitionMap update( @Nullable GridDhtPartitionExchangeId exchId, GridDhtPartitionMap parts) { if (log.isDebugEnabled()) log.debug( "Updating single partition map [exchId=" + exchId + ", parts=" + mapString(parts) + ']'); if (!cctx.discovery().alive(parts.nodeId())) { if (log.isDebugEnabled()) log.debug( "Received partition update for non-existing node (will ignore) [exchId=" + exchId + ", parts=" + parts + ']'); return null; } lock.writeLock().lock(); try { if (stopping) return null; if (lastExchangeId != null && exchId != null && lastExchangeId.compareTo(exchId) > 0) { if (log.isDebugEnabled()) log.debug( "Stale exchange id for single partition map update (will ignore) [lastExchId=" + lastExchangeId + ", exchId=" + exchId + ']'); return null; } if (exchId != null) lastExchangeId = exchId; if (node2part == null) // Create invalid partition map. node2part = new GridDhtPartitionFullMap(); GridDhtPartitionMap cur = node2part.get(parts.nodeId()); if (cur != null && cur.updateSequence() >= parts.updateSequence()) { if (log.isDebugEnabled()) log.debug( "Stale update sequence for single partition map update (will ignore) [exchId=" + exchId + ", curSeq=" + cur.updateSequence() + ", newSeq=" + parts.updateSequence() + ']'); return null; } long updateSeq = this.updateSeq.incrementAndGet(); node2part = new GridDhtPartitionFullMap(node2part, updateSeq); boolean changed = false; if (cur == null || !cur.equals(parts)) changed = true; node2part.put(parts.nodeId(), parts); part2node = new HashMap<>(part2node); // Add new mappings. for (Integer p : parts.keySet()) { Set<UUID> ids = part2node.get(p); if (ids == null) // Initialize HashSet to size 3 in anticipation that there won't be // more than 3 nodes per partition. part2node.put(p, ids = U.newHashSet(3)); changed |= ids.add(parts.nodeId()); } // Remove obsolete mappings. if (cur != null) { for (Integer p : F.view(cur.keySet(), F0.notIn(parts.keySet()))) { Set<UUID> ids = part2node.get(p); if (ids != null) changed |= ids.remove(parts.nodeId()); } } changed |= checkEvictions(updateSeq); consistencyCheck(); if (log.isDebugEnabled()) log.debug("Partition map after single update: " + fullMapString()); return changed ? localPartitionMap() : null; } finally { lock.writeLock().unlock(); } }