/** {@inheritDoc} */
  @Override
  public Collection<ClusterNode> nodes(int p, AffinityTopologyVersion topVer) {
    Collection<ClusterNode> affNodes = cctx.affinity().nodes(p, topVer);

    lock.readLock().lock();

    try {
      assert node2part != null && node2part.valid()
          : "Invalid node-to-partitions map [topVer1="
              + topVer
              + ", topVer2="
              + this.topVer
              + ", cache="
              + cctx.name()
              + ", node2part="
              + node2part
              + ']';

      Collection<ClusterNode> nodes = null;

      Collection<UUID> nodeIds = part2node.get(p);

      if (!F.isEmpty(nodeIds)) {
        Collection<UUID> affIds = new HashSet<>(F.viewReadOnly(affNodes, F.node2id()));

        for (UUID nodeId : nodeIds) {
          if (!affIds.contains(nodeId) && hasState(p, nodeId, OWNING, MOVING, RENTING)) {
            ClusterNode n = cctx.discovery().node(nodeId);

            if (n != null
                && (topVer.topologyVersion() < 0 || n.order() <= topVer.topologyVersion())) {
              if (nodes == null) {
                nodes = new ArrayList<>(affNodes.size() + 2);

                nodes.addAll(affNodes);
              }

              nodes.add(n);
            }
          }
        }
      }

      return nodes != null ? nodes : affNodes;
    } finally {
      lock.readLock().unlock();
    }
  }
  /** {@inheritDoc} */
  @Override
  public GridDhtPartitionMap localPartitionMap() {
    lock.readLock().lock();

    try {
      return new GridDhtPartitionMap(
          cctx.nodeId(), updateSeq.get(), F.viewReadOnly(locParts, CU.part2state()), true);
    } finally {
      lock.readLock().unlock();
    }
  }
  /**
   * @param p Partition.
   * @param topVer Topology version ({@code -1} for all nodes).
   * @param state Partition state.
   * @param states Additional partition states.
   * @return List of nodes for the partition.
   */
  private List<ClusterNode> nodes(
      int p,
      AffinityTopologyVersion topVer,
      GridDhtPartitionState state,
      GridDhtPartitionState... states) {
    Collection<UUID> allIds =
        topVer.topologyVersion() > 0 ? F.nodeIds(CU.affinityNodes(cctx, topVer)) : null;

    lock.readLock().lock();

    try {
      assert node2part != null && node2part.valid()
          : "Invalid node-to-partitions map [topVer="
              + topVer
              + ", allIds="
              + allIds
              + ", node2part="
              + node2part
              + ", cache="
              + cctx.name()
              + ']';

      Collection<UUID> nodeIds = part2node.get(p);

      // Node IDs can be null if both, primary and backup, nodes disappear.
      int size = nodeIds == null ? 0 : nodeIds.size();

      if (size == 0) return Collections.emptyList();

      List<ClusterNode> nodes = new ArrayList<>(size);

      for (UUID id : nodeIds) {
        if (topVer.topologyVersion() > 0 && !allIds.contains(id)) continue;

        if (hasState(p, id, state, states)) {
          ClusterNode n = cctx.discovery().node(id);

          if (n != null && (topVer.topologyVersion() < 0 || n.order() <= topVer.topologyVersion()))
            nodes.add(n);
        }
      }

      return nodes;
    } finally {
      lock.readLock().unlock();
    }
  }
  /** Clears values for this partition. */
  private void clearAll() {
    GridCacheVersion clearVer = cctx.versions().next();

    boolean swap = cctx.isSwapOrOffheapEnabled();

    boolean rec = cctx.events().isRecordable(EVT_CACHE_REBALANCE_OBJECT_UNLOADED);

    Iterator<GridDhtCacheEntry> it = map.values().iterator();

    GridCloseableIterator<Map.Entry<byte[], GridCacheSwapEntry>> swapIt = null;

    if (swap
        && GridQueryProcessor.isEnabled(cctx.config())) { // Indexing needs to unswap cache values.
      Iterator<GridDhtCacheEntry> unswapIt = null;

      try {
        swapIt = cctx.swap().iterator(id);
        unswapIt = unswapIterator(swapIt);
      } catch (Exception e) {
        U.error(log, "Failed to clear swap for evicted partition: " + this, e);
      }

      if (unswapIt != null) it = F.concat(it, unswapIt);
    }

    try {
      while (it.hasNext()) {
        GridDhtCacheEntry cached = it.next();

        try {
          if (cached.clearInternal(clearVer, swap)) {
            map.remove(cached.key(), cached);

            if (!cached.isInternal()) {
              mapPubSize.decrement();

              if (rec)
                cctx.events()
                    .addEvent(
                        cached.partition(),
                        cached.key(),
                        cctx.localNodeId(),
                        (IgniteUuid) null,
                        null,
                        EVT_CACHE_REBALANCE_OBJECT_UNLOADED,
                        null,
                        false,
                        cached.rawGet(),
                        cached.hasValue(),
                        null,
                        null,
                        null);
            }
          }
        } catch (IgniteCheckedException e) {
          U.error(log, "Failed to clear cache entry for evicted partition: " + cached, e);
        }
      }
    } finally {
      U.close(swapIt, log);
    }
  }
  /** {@inheritDoc} */
  @Override
  public boolean afterExchange(GridDhtPartitionsExchangeFuture exchFut)
      throws IgniteCheckedException {
    boolean changed = waitForRent();

    ClusterNode loc = cctx.localNode();

    int num = cctx.affinity().partitions();

    AffinityTopologyVersion topVer = exchFut.topologyVersion();

    lock.writeLock().lock();

    try {
      if (stopping) return false;

      assert topVer.equals(exchFut.topologyVersion())
          : "Invalid topology version [topVer=" + topVer + ", exchId=" + exchFut.exchangeId() + ']';

      if (log.isDebugEnabled())
        log.debug(
            "Partition map before afterExchange [exchId="
                + exchFut.exchangeId()
                + ", fullMap="
                + fullMapString()
                + ']');

      long updateSeq = this.updateSeq.incrementAndGet();

      for (int p = 0; p < num; p++) {
        GridDhtLocalPartition locPart = localPartition(p, topVer, false, false);

        if (cctx.affinity().localNode(p, topVer)) {
          // This partition will be created during next topology event,
          // which obviously has not happened at this point.
          if (locPart == null) {
            if (log.isDebugEnabled())
              log.debug("Skipping local partition afterExchange (will not create): " + p);

            continue;
          }

          GridDhtPartitionState state = locPart.state();

          if (state == MOVING) {
            if (cctx.rebalanceEnabled()) {
              Collection<ClusterNode> owners = owners(p);

              // If there are no other owners, then become an owner.
              if (F.isEmpty(owners)) {
                boolean owned = locPart.own();

                assert owned
                    : "Failed to own partition [cacheName"
                        + cctx.name()
                        + ", locPart="
                        + locPart
                        + ']';

                updateLocal(p, loc.id(), locPart.state(), updateSeq);

                changed = true;

                if (cctx.events().isRecordable(EVT_CACHE_REBALANCE_PART_DATA_LOST)) {
                  DiscoveryEvent discoEvt = exchFut.discoveryEvent();

                  cctx.events()
                      .addPreloadEvent(
                          p,
                          EVT_CACHE_REBALANCE_PART_DATA_LOST,
                          discoEvt.eventNode(),
                          discoEvt.type(),
                          discoEvt.timestamp());
                }

                if (log.isDebugEnabled()) log.debug("Owned partition: " + locPart);
              } else if (log.isDebugEnabled())
                log.debug(
                    "Will not own partition (there are owners to rebalance from) [locPart="
                        + locPart
                        + ", owners = "
                        + owners
                        + ']');
            } else updateLocal(p, loc.id(), locPart.state(), updateSeq);
          }
        } else {
          if (locPart != null) {
            GridDhtPartitionState state = locPart.state();

            if (state == MOVING) {
              locPart.rent(false);

              updateLocal(p, loc.id(), locPart.state(), updateSeq);

              changed = true;

              if (log.isDebugEnabled())
                log.debug("Evicting moving partition (it does not belong to affinity): " + locPart);
            }
          }
        }
      }

      consistencyCheck();
    } finally {
      lock.writeLock().unlock();
    }

    return changed;
  }
  /**
   * @param updateSeq Update sequence.
   * @return Checks if any of the local partitions need to be evicted.
   */
  private boolean checkEvictions(long updateSeq) {
    assert lock.isWriteLockedByCurrentThread();

    boolean changed = false;

    UUID locId = cctx.nodeId();

    for (GridDhtLocalPartition part : locParts.values()) {
      GridDhtPartitionState state = part.state();

      if (state.active()) {
        int p = part.id();

        List<ClusterNode> affNodes = cctx.affinity().nodes(p, topVer);

        if (!affNodes.contains(cctx.localNode())) {
          Collection<UUID> nodeIds = F.nodeIds(nodes(p, topVer, OWNING));

          // If all affinity nodes are owners, then evict partition from local node.
          if (nodeIds.containsAll(F.nodeIds(affNodes))) {
            part.rent(false);

            updateLocal(part.id(), locId, part.state(), updateSeq);

            changed = true;

            if (log.isDebugEnabled())
              log.debug("Evicted local partition (all affinity nodes are owners): " + part);
          } else {
            int ownerCnt = nodeIds.size();
            int affCnt = affNodes.size();

            if (ownerCnt > affCnt) {
              List<ClusterNode> sorted = new ArrayList<>(cctx.discovery().nodes(nodeIds));

              // Sort by node orders in ascending order.
              Collections.sort(sorted, CU.nodeComparator(true));

              int diff = sorted.size() - affCnt;

              for (int i = 0; i < diff; i++) {
                ClusterNode n = sorted.get(i);

                if (locId.equals(n.id())) {
                  part.rent(false);

                  updateLocal(part.id(), locId, part.state(), updateSeq);

                  changed = true;

                  if (log.isDebugEnabled())
                    log.debug(
                        "Evicted local partition (this node is oldest non-affinity node): " + part);

                  break;
                }
              }
            }
          }
        }
      }
    }

    return changed;
  }
  /** {@inheritDoc} */
  @SuppressWarnings({"MismatchedQueryAndUpdateOfCollection"})
  @Nullable
  @Override
  public GridDhtPartitionMap update(
      @Nullable GridDhtPartitionExchangeId exchId, GridDhtPartitionMap parts) {
    if (log.isDebugEnabled())
      log.debug(
          "Updating single partition map [exchId=" + exchId + ", parts=" + mapString(parts) + ']');

    if (!cctx.discovery().alive(parts.nodeId())) {
      if (log.isDebugEnabled())
        log.debug(
            "Received partition update for non-existing node (will ignore) [exchId="
                + exchId
                + ", parts="
                + parts
                + ']');

      return null;
    }

    lock.writeLock().lock();

    try {
      if (stopping) return null;

      if (lastExchangeId != null && exchId != null && lastExchangeId.compareTo(exchId) > 0) {
        if (log.isDebugEnabled())
          log.debug(
              "Stale exchange id for single partition map update (will ignore) [lastExchId="
                  + lastExchangeId
                  + ", exchId="
                  + exchId
                  + ']');

        return null;
      }

      if (exchId != null) lastExchangeId = exchId;

      if (node2part == null)
        // Create invalid partition map.
        node2part = new GridDhtPartitionFullMap();

      GridDhtPartitionMap cur = node2part.get(parts.nodeId());

      if (cur != null && cur.updateSequence() >= parts.updateSequence()) {
        if (log.isDebugEnabled())
          log.debug(
              "Stale update sequence for single partition map update (will ignore) [exchId="
                  + exchId
                  + ", curSeq="
                  + cur.updateSequence()
                  + ", newSeq="
                  + parts.updateSequence()
                  + ']');

        return null;
      }

      long updateSeq = this.updateSeq.incrementAndGet();

      node2part = new GridDhtPartitionFullMap(node2part, updateSeq);

      boolean changed = false;

      if (cur == null || !cur.equals(parts)) changed = true;

      node2part.put(parts.nodeId(), parts);

      part2node = new HashMap<>(part2node);

      // Add new mappings.
      for (Integer p : parts.keySet()) {
        Set<UUID> ids = part2node.get(p);

        if (ids == null)
          // Initialize HashSet to size 3 in anticipation that there won't be
          // more than 3 nodes per partition.
          part2node.put(p, ids = U.newHashSet(3));

        changed |= ids.add(parts.nodeId());
      }

      // Remove obsolete mappings.
      if (cur != null) {
        for (Integer p : F.view(cur.keySet(), F0.notIn(parts.keySet()))) {
          Set<UUID> ids = part2node.get(p);

          if (ids != null) changed |= ids.remove(parts.nodeId());
        }
      }

      changed |= checkEvictions(updateSeq);

      consistencyCheck();

      if (log.isDebugEnabled()) log.debug("Partition map after single update: " + fullMapString());

      return changed ? localPartitionMap() : null;
    } finally {
      lock.writeLock().unlock();
    }
  }