/**
   * Adds key mapping to dht mapping.
   *
   * @param key Key to add.
   * @param node Node this key mapped to.
   */
  public void addKeyMapping(IgniteTxKey key, ClusterNode node) {
    GridDistributedTxMapping m = mappings.get(node.id());

    if (m == null) mappings.put(m = new GridDistributedTxMapping(node));

    IgniteTxEntry txEntry = entry(key);

    assert txEntry != null;

    txEntry.nodeId(node.id());

    m.add(txEntry);

    if (log.isDebugEnabled())
      log.debug(
          "Added mappings to transaction [locId="
              + cctx.localNodeId()
              + ", key="
              + key
              + ", node="
              + node
              + ", tx="
              + this
              + ']');
  }
  /** @param m Mapping. */
  private void finish(GridDistributedTxMapping m) {
    ClusterNode n = m.node();

    assert !m.empty();

    GridNearTxFinishRequest req =
        new GridNearTxFinishRequest(
            futId,
            tx.xidVersion(),
            tx.threadId(),
            commit,
            tx.isInvalidate(),
            tx.system(),
            tx.ioPolicy(),
            tx.syncCommit(),
            tx.syncRollback(),
            m.explicitLock(),
            tx.storeEnabled(),
            tx.topologyVersion(),
            null,
            null,
            null,
            tx.size(),
            tx.subjectId(),
            tx.taskNameHash(),
            tx.activeCachesDeploymentEnabled());

    // If this is the primary node for the keys.
    if (n.isLocal()) {
      req.miniId(IgniteUuid.randomUuid());

      IgniteInternalFuture<IgniteInternalTx> fut = cctx.tm().txHandler().finish(n.id(), tx, req);

      // Add new future.
      if (fut != null) add(fut);
    } else {
      FinishMiniFuture fut = new FinishMiniFuture(m);

      req.miniId(fut.futureId());

      add(fut); // Append new future.

      if (tx.pessimistic()) cctx.tm().beforeFinishRemote(n.id(), tx.threadId());

      try {
        cctx.io().send(n, req, tx.ioPolicy());

        // If we don't wait for result, then mark future as done.
        if (!isSync() && !m.explicitLock()) fut.onDone();
      } catch (ClusterTopologyCheckedException e) {
        // Remove previous mapping.
        mappings.remove(m.node().id());

        fut.onNodeLeft(n.id());
      } catch (IgniteCheckedException e) {
        // Fail the whole thing.
        fut.onDone(e);
      }
    }
  }
  private void sendPartitions() {
    ClusterNode oldestNode = this.oldestNode.get();

    try {
      sendLocalPartitions(oldestNode, exchId);
    } catch (ClusterTopologyCheckedException ignore) {
      if (log.isDebugEnabled())
        log.debug(
            "Oldest node left during partition exchange [nodeId="
                + oldestNode.id()
                + ", exchId="
                + exchId
                + ']');
    } catch (IgniteCheckedException e) {
      scheduleRecheck();

      U.error(
          log,
          "Failed to send local partitions to oldest node (will retry after timeout) [oldestNodeId="
              + oldestNode.id()
              + ", exchId="
              + exchId
              + ']',
          e);
    }
  }
  /** {@inheritDoc} */
  @Override
  public UUID localNodeId() {
    if (locNode != null) return locNode.id();

    if (discoMgr != null) locNode = discoMgr.localNode();

    return locNode != null ? locNode.id() : config().getNodeId();
  }
Exemple #5
0
  /**
   * Set source nodes.
   *
   * @param nodes Nodes.
   */
  public void setSources(Collection<ClusterNode> nodes) {
    assert remainingRows == null;

    remainingRows = U.newHashMap(nodes.size());

    for (ClusterNode node : nodes) {
      if (remainingRows.put(node.id(), new Counter()) != null)
        throw new IllegalStateException("Duplicate node id: " + node.id());
    }
  }
  /**
   * Converts collection of rich nodes to block location data.
   *
   * @param nodes Collection of affinity nodes.
   */
  private void convertFromNodes(Collection<ClusterNode> nodes) {
    Collection<String> names = new LinkedHashSet<>();
    Collection<String> hosts = new LinkedHashSet<>();
    Collection<UUID> nodeIds = new ArrayList<>(nodes.size());

    for (final ClusterNode node : nodes) {
      // Normalize host names into Hadoop-expected format.
      try {
        Collection<InetAddress> addrs = U.toInetAddresses(node);

        for (InetAddress addr : addrs) {
          if (addr.getHostName() == null) names.add(addr.getHostAddress() + ":" + 9001);
          else {
            names.add(addr.getHostName() + ":" + 9001); // hostname:portNumber
            hosts.add(addr.getHostName());
          }
        }
      } catch (IgniteCheckedException ignored) {
        names.addAll(node.addresses());
      }

      nodeIds.add(node.id());
    }

    this.nodeIds = nodeIds;
    this.names = names;
    this.hosts = hosts;
  }
  /** @param maps Mappings. */
  void addEntryMapping(@Nullable Collection<GridDistributedTxMapping> maps) {
    if (!F.isEmpty(maps)) {
      for (GridDistributedTxMapping map : maps) {
        ClusterNode n = map.node();

        GridDistributedTxMapping m = mappings.get(n.id());

        if (m == null) {
          mappings.put(m = new GridDistributedTxMapping(n));

          m.near(map.near());

          if (map.explicitLock()) m.markExplicitLock();
        }

        for (IgniteTxEntry entry : map.entries()) m.add(entry);
      }

      if (log.isDebugEnabled())
        log.debug(
            "Added mappings to transaction [locId="
                + cctx.localNodeId()
                + ", mappings="
                + maps
                + ", tx="
                + this
                + ']');
    }
  }
  /**
   * Sends query request.
   *
   * @param fut Distributed future.
   * @param req Request.
   * @param nodes Nodes.
   * @throws IgniteCheckedException In case of error.
   */
  @SuppressWarnings("unchecked")
  private void sendRequest(
      final GridCacheDistributedQueryFuture<?, ?, ?> fut,
      final GridCacheQueryRequest req,
      Collection<ClusterNode> nodes)
      throws IgniteCheckedException {
    assert fut != null;
    assert req != null;
    assert nodes != null;

    final UUID locNodeId = cctx.localNodeId();

    ClusterNode locNode = null;

    Collection<ClusterNode> rmtNodes = null;

    for (ClusterNode n : nodes) {
      if (n.id().equals(locNodeId)) locNode = n;
      else {
        if (rmtNodes == null) rmtNodes = new ArrayList<>(nodes.size());

        rmtNodes.add(n);
      }
    }

    // Request should be sent to remote nodes before the query is processed on the local node.
    // For example, a remote reducer has a state, we should not serialize and then send
    // the reducer changed by the local node.
    if (!F.isEmpty(rmtNodes)) {
      cctx.io()
          .safeSend(
              rmtNodes,
              req,
              cctx.ioPolicy(),
              new P1<ClusterNode>() {
                @Override
                public boolean apply(ClusterNode node) {
                  fut.onNodeLeft(node.id());

                  return !fut.isDone();
                }
              });
    }

    if (locNode != null) {
      cctx.closures()
          .callLocalSafe(
              new Callable<Object>() {
                @Override
                public Object call() throws Exception {
                  req.beforeLocalExecution(cctx);

                  processQueryRequest(locNodeId, req);

                  return null;
                }
              });
    }
  }
  /** @param futs Futures to complete. */
  private void completeOnNodeLeft(GridNioFuture<?>[] futs) {
    for (GridNioFuture<?> msg : futs) {
      IOException e = new IOException("Failed to send message, node has left: " + node.id());

      ((GridNioFutureImpl) msg).onDone(e);

      if (msg.ackClosure() != null) msg.ackClosure().apply(new IgniteException(e));
    }
  }
  /** {@inheritDoc} */
  @Override
  public Map<? extends ComputeJob, ClusterNode> map(List<ClusterNode> subgrid, UUID arg) {
    assert arg != null;
    assert subgrid.size() > 1
        : "Test requires at least 2 nodes. One with load and another one to steal.";

    int jobsNum = subgrid.size();

    Map<GridStealingLoadTestJob, ClusterNode> map = new HashMap<>(jobsNum);

    stealingNodeId = arg;

    Iterator<ClusterNode> iter = subgrid.iterator();

    Collection<UUID> assigned = new ArrayList<>(subgrid.size());

    for (int i = 0; i < jobsNum; i++) {
      ClusterNode node = null;

      boolean nextNodeFound = false;

      while (iter.hasNext() && !nextNodeFound) {
        node = iter.next();

        // Do not map jobs to the stealing node.
        if (!node.id().equals(stealingNodeId)) nextNodeFound = true;

        // Recycle iterator.
        if (!iter.hasNext()) iter = subgrid.iterator();
      }

      assert node != null;

      assigned.add(node.id());

      map.put(new GridStealingLoadTestJob(), node);
    }

    taskSes.setAttribute("nodes", assigned);

    return map;
  }
    /** {@inheritDoc} */
    @Override
    boolean onNodeLeft(UUID nodeId) {
      if (nodeId.equals(backup.id())) {
        readyNearMappingFromBackup(m);

        onDone(new ClusterTopologyCheckedException("Remote node left grid: " + nodeId));

        return true;
      }

      return false;
    }
  /** @param nodeId Node to remove. */
  private void removeNode(UUID nodeId) {
    assert nodeId != null;
    assert lock.writeLock().isHeldByCurrentThread();

    ClusterNode oldest = CU.oldestAliveCacheServerNode(cctx.shared(), topVer);

    assert oldest != null;

    ClusterNode loc = cctx.localNode();

    if (node2part != null) {
      if (oldest.equals(loc) && !node2part.nodeId().equals(loc.id())) {
        updateSeq.setIfGreater(node2part.updateSequence());

        node2part =
            new GridDhtPartitionFullMap(
                loc.id(), loc.order(), updateSeq.incrementAndGet(), node2part, false);
      } else node2part = new GridDhtPartitionFullMap(node2part, node2part.updateSequence());

      part2node = new HashMap<>(part2node);

      GridDhtPartitionMap parts = node2part.remove(nodeId);

      if (parts != null) {
        for (Integer p : parts.keySet()) {
          Set<UUID> nodeIds = part2node.get(p);

          if (nodeIds != null) {
            nodeIds.remove(nodeId);

            if (nodeIds.isEmpty()) part2node.remove(p);
          }
        }
      }

      consistencyCheck();
    }
  }
  /**
   * @param r Query run.
   * @param nodes Nodes to check periodically if they alive.
   * @throws IgniteInterruptedCheckedException If interrupted.
   */
  private void awaitAllReplies(QueryRun r, Collection<ClusterNode> nodes)
      throws IgniteInterruptedCheckedException {
    while (!U.await(r.latch, 500, TimeUnit.MILLISECONDS)) {
      for (ClusterNode node : nodes) {
        if (!ctx.discovery().alive(node)) {
          handleNodeLeft(r, node.id());

          assert r.latch.getCount() == 0;

          return;
        }
      }
    }
  }
  /** @return {@code True} if */
  public boolean jobUpdateLeader() {
    long minOrder = Long.MAX_VALUE;
    ClusterNode minOrderNode = null;

    for (ClusterNode node : nodes()) {
      if (node.order() < minOrder) {
        minOrder = node.order();
        minOrderNode = node;
      }
    }

    assert minOrderNode != null;

    return localNodeId().equals(minOrderNode.id());
  }
  /** {@inheritDoc} */
  @Override
  public String toString() {
    ClusterNode oldestNode = this.oldestNode.get();

    return S.toString(
        GridDhtPartitionsExchangeFuture.class,
        this,
        "oldest",
        oldestNode == null ? "null" : oldestNode.id(),
        "oldestOrder",
        oldestNode == null ? "null" : oldestNode.order(),
        "evtLatch",
        evtLatch == null ? "null" : evtLatch.getCount(),
        "remaining",
        remaining(),
        "super",
        super.toString());
  }
  /**
   * Send delete message to all meta cache nodes in the grid.
   *
   * @param msg Message to send.
   */
  private void sendDeleteMessage(IgfsDeleteMessage msg) {
    assert msg != null;

    Collection<ClusterNode> nodes = meta.metaCacheNodes();

    for (ClusterNode node : nodes) {
      try {
        igfsCtx.send(node, topic, msg, GridIoPolicy.SYSTEM_POOL);
      } catch (IgniteCheckedException e) {
        U.warn(
            log,
            "Failed to send IGFS delete message to node [nodeId="
                + node.id()
                + ", msg="
                + msg
                + ", err="
                + e.getMessage()
                + ']');
      }
    }
  }
  /** {@inheritDoc} */
  @Override
  public boolean own(GridDhtLocalPartition part) {
    ClusterNode loc = cctx.localNode();

    lock.writeLock().lock();

    try {
      if (part.own()) {
        updateLocal(part.id(), loc.id(), part.state(), updateSeq.incrementAndGet());

        consistencyCheck();

        return true;
      }

      consistencyCheck();

      return false;
    } finally {
      lock.writeLock().unlock();
    }
  }
  /**
   * @param node Node.
   * @param id ID.
   * @throws IgniteCheckedException If failed.
   */
  private void sendLocalPartitions(ClusterNode node, @Nullable GridDhtPartitionExchangeId id)
      throws IgniteCheckedException {
    GridDhtPartitionsSingleMessage m =
        new GridDhtPartitionsSingleMessage(
            id, cctx.kernalContext().clientNode(), cctx.versions().last());

    for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
      if (!cacheCtx.isLocal())
        m.addLocalPartitionMap(cacheCtx.cacheId(), cacheCtx.topology().localPartitionMap());
    }

    if (log.isDebugEnabled())
      log.debug(
          "Sending local partitions [nodeId="
              + node.id()
              + ", exchId="
              + exchId
              + ", msg="
              + m
              + ']');

    cctx.io().send(node, m, SYSTEM_POOL);
  }
  /**
   * Adds future.
   *
   * @param fut Future.
   * @return {@code True} if added.
   */
  @SuppressWarnings({"SynchronizationOnLocalVariableOrMethodParameter"})
  public boolean addFuture(final GridCacheFuture<?> fut) {
    if (fut.isDone()) {
      fut.markNotTrackable();

      return true;
    }

    if (!fut.trackable()) return true;

    while (true) {
      Collection<GridCacheFuture<?>> old =
          futs.putIfAbsent(
              fut.version(),
              new ConcurrentLinkedDeque8<GridCacheFuture<?>>() {
                /** */
                private int hash;

                {
                  // Make sure that we add future to queue before
                  // adding queue to the map of futures.
                  add(fut);
                }

                @Override
                public int hashCode() {
                  if (hash == 0) hash = System.identityHashCode(this);

                  return hash;
                }

                @Override
                public boolean equals(Object obj) {
                  return obj == this;
                }
              });

      if (old != null) {
        boolean empty, dup = false;

        synchronized (old) {
          empty = old.isEmpty();

          if (!empty) dup = old.contains(fut);

          if (!empty && !dup) old.add(fut);
        }

        // Future is being removed, so we force-remove here and try again.
        if (empty) {
          if (futs.remove(fut.version(), old)) {
            if (log.isDebugEnabled())
              log.debug("Removed future list from futures map for lock version: " + fut.version());
          }

          continue;
        }

        if (dup) {
          if (log.isDebugEnabled())
            log.debug("Found duplicate future in futures map (will not add): " + fut);

          return false;
        }
      }

      // Handle version mappings.
      if (fut instanceof GridCacheMappedVersion) {
        GridCacheVersion from = ((GridCacheMappedVersion) fut).mappedVersion();

        if (from != null) mapVersion(from, fut.version());
      }

      if (log.isDebugEnabled()) log.debug("Added future to future map: " + fut);

      break;
    }

    // Close window in case of node is gone before the future got added to
    // the map of futures.
    for (ClusterNode n : fut.nodes()) {
      if (cctx.discovery().node(n.id()) == null) fut.onNodeLeft(n.id());
    }

    // Just in case if future was completed before it was added.
    if (fut.isDone()) removeFuture(fut);
    else onFutureAdded(fut);

    return true;
  }
  /** {@inheritDoc} */
  @Override
  public boolean afterExchange(GridDhtPartitionsExchangeFuture exchFut)
      throws IgniteCheckedException {
    boolean changed = waitForRent();

    ClusterNode loc = cctx.localNode();

    int num = cctx.affinity().partitions();

    AffinityTopologyVersion topVer = exchFut.topologyVersion();

    lock.writeLock().lock();

    try {
      if (stopping) return false;

      assert topVer.equals(exchFut.topologyVersion())
          : "Invalid topology version [topVer=" + topVer + ", exchId=" + exchFut.exchangeId() + ']';

      if (log.isDebugEnabled())
        log.debug(
            "Partition map before afterExchange [exchId="
                + exchFut.exchangeId()
                + ", fullMap="
                + fullMapString()
                + ']');

      long updateSeq = this.updateSeq.incrementAndGet();

      for (int p = 0; p < num; p++) {
        GridDhtLocalPartition locPart = localPartition(p, topVer, false, false);

        if (cctx.affinity().localNode(p, topVer)) {
          // This partition will be created during next topology event,
          // which obviously has not happened at this point.
          if (locPart == null) {
            if (log.isDebugEnabled())
              log.debug("Skipping local partition afterExchange (will not create): " + p);

            continue;
          }

          GridDhtPartitionState state = locPart.state();

          if (state == MOVING) {
            if (cctx.rebalanceEnabled()) {
              Collection<ClusterNode> owners = owners(p);

              // If there are no other owners, then become an owner.
              if (F.isEmpty(owners)) {
                boolean owned = locPart.own();

                assert owned
                    : "Failed to own partition [cacheName"
                        + cctx.name()
                        + ", locPart="
                        + locPart
                        + ']';

                updateLocal(p, loc.id(), locPart.state(), updateSeq);

                changed = true;

                if (cctx.events().isRecordable(EVT_CACHE_REBALANCE_PART_DATA_LOST)) {
                  DiscoveryEvent discoEvt = exchFut.discoveryEvent();

                  cctx.events()
                      .addPreloadEvent(
                          p,
                          EVT_CACHE_REBALANCE_PART_DATA_LOST,
                          discoEvt.eventNode(),
                          discoEvt.type(),
                          discoEvt.timestamp());
                }

                if (log.isDebugEnabled()) log.debug("Owned partition: " + locPart);
              } else if (log.isDebugEnabled())
                log.debug(
                    "Will not own partition (there are owners to rebalance from) [locPart="
                        + locPart
                        + ", owners = "
                        + owners
                        + ']');
            } else updateLocal(p, loc.id(), locPart.state(), updateSeq);
          }
        } else {
          if (locPart != null) {
            GridDhtPartitionState state = locPart.state();

            if (state == MOVING) {
              locPart.rent(false);

              updateLocal(p, loc.id(), locPart.state(), updateSeq);

              changed = true;

              if (log.isDebugEnabled())
                log.debug("Evicting moving partition (it does not belong to affinity): " + locPart);
            }
          }
        }
      }

      consistencyCheck();
    } finally {
      lock.writeLock().unlock();
    }

    return changed;
  }
  /** {@inheritDoc} */
  @Override
  public void beforeExchange(GridDhtPartitionsExchangeFuture exchFut)
      throws IgniteCheckedException {
    waitForRent();

    ClusterNode loc = cctx.localNode();

    int num = cctx.affinity().partitions();

    lock.writeLock().lock();

    try {
      GridDhtPartitionExchangeId exchId = exchFut.exchangeId();

      if (stopping) return;

      assert topVer.equals(exchId.topologyVersion())
          : "Invalid topology version [topVer=" + topVer + ", exchId=" + exchId + ']';

      if (exchId.isLeft()) removeNode(exchId.nodeId());

      // In case if node joins, get topology at the time of joining node.
      ClusterNode oldest = CU.oldestAliveCacheServerNode(cctx.shared(), topVer);

      assert oldest != null;

      if (log.isDebugEnabled())
        log.debug(
            "Partition map beforeExchange [exchId="
                + exchId
                + ", fullMap="
                + fullMapString()
                + ']');

      long updateSeq = this.updateSeq.incrementAndGet();

      // If this is the oldest node.
      if (oldest.id().equals(loc.id())
          || exchFut.isCacheAdded(cctx.cacheId(), exchId.topologyVersion())) {
        if (node2part == null) {
          node2part = new GridDhtPartitionFullMap(oldest.id(), oldest.order(), updateSeq);

          if (log.isDebugEnabled())
            log.debug(
                "Created brand new full topology map on oldest node [exchId="
                    + exchId
                    + ", fullMap="
                    + fullMapString()
                    + ']');
        } else if (!node2part.valid()) {
          node2part =
              new GridDhtPartitionFullMap(oldest.id(), oldest.order(), updateSeq, node2part, false);

          if (log.isDebugEnabled())
            log.debug(
                "Created new full topology map on oldest node [exchId="
                    + exchId
                    + ", fullMap="
                    + node2part
                    + ']');
        } else if (!node2part.nodeId().equals(loc.id())) {
          node2part =
              new GridDhtPartitionFullMap(oldest.id(), oldest.order(), updateSeq, node2part, false);

          if (log.isDebugEnabled())
            log.debug(
                "Copied old map into new map on oldest node (previous oldest node left) [exchId="
                    + exchId
                    + ", fullMap="
                    + fullMapString()
                    + ']');
        }
      }

      if (cctx.rebalanceEnabled()) {
        for (int p = 0; p < num; p++) {
          // If this is the first node in grid.
          boolean added = exchFut.isCacheAdded(cctx.cacheId(), exchId.topologyVersion());

          if ((oldest.id().equals(loc.id())
                  && oldest.id().equals(exchId.nodeId())
                  && exchId.isJoined())
              || added) {
            assert exchId.isJoined() || added;

            try {
              GridDhtLocalPartition locPart = localPartition(p, topVer, true, false);

              assert locPart != null;

              boolean owned = locPart.own();

              assert owned
                  : "Failed to own partition for oldest node [cacheName"
                      + cctx.name()
                      + ", part="
                      + locPart
                      + ']';

              if (log.isDebugEnabled()) log.debug("Owned partition for oldest node: " + locPart);

              updateLocal(p, loc.id(), locPart.state(), updateSeq);
            } catch (GridDhtInvalidPartitionException e) {
              if (log.isDebugEnabled())
                log.debug(
                    "Ignoring invalid partition on oldest node (no need to create a partition "
                        + "if it no longer belongs to local node: "
                        + e.partition());
            }
          }
          // If this is not the first node in grid.
          else {
            if (node2part != null && node2part.valid()) {
              if (cctx.affinity().localNode(p, topVer)) {
                try {
                  // This will make sure that all non-existing partitions
                  // will be created in MOVING state.
                  GridDhtLocalPartition locPart = localPartition(p, topVer, true, false);

                  updateLocal(p, loc.id(), locPart.state(), updateSeq);
                } catch (GridDhtInvalidPartitionException e) {
                  if (log.isDebugEnabled())
                    log.debug(
                        "Ignoring invalid partition (no need to create a partition if it "
                            + "no longer belongs to local node: "
                            + e.partition());
                }
              }
            }
            // If this node's map is empty, we pre-create local partitions,
            // so local map will be sent correctly during exchange.
            else if (cctx.affinity().localNode(p, topVer)) {
              try {
                localPartition(p, topVer, true, false);
              } catch (GridDhtInvalidPartitionException e) {
                if (log.isDebugEnabled())
                  log.debug(
                      "Ignoring invalid partition (no need to pre-create a partition if it "
                          + "no longer belongs to local node: "
                          + e.partition());
              }
            }
          }
        }
      } else {
        // If preloader is disabled, then we simply clear out
        // the partitions this node is not responsible for.
        for (int p = 0; p < num; p++) {
          GridDhtLocalPartition locPart = localPartition(p, topVer, false, false);

          boolean belongs = cctx.affinity().localNode(p, topVer);

          if (locPart != null) {
            if (!belongs) {
              GridDhtPartitionState state = locPart.state();

              if (state.active()) {
                locPart.rent(false);

                updateLocal(p, loc.id(), locPart.state(), updateSeq);

                if (log.isDebugEnabled())
                  log.debug(
                      "Evicting partition with rebalancing disabled "
                          + "(it does not belong to affinity): "
                          + locPart);
              }
            }
          } else if (belongs) {
            try {
              // Pre-create partitions.
              localPartition(p, topVer, true, false);
            } catch (GridDhtInvalidPartitionException e) {
              if (log.isDebugEnabled())
                log.debug(
                    "Ignoring invalid partition with disabled rebalancer (no need to "
                        + "pre-create a partition if it no longer belongs to local node: "
                        + e.partition());
            }
          }
        }
      }

      if (node2part != null && node2part.valid()) checkEvictions(updateSeq);

      consistencyCheck();

      if (log.isDebugEnabled())
        log.debug(
            "Partition map after beforeExchange [exchId="
                + exchId
                + ", fullMap="
                + fullMapString()
                + ']');
    } finally {
      lock.writeLock().unlock();
    }

    // Wait for evictions.
    waitForRent();
  }
  private void checkBackup() {
    GridDistributedTxMapping mapping = mappings.singleMapping();

    if (mapping != null) {
      UUID nodeId = mapping.node().id();

      Collection<UUID> backups = tx.transactionNodes().get(nodeId);

      if (!F.isEmpty(backups)) {
        assert backups.size() == 1;

        UUID backupId = F.first(backups);

        ClusterNode backup = cctx.discovery().node(backupId);

        // Nothing to do if backup has left the grid.
        if (backup == null) {
          readyNearMappingFromBackup(mapping);

          ClusterTopologyCheckedException cause =
              new ClusterTopologyCheckedException("Backup node left grid: " + backupId);

          cause.retryReadyFuture(cctx.nextAffinityReadyFuture(tx.topologyVersion()));

          onDone(
              new IgniteTxRollbackCheckedException(
                  "Failed to commit transaction " + "(backup has left grid): " + tx.xidVersion(),
                  cause));
        } else {
          final CheckBackupMiniFuture mini = new CheckBackupMiniFuture(backup, mapping);

          add(mini);

          if (backup.isLocal()) {
            boolean committed = !cctx.tm().addRolledbackTx(tx);

            readyNearMappingFromBackup(mapping);

            if (committed) {
              if (tx.syncCommit()) {
                GridCacheVersion nearXidVer = tx.nearXidVersion();

                assert nearXidVer != null : tx;

                IgniteInternalFuture<?> fut = cctx.tm().remoteTxFinishFuture(nearXidVer);

                fut.listen(
                    new CI1<IgniteInternalFuture<?>>() {
                      @Override
                      public void apply(IgniteInternalFuture<?> fut) {
                        mini.onDone(tx);
                      }
                    });

                return;
              }

              mini.onDone(tx);
            } else {
              ClusterTopologyCheckedException cause =
                  new ClusterTopologyCheckedException("Primary node left grid: " + nodeId);

              cause.retryReadyFuture(cctx.nextAffinityReadyFuture(tx.topologyVersion()));

              mini.onDone(
                  new IgniteTxRollbackCheckedException(
                      "Failed to commit transaction "
                          + "(transaction has been rolled back on backup node): "
                          + tx.xidVersion(),
                      cause));
            }
          } else {
            GridDhtTxFinishRequest finishReq = checkCommittedRequest(mini.futureId());

            // Preserve old behavior, otherwise response is not sent.
            if (WAIT_REMOTE_TXS_SINCE.compareTo(backup.version()) > 0) finishReq.syncCommit(true);

            try {
              if (FINISH_NEAR_ONE_PHASE_SINCE.compareTo(backup.version()) <= 0)
                cctx.io().send(backup, finishReq, tx.ioPolicy());
              else {
                mini.onDone(
                    new IgniteTxHeuristicCheckedException(
                        "Failed to check for tx commit on "
                            + "the backup node (node has an old Ignite version) [rmtNodeId="
                            + backup.id()
                            + ", ver="
                            + backup.version()
                            + ']'));
              }
            } catch (ClusterTopologyCheckedException e) {
              mini.onNodeLeft(backupId);
            } catch (IgniteCheckedException e) {
              mini.onDone(e);
            }
          }
        }
      } else readyNearMappingFromBackup(mapping);
    }
  }
  /**
   * @param cctx Cache context.
   * @param qry Query.
   * @param keepPortable Keep portable.
   * @return Cursor.
   */
  public Iterator<List<?>> query(
      GridCacheContext<?, ?> cctx, GridCacheTwoStepQuery qry, boolean keepPortable) {
    for (int attempt = 0; ; attempt++) {
      if (attempt != 0) {
        try {
          Thread.sleep(attempt * 10); // Wait for exchange.
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();

          throw new CacheException("Query was interrupted.", e);
        }
      }

      long qryReqId = reqIdGen.incrementAndGet();

      QueryRun r = new QueryRun();

      r.pageSize = qry.pageSize() <= 0 ? GridCacheTwoStepQuery.DFLT_PAGE_SIZE : qry.pageSize();

      r.idxs = new ArrayList<>(qry.mapQueries().size());

      String space = cctx.name();

      r.conn = (JdbcConnection) h2.connectionForSpace(space);

      AffinityTopologyVersion topVer = h2.readyTopologyVersion();

      List<String> extraSpaces = extraSpaces(space, qry.spaces());

      Collection<ClusterNode> nodes;

      // Explicit partition mapping for unstable topology.
      Map<ClusterNode, IntArray> partsMap = null;

      if (isPreloadingActive(cctx, extraSpaces)) {
        if (cctx.isReplicated()) nodes = replicatedUnstableDataNodes(cctx, extraSpaces);
        else {
          partsMap = partitionedUnstableDataNodes(cctx, extraSpaces);

          nodes = partsMap == null ? null : partsMap.keySet();
        }
      } else nodes = stableDataNodes(topVer, cctx, extraSpaces);

      if (nodes == null) continue; // Retry.

      assert !nodes.isEmpty();

      if (cctx.isReplicated() || qry.explain()) {
        assert qry.explain() || !nodes.contains(ctx.discovery().localNode())
            : "We must be on a client node.";

        // Select random data node to run query on a replicated data or get EXPLAIN PLAN from a
        // single node.
        nodes = Collections.singleton(F.rand(nodes));
      }

      int tblIdx = 0;

      final boolean skipMergeTbl = !qry.explain() && qry.skipMergeTable();

      for (GridCacheSqlQuery mapQry : qry.mapQueries()) {
        GridMergeIndex idx;

        if (!skipMergeTbl) {
          GridMergeTable tbl;

          try {
            tbl = createMergeTable(r.conn, mapQry, qry.explain());
          } catch (IgniteCheckedException e) {
            throw new IgniteException(e);
          }

          idx = tbl.getScanIndex(null);

          fakeTable(r.conn, tblIdx++).setInnerTable(tbl);
        } else idx = GridMergeIndexUnsorted.createDummy();

        for (ClusterNode node : nodes) idx.addSource(node.id());

        r.idxs.add(idx);
      }

      r.latch = new CountDownLatch(r.idxs.size() * nodes.size());

      runs.put(qryReqId, r);

      try {
        if (ctx.clientDisconnected()) {
          throw new CacheException(
              "Query was cancelled, client node disconnected.",
              new IgniteClientDisconnectedException(
                  ctx.cluster().clientReconnectFuture(), "Client node disconnected."));
        }

        Collection<GridCacheSqlQuery> mapQrys = qry.mapQueries();

        if (qry.explain()) {
          mapQrys = new ArrayList<>(qry.mapQueries().size());

          for (GridCacheSqlQuery mapQry : qry.mapQueries())
            mapQrys.add(new GridCacheSqlQuery("EXPLAIN " + mapQry.query(), mapQry.parameters()));
        }

        if (nodes.size() != 1 || !F.first(nodes).isLocal()) { // Marshall params for remotes.
          Marshaller m = ctx.config().getMarshaller();

          for (GridCacheSqlQuery mapQry : mapQrys) mapQry.marshallParams(m);
        }

        boolean retry = false;

        if (send(
            nodes,
            new GridQueryRequest(qryReqId, r.pageSize, space, mapQrys, topVer, extraSpaces, null),
            partsMap)) {
          awaitAllReplies(r, nodes);

          Object state = r.state.get();

          if (state != null) {
            if (state instanceof CacheException) {
              CacheException err = (CacheException) state;

              if (err.getCause() instanceof IgniteClientDisconnectedException) throw err;

              throw new CacheException("Failed to run map query remotely.", err);
            }

            if (state instanceof AffinityTopologyVersion) {
              retry = true;

              // If remote node asks us to retry then we have outdated full partition map.
              h2.awaitForReadyTopologyVersion((AffinityTopologyVersion) state);
            }
          }
        } else // Send failed.
        retry = true;

        Iterator<List<?>> resIter = null;

        if (!retry) {
          if (qry.explain()) return explainPlan(r.conn, space, qry);

          if (skipMergeTbl) {
            List<List<?>> res = new ArrayList<>();

            assert r.idxs.size() == 1 : r.idxs;

            GridMergeIndex idx = r.idxs.get(0);

            Cursor cur = idx.findInStream(null, null);

            while (cur.next()) {
              Row row = cur.get();

              int cols = row.getColumnCount();

              List<Object> resRow = new ArrayList<>(cols);

              for (int c = 0; c < cols; c++) resRow.add(row.getValue(c).getObject());

              res.add(resRow);
            }

            resIter = res.iterator();
          } else {
            GridCacheSqlQuery rdc = qry.reduceQuery();

            // Statement caching is prohibited here because we can't guarantee correct merge index
            // reuse.
            ResultSet res =
                h2.executeSqlQueryWithTimer(
                    space, r.conn, rdc.query(), F.asList(rdc.parameters()), false);

            resIter = new Iter(res);
          }
        }

        for (GridMergeIndex idx : r.idxs) {
          if (!idx.fetchedAll()) // We have to explicitly cancel queries on remote nodes.
          send(nodes, new GridQueryCancelRequest(qryReqId), null);
        }

        if (retry) {
          if (Thread.currentThread().isInterrupted())
            throw new IgniteInterruptedCheckedException("Query was interrupted.");

          continue;
        }

        return new GridQueryCacheObjectsIterator(resIter, cctx, keepPortable);
      } catch (IgniteCheckedException | RuntimeException e) {
        U.closeQuiet(r.conn);

        if (e instanceof CacheException) throw (CacheException) e;

        Throwable cause = e;

        if (e instanceof IgniteCheckedException) {
          Throwable disconnectedErr =
              ((IgniteCheckedException) e).getCause(IgniteClientDisconnectedException.class);

          if (disconnectedErr != null) cause = disconnectedErr;
        }

        throw new CacheException("Failed to run reduce query locally.", cause);
      } finally {
        if (!runs.remove(qryReqId, r)) U.warn(log, "Query run was already removed: " + qryReqId);

        if (!skipMergeTbl) {
          for (int i = 0, mapQrys = qry.mapQueries().size(); i < mapQrys; i++)
            fakeTable(null, i).setInnerTable(null); // Drop all merge tables.
        }
      }
    }
  }
  /**
   * @param entry Entry to map.
   * @param val Value to write.
   * @param entryProcessor Entry processor.
   * @param ttl TTL (optional).
   * @param conflictExpireTime Conflict expire time (optional).
   * @param conflictVer Conflict version (optional).
   * @param updateCntr Partition update counter.
   */
  public void addWriteEntry(
      GridDhtCacheEntry entry,
      @Nullable CacheObject val,
      EntryProcessor<Object, Object, Object> entryProcessor,
      long ttl,
      long conflictExpireTime,
      @Nullable GridCacheVersion conflictVer,
      boolean addPrevVal,
      @Nullable CacheObject prevVal,
      long updateCntr) {
    AffinityTopologyVersion topVer = updateReq.topologyVersion();

    Collection<ClusterNode> dhtNodes = cctx.dht().topology().nodes(entry.partition(), topVer);

    if (log.isDebugEnabled())
      log.debug(
          "Mapping entry to DHT nodes [nodes=" + U.nodeIds(dhtNodes) + ", entry=" + entry + ']');

    CacheWriteSynchronizationMode syncMode = updateReq.writeSynchronizationMode();

    keys.add(entry.key());

    for (ClusterNode node : dhtNodes) {
      UUID nodeId = node.id();

      if (!nodeId.equals(cctx.localNodeId())) {
        GridDhtAtomicUpdateRequest updateReq = mappings.get(nodeId);

        if (updateReq == null) {
          updateReq =
              new GridDhtAtomicUpdateRequest(
                  cctx.cacheId(),
                  nodeId,
                  futVer,
                  writeVer,
                  syncMode,
                  topVer,
                  forceTransformBackups,
                  this.updateReq.subjectId(),
                  this.updateReq.taskNameHash(),
                  forceTransformBackups ? this.updateReq.invokeArguments() : null,
                  cctx.deploymentEnabled(),
                  this.updateReq.keepBinary());

          mappings.put(nodeId, updateReq);
        }

        updateReq.addWriteValue(
            entry.key(),
            val,
            entryProcessor,
            ttl,
            conflictExpireTime,
            conflictVer,
            addPrevVal,
            entry.partition(),
            prevVal,
            updateCntr);
      } else if (dhtNodes.size() == 1) {
        try {
          cctx.continuousQueries()
              .onEntryUpdated(
                  entry.key(),
                  val,
                  prevVal,
                  entry.key().internal() || !cctx.userCache(),
                  entry.partition(),
                  true,
                  false,
                  updateCntr,
                  updateReq.topologyVersion());
        } catch (IgniteCheckedException e) {
          U.warn(
              log,
              "Failed to send continuous query message. [key="
                  + entry.key()
                  + ", newVal="
                  + val
                  + ", err="
                  + e
                  + "]");
        }
      }
    }
  }
  /** {@inheritDoc} */
  @SuppressWarnings("unchecked")
  @Override
  public void onUtilityCacheStarted() throws IgniteCheckedException {
    IgniteCacheProxy<Object, Object> proxy = ctx.cache().jcache(CU.UTILITY_CACHE_NAME);

    boolean old = proxy.context().deploy().ignoreOwnership(true);

    try {
      metaDataCache = (IgniteCacheProxy) proxy.withNoRetries();
    } finally {
      proxy.context().deploy().ignoreOwnership(old);
    }

    if (clientNode) {
      assert !metaDataCache.context().affinityNode();

      metaCacheQryId =
          metaDataCache
              .context()
              .continuousQueries()
              .executeInternalQuery(
                  new MetaDataEntryListener(), new MetaDataEntryFilter(), false, true);

      while (true) {
        ClusterNode oldestSrvNode =
            CU.oldestAliveCacheServerNode(ctx.cache().context(), AffinityTopologyVersion.NONE);

        if (oldestSrvNode == null) break;

        GridCacheQueryManager qryMgr = metaDataCache.context().queries();

        CacheQuery<Map.Entry<PortableMetadataKey, BinaryMetadata>> qry =
            qryMgr.createScanQuery(new MetaDataPredicate(), null, false);

        qry.keepAll(false);

        qry.projection(ctx.cluster().get().forNode(oldestSrvNode));

        try {
          CacheQueryFuture<Map.Entry<PortableMetadataKey, BinaryMetadata>> fut = qry.execute();

          Map.Entry<PortableMetadataKey, BinaryMetadata> next;

          while ((next = fut.next()) != null) {
            assert next.getKey() != null : next;
            assert next.getValue() != null : next;

            addClientCacheMetaData(next.getKey(), next.getValue());
          }
        } catch (IgniteCheckedException e) {
          if (!ctx.discovery().alive(oldestSrvNode)
              || !ctx.discovery().pingNode(oldestSrvNode.id())) continue;
          else throw e;
        } catch (CacheException e) {
          if (X.hasCause(e, ClusterTopologyCheckedException.class, ClusterTopologyException.class))
            continue;
          else throw e;
        }

        break;
      }
    }

    for (Map.Entry<Integer, BinaryMetadata> e : metaBuf.entrySet())
      addMeta(e.getKey(), e.getValue().wrap(portableCtx));

    metaBuf.clear();

    startLatch.countDown();
  }
  /**
   * @param keys Keys.
   * @param mapped Mappings to check for duplicates.
   * @param topVer Topology version on which keys should be mapped.
   */
  private void map(
      Collection<KeyCacheObject> keys,
      Map<ClusterNode, LinkedHashMap<KeyCacheObject, Boolean>> mapped,
      AffinityTopologyVersion topVer) {
    Collection<ClusterNode> cacheNodes = CU.affinityNodes(cctx, topVer);

    if (cacheNodes.isEmpty()) {
      onDone(
          new ClusterTopologyServerNotFoundException(
              "Failed to map keys for cache "
                  + "(all partition nodes left the grid) [topVer="
                  + topVer
                  + ", cache="
                  + cctx.name()
                  + ']'));

      return;
    }

    Map<ClusterNode, LinkedHashMap<KeyCacheObject, Boolean>> mappings =
        U.newHashMap(cacheNodes.size());

    final int keysSize = keys.size();

    Map<K, V> locVals = U.newHashMap(keysSize);

    boolean hasRmtNodes = false;

    // Assign keys to primary nodes.
    for (KeyCacheObject key : keys) hasRmtNodes |= map(key, mappings, locVals, topVer, mapped);

    if (isDone()) return;

    if (!locVals.isEmpty()) add(new GridFinishedFuture<>(locVals));

    if (hasRmtNodes) {
      if (!trackable) {
        trackable = true;

        cctx.mvcc().addFuture(this, futId);
      }
    }

    // Create mini futures.
    for (Map.Entry<ClusterNode, LinkedHashMap<KeyCacheObject, Boolean>> entry :
        mappings.entrySet()) {
      final ClusterNode n = entry.getKey();

      final LinkedHashMap<KeyCacheObject, Boolean> mappedKeys = entry.getValue();

      assert !mappedKeys.isEmpty();

      // If this is the primary or backup node for the keys.
      if (n.isLocal()) {
        final GridDhtFuture<Collection<GridCacheEntryInfo>> fut =
            cache()
                .getDhtAsync(
                    n.id(),
                    -1,
                    mappedKeys,
                    readThrough,
                    topVer,
                    subjId,
                    taskName == null ? 0 : taskName.hashCode(),
                    expiryPlc,
                    skipVals);

        final Collection<Integer> invalidParts = fut.invalidPartitions();

        if (!F.isEmpty(invalidParts)) {
          Collection<KeyCacheObject> remapKeys = new ArrayList<>(keysSize);

          for (KeyCacheObject key : keys) {
            if (key != null && invalidParts.contains(cctx.affinity().partition(key)))
              remapKeys.add(key);
          }

          AffinityTopologyVersion updTopVer = cctx.discovery().topologyVersionEx();

          assert updTopVer.compareTo(topVer) > 0
              : "Got invalid partitions for local node but topology version did "
                  + "not change [topVer="
                  + topVer
                  + ", updTopVer="
                  + updTopVer
                  + ", invalidParts="
                  + invalidParts
                  + ']';

          // Remap recursively.
          map(remapKeys, mappings, updTopVer);
        }

        // Add new future.
        add(
            fut.chain(
                new C1<IgniteInternalFuture<Collection<GridCacheEntryInfo>>, Map<K, V>>() {
                  @Override
                  public Map<K, V> apply(IgniteInternalFuture<Collection<GridCacheEntryInfo>> fut) {
                    try {
                      return createResultMap(fut.get());
                    } catch (Exception e) {
                      U.error(log, "Failed to get values from dht cache [fut=" + fut + "]", e);

                      onDone(e);

                      return Collections.emptyMap();
                    }
                  }
                }));
      } else {
        MiniFuture fut = new MiniFuture(n, mappedKeys, topVer);

        GridCacheMessage req =
            new GridNearGetRequest(
                cctx.cacheId(),
                futId,
                fut.futureId(),
                n.version().compareTo(SINGLE_GET_MSG_SINCE) >= 0 ? null : DUMMY_VER,
                mappedKeys,
                readThrough,
                topVer,
                subjId,
                taskName == null ? 0 : taskName.hashCode(),
                expiryPlc != null ? expiryPlc.forAccess() : -1L,
                skipVals,
                cctx.deploymentEnabled());

        add(fut); // Append new future.

        try {
          cctx.io().send(n, req, cctx.ioPolicy());
        } catch (IgniteCheckedException e) {
          // Fail the whole thing.
          if (e instanceof ClusterTopologyCheckedException)
            fut.onNodeLeft((ClusterTopologyCheckedException) e);
          else fut.onResult(e);
        }
      }
    }
  }
  /**
   * @param node Node.
   * @param msg Message.
   */
  private void onFail(ClusterNode node, GridQueryFailResponse msg) {
    QueryRun r = runs.get(msg.queryRequestId());

    fail(r, node.id(), msg.error());
  }
  /**
   * Updates value for single partition.
   *
   * @param p Partition.
   * @param nodeId Node ID.
   * @param state State.
   * @param updateSeq Update sequence.
   */
  @SuppressWarnings({"MismatchedQueryAndUpdateOfCollection"})
  private void updateLocal(int p, UUID nodeId, GridDhtPartitionState state, long updateSeq) {
    assert lock.isWriteLockedByCurrentThread();
    assert nodeId.equals(cctx.nodeId());

    // In case if node joins, get topology at the time of joining node.
    ClusterNode oldest = CU.oldestAliveCacheServerNode(cctx.shared(), topVer);

    assert oldest != null;

    // If this node became the oldest node.
    if (oldest.id().equals(cctx.nodeId())) {
      long seq = node2part.updateSequence();

      if (seq != updateSeq) {
        if (seq > updateSeq) {
          if (this.updateSeq.get() < seq) {
            // Update global counter if necessary.
            boolean b = this.updateSeq.compareAndSet(this.updateSeq.get(), seq + 1);

            assert b
                : "Invalid update sequence [updateSeq="
                    + updateSeq
                    + ", seq="
                    + seq
                    + ", curUpdateSeq="
                    + this.updateSeq.get()
                    + ", node2part="
                    + node2part.toFullString()
                    + ']';

            updateSeq = seq + 1;
          } else updateSeq = seq;
        }

        node2part.updateSequence(updateSeq);
      }
    }

    GridDhtPartitionMap map = node2part.get(nodeId);

    if (map == null)
      node2part.put(
          nodeId,
          map =
              new GridDhtPartitionMap(
                  nodeId,
                  updateSeq,
                  Collections.<Integer, GridDhtPartitionState>emptyMap(),
                  false));

    map.updateSequence(updateSeq);

    map.put(p, state);

    Set<UUID> ids = part2node.get(p);

    if (ids == null) part2node.put(p, ids = U.newHashSet(3));

    ids.add(nodeId);
  }
    /** @param res Result callback. */
    @SuppressWarnings("ThrowableResultOfMethodCallIgnored")
    void onResult(final GridNearGetResponse res) {
      final Collection<Integer> invalidParts = res.invalidPartitions();

      // If error happened on remote node, fail the whole future.
      if (res.error() != null) {
        onDone(res.error());

        return;
      }

      // Remap invalid partitions.
      if (!F.isEmpty(invalidParts)) {
        AffinityTopologyVersion rmtTopVer = res.topologyVersion();

        assert !rmtTopVer.equals(AffinityTopologyVersion.ZERO);

        if (rmtTopVer.compareTo(topVer) <= 0) {
          // Fail the whole get future.
          onDone(
              new IgniteCheckedException(
                  "Failed to process invalid partitions response (remote node reported "
                      + "invalid partitions but remote topology version does not differ from local) "
                      + "[topVer="
                      + topVer
                      + ", rmtTopVer="
                      + rmtTopVer
                      + ", invalidParts="
                      + invalidParts
                      + ", nodeId="
                      + node.id()
                      + ']'));

          return;
        }

        if (log.isDebugEnabled())
          log.debug(
              "Remapping mini get future [invalidParts=" + invalidParts + ", fut=" + this + ']');

        if (!canRemap) {
          map(
              F.view(
                  keys.keySet(),
                  new P1<KeyCacheObject>() {
                    @Override
                    public boolean apply(KeyCacheObject key) {
                      return invalidParts.contains(cctx.affinity().partition(key));
                    }
                  }),
              F.t(node, keys),
              topVer);

          onDone(createResultMap(res.entries()));

          return;
        }

        // Need to wait for next topology version to remap.
        IgniteInternalFuture<AffinityTopologyVersion> topFut =
            cctx.affinity().affinityReadyFuture(rmtTopVer);

        topFut.listen(
            new CIX1<IgniteInternalFuture<AffinityTopologyVersion>>() {
              @SuppressWarnings("unchecked")
              @Override
              public void applyx(IgniteInternalFuture<AffinityTopologyVersion> fut)
                  throws IgniteCheckedException {
                AffinityTopologyVersion topVer = fut.get();

                // This will append new futures to compound list.
                map(
                    F.view(
                        keys.keySet(),
                        new P1<KeyCacheObject>() {
                          @Override
                          public boolean apply(KeyCacheObject key) {
                            return invalidParts.contains(cctx.affinity().partition(key));
                          }
                        }),
                    F.t(node, keys),
                    topVer);

                onDone(createResultMap(res.entries()));
              }
            });
      } else {
        try {
          onDone(createResultMap(res.entries()));
        } catch (Exception e) {
          onDone(e);
        }
      }
    }
  /**
   * @param node Node.
   * @param msg Message.
   */
  private void onNextPage(final ClusterNode node, GridQueryNextPageResponse msg) {
    final long qryReqId = msg.queryRequestId();
    final int qry = msg.query();

    final QueryRun r = runs.get(qryReqId);

    if (r == null) // Already finished with error or canceled.
    return;

    final int pageSize = r.pageSize;

    GridMergeIndex idx = r.idxs.get(msg.query());

    GridResultPage page;

    try {
      page =
          new GridResultPage(ctx, node.id(), msg) {
            @Override
            public void fetchNextPage() {
              Object errState = r.state.get();

              if (errState != null) {
                CacheException err0 =
                    errState instanceof CacheException ? (CacheException) errState : null;

                if (err0 != null && err0.getCause() instanceof IgniteClientDisconnectedException)
                  throw err0;

                CacheException e =
                    new CacheException("Failed to fetch data from node: " + node.id());

                if (err0 != null) e.addSuppressed(err0);

                throw e;
              }

              try {
                GridQueryNextPageRequest msg0 =
                    new GridQueryNextPageRequest(qryReqId, qry, pageSize);

                if (node.isLocal()) h2.mapQueryExecutor().onMessage(ctx.localNodeId(), msg0);
                else ctx.io().send(node, GridTopic.TOPIC_QUERY, msg0, QUERY_POOL);
              } catch (IgniteCheckedException e) {
                throw new CacheException("Failed to fetch data from node: " + node.id(), e);
              }
            }
          };
    } catch (Exception e) {
      U.error(log, "Error in message.", e);

      fail(r, node.id(), "Error in message.");

      return;
    }

    idx.addPage(page);

    if (msg.retry() != null) retry(r, msg.retry(), node.id());
    else if (msg.allRows() != -1) // Only the first page contains row count.
    r.latch.countDown();
  }