/**
   * Adds a key mapping to the DHT mapping.
   *
   * @param key Key to add.
   * @param node Node this key is mapped to.
   */
  public void addKeyMapping(IgniteTxKey key, ClusterNode node) {
    GridDistributedTxMapping m = mappings.get(node.id());

    if (m == null) mappings.put(m = new GridDistributedTxMapping(node));

    IgniteTxEntry txEntry = entry(key);

    assert txEntry != null;

    txEntry.nodeId(node.id());

    m.add(txEntry);

    if (log.isDebugEnabled())
      log.debug(
          "Added mappings to transaction [locId="
              + cctx.localNodeId()
              + ", key="
              + key
              + ", node="
              + node
              + ", tx="
              + this
              + ']');
  }
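  // A minimal, self-contained sketch (not the Ignite API; Mapping is a hypothetical
  // placeholder, java.util assumed imported) of the get-or-create idiom used above:
  // look up the per-node mapping, create it on a miss, then attach the entry to it.
  static final class Mapping {
    final UUID nodeId;
    final List<String> entries = new ArrayList<>();

    Mapping(UUID nodeId) {
      this.nodeId = nodeId;
    }
  }

  static void addKeyMappingSketch(Map<UUID, Mapping> mappings, UUID nodeId, String key) {
    // computeIfAbsent collapses the "get, check for null, put" sequence into one call.
    mappings.computeIfAbsent(nodeId, Mapping::new).entries.add(key);
  }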
  /** @param maps Mappings. */
  void addEntryMapping(@Nullable Collection<GridDistributedTxMapping> maps) {
    if (!F.isEmpty(maps)) {
      for (GridDistributedTxMapping map : maps) {
        ClusterNode n = map.node();

        GridDistributedTxMapping m = mappings.get(n.id());

        if (m == null) {
          mappings.put(m = new GridDistributedTxMapping(n));

          m.near(map.near());

          if (map.explicitLock()) m.markExplicitLock();
        }

        for (IgniteTxEntry entry : map.entries()) m.add(entry);
      }

      if (log.isDebugEnabled())
        log.debug(
            "Added mappings to transaction [locId="
                + cctx.localNodeId()
                + ", mappings="
                + maps
                + ", tx="
                + this
                + ']');
    }
  }
  /**
   * @param nodes Nodes.
   * @param msg Message.
   * @param partsMap Partitions.
   * @return {@code true} If all messages sent successfully.
   */
  private boolean send(
      Collection<ClusterNode> nodes, Message msg, Map<ClusterNode, IntArray> partsMap) {
    boolean locNodeFound = false;

    boolean ok = true;

    for (ClusterNode node : nodes) {
      if (node.isLocal()) {
        locNodeFound = true;

        continue;
      }

      try {
        ctx.io().send(node, GridTopic.TOPIC_QUERY, copy(msg, node, partsMap), QUERY_POOL);
      } catch (IgniteCheckedException e) {
        ok = false;

        U.warn(log, e.getMessage());
      }
    }

    if (locNodeFound) // Local node goes last to allow parallel execution.
      h2.mapQueryExecutor()
          .onMessage(ctx.localNodeId(), copy(msg, ctx.discovery().localNode(), partsMap));

    return ok;
  }
  /** @param m Mapping. */
  private void finish(GridDistributedTxMapping m) {
    ClusterNode n = m.node();

    assert !m.empty();

    GridNearTxFinishRequest req =
        new GridNearTxFinishRequest(
            futId,
            tx.xidVersion(),
            tx.threadId(),
            commit,
            tx.isInvalidate(),
            tx.system(),
            tx.ioPolicy(),
            tx.syncCommit(),
            tx.syncRollback(),
            m.explicitLock(),
            tx.storeEnabled(),
            tx.topologyVersion(),
            null,
            null,
            null,
            tx.size(),
            tx.subjectId(),
            tx.taskNameHash(),
            tx.activeCachesDeploymentEnabled());

    // If the local node is the primary node for the keys, finish locally.
    if (n.isLocal()) {
      req.miniId(IgniteUuid.randomUuid());

      IgniteInternalFuture<IgniteInternalTx> fut = cctx.tm().txHandler().finish(n.id(), tx, req);

      // Add new future.
      if (fut != null) add(fut);
    } else {
      FinishMiniFuture fut = new FinishMiniFuture(m);

      req.miniId(fut.futureId());

      add(fut); // Append new future.

      if (tx.pessimistic()) cctx.tm().beforeFinishRemote(n.id(), tx.threadId());

      try {
        cctx.io().send(n, req, tx.ioPolicy());

        // If we don't wait for result, then mark future as done.
        if (!isSync() && !m.explicitLock()) fut.onDone();
      } catch (ClusterTopologyCheckedException e) {
        // Remove previous mapping.
        mappings.remove(m.node().id());

        fut.onNodeLeft(n.id());
      } catch (IgniteCheckedException e) {
        // Fail the whole thing.
        fut.onDone(e);
      }
    }
  }
  /**
   * Converts collection of rich nodes to block location data.
   *
   * @param nodes Collection of affinity nodes.
   */
  private void convertFromNodes(Collection<ClusterNode> nodes) {
    Collection<String> names = new LinkedHashSet<>();
    Collection<String> hosts = new LinkedHashSet<>();
    Collection<UUID> nodeIds = new ArrayList<>(nodes.size());

    for (final ClusterNode node : nodes) {
      // Normalize host names into Hadoop-expected format.
      try {
        Collection<InetAddress> addrs = U.toInetAddresses(node);

        for (InetAddress addr : addrs) {
          if (addr.getHostName() == null) names.add(addr.getHostAddress() + ":" + 9001);
          else {
            names.add(addr.getHostName() + ":" + 9001); // hostname:portNumber
            hosts.add(addr.getHostName());
          }
        }
      } catch (IgniteCheckedException ignored) {
        names.addAll(node.addresses());
      }

      nodeIds.add(node.id());
    }

    this.nodeIds = nodeIds;
    this.names = names;
    this.hosts = hosts;
  }
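  // Hedged sketch of the name normalization above (java.net and java.util assumed imported;
  // toBlockNames and the port parameter are hypothetical): build "host:port" entries, falling
  // back to the raw address when no host name is available, mirroring the null check above.
  static Collection<String> toBlockNames(Collection<InetAddress> addrs, int port) {
    Collection<String> names = new LinkedHashSet<>();

    for (InetAddress addr : addrs) {
      String host = addr.getHostName();

      // Keep the "host:port" format; duplicates are dropped by the LinkedHashSet.
      names.add((host == null ? addr.getHostAddress() : host) + ":" + port);
    }

    return names;
  }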
  private void sendPartitions() {
    ClusterNode oldestNode = this.oldestNode.get();

    try {
      sendLocalPartitions(oldestNode, exchId);
    } catch (ClusterTopologyCheckedException ignore) {
      if (log.isDebugEnabled())
        log.debug(
            "Oldest node left during partition exchange [nodeId="
                + oldestNode.id()
                + ", exchId="
                + exchId
                + ']');
    } catch (IgniteCheckedException e) {
      scheduleRecheck();

      U.error(
          log,
          "Failed to send local partitions to oldest node (will retry after timeout) [oldestNodeId="
              + oldestNode.id()
              + ", exchId="
              + exchId
              + ']',
          e);
    }
  }
  /**
   * Sends query request.
   *
   * @param fut Distributed future.
   * @param req Request.
   * @param nodes Nodes.
   * @throws IgniteCheckedException In case of error.
   */
  @SuppressWarnings("unchecked")
  private void sendRequest(
      final GridCacheDistributedQueryFuture<?, ?, ?> fut,
      final GridCacheQueryRequest req,
      Collection<ClusterNode> nodes)
      throws IgniteCheckedException {
    assert fut != null;
    assert req != null;
    assert nodes != null;

    final UUID locNodeId = cctx.localNodeId();

    ClusterNode locNode = null;

    Collection<ClusterNode> rmtNodes = null;

    for (ClusterNode n : nodes) {
      if (n.id().equals(locNodeId)) locNode = n;
      else {
        if (rmtNodes == null) rmtNodes = new ArrayList<>(nodes.size());

        rmtNodes.add(n);
      }
    }

    // The request should be sent to remote nodes before the query is processed on the local node.
    // For example, a remote reducer has state, so we should not serialize and send a reducer
    // that has already been changed by the local node.
    if (!F.isEmpty(rmtNodes)) {
      cctx.io()
          .safeSend(
              rmtNodes,
              req,
              cctx.ioPolicy(),
              new P1<ClusterNode>() {
                @Override
                public boolean apply(ClusterNode node) {
                  fut.onNodeLeft(node.id());

                  return !fut.isDone();
                }
              });
    }

    if (locNode != null) {
      cctx.closures()
          .callLocalSafe(
              new Callable<Object>() {
                @Override
                public Object call() throws Exception {
                  req.beforeLocalExecution(cctx);

                  processQueryRequest(locNodeId, req);

                  return null;
                }
              });
    }
  }
  /** {@inheritDoc} */
  @Override
  public UUID localNodeId() {
    if (locNode != null) return locNode.id();

    if (discoMgr != null) locNode = discoMgr.localNode();

    return locNode != null ? locNode.id() : config().getNodeId();
  }
    /**
     * @param nodeId Failed node ID.
     * @return {@code True} if the failed node matches this future's node and the future was finished.
     */
    boolean onNodeLeft(UUID nodeId) {
      if (nodeId.equals(m.node().id())) {
        if (log.isDebugEnabled())
          log.debug("Remote node left grid while sending or waiting for reply: " + this);

        if (isSync()) {
          Map<UUID, Collection<UUID>> txNodes = tx.transactionNodes();

          if (txNodes != null) {
            Collection<UUID> backups = txNodes.get(nodeId);

            if (!F.isEmpty(backups)) {
              final CheckRemoteTxMiniFuture mini =
                  new CheckRemoteTxMiniFuture(new HashSet<>(backups));

              add(mini);

              GridDhtTxFinishRequest req = checkCommittedRequest(mini.futureId());

              req.waitRemoteTransactions(true);

              for (UUID backupId : backups) {
                ClusterNode backup = cctx.discovery().node(backupId);

                if (backup != null && WAIT_REMOTE_TXS_SINCE.compareTo(backup.version()) <= 0) {
                  if (backup.isLocal()) {
                    IgniteInternalFuture<?> fut =
                        cctx.tm().remoteTxFinishFuture(tx.nearXidVersion());

                    fut.listen(
                        new CI1<IgniteInternalFuture<?>>() {
                          @Override
                          public void apply(IgniteInternalFuture<?> fut) {
                            mini.onDhtFinishResponse(cctx.localNodeId());
                          }
                        });
                  } else {
                    try {
                      cctx.io().send(backup, req, tx.ioPolicy());
                    } catch (ClusterTopologyCheckedException e) {
                      mini.onNodeLeft(backupId);
                    } catch (IgniteCheckedException e) {
                      mini.onDone(e);
                    }
                  }
                } else mini.onDhtFinishResponse(backupId);
              }
            }
          }
        }

        onDone(tx);

        return true;
      }

      return false;
    }
  /**
   * Set source nodes.
   *
   * @param nodes Nodes.
   */
  public void setSources(Collection<ClusterNode> nodes) {
    assert remainingRows == null;

    remainingRows = U.newHashMap(nodes.size());

    for (ClusterNode node : nodes) {
      if (remainingRows.put(node.id(), new Counter()) != null)
        throw new IllegalStateException("Duplicate node id: " + node.id());
    }
  }
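  // Hedged sketch of the duplicate check above (java.util assumed imported; Counter replaced
  // by a plain Integer): Map.put returns the previous value, so a non-null result means the
  // same node id was supplied twice.
  static Map<UUID, Integer> initRemainingRows(Collection<UUID> nodeIds) {
    Map<UUID, Integer> remaining = new HashMap<>(nodeIds.size());

    for (UUID id : nodeIds) {
      if (remaining.put(id, 0) != null)
        throw new IllegalStateException("Duplicate node id: " + id);
    }

    return remaining;
  }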
  /**
   * @param r Query run.
   * @param nodes Nodes to periodically check whether they are still alive.
   * @throws IgniteInterruptedCheckedException If interrupted.
   */
  private void awaitAllReplies(QueryRun r, Collection<ClusterNode> nodes)
      throws IgniteInterruptedCheckedException {
    while (!U.await(r.latch, 500, TimeUnit.MILLISECONDS)) {
      for (ClusterNode node : nodes) {
        if (!ctx.discovery().alive(node)) {
          handleNodeLeft(r, node.id());

          assert r.latch.getCount() == 0;

          return;
        }
      }
    }
  }
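  // Hedged sketch of the wait loop above (java.util.concurrent and java.util.function assumed
  // imported; the alive predicate is a hypothetical stand-in for the discovery check): poll
  // the latch with a short timeout and, between attempts, bail out if a waited-on node died.
  static void awaitRepliesSketch(CountDownLatch latch, Collection<UUID> nodes, Predicate<UUID> alive)
      throws InterruptedException {
    while (!latch.await(500, TimeUnit.MILLISECONDS)) {
      for (UUID id : nodes) {
        if (!alive.test(id))
          return; // A node left without replying; stop waiting instead of spinning forever.
      }
    }
  }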
  /**
   * Updates partition map in all caches.
   *
   * @param msg Partitions full message.
   */
  private void updatePartitionFullMap(GridDhtPartitionsFullMessage msg) {
    for (Map.Entry<Integer, GridDhtPartitionFullMap> entry : msg.partitions().entrySet()) {
      Integer cacheId = entry.getKey();

      GridCacheContext cacheCtx = cctx.cacheContext(cacheId);

      if (cacheCtx != null) cacheCtx.topology().update(exchId, entry.getValue());
      else {
        ClusterNode oldest = CU.oldestAliveCacheServerNode(cctx, AffinityTopologyVersion.NONE);

        if (oldest != null && oldest.isLocal())
          cctx.exchange().clientTopology(cacheId, this).update(exchId, entry.getValue());
      }
    }
  }
  /** @return {@code True} if the local node is the job update leader (the node with the smallest order). */
  public boolean jobUpdateLeader() {
    long minOrder = Long.MAX_VALUE;
    ClusterNode minOrderNode = null;

    for (ClusterNode node : nodes()) {
      if (node.order() < minOrder) {
        minOrder = node.order();
        minOrderNode = node;
      }
    }

    assert minOrderNode != null;

    return localNodeId().equals(minOrderNode.id());
  }
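  // Hedged sketch of the leader check above (java.util assumed imported): every node that
  // evaluates this over the same topology snapshot picks the same node, the one with the
  // smallest order, so no extra coordination is needed.
  static boolean isUpdateLeader(UUID localId, Map<UUID, Long> nodeOrders) {
    UUID leader = null;
    long minOrder = Long.MAX_VALUE;

    for (Map.Entry<UUID, Long> e : nodeOrders.entrySet()) {
      if (e.getValue() < minOrder) {
        minOrder = e.getValue();
        leader = e.getKey();
      }
    }

    return localId.equals(leader);
  }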
  /** {@inheritDoc} */
  @Override
  public Collection<ClusterNode> nodes(int p, AffinityTopologyVersion topVer) {
    Collection<ClusterNode> affNodes = cctx.affinity().nodes(p, topVer);

    lock.readLock().lock();

    try {
      assert node2part != null && node2part.valid()
          : "Invalid node-to-partitions map [topVer1="
              + topVer
              + ", topVer2="
              + this.topVer
              + ", cache="
              + cctx.name()
              + ", node2part="
              + node2part
              + ']';

      Collection<ClusterNode> nodes = null;

      Collection<UUID> nodeIds = part2node.get(p);

      if (!F.isEmpty(nodeIds)) {
        Collection<UUID> affIds = new HashSet<>(F.viewReadOnly(affNodes, F.node2id()));

        for (UUID nodeId : nodeIds) {
          if (!affIds.contains(nodeId) && hasState(p, nodeId, OWNING, MOVING, RENTING)) {
            ClusterNode n = cctx.discovery().node(nodeId);

            if (n != null
                && (topVer.topologyVersion() < 0 || n.order() <= topVer.topologyVersion())) {
              if (nodes == null) {
                nodes = new ArrayList<>(affNodes.size() + 2);

                nodes.addAll(affNodes);
              }

              nodes.add(n);
            }
          }
        }
      }

      return nodes != null ? nodes : affNodes;
    } finally {
      lock.readLock().unlock();
    }
  }
  /**
   * @param p Partition.
   * @param topVer Topology version ({@code -1} for all nodes).
   * @param state Partition state.
   * @param states Additional partition states.
   * @return List of nodes for the partition.
   */
  private List<ClusterNode> nodes(
      int p,
      AffinityTopologyVersion topVer,
      GridDhtPartitionState state,
      GridDhtPartitionState... states) {
    Collection<UUID> allIds =
        topVer.topologyVersion() > 0 ? F.nodeIds(CU.affinityNodes(cctx, topVer)) : null;

    lock.readLock().lock();

    try {
      assert node2part != null && node2part.valid()
          : "Invalid node-to-partitions map [topVer="
              + topVer
              + ", allIds="
              + allIds
              + ", node2part="
              + node2part
              + ", cache="
              + cctx.name()
              + ']';

      Collection<UUID> nodeIds = part2node.get(p);

      // Node IDs can be null if both primary and backup nodes disappear.
      int size = nodeIds == null ? 0 : nodeIds.size();

      if (size == 0) return Collections.emptyList();

      List<ClusterNode> nodes = new ArrayList<>(size);

      for (UUID id : nodeIds) {
        if (topVer.topologyVersion() > 0 && !allIds.contains(id)) continue;

        if (hasState(p, id, state, states)) {
          ClusterNode n = cctx.discovery().node(id);

          if (n != null && (topVer.topologyVersion() < 0 || n.order() <= topVer.topologyVersion()))
            nodes.add(n);
        }
      }

      return nodes;
    } finally {
      lock.readLock().unlock();
    }
  }
  /** {@inheritDoc} */
  @Override
  public String toString() {
    ClusterNode oldestNode = this.oldestNode.get();

    return S.toString(
        GridDhtPartitionsExchangeFuture.class,
        this,
        "oldest",
        oldestNode == null ? "null" : oldestNode.id(),
        "oldestOrder",
        oldestNode == null ? "null" : oldestNode.order(),
        "evtLatch",
        evtLatch == null ? "null" : evtLatch.getCount(),
        "remaining",
        remaining(),
        "super",
        super.toString());
  }
  /** @param futs Futures to complete. */
  private void completeOnNodeLeft(GridNioFuture<?>[] futs) {
    for (GridNioFuture<?> msg : futs) {
      IOException e = new IOException("Failed to send message, node has left: " + node.id());

      ((GridNioFutureImpl) msg).onDone(e);

      if (msg.ackClosure() != null) msg.ackClosure().apply(new IgniteException(e));
    }
  }
    /** {@inheritDoc} */
    @Override
    public boolean apply(ClusterNode clusterNode) {
      Boolean attr = clusterNode.attribute(IgniteNodeAttributes.ATTR_CLIENT_MODE);

      assertNotNull(attr);

      assertFalse(attr);

      return true;
    }
  /**
   * @param queueLimit Maximum size of unacknowledged messages queue.
   * @param node Node.
   * @param log Logger.
   */
  public GridNioRecoveryDescriptor(int queueLimit, ClusterNode node, IgniteLogger log) {
    assert !node.isLocal() : node;
    assert queueLimit > 0;

    msgFuts = new ArrayDeque<>(queueLimit);

    this.queueLimit = queueLimit;
    this.node = node;
    this.log = log;
  }
  /** {@inheritDoc} */
  @Override
  public Map<? extends ComputeJob, ClusterNode> map(List<ClusterNode> subgrid, UUID arg) {
    assert arg != null;
    assert subgrid.size() > 1
        : "Test requires at least 2 nodes. One with load and another one to steal.";

    int jobsNum = subgrid.size();

    Map<GridStealingLoadTestJob, ClusterNode> map = new HashMap<>(jobsNum);

    stealingNodeId = arg;

    Iterator<ClusterNode> iter = subgrid.iterator();

    Collection<UUID> assigned = new ArrayList<>(subgrid.size());

    for (int i = 0; i < jobsNum; i++) {
      ClusterNode node = null;

      boolean nextNodeFound = false;

      while (iter.hasNext() && !nextNodeFound) {
        node = iter.next();

        // Do not map jobs to the stealing node.
        if (!node.id().equals(stealingNodeId)) nextNodeFound = true;

        // Recycle iterator.
        if (!iter.hasNext()) iter = subgrid.iterator();
      }

      assert node != null;

      assigned.add(node.id());

      map.put(new GridStealingLoadTestJob(), node);
    }

    taskSes.setAttribute("nodes", assigned);

    return map;
  }
    /** {@inheritDoc} */
    @Override
    boolean onNodeLeft(UUID nodeId) {
      if (nodeId.equals(backup.id())) {
        readyNearMappingFromBackup(m);

        onDone(new ClusterTopologyCheckedException("Remote node left grid: " + nodeId));

        return true;
      }

      return false;
    }
  /**
   * Send delete message to all meta cache nodes in the grid.
   *
   * @param msg Message to send.
   */
  private void sendDeleteMessage(IgfsDeleteMessage msg) {
    assert msg != null;

    Collection<ClusterNode> nodes = meta.metaCacheNodes();

    for (ClusterNode node : nodes) {
      try {
        igfsCtx.send(node, topic, msg, GridIoPolicy.SYSTEM_POOL);
      } catch (IgniteCheckedException e) {
        U.warn(
            log,
            "Failed to send IGFS delete message to node [nodeId="
                + node.id()
                + ", msg="
                + msg
                + ", err="
                + e.getMessage()
                + ']');
      }
    }
  }
  /** {@inheritDoc} */
  @Override
  public boolean own(GridDhtLocalPartition part) {
    ClusterNode loc = cctx.localNode();

    lock.writeLock().lock();

    try {
      if (part.own()) {
        updateLocal(part.id(), loc.id(), part.state(), updateSeq.incrementAndGet());

        consistencyCheck();

        return true;
      }

      consistencyCheck();

      return false;
    } finally {
      lock.writeLock().unlock();
    }
  }
    private void init() {
      ClusterNode node = nodes.poll();

      GridCacheQueryFutureAdapter<?, ?, R> fut0 =
          (GridCacheQueryFutureAdapter<?, ?, R>)
              (node.isLocal()
                  ? qryMgr.queryLocal(bean)
                  : qryMgr.queryDistributed(bean, Collections.singleton(node)));

      fut0.listen(
          new IgniteInClosure<IgniteInternalFuture<Collection<R>>>() {
            @Override
            public void apply(IgniteInternalFuture<Collection<R>> fut) {
              try {
                onDone(fut.get());
              } catch (IgniteCheckedException e) {
                if (F.isEmpty(nodes)) onDone(e);
                else init();
              }
            }
          });

      fut = fut0;
    }
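    // Hedged sketch of the failover idiom above (java.util, java.util.function and
    // java.util.concurrent assumed imported; the query function is a hypothetical stand-in for
    // the query manager): run the query on the next node from the queue and, on failure, retry
    // with the remaining nodes until the queue is exhausted.
    static void queryWithFailover(
        Queue<UUID> nodes,
        Function<UUID, CompletableFuture<String>> query,
        CompletableFuture<String> result) {
      UUID node = nodes.poll();

      if (node == null) {
        result.completeExceptionally(new IllegalStateException("No nodes left to query."));

        return;
      }

      query.apply(node).whenComplete((res, err) -> {
        if (err == null) result.complete(res);
        else if (nodes.isEmpty()) result.completeExceptionally(err);
        else queryWithFailover(nodes, query, result); // Fail over to the next node.
      });
    }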
  /**
   * @param node Node.
   * @param id ID.
   * @throws IgniteCheckedException If failed.
   */
  private void sendLocalPartitions(ClusterNode node, @Nullable GridDhtPartitionExchangeId id)
      throws IgniteCheckedException {
    GridDhtPartitionsSingleMessage m =
        new GridDhtPartitionsSingleMessage(
            id, cctx.kernalContext().clientNode(), cctx.versions().last());

    for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
      if (!cacheCtx.isLocal())
        m.addLocalPartitionMap(cacheCtx.cacheId(), cacheCtx.topology().localPartitionMap());
    }

    if (log.isDebugEnabled())
      log.debug(
          "Sending local partitions [nodeId="
              + node.id()
              + ", exchId="
              + exchId
              + ", msg="
              + m
              + ']');

    cctx.io().send(node, m, SYSTEM_POOL);
  }
  /** @param nodeId Node to remove. */
  private void removeNode(UUID nodeId) {
    assert nodeId != null;
    assert lock.writeLock().isHeldByCurrentThread();

    ClusterNode oldest = CU.oldestAliveCacheServerNode(cctx.shared(), topVer);

    assert oldest != null;

    ClusterNode loc = cctx.localNode();

    if (node2part != null) {
      if (oldest.equals(loc) && !node2part.nodeId().equals(loc.id())) {
        updateSeq.setIfGreater(node2part.updateSequence());

        node2part =
            new GridDhtPartitionFullMap(
                loc.id(), loc.order(), updateSeq.incrementAndGet(), node2part, false);
      } else node2part = new GridDhtPartitionFullMap(node2part, node2part.updateSequence());

      part2node = new HashMap<>(part2node);

      GridDhtPartitionMap parts = node2part.remove(nodeId);

      if (parts != null) {
        for (Integer p : parts.keySet()) {
          Set<UUID> nodeIds = part2node.get(p);

          if (nodeIds != null) {
            nodeIds.remove(nodeId);

            if (nodeIds.isEmpty()) part2node.remove(p);
          }
        }
      }

      consistencyCheck();
    }
  }
  private void checkBackup() {
    GridDistributedTxMapping mapping = mappings.singleMapping();

    if (mapping != null) {
      UUID nodeId = mapping.node().id();

      Collection<UUID> backups = tx.transactionNodes().get(nodeId);

      if (!F.isEmpty(backups)) {
        assert backups.size() == 1;

        UUID backupId = F.first(backups);

        ClusterNode backup = cctx.discovery().node(backupId);

        // Nothing to do if backup has left the grid.
        if (backup == null) {
          readyNearMappingFromBackup(mapping);

          ClusterTopologyCheckedException cause =
              new ClusterTopologyCheckedException("Backup node left grid: " + backupId);

          cause.retryReadyFuture(cctx.nextAffinityReadyFuture(tx.topologyVersion()));

          onDone(
              new IgniteTxRollbackCheckedException(
                  "Failed to commit transaction " + "(backup has left grid): " + tx.xidVersion(),
                  cause));
        } else {
          final CheckBackupMiniFuture mini = new CheckBackupMiniFuture(backup, mapping);

          add(mini);

          if (backup.isLocal()) {
            boolean committed = !cctx.tm().addRolledbackTx(tx);

            readyNearMappingFromBackup(mapping);

            if (committed) {
              if (tx.syncCommit()) {
                GridCacheVersion nearXidVer = tx.nearXidVersion();

                assert nearXidVer != null : tx;

                IgniteInternalFuture<?> fut = cctx.tm().remoteTxFinishFuture(nearXidVer);

                fut.listen(
                    new CI1<IgniteInternalFuture<?>>() {
                      @Override
                      public void apply(IgniteInternalFuture<?> fut) {
                        mini.onDone(tx);
                      }
                    });

                return;
              }

              mini.onDone(tx);
            } else {
              ClusterTopologyCheckedException cause =
                  new ClusterTopologyCheckedException("Primary node left grid: " + nodeId);

              cause.retryReadyFuture(cctx.nextAffinityReadyFuture(tx.topologyVersion()));

              mini.onDone(
                  new IgniteTxRollbackCheckedException(
                      "Failed to commit transaction "
                          + "(transaction has been rolled back on backup node): "
                          + tx.xidVersion(),
                      cause));
            }
          } else {
            GridDhtTxFinishRequest finishReq = checkCommittedRequest(mini.futureId());

            // Preserve old behavior, otherwise response is not sent.
            if (WAIT_REMOTE_TXS_SINCE.compareTo(backup.version()) > 0) finishReq.syncCommit(true);

            try {
              if (FINISH_NEAR_ONE_PHASE_SINCE.compareTo(backup.version()) <= 0)
                cctx.io().send(backup, finishReq, tx.ioPolicy());
              else {
                mini.onDone(
                    new IgniteTxHeuristicCheckedException(
                        "Failed to check for tx commit on "
                            + "the backup node (node has an old Ignite version) [rmtNodeId="
                            + backup.id()
                            + ", ver="
                            + backup.version()
                            + ']'));
              }
            } catch (ClusterTopologyCheckedException e) {
              mini.onNodeLeft(backupId);
            } catch (IgniteCheckedException e) {
              mini.onDone(e);
            }
          }
        }
      } else readyNearMappingFromBackup(mapping);
    }
  }
  /**
   * Starts activity.
   *
   * @throws IgniteInterruptedCheckedException If interrupted.
   */
  public void init() throws IgniteInterruptedCheckedException {
    if (isDone()) return;

    if (init.compareAndSet(false, true)) {
      if (isDone()) return;

      try {
        // Wait for event to occur to make sure that discovery
        // will return corresponding nodes.
        U.await(evtLatch);

        assert discoEvt != null : this;
        assert !dummy && !forcePreload : this;

        ClusterNode oldest = CU.oldestAliveCacheServerNode(cctx, exchId.topologyVersion());

        oldestNode.set(oldest);

        startCaches();

        // True if client node joined or failed.
        boolean clientNodeEvt;

        if (F.isEmpty(reqs)) {
          int type = discoEvt.type();

          assert type == EVT_NODE_JOINED || type == EVT_NODE_LEFT || type == EVT_NODE_FAILED
              : discoEvt;

          clientNodeEvt = CU.clientNode(discoEvt.eventNode());
        } else {
          assert discoEvt.type() == EVT_DISCOVERY_CUSTOM_EVT : discoEvt;

          boolean clientOnlyStart = true;

          for (DynamicCacheChangeRequest req : reqs) {
            if (!req.clientStartOnly()) {
              clientOnlyStart = false;

              break;
            }
          }

          clientNodeEvt = clientOnlyStart;
        }

        if (clientNodeEvt) {
          ClusterNode node = discoEvt.eventNode();

          // Clients need to initialize affinity for the local join event or for started client caches.
          if (!node.isLocal()) {
            for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
              if (cacheCtx.isLocal()) continue;

              GridDhtPartitionTopology top = cacheCtx.topology();

              top.updateTopologyVersion(exchId, this, -1, stopping(cacheCtx.cacheId()));

              if (cacheCtx.affinity().affinityTopologyVersion() == AffinityTopologyVersion.NONE) {
                initTopology(cacheCtx);

                top.beforeExchange(this);
              } else
                cacheCtx.affinity().clientEventTopologyChange(discoEvt, exchId.topologyVersion());
            }

            if (exchId.isLeft())
              cctx.mvcc().removeExplicitNodeLocks(exchId.nodeId(), exchId.topologyVersion());

            onDone(exchId.topologyVersion());

            skipPreload = cctx.kernalContext().clientNode();

            return;
          }
        }

        if (cctx.kernalContext().clientNode()) {
          skipPreload = true;

          for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
            if (cacheCtx.isLocal()) continue;

            GridDhtPartitionTopology top = cacheCtx.topology();

            top.updateTopologyVersion(exchId, this, -1, stopping(cacheCtx.cacheId()));
          }

          for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
            if (cacheCtx.isLocal()) continue;

            initTopology(cacheCtx);
          }

          if (oldestNode.get() != null) {
            rmtNodes =
                new ConcurrentLinkedQueue<>(
                    CU.aliveRemoteServerNodesWithCaches(cctx, exchId.topologyVersion()));

            rmtIds = Collections.unmodifiableSet(new HashSet<>(F.nodeIds(rmtNodes)));

            ready.set(true);

            initFut.onDone(true);

            if (log.isDebugEnabled()) log.debug("Initialized future: " + this);

            sendPartitions();
          } else onDone(exchId.topologyVersion());

          return;
        }

        assert oldestNode.get() != null;

        for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
          if (isCacheAdded(cacheCtx.cacheId(), exchId.topologyVersion())) {
            if (cacheCtx
                .discovery()
                .cacheAffinityNodes(cacheCtx.name(), topologyVersion())
                .isEmpty())
              U.quietAndWarn(log, "No server nodes found for cache client: " + cacheCtx.namex());
          }

          cacheCtx.preloader().onExchangeFutureAdded();
        }

        List<String> cachesWithoutNodes = null;

        if (exchId.isLeft()) {
          for (String name : cctx.cache().cacheNames()) {
            if (cctx.discovery().cacheAffinityNodes(name, topologyVersion()).isEmpty()) {
              if (cachesWithoutNodes == null) cachesWithoutNodes = new ArrayList<>();

              cachesWithoutNodes.add(name);

              // Fire the event even if no client cache is started.
              if (cctx.gridEvents().isRecordable(EventType.EVT_CACHE_NODES_LEFT)) {
                Event evt =
                    new CacheEvent(
                        name,
                        cctx.localNode(),
                        cctx.localNode(),
                        "All server nodes have left the cluster.",
                        EventType.EVT_CACHE_NODES_LEFT,
                        0,
                        false,
                        null,
                        null,
                        null,
                        null,
                        false,
                        null,
                        false,
                        null,
                        null,
                        null);

                cctx.gridEvents().record(evt);
              }
            }
          }
        }

        if (cachesWithoutNodes != null) {
          StringBuilder sb =
              new StringBuilder(
                  "All server nodes for the following caches have left the cluster: ");

          for (int i = 0; i < cachesWithoutNodes.size(); i++) {
            String cache = cachesWithoutNodes.get(i);

            sb.append('\'').append(cache).append('\'');

            if (i != cachesWithoutNodes.size() - 1) sb.append(", ");
          }

          U.quietAndWarn(log, sb.toString());

          U.quietAndWarn(log, "Must have server nodes for caches to operate.");
        }

        assert discoEvt != null;

        assert exchId.nodeId().equals(discoEvt.eventNode().id());

        for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
          GridClientPartitionTopology clientTop =
              cctx.exchange().clearClientTopology(cacheCtx.cacheId());

          long updSeq = clientTop == null ? -1 : clientTop.lastUpdateSequence();

          // Update before waiting for locks.
          if (!cacheCtx.isLocal())
            cacheCtx
                .topology()
                .updateTopologyVersion(exchId, this, updSeq, stopping(cacheCtx.cacheId()));
        }

        // Grab all alive remote nodes with order equal to or less than that of the last joined node.
        rmtNodes =
            new ConcurrentLinkedQueue<>(
                CU.aliveRemoteServerNodesWithCaches(cctx, exchId.topologyVersion()));

        rmtIds = Collections.unmodifiableSet(new HashSet<>(F.nodeIds(rmtNodes)));

        for (Map.Entry<UUID, GridDhtPartitionsSingleMessage> m : singleMsgs.entrySet())
          // If received any messages, process them.
          onReceive(m.getKey(), m.getValue());

        for (Map.Entry<UUID, GridDhtPartitionsFullMessage> m : fullMsgs.entrySet())
          // If received any messages, process them.
          onReceive(m.getKey(), m.getValue());

        AffinityTopologyVersion topVer = exchId.topologyVersion();

        for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
          if (cacheCtx.isLocal()) continue;

          // Must initialize topology after we get discovery event.
          initTopology(cacheCtx);

          cacheCtx.preloader().updateLastExchangeFuture(this);
        }

        IgniteInternalFuture<?> partReleaseFut = cctx.partitionReleaseFuture(topVer);

        // Assign to a class variable so it is included in the toString() method.
        this.partReleaseFut = partReleaseFut;

        if (log.isDebugEnabled()) log.debug("Before waiting for partition release future: " + this);

        while (true) {
          try {
            partReleaseFut.get(2 * cctx.gridConfig().getNetworkTimeout(), TimeUnit.MILLISECONDS);

            break;
          } catch (IgniteFutureTimeoutCheckedException ignored) {
            // Print pending transactions and locks that might have led to hang.
            dumpPendingObjects();
          }
        }

        if (log.isDebugEnabled()) log.debug("After waiting for partition release future: " + this);

        if (!F.isEmpty(reqs)) blockGateways();

        if (exchId.isLeft())
          cctx.mvcc().removeExplicitNodeLocks(exchId.nodeId(), exchId.topologyVersion());

        IgniteInternalFuture<?> locksFut = cctx.mvcc().finishLocks(exchId.topologyVersion());

        while (true) {
          try {
            locksFut.get(2 * cctx.gridConfig().getNetworkTimeout(), TimeUnit.MILLISECONDS);

            break;
          } catch (IgniteFutureTimeoutCheckedException ignored) {
            U.warn(
                log,
                "Failed to wait for locks release future. "
                    + "Dumping pending objects that might be the cause: "
                    + cctx.localNodeId());

            U.warn(log, "Locked entries:");

            Map<IgniteTxKey, Collection<GridCacheMvccCandidate>> locks =
                cctx.mvcc().unfinishedLocks(exchId.topologyVersion());

            for (Map.Entry<IgniteTxKey, Collection<GridCacheMvccCandidate>> e : locks.entrySet())
              U.warn(log, "Locked entry [key=" + e.getKey() + ", mvcc=" + e.getValue() + ']');
          }
        }

        for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
          if (cacheCtx.isLocal()) continue;

          // Notify replication manager.
          GridCacheContext drCacheCtx =
              cacheCtx.isNear() ? cacheCtx.near().dht().context() : cacheCtx;

          if (drCacheCtx.isDrEnabled()) drCacheCtx.dr().beforeExchange(topVer, exchId.isLeft());

          // Partition release future is done so we can flush the write-behind store.
          cacheCtx.store().forceFlush();

          // Process queued undeploys prior to sending/spreading map.
          cacheCtx.preloader().unwindUndeploys();

          GridDhtPartitionTopology top = cacheCtx.topology();

          assert topVer.equals(top.topologyVersion())
              : "Topology version is updated only by instances of this class inside a single ExchangeWorker thread.";

          top.beforeExchange(this);
        }

        for (GridClientPartitionTopology top : cctx.exchange().clientTopologies()) {
          top.updateTopologyVersion(exchId, this, -1, stopping(top.cacheId()));

          top.beforeExchange(this);
        }
      } catch (IgniteInterruptedCheckedException e) {
        onDone(e);

        throw e;
      } catch (Throwable e) {
        U.error(
            log,
            "Failed to reinitialize local partitions (preloading will be stopped): " + exchId,
            e);

        onDone(e);

        if (e instanceof Error) throw (Error) e;

        return;
      }

      if (F.isEmpty(rmtIds)) {
        onDone(exchId.topologyVersion());

        return;
      }

      ready.set(true);

      initFut.onDone(true);

      if (log.isDebugEnabled()) log.debug("Initialized future: " + this);

      // If this node is not the oldest node.
      if (!oldestNode.get().id().equals(cctx.localNodeId())) sendPartitions();
      else {
        boolean allReceived = allReceived();

        if (allReceived && replied.compareAndSet(false, true)) {
          if (spreadPartitions()) onDone(exchId.topologyVersion());
        }
      }

      scheduleRecheck();
    } else assert false : "Skipped init future: " + this;
  }
  /**
   * @param entry Entry to map.
   * @param val Value to write.
   * @param entryProcessor Entry processor.
   * @param ttl TTL (optional).
   * @param conflictExpireTime Conflict expire time (optional).
   * @param conflictVer Conflict version (optional).
   * @param addPrevVal Whether to include the previous value.
   * @param prevVal Previous value.
   * @param updateCntr Partition update counter.
   */
  public void addWriteEntry(
      GridDhtCacheEntry entry,
      @Nullable CacheObject val,
      EntryProcessor<Object, Object, Object> entryProcessor,
      long ttl,
      long conflictExpireTime,
      @Nullable GridCacheVersion conflictVer,
      boolean addPrevVal,
      @Nullable CacheObject prevVal,
      long updateCntr) {
    AffinityTopologyVersion topVer = updateReq.topologyVersion();

    Collection<ClusterNode> dhtNodes = cctx.dht().topology().nodes(entry.partition(), topVer);

    if (log.isDebugEnabled())
      log.debug(
          "Mapping entry to DHT nodes [nodes=" + U.nodeIds(dhtNodes) + ", entry=" + entry + ']');

    CacheWriteSynchronizationMode syncMode = updateReq.writeSynchronizationMode();

    keys.add(entry.key());

    for (ClusterNode node : dhtNodes) {
      UUID nodeId = node.id();

      if (!nodeId.equals(cctx.localNodeId())) {
        GridDhtAtomicUpdateRequest updateReq = mappings.get(nodeId);

        if (updateReq == null) {
          updateReq =
              new GridDhtAtomicUpdateRequest(
                  cctx.cacheId(),
                  nodeId,
                  futVer,
                  writeVer,
                  syncMode,
                  topVer,
                  forceTransformBackups,
                  this.updateReq.subjectId(),
                  this.updateReq.taskNameHash(),
                  forceTransformBackups ? this.updateReq.invokeArguments() : null,
                  cctx.deploymentEnabled(),
                  this.updateReq.keepBinary());

          mappings.put(nodeId, updateReq);
        }

        updateReq.addWriteValue(
            entry.key(),
            val,
            entryProcessor,
            ttl,
            conflictExpireTime,
            conflictVer,
            addPrevVal,
            entry.partition(),
            prevVal,
            updateCntr);
      } else if (dhtNodes.size() == 1) {
        try {
          cctx.continuousQueries()
              .onEntryUpdated(
                  entry.key(),
                  val,
                  prevVal,
                  entry.key().internal() || !cctx.userCache(),
                  entry.partition(),
                  true,
                  false,
                  updateCntr,
                  updateReq.topologyVersion());
        } catch (IgniteCheckedException e) {
          U.warn(
              log,
              "Failed to send continuous query message. [key="
                  + entry.key()
                  + ", newVal="
                  + val
                  + ", err="
                  + e
                  + "]");
        }
      }
    }
  }
  /**
   * Adds future.
   *
   * @param fut Future.
   * @return {@code True} if added.
   */
  @SuppressWarnings({"SynchronizationOnLocalVariableOrMethodParameter"})
  public boolean addFuture(final GridCacheFuture<?> fut) {
    if (fut.isDone()) {
      fut.markNotTrackable();

      return true;
    }

    if (!fut.trackable()) return true;

    while (true) {
      Collection<GridCacheFuture<?>> old =
          futs.putIfAbsent(
              fut.version(),
              new ConcurrentLinkedDeque8<GridCacheFuture<?>>() {
                /** */
                private int hash;

                {
                  // Make sure that we add future to queue before
                  // adding queue to the map of futures.
                  add(fut);
                }

                @Override
                public int hashCode() {
                  if (hash == 0) hash = System.identityHashCode(this);

                  return hash;
                }

                @Override
                public boolean equals(Object obj) {
                  return obj == this;
                }
              });

      if (old != null) {
        boolean empty, dup = false;

        synchronized (old) {
          empty = old.isEmpty();

          if (!empty) dup = old.contains(fut);

          if (!empty && !dup) old.add(fut);
        }

        // Future is being removed, so we force-remove here and try again.
        if (empty) {
          if (futs.remove(fut.version(), old)) {
            if (log.isDebugEnabled())
              log.debug("Removed future list from futures map for lock version: " + fut.version());
          }

          continue;
        }

        if (dup) {
          if (log.isDebugEnabled())
            log.debug("Found duplicate future in futures map (will not add): " + fut);

          return false;
        }
      }

      // Handle version mappings.
      if (fut instanceof GridCacheMappedVersion) {
        GridCacheVersion from = ((GridCacheMappedVersion) fut).mappedVersion();

        if (from != null) mapVersion(from, fut.version());
      }

      if (log.isDebugEnabled()) log.debug("Added future to future map: " + fut);

      break;
    }

    // Close the window in case the node left before the future was added to
    // the map of futures.
    for (ClusterNode n : fut.nodes()) {
      if (cctx.discovery().node(n.id()) == null) fut.onNodeLeft(n.id());
    }

    // Just in case the future was completed before it was added.
    if (fut.isDone()) removeFuture(fut);
    else onFutureAdded(fut);

    return true;
  }
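  // Hedged sketch of the register-then-recheck idiom above (java.util.concurrent assumed
  // imported; types simplified): putIfAbsent can race with the concurrent removal of an
  // emptied queue, so an empty existing queue is force-removed and the insert is retried.
  static <K, V> void registerFuture(ConcurrentMap<K, Queue<V>> futs, K key, V fut) {
    while (true) {
      Queue<V> created = new ConcurrentLinkedQueue<>();

      // Add the future to the queue before the queue becomes visible in the map.
      created.add(fut);

      Queue<V> old = futs.putIfAbsent(key, created);

      if (old == null) return;

      synchronized (old) {
        if (!old.isEmpty()) {
          old.add(fut);

          return;
        }
      }

      // The existing queue was emptied and is being removed; drop the stale mapping and retry.
      futs.remove(key, old);
    }
  }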