/** @return Nodes to execute on. */
  private Collection<ClusterNode> nodes() {
    CacheMode cacheMode = cctx.config().getCacheMode();

    switch (cacheMode) {
      case LOCAL:
        if (prj != null)
          U.warn(
              log,
              "Ignoring query projection because it's executed over LOCAL cache "
                  + "(only local node will be queried): "
                  + this);

        return Collections.singletonList(cctx.localNode());

      case REPLICATED:
        if (prj != null || partition() != null) return nodes(cctx, prj, partition());

        return cctx.affinityNode()
            ? Collections.singletonList(cctx.localNode())
            : Collections.singletonList(F.rand(nodes(cctx, null, partition())));

      case PARTITIONED:
        return nodes(cctx, prj, partition());

      default:
        throw new IllegalStateException("Unknown cache distribution mode: " + cacheMode);
    }
  }
  /** @return Nodes to execute on. */
  private Collection<GridNode> nodes() {
    GridCacheMode cacheMode = cctx.config().getCacheMode();

    switch (cacheMode) {
      case LOCAL:
        if (prj != null)
          U.warn(
              log,
              "Ignoring query projection because it's executed over LOCAL cache "
                  + "(only local node will be queried): "
                  + this);

        return Collections.singletonList(cctx.localNode());

      case REPLICATED:
        if (prj != null) return nodes(cctx, prj);

        GridCacheDistributionMode mode = cctx.config().getDistributionMode();

        return mode == PARTITIONED_ONLY || mode == NEAR_PARTITIONED
            ? Collections.singletonList(cctx.localNode())
            : Collections.singletonList(F.rand(nodes(cctx, null)));

      case PARTITIONED:
        return nodes(cctx, prj);

      default:
        throw new IllegalStateException("Unknown cache distribution mode: " + cacheMode);
    }
  }
  /** {@inheritDoc} */
  @Override
  public GridDhtPartitionFullMap partitionMap(boolean onlyActive) {
    lock.readLock().lock();

    try {
      assert node2part != null && node2part.valid()
          : "Invalid node2part [node2part: "
              + node2part
              + ", cache="
              + cctx.name()
              + ", started="
              + cctx.started()
              + ", stopping="
              + stopping
              + ", locNodeId="
              + cctx.localNode().id()
              + ", locName="
              + cctx.gridName()
              + ']';

      GridDhtPartitionFullMap m = node2part;

      return new GridDhtPartitionFullMap(
          m.nodeId(), m.nodeOrder(), m.updateSequence(), m, onlyActive);
    } finally {
      lock.readLock().unlock();
    }
  }
  /** {@inheritDoc} */
  @Override
  public boolean own(GridDhtLocalPartition part) {
    ClusterNode loc = cctx.localNode();

    lock.writeLock().lock();

    try {
      if (part.own()) {
        updateLocal(part.id(), loc.id(), part.state(), updateSeq.incrementAndGet());

        consistencyCheck();

        return true;
      }

      consistencyCheck();

      return false;
    } finally {
      lock.writeLock().unlock();
    }
  }
  /** @param nodeId Node to remove. */
  private void removeNode(UUID nodeId) {
    assert nodeId != null;
    assert lock.writeLock().isHeldByCurrentThread();

    ClusterNode oldest = CU.oldestAliveCacheServerNode(cctx.shared(), topVer);

    assert oldest != null;

    ClusterNode loc = cctx.localNode();

    if (node2part != null) {
      if (oldest.equals(loc) && !node2part.nodeId().equals(loc.id())) {
        updateSeq.setIfGreater(node2part.updateSequence());

        node2part =
            new GridDhtPartitionFullMap(
                loc.id(), loc.order(), updateSeq.incrementAndGet(), node2part, false);
      } else node2part = new GridDhtPartitionFullMap(node2part, node2part.updateSequence());

      part2node = new HashMap<>(part2node);

      GridDhtPartitionMap parts = node2part.remove(nodeId);

      if (parts != null) {
        for (Integer p : parts.keySet()) {
          Set<UUID> nodeIds = part2node.get(p);

          if (nodeIds != null) {
            nodeIds.remove(nodeId);

            if (nodeIds.isEmpty()) part2node.remove(p);
          }
        }
      }

      consistencyCheck();
    }
  }
  /** {@inheritDoc} */
  @Override
  public void execute(@Nullable GridProjection prj) throws GridException {
    if (cb == null)
      throw new IllegalStateException("Mandatory local callback is not set for the query: " + this);

    if (prj == null) prj = ctx.grid();

    prj = prj.forCache(ctx.name());

    if (prj.nodes().isEmpty())
      throw new GridTopologyException("Failed to execute query (projection is empty): " + this);

    GridCacheMode mode = ctx.config().getCacheMode();

    if (mode == LOCAL || mode == REPLICATED) {
      Collection<GridNode> nodes = prj.nodes();

      GridNode node = nodes.contains(ctx.localNode()) ? ctx.localNode() : F.rand(nodes);

      assert node != null;

      if (nodes.size() > 1 && !ctx.cache().isDrSystemCache()) {
        if (node.id().equals(ctx.localNodeId()))
          U.warn(
              log,
              "Continuous query for "
                  + mode
                  + " cache can be run only on local node. "
                  + "Will execute query locally: "
                  + this);
        else
          U.warn(
              log,
              "Continuous query for "
                  + mode
                  + " cache can be run only on single node. "
                  + "Will execute query on remote node [qry="
                  + this
                  + ", node="
                  + node
                  + ']');
      }

      prj = prj.forNode(node);
    }

    closeLock.lock();

    try {
      if (routineId != null)
        throw new IllegalStateException("Continuous query can't be executed twice.");

      guard.block();

      GridContinuousHandler hnd =
          new GridCacheContinuousQueryHandler<>(ctx.name(), topic, cb, filter, prjPred);

      routineId =
          ctx.kernalContext()
              .continuous()
              .startRoutine(hnd, bufSize, timeInterval, autoUnsubscribe, prj.predicate())
              .get();
    } finally {
      closeLock.unlock();
    }
  }
 /**
  * @param topVer Topology version.
  * @return {@code True} if local node is primary for this partition.
  */
 public boolean primary(AffinityTopologyVersion topVer) {
   return cctx.affinity().primary(cctx.localNode(), id, topVer);
 }
Beispiel #8
0
  /**
   * Gets next near lock mapping and either acquires dht locks locally or sends near lock request to
   * remote primary node.
   *
   * @param mappings Queue of mappings.
   * @throws GridException If mapping can not be completed.
   */
  private void proceedMapping(final ConcurrentLinkedDeque8<GridNearLockMapping<K, V>> mappings)
      throws GridException {
    GridNearLockMapping<K, V> map = mappings.poll();

    // If there are no more mappings to process, complete the future.
    if (map == null) return;

    final GridNearLockRequest<K, V> req = map.request();
    final Collection<K> mappedKeys = map.distributedKeys();
    final GridNode node = map.node();

    if (filter != null && filter.length != 0) req.filter(filter, cctx);

    if (node.isLocal()) {
      req.miniId(GridUuid.randomUuid());

      if (log.isDebugEnabled()) log.debug("Before locally locking near request: " + req);

      GridFuture<GridNearLockResponse<K, V>> fut;

      if (CU.DHT_ENABLED) fut = dht().lockAllAsync(cctx.localNode(), req, filter);
      else {
        // Create dummy values for testing.
        GridNearLockResponse<K, V> res =
            new GridNearLockResponse<>(lockVer, futId, null, false, 1, null);

        res.addValueBytes(null, null, true, lockVer, lockVer, cctx);

        fut = new GridFinishedFuture<>(ctx, res);
      }

      // Add new future.
      add(
          new GridEmbeddedFuture<>(
              cctx.kernalContext(),
              fut,
              new C2<GridNearLockResponse<K, V>, Exception, Boolean>() {
                @Override
                public Boolean apply(GridNearLockResponse<K, V> res, Exception e) {
                  if (CU.isLockTimeoutOrCancelled(e)
                      || (res != null && CU.isLockTimeoutOrCancelled(res.error()))) return false;

                  if (e != null) {
                    onError(e);

                    return false;
                  }

                  if (res == null) {
                    onError(new GridException("Lock response is null for future: " + this));

                    return false;
                  }

                  if (res.error() != null) {
                    onError(res.error());

                    return false;
                  }

                  if (log.isDebugEnabled())
                    log.debug(
                        "Acquired lock for local DHT mapping [locId="
                            + cctx.nodeId()
                            + ", mappedKeys="
                            + mappedKeys
                            + ", fut="
                            + GridNearLockFuture.this
                            + ']');

                  try {
                    int i = 0;

                    for (K k : mappedKeys) {
                      while (true) {
                        GridNearCacheEntry<K, V> entry =
                            cctx.near().entryExx(k, req.topologyVersion());

                        try {
                          GridTuple3<GridCacheVersion, V, byte[]> oldValTup =
                              valMap.get(entry.key());

                          boolean hasBytes = entry.hasValue();
                          V oldVal = entry.rawGet();
                          V newVal = res.value(i);
                          byte[] newBytes = res.valueBytes(i);

                          GridCacheVersion dhtVer = res.dhtVersion(i);
                          GridCacheVersion mappedVer = res.mappedVersion(i);

                          // On local node don't record twice if DHT cache already recorded.
                          boolean record =
                              retval && oldValTup != null && oldValTup.get1().equals(dhtVer);

                          if (newVal == null) {
                            if (oldValTup != null) {
                              if (oldValTup.get1().equals(dhtVer)) {
                                newVal = oldValTup.get2();

                                newBytes = oldValTup.get3();
                              }

                              oldVal = oldValTup.get2();
                            }
                          }

                          // Lock is held at this point, so we can set the
                          // returned value if any.
                          entry.resetFromPrimary(newVal, newBytes, lockVer, dhtVer, node.id());

                          entry.readyNearLock(
                              lockVer,
                              mappedVer,
                              res.committedVersions(),
                              res.rolledbackVersions(),
                              res.pending());

                          if (inTx() && implicitTx() && tx.onePhaseCommit()) {
                            boolean pass = res.filterResult(i);

                            tx.entry(k).filters(pass ? CU.<K, V>empty() : CU.<K, V>alwaysFalse());
                          }

                          if (record) {
                            if (cctx.events().isRecordable(EVT_CACHE_OBJECT_READ))
                              cctx.events()
                                  .addEvent(
                                      entry.partition(),
                                      entry.key(),
                                      tx,
                                      null,
                                      EVT_CACHE_OBJECT_READ,
                                      newVal,
                                      newVal != null,
                                      oldVal,
                                      hasBytes,
                                      CU.subjectId(tx, cctx));

                            cctx.cache().metrics0().onRead(oldVal != null);
                          }

                          if (log.isDebugEnabled())
                            log.debug(
                                "Processed response for entry [res="
                                    + res
                                    + ", entry="
                                    + entry
                                    + ']');

                          break; // Inner while loop.
                        } catch (GridCacheEntryRemovedException ignored) {
                          if (log.isDebugEnabled())
                            log.debug(
                                "Failed to add candidates because entry was "
                                    + "removed (will renew).");

                          // Replace old entry with new one.
                          entries.set(
                              i,
                              (GridDistributedCacheEntry<K, V>) cctx.cache().entryEx(entry.key()));
                        }
                      }

                      i++; // Increment outside of while loop.
                    }

                    // Proceed and add new future (if any) before completing embedded future.
                    proceedMapping(mappings);
                  } catch (GridException ex) {
                    onError(ex);

                    return false;
                  }

                  return true;
                }
              }));
    } else {
      final MiniFuture fut = new MiniFuture(node, mappedKeys, mappings);

      req.miniId(fut.futureId());

      add(fut); // Append new future.

      GridFuture<?> txSync = null;

      if (inTx()) txSync = cctx.tm().awaitFinishAckAsync(node.id(), tx.threadId());

      if (txSync == null || txSync.isDone()) {
        try {
          if (log.isDebugEnabled())
            log.debug("Sending near lock request [node=" + node.id() + ", req=" + req + ']');

          cctx.io().send(node, req);
        } catch (GridTopologyException ex) {
          assert fut != null;

          fut.onResult(ex);
        }
      } else {
        txSync.listenAsync(
            new CI1<GridFuture<?>>() {
              @Override
              public void apply(GridFuture<?> t) {
                try {
                  if (log.isDebugEnabled())
                    log.debug(
                        "Sending near lock request [node=" + node.id() + ", req=" + req + ']');

                  cctx.io().send(node, req);
                } catch (GridTopologyException ex) {
                  assert fut != null;

                  fut.onResult(ex);
                } catch (GridException e) {
                  onError(e);
                }
              }
            });
      }
    }
  }
  /** {@inheritDoc} */
  @Override
  public boolean afterExchange(GridDhtPartitionsExchangeFuture exchFut)
      throws IgniteCheckedException {
    boolean changed = waitForRent();

    ClusterNode loc = cctx.localNode();

    int num = cctx.affinity().partitions();

    AffinityTopologyVersion topVer = exchFut.topologyVersion();

    lock.writeLock().lock();

    try {
      if (stopping) return false;

      assert topVer.equals(exchFut.topologyVersion())
          : "Invalid topology version [topVer=" + topVer + ", exchId=" + exchFut.exchangeId() + ']';

      if (log.isDebugEnabled())
        log.debug(
            "Partition map before afterExchange [exchId="
                + exchFut.exchangeId()
                + ", fullMap="
                + fullMapString()
                + ']');

      long updateSeq = this.updateSeq.incrementAndGet();

      for (int p = 0; p < num; p++) {
        GridDhtLocalPartition locPart = localPartition(p, topVer, false, false);

        if (cctx.affinity().localNode(p, topVer)) {
          // This partition will be created during next topology event,
          // which obviously has not happened at this point.
          if (locPart == null) {
            if (log.isDebugEnabled())
              log.debug("Skipping local partition afterExchange (will not create): " + p);

            continue;
          }

          GridDhtPartitionState state = locPart.state();

          if (state == MOVING) {
            if (cctx.rebalanceEnabled()) {
              Collection<ClusterNode> owners = owners(p);

              // If there are no other owners, then become an owner.
              if (F.isEmpty(owners)) {
                boolean owned = locPart.own();

                assert owned
                    : "Failed to own partition [cacheName"
                        + cctx.name()
                        + ", locPart="
                        + locPart
                        + ']';

                updateLocal(p, loc.id(), locPart.state(), updateSeq);

                changed = true;

                if (cctx.events().isRecordable(EVT_CACHE_REBALANCE_PART_DATA_LOST)) {
                  DiscoveryEvent discoEvt = exchFut.discoveryEvent();

                  cctx.events()
                      .addPreloadEvent(
                          p,
                          EVT_CACHE_REBALANCE_PART_DATA_LOST,
                          discoEvt.eventNode(),
                          discoEvt.type(),
                          discoEvt.timestamp());
                }

                if (log.isDebugEnabled()) log.debug("Owned partition: " + locPart);
              } else if (log.isDebugEnabled())
                log.debug(
                    "Will not own partition (there are owners to rebalance from) [locPart="
                        + locPart
                        + ", owners = "
                        + owners
                        + ']');
            } else updateLocal(p, loc.id(), locPart.state(), updateSeq);
          }
        } else {
          if (locPart != null) {
            GridDhtPartitionState state = locPart.state();

            if (state == MOVING) {
              locPart.rent(false);

              updateLocal(p, loc.id(), locPart.state(), updateSeq);

              changed = true;

              if (log.isDebugEnabled())
                log.debug("Evicting moving partition (it does not belong to affinity): " + locPart);
            }
          }
        }
      }

      consistencyCheck();
    } finally {
      lock.writeLock().unlock();
    }

    return changed;
  }
  /** {@inheritDoc} */
  @Override
  public void beforeExchange(GridDhtPartitionsExchangeFuture exchFut)
      throws IgniteCheckedException {
    waitForRent();

    ClusterNode loc = cctx.localNode();

    int num = cctx.affinity().partitions();

    lock.writeLock().lock();

    try {
      GridDhtPartitionExchangeId exchId = exchFut.exchangeId();

      if (stopping) return;

      assert topVer.equals(exchId.topologyVersion())
          : "Invalid topology version [topVer=" + topVer + ", exchId=" + exchId + ']';

      if (exchId.isLeft()) removeNode(exchId.nodeId());

      // In case if node joins, get topology at the time of joining node.
      ClusterNode oldest = CU.oldestAliveCacheServerNode(cctx.shared(), topVer);

      assert oldest != null;

      if (log.isDebugEnabled())
        log.debug(
            "Partition map beforeExchange [exchId="
                + exchId
                + ", fullMap="
                + fullMapString()
                + ']');

      long updateSeq = this.updateSeq.incrementAndGet();

      // If this is the oldest node.
      if (oldest.id().equals(loc.id())
          || exchFut.isCacheAdded(cctx.cacheId(), exchId.topologyVersion())) {
        if (node2part == null) {
          node2part = new GridDhtPartitionFullMap(oldest.id(), oldest.order(), updateSeq);

          if (log.isDebugEnabled())
            log.debug(
                "Created brand new full topology map on oldest node [exchId="
                    + exchId
                    + ", fullMap="
                    + fullMapString()
                    + ']');
        } else if (!node2part.valid()) {
          node2part =
              new GridDhtPartitionFullMap(oldest.id(), oldest.order(), updateSeq, node2part, false);

          if (log.isDebugEnabled())
            log.debug(
                "Created new full topology map on oldest node [exchId="
                    + exchId
                    + ", fullMap="
                    + node2part
                    + ']');
        } else if (!node2part.nodeId().equals(loc.id())) {
          node2part =
              new GridDhtPartitionFullMap(oldest.id(), oldest.order(), updateSeq, node2part, false);

          if (log.isDebugEnabled())
            log.debug(
                "Copied old map into new map on oldest node (previous oldest node left) [exchId="
                    + exchId
                    + ", fullMap="
                    + fullMapString()
                    + ']');
        }
      }

      if (cctx.rebalanceEnabled()) {
        for (int p = 0; p < num; p++) {
          // If this is the first node in grid.
          boolean added = exchFut.isCacheAdded(cctx.cacheId(), exchId.topologyVersion());

          if ((oldest.id().equals(loc.id())
                  && oldest.id().equals(exchId.nodeId())
                  && exchId.isJoined())
              || added) {
            assert exchId.isJoined() || added;

            try {
              GridDhtLocalPartition locPart = localPartition(p, topVer, true, false);

              assert locPart != null;

              boolean owned = locPart.own();

              assert owned
                  : "Failed to own partition for oldest node [cacheName"
                      + cctx.name()
                      + ", part="
                      + locPart
                      + ']';

              if (log.isDebugEnabled()) log.debug("Owned partition for oldest node: " + locPart);

              updateLocal(p, loc.id(), locPart.state(), updateSeq);
            } catch (GridDhtInvalidPartitionException e) {
              if (log.isDebugEnabled())
                log.debug(
                    "Ignoring invalid partition on oldest node (no need to create a partition "
                        + "if it no longer belongs to local node: "
                        + e.partition());
            }
          }
          // If this is not the first node in grid.
          else {
            if (node2part != null && node2part.valid()) {
              if (cctx.affinity().localNode(p, topVer)) {
                try {
                  // This will make sure that all non-existing partitions
                  // will be created in MOVING state.
                  GridDhtLocalPartition locPart = localPartition(p, topVer, true, false);

                  updateLocal(p, loc.id(), locPart.state(), updateSeq);
                } catch (GridDhtInvalidPartitionException e) {
                  if (log.isDebugEnabled())
                    log.debug(
                        "Ignoring invalid partition (no need to create a partition if it "
                            + "no longer belongs to local node: "
                            + e.partition());
                }
              }
            }
            // If this node's map is empty, we pre-create local partitions,
            // so local map will be sent correctly during exchange.
            else if (cctx.affinity().localNode(p, topVer)) {
              try {
                localPartition(p, topVer, true, false);
              } catch (GridDhtInvalidPartitionException e) {
                if (log.isDebugEnabled())
                  log.debug(
                      "Ignoring invalid partition (no need to pre-create a partition if it "
                          + "no longer belongs to local node: "
                          + e.partition());
              }
            }
          }
        }
      } else {
        // If preloader is disabled, then we simply clear out
        // the partitions this node is not responsible for.
        for (int p = 0; p < num; p++) {
          GridDhtLocalPartition locPart = localPartition(p, topVer, false, false);

          boolean belongs = cctx.affinity().localNode(p, topVer);

          if (locPart != null) {
            if (!belongs) {
              GridDhtPartitionState state = locPart.state();

              if (state.active()) {
                locPart.rent(false);

                updateLocal(p, loc.id(), locPart.state(), updateSeq);

                if (log.isDebugEnabled())
                  log.debug(
                      "Evicting partition with rebalancing disabled "
                          + "(it does not belong to affinity): "
                          + locPart);
              }
            }
          } else if (belongs) {
            try {
              // Pre-create partitions.
              localPartition(p, topVer, true, false);
            } catch (GridDhtInvalidPartitionException e) {
              if (log.isDebugEnabled())
                log.debug(
                    "Ignoring invalid partition with disabled rebalancer (no need to "
                        + "pre-create a partition if it no longer belongs to local node: "
                        + e.partition());
            }
          }
        }
      }

      if (node2part != null && node2part.valid()) checkEvictions(updateSeq);

      consistencyCheck();

      if (log.isDebugEnabled())
        log.debug(
            "Partition map after beforeExchange [exchId="
                + exchId
                + ", fullMap="
                + fullMapString()
                + ']');
    } finally {
      lock.writeLock().unlock();
    }

    // Wait for evictions.
    waitForRent();
  }
  /**
   * @param updateSeq Update sequence.
   * @return Checks if any of the local partitions need to be evicted.
   */
  private boolean checkEvictions(long updateSeq) {
    assert lock.isWriteLockedByCurrentThread();

    boolean changed = false;

    UUID locId = cctx.nodeId();

    for (GridDhtLocalPartition part : locParts.values()) {
      GridDhtPartitionState state = part.state();

      if (state.active()) {
        int p = part.id();

        List<ClusterNode> affNodes = cctx.affinity().nodes(p, topVer);

        if (!affNodes.contains(cctx.localNode())) {
          Collection<UUID> nodeIds = F.nodeIds(nodes(p, topVer, OWNING));

          // If all affinity nodes are owners, then evict partition from local node.
          if (nodeIds.containsAll(F.nodeIds(affNodes))) {
            part.rent(false);

            updateLocal(part.id(), locId, part.state(), updateSeq);

            changed = true;

            if (log.isDebugEnabled())
              log.debug("Evicted local partition (all affinity nodes are owners): " + part);
          } else {
            int ownerCnt = nodeIds.size();
            int affCnt = affNodes.size();

            if (ownerCnt > affCnt) {
              List<ClusterNode> sorted = new ArrayList<>(cctx.discovery().nodes(nodeIds));

              // Sort by node orders in ascending order.
              Collections.sort(sorted, CU.nodeComparator(true));

              int diff = sorted.size() - affCnt;

              for (int i = 0; i < diff; i++) {
                ClusterNode n = sorted.get(i);

                if (locId.equals(n.id())) {
                  part.rent(false);

                  updateLocal(part.id(), locId, part.state(), updateSeq);

                  changed = true;

                  if (log.isDebugEnabled())
                    log.debug(
                        "Evicted local partition (this node is oldest non-affinity node): " + part);

                  break;
                }
              }
            }
          }
        }
      }
    }

    return changed;
  }