/**
   * @param nodes Nodes.
   * @param id ID.
   * @throws IgniteCheckedException If failed.
   */
  private void sendAllPartitions(
      Collection<? extends ClusterNode> nodes, GridDhtPartitionExchangeId id)
      throws IgniteCheckedException {
    GridDhtPartitionsFullMessage m =
        new GridDhtPartitionsFullMessage(id, lastVer.get(), id.topologyVersion());

    for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
      if (!cacheCtx.isLocal()) {
        AffinityTopologyVersion startTopVer = cacheCtx.startTopologyVersion();

        boolean ready = startTopVer == null || startTopVer.compareTo(id.topologyVersion()) <= 0;

        if (ready)
          m.addFullPartitionsMap(cacheCtx.cacheId(), cacheCtx.topology().partitionMap(true));
      }
    }

    // It is important that client topologies be added after contexts.
    for (GridClientPartitionTopology top : cctx.exchange().clientTopologies())
      m.addFullPartitionsMap(top.cacheId(), top.partitionMap(true));

    if (log.isDebugEnabled())
      log.debug(
          "Sending full partition map [nodeIds="
              + F.viewReadOnly(nodes, F.node2id())
              + ", exchId="
              + exchId
              + ", msg="
              + m
              + ']');

    cctx.io().safeSend(nodes, m, SYSTEM_POOL, null);
  }
    /** @param nodes Nodes. */
    private Queue<ClusterNode> fallbacks(Collection<ClusterNode> nodes) {
      Queue<ClusterNode> fallbacks = new LinkedList<>();

      ClusterNode node = F.first(F.view(nodes, IS_LOC_NODE));

      if (node != null) fallbacks.add(node);

      fallbacks.addAll(node != null ? F.view(nodes, F.not(IS_LOC_NODE)) : nodes);

      return fallbacks;
    }
  /**
   * @param cacheId Cache ID.
   * @return {@code True} if local client has been added.
   */
  public boolean isLocalClientAdded(int cacheId) {
    if (!F.isEmpty(reqs)) {
      for (DynamicCacheChangeRequest req : reqs) {
        if (req.start() && F.eq(req.initiatingNodeId(), cctx.localNodeId())) {
          if (CU.cacheId(req.cacheName()) == cacheId) return true;
        }
      }
    }

    return false;
  }
  /**
   * @param cacheId Cache ID to check.
   * @param topVer Topology version.
   * @return {@code True} if cache was added during this exchange.
   */
  public boolean isCacheAdded(int cacheId, AffinityTopologyVersion topVer) {
    if (!F.isEmpty(reqs)) {
      for (DynamicCacheChangeRequest req : reqs) {
        if (req.start() && !req.clientStartOnly()) {
          if (CU.cacheId(req.cacheName()) == cacheId) return true;
        }
      }
    }

    GridCacheContext<?, ?> cacheCtx = cctx.cacheContext(cacheId);

    return cacheCtx != null && F.eq(cacheCtx.startTopologyVersion(), topVer);
  }
  /**
   * @param cctx Cache context.
   * @param prj Projection (optional).
   * @return Collection of data nodes in provided projection (if any).
   */
  private static Collection<ClusterNode> nodes(
      final GridCacheContext<?, ?> cctx,
      @Nullable final ClusterGroup prj,
      @Nullable final Integer part) {
    assert cctx != null;

    final AffinityTopologyVersion topVer = cctx.affinity().affinityTopologyVersion();

    Collection<ClusterNode> affNodes = CU.affinityNodes(cctx);

    if (prj == null && part == null) return affNodes;

    final Set<ClusterNode> owners =
        part == null
            ? Collections.<ClusterNode>emptySet()
            : new HashSet<>(cctx.topology().owners(part, topVer));

    return F.view(
        affNodes,
        new P1<ClusterNode>() {
          @Override
          public boolean apply(ClusterNode n) {

            return cctx.discovery().cacheAffinityNode(n, cctx.name())
                && (prj == null || prj.node(n.id()) != null)
                && (part == null || owners.contains(n));
          }
        });
  }
  /** @return Nodes to execute on. */
  private Collection<ClusterNode> nodes() {
    CacheMode cacheMode = cctx.config().getCacheMode();

    switch (cacheMode) {
      case LOCAL:
        if (prj != null)
          U.warn(
              log,
              "Ignoring query projection because it's executed over LOCAL cache "
                  + "(only local node will be queried): "
                  + this);

        return Collections.singletonList(cctx.localNode());

      case REPLICATED:
        if (prj != null || partition() != null) return nodes(cctx, prj, partition());

        return cctx.affinityNode()
            ? Collections.singletonList(cctx.localNode())
            : Collections.singletonList(F.rand(nodes(cctx, null, partition())));

      case PARTITIONED:
        return nodes(cctx, prj, partition());

      default:
        throw new IllegalStateException("Unknown cache distribution mode: " + cacheMode);
    }
  }
  /**
   * Sends query request.
   *
   * @param fut Distributed future.
   * @param req Request.
   * @param nodes Nodes.
   * @throws IgniteCheckedException In case of error.
   */
  @SuppressWarnings("unchecked")
  private void sendRequest(
      final GridCacheDistributedQueryFuture<?, ?, ?> fut,
      final GridCacheQueryRequest req,
      Collection<ClusterNode> nodes)
      throws IgniteCheckedException {
    assert fut != null;
    assert req != null;
    assert nodes != null;

    final UUID locNodeId = cctx.localNodeId();

    ClusterNode locNode = null;

    Collection<ClusterNode> rmtNodes = null;

    for (ClusterNode n : nodes) {
      if (n.id().equals(locNodeId)) locNode = n;
      else {
        if (rmtNodes == null) rmtNodes = new ArrayList<>(nodes.size());

        rmtNodes.add(n);
      }
    }

    // Request should be sent to remote nodes before the query is processed on the local node.
    // For example, a remote reducer has a state, we should not serialize and then send
    // the reducer changed by the local node.
    if (!F.isEmpty(rmtNodes)) {
      cctx.io()
          .safeSend(
              rmtNodes,
              req,
              cctx.ioPolicy(),
              new P1<ClusterNode>() {
                @Override
                public boolean apply(ClusterNode node) {
                  fut.onNodeLeft(node.id());

                  return !fut.isDone();
                }
              });
    }

    if (locNode != null) {
      cctx.closures()
          .callLocalSafe(
              new Callable<Object>() {
                @Override
                public Object call() throws Exception {
                  req.beforeLocalExecution(cctx);

                  processQueryRequest(locNodeId, req);

                  return null;
                }
              });
    }
  }
 /**
  * Starts dynamic caches.
  *
  * @throws IgniteCheckedException If failed.
  */
 private void startCaches() throws IgniteCheckedException {
   cctx.cache()
       .prepareCachesStart(
           F.view(
               reqs,
               new IgnitePredicate<DynamicCacheChangeRequest>() {
                 @Override
                 public boolean apply(DynamicCacheChangeRequest req) {
                   return req.start();
                 }
               }),
           exchId.topologyVersion());
 }
  /**
   * @param rmtReducer Optional reducer.
   * @param rmtTransform Optional transformer.
   * @param args Arguments.
   * @return Future.
   */
  @SuppressWarnings("IfMayBeConditional")
  private <R> CacheQueryFuture<R> execute(
      @Nullable IgniteReducer<T, R> rmtReducer,
      @Nullable IgniteClosure<T, R> rmtTransform,
      @Nullable Object... args) {
    Collection<ClusterNode> nodes = nodes();

    cctx.checkSecurity(SecurityPermission.CACHE_READ);

    if (nodes.isEmpty())
      return new GridCacheQueryErrorFuture<>(
          cctx.kernalContext(), new ClusterGroupEmptyCheckedException());

    if (log.isDebugEnabled())
      log.debug("Executing query [query=" + this + ", nodes=" + nodes + ']');

    if (cctx.deploymentEnabled()) {
      try {
        cctx.deploy().registerClasses(filter, rmtReducer, rmtTransform);
        cctx.deploy().registerClasses(args);
      } catch (IgniteCheckedException e) {
        return new GridCacheQueryErrorFuture<>(cctx.kernalContext(), e);
      }
    }

    if (subjId == null) subjId = cctx.localNodeId();

    taskHash = cctx.kernalContext().job().currentTaskNameHash();

    final GridCacheQueryBean bean =
        new GridCacheQueryBean(
            this,
            (IgniteReducer<Object, Object>) rmtReducer,
            (IgniteClosure<Object, Object>) rmtTransform,
            args);

    final GridCacheQueryManager qryMgr = cctx.queries();

    boolean loc = nodes.size() == 1 && F.first(nodes).id().equals(cctx.localNodeId());

    if (type == SQL_FIELDS || type == SPI)
      return (CacheQueryFuture<R>)
          (loc ? qryMgr.queryFieldsLocal(bean) : qryMgr.queryFieldsDistributed(bean, nodes));
    else if (type == SCAN && part != null && nodes.size() > 1)
      return new CacheQueryFallbackFuture<>(nodes, bean, qryMgr);
    else
      return (CacheQueryFuture<R>)
          (loc ? qryMgr.queryLocal(bean) : qryMgr.queryDistributed(bean, nodes));
  }
  /**
   * @param cacheId Cache ID to check.
   * @return {@code True} if cache is stopping by this exchange.
   */
  private boolean stopping(int cacheId) {
    boolean stopping = false;

    if (!F.isEmpty(reqs)) {
      for (DynamicCacheChangeRequest req : reqs) {
        if (cacheId == CU.cacheId(req.cacheName())) {
          stopping = req.stop();

          break;
        }
      }
    }

    return stopping;
  }
  /**
   * @param nodeId Node ID.
   * @param retryCnt Number of retries.
   */
  private void sendAllPartitions(final UUID nodeId, final int retryCnt) {
    ClusterNode n = cctx.node(nodeId);

    try {
      if (n != null) sendAllPartitions(F.asList(n), exchId);
    } catch (IgniteCheckedException e) {
      if (e instanceof ClusterTopologyCheckedException || !cctx.discovery().alive(n)) {
        log.debug(
            "Failed to send full partition map to node, node left grid "
                + "[rmtNode="
                + nodeId
                + ", exchangeId="
                + exchId
                + ']');

        return;
      }

      if (retryCnt > 0) {
        long timeout = cctx.gridConfig().getNetworkSendRetryDelay();

        LT.error(
            log,
            e,
            "Failed to send full partition map to node (will retry after timeout) "
                + "[node="
                + nodeId
                + ", exchangeId="
                + exchId
                + ", timeout="
                + timeout
                + ']');

        cctx.time()
            .addTimeoutObject(
                new GridTimeoutObjectAdapter(timeout) {
                  @Override
                  public void onTimeout() {
                    sendAllPartitions(nodeId, retryCnt - 1);
                  }
                });
      } else
        U.error(
            log,
            "Failed to send full partition map [node=" + n + ", exchangeId=" + exchId + ']',
            e);
    }
  }
  /**
   * Processes cache query request.
   *
   * @param sndId Sender node id.
   * @param req Query request.
   */
  @SuppressWarnings("unchecked")
  @Override
  void processQueryRequest(UUID sndId, GridCacheQueryRequest req) {
    if (req.cancel()) {
      cancelIds.add(new CancelMessageId(req.id(), sndId));

      if (req.fields()) removeFieldsQueryResult(sndId, req.id());
      else removeQueryResult(sndId, req.id());
    } else {
      if (!cancelIds.contains(new CancelMessageId(req.id(), sndId))) {
        if (!F.eq(req.cacheName(), cctx.name())) {
          GridCacheQueryResponse res =
              new GridCacheQueryResponse(
                  cctx.cacheId(),
                  req.id(),
                  new IgniteCheckedException(
                      "Received request for incorrect cache [expected="
                          + cctx.name()
                          + ", actual="
                          + req.cacheName()));

          sendQueryResponse(sndId, res, 0);
        } else {
          threads.put(req.id(), Thread.currentThread());

          try {
            GridCacheQueryInfo info = distributedQueryInfo(sndId, req);

            if (info == null) return;

            if (req.fields()) runFieldsQuery(info);
            else runQuery(info);
          } catch (Throwable e) {
            U.error(log(), "Failed to run query.", e);

            sendQueryResponse(
                sndId, new GridCacheQueryResponse(cctx.cacheId(), req.id(), e.getCause()), 0);

            if (e instanceof Error) throw (Error) e;
          } finally {
            threads.remove(req.id());
          }
        }
      }
    }
  }
  /** @return {@code True} if succeeded. */
  private boolean spreadPartitions() {
    try {
      sendAllPartitions(rmtNodes, exchId);

      return true;
    } catch (IgniteCheckedException e) {
      scheduleRecheck();

      if (!X.hasCause(e, InterruptedException.class))
        U.error(
            log,
            "Failed to send full partition map to nodes (will retry after timeout) [nodes="
                + F.nodeId8s(rmtNodes)
                + ", exchangeId="
                + exchId
                + ']',
            e);

      return false;
    }
  }
예제 #14
0
  /**
   * Constructor.
   *
   * @param igfsCtx IGFS context.
   */
  IgfsDeleteWorker(IgfsContext igfsCtx) {
    super(
        "igfs-delete-worker%"
            + igfsCtx.igfs().name()
            + "%"
            + igfsCtx.kernalContext().localNodeId()
            + "%");

    this.igfsCtx = igfsCtx;

    meta = igfsCtx.meta();
    data = igfsCtx.data();

    evts = igfsCtx.kernalContext().event();

    String igfsName = igfsCtx.igfs().name();

    topic = F.isEmpty(igfsName) ? TOPIC_IGFS : TOPIC_IGFS.topic(igfsName);

    assert meta != null;
    assert data != null;

    log = igfsCtx.kernalContext().log(IgfsDeleteWorker.class);
  }
  /**
   * Starts activity.
   *
   * @throws IgniteInterruptedCheckedException If interrupted.
   */
  public void init() throws IgniteInterruptedCheckedException {
    if (isDone()) return;

    if (init.compareAndSet(false, true)) {
      if (isDone()) return;

      try {
        // Wait for event to occur to make sure that discovery
        // will return corresponding nodes.
        U.await(evtLatch);

        assert discoEvt != null : this;
        assert !dummy && !forcePreload : this;

        ClusterNode oldest = CU.oldestAliveCacheServerNode(cctx, exchId.topologyVersion());

        oldestNode.set(oldest);

        startCaches();

        // True if client node joined or failed.
        boolean clientNodeEvt;

        if (F.isEmpty(reqs)) {
          int type = discoEvt.type();

          assert type == EVT_NODE_JOINED || type == EVT_NODE_LEFT || type == EVT_NODE_FAILED
              : discoEvt;

          clientNodeEvt = CU.clientNode(discoEvt.eventNode());
        } else {
          assert discoEvt.type() == EVT_DISCOVERY_CUSTOM_EVT : discoEvt;

          boolean clientOnlyStart = true;

          for (DynamicCacheChangeRequest req : reqs) {
            if (!req.clientStartOnly()) {
              clientOnlyStart = false;

              break;
            }
          }

          clientNodeEvt = clientOnlyStart;
        }

        if (clientNodeEvt) {
          ClusterNode node = discoEvt.eventNode();

          // Client need to initialize affinity for local join event or for stated client caches.
          if (!node.isLocal()) {
            for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
              if (cacheCtx.isLocal()) continue;

              GridDhtPartitionTopology top = cacheCtx.topology();

              top.updateTopologyVersion(exchId, this, -1, stopping(cacheCtx.cacheId()));

              if (cacheCtx.affinity().affinityTopologyVersion() == AffinityTopologyVersion.NONE) {
                initTopology(cacheCtx);

                top.beforeExchange(this);
              } else
                cacheCtx.affinity().clientEventTopologyChange(discoEvt, exchId.topologyVersion());
            }

            if (exchId.isLeft())
              cctx.mvcc().removeExplicitNodeLocks(exchId.nodeId(), exchId.topologyVersion());

            onDone(exchId.topologyVersion());

            skipPreload = cctx.kernalContext().clientNode();

            return;
          }
        }

        if (cctx.kernalContext().clientNode()) {
          skipPreload = true;

          for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
            if (cacheCtx.isLocal()) continue;

            GridDhtPartitionTopology top = cacheCtx.topology();

            top.updateTopologyVersion(exchId, this, -1, stopping(cacheCtx.cacheId()));
          }

          for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
            if (cacheCtx.isLocal()) continue;

            initTopology(cacheCtx);
          }

          if (oldestNode.get() != null) {
            rmtNodes =
                new ConcurrentLinkedQueue<>(
                    CU.aliveRemoteServerNodesWithCaches(cctx, exchId.topologyVersion()));

            rmtIds = Collections.unmodifiableSet(new HashSet<>(F.nodeIds(rmtNodes)));

            ready.set(true);

            initFut.onDone(true);

            if (log.isDebugEnabled()) log.debug("Initialized future: " + this);

            sendPartitions();
          } else onDone(exchId.topologyVersion());

          return;
        }

        assert oldestNode.get() != null;

        for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
          if (isCacheAdded(cacheCtx.cacheId(), exchId.topologyVersion())) {
            if (cacheCtx
                .discovery()
                .cacheAffinityNodes(cacheCtx.name(), topologyVersion())
                .isEmpty())
              U.quietAndWarn(log, "No server nodes found for cache client: " + cacheCtx.namex());
          }

          cacheCtx.preloader().onExchangeFutureAdded();
        }

        List<String> cachesWithoutNodes = null;

        if (exchId.isLeft()) {
          for (String name : cctx.cache().cacheNames()) {
            if (cctx.discovery().cacheAffinityNodes(name, topologyVersion()).isEmpty()) {
              if (cachesWithoutNodes == null) cachesWithoutNodes = new ArrayList<>();

              cachesWithoutNodes.add(name);

              // Fire event even if there is no client cache started.
              if (cctx.gridEvents().isRecordable(EventType.EVT_CACHE_NODES_LEFT)) {
                Event evt =
                    new CacheEvent(
                        name,
                        cctx.localNode(),
                        cctx.localNode(),
                        "All server nodes have left the cluster.",
                        EventType.EVT_CACHE_NODES_LEFT,
                        0,
                        false,
                        null,
                        null,
                        null,
                        null,
                        false,
                        null,
                        false,
                        null,
                        null,
                        null);

                cctx.gridEvents().record(evt);
              }
            }
          }
        }

        if (cachesWithoutNodes != null) {
          StringBuilder sb =
              new StringBuilder(
                  "All server nodes for the following caches have left the cluster: ");

          for (int i = 0; i < cachesWithoutNodes.size(); i++) {
            String cache = cachesWithoutNodes.get(i);

            sb.append('\'').append(cache).append('\'');

            if (i != cachesWithoutNodes.size() - 1) sb.append(", ");
          }

          U.quietAndWarn(log, sb.toString());

          U.quietAndWarn(log, "Must have server nodes for caches to operate.");
        }

        assert discoEvt != null;

        assert exchId.nodeId().equals(discoEvt.eventNode().id());

        for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
          GridClientPartitionTopology clientTop =
              cctx.exchange().clearClientTopology(cacheCtx.cacheId());

          long updSeq = clientTop == null ? -1 : clientTop.lastUpdateSequence();

          // Update before waiting for locks.
          if (!cacheCtx.isLocal())
            cacheCtx
                .topology()
                .updateTopologyVersion(exchId, this, updSeq, stopping(cacheCtx.cacheId()));
        }

        // Grab all alive remote nodes with order of equal or less than last joined node.
        rmtNodes =
            new ConcurrentLinkedQueue<>(
                CU.aliveRemoteServerNodesWithCaches(cctx, exchId.topologyVersion()));

        rmtIds = Collections.unmodifiableSet(new HashSet<>(F.nodeIds(rmtNodes)));

        for (Map.Entry<UUID, GridDhtPartitionsSingleMessage> m : singleMsgs.entrySet())
          // If received any messages, process them.
          onReceive(m.getKey(), m.getValue());

        for (Map.Entry<UUID, GridDhtPartitionsFullMessage> m : fullMsgs.entrySet())
          // If received any messages, process them.
          onReceive(m.getKey(), m.getValue());

        AffinityTopologyVersion topVer = exchId.topologyVersion();

        for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
          if (cacheCtx.isLocal()) continue;

          // Must initialize topology after we get discovery event.
          initTopology(cacheCtx);

          cacheCtx.preloader().updateLastExchangeFuture(this);
        }

        IgniteInternalFuture<?> partReleaseFut = cctx.partitionReleaseFuture(topVer);

        // Assign to class variable so it will be included into toString() method.
        this.partReleaseFut = partReleaseFut;

        if (log.isDebugEnabled()) log.debug("Before waiting for partition release future: " + this);

        while (true) {
          try {
            partReleaseFut.get(2 * cctx.gridConfig().getNetworkTimeout(), TimeUnit.MILLISECONDS);

            break;
          } catch (IgniteFutureTimeoutCheckedException ignored) {
            // Print pending transactions and locks that might have led to hang.
            dumpPendingObjects();
          }
        }

        if (log.isDebugEnabled()) log.debug("After waiting for partition release future: " + this);

        if (!F.isEmpty(reqs)) blockGateways();

        if (exchId.isLeft())
          cctx.mvcc().removeExplicitNodeLocks(exchId.nodeId(), exchId.topologyVersion());

        IgniteInternalFuture<?> locksFut = cctx.mvcc().finishLocks(exchId.topologyVersion());

        while (true) {
          try {
            locksFut.get(2 * cctx.gridConfig().getNetworkTimeout(), TimeUnit.MILLISECONDS);

            break;
          } catch (IgniteFutureTimeoutCheckedException ignored) {
            U.warn(
                log,
                "Failed to wait for locks release future. "
                    + "Dumping pending objects that might be the cause: "
                    + cctx.localNodeId());

            U.warn(log, "Locked entries:");

            Map<IgniteTxKey, Collection<GridCacheMvccCandidate>> locks =
                cctx.mvcc().unfinishedLocks(exchId.topologyVersion());

            for (Map.Entry<IgniteTxKey, Collection<GridCacheMvccCandidate>> e : locks.entrySet())
              U.warn(log, "Locked entry [key=" + e.getKey() + ", mvcc=" + e.getValue() + ']');
          }
        }

        for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
          if (cacheCtx.isLocal()) continue;

          // Notify replication manager.
          GridCacheContext drCacheCtx =
              cacheCtx.isNear() ? cacheCtx.near().dht().context() : cacheCtx;

          if (drCacheCtx.isDrEnabled()) drCacheCtx.dr().beforeExchange(topVer, exchId.isLeft());

          // Partition release future is done so we can flush the write-behind store.
          cacheCtx.store().forceFlush();

          // Process queued undeploys prior to sending/spreading map.
          cacheCtx.preloader().unwindUndeploys();

          GridDhtPartitionTopology top = cacheCtx.topology();

          assert topVer.equals(top.topologyVersion())
              : "Topology version is updated only in this class instances inside single ExchangeWorker thread.";

          top.beforeExchange(this);
        }

        for (GridClientPartitionTopology top : cctx.exchange().clientTopologies()) {
          top.updateTopologyVersion(exchId, this, -1, stopping(top.cacheId()));

          top.beforeExchange(this);
        }
      } catch (IgniteInterruptedCheckedException e) {
        onDone(e);

        throw e;
      } catch (Throwable e) {
        U.error(
            log,
            "Failed to reinitialize local partitions (preloading will be stopped): " + exchId,
            e);

        onDone(e);

        if (e instanceof Error) throw (Error) e;

        return;
      }

      if (F.isEmpty(rmtIds)) {
        onDone(exchId.topologyVersion());

        return;
      }

      ready.set(true);

      initFut.onDone(true);

      if (log.isDebugEnabled()) log.debug("Initialized future: " + this);

      // If this node is not oldest.
      if (!oldestNode.get().id().equals(cctx.localNodeId())) sendPartitions();
      else {
        boolean allReceived = allReceived();

        if (allReceived && replied.compareAndSet(false, true)) {
          if (spreadPartitions()) onDone(exchId.topologyVersion());
        }
      }

      scheduleRecheck();
    } else assert false : "Skipped init future: " + this;
  }
  /** @return Remaining node IDs. */
  Collection<UUID> remaining() {
    if (rmtIds == null) return Collections.emptyList();

    return F.lose(rmtIds, true, rcvdIds);
  }