private Decision shouldFilter(
      ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
    Decision decision = shouldClusterFilter(node, allocation);
    if (decision != null) return decision;

    decision =
        shouldIndexFilter(
            allocation.metaData().getIndexSafe(shardRouting.index()), node, allocation);
    if (decision != null) return decision;

    return allocation.decision(Decision.YES, NAME, "node passes include/exclude/require filters");
  }
Пример #2
0
 @Override
 protected AsyncShardFetch.FetchResult<
         TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData>
     fetchData(ShardRouting shard, RoutingAllocation allocation) {
   AsyncShardFetch<TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData> fetch =
       asyncFetchStore.get(shard.shardId());
   if (fetch == null) {
     fetch = new InternalAsyncFetch<>(logger, "shard_store", shard.shardId(), storeAction);
     asyncFetchStore.put(shard.shardId(), fetch);
   }
   AsyncShardFetch.FetchResult<TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData>
       shardStores =
           fetch.fetchData(
               allocation.nodes(),
               allocation.metaData(),
               allocation.getIgnoreNodes(shard.shardId()));
   if (shardStores.hasData() == true) {
     shardStores.processAllocation(allocation);
   }
   return shardStores;
 }
Пример #3
0
    @Override
    protected AsyncShardFetch.FetchResult<
            TransportNodesListGatewayStartedShards.NodeGatewayStartedShards>
        fetchData(ShardRouting shard, RoutingAllocation allocation) {
      AsyncShardFetch<TransportNodesListGatewayStartedShards.NodeGatewayStartedShards> fetch =
          asyncFetchStarted.get(shard.shardId());
      if (fetch == null) {
        fetch = new InternalAsyncFetch<>(logger, "shard_started", shard.shardId(), startedAction);
        asyncFetchStarted.put(shard.shardId(), fetch);
      }
      AsyncShardFetch.FetchResult<TransportNodesListGatewayStartedShards.NodeGatewayStartedShards>
          shardState =
              fetch.fetchData(
                  allocation.nodes(),
                  allocation.metaData(),
                  allocation.getIgnoreNodes(shard.shardId()));

      if (shardState.hasData() == true) {
        shardState.processAllocation(allocation);
      }
      return shardState;
    }
 @Override
 public Decision canAllocate(
     ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
   if (shardRouting.unassigned()) {
     // only for unassigned - we filter allocation right after the index creation ie. for shard
     // shrinking etc. to ensure
     // that once it has been allocated post API the replicas can be allocated elsewhere without
     // user interaction
     // this is a setting that can only be set within the system!
     IndexMetaData indexMd = allocation.metaData().getIndexSafe(shardRouting.index());
     DiscoveryNodeFilters initialRecoveryFilters = indexMd.getInitialRecoveryFilters();
     if (initialRecoveryFilters != null
         && RecoverySource.isInitialRecovery(shardRouting.recoverySource().getType())
         && initialRecoveryFilters.match(node.node()) == false) {
       String explanation =
           (shardRouting.recoverySource().getType() == RecoverySource.Type.LOCAL_SHARDS)
               ? "initial allocation of the shrunken index is only allowed on nodes [%s] that hold a copy of every shard in the index"
               : "initial allocation of the index is only allowed on nodes [%s]";
       return allocation.decision(Decision.NO, NAME, explanation, initialRecoveryFilters);
     }
   }
   return shouldFilter(shardRouting, node, allocation);
 }
  /**
   * Process existing recoveries of replicas and see if we need to cancel them if we find a better
   * match. Today, a better match is one that has full sync id match compared to not having one in
   * the previous recovery.
   */
  public boolean processExistingRecoveries(RoutingAllocation allocation) {
    boolean changed = false;
    MetaData metaData = allocation.metaData();
    for (RoutingNodes.RoutingNodesIterator nodes = allocation.routingNodes().nodes();
        nodes.hasNext(); ) {
      nodes.next();
      for (RoutingNodes.RoutingNodeIterator it = nodes.nodeShards(); it.hasNext(); ) {
        ShardRouting shard = it.next();
        if (shard.primary() == true) {
          continue;
        }
        if (shard.initializing() == false) {
          continue;
        }
        if (shard.relocatingNodeId() != null) {
          continue;
        }

        // if we are allocating a replica because of index creation, no need to go and find a copy,
        // there isn't one...
        IndexMetaData indexMetaData = metaData.index(shard.getIndexName());
        if (shard.allocatedPostIndexCreate(indexMetaData) == false) {
          continue;
        }

        AsyncShardFetch.FetchResult<TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData>
            shardStores = fetchData(shard, allocation);
        if (shardStores.hasData() == false) {
          logger.trace("{}: fetching new stores for initializing shard", shard);
          continue; // still fetching
        }

        ShardRouting primaryShard = allocation.routingNodes().activePrimary(shard);
        assert primaryShard != null
            : "the replica shard can be allocated on at least one node, so there must be an active primary";
        TransportNodesListShardStoreMetaData.StoreFilesMetaData primaryStore =
            findStore(primaryShard, allocation, shardStores);
        if (primaryStore == null || primaryStore.allocated() == false) {
          // if we can't find the primary data, it is probably because the primary shard is
          // corrupted (and listing failed)
          // just let the recovery find it out, no need to do anything about it for the initializing
          // shard
          logger.trace(
              "{}: no primary shard store found or allocated, letting actual allocation figure it out",
              shard);
          continue;
        }

        MatchingNodes matchingNodes =
            findMatchingNodes(shard, allocation, primaryStore, shardStores);
        if (matchingNodes.getNodeWithHighestMatch() != null) {
          DiscoveryNode currentNode = allocation.nodes().get(shard.currentNodeId());
          DiscoveryNode nodeWithHighestMatch = matchingNodes.getNodeWithHighestMatch();
          if (currentNode.equals(nodeWithHighestMatch) == false
              && matchingNodes.isNodeMatchBySyncID(currentNode) == false
              && matchingNodes.isNodeMatchBySyncID(nodeWithHighestMatch) == true) {
            // we found a better match that has a full sync id match, the existing allocation is not
            // fully synced
            // so we found a better one, cancel this one
            it.moveToUnassigned(
                new UnassignedInfo(
                    UnassignedInfo.Reason.REALLOCATED_REPLICA,
                    "existing allocation of replica to ["
                        + currentNode
                        + "] cancelled, sync id match found on node ["
                        + nodeWithHighestMatch
                        + "]",
                    null,
                    allocation.getCurrentNanoTime(),
                    System.currentTimeMillis()));
            changed = true;
          }
        }
      }
    }
    return changed;
  }
  public boolean allocateUnassigned(RoutingAllocation allocation) {
    boolean changed = false;
    final RoutingNodes routingNodes = allocation.routingNodes();
    final RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator =
        routingNodes.unassigned().iterator();
    MetaData metaData = allocation.metaData();
    while (unassignedIterator.hasNext()) {
      ShardRouting shard = unassignedIterator.next();
      if (shard.primary()) {
        continue;
      }

      // if we are allocating a replica because of index creation, no need to go and find a copy,
      // there isn't one...
      IndexMetaData indexMetaData = metaData.index(shard.getIndexName());
      if (shard.allocatedPostIndexCreate(indexMetaData) == false) {
        continue;
      }

      // pre-check if it can be allocated to any node that currently exists, so we won't list the
      // store for it for nothing
      if (canBeAllocatedToAtLeastOneNode(shard, allocation) == false) {
        logger.trace("{}: ignoring allocation, can't be allocated on any node", shard);
        unassignedIterator.removeAndIgnore();
        continue;
      }

      AsyncShardFetch.FetchResult<TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData>
          shardStores = fetchData(shard, allocation);
      if (shardStores.hasData() == false) {
        logger.trace("{}: ignoring allocation, still fetching shard stores", shard);
        allocation.setHasPendingAsyncFetch();
        unassignedIterator.removeAndIgnore();
        continue; // still fetching
      }

      ShardRouting primaryShard = routingNodes.activePrimary(shard);
      assert primaryShard != null
          : "the replica shard can be allocated on at least one node, so there must be an active primary";
      TransportNodesListShardStoreMetaData.StoreFilesMetaData primaryStore =
          findStore(primaryShard, allocation, shardStores);
      if (primaryStore == null || primaryStore.allocated() == false) {
        // if we can't find the primary data, it is probably because the primary shard is corrupted
        // (and listing failed)
        // we want to let the replica be allocated in order to expose the actual problem with the
        // primary that the replica
        // will try and recover from
        // Note, this is the existing behavior, as exposed in running
        // CorruptFileTest#testNoPrimaryData
        logger.trace(
            "{}: no primary shard store found or allocated, letting actual allocation figure it out",
            shard);
        continue;
      }

      MatchingNodes matchingNodes = findMatchingNodes(shard, allocation, primaryStore, shardStores);

      if (matchingNodes.getNodeWithHighestMatch() != null) {
        RoutingNode nodeWithHighestMatch =
            allocation.routingNodes().node(matchingNodes.getNodeWithHighestMatch().id());
        // we only check on THROTTLE since we checked before before on NO
        Decision decision =
            allocation.deciders().canAllocate(shard, nodeWithHighestMatch, allocation);
        if (decision.type() == Decision.Type.THROTTLE) {
          logger.debug(
              "[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store",
              shard.index(),
              shard.id(),
              shard,
              nodeWithHighestMatch.node());
          // we are throttling this, but we have enough to allocate to this node, ignore it for now
          unassignedIterator.removeAndIgnore();
        } else {
          logger.debug(
              "[{}][{}]: allocating [{}] to [{}] in order to reuse its unallocated persistent store",
              shard.index(),
              shard.id(),
              shard,
              nodeWithHighestMatch.node());
          // we found a match
          changed = true;
          unassignedIterator.initialize(
              nodeWithHighestMatch.nodeId(),
              null,
              allocation
                  .clusterInfo()
                  .getShardSize(shard, ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE));
        }
      } else if (matchingNodes.hasAnyData() == false) {
        // if we didn't manage to find *any* data (regardless of matching sizes), check if the
        // allocation of the replica shard needs to be delayed
        changed |= ignoreUnassignedIfDelayed(unassignedIterator, shard);
      }
    }
    return changed;
  }