Example #1
 /** Returns a set of nodes that have at least one shard of the given index. */
 public synchronized Set<String> nodesInclude(String index) {
    ClusterState state = clusterService().state();
    if (state.routingTable().hasIndex(index)) {
      List<ShardRouting> allShards = state.routingTable().allShards(index);
      DiscoveryNodes discoveryNodes = state.getNodes();
      Set<String> nodes = new HashSet<>();
     for (ShardRouting shardRouting : allShards) {
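        // unassigned shards have no current node id to resolve, so skip them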
       if (shardRouting.assignedToNode()) {
         DiscoveryNode discoveryNode = discoveryNodes.get(shardRouting.currentNodeId());
         nodes.add(discoveryNode.getName());
       }
     }
     return nodes;
   }
   return Collections.emptySet();
 }
Example #2
 @Override
 public String toString() {
   StringBuilder sb = new StringBuilder();
   sb.append("routingNode ([");
   sb.append(node.getName());
   sb.append("][");
   sb.append(node.getId());
   sb.append("][");
   sb.append(node.getHostName());
   sb.append("][");
   sb.append(node.getHostAddress());
   sb.append("], [");
   sb.append(shards.size());
   sb.append(" assigned shards])");
   return sb.toString();
 }
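For comparison, the same output could be produced with a single String.format call; a minimal self-contained sketch (the Node record and its values are hypothetical stand-ins for DiscoveryNode, used here for illustration only):

 record Node(String name, String id, String hostName, String hostAddress) {}

 class RoutingNodeToStringDemo {
   public static void main(String[] args) {
     Node node = new Node("node-1", "abc123", "host-1", "10.0.0.1");
     int assignedShards = 3;
     // prints: routingNode ([node-1][abc123][host-1][10.0.0.1], [3 assigned shards])
     System.out.println(
         String.format(
             "routingNode ([%s][%s][%s][%s], [%d assigned shards])",
             node.name(), node.id(), node.hostName(), node.hostAddress(), assignedShards));
   }
 }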
Example #3
  /**
   * Get info about the current operation of this river. Used when handling REST management
   * operations.
   *
   * @return String with JSON-formatted info.
   * @throws Exception
   */
  @Override
  public String getRiverOperationInfo(DiscoveryNode esNode, Date currentDate) throws Exception {

    XContentBuilder builder = jsonBuilder().prettyPrint();
    builder.startObject();
    builder.field("river_name", riverName().getName());
    builder.field("info_date", currentDate);
    builder.startObject("indexing");
    builder.field("state", closed ? "stopped" : "running");
    if (!closed) builder.field("last_restart", lastRestartDate);
    else if (permanentStopDate != null) builder.field("stopped_permanently", permanentStopDate);
    builder.endObject();
    if (esNode != null) {
      builder.startObject("node");
      builder.field("id", esNode.getId());
      builder.field("name", esNode.getName());
      builder.endObject();
    }
    if (coordinatorInstance != null) {
      List<SpaceIndexingInfo> currProjectIndexingInfo =
          coordinatorInstance.getCurrentSpaceIndexingInfo();
      if (currProjectIndexingInfo != null) {
        builder.startArray("current_indexing");
        for (SpaceIndexingInfo pi : currProjectIndexingInfo) {
          pi.buildDocument(builder, null, true, false);
        }
        builder.endArray();
      }
    }
    List<String> pkeys = getAllIndexedSpaceKeys();
    if (pkeys != null) {
      builder.startArray("indexed_spaces");
      for (String spaceKey : pkeys) {
        builder.startObject();
        builder.field(SpaceIndexingInfo.DOCFIELD_SPACE_KEY, spaceKey);
        SpaceIndexingInfo lastIndexing = getLastSpaceIndexingInfo(spaceKey);
        if (lastIndexing != null) {
          builder.field("last_indexing");
          lastIndexing.buildDocument(builder, null, false, true);
        }
        builder.endObject();
      }
      builder.endArray();
    }
    builder.endObject();
    return builder.string();
  }
Example #4
 private IndexShard newShard(
     boolean primary, DiscoveryNode node, IndexMetaData indexMetaData, Path homePath)
     throws IOException {
    // add node name to settings for proper logging
   final Settings nodeSettings = Settings.builder().put("node.name", node.getName()).build();
   final IndexSettings indexSettings = new IndexSettings(indexMetaData, nodeSettings);
   ShardRouting shardRouting =
       TestShardRouting.newShardRouting(
           shardId, node.getId(), primary, ShardRoutingState.INITIALIZING);
   final Path path = Files.createDirectories(homePath.resolve(node.getId()));
   final NodeEnvironment.NodePath nodePath = new NodeEnvironment.NodePath(path);
   ShardPath shardPath =
       new ShardPath(false, nodePath.resolve(shardId), nodePath.resolve(shardId), shardId);
   Store store = createStore(indexSettings, shardPath);
   IndexCache indexCache =
       new IndexCache(indexSettings, new DisabledQueryCache(indexSettings), null);
   MapperService mapperService =
       MapperTestUtils.newMapperService(homePath, indexSettings.getSettings());
   for (Map.Entry<String, String> type : indexMapping.entrySet()) {
     mapperService.merge(
         type.getKey(),
         new CompressedXContent(type.getValue()),
         MapperService.MergeReason.MAPPING_RECOVERY,
         true);
   }
   SimilarityService similarityService =
       new SimilarityService(indexSettings, Collections.emptyMap());
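    // a no-op event listener and warmer are enough for a bare test shard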
   final IndexEventListener indexEventListener = new IndexEventListener() {};
   final Engine.Warmer warmer = searcher -> {};
   return new IndexShard(
       shardRouting,
       indexSettings,
       shardPath,
       store,
       indexCache,
       mapperService,
       similarityService,
       null,
       null,
       indexEventListener,
       null,
       threadPool,
       BigArrays.NON_RECYCLING_INSTANCE,
       warmer,
       Collections.emptyList(),
       Collections.emptyList());
 }
Example #5
 @Override
 public void onNewClusterState(String reason) {
   ClusterState newClusterState = action.pendingStatesQueue().getNextClusterStateToProcess();
   logger.debug(
       "[{}] received version [{}], uuid [{}]",
       discoveryNode.getName(),
       newClusterState.version(),
       newClusterState.stateUUID());
   if (listener != null) {
     ClusterChangedEvent event = new ClusterChangedEvent("", newClusterState, clusterState);
     listener.clusterChanged(event);
   }
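    // only adopt the incoming state if there is no master yet or it supersedes the current one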
   if (clusterState.nodes().getMasterNode() == null
       || newClusterState.supersedes(clusterState)) {
     clusterState = newClusterState;
   }
   action.pendingStatesQueue().markAsProcessed(newClusterState);
 }
Example #6
  private Table buildTable(RestRequest request, ClusterStateResponse state) {
    Table table = getTableWithHeader(request);
    DiscoveryNodes nodes = state.getState().nodes();

    table.startRow();
    DiscoveryNode master = nodes.get(nodes.masterNodeId());
    if (master == null) {
      table.addCell("-");
      table.addCell("-");
      table.addCell("-");
      table.addCell("-");
    } else {
      table.addCell(master.getId());
      table.addCell(master.getHostName());
      table.addCell(master.getHostAddress());
      table.addCell(master.getName());
    }
    table.endRow();

    return table;
  }
Example #7
  private static void assertTribeNodeSuccessfullyCreated(Settings extraSettings) throws Exception {
    // The tribe clients do need it to make sure they can find their corresponding tribes using the
    // proper transport
    Settings settings =
        Settings.builder()
            .put(NetworkModule.HTTP_ENABLED.getKey(), false)
            .put("node.name", "tribe_node")
            .put("transport.type", MockTcpTransportPlugin.MOCK_TCP_TRANSPORT_NAME)
            .put("discovery.type", "local")
            .put("tribe.t1.transport.type", MockTcpTransportPlugin.MOCK_TCP_TRANSPORT_NAME)
            .put("tribe.t2.transport.type", MockTcpTransportPlugin.MOCK_TCP_TRANSPORT_NAME)
            .put("tribe.t1.discovery.type", "local")
            .put("tribe.t2.discovery.type", "local")
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
            .put(extraSettings)
            .build();

    try (Node node =
        new MockNode(settings, Collections.singleton(MockTcpTransportPlugin.class)).start()) {
      try (Client client = node.client()) {
        assertBusy(
            () -> {
              ClusterState state =
                  client.admin().cluster().prepareState().clear().setNodes(true).get().getState();
              assertThat(state.getClusterName().value(), equalTo("tribe_node_cluster"));
              assertThat(state.getNodes().getSize(), equalTo(5));
              for (DiscoveryNode discoveryNode : state.getNodes()) {
                assertThat(
                    discoveryNode.getName(),
                    either(equalTo("tribe1_node"))
                        .or(equalTo("tribe2_node"))
                        .or(equalTo("tribe_node"))
                        .or(equalTo("tribe_node/t1"))
                        .or(equalTo("tribe_node/t2")));
              }
            });
      }
    }
  }
Example #8
  private Table buildTable(
      RestRequest req,
      ClusterStateResponse state,
      NodesInfoResponse nodesInfo,
      NodesStatsResponse nodesStats) {
    final String[] threadPools = req.paramAsStringArray("thread_pool_patterns", new String[] {"*"});
    final DiscoveryNodes nodes = state.getState().nodes();
    final Table table = getTableWithHeader(req);

    // collect all thread pool names that we see across the nodes
    final Set<String> candidates = new HashSet<>();
    for (final NodeStats nodeStats : nodesStats.getNodes()) {
      for (final ThreadPoolStats.Stats threadPoolStats : nodeStats.getThreadPool()) {
        candidates.add(threadPoolStats.getName());
      }
    }

    // collect all thread pool names that match the specified thread pool patterns
    final Set<String> included = new HashSet<>();
    for (final String candidate : candidates) {
      if (Regex.simpleMatch(threadPools, candidate)) {
        included.add(candidate);
      }
    }

    for (final DiscoveryNode node : nodes) {
      final NodeInfo info = nodesInfo.getNodesMap().get(node.getId());
      final NodeStats stats = nodesStats.getNodesMap().get(node.getId());

      final Map<String, ThreadPoolStats.Stats> poolThreadStats;
      final Map<String, ThreadPool.Info> poolThreadInfo;

      if (stats == null) {
        poolThreadStats = Collections.emptyMap();
        poolThreadInfo = Collections.emptyMap();
      } else {
        // we use a sorted map to ensure that thread pools are sorted by name
        poolThreadStats = new TreeMap<>();
        poolThreadInfo = new HashMap<>();

        ThreadPoolStats threadPoolStats = stats.getThreadPool();
        for (ThreadPoolStats.Stats threadPoolStat : threadPoolStats) {
          poolThreadStats.put(threadPoolStat.getName(), threadPoolStat);
        }
        if (info != null) {
          for (ThreadPool.Info threadPoolInfo : info.getThreadPool()) {
            poolThreadInfo.put(threadPoolInfo.getName(), threadPoolInfo);
          }
        }
      }
      for (Map.Entry<String, ThreadPoolStats.Stats> entry : poolThreadStats.entrySet()) {

        if (!included.contains(entry.getKey())) continue;

        table.startRow();

        table.addCell(node.getName());
        table.addCell(node.getId());
        table.addCell(node.getEphemeralId());
        table.addCell(info == null ? null : info.getProcess().getId());
        table.addCell(node.getHostName());
        table.addCell(node.getHostAddress());
        table.addCell(node.getAddress().address().getPort());
        final ThreadPoolStats.Stats poolStats = entry.getValue();
        final ThreadPool.Info poolInfo = poolThreadInfo.get(entry.getKey());

        Long maxQueueSize = null;
        String keepAlive = null;
        Integer minThreads = null;
        Integer maxThreads = null;

        if (poolInfo != null) {
          if (poolInfo.getQueueSize() != null) {
            maxQueueSize = poolInfo.getQueueSize().singles();
          }
          if (poolInfo.getKeepAlive() != null) {
            keepAlive = poolInfo.getKeepAlive().toString();
          }
          if (poolInfo.getMin() >= 0) {
            minThreads = poolInfo.getMin();
          }
          if (poolInfo.getMax() >= 0) {
            maxThreads = poolInfo.getMax();
          }
        }

        table.addCell(entry.getKey());
        table.addCell(poolInfo == null ? null : poolInfo.getThreadPoolType().getType());
        table.addCell(poolStats == null ? null : poolStats.getActive());
        table.addCell(poolStats == null ? null : poolStats.getThreads());
        table.addCell(poolStats == null ? null : poolStats.getQueue());
        table.addCell(maxQueueSize);
        table.addCell(poolStats == null ? null : poolStats.getRejected());
        table.addCell(poolStats == null ? null : poolStats.getLargest());
        table.addCell(poolStats == null ? null : poolStats.getCompleted());
        table.addCell(minThreads);
        table.addCell(maxThreads);
        table.addCell(keepAlive);

        table.endRow();
      }
    }

    return table;
  }
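The thread_pool_patterns filtering above goes through Elasticsearch's Regex.simpleMatch wildcard helper. As a rough self-contained sketch of that filtering step (plain Java, no ES dependency; this simplified simpleMatch stand-in supports at most one '*' per pattern, unlike the real helper):

 import java.util.List;
 import java.util.Set;
 import java.util.TreeSet;

 class ThreadPoolFilterDemo {
   // simplified stand-in for org.elasticsearch.common.regex.Regex#simpleMatch
   static boolean simpleMatch(String pattern, String value) {
     int star = pattern.indexOf('*');
     if (star == -1) {
       return pattern.equals(value);
     }
     String prefix = pattern.substring(0, star);
     String suffix = pattern.substring(star + 1);
     return value.length() >= prefix.length() + suffix.length()
         && value.startsWith(prefix)
         && value.endsWith(suffix);
   }

   static boolean simpleMatch(String[] patterns, String value) {
     for (String pattern : patterns) {
       if (simpleMatch(pattern, value)) {
         return true;
       }
     }
     return false;
   }

   public static void main(String[] args) {
     String[] patterns = {"bulk", "search*"};
     Set<String> included = new TreeSet<>();
     for (String candidate : List.of("bulk", "get", "search", "search_throttled")) {
       if (simpleMatch(patterns, candidate)) {
         included.add(candidate);
       }
     }
     System.out.println(included); // [bulk, search, search_throttled]
   }
 }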
Example #9
  public boolean allocateUnassigned(RoutingAllocation allocation) {
    boolean changed = false;
    DiscoveryNodes nodes = allocation.nodes();
    RoutingNodes routingNodes = allocation.routingNodes();

    // First, handle primaries: they must find a node to be allocated on before replicas
    final MetaData metaData = routingNodes.metaData();
    RoutingNodes.UnassignedShards unassigned = routingNodes.unassigned();
    unassigned.sort(
        new PriorityComparator() {

          @Override
          protected Settings getIndexSettings(String index) {
            IndexMetaData indexMetaData = metaData.index(index);
            return indexMetaData.getSettings();
          }
        }); // sort for priority ordering
    Iterator<ShardRouting> unassignedIterator = unassigned.iterator();
    while (unassignedIterator.hasNext()) {
      ShardRouting shard = unassignedIterator.next();

      if (!shard.primary()) {
        continue;
      }

      // this is an API allocation, ignore since we know there is no data...
      if (!routingNodes
          .routingTable()
          .index(shard.index())
          .shard(shard.id())
          .primaryAllocatedPostApi()) {
        continue;
      }

      AsyncShardFetch<TransportNodesListGatewayStartedShards.NodeGatewayStartedShards> fetch =
          asyncFetchStarted.get(shard.shardId());
      if (fetch == null) {
        fetch = new InternalAsyncFetch<>(logger, "shard_started", shard.shardId(), startedAction);
        asyncFetchStarted.put(shard.shardId(), fetch);
      }
      AsyncShardFetch.FetchResult<TransportNodesListGatewayStartedShards.NodeGatewayStartedShards>
          shardState = fetch.fetchData(nodes, metaData, allocation.getIgnoreNodes(shard.shardId()));
      if (shardState.hasData() == false) {
        logger.trace("{}: ignoring allocation, still fetching shard started state", shard);
        unassignedIterator.remove();
        routingNodes.ignoredUnassigned().add(shard);
        continue;
      }
      shardState.processAllocation(allocation);

      IndexMetaData indexMetaData = metaData.index(shard.getIndex());

      /*
       * Build a map of DiscoveryNode to shard state version for the given shard. A state of -1
       * means the shard does not exist on the node, while any state >= 0 is the version of the
       * shard on that node's disk.
       *
       * A shard on shared storage will return at least shard state 0 for all nodes, indicating
       * that the shard can be allocated to any node.
       */
      ObjectLongHashMap<DiscoveryNode> nodesState = new ObjectLongHashMap<>();
      for (TransportNodesListGatewayStartedShards.NodeGatewayStartedShards nodeShardState :
          shardState.getData().values()) {
        long version = nodeShardState.version();
        // a version of -1 means the shard does not exist on that node, which is what the API
        // returns and what we expect here
        logger.trace(
            "[{}] on node [{}] has version [{}] of shard",
            shard,
            nodeShardState.getNode(),
            version);
        nodesState.put(nodeShardState.getNode(), version);
      }

      int numberOfAllocationsFound = 0;
      long highestVersion = -1;
      final Map<DiscoveryNode, Long> nodesWithVersion = Maps.newHashMap();

      assert !nodesState.containsKey(null);
      final Object[] keys = nodesState.keys;
      final long[] values = nodesState.values;
      Settings idxSettings = indexMetaData.settings();
      for (int i = 0; i < keys.length; i++) {
        if (keys[i] == null) {
          continue;
        }

        DiscoveryNode node = (DiscoveryNode) keys[i];
        long version = values[i];
        // since we don't check in NO allocation, we need to double check here
        if (allocation.shouldIgnoreShardForNode(shard.shardId(), node.id())) {
          continue;
        }
        if (recoverOnAnyNode(idxSettings)) {
          numberOfAllocationsFound++;
          if (version > highestVersion) {
            highestVersion = version;
          }
          // We always put the node without clearing the map
          nodesWithVersion.put(node, version);
        } else if (version != -1) {
          numberOfAllocationsFound++;
          // If we've found a new "best" candidate, clear the
          // current candidates and add it
          if (version > highestVersion) {
            highestVersion = version;
            nodesWithVersion.clear();
            nodesWithVersion.put(node, version);
          } else if (version == highestVersion) {
            // If the candidate is the same, add it to the
            // list, but keep the current candidate
            nodesWithVersion.put(node, version);
          }
        }
      }
      // Now that we have a map of nodes to versions along with the
      // number of allocations found (and not ignored), we need to sort
      // it so the node with the highest version is at the beginning
      List<DiscoveryNode> nodesWithHighestVersion = Lists.newArrayList();
      nodesWithHighestVersion.addAll(nodesWithVersion.keySet());
      CollectionUtil.timSort(
          nodesWithHighestVersion,
          new Comparator<DiscoveryNode>() {
            @Override
            public int compare(DiscoveryNode o1, DiscoveryNode o2) {
              return Long.compare(nodesWithVersion.get(o2), nodesWithVersion.get(o1));
            }
          });

      if (logger.isDebugEnabled()) {
        logger.debug(
            "[{}][{}] found {} allocations of {}, highest version: [{}]",
            shard.index(),
            shard.id(),
            numberOfAllocationsFound,
            shard,
            highestVersion);
      }
      if (logger.isTraceEnabled()) {
        StringBuilder sb = new StringBuilder("[");
        for (DiscoveryNode n : nodesWithHighestVersion) {
          sb.append("[");
          sb.append(n.getName());
          sb.append("]");
          sb.append(" -> ");
          sb.append(nodesWithVersion.get(n));
          sb.append(", ");
        }
        sb.append("]");
        logger.trace("{} candidates for allocation: {}", shard, sb.toString());
      }

      // check if the count meets the required minimum
      int requiredAllocation = 1;
      // if we restore from a repository, one copy is more than enough
      if (shard.restoreSource() == null) {
        // declared before the try block so the catch clause below can log its value
        String initialShards =
            indexMetaData
                .settings()
                .get(
                    INDEX_RECOVERY_INITIAL_SHARDS,
                    settings.get(INDEX_RECOVERY_INITIAL_SHARDS, this.initialShards));
        try {
          if ("quorum".equals(initialShards)) {
            if (indexMetaData.numberOfReplicas() > 1) {
              requiredAllocation = ((1 + indexMetaData.numberOfReplicas()) / 2) + 1;
            }
          } else if ("quorum-1".equals(initialShards) || "half".equals(initialShards)) {
            if (indexMetaData.numberOfReplicas() > 2) {
              requiredAllocation = ((1 + indexMetaData.numberOfReplicas()) / 2);
            }
          } else if ("one".equals(initialShards)) {
            requiredAllocation = 1;
          } else if ("full".equals(initialShards) || "all".equals(initialShards)) {
            requiredAllocation = indexMetaData.numberOfReplicas() + 1;
          } else if ("full-1".equals(initialShards) || "all-1".equals(initialShards)) {
            if (indexMetaData.numberOfReplicas() > 1) {
              requiredAllocation = indexMetaData.numberOfReplicas();
            }
          } else {
            requiredAllocation = Integer.parseInt(initialShards);
          }
        } catch (Exception e) {
          logger.warn(
              "[{}][{}] failed to derived initial_shards from value {}, ignore allocation for {}",
              shard.index(),
              shard.id(),
              initialShards,
              shard);
        }
      }

      // not enough found for this shard, continue...
      if (numberOfAllocationsFound < requiredAllocation) {
        // if we are restoring this shard we still can allocate
        if (shard.restoreSource() == null) {
          // we can't really allocate, so ignore it and continue
          unassignedIterator.remove();
          routingNodes.ignoredUnassigned().add(shard);
          if (logger.isDebugEnabled()) {
            logger.debug(
                "[{}][{}]: not allocating, number_of_allocated_shards_found [{}], required_number [{}]",
                shard.index(),
                shard.id(),
                numberOfAllocationsFound,
                requiredAllocation);
          }
        } else if (logger.isDebugEnabled()) {
          logger.debug(
              "[{}][{}]: missing local data, will restore from [{}]",
              shard.index(),
              shard.id(),
              shard.restoreSource());
        }
        continue;
      }

      Set<DiscoveryNode> throttledNodes = Sets.newHashSet();
      Set<DiscoveryNode> noNodes = Sets.newHashSet();
      for (DiscoveryNode discoNode : nodesWithHighestVersion) {
        RoutingNode node = routingNodes.node(discoNode.id());
        if (node == null) {
          continue;
        }

        Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
        if (decision.type() == Decision.Type.THROTTLE) {
          throttledNodes.add(discoNode);
        } else if (decision.type() == Decision.Type.NO) {
          noNodes.add(discoNode);
        } else {
          if (logger.isDebugEnabled()) {
            logger.debug(
                "[{}][{}]: allocating [{}] to [{}] on primary allocation",
                shard.index(),
                shard.id(),
                shard,
                discoNode);
          }
          // we found a match
          changed = true;
          // make sure we create one with the version from the recovered state
          routingNodes.initialize(new ShardRouting(shard, highestVersion), node.nodeId());
          unassignedIterator.remove();

          // found a node, so no throttling, no "no", and break out of the loop
          throttledNodes.clear();
          noNodes.clear();
          break;
        }
      }
      if (throttledNodes.isEmpty()) {
        // if we have a node that we "can't" allocate to, force allocation, since this is our master
        // data!
        if (!noNodes.isEmpty()) {
          DiscoveryNode discoNode = noNodes.iterator().next();
          RoutingNode node = routingNodes.node(discoNode.id());
          if (logger.isDebugEnabled()) {
            logger.debug(
                "[{}][{}]: forcing allocating [{}] to [{}] on primary allocation",
                shard.index(),
                shard.id(),
                shard,
                discoNode);
          }
          // we found a match
          changed = true;
          // make sure we create one with the version from the recovered state
          routingNodes.initialize(new ShardRouting(shard, highestVersion), node.nodeId());
          unassignedIterator.remove();
        }
      } else {
        if (logger.isDebugEnabled()) {
          logger.debug(
              "[{}][{}]: throttling allocation [{}] to [{}] on primary allocation",
              shard.index(),
              shard.id(),
              shard,
              throttledNodes);
        }
        // we are throttling this, but we have enough to allocate to this node, ignore it for now
        unassignedIterator.remove();
        routingNodes.ignoredUnassigned().add(shard);
      }
    }

    if (!routingNodes.hasUnassigned()) {
      return changed;
    }

    // Now, handle replicas, try to assign them to nodes that are similar to the one the primary was
    // allocated on
    unassignedIterator = unassigned.iterator();
    while (unassignedIterator.hasNext()) {
      ShardRouting shard = unassignedIterator.next();
      if (shard.primary()) {
        continue;
      }

      // pre-check if it can be allocated to any node that currently exists, so we won't list the
      // store for it for nothing
      boolean canBeAllocatedToAtLeastOneNode = false;
      for (ObjectCursor<DiscoveryNode> cursor : nodes.dataNodes().values()) {
        RoutingNode node = routingNodes.node(cursor.value.id());
        if (node == null) {
          continue;
        }
        // if we can't allocate it on a node, ignore it, for example, this handles
        // cases for only allocating a replica after a primary
        Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
        if (decision.type() == Decision.Type.YES) {
          canBeAllocatedToAtLeastOneNode = true;
          break;
        }
      }

      if (!canBeAllocatedToAtLeastOneNode) {
        logger.trace("{}: ignoring allocation, can't be allocated on any node", shard);
        unassignedIterator.remove();
        routingNodes.ignoredUnassigned().add(shard);
        continue;
      }

      AsyncShardFetch<TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData> fetch =
          asyncFetchStore.get(shard.shardId());
      if (fetch == null) {
        fetch = new InternalAsyncFetch<>(logger, "shard_store", shard.shardId(), storeAction);
        asyncFetchStore.put(shard.shardId(), fetch);
      }
      AsyncShardFetch.FetchResult<TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData>
          shardStores =
              fetch.fetchData(nodes, metaData, allocation.getIgnoreNodes(shard.shardId()));
      if (shardStores.hasData() == false) {
        logger.trace("{}: ignoring allocation, still fetching shard stores", shard);
        unassignedIterator.remove();
        routingNodes.ignoredUnassigned().add(shard);
        continue; // still fetching
      }
      shardStores.processAllocation(allocation);

      long lastSizeMatched = 0;
      DiscoveryNode lastDiscoNodeMatched = null;
      RoutingNode lastNodeMatched = null;
      boolean hasReplicaData = false;
      IndexMetaData indexMetaData = metaData.index(shard.getIndex());

      for (Map.Entry<DiscoveryNode, TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData>
          nodeStoreEntry : shardStores.getData().entrySet()) {
        DiscoveryNode discoNode = nodeStoreEntry.getKey();
        TransportNodesListShardStoreMetaData.StoreFilesMetaData storeFilesMetaData =
            nodeStoreEntry.getValue().storeFilesMetaData();
        logger.trace("{}: checking node [{}]", shard, discoNode);

        if (storeFilesMetaData == null) {
          // already allocated on that node...
          continue;
        }

        RoutingNode node = routingNodes.node(discoNode.id());
        if (node == null) {
          continue;
        }

        // check if we can allocate on that node...
        // we only check for NO, since if this node is THROTTLING and it has enough "same data"
        // then we will try and assign it next time
        Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
        if (decision.type() == Decision.Type.NO) {
          continue;
        }

        // if it is already allocated, we can't assign to it...
        if (storeFilesMetaData.allocated()) {
          continue;
        }

        if (!shard.primary()) {
          hasReplicaData |= storeFilesMetaData.iterator().hasNext();
          ShardRouting primaryShard = routingNodes.activePrimary(shard);
          if (primaryShard != null) {
            assert primaryShard.active();
            DiscoveryNode primaryNode = nodes.get(primaryShard.currentNodeId());
            if (primaryNode != null) {
              TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData primaryNodeFilesStore =
                  shardStores.getData().get(primaryNode);
              if (primaryNodeFilesStore != null) {
                TransportNodesListShardStoreMetaData.StoreFilesMetaData primaryNodeStore =
                    primaryNodeFilesStore.storeFilesMetaData();
                if (primaryNodeStore != null && primaryNodeStore.allocated()) {
                  long sizeMatched = 0;

                  String primarySyncId = primaryNodeStore.syncId();
                  String replicaSyncId = storeFilesMetaData.syncId();
                  // see if we have a sync id we can make use of
                  if (replicaSyncId != null && replicaSyncId.equals(primarySyncId)) {
                    logger.trace(
                        "{}: node [{}] has same sync id {} as primary",
                        shard,
                        discoNode.name(),
                        replicaSyncId);
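                    // identical sync ids mean the copies match, so treat this as the best
                    // possible size match that no byte-count comparison can beat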
                    lastNodeMatched = node;
                    lastSizeMatched = Long.MAX_VALUE;
                    lastDiscoNodeMatched = discoNode;
                  } else {
                    for (StoreFileMetaData storeFileMetaData : storeFilesMetaData) {
                      String metaDataFileName = storeFileMetaData.name();
                      if (primaryNodeStore.fileExists(metaDataFileName)
                          && primaryNodeStore.file(metaDataFileName).isSame(storeFileMetaData)) {
                        sizeMatched += storeFileMetaData.length();
                      }
                    }
                    logger.trace(
                        "{}: node [{}] has [{}/{}] bytes of re-usable data",
                        shard,
                        discoNode.name(),
                        new ByteSizeValue(sizeMatched),
                        sizeMatched);
                    if (sizeMatched > lastSizeMatched) {
                      lastSizeMatched = sizeMatched;
                      lastDiscoNodeMatched = discoNode;
                      lastNodeMatched = node;
                    }
                  }
                }
              }
            }
          }
        }
      }

      if (lastNodeMatched != null) {
        // we only check THROTTLE here since we already checked NO above
        Decision decision = allocation.deciders().canAllocate(shard, lastNodeMatched, allocation);
        if (decision.type() == Decision.Type.THROTTLE) {
          if (logger.isDebugEnabled()) {
            logger.debug(
                "[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store with total_size [{}]",
                shard.index(),
                shard.id(),
                shard,
                lastDiscoNodeMatched,
                new ByteSizeValue(lastSizeMatched));
          }
          // we are throttling this, but we have enough to allocate to this node, ignore it for now
          unassignedIterator.remove();
          routingNodes.ignoredUnassigned().add(shard);
        } else {
          if (logger.isDebugEnabled()) {
            logger.debug(
                "[{}][{}]: allocating [{}] to [{}] in order to reuse its unallocated persistent store with total_size [{}]",
                shard.index(),
                shard.id(),
                shard,
                lastDiscoNodeMatched,
                new ByteSizeValue(lastSizeMatched));
          }
          // we found a match
          changed = true;
          routingNodes.initialize(shard, lastNodeMatched.nodeId());
          unassignedIterator.remove();
        }
      } else if (hasReplicaData == false) {
        // if we didn't manage to find *any* data (regardless of matching sizes), check whether
        // the allocation of the replica shard needs to be delayed, and if so, add it to the
        // ignored unassigned list
        // note: we only care about replicas in delayed allocation, since an unassigned primary
        //       will wait to find an existing copy of the shard anyway
        // note: the other side of the equation is scheduling a reroute in a timely manner,
        //       which happens in the RoutingService
        long delay =
            shard
                .unassignedInfo()
                .getDelayAllocationExpirationIn(settings, indexMetaData.getSettings());
        if (delay > 0) {
          logger.debug(
              "[{}][{}]: delaying allocation of [{}] for [{}]",
              shard.index(),
              shard.id(),
              shard,
              TimeValue.timeValueMillis(delay));
          /*
           * mark it as changed, since we want to trigger a cluster state publication that
           * schedules the future allocation; see
           * org.elasticsearch.cluster.routing.RoutingService#clusterChanged(ClusterChangedEvent).
           */
          changed = true;
          unassignedIterator.remove();
          routingNodes.ignoredUnassigned().add(shard);
        }
      }
    }
    return changed;
  }
Example #10
  public void testCancellationCleansTempFiles() throws Exception {
    final String indexName = "test";

    final String p_node = internalCluster().startNode();

    prepareCreate(
            indexName,
            Settings.builder()
                .put(
                    IndexMetaData.SETTING_NUMBER_OF_SHARDS,
                    1,
                    IndexMetaData.SETTING_NUMBER_OF_REPLICAS,
                    0))
        .get();

    internalCluster().startNode();
    internalCluster().startNode();

    List<IndexRequestBuilder> requests = new ArrayList<>();
    int numDocs = scaledRandomIntBetween(25, 250);
    for (int i = 0; i < numDocs; i++) {
      requests.add(client().prepareIndex(indexName, "type").setSource("{}"));
    }
    indexRandom(true, requests);
    assertFalse(
        client()
            .admin()
            .cluster()
            .prepareHealth()
            .setWaitForNodes("3")
            .setWaitForGreenStatus()
            .get()
            .isTimedOut());
    flush();

    int allowedFailures = randomIntBetween(3, 10);
    logger.info("--> blocking recoveries from primary (allowed failures: [{}])", allowedFailures);
    CountDownLatch corruptionCount = new CountDownLatch(allowedFailures);
    ClusterService clusterService = internalCluster().getInstance(ClusterService.class, p_node);
    MockTransportService mockTransportService =
        (MockTransportService) internalCluster().getInstance(TransportService.class, p_node);
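    // wrap the transport of every node except the primary's so recovery traffic
    // to the replicas can be intercepted and corrupted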
    for (DiscoveryNode node : clusterService.state().nodes()) {
      if (!node.equals(clusterService.localNode())) {
        mockTransportService.addDelegate(
            internalCluster().getInstance(TransportService.class, node.getName()),
            new RecoveryCorruption(mockTransportService.original(), corruptionCount));
      }
    }

    client()
        .admin()
        .indices()
        .prepareUpdateSettings(indexName)
        .setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1))
        .get();

    corruptionCount.await();

    logger.info("--> stopping replica assignment");
    assertAcked(
        client()
            .admin()
            .cluster()
            .prepareUpdateSettings()
            .setTransientSettings(
                Settings.builder()
                    .put(
                        EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(),
                        "none")));

    logger.info("--> wait for all replica shards to be removed, on all nodes");
    assertBusy(
        () -> {
          for (String node : internalCluster().getNodeNames()) {
            if (node.equals(p_node)) {
              continue;
            }
            ClusterState state =
                client(node).admin().cluster().prepareState().setLocal(true).get().getState();
            assertThat(
                node + " indicates assigned replicas",
                state
                    .getRoutingTable()
                    .index(indexName)
                    .shardsWithState(ShardRoutingState.UNASSIGNED)
                    .size(),
                equalTo(1));
          }
        });

    logger.info("--> verifying no temporary recoveries are left");
    for (String node : internalCluster().getNodeNames()) {
      NodeEnvironment nodeEnvironment = internalCluster().getInstance(NodeEnvironment.class, node);
      for (final Path shardLoc :
          nodeEnvironment.availableShardPaths(new ShardId(indexName, "_na_", 0))) {
        if (Files.exists(shardLoc)) {
          assertBusy(
              () -> {
                try {
                  Files.walkFileTree(
                      shardLoc,
                      new SimpleFileVisitor<Path>() {
                        @Override
                        public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
                            throws IOException {
                          assertThat(
                              "found a temporary recovery file: " + file,
                              file.getFileName().toString(),
                              not(startsWith("recovery.")));
                          return FileVisitResult.CONTINUE;
                        }
                      });
                } catch (IOException e) {
                  throw new AssertionError(
                      "failed to walk file tree starting at [" + shardLoc + "]", e);
                }
              });
        }
      }
    }
  }