/** Returns a set of nodes that have at least one shard of the given index. */ public synchronized Set<String> nodesInclude(String index) { if (clusterService().state().routingTable().hasIndex(index)) { List<ShardRouting> allShards = clusterService().state().routingTable().allShards(index); DiscoveryNodes discoveryNodes = clusterService().state().getNodes(); Set<String> nodes = new HashSet<String>(); for (ShardRouting shardRouting : allShards) { if (shardRouting.assignedToNode()) { DiscoveryNode discoveryNode = discoveryNodes.get(shardRouting.currentNodeId()); nodes.add(discoveryNode.getName()); } } return nodes; } return Collections.emptySet(); }
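// Illustrative only: a minimal sketch of how nodesInclude(..) might be used from an integration test, assuming an ESIntegTestCase-style context where createIndex(), ensureGreen() and this helper are available; the index name "test" is a placeholder, not taken from the snippet above.
public void testIndexHasShardsOnSomeNode() {
    createIndex("test");
    ensureGreen("test");
    Set<String> nodesWithShards = nodesInclude("test");
    // a green index has every shard assigned, so at least one node name should be returned
    assertFalse(nodesWithShards.isEmpty());
}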
public String toString() { StringBuilder sb = new StringBuilder(); sb.append("routingNode (["); sb.append(node.getName()); sb.append("]["); sb.append(node.getId()); sb.append("]["); sb.append(node.getHostName()); sb.append("]["); sb.append(node.getHostAddress()); sb.append("], ["); sb.append(shards.size()); sb.append(" assigned shards])"); return sb.toString(); }
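// Illustrative only: an example of the string built above, with placeholder node values:
// routingNode ([node-1][AbC123xyz][host-1][10.0.0.1], [3 assigned shards])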
/** * Get info about the current operation of this river. Used when handling REST management operations. * * @return String with JSON formatted info. * @throws Exception */ @Override public String getRiverOperationInfo(DiscoveryNode esNode, Date currentDate) throws Exception { XContentBuilder builder = jsonBuilder().prettyPrint(); builder.startObject(); builder.field("river_name", riverName().getName()); builder.field("info_date", currentDate); builder.startObject("indexing"); builder.field("state", closed ? "stopped" : "running"); if (!closed) builder.field("last_restart", lastRestartDate); else if (permanentStopDate != null) builder.field("stopped_permanently", permanentStopDate); builder.endObject(); if (esNode != null) { builder.startObject("node"); builder.field("id", esNode.getId()); builder.field("name", esNode.getName()); builder.endObject(); } if (coordinatorInstance != null) { List<SpaceIndexingInfo> currProjectIndexingInfo = coordinatorInstance.getCurrentSpaceIndexingInfo(); if (currProjectIndexingInfo != null) { builder.startArray("current_indexing"); for (SpaceIndexingInfo pi : currProjectIndexingInfo) { pi.buildDocument(builder, null, true, false); } builder.endArray(); } } List<String> pkeys = getAllIndexedSpaceKeys(); if (pkeys != null) { builder.startArray("indexed_spaces"); for (String spaceKey : pkeys) { builder.startObject(); builder.field(SpaceIndexingInfo.DOCFIELD_SPACE_KEY, spaceKey); SpaceIndexingInfo lastIndexing = getLastSpaceIndexingInfo(spaceKey); if (lastIndexing != null) { builder.field("last_indexing"); lastIndexing.buildDocument(builder, null, false, true); } builder.endObject(); } builder.endArray(); } builder.endObject(); return builder.string(); }
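// Illustrative only: the approximate shape of the JSON produced above, derived from the field names
// in the code; all values are placeholders, and the space-key field name is whatever
// SpaceIndexingInfo.DOCFIELD_SPACE_KEY resolves to.
// {
//   "river_name" : "my_river",
//   "info_date" : "2017-01-01T00:00:00.000Z",
//   "indexing" : { "state" : "running", "last_restart" : "2017-01-01T00:00:00.000Z" },
//   "node" : { "id" : "AbC123xyz", "name" : "node-1" },
//   "current_indexing" : [ ... ],
//   "indexed_spaces" : [ { "<space key field>" : "SPACE1", "last_indexing" : { ... } } ]
// }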
private IndexShard newShard( boolean primary, DiscoveryNode node, IndexMetaData indexMetaData, Path homePath) throws IOException { // add node name to settings for proper logging final Settings nodeSettings = Settings.builder().put("node.name", node.getName()).build(); final IndexSettings indexSettings = new IndexSettings(indexMetaData, nodeSettings); ShardRouting shardRouting = TestShardRouting.newShardRouting( shardId, node.getId(), primary, ShardRoutingState.INITIALIZING); final Path path = Files.createDirectories(homePath.resolve(node.getId())); final NodeEnvironment.NodePath nodePath = new NodeEnvironment.NodePath(path); ShardPath shardPath = new ShardPath(false, nodePath.resolve(shardId), nodePath.resolve(shardId), shardId); Store store = createStore(indexSettings, shardPath); IndexCache indexCache = new IndexCache(indexSettings, new DisabledQueryCache(indexSettings), null); MapperService mapperService = MapperTestUtils.newMapperService(homePath, indexSettings.getSettings()); for (Map.Entry<String, String> type : indexMapping.entrySet()) { mapperService.merge( type.getKey(), new CompressedXContent(type.getValue()), MapperService.MergeReason.MAPPING_RECOVERY, true); } SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap()); final IndexEventListener indexEventListener = new IndexEventListener() {}; final Engine.Warmer warmer = searcher -> {}; return new IndexShard( shardRouting, indexSettings, shardPath, store, indexCache, mapperService, similarityService, null, null, indexEventListener, null, threadPool, BigArrays.NON_RECYCLING_INSTANCE, warmer, Collections.emptyList(), Collections.emptyList()); }
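// Illustrative only: a sketch of how the newShard(..) helper above might be invoked, assuming the
// test has already built a DiscoveryNode, an IndexMetaData and a temporary directory; the variable
// names (primaryNode, indexMetaData, tempDir) are hypothetical.
IndexShard primary = newShard(true, primaryNode, indexMetaData, tempDir);
// the caller remains responsible for recovering the shard and closing it when the test is done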
@Override public void onNewClusterState(String reason) { ClusterState newClusterState = action.pendingStatesQueue().getNextClusterStateToProcess(); logger.debug( "[{}] received version [{}], uuid [{}]", discoveryNode.getName(), newClusterState.version(), newClusterState.stateUUID()); if (listener != null) { ClusterChangedEvent event = new ClusterChangedEvent("", newClusterState, clusterState); listener.clusterChanged(event); } if (clusterState.nodes().getMasterNode() == null || newClusterState.supersedes(clusterState)) { clusterState = newClusterState; } action.pendingStatesQueue().markAsProcessed(newClusterState); }
private Table buildTable(RestRequest request, ClusterStateResponse state) { Table table = getTableWithHeader(request); DiscoveryNodes nodes = state.getState().nodes(); table.startRow(); DiscoveryNode master = nodes.get(nodes.masterNodeId()); if (master == null) { table.addCell("-"); table.addCell("-"); table.addCell("-"); table.addCell("-"); } else { table.addCell(master.getId()); table.addCell(master.getHostName()); table.addCell(master.getHostAddress()); table.addCell(master.getName()); } table.endRow(); return table; }
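// Illustrative only: the single row this produces contains the cells [id, host, ip, node name], e.g.
// AbC123xyz host-1 10.0.0.1 node-1
// and falls back to "- - - -" when no master is currently elected.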
private static void assertTribeNodeSuccessfullyCreated(Settings extraSettings) throws Exception { // The tribe clients do need it to make sure they can find their corresponding tribes using the // proper transport Settings settings = Settings.builder() .put(NetworkModule.HTTP_ENABLED.getKey(), false) .put("node.name", "tribe_node") .put("transport.type", MockTcpTransportPlugin.MOCK_TCP_TRANSPORT_NAME) .put("discovery.type", "local") .put("tribe.t1.transport.type", MockTcpTransportPlugin.MOCK_TCP_TRANSPORT_NAME) .put("tribe.t2.transport.type", MockTcpTransportPlugin.MOCK_TCP_TRANSPORT_NAME) .put("tribe.t1.discovery.type", "local") .put("tribe.t2.discovery.type", "local") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) .put(extraSettings) .build(); try (Node node = new MockNode(settings, Collections.singleton(MockTcpTransportPlugin.class)).start()) { try (Client client = node.client()) { assertBusy( () -> { ClusterState state = client.admin().cluster().prepareState().clear().setNodes(true).get().getState(); assertThat(state.getClusterName().value(), equalTo("tribe_node_cluster")); assertThat(state.getNodes().getSize(), equalTo(5)); for (DiscoveryNode discoveryNode : state.getNodes()) { assertThat( discoveryNode.getName(), either(equalTo("tribe1_node")) .or(equalTo("tribe2_node")) .or(equalTo("tribe_node")) .or(equalTo("tribe_node/t1")) .or(equalTo("tribe_node/t2"))); } }); } } }
private Table buildTable( RestRequest req, ClusterStateResponse state, NodesInfoResponse nodesInfo, NodesStatsResponse nodesStats) { final String[] threadPools = req.paramAsStringArray("thread_pool_patterns", new String[] {"*"}); final DiscoveryNodes nodes = state.getState().nodes(); final Table table = getTableWithHeader(req); // collect all thread pool names that we see across the nodes final Set<String> candidates = new HashSet<>(); for (final NodeStats nodeStats : nodesStats.getNodes()) { for (final ThreadPoolStats.Stats threadPoolStats : nodeStats.getThreadPool()) { candidates.add(threadPoolStats.getName()); } } // collect all thread pool names that match the specified thread pool patterns final Set<String> included = new HashSet<>(); for (final String candidate : candidates) { if (Regex.simpleMatch(threadPools, candidate)) { included.add(candidate); } } for (final DiscoveryNode node : nodes) { final NodeInfo info = nodesInfo.getNodesMap().get(node.getId()); final NodeStats stats = nodesStats.getNodesMap().get(node.getId()); final Map<String, ThreadPoolStats.Stats> poolThreadStats; final Map<String, ThreadPool.Info> poolThreadInfo; if (stats == null) { poolThreadStats = Collections.emptyMap(); poolThreadInfo = Collections.emptyMap(); } else { // we use a sorted map to ensure that thread pools are sorted by name poolThreadStats = new TreeMap<>(); poolThreadInfo = new HashMap<>(); ThreadPoolStats threadPoolStats = stats.getThreadPool(); for (ThreadPoolStats.Stats threadPoolStat : threadPoolStats) { poolThreadStats.put(threadPoolStat.getName(), threadPoolStat); } if (info != null) { for (ThreadPool.Info threadPoolInfo : info.getThreadPool()) { poolThreadInfo.put(threadPoolInfo.getName(), threadPoolInfo); } } } for (Map.Entry<String, ThreadPoolStats.Stats> entry : poolThreadStats.entrySet()) { if (!included.contains(entry.getKey())) continue; table.startRow(); table.addCell(node.getName()); table.addCell(node.getId()); table.addCell(node.getEphemeralId()); table.addCell(info == null ? null : info.getProcess().getId()); table.addCell(node.getHostName()); table.addCell(node.getHostAddress()); table.addCell(node.getAddress().address().getPort()); final ThreadPoolStats.Stats poolStats = entry.getValue(); final ThreadPool.Info poolInfo = poolThreadInfo.get(entry.getKey()); Long maxQueueSize = null; String keepAlive = null; Integer minThreads = null; Integer maxThreads = null; if (poolInfo != null) { if (poolInfo.getQueueSize() != null) { maxQueueSize = poolInfo.getQueueSize().singles(); } if (poolInfo.getKeepAlive() != null) { keepAlive = poolInfo.getKeepAlive().toString(); } if (poolInfo.getMin() >= 0) { minThreads = poolInfo.getMin(); } if (poolInfo.getMax() >= 0) { maxThreads = poolInfo.getMax(); } } table.addCell(entry.getKey()); table.addCell(poolInfo == null ? null : poolInfo.getThreadPoolType().getType()); table.addCell(poolStats == null ? null : poolStats.getActive()); table.addCell(poolStats == null ? null : poolStats.getThreads()); table.addCell(poolStats == null ? null : poolStats.getQueue()); table.addCell(maxQueueSize); table.addCell(poolStats == null ? null : poolStats.getRejected()); table.addCell(poolStats == null ? null : poolStats.getLargest()); table.addCell(poolStats == null ? null : poolStats.getCompleted()); table.addCell(minThreads); table.addCell(maxThreads); table.addCell(keepAlive); table.endRow(); } } return table; }
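// Illustrative only: each emitted row contains, in the order the cells are added above:
// [node name, node id, ephemeral id, pid, host, ip, port, pool name, pool type,
//  active, threads, queue, max queue size, rejected, largest, completed, core (min), max, keep alive]
// with null cells wherever the corresponding stats or info are unavailable.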
public boolean allocateUnassigned(RoutingAllocation allocation) { boolean changed = false; DiscoveryNodes nodes = allocation.nodes(); RoutingNodes routingNodes = allocation.routingNodes(); // First, handle primaries, they must find a place to be allocated on here final MetaData metaData = routingNodes.metaData(); RoutingNodes.UnassignedShards unassigned = routingNodes.unassigned(); unassigned.sort( new PriorityComparator() { @Override protected Settings getIndexSettings(String index) { IndexMetaData indexMetaData = metaData.index(index); return indexMetaData.getSettings(); } }); // sort for priority ordering Iterator<ShardRouting> unassignedIterator = unassigned.iterator(); while (unassignedIterator.hasNext()) { ShardRouting shard = unassignedIterator.next(); if (!shard.primary()) { continue; } // this is an API allocation, ignore since we know there is no data... if (!routingNodes .routingTable() .index(shard.index()) .shard(shard.id()) .primaryAllocatedPostApi()) { continue; } AsyncShardFetch<TransportNodesListGatewayStartedShards.NodeGatewayStartedShards> fetch = asyncFetchStarted.get(shard.shardId()); if (fetch == null) { fetch = new InternalAsyncFetch<>(logger, "shard_started", shard.shardId(), startedAction); asyncFetchStarted.put(shard.shardId(), fetch); } AsyncShardFetch.FetchResult<TransportNodesListGatewayStartedShards.NodeGatewayStartedShards> shardState = fetch.fetchData(nodes, metaData, allocation.getIgnoreNodes(shard.shardId())); if (shardState.hasData() == false) { logger.trace("{}: ignoring allocation, still fetching shard started state", shard); unassignedIterator.remove(); routingNodes.ignoredUnassigned().add(shard); continue; } shardState.processAllocation(allocation); IndexMetaData indexMetaData = metaData.index(shard.getIndex()); /** * Build a map of DiscoveryNodes to shard state number for the given shard. A state of -1 * means the shard does not exist on the node, where any shard state >= 0 is the state version * of the shard on that node's disk. * * <p>A shard on shared storage will return at least shard state 0 for all nodes, indicating * that the shard can be allocated to any node. 
*/ ObjectLongHashMap<DiscoveryNode> nodesState = new ObjectLongHashMap<>(); for (TransportNodesListGatewayStartedShards.NodeGatewayStartedShards nodeShardState : shardState.getData().values()) { long version = nodeShardState.version(); // a version of -1 means the shard does not exist on the node, which is what the API returns and what we expect logger.trace( "[{}] on node [{}] has version [{}] of shard", shard, nodeShardState.getNode(), version); nodesState.put(nodeShardState.getNode(), version); } int numberOfAllocationsFound = 0; long highestVersion = -1; final Map<DiscoveryNode, Long> nodesWithVersion = Maps.newHashMap(); assert !nodesState.containsKey(null); final Object[] keys = nodesState.keys; final long[] values = nodesState.values; Settings idxSettings = indexMetaData.settings(); for (int i = 0; i < keys.length; i++) { if (keys[i] == null) { continue; } DiscoveryNode node = (DiscoveryNode) keys[i]; long version = values[i]; // since we don't check in NO allocation, we need to double check here if (allocation.shouldIgnoreShardForNode(shard.shardId(), node.id())) { continue; } if (recoverOnAnyNode(idxSettings)) { numberOfAllocationsFound++; if (version > highestVersion) { highestVersion = version; } // We always put the node without clearing the map nodesWithVersion.put(node, version); } else if (version != -1) { numberOfAllocationsFound++; // If we've found a new "best" candidate, clear the // current candidates and add it if (version > highestVersion) { highestVersion = version; nodesWithVersion.clear(); nodesWithVersion.put(node, version); } else if (version == highestVersion) { // If the candidate is the same, add it to the // list, but keep the current candidate nodesWithVersion.put(node, version); } } } // Now that we have a map of nodes to versions along with the // number of allocations found (and not ignored), we need to sort // it so the node with the highest version is at the beginning List<DiscoveryNode> nodesWithHighestVersion = Lists.newArrayList(); nodesWithHighestVersion.addAll(nodesWithVersion.keySet()); CollectionUtil.timSort( nodesWithHighestVersion, new Comparator<DiscoveryNode>() { @Override public int compare(DiscoveryNode o1, DiscoveryNode o2) { return Long.compare(nodesWithVersion.get(o2), nodesWithVersion.get(o1)); } }); if (logger.isDebugEnabled()) { logger.debug( "[{}][{}] found {} allocations of {}, highest version: [{}]", shard.index(), shard.id(), numberOfAllocationsFound, shard, highestVersion); } if (logger.isTraceEnabled()) { StringBuilder sb = new StringBuilder("["); for (DiscoveryNode n : nodesWithHighestVersion) { sb.append("["); sb.append(n.getName()); sb.append("]"); sb.append(" -> "); sb.append(nodesWithVersion.get(n)); sb.append(", "); } sb.append("]"); logger.trace("{} candidates for allocation: {}", shard, sb.toString()); } // check if the count meets the minimum set int requiredAllocation = 1; // if we restore from a repository one copy is more than enough if (shard.restoreSource() == null) { try { String initialShards = indexMetaData .settings() .get( INDEX_RECOVERY_INITIAL_SHARDS, settings.get(INDEX_RECOVERY_INITIAL_SHARDS, this.initialShards)); if ("quorum".equals(initialShards)) { if (indexMetaData.numberOfReplicas() > 1) { requiredAllocation = ((1 + indexMetaData.numberOfReplicas()) / 2) + 1; } } else if ("quorum-1".equals(initialShards) || "half".equals(initialShards)) { if (indexMetaData.numberOfReplicas() > 2) { requiredAllocation = ((1 + indexMetaData.numberOfReplicas()) / 2); } } else if ("one".equals(initialShards)) { requiredAllocation = 1; } 
else if ("full".equals(initialShards) || "all".equals(initialShards)) { requiredAllocation = indexMetaData.numberOfReplicas() + 1; } else if ("full-1".equals(initialShards) || "all-1".equals(initialShards)) { if (indexMetaData.numberOfReplicas() > 1) { requiredAllocation = indexMetaData.numberOfReplicas(); } } else { requiredAllocation = Integer.parseInt(initialShards); } } catch (Exception e) { logger.warn( "[{}][{}] failed to derived initial_shards from value {}, ignore allocation for {}", shard.index(), shard.id(), initialShards, shard); } } // not enough found for this shard, continue... if (numberOfAllocationsFound < requiredAllocation) { // if we are restoring this shard we still can allocate if (shard.restoreSource() == null) { // we can't really allocate, so ignore it and continue unassignedIterator.remove(); routingNodes.ignoredUnassigned().add(shard); if (logger.isDebugEnabled()) { logger.debug( "[{}][{}]: not allocating, number_of_allocated_shards_found [{}], required_number [{}]", shard.index(), shard.id(), numberOfAllocationsFound, requiredAllocation); } } else if (logger.isDebugEnabled()) { logger.debug( "[{}][{}]: missing local data, will restore from [{}]", shard.index(), shard.id(), shard.restoreSource()); } continue; } Set<DiscoveryNode> throttledNodes = Sets.newHashSet(); Set<DiscoveryNode> noNodes = Sets.newHashSet(); for (DiscoveryNode discoNode : nodesWithHighestVersion) { RoutingNode node = routingNodes.node(discoNode.id()); if (node == null) { continue; } Decision decision = allocation.deciders().canAllocate(shard, node, allocation); if (decision.type() == Decision.Type.THROTTLE) { throttledNodes.add(discoNode); } else if (decision.type() == Decision.Type.NO) { noNodes.add(discoNode); } else { if (logger.isDebugEnabled()) { logger.debug( "[{}][{}]: allocating [{}] to [{}] on primary allocation", shard.index(), shard.id(), shard, discoNode); } // we found a match changed = true; // make sure we create one with the version from the recovered state routingNodes.initialize(new ShardRouting(shard, highestVersion), node.nodeId()); unassignedIterator.remove(); // found a node, so no throttling, no "no", and break out of the loop throttledNodes.clear(); noNodes.clear(); break; } } if (throttledNodes.isEmpty()) { // if we have a node that we "can't" allocate to, force allocation, since this is our master // data! 
if (!noNodes.isEmpty()) { DiscoveryNode discoNode = noNodes.iterator().next(); RoutingNode node = routingNodes.node(discoNode.id()); if (logger.isDebugEnabled()) { logger.debug( "[{}][{}]: forcing allocating [{}] to [{}] on primary allocation", shard.index(), shard.id(), shard, discoNode); } // we found a match changed = true; // make sure we create one with the version from the recovered state routingNodes.initialize(new ShardRouting(shard, highestVersion), node.nodeId()); unassignedIterator.remove(); } } else { if (logger.isDebugEnabled()) { logger.debug( "[{}][{}]: throttling allocation [{}] to [{}] on primary allocation", shard.index(), shard.id(), shard, throttledNodes); } // we are throttling this, but we have enough to allocate to this node, ignore it for now unassignedIterator.remove(); routingNodes.ignoredUnassigned().add(shard); } } if (!routingNodes.hasUnassigned()) { return changed; } // Now, handle replicas, try to assign them to nodes that are similar to the one the primary was // allocated on unassignedIterator = unassigned.iterator(); while (unassignedIterator.hasNext()) { ShardRouting shard = unassignedIterator.next(); if (shard.primary()) { continue; } // pre-check if it can be allocated to any node that currently exists, so we won't list the // store for it for nothing boolean canBeAllocatedToAtLeastOneNode = false; for (ObjectCursor<DiscoveryNode> cursor : nodes.dataNodes().values()) { RoutingNode node = routingNodes.node(cursor.value.id()); if (node == null) { continue; } // if we can't allocate it on a node, ignore it, for example, this handles // cases for only allocating a replica after a primary Decision decision = allocation.deciders().canAllocate(shard, node, allocation); if (decision.type() == Decision.Type.YES) { canBeAllocatedToAtLeastOneNode = true; break; } } if (!canBeAllocatedToAtLeastOneNode) { logger.trace("{}: ignoring allocation, can't be allocated on any node", shard); unassignedIterator.remove(); routingNodes.ignoredUnassigned().add(shard); continue; } AsyncShardFetch<TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData> fetch = asyncFetchStore.get(shard.shardId()); if (fetch == null) { fetch = new InternalAsyncFetch<>(logger, "shard_store", shard.shardId(), storeAction); asyncFetchStore.put(shard.shardId(), fetch); } AsyncShardFetch.FetchResult<TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData> shardStores = fetch.fetchData(nodes, metaData, allocation.getIgnoreNodes(shard.shardId())); if (shardStores.hasData() == false) { logger.trace("{}: ignoring allocation, still fetching shard stores", shard); unassignedIterator.remove(); routingNodes.ignoredUnassigned().add(shard); continue; // still fetching } shardStores.processAllocation(allocation); long lastSizeMatched = 0; DiscoveryNode lastDiscoNodeMatched = null; RoutingNode lastNodeMatched = null; boolean hasReplicaData = false; IndexMetaData indexMetaData = metaData.index(shard.getIndex()); for (Map.Entry<DiscoveryNode, TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData> nodeStoreEntry : shardStores.getData().entrySet()) { DiscoveryNode discoNode = nodeStoreEntry.getKey(); TransportNodesListShardStoreMetaData.StoreFilesMetaData storeFilesMetaData = nodeStoreEntry.getValue().storeFilesMetaData(); logger.trace("{}: checking node [{}]", shard, discoNode); if (storeFilesMetaData == null) { // already allocated on that node... continue; } RoutingNode node = routingNodes.node(discoNode.id()); if (node == null) { continue; } // check if we can allocate on that node... 
// we only check for NO, since if this node is THROTTLING and it has enough "same data" // then we will try and assign it next time Decision decision = allocation.deciders().canAllocate(shard, node, allocation); if (decision.type() == Decision.Type.NO) { continue; } // if it is already allocated, we can't assign to it... if (storeFilesMetaData.allocated()) { continue; } if (!shard.primary()) { hasReplicaData |= storeFilesMetaData.iterator().hasNext(); ShardRouting primaryShard = routingNodes.activePrimary(shard); if (primaryShard != null) { assert primaryShard.active(); DiscoveryNode primaryNode = nodes.get(primaryShard.currentNodeId()); if (primaryNode != null) { TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData primaryNodeFilesStore = shardStores.getData().get(primaryNode); if (primaryNodeFilesStore != null) { TransportNodesListShardStoreMetaData.StoreFilesMetaData primaryNodeStore = primaryNodeFilesStore.storeFilesMetaData(); if (primaryNodeStore != null && primaryNodeStore.allocated()) { long sizeMatched = 0; String primarySyncId = primaryNodeStore.syncId(); String replicaSyncId = storeFilesMetaData.syncId(); // see if we have a sync id we can make use of if (replicaSyncId != null && replicaSyncId.equals(primarySyncId)) { logger.trace( "{}: node [{}] has same sync id {} as primary", shard, discoNode.name(), replicaSyncId); lastNodeMatched = node; lastSizeMatched = Long.MAX_VALUE; lastDiscoNodeMatched = discoNode; } else { for (StoreFileMetaData storeFileMetaData : storeFilesMetaData) { String metaDataFileName = storeFileMetaData.name(); if (primaryNodeStore.fileExists(metaDataFileName) && primaryNodeStore.file(metaDataFileName).isSame(storeFileMetaData)) { sizeMatched += storeFileMetaData.length(); } } logger.trace( "{}: node [{}] has [{}/{}] bytes of re-usable data", shard, discoNode.name(), new ByteSizeValue(sizeMatched), sizeMatched); if (sizeMatched > lastSizeMatched) { lastSizeMatched = sizeMatched; lastDiscoNodeMatched = discoNode; lastNodeMatched = node; } } } } } } } } if (lastNodeMatched != null) { // we only check on THROTTLE since we already checked on NO above Decision decision = allocation.deciders().canAllocate(shard, lastNodeMatched, allocation); if (decision.type() == Decision.Type.THROTTLE) { if (logger.isDebugEnabled()) { logger.debug( "[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store with total_size [{}]", shard.index(), shard.id(), shard, lastDiscoNodeMatched, new ByteSizeValue(lastSizeMatched)); } // we are throttling this, but we have enough to allocate to this node, ignore it for now unassignedIterator.remove(); routingNodes.ignoredUnassigned().add(shard); } else { if (logger.isDebugEnabled()) { logger.debug( "[{}][{}]: allocating [{}] to [{}] in order to reuse its unallocated persistent store with total_size [{}]", shard.index(), shard.id(), shard, lastDiscoNodeMatched, new ByteSizeValue(lastSizeMatched)); } // we found a match changed = true; routingNodes.initialize(shard, lastNodeMatched.nodeId()); unassignedIterator.remove(); } } else if (hasReplicaData == false) { // if we didn't manage to find *any* data (regardless of matching sizes), check if the // allocation // of the replica shard needs to be delayed, and if so, add it to the ignore unassigned list // note: we only care about replica in delayed allocation, since if we have an unassigned // primary it // will anyhow wait to find an existing copy of the shard to be allocated // note: the other side of the equation is scheduling a reroute in a 
timely manner, which // happens in the RoutingService long delay = shard .unassignedInfo() .getDelayAllocationExpirationIn(settings, indexMetaData.getSettings()); if (delay > 0) { logger.debug( "[{}][{}]: delaying allocation of [{}] for [{}]", shard.index(), shard.id(), shard, TimeValue.timeValueMillis(delay)); /** * mark it as changed, since we want to kick a publishing to schedule future allocation, * see {@link * org.elasticsearch.cluster.routing.RoutingService#clusterChanged(ClusterChangedEvent)}). */ changed = true; unassignedIterator.remove(); routingNodes.ignoredUnassigned().add(shard); } } } return changed; }
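// Illustrative only: a worked example of the initial_shards arithmetic used when deriving
// requiredAllocation above, assuming an index with number_of_replicas = 2 (three copies overall):
//   "quorum"            -> ((1 + 2) / 2) + 1 = 2 copies must have been found
//   "quorum-1" / "half" -> only applied when replicas > 2, so requiredAllocation stays at 1 here
//   "one"               -> 1
//   "full" / "all"      -> 2 + 1 = 3
//   "full-1" / "all-1"  -> 2 (replicas > 1, so requiredAllocation = number_of_replicas)
//   any other value     -> parsed as an integer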
public void testCancellationCleansTempFiles() throws Exception { final String indexName = "test"; final String p_node = internalCluster().startNode(); prepareCreate( indexName, Settings.builder() .put( IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1, IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)) .get(); internalCluster().startNode(); internalCluster().startNode(); List<IndexRequestBuilder> requests = new ArrayList<>(); int numDocs = scaledRandomIntBetween(25, 250); for (int i = 0; i < numDocs; i++) { requests.add(client().prepareIndex(indexName, "type").setSource("{}")); } indexRandom(true, requests); assertFalse( client() .admin() .cluster() .prepareHealth() .setWaitForNodes("3") .setWaitForGreenStatus() .get() .isTimedOut()); flush(); int allowedFailures = randomIntBetween(3, 10); logger.info("--> blocking recoveries from primary (allowed failures: [{}])", allowedFailures); CountDownLatch corruptionCount = new CountDownLatch(allowedFailures); ClusterService clusterService = internalCluster().getInstance(ClusterService.class, p_node); MockTransportService mockTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, p_node); for (DiscoveryNode node : clusterService.state().nodes()) { if (!node.equals(clusterService.localNode())) { mockTransportService.addDelegate( internalCluster().getInstance(TransportService.class, node.getName()), new RecoveryCorruption(mockTransportService.original(), corruptionCount)); } } client() .admin() .indices() .prepareUpdateSettings(indexName) .setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)) .get(); corruptionCount.await(); logger.info("--> stopping replica assignment"); assertAcked( client() .admin() .cluster() .prepareUpdateSettings() .setTransientSettings( Settings.builder() .put( EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(), "none"))); logger.info("--> wait for all replica shards to be removed, on all nodes"); assertBusy( () -> { for (String node : internalCluster().getNodeNames()) { if (node.equals(p_node)) { continue; } ClusterState state = client(node).admin().cluster().prepareState().setLocal(true).get().getState(); assertThat( node + " indicates assigned replicas", state .getRoutingTable() .index(indexName) .shardsWithState(ShardRoutingState.UNASSIGNED) .size(), equalTo(1)); } }); logger.info("--> verifying no temporary recoveries are left"); for (String node : internalCluster().getNodeNames()) { NodeEnvironment nodeEnvironment = internalCluster().getInstance(NodeEnvironment.class, node); for (final Path shardLoc : nodeEnvironment.availableShardPaths(new ShardId(indexName, "_na_", 0))) { if (Files.exists(shardLoc)) { assertBusy( () -> { try { Files.walkFileTree( shardLoc, new SimpleFileVisitor<Path>() { @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { assertThat( "found a temporary recovery file: " + file, file.getFileName().toString(), not(startsWith("recovery."))); return FileVisitResult.CONTINUE; } }); } catch (IOException e) { throw new AssertionError( "failed to walk file tree starting at [" + shardLoc + "]", e); } }); } } } }