@Test
public void unknownDiskUsageTest() {
    Settings diskSettings = settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.85)
            .build();

    Map<String, DiskUsage> usages = new HashMap<>();
    usages.put("node2", new DiskUsage("node2", 100, 50)); // 50% used
    usages.put("node3", new DiskUsage("node3", 100, 0));  // 100% used

    Map<String, Long> shardSizes = new HashMap<>();
    shardSizes.put("[test][0][p]", 10L); // 10 bytes
    shardSizes.put("[test][0][r]", 10L); // 10 bytes

    final ClusterInfo clusterInfo = new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes));

    AllocationDeciders deciders = new AllocationDeciders(ImmutableSettings.EMPTY,
            new HashSet<>(Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    ClusterInfoService cis = new ClusterInfoService() {
        @Override
        public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo;
        }
    };

    AllocationService strategy = new AllocationService(settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build(), deciders, new ShardsAllocators(), cis);

    MetaData metaData = MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(0))
            .build();

    RoutingTable routingTable = RoutingTable.builder()
            .addAsNew(metaData.index("test"))
            .build();

    ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    logger.info("--> adding node1");
    clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder()
            .put(newNode("node1"))
            .put(newNode("node3")) // node3 is added because DiskThresholdDecider automatically ignores single-node clusters
    ).build();

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // Shard can be allocated to node1, even though it only has 25% free (node1's usage is unknown,
    // so the decider falls back to the average of the known usages), because it's a primary
    // that's never been allocated before
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> start the shards (primaries)");
    routingTable = strategy.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);

    // A single shard is started on node1, even though it normally would not
    // be allowed, because it's a primary that hasn't been allocated, and node1
    // is still below the high watermark (unlike node3)
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
}
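/**
 * Verifies that in-flight relocations are charged against the target node's free space:
 * once [test][0] is relocating to node3, a second MoveAllocationCommand to node3 would leave
 * it with less than the required 30% free and is rejected with an
 * ElasticsearchIllegalArgumentException.
 */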
@Test
public void testShardRelocationsTakenIntoAccount() {
    Settings diskSettings = settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_INCLUDE_RELOCATIONS, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.8)
            .build();

    Map<String, DiskUsage> usages = new HashMap<>();
    usages.put("node1", new DiskUsage("node1", 100, 40)); // 60% used
    usages.put("node2", new DiskUsage("node2", 100, 40)); // 60% used
    usages.put("node3", new DiskUsage("node3", 100, 40)); // 60% used

    Map<String, Long> shardSizes = new HashMap<>();
    shardSizes.put("[test][0][p]", 14L); // 14 bytes
    shardSizes.put("[test][0][r]", 14L);
    shardSizes.put("[test2][0][p]", 1L); // 1 byte
    shardSizes.put("[test2][0][r]", 1L);

    final ClusterInfo clusterInfo = new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes));

    AllocationDeciders deciders = new AllocationDeciders(ImmutableSettings.EMPTY,
            new HashSet<>(Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    ClusterInfoService cis = new ClusterInfoService() {
        @Override
        public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo;
        }
    };

    AllocationService strategy = new AllocationService(settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build(), deciders, new ShardsAllocators(), cis);

    MetaData metaData = MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(1))
            .put(IndexMetaData.builder("test2").numberOfShards(1).numberOfReplicas(1))
            .build();

    RoutingTable routingTable = RoutingTable.builder()
            .addAsNew(metaData.index("test"))
            .addAsNew(metaData.index("test2"))
            .build();

    ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    logger.info("--> adding two nodes");
    clusterState = ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // shards should be initializing
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(4));

    logger.info("--> start the shards");
    routingTable = strategy.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that we're able to start the primary and replicas
    assertThat(clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(4));

    logger.info("--> adding node3");
    clusterState = ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3")))
            .build();

    AllocationCommand relocate1 = new MoveAllocationCommand(new ShardId("test", 0), "node2", "node3");
    AllocationCommands cmds = new AllocationCommands(relocate1);

    routingTable = strategy.reroute(clusterState, cmds).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    AllocationCommand relocate2 = new MoveAllocationCommand(new ShardId("test2", 0), "node2", "node3");
    cmds = new AllocationCommands(relocate2);

    try {
        // The shard for the "test" index is already being relocated to
        // node3, which will put it over the low watermark when it
        // completes, with shard relocations taken into account this should
        // throw an exception about not being able to complete
        strategy.reroute(clusterState, cmds).routingTable();
        fail("should not have been able to reroute the shard");
    } catch (ElasticsearchIllegalArgumentException e) {
        assertThat("can't allocate because there isn't enough room: " + e.getMessage(),
                e.getMessage().contains("less than required [30.0%] free disk on node, free: [26.0%]"),
                equalTo(true));
    }
}
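/**
 * Walks a percentage-based watermark scenario: the primary avoids the nearly full node1,
 * the replica stays unassigned until node3 joins, and progressively tightening the
 * low/high watermarks first leaves shards in place and then forces the shard off node2
 * once node4 provides somewhere for it to go.
 */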
@Test
public void diskThresholdTest() {
    Settings diskSettings = settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.8)
            .build();

    Map<String, DiskUsage> usages = new HashMap<>();
    usages.put("node1", new DiskUsage("node1", 100, 10)); // 90% used
    usages.put("node2", new DiskUsage("node2", 100, 35)); // 65% used
    usages.put("node3", new DiskUsage("node3", 100, 60)); // 40% used
    usages.put("node4", new DiskUsage("node4", 100, 80)); // 20% used

    Map<String, Long> shardSizes = new HashMap<>();
    shardSizes.put("[test][0][p]", 10L); // 10 bytes
    shardSizes.put("[test][0][r]", 10L);

    final ClusterInfo clusterInfo = new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes));

    AllocationDeciders deciders = new AllocationDeciders(ImmutableSettings.EMPTY,
            new HashSet<>(Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    ClusterInfoService cis = new ClusterInfoService() {
        @Override
        public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo;
        }
    };

    AllocationService strategy = new AllocationService(settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build(), deciders, new ShardsAllocators(), cis);

    MetaData metaData = MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(1))
            .build();

    RoutingTable routingTable = RoutingTable.builder()
            .addAsNew(metaData.index("test"))
            .build();

    ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    logger.info("--> adding two nodes");
    clusterState = ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
            .build();

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // Primary shard should be initializing, replica should not
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> start the shards (primaries)");
    routingTable = strategy.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that we're able to start the primary
    assertThat(clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1));
    // Assert that node1 didn't get any shards because its disk usage is too high
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));

    logger.info("--> start the shards (replicas)");
    routingTable = strategy.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that the replica couldn't be started since node1 doesn't have enough space
    assertThat(clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1));

    logger.info("--> adding node3");
    clusterState = ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3")))
            .build();

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that the replica is initialized now that node3 is available with enough space
    assertThat(clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1));
    assertThat(clusterState.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));

    logger.info("--> start the shards (replicas)");
    routingTable = strategy.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that the replica was started on node3, which has enough space for it
    assertThat(clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> changing decider settings");

    // Set the low threshold to 60 instead of 70
    // Set the high threshold to 70 instead of 80
    // node2 now should not have new shards allocated to it, but shards can remain
    diskSettings = settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "60%")
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.7)
            .build();

    deciders = new AllocationDeciders(ImmutableSettings.EMPTY,
            new HashSet<>(Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    strategy = new AllocationService(settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build(), deciders, new ShardsAllocators(), cis);

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> changing settings again");

    // Set the low threshold to 50 instead of 60
    // Set the high threshold to 60 instead of 70
    // node2 now should not have new shards allocated to it, and shards cannot remain
    diskSettings = settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.5)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.6)
            .build();

    deciders = new AllocationDeciders(ImmutableSettings.EMPTY,
            new HashSet<>(Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    strategy = new AllocationService(settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build(), deciders, new ShardsAllocators(), cis);

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
    // Shard hasn't been moved off of node2 yet because there's nowhere for it to go
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> adding node4");
    clusterState = ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node4")))
            .build();

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> apply INITIALIZING shards");
    routingTable = strategy.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
    // Node4 is available now, so the shard is moved off of node2
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node4").size(), equalTo(1));
}
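/**
 * Verifies that the decider accounts for the size of the shard being allocated: node1 is only
 * 69% used, but placing the 10-byte shard would push it past the 71% high watermark, so the
 * shard stays unassigned.
 */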
@Test
public void diskThresholdWithShardSizes() {
    Settings diskSettings = settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "71%")
            .build();

    Map<String, DiskUsage> usages = new HashMap<>();
    usages.put("node1", new DiskUsage("node1", 100, 31)); // 69% used
    usages.put("node2", new DiskUsage("node2", 100, 1));  // 99% used

    Map<String, Long> shardSizes = new HashMap<>();
    shardSizes.put("[test][0][p]", 10L); // 10 bytes

    final ClusterInfo clusterInfo = new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes));

    AllocationDeciders deciders = new AllocationDeciders(ImmutableSettings.EMPTY,
            new HashSet<>(Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    ClusterInfoService cis = new ClusterInfoService() {
        @Override
        public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo;
        }
    };

    AllocationService strategy = new AllocationService(settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build(), deciders, new ShardsAllocators(), cis);

    MetaData metaData = MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(0))
            .build();

    RoutingTable routingTable = RoutingTable.builder()
            .addAsNew(metaData.index("test"))
            .build();

    ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    logger.info("--> adding node1");
    clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder()
            .put(newNode("node1"))
            .put(newNode("node2")) // node2 is added because DiskThresholdDecider automatically ignores single-node clusters
    ).build();

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logger.info("--> start the shards (primaries)");
    routingTable = strategy.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);

    // Shard can't be allocated to node1 (or node2) because it would cause too much usage
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(0));
    // No shards are started, no nodes have enough disk for allocation
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(0));
}
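/**
 * Same progression as diskThresholdTest(), but with byte-valued watermarks and five nodes:
 * the primary starts on one of the two full nodes, replicas follow as capacity is added, and
 * after the watermarks are tightened the shards are eventually pushed off node1 and node2
 * onto nodes with enough free space.
 */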
@Test
@TestLogging("cluster.routing.allocation.decider:TRACE")
public void diskThresholdWithAbsoluteSizesTest() {
    Settings diskSettings = settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "30b")
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "9b")
            .build();

    Map<String, DiskUsage> usages = new HashMap<>();
    usages.put("node1", new DiskUsage("node1", 100, 10)); // 90% used
    usages.put("node2", new DiskUsage("node2", 100, 10)); // 90% used
    usages.put("node3", new DiskUsage("node3", 100, 60)); // 40% used
    usages.put("node4", new DiskUsage("node4", 100, 80)); // 20% used
    usages.put("node5", new DiskUsage("node5", 100, 85)); // 15% used

    Map<String, Long> shardSizes = new HashMap<>();
    shardSizes.put("[test][0][p]", 10L); // 10 bytes
    shardSizes.put("[test][0][r]", 10L);

    final ClusterInfo clusterInfo = new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes));

    AllocationDeciders deciders = new AllocationDeciders(ImmutableSettings.EMPTY,
            new HashSet<>(Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    ClusterInfoService cis = new ClusterInfoService() {
        @Override
        public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo;
        }
    };

    AllocationService strategy = new AllocationService(settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build(), deciders, new ShardsAllocators(), cis);

    MetaData metaData = MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(2))
            .build();

    RoutingTable routingTable = RoutingTable.builder()
            .addAsNew(metaData.index("test"))
            .build();

    ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    logger.info("--> adding node1 and node2");
    clusterState = ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
            .build();

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // Primary should initialize, even though both nodes are over the limit
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    String nodeWithPrimary, nodeWithoutPrimary;
    if (clusterState.getRoutingNodes().node("node1").size() == 1) {
        nodeWithPrimary = "node1";
        nodeWithoutPrimary = "node2";
    } else {
        nodeWithPrimary = "node2";
        nodeWithoutPrimary = "node1";
    }
    logger.info("--> nodeWithPrimary: {}", nodeWithPrimary);
    logger.info("--> nodeWithoutPrimary: {}", nodeWithoutPrimary);

    // Make node without the primary now habitable to replicas
    usages.put(nodeWithoutPrimary, new DiskUsage(nodeWithoutPrimary, 100, 35)); // 65% used
    final ClusterInfo clusterInfo2 = new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes));
    cis = new ClusterInfoService() {
        @Override
        public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo2;
        }
    };

    strategy = new AllocationService(settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build(), deciders, new ShardsAllocators(), cis);

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // Now the replica should be able to initialize
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(2));

    logger.info("--> start the shards (primaries)");
    routingTable = strategy.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that we're able to start the primary and replica, since they were both initializing
    assertThat(clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2));
    // Assert that node1 got a single shard (the primary), even though its disk usage is too high
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
    // Assert that node2 got a single shard (a replica)
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));

    // Assert that one replica is still unassigned
    // assertThat(clusterState.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(1));

    logger.info("--> adding node3");
    clusterState = ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3")))
            .build();

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that the replica is initialized now that node3 is available with enough space
    assertThat(clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2));
    assertThat(clusterState.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));

    logger.info("--> start the shards (replicas)");
    routingTable = strategy.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that all replicas could be started
    assertThat(clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(3));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> changing decider settings");

    // Raise the low watermark to 40 bytes free (from 30) and the high watermark to 30 bytes free (from 9)
    // node1 and node2 now should not have new shards allocated to them, but existing shards can remain
    diskSettings = settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "40b")
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "30b")
            .build();

    deciders = new AllocationDeciders(ImmutableSettings.EMPTY,
            new HashSet<>(Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    strategy = new AllocationService(settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build(), deciders, new ShardsAllocators(), cis);

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(3));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> changing settings again");

    // Raise the low watermark to 50 bytes free (from 40) and the high watermark to 40 bytes free (from 30)
    // node1 and node2 are now under the high watermark, so their shards must move off once there is somewhere to go
    diskSettings = settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "50b")
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "40b")
            .build();

    deciders = new AllocationDeciders(ImmutableSettings.EMPTY,
            new HashSet<>(Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    strategy = new AllocationService(settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build(), deciders, new ShardsAllocators(), cis);

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(3));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
    // Shard hasn't been moved off of node2 yet because there's nowhere for it to go
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> adding node4");
    clusterState = ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node4")))
            .build();

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2));
    // One shard is relocating off of node1
    assertThat(clusterState.routingNodes().shardsWithState(RELOCATING).size(), equalTo(1));
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> apply INITIALIZING shards");
    routingTable = strategy.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // primary shard has already been relocated away
    assertThat(clusterState.getRoutingNodes().node(nodeWithPrimary).size(), equalTo(0));
    // node with increased space still has its shard
    assertThat(clusterState.getRoutingNodes().node(nodeWithoutPrimary).size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node4").size(), equalTo(1));

    logger.info("--> adding node5");
    clusterState = ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node5")))
            .build();

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Shards remain started on node3 and node4
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2));
    // One shard is relocating off of node2 now
    assertThat(clusterState.routingNodes().shardsWithState(RELOCATING).size(), equalTo(1));
    // Initializing on node5
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> apply INITIALIZING shards");
    routingTable = strategy.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logger.info("--> final cluster state:");
    logShardStates(clusterState);
    // Node1 still has no shards because it has no space for them
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
    // Node5 is available now, so the shard is moved off of node2
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node4").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node5").size(), equalTo(1));
}
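/**
 * Follows a single-shard, zero-replica index through its lifecycle: initial assignment,
 * no-op reroutes, starting the shard, killing the node that holds it, and re-initialization
 * on the surviving node.
 */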
@Test
public void testSingleIndexStartedShard() {
    AllocationService strategy = createAllocationService(settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .build());

    logger.info("Building initial routing table");

    MetaData metaData = MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(0))
            .build();

    RoutingTable routingTable = RoutingTable.builder()
            .addAsNew(metaData.index("test"))
            .build();

    ClusterState clusterState = ClusterState.builder().metaData(metaData).routingTable(routingTable).build();

    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
    assertThat(routingTable.index("test").shard(0).shards().get(0).currentNodeId(), nullValue());

    logger.info("Adding one node and performing rerouting");
    clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1"))).build();
    RoutingTable prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(INITIALIZING));
    assertThat(routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

    logger.info("Rerouting again, nothing should change");
    prevRoutingTable = routingTable;
    clusterState = ClusterState.builder(clusterState).build();
    routingTable = strategy.reroute(clusterState).routingTable();
    assertThat(routingTable == prevRoutingTable, equalTo(true));
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logger.info("Marking the shard as started");
    RoutingNodes routingNodes = clusterState.routingNodes();
    prevRoutingTable = routingTable;
    routingTable = strategy.applyStartedShards(clusterState, routingNodes.node("node1").shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(routingTable != prevRoutingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(STARTED));
    assertThat(routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

    logger.info("Starting another node and making sure nothing changed");
    clusterState = ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node2")))
            .build();
    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(routingTable == prevRoutingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(STARTED));
    assertThat(routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

    logger.info("Killing node1 where the shard is, checking the shard is relocated");
    clusterState = ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).remove("node1"))
            .build();
    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(routingTable != prevRoutingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(INITIALIZING));
    assertThat(routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node2"));

    logger.info("Start another node, make sure that things remain the same (shard is in node2 and initializing)");
    clusterState = ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3")))
            .build();
    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    assertThat(routingTable == prevRoutingTable, equalTo(true));

    logger.info("Start the shard on node 2");
    routingNodes = clusterState.routingNodes();
    prevRoutingTable = routingTable;
    routingTable = strategy.applyStartedShards(clusterState, routingNodes.node("node2").shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(routingTable != prevRoutingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(STARTED));
    assertThat(routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node2"));
}
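/**
 * Ten single-shard indices on three nodes: after two more nodes join and the shards are
 * started, rebalancing relocates four shards so that each of the five nodes ends up with
 * two started shards.
 */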
@Test
public void testMultiIndexUnevenNodes() {
    AllocationService strategy = createAllocationService(settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put("cluster.routing.allocation.allow_rebalance", "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build());

    final int numberOfIndices = 10;
    logger.info("Building initial routing table with " + numberOfIndices + " indices");

    MetaData.Builder metaDataBuilder = MetaData.builder();
    for (int i = 0; i < numberOfIndices; i++) {
        metaDataBuilder.put(IndexMetaData.builder("test" + i).numberOfShards(1).numberOfReplicas(0));
    }
    MetaData metaData = metaDataBuilder.build();

    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    for (int i = 0; i < numberOfIndices; i++) {
        routingTableBuilder.addAsNew(metaData.index("test" + i));
    }
    RoutingTable routingTable = routingTableBuilder.build();

    ClusterState clusterState = ClusterState.builder().metaData(metaData).routingTable(routingTable).build();

    assertThat(routingTable.indicesRouting().size(), equalTo(numberOfIndices));

    logger.info("Starting 3 nodes and rerouting");
    clusterState = ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")).put(newNode("node3")))
            .build();
    RoutingTable prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
        assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).shards().get(0).state(), equalTo(INITIALIZING));
    }
    RoutingNodes routingNodes = clusterState.routingNodes();
    assertThat(numberOfShardsOfType(routingNodes, INITIALIZING), equalTo(numberOfIndices));
    assertThat(routingNodes.node("node1").numberOfShardsWithState(INITIALIZING), anyOf(equalTo(3), equalTo(4)));
    assertThat(routingNodes.node("node2").numberOfShardsWithState(INITIALIZING), anyOf(equalTo(3), equalTo(4)));
    assertThat(routingNodes.node("node3").numberOfShardsWithState(INITIALIZING), anyOf(equalTo(3), equalTo(4)));

    logger.info("Start two more nodes, things should remain the same");
    clusterState = ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node4")).put(newNode("node5")))
            .build();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    assertThat(prevRoutingTable == routingTable, equalTo(true));

    routingNodes = clusterState.routingNodes();
    prevRoutingTable = routingTable;
    routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
        assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).shards().get(0).state(), anyOf(equalTo(RELOCATING), equalTo(STARTED)));
    }
    routingNodes = clusterState.routingNodes();
    assertThat("4 source shard routing are relocating", numberOfShardsOfType(routingNodes, RELOCATING), equalTo(4));
    assertThat("4 target shard routing are initializing", numberOfShardsOfType(routingNodes, INITIALIZING), equalTo(4));

    logger.info("Now, mark the relocated as started");
    prevRoutingTable = routingTable;
    routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    // routingTable = strategy.reroute(new RoutingStrategyInfo(metaData, routingTable), nodes);

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
        assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).shards().get(0).state(), anyOf(equalTo(RELOCATING), equalTo(STARTED)));
    }
    routingNodes = clusterState.routingNodes();
    assertThat(numberOfShardsOfType(routingNodes, STARTED), equalTo(numberOfIndices));
    for (RoutingNode routingNode : routingNodes) {
        assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(2));
    }
}
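/**
 * Fifty single-shard indices allocated across 25 nodes (two per node); adding 25 more nodes
 * changes nothing until the shards are started, at which point 25 shards relocate towards an
 * even one-shard-per-node distribution.
 */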
@Test
public void testMultiIndexEvenDistribution() {
    AllocationService strategy = createAllocationService(settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put("cluster.routing.allocation.allow_rebalance", "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build());

    final int numberOfIndices = 50;
    logger.info("Building initial routing table with " + numberOfIndices + " indices");

    MetaData.Builder metaDataBuilder = MetaData.builder();
    for (int i = 0; i < numberOfIndices; i++) {
        metaDataBuilder.put(IndexMetaData.builder("test" + i).numberOfShards(1).numberOfReplicas(0));
    }
    MetaData metaData = metaDataBuilder.build();

    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    for (int i = 0; i < numberOfIndices; i++) {
        routingTableBuilder.addAsNew(metaData.index("test" + i));
    }
    RoutingTable routingTable = routingTableBuilder.build();
    ClusterState clusterState = ClusterState.builder().metaData(metaData).routingTable(routingTable).build();

    assertThat(routingTable.indicesRouting().size(), equalTo(numberOfIndices));
    for (int i = 0; i < numberOfIndices; i++) {
        assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
        assertThat(routingTable.index("test" + i).shard(0).shards().get(0).currentNodeId(), nullValue());
    }

    logger.info("Adding " + (numberOfIndices / 2) + " nodes");
    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder();
    List<DiscoveryNode> nodes = newArrayList();
    for (int i = 0; i < (numberOfIndices / 2); i++) {
        nodesBuilder.put(newNode("node" + i));
    }
    RoutingTable prevRoutingTable = routingTable;
    clusterState = ClusterState.builder(clusterState).nodes(nodesBuilder).build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
        assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).shards().get(0).unassigned(), equalTo(false));
        assertThat(routingTable.index("test" + i).shard(0).shards().get(0).state(), equalTo(INITIALIZING));
        assertThat(routingTable.index("test" + i).shard(0).shards().get(0).primary(), equalTo(true));
        // make sure we still have 2 shards initializing per node on the first 25 nodes
        String nodeId = routingTable.index("test" + i).shard(0).shards().get(0).currentNodeId();
        int nodeIndex = Integer.parseInt(nodeId.substring("node".length()));
        assertThat(nodeIndex, lessThan(25));
    }

    RoutingNodes routingNodes = clusterState.routingNodes();
    Set<String> encounteredIndices = newHashSet();
    for (RoutingNode routingNode : routingNodes) {
        assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(0));
        assertThat(routingNode.size(), equalTo(2));
        // make sure we still have 2 shards initializing per node on the first 25 nodes
        int nodeIndex = Integer.parseInt(routingNode.nodeId().substring("node".length()));
        assertThat(nodeIndex, lessThan(25));
        // check that we don't have a shard associated with a node with the same index name (we have a single shard)
        for (MutableShardRouting shardRoutingEntry : routingNode) {
            assertThat(encounteredIndices, not(hasItem(shardRoutingEntry.index())));
            encounteredIndices.add(shardRoutingEntry.index());
        }
    }

    logger.info("Adding additional " + (numberOfIndices / 2) + " nodes, nothing should change");
    nodesBuilder = DiscoveryNodes.builder(clusterState.nodes());
    for (int i = (numberOfIndices / 2); i < numberOfIndices; i++) {
        nodesBuilder.put(newNode("node" + i));
    }
    prevRoutingTable = routingTable;
    clusterState = ClusterState.builder(clusterState).nodes(nodesBuilder).build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(false));

    logger.info("Marking the shard as started");
    prevRoutingTable = routingTable;
    routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    int numberOfRelocatingShards = 0;
    int numberOfStartedShards = 0;
    for (int i = 0; i < numberOfIndices; i++) {
        assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
        assertThat(routingTable.index("test" + i).shard(0).shards().get(0).unassigned(), equalTo(false));
        assertThat(routingTable.index("test" + i).shard(0).shards().get(0).state(), anyOf(equalTo(STARTED), equalTo(RELOCATING)));
        if (routingTable.index("test" + i).shard(0).shards().get(0).state() == STARTED) {
            numberOfStartedShards++;
        } else if (routingTable.index("test" + i).shard(0).shards().get(0).state() == RELOCATING) {
            numberOfRelocatingShards++;
        }
        assertThat(routingTable.index("test" + i).shard(0).shards().get(0).primary(), equalTo(true));
        // make sure we still have 2 shards either relocating or started on the first 25 nodes (still)
        String nodeId = routingTable.index("test" + i).shard(0).shards().get(0).currentNodeId();
        int nodeIndex = Integer.parseInt(nodeId.substring("node".length()));
        assertThat(nodeIndex, lessThan(25));
    }
    assertThat(numberOfRelocatingShards, equalTo(25));
    assertThat(numberOfStartedShards, equalTo(25));
}
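/**
 * Verifies that failing the INITIALIZING primary on its only candidate node sends it back to
 * UNASSIGNED with no current node.
 */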
@Test
public void testSingleIndexShardFailed() {
    AllocationService strategy = createAllocationService(settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .build());

    logger.info("Building initial routing table");

    MetaData metaData = MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(0))
            .build();

    RoutingTable routingTable = RoutingTable.builder()
            .addAsNew(metaData.index("test"))
            .build();

    ClusterState clusterState = ClusterState.builder().metaData(metaData).routingTable(routingTable).build();

    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
    assertThat(routingTable.index("test").shard(0).shards().get(0).currentNodeId(), nullValue());

    logger.info("Adding one node and rerouting");
    clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1"))).build();
    RoutingTable prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).unassigned(), equalTo(false));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(INITIALIZING));
    assertThat(routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

    logger.info("Marking the shard as failed");
    RoutingNodes routingNodes = clusterState.routingNodes();
    prevRoutingTable = routingTable;
    routingTable = strategy.applyFailedShard(clusterState, routingNodes.node("node1").shardsWithState(INITIALIZING).get(0)).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
    assertThat(routingTable.index("test").shard(0).shards().get(0).currentNodeId(), nullValue());
}