/**
 * Serializes a rerouted cluster state with {@code ClusterState.Builder.toBytes}, reads it back
 * with {@code ClusterState.Builder.fromBytes}, and asserts that the pretty-printed routing table
 * of the copy equals that of the source state.
 */
@Test
public void testClusterStateSerialization() throws Exception {
  // Index "test": 10 shards, 1 replica.
  MetaData indexMeta =
      MetaData.builder()
          .put(IndexMetaData.builder("test").numberOfShards(10).numberOfReplicas(1))
          .build();
  RoutingTable unassignedRouting =
      RoutingTable.builder().addAsNew(indexMeta.index("test")).build();

  // Three nodes; node1 is set as the local node id and node2 as the master node id.
  DiscoveryNodes threeNodes =
      DiscoveryNodes.builder()
          .put(newNode("node1"))
          .put(newNode("node2"))
          .put(newNode("node3"))
          .localNodeId("node1")
          .masterNodeId("node2")
          .build();

  ClusterState original =
      ClusterState.builder()
          .nodes(threeNodes)
          .metaData(indexMeta)
          .routingTable(unassignedRouting)
          .build();

  // Replace the routing table with the one produced by a reroute before serializing.
  AllocationService allocator = createAllocationService();
  original =
      ClusterState.builder(original)
          .routingTable(allocator.reroute(original).routingTable())
          .build();

  byte[] wireBytes = ClusterState.Builder.toBytes(original);
  ClusterState roundTripped = ClusterState.Builder.fromBytes(wireBytes, newNode("node1"));

  assertThat(
      roundTripped.routingTable().prettyPrint(),
      equalTo(original.routingTable().prettyPrint()));
}
private ClusterState addNode(ClusterState clusterState, AllocationService strategy) { logger.info( "now, start 1 more node, check that rebalancing will happen because we set it to always"); clusterState = ClusterState.builder(clusterState) .nodes( DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node" + numberOfNodes))) .build(); RoutingTable routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); RoutingNodes routingNodes = clusterState.getRoutingNodes(); // move initializing to started RoutingTable prev = routingTable; while (true) { routingTable = strategy .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); routingNodes = clusterState.getRoutingNodes(); if (routingTable == prev) break; prev = routingTable; } return clusterState; }
/**
 * Writes a rerouted routing table with {@code RoutingTable.Builder.writeTo}, reads it back with
 * {@code RoutingTable.Builder.readFrom}, and asserts both tables pretty-print identically.
 */
@Test
public void testRoutingTableSerialization() throws Exception {
  // Index "test": 10 shards, 1 replica.
  MetaData indexMeta =
      MetaData.builder()
          .put(IndexMetaData.builder("test").numberOfShards(10).numberOfReplicas(1))
          .build();
  RoutingTable unassignedRouting =
      RoutingTable.builder().addAsNew(indexMeta.index("test")).build();

  DiscoveryNodes threeNodes =
      DiscoveryNodes.builder()
          .put(newNode("node1"))
          .put(newNode("node2"))
          .put(newNode("node3"))
          .build();

  ClusterState state =
      ClusterState.builder()
          .nodes(threeNodes)
          .metaData(indexMeta)
          .routingTable(unassignedRouting)
          .build();

  AllocationService allocator = createAllocationService();
  RoutingTable written = allocator.reroute(state).routingTable();

  // Serialize to an in-memory stream, then deserialize from the produced bytes.
  BytesStreamOutput sink = new BytesStreamOutput();
  RoutingTable.Builder.writeTo(written, sink);
  byte[] payload = sink.bytes().toBytes();
  BytesStreamInput replay = new BytesStreamInput(payload, false);
  RoutingTable read = RoutingTable.Builder.readFrom(replay);

  assertThat(read.prettyPrint(), equalTo(written.prettyPrint()));
}
/**
 * Reroutes a single-shard, zero-replica index onto one node, fails the initializing shard through
 * {@code applyFailedShard}, and asserts the shard is reported as unassigned with no current node.
 */
@Test
public void testSingleIndexShardFailed() {
  AllocationService allocator =
      createAllocationService(
          settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build());

  logger.info("Building initial routing table");

  MetaData indexMeta =
      MetaData.builder()
          .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(0))
          .build();
  RoutingTable routing = RoutingTable.builder().addAsNew(indexMeta.index("test")).build();
  ClusterState state =
      ClusterState.builder().metaData(indexMeta).routingTable(routing).build();

  // Before any node exists: one shard group, one copy, unassigned.
  assertThat(routing.index("test").shards().size(), equalTo(1));
  assertThat(routing.index("test").shard(0).size(), equalTo(1));
  assertThat(routing.index("test").shard(0).shards().size(), equalTo(1));
  assertThat(routing.index("test").shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
  assertThat(routing.index("test").shard(0).shards().get(0).currentNodeId(), nullValue());

  logger.info("Adding one node and rerouting");
  state =
      ClusterState.builder(state)
          .nodes(DiscoveryNodes.builder().put(newNode("node1")))
          .build();
  RoutingTable previous = routing;
  routing = allocator.reroute(state).routingTable();
  state = ClusterState.builder(state).routingTable(routing).build();

  // The reroute must have produced a new table with the shard initializing on node1.
  assertThat(previous != routing, equalTo(true));
  assertThat(routing.index("test").shards().size(), equalTo(1));
  assertThat(routing.index("test").shard(0).size(), equalTo(1));
  assertThat(routing.index("test").shard(0).shards().size(), equalTo(1));
  assertThat(routing.index("test").shard(0).shards().get(0).unassigned(), equalTo(false));
  assertThat(routing.index("test").shard(0).shards().get(0).state(), equalTo(INITIALIZING));
  assertThat(routing.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

  logger.info("Marking the shard as failed");
  RoutingNodes routingNodes = state.routingNodes();
  previous = routing;
  routing =
      allocator
          .applyFailedShard(
              state, routingNodes.node("node1").shardsWithState(INITIALIZING).get(0))
          .routingTable();
  state = ClusterState.builder(state).routingTable(routing).build();

  // After the failure the table changed again and the shard is unassigned.
  assertThat(previous != routing, equalTo(true));
  assertThat(routing.index("test").shards().size(), equalTo(1));
  assertThat(routing.index("test").shard(0).size(), equalTo(1));
  assertThat(routing.index("test").shard(0).shards().size(), equalTo(1));
  assertThat(routing.index("test").shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
  assertThat(routing.index("test").shard(0).shards().get(0).currentNodeId(), nullValue());
}
public static void main(String[] args) { final int numberOfRuns = 1; final int numIndices = 5 * 365; // five years final int numShards = 6; final int numReplicas = 2; final int numberOfNodes = 30; final int numberOfTags = 2; AllocationService strategy = ElasticsearchAllocationTestCase.createAllocationService( ImmutableSettings.EMPTY, new Random(1)); MetaData.Builder mb = MetaData.builder(); for (int i = 1; i <= numIndices; i++) { mb.put( IndexMetaData.builder("test_" + i) .numberOfShards(numShards) .numberOfReplicas(numReplicas)); } MetaData metaData = mb.build(); RoutingTable.Builder rb = RoutingTable.builder(); for (int i = 1; i <= numIndices; i++) { rb.addAsNew(metaData.index("test_" + i)); } RoutingTable routingTable = rb.build(); DiscoveryNodes.Builder nb = DiscoveryNodes.builder(); for (int i = 1; i <= numberOfNodes; i++) { nb.put(newNode("node" + i, ImmutableMap.of("tag", "tag_" + (i % numberOfTags)))); } ClusterState initialClusterState = ClusterState.builder().metaData(metaData).routingTable(routingTable).nodes(nb).build(); long start = System.currentTimeMillis(); for (int i = 0; i < numberOfRuns; i++) { logger.info("[{}] starting... 
", i); long runStart = System.currentTimeMillis(); ClusterState clusterState = initialClusterState; while (clusterState.readOnlyRoutingNodes().hasUnassignedShards()) { logger.info( "[{}] remaining unassigned {}", i, clusterState.readOnlyRoutingNodes().unassigned().size()); RoutingAllocation.Result result = strategy.applyStartedShards( clusterState, clusterState.readOnlyRoutingNodes().shardsWithState(INITIALIZING)); clusterState = ClusterState.builder(clusterState).routingResult(result).build(); result = strategy.reroute(clusterState); clusterState = ClusterState.builder(clusterState).routingResult(result).build(); } logger.info( "[{}] took {}", i, TimeValue.timeValueMillis(System.currentTimeMillis() - runStart)); } long took = System.currentTimeMillis() - start; logger.info( "total took {}, AVG {}", TimeValue.timeValueMillis(took), TimeValue.timeValueMillis(took / numberOfRuns)); }
/**
 * Builds a two-node cluster state holding index {@code "idx"} (1 shard, 0 replicas) and reroutes
 * it once, asserting that the shard was unassigned before the reroute and initializing after it.
 *
 * @param service the allocation service used to perform the reroute
 * @return the cluster state carrying the rerouted routing table
 */
private ClusterState createInitialClusterState(AllocationService service) {
  MetaData.Builder metaBuilder = MetaData.builder();
  metaBuilder.put(
      IndexMetaData.builder("idx")
          .settings(settings(Version.CURRENT))
          .numberOfShards(1)
          .numberOfReplicas(0));
  MetaData metaData = metaBuilder.build();

  RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
  routingTableBuilder.addAsNew(metaData.index("idx"));
  RoutingTable routingTable = routingTableBuilder.build();

  ClusterState clusterState =
      ClusterState.builder(
              org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(
                  Settings.EMPTY))
          .metaData(metaData)
          .routingTable(routingTable)
          .build();
  clusterState =
      ClusterState.builder(clusterState)
          .nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")))
          .build();

  RoutingTable prevRoutingTable = routingTable;
  routingTable = service.reroute(clusterState, "reroute").routingTable();
  clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

  // assertEquals takes (expected, actual); keeping that order makes failure messages read
  // correctly ("expected:<1> but was:<...>").
  assertEquals(1, prevRoutingTable.index("idx").shards().size());
  assertEquals(UNASSIGNED, prevRoutingTable.index("idx").shard(0).shards().get(0).state());

  assertEquals(1, routingTable.index("idx").shards().size());
  assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
  return clusterState;
}
/**
 * Removes the nodes named {@code "node" + i} for {@code i} from {@code (numberOfNodes + 1) / 2}
 * through {@code numberOfNodes} inclusive, starts initializing shards twice, reroutes, and then
 * keeps applying started shards until doing so returns the same routing table instance.
 *
 * @param clusterState the state to remove nodes from
 * @param strategy the allocation service used for rerouting and for starting shards
 * @return the cluster state carrying the final routing table
 */
private ClusterState removeNodes(ClusterState clusterState, AllocationService strategy) {
  final int firstRemoved = (numberOfNodes + 1) / 2;
  logger.info("Removing half the nodes (" + firstRemoved + ")");

  DiscoveryNodes.Builder remaining = DiscoveryNodes.builder(clusterState.nodes());
  int nodeIndex = firstRemoved;
  while (nodeIndex <= numberOfNodes) {
    remaining.remove("node" + nodeIndex);
    nodeIndex++;
  }
  clusterState = ClusterState.builder(clusterState).nodes(remaining.build()).build();
  RoutingNodes routingNodes = clusterState.getRoutingNodes();

  logger.info("start all the primary shards, replicas will start initializing");
  RoutingTable current =
      strategy
          .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
          .routingTable();
  clusterState = ClusterState.builder(clusterState).routingTable(current).build();
  routingNodes = clusterState.getRoutingNodes();

  logger.info("start the replica shards");
  current =
      strategy
          .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
          .routingTable();
  clusterState = ClusterState.builder(clusterState).routingTable(current).build();
  routingNodes = clusterState.getRoutingNodes();

  logger.info("rebalancing");
  current = strategy.reroute(clusterState).routingTable();
  clusterState = ClusterState.builder(clusterState).routingTable(current).build();
  routingNodes = clusterState.getRoutingNodes();

  logger.info("complete rebalancing");
  // Keep starting initializing shards; stop once the table instance no longer changes.
  RoutingTable before;
  do {
    before = current;
    current =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(current).build();
    routingNodes = clusterState.getRoutingNodes();
  } while (current != before);

  return clusterState;
}
/**
 * Checks {@code primaryAllocatedPostApi()} on shard 0 of index {@code "test"}: it is asserted
 * false on the fresh routing table and after the first reroute, and true once the initializing
 * shards have been applied as started.
 */
@Test
public void simpleFlagTests() {
  AllocationService allocator =
      new AllocationService(
          settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build());

  logger.info("creating an index with 1 shard, no replica");
  MetaData indexMeta =
      newMetaDataBuilder()
          .put(newIndexMetaDataBuilder("test").numberOfShards(1).numberOfReplicas(0))
          .build();
  RoutingTable initialRouting = routingTable().addAsNew(indexMeta.index("test")).build();
  ClusterState state =
      newClusterStateBuilder().metaData(indexMeta).routingTable(initialRouting).build();
  assertThat(
      state.routingTable().index("test").shard(0).primaryAllocatedPostApi(), equalTo(false));

  logger.info("adding two nodes and performing rerouting");
  state =
      newClusterStateBuilder()
          .state(state)
          .nodes(newNodesBuilder().put(newNode("node1")).put(newNode("node2")))
          .build();
  RoutingAllocation.Result outcome = allocator.reroute(state);
  state = newClusterStateBuilder().state(state).routingTable(outcome.routingTable()).build();
  assertThat(
      state.routingTable().index("test").shard(0).primaryAllocatedPostApi(), equalTo(false));

  logger.info("start primary shard");
  outcome =
      allocator.applyStartedShards(state, state.routingNodes().shardsWithState(INITIALIZING));
  state = newClusterStateBuilder().state(state).routingTable(outcome.routingTable()).build();
  assertThat(
      state.routingTable().index("test").shard(0).primaryAllocatedPostApi(), equalTo(true));
}
/**
 * With {@code SameShardAllocationDecider.SAME_HOST_SETTING} enabled, starts two nodes that share
 * the address {@code test1:80} and asserts that only two shards initialize and start there; after
 * adding {@code node3} on {@code test2:80}, asserts that two more shards initialize and that all
 * of them are on {@code node3}.
 */
@Test
public void sameHost() {
  AllocationService allocator =
      new AllocationService(
          settingsBuilder().put(SameShardAllocationDecider.SAME_HOST_SETTING, true).build());

  // Index "test": 2 shards, 1 replica.
  MetaData indexMeta =
      newMetaDataBuilder()
          .put(newIndexMetaDataBuilder("test").numberOfShards(2).numberOfReplicas(1))
          .build();
  RoutingTable routing = routingTable().addAsNew(indexMeta.index("test")).build();
  ClusterState state =
      newClusterStateBuilder().metaData(indexMeta).routingTable(routing).build();

  logger.info("--> adding two nodes with the same host");
  state =
      newClusterStateBuilder()
          .state(state)
          .nodes(
              newNodesBuilder()
                  .put(newNode("node1", new InetSocketTransportAddress("test1", 80)))
                  .put(newNode("node2", new InetSocketTransportAddress("test1", 80))))
          .build();
  routing = allocator.reroute(state).routingTable();
  state = newClusterStateBuilder().state(state).routingTable(routing).build();

  assertThat(
      state.readOnlyRoutingNodes().numberOfShardsOfType(ShardRoutingState.INITIALIZING),
      equalTo(2));

  logger.info(
      "--> start all primary shards, no replica will be started since its on the same host");
  routing =
      allocator
          .applyStartedShards(state, state.readOnlyRoutingNodes().shardsWithState(INITIALIZING))
          .routingTable();
  state = newClusterStateBuilder().state(state).routingTable(routing).build();

  assertThat(
      state.readOnlyRoutingNodes().numberOfShardsOfType(ShardRoutingState.STARTED), equalTo(2));
  assertThat(
      state.readOnlyRoutingNodes().numberOfShardsOfType(ShardRoutingState.INITIALIZING),
      equalTo(0));

  logger.info("--> add another node, with a different host, replicas will be allocating");
  state =
      newClusterStateBuilder()
          .state(state)
          .nodes(
              newNodesBuilder()
                  .putAll(state.nodes())
                  .put(newNode("node3", new InetSocketTransportAddress("test2", 80))))
          .build();
  routing = allocator.reroute(state).routingTable();
  state = newClusterStateBuilder().state(state).routingTable(routing).build();

  assertThat(
      state.readOnlyRoutingNodes().numberOfShardsOfType(ShardRoutingState.STARTED), equalTo(2));
  assertThat(
      state.readOnlyRoutingNodes().numberOfShardsOfType(ShardRoutingState.INITIALIZING),
      equalTo(2));
  // Every newly initializing shard must be on the node with the different host.
  for (MutableShardRouting initializing :
      state.readOnlyRoutingNodes().shardsWithState(INITIALIZING)) {
    assertThat(initializing.currentNodeId(), equalTo("node3"));
  }
}
/**
 * Creates indices {@code test1} and {@code test2} (1 shard, 1 replica each) on two nodes with the
 * allow-rebalance setting at {@code ALWAYS}. Asserts both primaries initialize with replicas
 * unassigned, then starts only the {@code test1} initializing shards and asserts that the
 * {@code test1} replica begins initializing while {@code test2} is unchanged.
 */
public void testSimple() {
  AllocationService allocator =
      createAllocationService(
          Settings.builder()
              .put(
                  ClusterRebalanceAllocationDecider
                      .CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(),
                  ClusterRebalanceAllocationDecider.ClusterRebalanceType.ALWAYS.toString())
              .build());

  MetaData indexMeta =
      MetaData.builder()
          .put(
              IndexMetaData.builder("test1")
                  .settings(settings(Version.CURRENT))
                  .numberOfShards(1)
                  .numberOfReplicas(1))
          .put(
              IndexMetaData.builder("test2")
                  .settings(settings(Version.CURRENT))
                  .numberOfShards(1)
                  .numberOfReplicas(1))
          .build();

  RoutingTable routing =
      RoutingTable.builder()
          .addAsNew(indexMeta.index("test1"))
          .addAsNew(indexMeta.index("test2"))
          .build();

  ClusterState state =
      ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
          .metaData(indexMeta)
          .routingTable(routing)
          .build();

  logger.info("start two nodes");
  state =
      ClusterState.builder(state)
          .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
          .build();
  routing = allocator.reroute(state, "reroute").routingTable();
  state = ClusterState.builder(state).routingTable(routing).build();

  // Both indices are expected in the same shape here, checked in the order test1, test2.
  for (String indexName : new String[] {"test1", "test2"}) {
    for (int shard = 0; shard < routing.index(indexName).shards().size(); shard++) {
      assertThat(routing.index(indexName).shard(shard).shards().size(), equalTo(2));
      assertThat(
          routing.index(indexName).shard(shard).primaryShard().state(), equalTo(INITIALIZING));
      assertThat(
          routing.index(indexName).shard(shard).replicaShards().get(0).state(),
          equalTo(UNASSIGNED));
    }
  }

  logger.info("start all the primary shards for test1, replicas will start initializing");
  RoutingNodes routingNodes = state.getRoutingNodes();
  routing =
      allocator
          .applyStartedShards(state, routingNodes.shardsWithState("test1", INITIALIZING))
          .routingTable();
  state = ClusterState.builder(state).routingTable(routing).build();
  routingNodes = state.getRoutingNodes();

  // test1: primary started, replica now initializing.
  for (int shard = 0; shard < routing.index("test1").shards().size(); shard++) {
    assertThat(routing.index("test1").shard(shard).shards().size(), equalTo(2));
    assertThat(routing.index("test1").shard(shard).primaryShard().state(), equalTo(STARTED));
    assertThat(
        routing.index("test1").shard(shard).replicaShards().get(0).state(),
        equalTo(INITIALIZING));
  }

  // test2: untouched, primary still initializing and replica still unassigned.
  for (int shard = 0; shard < routing.index("test2").shards().size(); shard++) {
    assertThat(routing.index("test2").shard(shard).shards().size(), equalTo(2));
    assertThat(
        routing.index("test2").shard(shard).primaryShard().state(), equalTo(INITIALIZING));
    assertThat(
        routing.index("test2").shard(shard).replicaShards().get(0).state(),
        equalTo(UNASSIGNED));
  }
}
/**
 * Verifies that shards of the higher-priority index are allocated before those of the
 * lower-priority index, for primaries and then for replicas, even with the balanced shard
 * allocator. See https://github.com/elastic/elasticsearch/issues/13249 for details.
 *
 * <p>Which of the two indices ({@code "first"} / {@code "second"}) receives priority 100 versus 1
 * is decided by a single {@code randomBoolean()} draw.
 */
public void testPrioritizedIndicesAllocatedFirst() {
  AllocationService allocator =
      createAllocationService(
          Settings.builder()
              .put(
                  ThrottlingAllocationDecider
                      .CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_RECOVERIES_SETTING.getKey(),
                  1)
              .put(
                  ThrottlingAllocationDecider
                      .CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_OUTGOING_RECOVERIES_SETTING
                      .getKey(),
                  10)
              .put(
                  ThrottlingAllocationDecider
                      .CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_PRIMARIES_RECOVERIES_SETTING
                      .getKey(),
                  1)
              .put(
                  ThrottlingAllocationDecider
                      .CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_INCOMING_RECOVERIES_SETTING
                      .getKey(),
                  1)
              .build());

  // One random draw decides which index is the high-priority one.
  final boolean firstIsHigh = randomBoolean();
  final String highPriorityName = firstIsHigh ? "first" : "second";
  final String lowPriorityName = firstIsHigh ? "second" : "first";
  final int priorityFirst = firstIsHigh ? 100 : 1;
  final int prioritySecond = firstIsHigh ? 1 : 100;

  MetaData indexMeta =
      MetaData.builder()
          .put(
              IndexMetaData.builder("first")
                  .settings(
                      settings(Version.CURRENT)
                          .put(IndexMetaData.SETTING_PRIORITY, priorityFirst))
                  .numberOfShards(2)
                  .numberOfReplicas(1))
          .put(
              IndexMetaData.builder("second")
                  .settings(
                      settings(Version.CURRENT)
                          .put(IndexMetaData.SETTING_PRIORITY, prioritySecond))
                  .numberOfShards(2)
                  .numberOfReplicas(1))
          .build();

  RoutingTable routing =
      RoutingTable.builder()
          .addAsNew(indexMeta.index("first"))
          .addAsNew(indexMeta.index("second"))
          .build();

  ClusterState state =
      ClusterState.builder(
              org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(
                  Settings.EMPTY))
          .metaData(indexMeta)
          .routingTable(routing)
          .build();

  state =
      ClusterState.builder(state)
          .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
          .build();

  // Two consecutive reroutes, each applied to the state before the next step.
  RoutingAllocation.Result firstReroute = allocator.reroute(state, "reroute");
  state = ClusterState.builder(state).routingTable(firstReroute.routingTable()).build();

  routing = allocator.reroute(state, "reroute").routingTable();
  state = ClusterState.builder(state).routingTable(routing).build();

  // Round 1: the two initializing shards belong to the high-priority index.
  assertEquals(2, state.getRoutingNodes().shardsWithState(INITIALIZING).size());
  assertEquals(
      highPriorityName,
      state.getRoutingNodes().shardsWithState(INITIALIZING).get(0).getIndexName());
  assertEquals(
      highPriorityName,
      state.getRoutingNodes().shardsWithState(INITIALIZING).get(1).getIndexName());

  routing =
      allocator
          .applyStartedShards(state, state.getRoutingNodes().shardsWithState(INITIALIZING))
          .routingTable();
  state = ClusterState.builder(state).routingTable(routing).build();

  // Round 2: the two initializing shards belong to the low-priority index.
  assertEquals(2, state.getRoutingNodes().shardsWithState(INITIALIZING).size());
  assertEquals(
      lowPriorityName,
      state.getRoutingNodes().shardsWithState(INITIALIZING).get(0).getIndexName());
  assertEquals(
      lowPriorityName,
      state.getRoutingNodes().shardsWithState(INITIALIZING).get(1).getIndexName());

  routing =
      allocator
          .applyStartedShards(state, state.getRoutingNodes().shardsWithState(INITIALIZING))
          .routingTable();
  state = ClusterState.builder(state).routingTable(routing).build();

  // Round 3: high-priority index again; the shard list is used as the failure message.
  assertEquals(
      state.getRoutingNodes().shardsWithState(INITIALIZING).toString(),
      2,
      state.getRoutingNodes().shardsWithState(INITIALIZING).size());
  assertEquals(
      highPriorityName,
      state.getRoutingNodes().shardsWithState(INITIALIZING).get(0).getIndexName());
  assertEquals(
      highPriorityName,
      state.getRoutingNodes().shardsWithState(INITIALIZING).get(1).getIndexName());

  routing =
      allocator
          .applyStartedShards(state, state.getRoutingNodes().shardsWithState(INITIALIZING))
          .routingTable();
  state = ClusterState.builder(state).routingTable(routing).build();

  // Round 4: low-priority index again.
  assertEquals(2, state.getRoutingNodes().shardsWithState(INITIALIZING).size());
  assertEquals(
      lowPriorityName,
      state.getRoutingNodes().shardsWithState(INITIALIZING).get(0).getIndexName());
  assertEquals(
      lowPriorityName,
      state.getRoutingNodes().shardsWithState(INITIALIZING).get(1).getIndexName());
}
/**
 * With {@code node_concurrent_recoveries} and {@code node_initial_primaries_recoveries} both set
 * to 3, allocates a 10-shard, 1-replica index onto a single node and asserts the counts of
 * started, initializing and unassigned shards after the reroute and after each round of applying
 * the initializing shards as started.
 */
@Test
public void testPrimaryRecoveryThrottling() {
  AllocationService allocator =
      new AllocationService(
          settingsBuilder()
              .put("cluster.routing.allocation.node_concurrent_recoveries", 3)
              .put("cluster.routing.allocation.node_initial_primaries_recoveries", 3)
              .build());

  logger.info("Building initial routing table");

  MetaData indexMeta =
      newMetaDataBuilder()
          .put(newIndexMetaDataBuilder("test").numberOfShards(10).numberOfReplicas(1))
          .build();
  RoutingTable routing =
      routingTable()
          .add(indexRoutingTable("test").initializeEmpty(indexMeta.index("test")))
          .build();
  ClusterState state =
      newClusterStateBuilder().metaData(indexMeta).routingTable(routing).build();

  logger.info("start one node, do reroute, only 3 should initialize");
  state =
      newClusterStateBuilder()
          .state(state)
          .nodes(newNodesBuilder().put(RoutingAllocationTests.newNode("node1")))
          .build();
  routing = allocator.reroute(state).routingTable();
  state = newClusterStateBuilder().state(state).routingTable(routing).build();

  assertThat(routing.shardsWithState(STARTED).size(), equalTo(0));
  assertThat(routing.shardsWithState(INITIALIZING).size(), equalTo(3));
  assertThat(routing.shardsWithState(UNASSIGNED).size(), equalTo(17));

  logger.info("start initializing, another 3 should initialize");
  routing =
      allocator.applyStartedShards(state, routing.shardsWithState(INITIALIZING)).routingTable();
  state = newClusterStateBuilder().state(state).routingTable(routing).build();

  assertThat(routing.shardsWithState(STARTED).size(), equalTo(3));
  assertThat(routing.shardsWithState(INITIALIZING).size(), equalTo(3));
  assertThat(routing.shardsWithState(UNASSIGNED).size(), equalTo(14));

  logger.info("start initializing, another 3 should initialize");
  routing =
      allocator.applyStartedShards(state, routing.shardsWithState(INITIALIZING)).routingTable();
  state = newClusterStateBuilder().state(state).routingTable(routing).build();

  assertThat(routing.shardsWithState(STARTED).size(), equalTo(6));
  assertThat(routing.shardsWithState(INITIALIZING).size(), equalTo(3));
  assertThat(routing.shardsWithState(UNASSIGNED).size(), equalTo(11));

  logger.info("start initializing, another 1 should initialize");
  routing =
      allocator.applyStartedShards(state, routing.shardsWithState(INITIALIZING)).routingTable();
  state = newClusterStateBuilder().state(state).routingTable(routing).build();

  assertThat(routing.shardsWithState(STARTED).size(), equalTo(9));
  assertThat(routing.shardsWithState(INITIALIZING).size(), equalTo(1));
  assertThat(routing.shardsWithState(UNASSIGNED).size(), equalTo(10));

  logger.info("start initializing, all primaries should be started");
  routing =
      allocator.applyStartedShards(state, routing.shardsWithState(INITIALIZING)).routingTable();
  state = newClusterStateBuilder().state(state).routingTable(routing).build();

  // All 10 primaries started; the 10 replicas stay unassigned on the single node.
  assertThat(routing.shardsWithState(STARTED).size(), equalTo(10));
  assertThat(routing.shardsWithState(INITIALIZING).size(), equalTo(0));
  assertThat(routing.shardsWithState(UNASSIGNED).size(), equalTo(10));
}
@Test public void indexLevelShardsLimitRemain() { AllocationService strategy = new AllocationService( settingsBuilder() .put("cluster.routing.allocation.concurrent_recoveries", 10) .put("cluster.routing.allocation.node_initial_primaries_recoveries", 10) .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1) .put("cluster.routing.allocation.balance.index", 0.0f) .put("cluster.routing.allocation.balance.replica", 1.0f) .put("cluster.routing.allocation.balance.primary", 0.0f) .build()); logger.info("Building initial routing table"); MetaData metaData = newMetaDataBuilder() .put( newIndexMetaDataBuilder("test") .settings( ImmutableSettings.settingsBuilder() .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 5) .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0))) .build(); RoutingTable routingTable = routingTable().addAsNew(metaData.index("test")).build(); ClusterState clusterState = newClusterStateBuilder().metaData(metaData).routingTable(routingTable).build(); logger.info("Adding one node and reroute"); clusterState = newClusterStateBuilder() .state(clusterState) .nodes(newNodesBuilder().put(newNode("node1"))) .build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build(); logger.info("Start the primary shards"); RoutingNodes routingNodes = clusterState.routingNodes(); routingTable = strategy .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)) .routingTable(); clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build(); assertThat(clusterState.readOnlyRoutingNodes().numberOfShardsOfType(STARTED), equalTo(5)); logger.info("add another index with 5 shards"); metaData = newMetaDataBuilder() .metaData(metaData) .put( newIndexMetaDataBuilder("test1") .settings( ImmutableSettings.settingsBuilder() .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 5) .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0))) .build(); 
routingTable = routingTable().routingTable(routingTable).addAsNew(metaData.index("test1")).build(); clusterState = newClusterStateBuilder() .state(clusterState) .metaData(metaData) .routingTable(routingTable) .build(); logger.info("Add another one node and reroute"); clusterState = newClusterStateBuilder() .state(clusterState) .nodes(newNodesBuilder().putAll(clusterState.nodes()).put(newNode("node2"))) .build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build(); routingNodes = clusterState.routingNodes(); routingTable = strategy .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)) .routingTable(); clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build(); assertThat(clusterState.readOnlyRoutingNodes().numberOfShardsOfType(STARTED), equalTo(10)); for (MutableShardRouting shardRouting : clusterState.readOnlyRoutingNodes().node("node1")) { assertThat(shardRouting.index(), equalTo("test")); } for (MutableShardRouting shardRouting : clusterState.readOnlyRoutingNodes().node("node2")) { assertThat(shardRouting.index(), equalTo("test1")); } logger.info( "update " + ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE + " for test, see that things move"); metaData = newMetaDataBuilder() .metaData(metaData) .put( newIndexMetaDataBuilder("test") .settings( ImmutableSettings.settingsBuilder() .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 5) .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0) .put(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE, 3))) .build(); clusterState = newClusterStateBuilder().state(clusterState).metaData(metaData).build(); logger.info("reroute after setting"); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build(); assertThat( 
clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(STARTED), equalTo(3)); assertThat( clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(RELOCATING), equalTo(2)); assertThat( clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(RELOCATING), equalTo(2)); assertThat( clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(STARTED), equalTo(3)); // the first move will destroy the balance and the balancer will move 2 shards from node2 to // node one right after // moving the nodes to node2 since we consider INITIALIZING nodes during rebalance routingNodes = clusterState.routingNodes(); routingTable = strategy .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)) .routingTable(); clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build(); // now we are done compared to EvenShardCountAllocator since the Balancer is not soely based on // the average assertThat( clusterState.readOnlyRoutingNodes().node("node1").numberOfShardsWithState(STARTED), equalTo(5)); assertThat( clusterState.readOnlyRoutingNodes().node("node2").numberOfShardsWithState(STARTED), equalTo(5)); }
public void testBalanceAllNodesStartedAddIndex() { AllocationService strategy = createAllocationService( Settings.builder() .put("cluster.routing.allocation.node_concurrent_recoveries", 10) .put("cluster.routing.allocation.node_initial_primaries_recoveries", 10) .put( ClusterRebalanceAllocationDecider .CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always") .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1) .build()); logger.info("Building initial routing table"); MetaData metaData = MetaData.builder() .put( IndexMetaData.builder("test") .settings(settings(Version.CURRENT)) .numberOfShards(3) .numberOfReplicas(1)) .build(); RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build(); ClusterState clusterState = ClusterState.builder( org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault( Settings.EMPTY)) .metaData(metaData) .routingTable(initialRoutingTable) .build(); assertThat(clusterState.routingTable().index("test").shards().size(), equalTo(3)); for (int i = 0; i < clusterState.routingTable().index("test").shards().size(); i++) { assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2)); assertThat(clusterState.routingTable().index("test").shard(i).shards().size(), equalTo(2)); assertThat( clusterState.routingTable().index("test").shard(i).shards().get(0).state(), equalTo(UNASSIGNED)); assertThat( clusterState.routingTable().index("test").shard(i).shards().get(1).state(), equalTo(UNASSIGNED)); assertThat( clusterState.routingTable().index("test").shard(i).shards().get(0).currentNodeId(), nullValue()); assertThat( clusterState.routingTable().index("test").shard(i).shards().get(1).currentNodeId(), nullValue()); } logger.info("Adding three node and performing rerouting"); clusterState = ClusterState.builder(clusterState) .nodes( DiscoveryNodes.builder() .add(newNode("node1")) .add(newNode("node2")) .add(newNode("node3"))) .build(); ClusterState newState = 
strategy.reroute(clusterState, "reroute"); assertThat(newState, not(equalTo(clusterState))); clusterState = newState; assertThat(clusterState.routingTable().index("test").shards().size(), equalTo(3)); for (int i = 0; i < clusterState.routingTable().index("test").shards().size(); i++) { assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2)); assertThat(clusterState.routingTable().index("test").shard(i).shards().size(), equalTo(2)); assertThat( clusterState.routingTable().index("test").shard(i).primaryShard().state(), equalTo(INITIALIZING)); assertThat( clusterState.routingTable().index("test").shard(i).replicaShards().size(), equalTo(1)); assertThat( clusterState.routingTable().index("test").shard(i).replicaShards().get(0).state(), equalTo(UNASSIGNED)); assertThat( clusterState.routingTable().index("test").shard(i).replicaShards().get(0).currentNodeId(), nullValue()); } logger.info("Another round of rebalancing"); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder(clusterState.nodes())) .build(); newState = strategy.reroute(clusterState, "reroute"); assertThat(newState, equalTo(clusterState)); RoutingNodes routingNodes = clusterState.getRoutingNodes(); newState = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)); assertThat(newState, not(equalTo(clusterState))); clusterState = newState; assertThat(clusterState.routingTable().index("test").shards().size(), equalTo(3)); for (int i = 0; i < clusterState.routingTable().index("test").shards().size(); i++) { assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2)); assertThat(clusterState.routingTable().index("test").shard(i).shards().size(), equalTo(2)); assertThat( clusterState.routingTable().index("test").shard(i).primaryShard().state(), equalTo(STARTED)); assertThat( clusterState.routingTable().index("test").shard(i).replicaShards().size(), equalTo(1)); // backup shards are initializing as well, we make 
sure that they // recover from primary *started* shards in the // IndicesClusterStateService assertThat( clusterState.routingTable().index("test").shard(i).replicaShards().get(0).state(), equalTo(INITIALIZING)); } logger.info("Reroute, nothing should change"); newState = strategy.reroute(clusterState, "reroute"); assertThat(newState, equalTo(clusterState)); logger.info("Start the more shards"); routingNodes = clusterState.getRoutingNodes(); newState = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)); assertThat(newState, not(equalTo(clusterState))); clusterState = newState; routingNodes = clusterState.getRoutingNodes(); assertThat(clusterState.routingTable().index("test").shards().size(), equalTo(3)); for (int i = 0; i < clusterState.routingTable().index("test").shards().size(); i++) { assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2)); assertThat(clusterState.routingTable().index("test").shard(i).shards().size(), equalTo(2)); assertThat( clusterState.routingTable().index("test").shard(i).primaryShard().state(), equalTo(STARTED)); assertThat( clusterState.routingTable().index("test").shard(i).replicaShards().size(), equalTo(1)); } assertThat(routingNodes.node("node1").numberOfShardsWithState(STARTED), equalTo(2)); assertThat(routingNodes.node("node2").numberOfShardsWithState(STARTED), equalTo(2)); assertThat(routingNodes.node("node3").numberOfShardsWithState(STARTED), equalTo(2)); assertThat(routingNodes.node("node1").shardsWithState("test", STARTED).size(), equalTo(2)); assertThat(routingNodes.node("node2").shardsWithState("test", STARTED).size(), equalTo(2)); assertThat(routingNodes.node("node3").shardsWithState("test", STARTED).size(), equalTo(2)); logger.info("Add new index 3 shards 1 replica"); MetaData updatedMetaData = MetaData.builder(clusterState.metaData()) .put( IndexMetaData.builder("test1") .settings( settings(Version.CURRENT) .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 3) 
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1))) .build(); RoutingTable updatedRoutingTable = RoutingTable.builder(clusterState.routingTable()) .addAsNew(updatedMetaData.index("test1")) .build(); clusterState = ClusterState.builder(clusterState) .metaData(updatedMetaData) .routingTable(updatedRoutingTable) .build(); assertThat(clusterState.routingTable().index("test1").shards().size(), equalTo(3)); newState = strategy.reroute(clusterState, "reroute"); assertThat(newState, not(equalTo(clusterState))); clusterState = newState; assertThat(clusterState.routingTable().index("test1").shards().size(), equalTo(3)); for (int i = 0; i < clusterState.routingTable().index("test1").shards().size(); i++) { assertThat(clusterState.routingTable().index("test1").shard(i).size(), equalTo(2)); assertThat(clusterState.routingTable().index("test1").shard(i).shards().size(), equalTo(2)); assertThat( clusterState.routingTable().index("test1").shard(i).primaryShard().state(), equalTo(INITIALIZING)); assertThat( clusterState.routingTable().index("test1").shard(i).replicaShards().size(), equalTo(1)); assertThat( clusterState.routingTable().index("test1").shard(i).replicaShards().get(0).state(), equalTo(UNASSIGNED)); assertThat( clusterState .routingTable() .index("test1") .shard(i) .replicaShards() .get(0) .currentNodeId(), nullValue()); } logger.info("Another round of rebalancing"); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder(clusterState.nodes())) .build(); newState = strategy.reroute(clusterState, "reroute"); assertThat(newState, equalTo(clusterState)); routingNodes = clusterState.getRoutingNodes(); newState = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)); assertThat(newState, not(equalTo(clusterState))); clusterState = newState; assertThat(clusterState.routingTable().index("test1").shards().size(), equalTo(3)); for (int i = 0; i < clusterState.routingTable().index("test1").shards().size(); i++) { 
assertThat(clusterState.routingTable().index("test1").shard(i).size(), equalTo(2)); assertThat(clusterState.routingTable().index("test1").shard(i).shards().size(), equalTo(2)); assertThat( clusterState.routingTable().index("test1").shard(i).primaryShard().state(), equalTo(STARTED)); assertThat( clusterState.routingTable().index("test1").shard(i).replicaShards().size(), equalTo(1)); // backup shards are initializing as well, we make sure that they // recover from primary *started* shards in the // IndicesClusterStateService assertThat( clusterState.routingTable().index("test1").shard(i).replicaShards().get(0).state(), equalTo(INITIALIZING)); } logger.info("Reroute, nothing should change"); newState = strategy.reroute(clusterState, "reroute"); assertThat(newState, equalTo(clusterState)); logger.info("Start the more shards"); routingNodes = clusterState.getRoutingNodes(); newState = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)); assertThat(newState, not(equalTo(clusterState))); clusterState = newState; routingNodes = clusterState.getRoutingNodes(); assertThat(clusterState.routingTable().index("test1").shards().size(), equalTo(3)); for (int i = 0; i < clusterState.routingTable().index("test1").shards().size(); i++) { assertThat(clusterState.routingTable().index("test1").shard(i).size(), equalTo(2)); assertThat(clusterState.routingTable().index("test1").shard(i).shards().size(), equalTo(2)); assertThat( clusterState.routingTable().index("test1").shard(i).primaryShard().state(), equalTo(STARTED)); assertThat( clusterState.routingTable().index("test1").shard(i).replicaShards().size(), equalTo(1)); } assertThat(routingNodes.node("node1").numberOfShardsWithState(STARTED), equalTo(4)); assertThat(routingNodes.node("node2").numberOfShardsWithState(STARTED), equalTo(4)); assertThat(routingNodes.node("node3").numberOfShardsWithState(STARTED), equalTo(4)); assertThat(routingNodes.node("node1").shardsWithState("test1", STARTED).size(), 
equalTo(2)); assertThat(routingNodes.node("node2").shardsWithState("test1", STARTED).size(), equalTo(2)); assertThat(routingNodes.node("node3").shardsWithState("test1", STARTED).size(), equalTo(2)); }
@Test @TestLogging("cluster.routing.allocation.decider:TRACE") public void diskThresholdWithAbsoluteSizesTest() { Settings diskSettings = settingsBuilder() .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true) .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "30b") .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "9b") .build(); Map<String, DiskUsage> usages = new HashMap<>(); usages.put("node1", new DiskUsage("node1", 100, 10)); // 90% used usages.put("node2", new DiskUsage("node2", 100, 10)); // 90% used usages.put("node3", new DiskUsage("node3", 100, 60)); // 40% used usages.put("node4", new DiskUsage("node4", 100, 80)); // 20% used usages.put("node5", new DiskUsage("node5", 100, 85)); // 15% used Map<String, Long> shardSizes = new HashMap<>(); shardSizes.put("[test][0][p]", 10L); // 10 bytes shardSizes.put("[test][0][r]", 10L); final ClusterInfo clusterInfo = new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes)); AllocationDeciders deciders = new AllocationDeciders( ImmutableSettings.EMPTY, new HashSet<>( Arrays.asList( new SameShardAllocationDecider(ImmutableSettings.EMPTY), new DiskThresholdDecider(diskSettings)))); ClusterInfoService cis = new ClusterInfoService() { @Override public ClusterInfo getClusterInfo() { logger.info("--> calling fake getClusterInfo"); return clusterInfo; } }; AllocationService strategy = new AllocationService( settingsBuilder() .put("cluster.routing.allocation.concurrent_recoveries", 10) .put( ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always") .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1) .build(), deciders, new ShardsAllocators(), cis); MetaData metaData = MetaData.builder() .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(2)) .build(); RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build(); ClusterState clusterState 
= ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT) .metaData(metaData) .routingTable(routingTable) .build(); logger.info("--> adding node1 and node2 node"); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))) .build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Primary should initialize, even though both nodes are over the limit initialize assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1)); String nodeWithPrimary, nodeWithoutPrimary; if (clusterState.getRoutingNodes().node("node1").size() == 1) { nodeWithPrimary = "node1"; nodeWithoutPrimary = "node2"; } else { nodeWithPrimary = "node2"; nodeWithoutPrimary = "node1"; } logger.info("--> nodeWithPrimary: {}", nodeWithPrimary); logger.info("--> nodeWithoutPrimary: {}", nodeWithoutPrimary); // Make node without the primary now habitable to replicas usages.put(nodeWithoutPrimary, new DiskUsage(nodeWithoutPrimary, 100, 35)); // 65% used final ClusterInfo clusterInfo2 = new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes)); cis = new ClusterInfoService() { @Override public ClusterInfo getClusterInfo() { logger.info("--> calling fake getClusterInfo"); return clusterInfo2; } }; strategy = new AllocationService( settingsBuilder() .put("cluster.routing.allocation.concurrent_recoveries", 10) .put( ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always") .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1) .build(), deciders, new ShardsAllocators(), cis); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Now the replica should be able to initialize 
assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(2)); logger.info("--> start the shards (primaries)"); routingTable = strategy .applyStartedShards( clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Assert that we're able to start the primary and replica, since they were both initializing assertThat( clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2)); // Assert that node1 got a single shard (the primary), even though its disk usage is too high assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); // Assert that node2 got a single shard (a replica) assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); // Assert that one replica is still unassigned // assertThat(clusterState.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), // equalTo(1)); logger.info("--> adding node3"); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3"))) .build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Assert that the replica is initialized now that node3 is available with enough space assertThat( clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2)); assertThat( clusterState.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1)); logger.info("--> start the shards (replicas)"); routingTable = strategy .applyStartedShards( clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // 
Assert that all replicas could be started assertThat( clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(3)); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); logger.info("--> changing decider settings"); // Set the low threshold to 60 instead of 70 // Set the high threshold to 70 instead of 80 // node2 now should not have new shards allocated to it, but shards can remain diskSettings = settingsBuilder() .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true) .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "40b") .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "30b") .build(); deciders = new AllocationDeciders( ImmutableSettings.EMPTY, new HashSet<>( Arrays.asList( new SameShardAllocationDecider(ImmutableSettings.EMPTY), new DiskThresholdDecider(diskSettings)))); strategy = new AllocationService( settingsBuilder() .put("cluster.routing.allocation.concurrent_recoveries", 10) .put( ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always") .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1) .build(), deciders, new ShardsAllocators(), cis); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Shards remain started assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(3)); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); logger.info("--> changing settings again"); // Set the low threshold to 50 instead of 
60 // Set the high threshold to 60 instead of 70 // node2 now should not have new shards allocated to it, and shards cannot remain diskSettings = settingsBuilder() .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true) .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "50b") .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "40b") .build(); deciders = new AllocationDeciders( ImmutableSettings.EMPTY, new HashSet<>( Arrays.asList( new SameShardAllocationDecider(ImmutableSettings.EMPTY), new DiskThresholdDecider(diskSettings)))); strategy = new AllocationService( settingsBuilder() .put("cluster.routing.allocation.concurrent_recoveries", 10) .put( ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always") .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1) .build(), deciders, new ShardsAllocators(), cis); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Shards remain started assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(3)); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); // Shard hasn't been moved off of node2 yet because there's nowhere for it to go assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); logger.info("--> adding node4"); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node4"))) .build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Shards remain started assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2)); // One 
shard is relocating off of node1 assertThat(clusterState.routingNodes().shardsWithState(RELOCATING).size(), equalTo(1)); assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1)); logger.info("--> apply INITIALIZING shards"); routingTable = strategy .applyStartedShards( clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // primary shard already has been relocated away assertThat(clusterState.getRoutingNodes().node(nodeWithPrimary).size(), equalTo(0)); // node with increased space still has its shard assertThat(clusterState.getRoutingNodes().node(nodeWithoutPrimary).size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node4").size(), equalTo(1)); logger.info("--> adding node5"); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node5"))) .build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Shards remain started on node3 and node4 assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2)); // One shard is relocating off of node2 now assertThat(clusterState.routingNodes().shardsWithState(RELOCATING).size(), equalTo(1)); // Initializing on node5 assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1)); logger.info("--> apply INITIALIZING shards"); routingTable = strategy .applyStartedShards( clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logger.info("--> final cluster state:"); logShardStates(clusterState); // 
Node1 still has no shards because it has no space for them assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0)); // Node5 is available now, so the shard is moved off of node2 assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0)); assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node4").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node5").size(), equalTo(1)); }
/**
 * Verifies which reported shard copies {@code AllocationService.applyStartedShards} accepts as
 * matching an entry of the routing table.
 *
 * <p>The cluster state holds three primaries-only shards of index {@code test}: one INITIALIZING
 * on node1, one STARTED on node2 and one RELOCATING from node1 to node2. For each of them the
 * test reports "started" copies that either match (differing only in the last, randomised
 * constructor argument) or deliberately differ in primary flag, current node or relocating node,
 * and asserts via {@code result.changed()} whether the routing table was modified.
 *
 * <p>The method name (missing a 't') is kept unchanged so existing references to the test keep
 * working.
 */
@Test
public void tesStartedShardsMatching() {
  AllocationService allocation = createAllocationService();

  logger.info("--> building initial cluster state");
  final IndexMetaData indexMetaData =
      IndexMetaData.builder("test").numberOfShards(3).numberOfReplicas(0).build();
  ClusterState.Builder stateBuilder =
      ClusterState.builder(ClusterName.DEFAULT)
          .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
          .metaData(MetaData.builder().put(indexMetaData, false));

  // One shard per interesting state. The primary flag is randomised; the negative cases below
  // flip it relative to whatever was drawn here.
  final ImmutableShardRouting initShard =
      new ImmutableShardRouting(
          "test", 0, "node1", randomBoolean(), ShardRoutingState.INITIALIZING, 1);
  final ImmutableShardRouting startedShard =
      new ImmutableShardRouting(
          "test", 1, "node2", randomBoolean(), ShardRoutingState.STARTED, 1);
  final ImmutableShardRouting relocatingShard =
      new ImmutableShardRouting(
          "test", 2, "node1", "node2", randomBoolean(), ShardRoutingState.RELOCATING, 1);
  stateBuilder.routingTable(
      RoutingTable.builder()
          .add(
              IndexRoutingTable.builder("test")
                  .addIndexShard(
                      new IndexShardRoutingTable.Builder(initShard.shardId(), true)
                          .addShard(initShard)
                          .build())
                  .addIndexShard(
                      new IndexShardRoutingTable.Builder(startedShard.shardId(), true)
                          .addShard(startedShard)
                          .build())
                  .addIndexShard(
                      new IndexShardRoutingTable.Builder(relocatingShard.shardId(), true)
                          .addShard(relocatingShard)
                          .build())));

  ClusterState state = stateBuilder.build();

  logger.info("--> test starting of shard");

  // Same identity as initShard; only the last constructor argument is randomised.
  RoutingAllocation.Result result =
      allocation.applyStartedShards(
          state,
          Arrays.asList(
              new ImmutableShardRouting(
                  initShard.index(),
                  initShard.id(),
                  initShard.currentNodeId(),
                  initShard.relocatingNodeId(),
                  initShard.primary(),
                  ShardRoutingState.INITIALIZING,
                  randomInt())),
          false);
  assertTrue(
      "failed to start "
          + initShard
          + "\ncurrent routing table:"
          + result.routingTable().prettyPrint(),
      result.changed());
  // Message fix: a separating space was missing between the shard description and the text.
  assertTrue(
      initShard
          + " isn't started\ncurrent routing table:"
          + result.routingTable().prettyPrint(),
      result
              .routingTable()
              .index("test")
              .shard(initShard.id())
              .countWithState(ShardRoutingState.STARTED)
          == 1);

  logger.info("--> testing shard variants that shouldn't match the started shard");

  // Flipped primary flag.
  result =
      allocation.applyStartedShards(
          state,
          Arrays.asList(
              new ImmutableShardRouting(
                  initShard.index(),
                  initShard.id(),
                  initShard.currentNodeId(),
                  initShard.relocatingNodeId(),
                  !initShard.primary(),
                  ShardRoutingState.INITIALIZING,
                  1)),
          false);
  assertFalse(
      "wrong primary flag shouldn't start shard "
          + initShard
          + "\ncurrent routing table:"
          + result.routingTable().prettyPrint(),
      result.changed());

  // Reported on an unknown current node, with initShard's node as the relocating node.
  result =
      allocation.applyStartedShards(
          state,
          Arrays.asList(
              new ImmutableShardRouting(
                  initShard.index(),
                  initShard.id(),
                  "some_node",
                  initShard.currentNodeId(),
                  initShard.primary(),
                  ShardRoutingState.INITIALIZING,
                  1)),
          false);
  assertFalse(
      "relocating shard from node shouldn't start shard "
          + initShard
          + "\ncurrent routing table:"
          + result.routingTable().prettyPrint(),
      result.changed());

  // Reported on the right current node but with an unknown relocating node.
  result =
      allocation.applyStartedShards(
          state,
          Arrays.asList(
              new ImmutableShardRouting(
                  initShard.index(),
                  initShard.id(),
                  initShard.currentNodeId(),
                  "some_node",
                  initShard.primary(),
                  ShardRoutingState.INITIALIZING,
                  1)),
          false);
  assertFalse(
      "relocating shard to node shouldn't start shard "
          + initShard
          + "\ncurrent routing table:"
          + result.routingTable().prettyPrint(),
      result.changed());

  logger.info("--> testing double starting");

  // Reporting an already STARTED shard again must leave the routing table untouched.
  result =
      allocation.applyStartedShards(
          state,
          Arrays.asList(
              new ImmutableShardRouting(
                  startedShard.index(),
                  startedShard.id(),
                  startedShard.currentNodeId(),
                  startedShard.relocatingNodeId(),
                  startedShard.primary(),
                  ShardRoutingState.INITIALIZING,
                  1)),
          false);
  assertFalse(
      "duplicate starting of the same shard should be ignored \ncurrent routing table:"
          + result.routingTable().prettyPrint(),
      result.changed());

  logger.info("--> testing starting of relocating shards");

  // The relocation target is described with current/relocating node ids swapped relative to
  // the RELOCATING source entry.
  result =
      allocation.applyStartedShards(
          state,
          Arrays.asList(
              new ImmutableShardRouting(
                  relocatingShard.index(),
                  relocatingShard.id(),
                  relocatingShard.relocatingNodeId(),
                  relocatingShard.currentNodeId(),
                  relocatingShard.primary(),
                  ShardRoutingState.INITIALIZING,
                  randomInt())),
          false);
  assertTrue(
      "failed to start "
          + relocatingShard
          + "\ncurrent routing table:"
          + result.routingTable().prettyPrint(),
      result.changed());
  ShardRouting shardRouting =
      result.routingTable().index("test").shard(relocatingShard.id()).getShards().get(0);
  assertThat(shardRouting.state(), equalTo(ShardRoutingState.STARTED));
  assertThat(shardRouting.currentNodeId(), equalTo("node2"));
  assertThat(shardRouting.relocatingNodeId(), nullValue());

  logger.info("--> testing shard variants that shouldn't match the relocating shard");

  // Flipped primary flag.
  result =
      allocation.applyStartedShards(
          state,
          Arrays.asList(
              new ImmutableShardRouting(
                  relocatingShard.index(),
                  relocatingShard.id(),
                  relocatingShard.relocatingNodeId(),
                  relocatingShard.currentNodeId(),
                  !relocatingShard.primary(),
                  ShardRoutingState.INITIALIZING,
                  1)),
          false);
  assertFalse(
      "wrong primary flag shouldn't start shard "
          + relocatingShard
          + "\ncurrent routing table:"
          + result.routingTable().prettyPrint(),
      result.changed());

  // Unknown current (target) node.
  result =
      allocation.applyStartedShards(
          state,
          Arrays.asList(
              new ImmutableShardRouting(
                  relocatingShard.index(),
                  relocatingShard.id(),
                  "some_node",
                  relocatingShard.currentNodeId(),
                  relocatingShard.primary(),
                  ShardRoutingState.INITIALIZING,
                  1)),
          false);
  assertFalse(
      "relocating shard to a different node shouldn't start shard "
          + relocatingShard
          + "\ncurrent routing table:"
          + result.routingTable().prettyPrint(),
      result.changed());

  // Unknown relocating (source) node.
  result =
      allocation.applyStartedShards(
          state,
          Arrays.asList(
              new ImmutableShardRouting(
                  relocatingShard.index(),
                  relocatingShard.id(),
                  relocatingShard.relocatingNodeId(),
                  "some_node",
                  relocatingShard.primary(),
                  ShardRoutingState.INITIALIZING,
                  1)),
          false);
  assertFalse(
      "relocating shard from a different node shouldn't start shard "
          + relocatingShard
          + "\ncurrent routing table:"
          + result.routingTable().prettyPrint(),
      result.changed());

  // Built with the constructor overload that takes no relocating node id at all.
  result =
      allocation.applyStartedShards(
          state,
          Arrays.asList(
              new ImmutableShardRouting(
                  relocatingShard.index(),
                  relocatingShard.id(),
                  relocatingShard.relocatingNodeId(),
                  relocatingShard.primary(),
                  ShardRoutingState.INITIALIZING,
                  1)),
          false);
  // Message fix: a separating space was missing before the shard description.
  assertFalse(
      "non-relocating shard shouldn't start shard "
          + relocatingShard
          + "\ncurrent routing table:"
          + result.routingTable().prettyPrint(),
      result.changed());
}
/**
 * Moves the only shard of a one-shard, zero-replica index from the node it was allocated on to
 * the other node using a {@code MoveAllocationCommand}, and checks the intermediate
 * RELOCATING/INITIALIZING states as well as the final placement.
 */
@Test
public void moveShardCommand() {
  final AllocationService allocation =
      new AllocationService(
          settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build());

  logger.info("creating an index with 1 shard, no replica");
  final MetaData metaData =
      newMetaDataBuilder()
          .put(newIndexMetaDataBuilder("test").numberOfShards(1).numberOfReplicas(0))
          .build();
  ClusterState clusterState =
      newClusterStateBuilder()
          .metaData(metaData)
          .routingTable(routingTable().addAsNew(metaData.index("test")).build())
          .build();

  logger.info("adding two nodes and performing rerouting");
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .nodes(newNodesBuilder().put(newNode("node1")).put(newNode("node2")))
          .build();
  RoutingAllocationResult outcome = allocation.reroute(clusterState);
  clusterState =
      newClusterStateBuilder().state(clusterState).routingTable(outcome.routingTable()).build();

  logger.info("start primary shard");
  outcome =
      allocation.applyStartedShards(
          clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING));
  clusterState =
      newClusterStateBuilder().state(clusterState).routingTable(outcome.routingTable()).build();

  logger.info("move the shard");
  // The move goes from whichever node currently holds the primary to the other one.
  final String existingNodeId =
      clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId();
  final String toNodeId = "node1".equals(existingNodeId) ? "node2" : "node1";
  final ShardId shardToMove = new ShardId("test", 0);

  outcome =
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new MoveAllocationCommand(shardToMove, existingNodeId, toNodeId)));
  assertThat(outcome.changed(), equalTo(true));
  clusterState =
      newClusterStateBuilder().state(clusterState).routingTable(outcome.routingTable()).build();

  // While the move is in flight the source copy is RELOCATING and the target INITIALIZING.
  assertThat(
      clusterState.routingNodes().node(existingNodeId).shards().get(0).state(),
      equalTo(ShardRoutingState.RELOCATING));
  assertThat(
      clusterState.routingNodes().node(toNodeId).shards().get(0).state(),
      equalTo(ShardRoutingState.INITIALIZING));

  logger.info("finish moving the shard");
  outcome =
      allocation.applyStartedShards(
          clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING));
  clusterState =
      newClusterStateBuilder().state(clusterState).routingTable(outcome.routingTable()).build();

  // After the target is started, the source node is empty and the target copy is STARTED.
  assertThat(clusterState.routingNodes().node(existingNodeId).shards().isEmpty(), equalTo(true));
  assertThat(
      clusterState.routingNodes().node(toNodeId).shards().get(0).state(),
      equalTo(ShardRoutingState.STARTED));
}
/**
 * Exercises {@link CancelAllocationCommand} against an index with one shard and one replica on
 * a three-node cluster.
 *
 * <p>The allocation service is built with both {@code disable_new_allocation} and {@code
 * disable_allocation} set to {@code true}; the test asserts that a plain reroute then leaves no
 * shard initializing, so every placement below is driven by an explicit command.
 *
 * <p>Scenarios covered, in order:
 *
 * <ul>
 *   <li>cancelling the primary on node1 with the third command argument {@code false} is
 *       rejected with {@link ElasticSearchIllegalArgumentException}, both while the primary is
 *       initializing and after it is started, and also while a replica is initializing;
 *   <li>cancelling the replica on node2 succeeds while it is initializing and after it has
 *       started, leaving node2 and node3 empty;
 *   <li>cancelling the started primary with the third argument {@code true} succeeds and the
 *       started shard on node2 reports itself as primary.
 * </ul>
 *
 * <p>The "must fail" checks throw {@link AssertionError} explicitly instead of relying on the
 * {@code assert} keyword, so they are enforced even when the JVM runs without {@code -ea}.
 */
@Test
public void cancelCommand() {
  AllocationService allocation =
      new AllocationService(
          settingsBuilder()
              .put("cluster.routing.allocation.disable_new_allocation", true)
              .put("cluster.routing.allocation.disable_allocation", true)
              .build());

  logger.info("--> building initial routing table");
  MetaData metaData =
      newMetaDataBuilder()
          .put(newIndexMetaDataBuilder("test").numberOfShards(1).numberOfReplicas(1))
          .build();
  RoutingTable routingTable = routingTable().addAsNew(metaData.index("test")).build();
  ClusterState clusterState =
      newClusterStateBuilder().metaData(metaData).routingTable(routingTable).build();

  logger.info("--> adding 3 nodes");
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .nodes(
              newNodesBuilder().put(newNode("node1")).put(newNode("node2")).put(newNode("node3")))
          .build();
  RoutingAllocationResult rerouteResult = allocation.reroute(clusterState);
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  // With the settings above, a plain reroute must not start initializing anything.
  assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(0));

  logger.info("--> allocating with primary flag set to true");
  rerouteResult =
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new AllocateAllocationCommand(new ShardId("test", 0), "node1", true)));
  assertThat(rerouteResult.changed(), equalTo(true));
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node1").shardsWithState(INITIALIZING).size(), equalTo(1));
  assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(0));

  logger.info("--> cancel primary allocation, make sure it fails...");
  try {
    allocation.reroute(
        clusterState,
        new AllocationCommands(
            new CancelAllocationCommand(new ShardId("test", 0), "node1", false)));
    // Unconditional failure: a bare `assert false` is skipped when assertions are disabled.
    throw new AssertionError(
        "expected ElasticSearchIllegalArgumentException when cancelling the initializing primary");
  } catch (ElasticSearchIllegalArgumentException expected) {
    // expected: the command is rejected
  }

  logger.info("--> start the primary shard");
  rerouteResult =
      allocation.applyStartedShards(
          clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING));
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
  assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(0));

  logger.info("--> cancel primary allocation, make sure it fails...");
  try {
    allocation.reroute(
        clusterState,
        new AllocationCommands(
            new CancelAllocationCommand(new ShardId("test", 0), "node1", false)));
    throw new AssertionError(
        "expected ElasticSearchIllegalArgumentException when cancelling the started primary");
  } catch (ElasticSearchIllegalArgumentException expected) {
    // expected: the command is rejected
  }

  logger.info("--> allocate the replica shard on on the second node");
  rerouteResult =
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new AllocateAllocationCommand(new ShardId("test", 0), "node2", false)));
  assertThat(rerouteResult.changed(), equalTo(true));
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
  assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node2").shardsWithState(INITIALIZING).size(), equalTo(1));

  // Cancels the copy that is still initializing on node2.
  logger.info("--> cancel the relocation allocation");
  rerouteResult =
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new CancelAllocationCommand(new ShardId("test", 0), "node2", false)));
  assertThat(rerouteResult.changed(), equalTo(true));
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
  assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(0));
  assertThat(clusterState.routingNodes().node("node3").shards().size(), equalTo(0));

  logger.info("--> allocate the replica shard on on the second node");
  rerouteResult =
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new AllocateAllocationCommand(new ShardId("test", 0), "node2", false)));
  assertThat(rerouteResult.changed(), equalTo(true));
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
  assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node2").shardsWithState(INITIALIZING).size(), equalTo(1));

  logger.info("--> cancel the primary being replicated, make sure it fails");
  try {
    allocation.reroute(
        clusterState,
        new AllocationCommands(
            new CancelAllocationCommand(new ShardId("test", 0), "node1", false)));
    throw new AssertionError(
        "expected ElasticSearchIllegalArgumentException when cancelling the primary while the"
            + " replica is initializing");
  } catch (ElasticSearchIllegalArgumentException expected) {
    // expected: the command is rejected
  }

  logger.info("--> start the replica shard");
  rerouteResult =
      allocation.applyStartedShards(
          clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING));
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
  assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1));

  logger.info("--> cancel allocation of the replica shard");
  rerouteResult =
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new CancelAllocationCommand(new ShardId("test", 0), "node2", false)));
  assertThat(rerouteResult.changed(), equalTo(true));
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
  assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(0));
  assertThat(clusterState.routingNodes().node("node3").shards().size(), equalTo(0));

  logger.info("--> allocate the replica shard on on the second node");
  rerouteResult =
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new AllocateAllocationCommand(new ShardId("test", 0), "node2", false)));
  assertThat(rerouteResult.changed(), equalTo(true));
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
  assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node2").shardsWithState(INITIALIZING).size(), equalTo(1));

  logger.info("--> start the replica shard");
  rerouteResult =
      allocation.applyStartedShards(
          clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING));
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
  assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1));

  logger.info("--> cancel the primary allocation (with allow_primary set to true)");
  rerouteResult =
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new CancelAllocationCommand(new ShardId("test", 0), "node1", true)));
  assertThat(rerouteResult.changed(), equalTo(true));
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  // The copy that was the replica on node2 must now be flagged as primary.
  assertThat(
      clusterState.routingNodes().node("node2").shardsWithState(STARTED).get(0).primary(),
      equalTo(true));
  assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(0));
  assertThat(clusterState.routingNodes().node("node3").shards().size(), equalTo(0));
}
/**
 * Exercises {@link AllocateAllocationCommand} against an index with one shard and one replica
 * on a three-node cluster.
 *
 * <p>The allocation service is built with both {@code disable_new_allocation} and {@code
 * disable_allocation} set to {@code true}; the test asserts that a plain reroute then leaves no
 * shard initializing, so every placement below is driven by an explicit command.
 *
 * <p>Scenarios covered, in order:
 *
 * <ul>
 *   <li>allocating with the third command argument {@code false} before any primary has been
 *       allocated is rejected with {@link ElasticSearchIllegalArgumentException};
 *   <li>allocating with the third argument {@code true} puts an initializing shard on node1;
 *   <li>allocating the replica onto node1, which already holds the started primary, is rejected;
 *   <li>allocating the replica onto node2 succeeds;
 *   <li>once both copies are started, a further allocate onto node3 is rejected.
 * </ul>
 *
 * <p>The "must fail" checks throw {@link AssertionError} explicitly instead of relying on the
 * {@code assert} keyword, so they are enforced even when the JVM runs without {@code -ea}.
 */
@Test
public void allocateCommand() {
  AllocationService allocation =
      new AllocationService(
          settingsBuilder()
              .put("cluster.routing.allocation.disable_new_allocation", true)
              .put("cluster.routing.allocation.disable_allocation", true)
              .build());

  logger.info("--> building initial routing table");
  MetaData metaData =
      newMetaDataBuilder()
          .put(newIndexMetaDataBuilder("test").numberOfShards(1).numberOfReplicas(1))
          .build();
  RoutingTable routingTable = routingTable().addAsNew(metaData.index("test")).build();
  ClusterState clusterState =
      newClusterStateBuilder().metaData(metaData).routingTable(routingTable).build();

  logger.info("--> adding 3 nodes on same rack and do rerouting");
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .nodes(
              newNodesBuilder().put(newNode("node1")).put(newNode("node2")).put(newNode("node3")))
          .build();
  RoutingAllocationResult rerouteResult = allocation.reroute(clusterState);
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  // With the settings above, a plain reroute must not start initializing anything.
  assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(0));

  logger.info("--> allocating with primary flag set to false, should fail");
  try {
    allocation.reroute(
        clusterState,
        new AllocationCommands(
            new AllocateAllocationCommand(new ShardId("test", 0), "node1", false)));
    // Unconditional failure: a bare `assert false` is skipped when assertions are disabled.
    throw new AssertionError(
        "expected ElasticSearchIllegalArgumentException when allocating with the primary flag"
            + " set to false before the primary is allocated");
  } catch (ElasticSearchIllegalArgumentException expected) {
    // expected: the command is rejected
  }

  logger.info("--> allocating with primary flag set to true");
  rerouteResult =
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new AllocateAllocationCommand(new ShardId("test", 0), "node1", true)));
  assertThat(rerouteResult.changed(), equalTo(true));
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node1").shardsWithState(INITIALIZING).size(), equalTo(1));
  assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(0));

  logger.info("--> start the primary shard");
  rerouteResult =
      allocation.applyStartedShards(
          clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING));
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
  assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(0));

  logger.info("--> allocate the replica shard on the primary shard node, should fail");
  try {
    allocation.reroute(
        clusterState,
        new AllocationCommands(
            new AllocateAllocationCommand(new ShardId("test", 0), "node1", false)));
    throw new AssertionError(
        "expected ElasticSearchIllegalArgumentException when allocating the replica on the node"
            + " that holds the primary");
  } catch (ElasticSearchIllegalArgumentException expected) {
    // expected: the command is rejected
  }

  logger.info("--> allocate the replica shard on on the second node");
  rerouteResult =
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new AllocateAllocationCommand(new ShardId("test", 0), "node2", false)));
  assertThat(rerouteResult.changed(), equalTo(true));
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
  assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node2").shardsWithState(INITIALIZING).size(), equalTo(1));

  logger.info("--> start the replica shard");
  rerouteResult =
      allocation.applyStartedShards(
          clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING));
  clusterState =
      newClusterStateBuilder()
          .state(clusterState)
          .routingTable(rerouteResult.routingTable())
          .build();
  assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
  assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(1));
  assertThat(
      clusterState.routingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1));

  logger.info("--> verify that we fail when there are no unassigned shards");
  try {
    allocation.reroute(
        clusterState,
        new AllocationCommands(
            new AllocateAllocationCommand(new ShardId("test", 0), "node3", false)));
    throw new AssertionError(
        "expected ElasticSearchIllegalArgumentException when no unassigned shard is left to"
            + " allocate");
  } catch (ElasticSearchIllegalArgumentException expected) {
    // expected: the command is rejected
  }
}
public void testSingleRetryOnIgnore() { ClusterState clusterState = createInitialClusterState(); RoutingTable routingTable = clusterState.routingTable(); final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); // now fail it N-1 times for (int i = 0; i < retries - 1; i++) { List<FailedRerouteAllocation.FailedShard> failedShards = Collections.singletonList( new FailedRerouteAllocation.FailedShard( routingTable.index("idx").shard(0).shards().get(0), "boom" + i, new UnsupportedOperationException())); RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); assertTrue(result.changed()); routingTable = result.routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertEquals(routingTable.index("idx").shards().size(), 1); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); assertEquals( routingTable .index("idx") .shard(0) .shards() .get(0) .unassignedInfo() .getNumFailedAllocations(), i + 1); assertEquals( routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom" + i); } // now we go and check that we are actually stick to unassigned on the next failure List<FailedRerouteAllocation.FailedShard> failedShards = Collections.singletonList( new FailedRerouteAllocation.FailedShard( routingTable.index("idx").shard(0).shards().get(0), "boom", new UnsupportedOperationException())); RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); assertTrue(result.changed()); routingTable = result.routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertEquals(routingTable.index("idx").shards().size(), 1); assertEquals( routingTable .index("idx") .shard(0) .shards() .get(0) .unassignedInfo() .getNumFailedAllocations(), retries); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); 
assertEquals( routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); result = strategy.reroute( clusterState, new AllocationCommands(), false, true); // manual reroute should retry once assertTrue(result.changed()); routingTable = result.routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertEquals(routingTable.index("idx").shards().size(), 1); assertEquals( routingTable .index("idx") .shard(0) .shards() .get(0) .unassignedInfo() .getNumFailedAllocations(), retries); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); assertEquals( routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); // now we go and check that we are actually stick to unassigned on the next failure ie. no retry failedShards = Collections.singletonList( new FailedRerouteAllocation.FailedShard( routingTable.index("idx").shard(0).shards().get(0), "boom", new UnsupportedOperationException())); result = strategy.applyFailedShards(clusterState, failedShards); assertTrue(result.changed()); routingTable = result.routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertEquals(routingTable.index("idx").shards().size(), 1); assertEquals( routingTable .index("idx") .shard(0) .shards() .get(0) .unassignedInfo() .getNumFailedAllocations(), retries + 1); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); assertEquals( routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); }
/**
 * Drives a {@code ClusterRerouteResponseAckedClusterStateUpdateTask} through a dry run, a
 * series of real reroutes interleaved with shard failures up to the retry limit, a reroute
 * without {@code retryFailed}, and finally a reroute with {@code retryFailed} set, checking
 * the state of the single "idx" shard and its failed-allocation counter along the way.
 *
 * <p>Assertions pass the expected (or unexpected) value first, matching JUnit's parameter
 * order, and an unexpected listener failure is surfaced as an {@link AssertionError} instead
 * of being silently dropped.
 */
public void testClusterStateUpdateTask() {
  AllocationService allocationService =
      new AllocationService(
          Settings.builder().build(),
          new AllocationDeciders(
              Settings.EMPTY,
              Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))),
          NoopGatewayAllocator.INSTANCE,
          new BalancedShardsAllocator(Settings.EMPTY),
          EmptyClusterInfoService.INSTANCE);
  ClusterState clusterState = createInitialClusterState(allocationService);
  ClusterRerouteRequest req = new ClusterRerouteRequest();
  req.dryRun(true);
  AtomicReference<ClusterRerouteResponse> responseRef = new AtomicReference<>();
  ActionListener<ClusterRerouteResponse> responseActionListener =
      new ActionListener<ClusterRerouteResponse>() {
        @Override
        public void onResponse(ClusterRerouteResponse clusterRerouteResponse) {
          responseRef.set(clusterRerouteResponse);
        }

        @Override
        public void onFailure(Exception e) {
          // No step of this test expects a failure; report it instead of swallowing it.
          throw new AssertionError("unexpected reroute failure", e);
        }
      };
  TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask task =
      new TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask(
          logger, allocationService, req, responseActionListener);

  // Dry run: the task returns the very same state instance, while the response carries another.
  ClusterState execute = task.execute(clusterState);
  assertSame(clusterState, execute);
  task.onAllNodesAcked(null);
  assertNotSame(execute, responseRef.get().getState());

  req.dryRun(false); // from here on the reroute is really applied
  final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY);

  // Reroute and then fail the shard, `retries` times in total.
  for (int i = 0; i < retries; i++) {
    ClusterState newState = task.execute(clusterState);
    assertNotSame(clusterState, newState); // dry-run=false
    clusterState = newState;
    RoutingTable routingTable = clusterState.routingTable();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
    assertEquals(
        i,
        routingTable
            .index("idx")
            .shard(0)
            .shards()
            .get(0)
            .unassignedInfo()
            .getNumFailedAllocations());

    List<FailedRerouteAllocation.FailedShard> failedShards =
        Collections.singletonList(
            new FailedRerouteAllocation.FailedShard(
                routingTable.index("idx").shard(0).shards().get(0),
                "boom" + i,
                new UnsupportedOperationException()));
    RoutingAllocation.Result result =
        allocationService.applyFailedShards(clusterState, failedShards);
    assertTrue(result.changed());
    clusterState = ClusterState.builder(clusterState).routingTable(result.routingTable()).build();
    routingTable = clusterState.routingTable();
    assertEquals(1, routingTable.index("idx").shards().size());
    // Only the last failure is expected to leave the shard UNASSIGNED.
    assertEquals(
        i == retries - 1 ? UNASSIGNED : INITIALIZING,
        routingTable.index("idx").shard(0).shards().get(0).state());
    assertEquals(
        i + 1,
        routingTable
            .index("idx")
            .shard(0)
            .shards()
            .get(0)
            .unassignedInfo()
            .getNumFailedAllocations());
  }

  // without retry_failed we won't allocate that shard
  ClusterState newState = task.execute(clusterState);
  assertNotSame(clusterState, newState); // dry-run=false
  task.onAllNodesAcked(null);
  assertSame(newState, responseRef.get().getState());
  // NOTE(review): the checks below read the routing table of the state passed INTO the task,
  // not of newState — confirm whether newState.routingTable() was intended.
  RoutingTable routingTable = clusterState.routingTable();
  assertEquals(1, routingTable.index("idx").shards().size());
  assertEquals(UNASSIGNED, routingTable.index("idx").shard(0).shards().get(0).state());
  assertEquals(
      retries,
      routingTable
          .index("idx")
          .shard(0)
          .shards()
          .get(0)
          .unassignedInfo()
          .getNumFailedAllocations());

  req.setRetryFailed(true); // now we manually retry and get the shard back into initializing
  newState = task.execute(clusterState);
  assertNotSame(clusterState, newState); // dry-run=false
  clusterState = newState;
  routingTable = clusterState.routingTable();
  assertEquals(1, routingTable.index("idx").shards().size());
  assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
  assertEquals(
      retries,
      routingTable
          .index("idx")
          .shard(0)
          .shards()
          .get(0)
          .unassignedInfo()
          .getNumFailedAllocations());
}
/**
 * Walks index "test" (one shard) through a replica-count change from 1 to 2 and back to 1,
 * asserting the routing table contents after every step: initial allocation on two nodes,
 * adding a replica, starting it on a newly added third node, removing a replica, and a final
 * reroute that must hand back the same routing table instance.
 */
@Test
public void testUpdateNumberOfReplicas() {
  final String index = "test";
  final String thirdNode = "node3";

  AllocationService allocator =
      createAllocationService(
          settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build());

  logger.info("Building initial routing table");

  MetaData meta =
      MetaData.builder()
          .put(IndexMetaData.builder(index).numberOfShards(1).numberOfReplicas(1))
          .build();
  RoutingTable table = RoutingTable.builder().addAsNew(meta.index(index)).build();
  ClusterState state = ClusterState.builder().metaData(meta).routingTable(table).build();

  // Freshly built: one shard group with two copies, both unassigned and without a node.
  assertThat(table.index(index).shards().size(), equalTo(1));
  assertThat(table.index(index).shard(0).size(), equalTo(2));
  assertThat(table.index(index).shard(0).shards().size(), equalTo(2));
  assertThat(table.index(index).shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
  assertThat(table.index(index).shard(0).shards().get(1).state(), equalTo(UNASSIGNED));
  assertThat(table.index(index).shard(0).shards().get(0).currentNodeId(), nullValue());
  assertThat(table.index(index).shard(0).shards().get(1).currentNodeId(), nullValue());

  logger.info("Adding two nodes and performing rerouting");
  state =
      ClusterState.builder(state)
          .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
          .build();
  table = allocator.reroute(state).routingTable();
  state = ClusterState.builder(state).routingTable(table).build();

  logger.info("Start all the primary shards");
  RoutingNodes nodesView = state.routingNodes();
  table = allocator.applyStartedShards(state, nodesView.shardsWithState(INITIALIZING)).routingTable();
  state = ClusterState.builder(state).routingTable(table).build();

  logger.info("Start all the replica shards");
  nodesView = state.routingNodes();
  // Remember the table before each mutation so identity can be compared afterwards.
  RoutingTable previousTable = table;
  table = allocator.applyStartedShards(state, nodesView.shardsWithState(INITIALIZING)).routingTable();
  state = ClusterState.builder(state).routingTable(table).build();

  final String primaryNode = table.index(index).shard(0).primaryShard().currentNodeId();
  final String replicaNode = table.index(index).shard(0).replicaShards().get(0).currentNodeId();
  assertThat(primaryNode, not(equalTo(replicaNode)));

  assertThat(previousTable == table, equalTo(false));
  assertThat(table.index(index).shards().size(), equalTo(1));
  assertThat(table.index(index).shard(0).size(), equalTo(2));
  assertThat(table.index(index).shard(0).shards().size(), equalTo(2));
  assertThat(table.index(index).shard(0).primaryShard().state(), equalTo(STARTED));
  assertThat(table.index(index).shard(0).primaryShard().currentNodeId(), equalTo(primaryNode));
  assertThat(table.index(index).shard(0).replicaShards().size(), equalTo(1));
  assertThat(table.index(index).shard(0).replicaShards().get(0).state(), equalTo(STARTED));
  assertThat(
      table.index(index).shard(0).replicaShards().get(0).currentNodeId(), equalTo(replicaNode));

  logger.info("add another replica");
  nodesView = state.routingNodes();
  previousTable = table;
  table = RoutingTable.builder(table).updateNumberOfReplicas(2).build();
  meta = MetaData.builder(state.metaData()).updateNumberOfReplicas(2).build();
  state = ClusterState.builder(state).routingTable(table).metaData(meta).build();

  assertThat(state.metaData().index(index).numberOfReplicas(), equalTo(2));

  // The extra replica exists in the table but has nowhere to go yet.
  assertThat(previousTable == table, equalTo(false));
  assertThat(table.index(index).shards().size(), equalTo(1));
  assertThat(table.index(index).shard(0).size(), equalTo(3));
  assertThat(table.index(index).shard(0).primaryShard().state(), equalTo(STARTED));
  assertThat(table.index(index).shard(0).primaryShard().currentNodeId(), equalTo(primaryNode));
  assertThat(table.index(index).shard(0).replicaShards().size(), equalTo(2));
  assertThat(table.index(index).shard(0).replicaShards().get(0).state(), equalTo(STARTED));
  assertThat(
      table.index(index).shard(0).replicaShards().get(0).currentNodeId(), equalTo(replicaNode));
  assertThat(table.index(index).shard(0).replicaShards().get(1).state(), equalTo(UNASSIGNED));

  logger.info("Add another node and start the added replica");
  state =
      ClusterState.builder(state)
          .nodes(DiscoveryNodes.builder(state.nodes()).put(newNode(thirdNode)))
          .build();
  previousTable = table;
  table = allocator.reroute(state).routingTable();
  state = ClusterState.builder(state).routingTable(table).build();

  assertThat(previousTable == table, equalTo(false));
  assertThat(table.index(index).shards().size(), equalTo(1));
  assertThat(table.index(index).shard(0).size(), equalTo(3));
  assertThat(table.index(index).shard(0).primaryShard().state(), equalTo(STARTED));
  assertThat(table.index(index).shard(0).primaryShard().currentNodeId(), equalTo(primaryNode));
  assertThat(table.index(index).shard(0).replicaShards().size(), equalTo(2));
  assertThat(table.index(index).shard(0).replicaShardsWithState(STARTED).size(), equalTo(1));
  assertThat(
      table.index(index).shard(0).replicaShardsWithState(STARTED).get(0).currentNodeId(),
      equalTo(replicaNode));
  assertThat(table.index(index).shard(0).replicaShardsWithState(INITIALIZING).size(), equalTo(1));
  assertThat(
      table.index(index).shard(0).replicaShardsWithState(INITIALIZING).get(0).currentNodeId(),
      equalTo(thirdNode));

  nodesView = state.routingNodes();
  previousTable = table;
  table = allocator.applyStartedShards(state, nodesView.shardsWithState(INITIALIZING)).routingTable();
  state = ClusterState.builder(state).routingTable(table).build();

  // Both replicas are started now; which one is listed first is not pinned down.
  assertThat(previousTable == table, equalTo(false));
  assertThat(table.index(index).shards().size(), equalTo(1));
  assertThat(table.index(index).shard(0).size(), equalTo(3));
  assertThat(table.index(index).shard(0).primaryShard().state(), equalTo(STARTED));
  assertThat(table.index(index).shard(0).primaryShard().currentNodeId(), equalTo(primaryNode));
  assertThat(table.index(index).shard(0).replicaShards().size(), equalTo(2));
  assertThat(table.index(index).shard(0).replicaShardsWithState(STARTED).size(), equalTo(2));
  assertThat(
      table.index(index).shard(0).replicaShardsWithState(STARTED).get(0).currentNodeId(),
      anyOf(equalTo(replicaNode), equalTo(thirdNode)));
  assertThat(
      table.index(index).shard(0).replicaShardsWithState(STARTED).get(1).currentNodeId(),
      anyOf(equalTo(replicaNode), equalTo(thirdNode)));

  logger.info("now remove a replica");
  nodesView = state.routingNodes();
  previousTable = table;
  table = RoutingTable.builder(table).updateNumberOfReplicas(1).build();
  meta = MetaData.builder(state.metaData()).updateNumberOfReplicas(1).build();
  state = ClusterState.builder(state).routingTable(table).metaData(meta).build();

  assertThat(state.metaData().index(index).numberOfReplicas(), equalTo(1));

  assertThat(previousTable == table, equalTo(false));
  assertThat(table.index(index).shards().size(), equalTo(1));
  assertThat(table.index(index).shard(0).size(), equalTo(2));
  assertThat(table.index(index).shard(0).primaryShard().state(), equalTo(STARTED));
  assertThat(table.index(index).shard(0).primaryShard().currentNodeId(), equalTo(primaryNode));
  assertThat(table.index(index).shard(0).replicaShards().size(), equalTo(1));
  assertThat(table.index(index).shard(0).replicaShards().get(0).state(), equalTo(STARTED));
  assertThat(
      table.index(index).shard(0).replicaShards().get(0).currentNodeId(),
      anyOf(equalTo(replicaNode), equalTo(thirdNode)));

  logger.info("do a reroute, should remain the same");
  previousTable = table;
  table = allocator.reroute(state).routingTable();
  state = ClusterState.builder(state).routingTable(table).build();

  // Nothing left to do: the reroute must return the identical routing table instance.
  assertThat(previousTable == table, equalTo(true));
}
@Test public void diskThresholdTest() { Settings diskSettings = settingsBuilder() .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true) .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7) .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.8) .build(); Map<String, DiskUsage> usages = new HashMap<>(); usages.put("node1", new DiskUsage("node1", 100, 10)); // 90% used usages.put("node2", new DiskUsage("node2", 100, 35)); // 65% used usages.put("node3", new DiskUsage("node3", 100, 60)); // 40% used usages.put("node4", new DiskUsage("node4", 100, 80)); // 20% used Map<String, Long> shardSizes = new HashMap<>(); shardSizes.put("[test][0][p]", 10L); // 10 bytes shardSizes.put("[test][0][r]", 10L); final ClusterInfo clusterInfo = new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes)); AllocationDeciders deciders = new AllocationDeciders( ImmutableSettings.EMPTY, new HashSet<>( Arrays.asList( new SameShardAllocationDecider(ImmutableSettings.EMPTY), new DiskThresholdDecider(diskSettings)))); ClusterInfoService cis = new ClusterInfoService() { @Override public ClusterInfo getClusterInfo() { logger.info("--> calling fake getClusterInfo"); return clusterInfo; } }; AllocationService strategy = new AllocationService( settingsBuilder() .put("cluster.routing.allocation.concurrent_recoveries", 10) .put( ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always") .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1) .build(), deciders, new ShardsAllocators(), cis); MetaData metaData = MetaData.builder() .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(1)) .build(); RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build(); ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT) .metaData(metaData) .routingTable(routingTable) .build(); 
logger.info("--> adding two nodes"); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))) .build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Primary shard should be initializing, replica should not assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1)); logger.info("--> start the shards (primaries)"); routingTable = strategy .applyStartedShards( clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Assert that we're able to start the primary assertThat( clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1)); // Assert that node1 didn't get any shards because its disk usage is too high assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0)); logger.info("--> start the shards (replicas)"); routingTable = strategy .applyStartedShards( clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Assert that the replica couldn't be started since node1 doesn't have enough space assertThat( clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1)); logger.info("--> adding node3"); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3"))) .build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Assert that the replica is initialized now that node3 
is available with enough space assertThat( clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1)); assertThat( clusterState.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1)); logger.info("--> start the shards (replicas)"); routingTable = strategy .applyStartedShards( clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Assert that the replica couldn't be started since node1 doesn't have enough space assertThat( clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2)); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0)); assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); logger.info("--> changing decider settings"); // Set the low threshold to 60 instead of 70 // Set the high threshold to 70 instead of 80 // node2 now should not have new shards allocated to it, but shards can remain diskSettings = settingsBuilder() .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true) .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "60%") .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.7) .build(); deciders = new AllocationDeciders( ImmutableSettings.EMPTY, new HashSet<>( Arrays.asList( new SameShardAllocationDecider(ImmutableSettings.EMPTY), new DiskThresholdDecider(diskSettings)))); strategy = new AllocationService( settingsBuilder() .put("cluster.routing.allocation.concurrent_recoveries", 10) .put( ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always") .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1) .build(), deciders, new ShardsAllocators(), cis); routingTable = 
strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Shards remain started assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2)); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0)); assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); logger.info("--> changing settings again"); // Set the low threshold to 50 instead of 60 // Set the high threshold to 60 instead of 70 // node2 now should not have new shards allocated to it, and shards cannot remain diskSettings = settingsBuilder() .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true) .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.5) .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.6) .build(); deciders = new AllocationDeciders( ImmutableSettings.EMPTY, new HashSet<>( Arrays.asList( new SameShardAllocationDecider(ImmutableSettings.EMPTY), new DiskThresholdDecider(diskSettings)))); strategy = new AllocationService( settingsBuilder() .put("cluster.routing.allocation.concurrent_recoveries", 10) .put( ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always") .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1) .build(), deciders, new ShardsAllocators(), cis); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Shards remain started assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2)); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0)); // Shard hasn't been moved off of node2 yet because there's nowhere for it to go 
assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); logger.info("--> adding node4"); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node4"))) .build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Shards remain started assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(1)); assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1)); logger.info("--> apply INITIALIZING shards"); routingTable = strategy .applyStartedShards( clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0)); // Node4 is available now, so the shard is moved off of node2 assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0)); assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node4").size(), equalTo(1)); }
@Test public void testMultiIndexUnevenNodes() { AllocationService strategy = createAllocationService( settingsBuilder() .put("cluster.routing.allocation.concurrent_recoveries", 10) .put( ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always") .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1) .build()); final int numberOfIndices = 10; logger.info("Building initial routing table with " + numberOfIndices + " indices"); MetaData.Builder metaDataBuilder = MetaData.builder(); for (int i = 0; i < numberOfIndices; i++) { metaDataBuilder.put( IndexMetaData.builder("test" + i) .settings(settings(Version.CURRENT)) .numberOfShards(1) .numberOfReplicas(0)); } MetaData metaData = metaDataBuilder.build(); RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); for (int i = 0; i < numberOfIndices; i++) { routingTableBuilder.addAsNew(metaData.index("test" + i)); } RoutingTable routingTable = routingTableBuilder.build(); ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT) .metaData(metaData) .routingTable(routingTable) .build(); assertThat(routingTable.indicesRouting().size(), equalTo(numberOfIndices)); logger.info("Starting 3 nodes and rerouting"); clusterState = ClusterState.builder(clusterState) .nodes( DiscoveryNodes.builder() .put(newNode("node1")) .put(newNode("node2")) .put(newNode("node3"))) .build(); RoutingTable prevRoutingTable = routingTable; routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertThat(prevRoutingTable != routingTable, equalTo(true)); for (int i = 0; i < numberOfIndices; i++) { assertThat(routingTable.index("test" + i).shards().size(), equalTo(1)); assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1)); assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1)); assertThat( routingTable.index("test" + 
i).shard(0).shards().get(0).state(), equalTo(INITIALIZING)); } RoutingNodes routingNodes = clusterState.routingNodes(); assertThat(numberOfShardsOfType(routingNodes, INITIALIZING), equalTo(numberOfIndices)); assertThat( routingNodes.node("node1").numberOfShardsWithState(INITIALIZING), anyOf(equalTo(3), equalTo(4))); assertThat( routingNodes.node("node2").numberOfShardsWithState(INITIALIZING), anyOf(equalTo(3), equalTo(4))); assertThat( routingNodes.node("node2").numberOfShardsWithState(INITIALIZING), anyOf(equalTo(3), equalTo(4))); logger.info("Start two more nodes, things should remain the same"); clusterState = ClusterState.builder(clusterState) .nodes( DiscoveryNodes.builder(clusterState.nodes()) .put(newNode("node4")) .put(newNode("node5"))) .build(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); prevRoutingTable = routingTable; routingTable = strategy.reroute(clusterState).routingTable(); assertThat(prevRoutingTable == routingTable, equalTo(true)); routingNodes = clusterState.routingNodes(); prevRoutingTable = routingTable; routingTable = strategy .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertThat(prevRoutingTable != routingTable, equalTo(true)); for (int i = 0; i < numberOfIndices; i++) { assertThat(routingTable.index("test" + i).shards().size(), equalTo(1)); assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1)); assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1)); assertThat( routingTable.index("test" + i).shard(0).shards().get(0).state(), anyOf(equalTo(RELOCATING), equalTo(STARTED))); } routingNodes = clusterState.routingNodes(); assertThat( "4 source shard routing are relocating", numberOfShardsOfType(routingNodes, RELOCATING), equalTo(4)); assertThat( "4 target shard routing are initializing", numberOfShardsOfType(routingNodes, 
INITIALIZING), equalTo(4)); logger.info("Now, mark the relocated as started"); prevRoutingTable = routingTable; routingTable = strategy .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); // routingTable = strategy.reroute(new RoutingStrategyInfo(metaData, routingTable), // nodes); assertThat(prevRoutingTable != routingTable, equalTo(true)); for (int i = 0; i < numberOfIndices; i++) { assertThat(routingTable.index("test" + i).shards().size(), equalTo(1)); assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1)); assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1)); assertThat( routingTable.index("test" + i).shard(0).shards().get(0).state(), anyOf(equalTo(RELOCATING), equalTo(STARTED))); } routingNodes = clusterState.routingNodes(); assertThat(numberOfShardsOfType(routingNodes, STARTED), equalTo(numberOfIndices)); for (RoutingNode routingNode : routingNodes) { assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(2)); } }
@Test public void testMultiIndexEvenDistribution() { AllocationService strategy = createAllocationService( settingsBuilder() .put("cluster.routing.allocation.concurrent_recoveries", 10) .put( ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always") .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1) .build()); final int numberOfIndices = 50; logger.info("Building initial routing table with " + numberOfIndices + " indices"); MetaData.Builder metaDataBuilder = MetaData.builder(); for (int i = 0; i < numberOfIndices; i++) { metaDataBuilder.put( IndexMetaData.builder("test" + i) .settings(settings(Version.CURRENT)) .numberOfShards(1) .numberOfReplicas(0)); } MetaData metaData = metaDataBuilder.build(); RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); for (int i = 0; i < numberOfIndices; i++) { routingTableBuilder.addAsNew(metaData.index("test" + i)); } RoutingTable routingTable = routingTableBuilder.build(); ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT) .metaData(metaData) .routingTable(routingTable) .build(); assertThat(routingTable.indicesRouting().size(), equalTo(numberOfIndices)); for (int i = 0; i < numberOfIndices; i++) { assertThat(routingTable.index("test" + i).shards().size(), equalTo(1)); assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1)); assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1)); assertThat( routingTable.index("test" + i).shard(0).shards().get(0).state(), equalTo(UNASSIGNED)); assertThat( routingTable.index("test" + i).shard(0).shards().get(0).currentNodeId(), nullValue()); } logger.info("Adding " + (numberOfIndices / 2) + " nodes"); DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(); List<DiscoveryNode> nodes = newArrayList(); for (int i = 0; i < (numberOfIndices / 2); i++) { nodesBuilder.put(newNode("node" + i)); } RoutingTable prevRoutingTable = routingTable; clusterState 
= ClusterState.builder(clusterState).nodes(nodesBuilder).build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertThat(prevRoutingTable != routingTable, equalTo(true)); for (int i = 0; i < numberOfIndices; i++) { assertThat(routingTable.index("test" + i).shards().size(), equalTo(1)); assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1)); assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1)); assertThat( routingTable.index("test" + i).shard(0).shards().get(0).unassigned(), equalTo(false)); assertThat( routingTable.index("test" + i).shard(0).shards().get(0).state(), equalTo(INITIALIZING)); assertThat(routingTable.index("test" + i).shard(0).shards().get(0).primary(), equalTo(true)); // make sure we still have 2 shards initializing per node on the first 25 nodes String nodeId = routingTable.index("test" + i).shard(0).shards().get(0).currentNodeId(); int nodeIndex = Integer.parseInt(nodeId.substring("node".length())); assertThat(nodeIndex, lessThan(25)); } RoutingNodes routingNodes = clusterState.routingNodes(); Set<String> encounteredIndices = newHashSet(); for (RoutingNode routingNode : routingNodes) { assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(0)); assertThat(routingNode.size(), equalTo(2)); // make sure we still have 2 shards initializing per node on the only 25 nodes int nodeIndex = Integer.parseInt(routingNode.nodeId().substring("node".length())); assertThat(nodeIndex, lessThan(25)); // check that we don't have a shard associated with a node with the same index name (we have a // single shard) for (ShardRouting shardRoutingEntry : routingNode) { assertThat(encounteredIndices, not(hasItem(shardRoutingEntry.index()))); encounteredIndices.add(shardRoutingEntry.index()); } } logger.info("Adding additional " + (numberOfIndices / 2) + " nodes, nothing should change"); nodesBuilder = 
DiscoveryNodes.builder(clusterState.nodes()); for (int i = (numberOfIndices / 2); i < numberOfIndices; i++) { nodesBuilder.put(newNode("node" + i)); } prevRoutingTable = routingTable; clusterState = ClusterState.builder(clusterState).nodes(nodesBuilder).build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertThat(prevRoutingTable != routingTable, equalTo(false)); logger.info("Marking the shard as started"); prevRoutingTable = routingTable; routingTable = strategy .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING)) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertThat(prevRoutingTable != routingTable, equalTo(true)); int numberOfRelocatingShards = 0; int numberOfStartedShards = 0; for (int i = 0; i < numberOfIndices; i++) { assertThat(routingTable.index("test" + i).shards().size(), equalTo(1)); assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1)); assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1)); assertThat( routingTable.index("test" + i).shard(0).shards().get(0).unassigned(), equalTo(false)); assertThat( routingTable.index("test" + i).shard(0).shards().get(0).state(), anyOf(equalTo(STARTED), equalTo(RELOCATING))); if (routingTable.index("test" + i).shard(0).shards().get(0).state() == STARTED) { numberOfStartedShards++; } else if (routingTable.index("test" + i).shard(0).shards().get(0).state() == RELOCATING) { numberOfRelocatingShards++; } assertThat(routingTable.index("test" + i).shard(0).shards().get(0).primary(), equalTo(true)); // make sure we still have 2 shards either relocating or started on the first 25 nodes (still) String nodeId = routingTable.index("test" + i).shard(0).shards().get(0).currentNodeId(); int nodeIndex = Integer.parseInt(nodeId.substring("node".length())); assertThat(nodeIndex, lessThan(25)); } 
assertThat(numberOfRelocatingShards, equalTo(25)); assertThat(numberOfStartedShards, equalTo(25)); }
@Test public void diskThresholdWithShardSizes() { Settings diskSettings = settingsBuilder() .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true) .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7) .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "71%") .build(); Map<String, DiskUsage> usages = new HashMap<>(); usages.put("node1", new DiskUsage("node1", 100, 31)); // 69% used usages.put("node2", new DiskUsage("node2", 100, 1)); // 99% used Map<String, Long> shardSizes = new HashMap<>(); shardSizes.put("[test][0][p]", 10L); // 10 bytes final ClusterInfo clusterInfo = new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes)); AllocationDeciders deciders = new AllocationDeciders( ImmutableSettings.EMPTY, new HashSet<>( Arrays.asList( new SameShardAllocationDecider(ImmutableSettings.EMPTY), new DiskThresholdDecider(diskSettings)))); ClusterInfoService cis = new ClusterInfoService() { @Override public ClusterInfo getClusterInfo() { logger.info("--> calling fake getClusterInfo"); return clusterInfo; } }; AllocationService strategy = new AllocationService( settingsBuilder() .put("cluster.routing.allocation.concurrent_recoveries", 10) .put( ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always") .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1) .build(), deciders, new ShardsAllocators(), cis); MetaData metaData = MetaData.builder() .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(0)) .build(); RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build(); ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT) .metaData(metaData) .routingTable(routingTable) .build(); logger.info("--> adding node1"); clusterState = ClusterState.builder(clusterState) .nodes( DiscoveryNodes.builder() .put(newNode("node1")) .put( newNode( "node2")) // node2 
is added because DiskThresholdDecider automatically // ignore single-node clusters ) .build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logger.info("--> start the shards (primaries)"); routingTable = strategy .applyStartedShards( clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Shard can't be allocated to node1 (or node2) because it would cause too much usage assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(0)); // No shards are started, no nodes have enough disk for allocation assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(0)); }
/**
 * Follows a single shard of a zero-replica index as nodes come and go: it is allocated to node1,
 * stays put when other nodes join, and is re-allocated to node2 after node1 is removed.
 */
@Test
public void testSingleIndexStartedShard() {
  AllocationService strategy =
      createAllocationService(
          settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build());

  logger.info("Building initial routing table");
  MetaData metaData =
      MetaData.builder()
          .put(
              IndexMetaData.builder("test")
                  .settings(settings(Version.CURRENT))
                  .numberOfShards(1)
                  .numberOfReplicas(0))
          .build();
  RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build();
  ClusterState clusterState =
      ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
          .metaData(metaData)
          .routingTable(routingTable)
          .build();

  assertThat(routingTable.index("test").shards().size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
  assertThat(routingTable.index("test").shard(0).shards().get(0).currentNodeId(), nullValue());

  logger.info("Adding one node and performing rerouting");
  clusterState =
      ClusterState.builder(clusterState)
          .nodes(DiscoveryNodes.builder().put(newNode("node1")))
          .build();
  RoutingTable prevRoutingTable = routingTable;
  routingTable = strategy.reroute(clusterState).routingTable();
  clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

  // The shard went from UNASSIGNED to INITIALIZING, so a new routing table must have been
  // produced; without this check the prevRoutingTable captured above was never read.
  assertThat(routingTable != prevRoutingTable, equalTo(true));
  assertThat(routingTable.index("test").shards().size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(INITIALIZING));
  assertThat(
      routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

  logger.info("Rerouting again, nothing should change");
  prevRoutingTable = routingTable;
  clusterState = ClusterState.builder(clusterState).build();
  routingTable = strategy.reroute(clusterState).routingTable();
  assertThat(routingTable == prevRoutingTable, equalTo(true));
  clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

  logger.info("Marking the shard as started");
  RoutingNodes routingNodes = clusterState.routingNodes();
  prevRoutingTable = routingTable;
  routingTable =
      strategy
          .applyStartedShards(
              clusterState, routingNodes.node("node1").shardsWithState(INITIALIZING))
          .routingTable();
  clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

  assertThat(routingTable != prevRoutingTable, equalTo(true));
  assertThat(routingTable.index("test").shards().size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(STARTED));
  assertThat(
      routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

  logger.info("Starting another node and making sure nothing changed");
  clusterState =
      ClusterState.builder(clusterState)
          .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node2")))
          .build();
  prevRoutingTable = routingTable;
  routingTable = strategy.reroute(clusterState).routingTable();
  clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

  assertThat(routingTable == prevRoutingTable, equalTo(true));
  assertThat(routingTable.index("test").shards().size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(STARTED));
  assertThat(
      routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

  logger.info("Killing node1 where the shard is, checking the shard is relocated");
  clusterState =
      ClusterState.builder(clusterState)
          .nodes(DiscoveryNodes.builder(clusterState.nodes()).remove("node1"))
          .build();
  prevRoutingTable = routingTable;
  routingTable = strategy.reroute(clusterState).routingTable();
  clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

  assertThat(routingTable != prevRoutingTable, equalTo(true));
  assertThat(routingTable.index("test").shards().size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(INITIALIZING));
  assertThat(
      routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node2"));

  logger.info(
      "Start another node, make sure that things remain the same (shard is in node2 and initializing)");
  clusterState =
      ClusterState.builder(clusterState)
          .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3")))
          .build();
  prevRoutingTable = routingTable;
  routingTable = strategy.reroute(clusterState).routingTable();
  clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
  assertThat(routingTable == prevRoutingTable, equalTo(true));

  logger.info("Start the shard on node 2");
  routingNodes = clusterState.routingNodes();
  prevRoutingTable = routingTable;
  routingTable =
      strategy
          .applyStartedShards(
              clusterState, routingNodes.node("node2").shardsWithState(INITIALIZING))
          .routingTable();
  clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

  assertThat(routingTable != prevRoutingTable, equalTo(true));
  assertThat(routingTable.index("test").shards().size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
  assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(STARTED));
  assertThat(
      routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node2"));
}
@Test public void unknownDiskUsageTest() { Settings diskSettings = settingsBuilder() .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true) .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7) .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.85) .build(); Map<String, DiskUsage> usages = new HashMap<>(); usages.put("node2", new DiskUsage("node2", 100, 50)); // 50% used usages.put("node3", new DiskUsage("node3", 100, 0)); // 100% used Map<String, Long> shardSizes = new HashMap<>(); shardSizes.put("[test][0][p]", 10L); // 10 bytes shardSizes.put("[test][0][r]", 10L); // 10 bytes final ClusterInfo clusterInfo = new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes)); AllocationDeciders deciders = new AllocationDeciders( ImmutableSettings.EMPTY, new HashSet<>( Arrays.asList( new SameShardAllocationDecider(ImmutableSettings.EMPTY), new DiskThresholdDecider(diskSettings)))); ClusterInfoService cis = new ClusterInfoService() { @Override public ClusterInfo getClusterInfo() { logger.info("--> calling fake getClusterInfo"); return clusterInfo; } }; AllocationService strategy = new AllocationService( settingsBuilder() .put("cluster.routing.allocation.concurrent_recoveries", 10) .put( ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always") .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1) .build(), deciders, new ShardsAllocators(), cis); MetaData metaData = MetaData.builder() .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(0)) .build(); RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build(); ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT) .metaData(metaData) .routingTable(routingTable) .build(); logger.info("--> adding node1"); clusterState = ClusterState.builder(clusterState) .nodes( DiscoveryNodes.builder() 
.put(newNode("node1")) .put( newNode( "node3")) // node3 is added because DiskThresholdDecider automatically // ignore single-node clusters ) .build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); // Shard can be allocated to node1, even though it only has 25% free, // because it's a primary that's never been allocated before assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1)); logger.info("--> start the shards (primaries)"); routingTable = strategy .applyStartedShards( clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // A single shard is started on node1, even though it normally would not // be allowed, because it's a primary that hasn't been allocated, and node1 // is still below the high watermark (unlike node3) assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); }
/**
 * Verifies the index-level "total shards per node" limit.
 *
 * <p>Index "test" has 4 primaries and 1 replica each (8 shard copies) with a limit of
 * 2 shards per node. With only two nodes available, the test expects the 4 primaries
 * to be allocated (2 per node) and the 4 replicas to stay unassigned, including after
 * an additional reroute.
 */
@Test
public void indexLevelShardsLimitAllocate() {
    AllocationService strategy =
        new AllocationService(
            settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build());

    logger.info("Building initial routing table");
    MetaData metaData =
        newMetaDataBuilder()
            .put(
                newIndexMetaDataBuilder("test")
                    .settings(
                        ImmutableSettings.settingsBuilder()
                            .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 4)
                            .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)
                            .put(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE, 2)))
            .build();
    RoutingTable routingTable = routingTable().addAsNew(metaData.index("test")).build();
    ClusterState clusterState =
        newClusterStateBuilder().metaData(metaData).routingTable(routingTable).build();

    logger.info("Adding two nodes and performing rerouting");
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .nodes(newNodesBuilder().put(newNode("node1")).put(newNode("node2")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();

    // Each node takes exactly the per-node limit of 2 shards.
    assertThat(
        clusterState
            .readOnlyRoutingNodes()
            .node("node1")
            .numberOfShardsWithState(ShardRoutingState.INITIALIZING),
        equalTo(2));
    assertThat(
        clusterState
            .readOnlyRoutingNodes()
            .node("node2")
            .numberOfShardsWithState(ShardRoutingState.INITIALIZING),
        equalTo(2));

    logger.info("Start the primary shards");
    RoutingNodes routingNodes = clusterState.routingNodes();
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();

    assertThat(
        clusterState
            .readOnlyRoutingNodes()
            .node("node1")
            .numberOfShardsWithState(ShardRoutingState.STARTED),
        equalTo(2));
    assertThat(
        clusterState
            .readOnlyRoutingNodes()
            .node("node1")
            .numberOfShardsWithState(ShardRoutingState.INITIALIZING),
        equalTo(0));
    assertThat(
        clusterState
            .readOnlyRoutingNodes()
            .node("node2")
            .numberOfShardsWithState(ShardRoutingState.STARTED),
        equalTo(2));
    assertThat(
        clusterState
            .readOnlyRoutingNodes()
            .node("node2")
            .numberOfShardsWithState(ShardRoutingState.INITIALIZING),
        equalTo(0));
    // Both nodes are at the limit, so the 4 replicas have nowhere to go.
    assertThat(clusterState.readOnlyRoutingNodes().unassigned().size(), equalTo(4));

    logger.info("Do another reroute, make sure its still not allocated");
    // Previously this step re-applied an (empty) list of INITIALIZING shards and asserted
    // nothing; perform the reroute the log message announces and verify its outcome.
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();

    assertThat(
        clusterState
            .readOnlyRoutingNodes()
            .node("node1")
            .numberOfShardsWithState(ShardRoutingState.INITIALIZING),
        equalTo(0));
    assertThat(
        clusterState
            .readOnlyRoutingNodes()
            .node("node2")
            .numberOfShardsWithState(ShardRoutingState.INITIALIZING),
        equalTo(0));
    assertThat(clusterState.readOnlyRoutingNodes().unassigned().size(), equalTo(4));
}
@Test public void testShardRelocationsTakenIntoAccount() { Settings diskSettings = settingsBuilder() .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true) .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_INCLUDE_RELOCATIONS, true) .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7) .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.8) .build(); Map<String, DiskUsage> usages = new HashMap<>(); usages.put("node1", new DiskUsage("node1", 100, 40)); // 60% used usages.put("node2", new DiskUsage("node2", 100, 40)); // 60% used usages.put("node2", new DiskUsage("node3", 100, 40)); // 60% used Map<String, Long> shardSizes = new HashMap<>(); shardSizes.put("[test][0][p]", 14L); // 14 bytes shardSizes.put("[test][0][r]", 14L); shardSizes.put("[test2][0][p]", 1L); // 1 bytes shardSizes.put("[test2][0][r]", 1L); final ClusterInfo clusterInfo = new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes)); AllocationDeciders deciders = new AllocationDeciders( ImmutableSettings.EMPTY, new HashSet<>( Arrays.asList( new SameShardAllocationDecider(ImmutableSettings.EMPTY), new DiskThresholdDecider(diskSettings)))); ClusterInfoService cis = new ClusterInfoService() { @Override public ClusterInfo getClusterInfo() { logger.info("--> calling fake getClusterInfo"); return clusterInfo; } }; AllocationService strategy = new AllocationService( settingsBuilder() .put("cluster.routing.allocation.concurrent_recoveries", 10) .put( ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always") .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1) .build(), deciders, new ShardsAllocators(), cis); MetaData metaData = MetaData.builder() .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(1)) .put(IndexMetaData.builder("test2").numberOfShards(1).numberOfReplicas(1)) .build(); RoutingTable routingTable = RoutingTable.builder() 
.addAsNew(metaData.index("test")) .addAsNew(metaData.index("test2")) .build(); ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT) .metaData(metaData) .routingTable(routingTable) .build(); logger.info("--> adding two nodes"); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))) .build(); routingTable = strategy.reroute(clusterState).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // shards should be initializing assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(4)); logger.info("--> start the shards"); routingTable = strategy .applyStartedShards( clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING)) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); // Assert that we're able to start the primary and replicas assertThat( clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(4)); logger.info("--> adding node3"); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3"))) .build(); AllocationCommand relocate1 = new MoveAllocationCommand(new ShardId("test", 0), "node2", "node3"); AllocationCommands cmds = new AllocationCommands(relocate1); routingTable = strategy.reroute(clusterState, cmds).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); AllocationCommand relocate2 = new MoveAllocationCommand(new ShardId("test2", 0), "node2", "node3"); cmds = new AllocationCommands(relocate2); try { // The shard for the "test" index is already being relocated to // node3, which will put it over the low watermark when it // completes, with shard relocations taken into account 
this should // throw an exception about not being able to complete strategy.reroute(clusterState, cmds).routingTable(); fail("should not have been able to reroute the shard"); } catch (ElasticsearchIllegalArgumentException e) { assertThat( "can't allocated because there isn't enough room: " + e.getMessage(), e.getMessage().contains("less than required [30.0%] free disk on node, free: [26.0%]"), equalTo(true)); } }