@Test
  public void unknownDiskUsageTest() {
    // Scenario: the cluster is made of node1 and node3, yet disk usage is only reported for
    // node2 and node3. node1 has no usage entry of its own and node3 is reported as having no
    // free space; the lone primary of "test" is expected to end up on node1.
    Settings thresholdSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.85)
            .build();

    Map<String, DiskUsage> usageByNode = new HashMap<>();
    usageByNode.put("node2", new DiskUsage("node2", 100, 50)); // half of the disk is used
    usageByNode.put("node3", new DiskUsage("node3", 100, 0)); // disk is completely used

    Map<String, Long> bytesByShard = new HashMap<>();
    bytesByShard.put("[test][0][p]", 10L);
    bytesByShard.put("[test][0][r]", 10L);
    final ClusterInfo stubbedInfo =
        new ClusterInfo(ImmutableMap.copyOf(usageByNode), ImmutableMap.copyOf(bytesByShard));

    AllocationDeciders deciderChain =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(thresholdSettings))));

    // Stub service that always hands back the canned ClusterInfo built above.
    ClusterInfoService stubbedInfoService =
        new ClusterInfoService() {
          @Override
          public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return stubbedInfo;
          }
        };

    Settings allocationSettings =
        settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put(
                ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build();
    AllocationService allocator =
        new AllocationService(
            allocationSettings, deciderChain, new ShardsAllocators(), stubbedInfoService);

    // One index with a single primary and no replicas.
    MetaData indexMeta =
        MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(0))
            .build();
    RoutingTable table = RoutingTable.builder().addAsNew(indexMeta.index("test")).build();
    ClusterState state =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(indexMeta)
            .routingTable(table)
            .build();

    logger.info("--> adding node1");
    // node3 joins as well because DiskThresholdDecider automatically ignores single-node
    // clusters.
    state =
        ClusterState.builder(state)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node3")))
            .build();
    table = allocator.reroute(state).routingTable();
    state = ClusterState.builder(state).routingTable(table).build();

    // The primary has never been allocated before, so it is expected to start initializing
    // even though no usage is known for node1.
    assertThat(state.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> start the shards (primaries)");
    table =
        allocator
            .applyStartedShards(state, state.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    state = ClusterState.builder(state).routingTable(table).build();
    logShardStates(state);

    // Exactly one shard is started and it lives on node1 (node3 reports no free space).
    assertThat(state.routingNodes().shardsWithState(STARTED).size(), equalTo(1));
    assertThat(state.getRoutingNodes().node("node1").size(), equalTo(1));
  }
  /**
   * Verifies that in-flight relocations count against the target node's free space when
   * {@code CLUSTER_ROUTING_ALLOCATION_INCLUDE_RELOCATIONS} is enabled.
   *
   * <p>All three nodes report 60% used. Once the 14-byte {@code test} shard has started moving
   * from node2 to node3, a second move of the {@code test2} shard to node3 must be rejected with
   * an {@link ElasticsearchIllegalArgumentException} reporting 26% free against the 30% required
   * by the 0.7 low watermark.
   */
  @Test
  public void testShardRelocationsTakenIntoAccount() {
    Settings diskSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_INCLUDE_RELOCATIONS, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.8)
            .build();

    // Each node gets its own entry keyed by its node id; node3 is the relocation target, so
    // its usage must be present under "node3" rather than overwriting node2's entry.
    Map<String, DiskUsage> usages = new HashMap<>();
    usages.put("node1", new DiskUsage("node1", 100, 40)); // 60% used
    usages.put("node2", new DiskUsage("node2", 100, 40)); // 60% used
    usages.put("node3", new DiskUsage("node3", 100, 40)); // 60% used

    Map<String, Long> shardSizes = new HashMap<>();
    shardSizes.put("[test][0][p]", 14L); // 14 bytes
    shardSizes.put("[test][0][r]", 14L);
    shardSizes.put("[test2][0][p]", 1L); // 1 byte
    shardSizes.put("[test2][0][r]", 1L);
    final ClusterInfo clusterInfo =
        new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes));

    AllocationDeciders deciders =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    // Fake service that always returns the fixed ClusterInfo built above.
    ClusterInfoService cis =
        new ClusterInfoService() {
          @Override
          public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo;
          }
        };

    AllocationService strategy =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build(),
            deciders,
            new ShardsAllocators(),
            cis);

    // Two indices, each with one primary and one replica: four shard copies in total.
    MetaData metaData =
        MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(1))
            .put(IndexMetaData.builder("test2").numberOfShards(1).numberOfReplicas(1))
            .build();

    RoutingTable routingTable =
        RoutingTable.builder()
            .addAsNew(metaData.index("test"))
            .addAsNew(metaData.index("test2"))
            .build();

    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    logger.info("--> adding two nodes");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // All four shard copies should be initializing
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(4));

    logger.info("--> start the shards");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that we're able to start the primaries and replicas
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(4));

    logger.info("--> adding node3");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3")))
            .build();

    // First explicit move: the 14-byte "test" shard from node2 to node3.
    AllocationCommand relocate1 =
        new MoveAllocationCommand(new ShardId("test", 0), "node2", "node3");
    AllocationCommands cmds = new AllocationCommands(relocate1);

    routingTable = strategy.reroute(clusterState, cmds).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // Second explicit move: the 1-byte "test2" shard from node2 to node3.
    AllocationCommand relocate2 =
        new MoveAllocationCommand(new ShardId("test2", 0), "node2", "node3");
    cmds = new AllocationCommands(relocate2);

    try {
      // The shard for the "test" index is already being relocated to
      // node3, which will put it over the low watermark when it
      // completes, with shard relocations taken into account this should
      // throw an exception about not being able to complete
      strategy.reroute(clusterState, cmds).routingTable();
      fail("should not have been able to reroute the shard");
    } catch (ElasticsearchIllegalArgumentException e) {
      assertThat(
          "can't allocated because there isn't enough room: " + e.getMessage(),
          e.getMessage().contains("less than required [30.0%] free disk on node, free: [26.0%]"),
          equalTo(true));
    }
  }
  /**
   * Walks a one-primary/one-replica index through percentage-based disk watermarks.
   *
   * <p>Steps: start with node1 and node2 (only node2 receives a shard), add node3 so the replica
   * can be assigned, tighten the watermarks twice by rebuilding the deciders and the
   * {@link AllocationService}, and finally add node4 so the shard on node2 can move off.
   */
  @Test
  public void diskThresholdTest() {
    Settings diskSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.8)
            .build();

    // Fixed per-node usage for the whole test, including nodes that join later
    Map<String, DiskUsage> usages = new HashMap<>();
    usages.put("node1", new DiskUsage("node1", 100, 10)); // 90% used
    usages.put("node2", new DiskUsage("node2", 100, 35)); // 65% used
    usages.put("node3", new DiskUsage("node3", 100, 60)); // 40% used
    usages.put("node4", new DiskUsage("node4", 100, 80)); // 20% used

    Map<String, Long> shardSizes = new HashMap<>();
    shardSizes.put("[test][0][p]", 10L); // 10 bytes
    shardSizes.put("[test][0][r]", 10L);
    final ClusterInfo clusterInfo =
        new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes));

    AllocationDeciders deciders =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    // Fake service that always returns the fixed ClusterInfo built above
    ClusterInfoService cis =
        new ClusterInfoService() {
          @Override
          public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo;
          }
        };

    AllocationService strategy =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build(),
            deciders,
            new ShardsAllocators(),
            cis);

    // Single index with one primary and one replica
    MetaData metaData =
        MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(1))
            .build();

    RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build();

    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    logger.info("--> adding two nodes");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // Primary shard should be initializing, replica should not
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> start the shards (primaries)");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that we're able to start the primary
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1));
    // Assert that node1 didn't get any shards because its disk usage is too high
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));

    logger.info("--> start the shards (replicas)");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that the replica couldn't be started since node1 doesn't have enough space
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1));

    logger.info("--> adding node3");

    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that the replica is initialized now that node3 is available with enough space
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(),
        equalTo(1));

    logger.info("--> start the shards (replicas)");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that both copies are now started, one each on node2 and node3, and that node1
    // still holds nothing
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> changing decider settings");

    // Set the low threshold to 60 instead of 70
    // Set the high threshold to 70 instead of 80
    // node2 now should not have new shards allocated to it, but shards can remain
    diskSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "60%")
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.7)
            .build();

    // The new thresholds only take effect through a freshly built decider and service
    deciders =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    strategy =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build(),
            deciders,
            new ShardsAllocators(),
            cis);

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // Shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> changing settings again");

    // Set the low threshold to 50 instead of 60
    // Set the high threshold to 60 instead of 70
    // node2 now should not have new shards allocated to it, and shards cannot remain
    diskSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.5)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.6)
            .build();

    deciders =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    strategy =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build(),
            deciders,
            new ShardsAllocators(),
            cis);

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
    // Shard hasn't been moved off of node2 yet because there's nowhere for it to go
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> adding node4");

    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node4")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Only one shard is still counted as started and one is now initializing
    // (presumably the node2 copy relocating to node4; the final assertions below check that)
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> apply INITIALIZING shards");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
    // Node4 is available now, so the shard is moved off of node2
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node4").size(), equalTo(1));
  }
  /**
   * Checks that the size of the shard being allocated is taken into account: node1 sits at 69%
   * used, and the 10-byte primary of {@code test} is expected to stay unallocated on both nodes.
   */
  @Test
  public void diskThresholdWithShardSizes() {
    Settings thresholdSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "71%")
            .build();

    Map<String, DiskUsage> usageByNode = new HashMap<>();
    usageByNode.put("node1", new DiskUsage("node1", 100, 31)); // just under the 70% mark
    usageByNode.put("node2", new DiskUsage("node2", 100, 1)); // practically full

    Map<String, Long> bytesByShard = new HashMap<>();
    bytesByShard.put("[test][0][p]", 10L);
    final ClusterInfo stubbedInfo =
        new ClusterInfo(ImmutableMap.copyOf(usageByNode), ImmutableMap.copyOf(bytesByShard));

    AllocationDeciders deciderChain =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(thresholdSettings))));

    // Stub service that always hands back the canned ClusterInfo built above.
    ClusterInfoService stubbedInfoService =
        new ClusterInfoService() {
          @Override
          public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return stubbedInfo;
          }
        };

    Settings allocationSettings =
        settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put(
                ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build();
    AllocationService allocator =
        new AllocationService(
            allocationSettings, deciderChain, new ShardsAllocators(), stubbedInfoService);

    // One index with a single primary and no replicas.
    MetaData indexMeta =
        MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(0))
            .build();
    RoutingTable table = RoutingTable.builder().addAsNew(indexMeta.index("test")).build();
    ClusterState state =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(indexMeta)
            .routingTable(table)
            .build();

    logger.info("--> adding node1");
    // node2 joins as well because DiskThresholdDecider automatically ignores single-node
    // clusters.
    state =
        ClusterState.builder(state)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
            .build();
    table = allocator.reroute(state).routingTable();
    state = ClusterState.builder(state).routingTable(table).build();

    logger.info("--> start the shards (primaries)");
    table =
        allocator
            .applyStartedShards(state, state.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    state = ClusterState.builder(state).routingTable(table).build();
    logShardStates(state);

    // Neither node can take the shard: placing it would push usage too high.
    assertThat(state.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(0));
    // Consequently nothing has been started either.
    assertThat(state.routingNodes().shardsWithState(STARTED).size(), equalTo(0));
  }
  @Test
  @TestLogging("cluster.routing.allocation.decider:TRACE")
  public void diskThresholdWithAbsoluteSizesTest() {
    Settings diskSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "30b")
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "9b")
            .build();

    Map<String, DiskUsage> usages = new HashMap<>();
    usages.put("node1", new DiskUsage("node1", 100, 10)); // 90% used
    usages.put("node2", new DiskUsage("node2", 100, 10)); // 90% used
    usages.put("node3", new DiskUsage("node3", 100, 60)); // 40% used
    usages.put("node4", new DiskUsage("node4", 100, 80)); // 20% used
    usages.put("node5", new DiskUsage("node5", 100, 85)); // 15% used

    Map<String, Long> shardSizes = new HashMap<>();
    shardSizes.put("[test][0][p]", 10L); // 10 bytes
    shardSizes.put("[test][0][r]", 10L);
    final ClusterInfo clusterInfo =
        new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes));

    AllocationDeciders deciders =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    ClusterInfoService cis =
        new ClusterInfoService() {
          @Override
          public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo;
          }
        };

    AllocationService strategy =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build(),
            deciders,
            new ShardsAllocators(),
            cis);

    MetaData metaData =
        MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(2))
            .build();

    RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build();

    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    logger.info("--> adding node1 and node2 node");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
            .build();

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // Primary should initialize, even though both nodes are over the limit initialize
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    String nodeWithPrimary, nodeWithoutPrimary;
    if (clusterState.getRoutingNodes().node("node1").size() == 1) {
      nodeWithPrimary = "node1";
      nodeWithoutPrimary = "node2";
    } else {
      nodeWithPrimary = "node2";
      nodeWithoutPrimary = "node1";
    }
    logger.info("--> nodeWithPrimary: {}", nodeWithPrimary);
    logger.info("--> nodeWithoutPrimary: {}", nodeWithoutPrimary);

    // Make node without the primary now habitable to replicas
    usages.put(nodeWithoutPrimary, new DiskUsage(nodeWithoutPrimary, 100, 35)); // 65% used
    final ClusterInfo clusterInfo2 =
        new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes));
    cis =
        new ClusterInfoService() {
          @Override
          public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo2;
          }
        };
    strategy =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build(),
            deciders,
            new ShardsAllocators(),
            cis);

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // Now the replica should be able to initialize
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(2));

    logger.info("--> start the shards (primaries)");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that we're able to start the primary and replica, since they were both initializing
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2));
    // Assert that node1 got a single shard (the primary), even though its disk usage is too high
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
    // Assert that node2 got a single shard (a replica)
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));

    // Assert that one replica is still unassigned
    // assertThat(clusterState.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(),
    // equalTo(1));

    logger.info("--> adding node3");

    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that the replica is initialized now that node3 is available with enough space
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2));
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(),
        equalTo(1));

    logger.info("--> start the shards (replicas)");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that all replicas could be started
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(3));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> changing decider settings");

    // Set the low threshold to 60 instead of 70
    // Set the high threshold to 70 instead of 80
    // node2 now should not have new shards allocated to it, but shards can remain
    diskSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "40b")
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "30b")
            .build();

    deciders =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    strategy =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build(),
            deciders,
            new ShardsAllocators(),
            cis);

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // Shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(3));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> changing settings again");

    // Set the low threshold to 50 instead of 60
    // Set the high threshold to 60 instead of 70
    // node2 now should not have new shards allocated to it, and shards cannot remain
    diskSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "50b")
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "40b")
            .build();

    deciders =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    strategy =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build(),
            deciders,
            new ShardsAllocators(),
            cis);

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(3));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
    // Shard hasn't been moved off of node2 yet because there's nowhere for it to go
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> adding node4");

    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node4")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2));
    // One shard is relocating off of node1
    assertThat(clusterState.routingNodes().shardsWithState(RELOCATING).size(), equalTo(1));
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> apply INITIALIZING shards");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // primary shard already has been relocated away
    assertThat(clusterState.getRoutingNodes().node(nodeWithPrimary).size(), equalTo(0));
    // node with increased space still has its shard
    assertThat(clusterState.getRoutingNodes().node(nodeWithoutPrimary).size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node4").size(), equalTo(1));

    logger.info("--> adding node5");

    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node5")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Shards remain started on node3 and node4
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2));
    // One shard is relocating off of node2 now
    assertThat(clusterState.routingNodes().shardsWithState(RELOCATING).size(), equalTo(1));
    // Initializing on node5
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> apply INITIALIZING shards");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logger.info("--> final cluster state:");
    logShardStates(clusterState);
    // Node1 still has no shards because it has no space for them
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
    // Node5 is available now, so the shard is moved off of node2
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node4").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node5").size(), equalTo(1));
  }
  /**
   * Follows a single primary (one shard, no replicas) through allocation, start, node loss and
   * re-allocation.
   *
   * <p>Sequence checked: unassigned while there are no nodes; INITIALIZING on node1 once it joins;
   * a second reroute leaves the routing table untouched; STARTED on node1; adding node2 changes
   * nothing; removing node1 puts the shard INITIALIZING on node2; adding node3 changes nothing;
   * STARTED on node2.
   *
   * <p>"Changed" and "unchanged" are asserted by reference comparison between the routing table
   * returned by the allocation service and the one held before the call.
   */
  @Test
  public void testSingleIndexStartedShard() {
    AllocationService strategy =
        createAllocationService(
            settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build());

    logger.info("Building initial routing table");

    MetaData metaData =
        MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(0))
            .build();

    RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build();

    ClusterState clusterState =
        ClusterState.builder().metaData(metaData).routingTable(routingTable).build();

    // With no nodes in the cluster the single shard has nowhere to go.
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
    assertThat(routingTable.index("test").shard(0).shards().get(0).currentNodeId(), nullValue());

    logger.info("Adding one node and performing rerouting");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")))
            .build();
    RoutingTable prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // Allocating the shard must yield a new routing table instance.
    assertThat(routingTable != prevRoutingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(INITIALIZING));
    assertThat(
        routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

    logger.info("Rerouting again, nothing should change");
    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    assertThat(routingTable == prevRoutingTable, equalTo(true));
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logger.info("Marking the shard as started");
    RoutingNodes routingNodes = clusterState.routingNodes();
    prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, routingNodes.node("node1").shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(routingTable != prevRoutingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(STARTED));
    assertThat(
        routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

    logger.info("Starting another node and making sure nothing changed");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node2")))
            .build();
    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // This test expects the lone started shard to stay on node1 when node2 joins.
    assertThat(routingTable == prevRoutingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(STARTED));
    assertThat(
        routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

    logger.info("Killing node1 where the shard is, checking the shard is relocated");

    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).remove("node1"))
            .build();
    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // The shard lost its node, so it is expected to initialize again on the remaining node.
    assertThat(routingTable != prevRoutingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(INITIALIZING));
    assertThat(
        routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node2"));

    logger.info(
        "Start another node, make sure that things remain the same (shard is in node2 and initializing)");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3")))
            .build();
    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    assertThat(routingTable == prevRoutingTable, equalTo(true));

    logger.info("Start the shard on node 2");
    routingNodes = clusterState.routingNodes();
    prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, routingNodes.node("node2").shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(routingTable != prevRoutingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(STARTED));
    assertThat(
        routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node2"));
  }
  /**
   * Allocates ten single-shard, zero-replica indices onto three nodes and then grows the cluster
   * to five nodes.
   *
   * <p>Expectations encoded below: the ten primaries initialize with three or four per node on the
   * initial three nodes; adding node4 and node5 while shards are still initializing does not change
   * the routing table; once the shards are started, four of them relocate; after the relocation
   * targets are started every node holds exactly two started shards.
   */
  @Test
  public void testMultiIndexUnevenNodes() {
    AllocationService strategy =
        createAllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put("cluster.routing.allocation.allow_rebalance", "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build());

    final int numberOfIndices = 10;
    logger.info("Building initial routing table with " + numberOfIndices + " indices");

    MetaData.Builder metaDataBuilder = MetaData.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      metaDataBuilder.put(IndexMetaData.builder("test" + i).numberOfShards(1).numberOfReplicas(0));
    }
    MetaData metaData = metaDataBuilder.build();

    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      routingTableBuilder.addAsNew(metaData.index("test" + i));
    }
    RoutingTable routingTable = routingTableBuilder.build();

    ClusterState clusterState =
        ClusterState.builder().metaData(metaData).routingTable(routingTable).build();

    assertThat(routingTable.indicesRouting().size(), equalTo(numberOfIndices));

    logger.info("Starting 3 nodes and rerouting");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(
                DiscoveryNodes.builder()
                    .put(newNode("node1"))
                    .put(newNode("node2"))
                    .put(newNode("node3")))
            .build();
    RoutingTable prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(), equalTo(INITIALIZING));
    }
    RoutingNodes routingNodes = clusterState.routingNodes();
    assertThat(numberOfShardsOfType(routingNodes, INITIALIZING), equalTo(numberOfIndices));
    // Ten shards over three nodes: every node, node3 included, must hold three or four.
    assertThat(
        routingNodes.node("node1").numberOfShardsWithState(INITIALIZING),
        anyOf(equalTo(3), equalTo(4)));
    assertThat(
        routingNodes.node("node2").numberOfShardsWithState(INITIALIZING),
        anyOf(equalTo(3), equalTo(4)));
    assertThat(
        routingNodes.node("node3").numberOfShardsWithState(INITIALIZING),
        anyOf(equalTo(3), equalTo(4)));

    logger.info("Start two more nodes, things should remain the same");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(
                DiscoveryNodes.builder(clusterState.nodes())
                    .put(newNode("node4"))
                    .put(newNode("node5")))
            .build();

    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable == routingTable, equalTo(true));

    // Start every initializing shard; the test expects this to kick off relocations towards the
    // two new nodes.
    routingNodes = clusterState.routingNodes();
    prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(),
          anyOf(equalTo(RELOCATING), equalTo(STARTED)));
    }
    routingNodes = clusterState.routingNodes();
    assertThat(
        "4 source shard routing are relocating",
        numberOfShardsOfType(routingNodes, RELOCATING),
        equalTo(4));
    assertThat(
        "4 target shard routing are initializing",
        numberOfShardsOfType(routingNodes, INITIALIZING),
        equalTo(4));

    logger.info("Now, mark the relocated as started");
    prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(),
          anyOf(equalTo(RELOCATING), equalTo(STARTED)));
    }
    routingNodes = clusterState.routingNodes();
    assertThat(numberOfShardsOfType(routingNodes, STARTED), equalTo(numberOfIndices));
    // Ten started shards spread over five nodes.
    for (RoutingNode routingNode : routingNodes) {
      assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(2));
    }
  }
  /**
   * Allocates fifty single-shard, zero-replica indices onto twenty-five nodes and then doubles the
   * cluster to fifty nodes.
   *
   * <p>Expectations encoded below: every node of the first batch ("node0".."node24") ends up with
   * two initializing primaries; adding the second batch while shards are initializing leaves the
   * routing table unchanged; once the shards are started, twenty-five stay STARTED and twenty-five
   * are RELOCATING, all still reported on the first twenty-five nodes.
   */
  @Test
  public void testMultiIndexEvenDistribution() {
    AllocationService strategy =
        createAllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put("cluster.routing.allocation.allow_rebalance", "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build());

    final int numberOfIndices = 50;
    logger.info("Building initial routing table with " + numberOfIndices + " indices");

    MetaData.Builder metaDataBuilder = MetaData.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      metaDataBuilder.put(IndexMetaData.builder("test" + i).numberOfShards(1).numberOfReplicas(0));
    }
    MetaData metaData = metaDataBuilder.build();

    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      routingTableBuilder.addAsNew(metaData.index("test" + i));
    }
    RoutingTable routingTable = routingTableBuilder.build();
    ClusterState clusterState =
        ClusterState.builder().metaData(metaData).routingTable(routingTable).build();

    assertThat(routingTable.indicesRouting().size(), equalTo(numberOfIndices));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).currentNodeId(), nullValue());
    }

    logger.info("Adding " + (numberOfIndices / 2) + " nodes");
    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder();
    for (int i = 0; i < (numberOfIndices / 2); i++) {
      nodesBuilder.put(newNode("node" + i));
    }
    RoutingTable prevRoutingTable = routingTable;
    clusterState = ClusterState.builder(clusterState).nodes(nodesBuilder).build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).unassigned(), equalTo(false));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(), equalTo(INITIALIZING));
      assertThat(routingTable.index("test" + i).shard(0).shards().get(0).primary(), equalTo(true));
      // make sure we still have 2 shards initializing per node on the first 25 nodes
      String nodeId = routingTable.index("test" + i).shard(0).shards().get(0).currentNodeId();
      int nodeIndex = Integer.parseInt(nodeId.substring("node".length()));
      assertThat(nodeIndex, lessThan(25));
    }
    RoutingNodes routingNodes = clusterState.routingNodes();
    Set<String> encounteredIndices = newHashSet();
    for (RoutingNode routingNode : routingNodes) {
      assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(0));
      assertThat(routingNode.size(), equalTo(2));
      // make sure we still have 2 shards initializing per node on the only 25 nodes
      int nodeIndex = Integer.parseInt(routingNode.nodeId().substring("node".length()));
      assertThat(nodeIndex, lessThan(25));
      // check that we don't have a shard associated with a node with the same index name (we have a
      // single shard)
      for (MutableShardRouting shardRoutingEntry : routingNode) {
        assertThat(encounteredIndices, not(hasItem(shardRoutingEntry.index())));
        encounteredIndices.add(shardRoutingEntry.index());
      }
    }

    logger.info("Adding additional " + (numberOfIndices / 2) + " nodes, nothing should change");
    nodesBuilder = DiscoveryNodes.builder(clusterState.nodes());
    for (int i = (numberOfIndices / 2); i < numberOfIndices; i++) {
      nodesBuilder.put(newNode("node" + i));
    }
    prevRoutingTable = routingTable;
    clusterState = ClusterState.builder(clusterState).nodes(nodesBuilder).build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable == routingTable, equalTo(true));

    logger.info("Marking the shard as started");
    // Re-read the routing nodes from the current state instead of reusing the snapshot taken
    // before the second batch of nodes joined.
    routingNodes = clusterState.routingNodes();
    prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    int numberOfRelocatingShards = 0;
    int numberOfStartedShards = 0;
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).unassigned(), equalTo(false));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(),
          anyOf(equalTo(STARTED), equalTo(RELOCATING)));
      if (routingTable.index("test" + i).shard(0).shards().get(0).state() == STARTED) {
        numberOfStartedShards++;
      } else if (routingTable.index("test" + i).shard(0).shards().get(0).state() == RELOCATING) {
        numberOfRelocatingShards++;
      }
      assertThat(routingTable.index("test" + i).shard(0).shards().get(0).primary(), equalTo(true));
      // make sure we still have 2 shards either relocating or started on the first 25 nodes (still)
      String nodeId = routingTable.index("test" + i).shard(0).shards().get(0).currentNodeId();
      int nodeIndex = Integer.parseInt(nodeId.substring("node".length()));
      assertThat(nodeIndex, lessThan(25));
    }
    assertThat(numberOfRelocatingShards, equalTo(25));
    assertThat(numberOfStartedShards, equalTo(25));
  }
  /**
   * Checks the routing of a single primary (one shard, no replicas) that fails while it is
   * initializing: after the failure is applied the shard is expected to be UNASSIGNED again with
   * no current node.
   */
  @Test
  public void testSingleIndexShardFailed() {
    Settings allocationSettings =
        settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build();
    AllocationService allocation = createAllocationService(allocationSettings);

    logger.info("Building initial routing table");

    MetaData.Builder metaBuilder = MetaData.builder();
    metaBuilder.put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(0));
    MetaData meta = metaBuilder.build();

    RoutingTable.Builder tableBuilder = RoutingTable.builder();
    tableBuilder.addAsNew(meta.index("test"));
    RoutingTable table = tableBuilder.build();

    ClusterState cluster = ClusterState.builder().metaData(meta).routingTable(table).build();

    // No nodes yet: exactly one shard copy, and it is unassigned.
    assertThat(table.index("test").shards().size(), equalTo(1));
    assertThat(table.index("test").shard(0).size(), equalTo(1));
    assertThat(table.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(table.index("test").shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
    assertThat(table.index("test").shard(0).shards().get(0).currentNodeId(), nullValue());

    logger.info("Adding one node and rerouting");
    DiscoveryNodes.Builder singleNode = DiscoveryNodes.builder().put(newNode("node1"));
    cluster = ClusterState.builder(cluster).nodes(singleNode).build();
    RoutingTable before = table;
    table = allocation.reroute(cluster).routingTable();
    cluster = ClusterState.builder(cluster).routingTable(table).build();

    // The reroute is expected to hand the shard to node1 in a new routing table.
    assertThat(before != table, equalTo(true));
    assertThat(table.index("test").shards().size(), equalTo(1));
    assertThat(table.index("test").shard(0).size(), equalTo(1));
    assertThat(table.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(table.index("test").shard(0).shards().get(0).unassigned(), equalTo(false));
    assertThat(table.index("test").shard(0).shards().get(0).state(), equalTo(INITIALIZING));
    assertThat(table.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

    logger.info("Marking the shard as failed");
    RoutingNode node1 = cluster.routingNodes().node("node1");
    before = table;
    table =
        allocation
            .applyFailedShard(cluster, node1.shardsWithState(INITIALIZING).get(0))
            .routingTable();
    cluster = ClusterState.builder(cluster).routingTable(table).build();

    // After the failure the shard is back to unassigned with no node.
    assertThat(before != table, equalTo(true));
    assertThat(table.index("test").shards().size(), equalTo(1));
    assertThat(table.index("test").shard(0).size(), equalTo(1));
    assertThat(table.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(table.index("test").shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
    assertThat(table.index("test").shard(0).shards().get(0).currentNodeId(), nullValue());
  }