@Test
  public void testClusterStateSerialization() throws Exception {
    // Round-trips a rerouted cluster state through toBytes/fromBytes and compares the
    // pretty-printed routing tables of the original and the deserialized state.
    MetaData.Builder metaBuilder = MetaData.builder();
    metaBuilder.put(IndexMetaData.builder("test").numberOfShards(10).numberOfReplicas(1));
    MetaData indexMeta = metaBuilder.build();

    RoutingTable.Builder tableBuilder = RoutingTable.builder();
    tableBuilder.addAsNew(indexMeta.index("test"));
    RoutingTable unallocated = tableBuilder.build();

    // Three nodes; node1 is registered as the local node id and node2 as the master node id.
    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder();
    for (String nodeId : new String[] {"node1", "node2", "node3"}) {
      nodesBuilder.put(newNode(nodeId));
    }
    DiscoveryNodes threeNodes = nodesBuilder.localNodeId("node1").masterNodeId("node2").build();

    ClusterState initialState =
        ClusterState.builder()
            .nodes(threeNodes)
            .metaData(indexMeta)
            .routingTable(unallocated)
            .build();

    // Replace the routing table with the one produced by a reroute.
    AllocationService allocator = createAllocationService();
    ClusterState allocatedState =
        ClusterState.builder(initialState)
            .routingTable(allocator.reroute(initialState).routingTable())
            .build();

    // Serialize, then read back passing a fresh "node1" as the second argument.
    ClusterState roundTripped =
        ClusterState.Builder.fromBytes(
            ClusterState.Builder.toBytes(allocatedState), newNode("node1"));

    assertThat(
        roundTripped.routingTable().prettyPrint(),
        equalTo(allocatedState.routingTable().prettyPrint()));
  }
  /**
   * Adds the node {@code "node" + numberOfNodes} to the given state, reroutes, and then keeps
   * applying started shards to everything in INITIALIZING state until the strategy returns the
   * very same routing table instance as in the previous round.
   *
   * @param clusterState state to extend with the extra node
   * @param strategy allocation service used for rerouting and for starting shards
   * @return the cluster state after the routing table stopped changing
   */
  private ClusterState addNode(ClusterState clusterState, AllocationService strategy) {
    logger.info(
        "now, start 1 more node, check that rebalancing will happen because we set it to always");
    DiscoveryNodes.Builder grownNodes =
        DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node" + numberOfNodes));
    clusterState = ClusterState.builder(clusterState).nodes(grownNodes).build();

    RoutingTable current = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(current).build();
    RoutingNodes liveNodes = clusterState.getRoutingNodes();

    // move initializing to started, until a round yields the identical table instance
    RoutingTable previous;
    do {
      previous = current;
      current =
          strategy
              .applyStartedShards(clusterState, liveNodes.shardsWithState(INITIALIZING))
              .routingTable();
      clusterState = ClusterState.builder(clusterState).routingTable(current).build();
      liveNodes = clusterState.getRoutingNodes();
    } while (current != previous);

    return clusterState;
  }
  /**
   * Writes a rerouted routing table to a byte stream, reads it back, and compares the
   * pretty-printed form of both tables.
   */
  @Test
  public void testRoutingTableSerialization() throws Exception {
    MetaData.Builder metaBuilder = MetaData.builder();
    metaBuilder.put(IndexMetaData.builder("test").numberOfShards(10).numberOfReplicas(1));
    MetaData indexMeta = metaBuilder.build();

    RoutingTable.Builder tableBuilder = RoutingTable.builder();
    tableBuilder.addAsNew(indexMeta.index("test"));
    RoutingTable unallocated = tableBuilder.build();

    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder();
    for (String nodeId : new String[] {"node1", "node2", "node3"}) {
      nodesBuilder.put(newNode(nodeId));
    }
    DiscoveryNodes threeNodes = nodesBuilder.build();

    ClusterState startState =
        ClusterState.builder()
            .nodes(threeNodes)
            .metaData(indexMeta)
            .routingTable(unallocated)
            .build();

    // The table under test is the one a reroute produces from the start state.
    AllocationService allocator = createAllocationService();
    RoutingTable source = allocator.reroute(startState).routingTable();

    BytesStreamOutput sink = new BytesStreamOutput();
    RoutingTable.Builder.writeTo(source, sink);
    RoutingTable target =
        RoutingTable.Builder.readFrom(new BytesStreamInput(sink.bytes().toBytes(), false));

    assertThat(target.prettyPrint(), equalTo(source.prettyPrint()));
  }
  /**
   * Walks the single shard of index "test" through three routing tables: unassigned before any
   * node exists, INITIALIZING on node1 after a reroute, and unassigned again after that shard is
   * reported as failed.
   */
  @Test
  public void testSingleIndexShardFailed() {
    AllocationService strategy =
        createAllocationService(
            settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build());

    logger.info("Building initial routing table");

    MetaData.Builder metaBuilder = MetaData.builder();
    metaBuilder.put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(0));
    MetaData indexMeta = metaBuilder.build();

    RoutingTable initialTable = RoutingTable.builder().addAsNew(indexMeta.index("test")).build();

    ClusterState emptyCluster =
        ClusterState.builder().metaData(indexMeta).routingTable(initialTable).build();

    assertThat(initialTable.index("test").shards().size(), equalTo(1));
    assertThat(initialTable.index("test").shard(0).size(), equalTo(1));
    assertThat(initialTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(initialTable.index("test").shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
    assertThat(initialTable.index("test").shard(0).shards().get(0).currentNodeId(), nullValue());

    logger.info("Adding one node and rerouting");
    ClusterState oneNodeCluster =
        ClusterState.builder(emptyCluster)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")))
            .build();
    RoutingTable allocatedTable = strategy.reroute(oneNodeCluster).routingTable();
    ClusterState allocatedCluster =
        ClusterState.builder(oneNodeCluster).routingTable(allocatedTable).build();

    // The reroute must hand back a different table instance.
    assertThat(initialTable != allocatedTable, equalTo(true));
    assertThat(allocatedTable.index("test").shards().size(), equalTo(1));
    assertThat(allocatedTable.index("test").shard(0).size(), equalTo(1));
    assertThat(allocatedTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(allocatedTable.index("test").shard(0).shards().get(0).unassigned(), equalTo(false));
    assertThat(
        allocatedTable.index("test").shard(0).shards().get(0).state(), equalTo(INITIALIZING));
    assertThat(
        allocatedTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

    logger.info("Marking the shard as failed");
    RoutingNodes routingNodes = allocatedCluster.routingNodes();
    RoutingTable failedTable =
        strategy
            .applyFailedShard(
                allocatedCluster, routingNodes.node("node1").shardsWithState(INITIALIZING).get(0))
            .routingTable();
    // Build the post-failure state as well; nothing below reads it.
    ClusterState.builder(allocatedCluster).routingTable(failedTable).build();

    assertThat(allocatedTable != failedTable, equalTo(true));
    assertThat(failedTable.index("test").shards().size(), equalTo(1));
    assertThat(failedTable.index("test").shard(0).size(), equalTo(1));
    assertThat(failedTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(failedTable.index("test").shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
    assertThat(failedTable.index("test").shard(0).shards().get(0).currentNodeId(), nullValue());
  }
  /**
   * Timing driver: builds 5 * 365 indices (6 shards, 2 replicas each) and 30 nodes carrying a
   * "tag" attribute with one of two values, then, per run, alternates between starting all
   * INITIALIZING shards and rerouting until no unassigned shards remain. Per-run and total
   * durations are logged.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    final int numberOfRuns = 1;
    final int numIndices = 5 * 365; // five years
    final int numShards = 6;
    final int numReplicas = 2;
    final int numberOfNodes = 30;
    final int numberOfTags = 2;
    AllocationService strategy =
        ElasticsearchAllocationTestCase.createAllocationService(
            ImmutableSettings.EMPTY, new Random(1));

    // Index metadata has to be built before the routing table can reference it.
    MetaData.Builder metaBuilder = MetaData.builder();
    for (int index = 1; index <= numIndices; index++) {
      metaBuilder.put(
          IndexMetaData.builder("test_" + index)
              .numberOfShards(numShards)
              .numberOfReplicas(numReplicas));
    }
    MetaData metaData = metaBuilder.build();

    RoutingTable.Builder tableBuilder = RoutingTable.builder();
    for (int index = 1; index <= numIndices; index++) {
      tableBuilder.addAsNew(metaData.index("test_" + index));
    }
    RoutingTable routingTable = tableBuilder.build();

    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder();
    for (int node = 1; node <= numberOfNodes; node++) {
      nodesBuilder.put(
          newNode("node" + node, ImmutableMap.of("tag", "tag_" + (node % numberOfTags))));
    }

    ClusterState initialClusterState =
        ClusterState.builder()
            .metaData(metaData)
            .routingTable(routingTable)
            .nodes(nodesBuilder)
            .build();

    long start = System.currentTimeMillis();
    for (int run = 0; run < numberOfRuns; run++) {
      logger.info("[{}] starting... ", run);
      long runStart = System.currentTimeMillis();
      ClusterState state = initialClusterState;
      while (state.readOnlyRoutingNodes().hasUnassignedShards()) {
        logger.info(
            "[{}] remaining unassigned {}", run, state.readOnlyRoutingNodes().unassigned().size());
        // First start whatever is initializing, then reroute on the resulting state.
        RoutingAllocation.Result started =
            strategy.applyStartedShards(
                state, state.readOnlyRoutingNodes().shardsWithState(INITIALIZING));
        state = ClusterState.builder(state).routingResult(started).build();
        RoutingAllocation.Result rerouted = strategy.reroute(state);
        state = ClusterState.builder(state).routingResult(rerouted).build();
      }
      logger.info(
          "[{}] took {}", run, TimeValue.timeValueMillis(System.currentTimeMillis() - runStart));
    }
    long took = System.currentTimeMillis() - start;
    logger.info(
        "total took {}, AVG {}",
        TimeValue.timeValueMillis(took),
        TimeValue.timeValueMillis(took / numberOfRuns));
  }
  /**
   * Builds a cluster state holding a single index {@code "idx"} (1 shard, 0 replicas) and two
   * nodes, runs one reroute through the given service, and asserts that the shard was UNASSIGNED
   * in the table before the reroute and is INITIALIZING in the table after it.
   *
   * @param service allocation service used for the reroute
   * @return the cluster state carrying the rerouted routing table
   */
  private ClusterState createInitialClusterState(AllocationService service) {
    MetaData.Builder metaBuilder = MetaData.builder();
    metaBuilder.put(
        IndexMetaData.builder("idx")
            .settings(settings(Version.CURRENT))
            .numberOfShards(1)
            .numberOfReplicas(0));
    MetaData metaData = metaBuilder.build();
    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    routingTableBuilder.addAsNew(metaData.index("idx"));

    RoutingTable routingTable = routingTableBuilder.build();
    ClusterState clusterState =
        ClusterState.builder(
                org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(
                    Settings.EMPTY))
            .metaData(metaData)
            .routingTable(routingTable)
            .build();
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")))
            .build();
    RoutingTable prevRoutingTable = routingTable;
    routingTable = service.reroute(clusterState, "reroute").routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // assertEquals takes (expected, actual); this order keeps failure messages the right way round.
    assertEquals(1, prevRoutingTable.index("idx").shards().size());
    assertEquals(UNASSIGNED, prevRoutingTable.index("idx").shard(0).shards().get(0).state());

    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
    return clusterState;
  }
  /**
   * Removes the nodes {@code "node" + i} for every {@code i} from {@code (numberOfNodes + 1) / 2}
   * through {@code numberOfNodes} (inclusive), starts the INITIALIZING shards twice, reroutes, and
   * then keeps starting INITIALIZING shards until the strategy returns the same routing table
   * instance as in the previous round.
   *
   * @param clusterState state to remove the nodes from
   * @param strategy allocation service used for starting shards and rerouting
   * @return the cluster state after the routing table stopped changing
   */
  private ClusterState removeNodes(ClusterState clusterState, AllocationService strategy) {
    final int firstRemoved = (numberOfNodes + 1) / 2;
    logger.info("Removing half the nodes (" + firstRemoved + ")");
    DiscoveryNodes.Builder survivors = DiscoveryNodes.builder(clusterState.nodes());
    for (int id = firstRemoved; id <= numberOfNodes; id++) {
      survivors.remove("node" + id);
    }

    clusterState = ClusterState.builder(clusterState).nodes(survivors.build()).build();
    RoutingNodes liveNodes = clusterState.getRoutingNodes();

    // Two identical "start whatever is initializing" rounds, each with its own log line.
    String[] startPhases = {
      "start all the primary shards, replicas will start initializing", "start the replica shards"
    };
    for (String phase : startPhases) {
      logger.info(phase);
      RoutingTable started =
          strategy
              .applyStartedShards(clusterState, liveNodes.shardsWithState(INITIALIZING))
              .routingTable();
      clusterState = ClusterState.builder(clusterState).routingTable(started).build();
      liveNodes = clusterState.getRoutingNodes();
    }

    logger.info("rebalancing");
    RoutingTable current = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(current).build();
    liveNodes = clusterState.getRoutingNodes();

    logger.info("complete rebalancing");
    RoutingTable previous;
    do {
      previous = current;
      current =
          strategy
              .applyStartedShards(clusterState, liveNodes.shardsWithState(INITIALIZING))
              .routingTable();
      clusterState = ClusterState.builder(clusterState).routingTable(current).build();
      liveNodes = clusterState.getRoutingNodes();
    } while (current != previous);

    return clusterState;
  }
  /**
   * Checks {@code primaryAllocatedPostApi()} on shard 0 of index "test": false right after the
   * index is created, still false after the reroute that follows adding two nodes, and true once
   * the INITIALIZING shards have been started.
   */
  @Test
  public void simpleFlagTests() {
    AllocationService allocation =
        new AllocationService(
            settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build());

    logger.info("creating an index with 1 shard, no replica");
    MetaData indexMeta =
        newMetaDataBuilder()
            .put(newIndexMetaDataBuilder("test").numberOfShards(1).numberOfReplicas(0))
            .build();
    RoutingTable emptyTable = routingTable().addAsNew(indexMeta.index("test")).build();
    ClusterState created =
        newClusterStateBuilder().metaData(indexMeta).routingTable(emptyTable).build();
    assertThat(
        created.routingTable().index("test").shard(0).primaryAllocatedPostApi(), equalTo(false));

    logger.info("adding two nodes and performing rerouting");
    ClusterState withNodes =
        newClusterStateBuilder()
            .state(created)
            .nodes(newNodesBuilder().put(newNode("node1")).put(newNode("node2")))
            .build();
    RoutingAllocation.Result rerouted = allocation.reroute(withNodes);
    ClusterState afterReroute =
        newClusterStateBuilder().state(withNodes).routingTable(rerouted.routingTable()).build();
    assertThat(
        afterReroute.routingTable().index("test").shard(0).primaryAllocatedPostApi(),
        equalTo(false));

    logger.info("start primary shard");
    RoutingAllocation.Result started =
        allocation.applyStartedShards(
            afterReroute, afterReroute.routingNodes().shardsWithState(INITIALIZING));
    ClusterState afterStart =
        newClusterStateBuilder().state(afterReroute).routingTable(started.routingTable()).build();
    assertThat(
        afterStart.routingTable().index("test").shard(0).primaryAllocatedPostApi(),
        equalTo(true));
  }
  /**
   * With the same-host setting enabled, allocates index "test" (2 shards, 1 replica) on two
   * nodes that share the address test1:80 and expects only two shards to initialize and start;
   * after a third node on test2:80 joins, the two remaining shards are expected to initialize on
   * node3.
   */
  @Test
  public void sameHost() {
    AllocationService strategy =
        new AllocationService(
            settingsBuilder().put(SameShardAllocationDecider.SAME_HOST_SETTING, true).build());

    MetaData indexMeta =
        newMetaDataBuilder()
            .put(newIndexMetaDataBuilder("test").numberOfShards(2).numberOfReplicas(1))
            .build();

    RoutingTable emptyTable = routingTable().addAsNew(indexMeta.index("test")).build();
    ClusterState noNodes =
        newClusterStateBuilder().metaData(indexMeta).routingTable(emptyTable).build();

    logger.info("--> adding two nodes with the same host");
    ClusterState twoNodes =
        newClusterStateBuilder()
            .state(noNodes)
            .nodes(
                newNodesBuilder()
                    .put(newNode("node1", new InetSocketTransportAddress("test1", 80)))
                    .put(newNode("node2", new InetSocketTransportAddress("test1", 80))))
            .build();
    RoutingTable firstReroute = strategy.reroute(twoNodes).routingTable();
    ClusterState initializing =
        newClusterStateBuilder().state(twoNodes).routingTable(firstReroute).build();

    assertThat(
        initializing.readOnlyRoutingNodes().numberOfShardsOfType(ShardRoutingState.INITIALIZING),
        equalTo(2));

    logger.info(
        "--> start all primary shards, no replica will be started since its on the same host");
    RoutingTable startedTable =
        strategy
            .applyStartedShards(
                initializing, initializing.readOnlyRoutingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    ClusterState primariesStarted =
        newClusterStateBuilder().state(initializing).routingTable(startedTable).build();

    assertThat(
        primariesStarted.readOnlyRoutingNodes().numberOfShardsOfType(ShardRoutingState.STARTED),
        equalTo(2));
    assertThat(
        primariesStarted
            .readOnlyRoutingNodes()
            .numberOfShardsOfType(ShardRoutingState.INITIALIZING),
        equalTo(0));

    logger.info("--> add another node, with a different host, replicas will be allocating");
    ClusterState threeNodes =
        newClusterStateBuilder()
            .state(primariesStarted)
            .nodes(
                newNodesBuilder()
                    .putAll(primariesStarted.nodes())
                    .put(newNode("node3", new InetSocketTransportAddress("test2", 80))))
            .build();
    RoutingTable secondReroute = strategy.reroute(threeNodes).routingTable();
    ClusterState replicasInitializing =
        newClusterStateBuilder().state(threeNodes).routingTable(secondReroute).build();

    assertThat(
        replicasInitializing
            .readOnlyRoutingNodes()
            .numberOfShardsOfType(ShardRoutingState.STARTED),
        equalTo(2));
    assertThat(
        replicasInitializing
            .readOnlyRoutingNodes()
            .numberOfShardsOfType(ShardRoutingState.INITIALIZING),
        equalTo(2));
    // Every shard that is initializing now must sit on the node with the different address.
    for (MutableShardRouting initializingShard :
        replicasInitializing.readOnlyRoutingNodes().shardsWithState(INITIALIZING)) {
      assertThat(initializingShard.currentNodeId(), equalTo("node3"));
    }
  }
  /**
   * Creates two indices ("test1" and "test2", one shard and one replica each) on two nodes with
   * the rebalance setting at ALWAYS. After the reroute both primaries are INITIALIZING with
   * UNASSIGNED replicas; after starting only test1's initializing shards, test1's primary is
   * STARTED with an INITIALIZING replica while test2 is unchanged.
   */
  public void testSimple() {
    AllocationService strategy =
        createAllocationService(
            Settings.builder()
                .put(
                    ClusterRebalanceAllocationDecider
                        .CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(),
                    ClusterRebalanceAllocationDecider.ClusterRebalanceType.ALWAYS.toString())
                .build());

    MetaData.Builder metaBuilder = MetaData.builder();
    metaBuilder.put(
        IndexMetaData.builder("test1")
            .settings(settings(Version.CURRENT))
            .numberOfShards(1)
            .numberOfReplicas(1));
    metaBuilder.put(
        IndexMetaData.builder("test2")
            .settings(settings(Version.CURRENT))
            .numberOfShards(1)
            .numberOfReplicas(1));
    MetaData metaData = metaBuilder.build();

    RoutingTable.Builder tableBuilder = RoutingTable.builder();
    tableBuilder.addAsNew(metaData.index("test1"));
    tableBuilder.addAsNew(metaData.index("test2"));
    RoutingTable routingTable = tableBuilder.build();

    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    logger.info("start two nodes");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
            .build();
    routingTable = strategy.reroute(clusterState, "reroute").routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // Both indices are expected to look the same here: primary initializing, replica unassigned.
    for (String index : new String[] {"test1", "test2"}) {
      for (int shard = 0; shard < routingTable.index(index).shards().size(); shard++) {
        assertThat(routingTable.index(index).shard(shard).shards().size(), equalTo(2));
        assertThat(
            routingTable.index(index).shard(shard).primaryShard().state(), equalTo(INITIALIZING));
        assertThat(
            routingTable.index(index).shard(shard).replicaShards().get(0).state(),
            equalTo(UNASSIGNED));
      }
    }

    logger.info("start all the primary shards for test1, replicas will start initializing");
    RoutingNodes routingNodes = clusterState.getRoutingNodes();
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState("test1", INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    routingNodes = clusterState.getRoutingNodes();

    for (int shard = 0; shard < routingTable.index("test1").shards().size(); shard++) {
      assertThat(routingTable.index("test1").shard(shard).shards().size(), equalTo(2));
      assertThat(
          routingTable.index("test1").shard(shard).primaryShard().state(), equalTo(STARTED));
      assertThat(
          routingTable.index("test1").shard(shard).replicaShards().get(0).state(),
          equalTo(INITIALIZING));
    }

    // test2 was not started, so its expectations are the same as before.
    for (int shard = 0; shard < routingTable.index("test2").shards().size(); shard++) {
      assertThat(routingTable.index("test2").shard(shard).shards().size(), equalTo(2));
      assertThat(
          routingTable.index("test2").shard(shard).primaryShard().state(), equalTo(INITIALIZING));
      assertThat(
          routingTable.index("test2").shard(shard).replicaShards().get(0).state(),
          equalTo(UNASSIGNED));
    }
  }
  /**
   * Tests that higher prioritized primaries and replicas are allocated first even on the balanced
   * shard allocator. See https://github.com/elastic/elasticsearch/issues/13249 for details.
   *
   * <p>Which of the two indices ("first" or "second") gets priority 100 versus 1 is picked at
   * random. Each round expects exactly two INITIALIZING shards, alternating between the
   * high-priority and the low-priority index.
   */
  public void testPrioritizedIndicesAllocatedFirst() {
    AllocationService allocation =
        createAllocationService(
            Settings.builder()
                .put(
                    ThrottlingAllocationDecider
                        .CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_RECOVERIES_SETTING.getKey(),
                    1)
                .put(
                    ThrottlingAllocationDecider
                        .CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_OUTGOING_RECOVERIES_SETTING
                        .getKey(),
                    10)
                .put(
                    ThrottlingAllocationDecider
                        .CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_PRIMARIES_RECOVERIES_SETTING
                        .getKey(),
                    1)
                .put(
                    ThrottlingAllocationDecider
                        .CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_INCOMING_RECOVERIES_SETTING
                        .getKey(),
                    1)
                .build());

    // One coin flip decides which index is the high-priority one.
    final boolean firstIsHigh = randomBoolean();
    final String highPriorityName = firstIsHigh ? "first" : "second";
    final String lowPriorityName = firstIsHigh ? "second" : "first";
    final int priorityFirst = firstIsHigh ? 100 : 1;
    final int prioritySecond = firstIsHigh ? 1 : 100;

    MetaData.Builder metaBuilder = MetaData.builder();
    metaBuilder.put(
        IndexMetaData.builder("first")
            .settings(
                settings(Version.CURRENT).put(IndexMetaData.SETTING_PRIORITY, priorityFirst))
            .numberOfShards(2)
            .numberOfReplicas(1));
    metaBuilder.put(
        IndexMetaData.builder("second")
            .settings(
                settings(Version.CURRENT).put(IndexMetaData.SETTING_PRIORITY, prioritySecond))
            .numberOfShards(2)
            .numberOfReplicas(1));
    MetaData metaData = metaBuilder.build();

    RoutingTable.Builder tableBuilder = RoutingTable.builder();
    tableBuilder.addAsNew(metaData.index("first"));
    tableBuilder.addAsNew(metaData.index("second"));
    RoutingTable routingTable = tableBuilder.build();

    ClusterState clusterState =
        ClusterState.builder(
                org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(
                    Settings.EMPTY))
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
            .build();
    RoutingAllocation.Result rerouteResult = allocation.reroute(clusterState, "reroute");
    clusterState =
        ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build();

    // A second reroute is applied on top of the first one before the first check.
    routingTable = allocation.reroute(clusterState, "reroute").routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    assertEquals(2, clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size());
    for (int slot = 0; slot < 2; slot++) {
      assertEquals(
          highPriorityName,
          clusterState.getRoutingNodes().shardsWithState(INITIALIZING).get(slot).getIndexName());
    }

    routingTable =
        allocation
            .applyStartedShards(
                clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    assertEquals(2, clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size());
    for (int slot = 0; slot < 2; slot++) {
      assertEquals(
          lowPriorityName,
          clusterState.getRoutingNodes().shardsWithState(INITIALIZING).get(slot).getIndexName());
    }

    routingTable =
        allocation
            .applyStartedShards(
                clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    // This size check carries the shard list itself as the failure message.
    assertEquals(
        clusterState.getRoutingNodes().shardsWithState(INITIALIZING).toString(),
        2,
        clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size());
    for (int slot = 0; slot < 2; slot++) {
      assertEquals(
          highPriorityName,
          clusterState.getRoutingNodes().shardsWithState(INITIALIZING).get(slot).getIndexName());
    }

    routingTable =
        allocation
            .applyStartedShards(
                clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    assertEquals(2, clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size());
    for (int slot = 0; slot < 2; slot++) {
      assertEquals(
          lowPriorityName,
          clusterState.getRoutingNodes().shardsWithState(INITIALIZING).get(slot).getIndexName());
    }
  }
  /**
   * With both recovery limits set to 3, allocates index "test" (10 shards, 1 replica) on a single
   * node and checks the STARTED / INITIALIZING / UNASSIGNED shard counts after the reroute and
   * after each of four subsequent "start the initializing shards" steps.
   */
  @Test
  public void testPrimaryRecoveryThrottling() {
    AllocationService strategy =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.node_concurrent_recoveries", 3)
                .put("cluster.routing.allocation.node_initial_primaries_recoveries", 3)
                .build());

    logger.info("Building initial routing table");

    MetaData metaData =
        newMetaDataBuilder()
            .put(newIndexMetaDataBuilder("test").numberOfShards(10).numberOfReplicas(1))
            .build();

    RoutingTable routingTable =
        routingTable()
            .add(indexRoutingTable("test").initializeEmpty(metaData.index("test")))
            .build();

    ClusterState clusterState =
        newClusterStateBuilder().metaData(metaData).routingTable(routingTable).build();

    logger.info("start one node, do reroute, only 3 should initialize");
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .nodes(newNodesBuilder().put(RoutingAllocationTests.newNode("node1")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();

    assertThat(routingTable.shardsWithState(STARTED).size(), equalTo(0));
    assertThat(routingTable.shardsWithState(INITIALIZING).size(), equalTo(3));
    assertThat(routingTable.shardsWithState(UNASSIGNED).size(), equalTo(17));

    // Each step starts whatever is INITIALIZING and then checks the three counters.
    String[] stepMessages = {
      "start initializing, another 3 should initialize",
      "start initializing, another 3 should initialize",
      "start initializing, another 1 should initialize",
      "start initializing, all primaries should be started"
    };
    // Expected {STARTED, INITIALIZING, UNASSIGNED} sizes after the matching step above.
    int[][] expectedCounts = {{3, 3, 14}, {6, 3, 11}, {9, 1, 10}, {10, 0, 10}};

    for (int step = 0; step < stepMessages.length; step++) {
      logger.info(stepMessages[step]);
      routingTable =
          strategy
              .applyStartedShards(clusterState, routingTable.shardsWithState(INITIALIZING))
              .routingTable();
      clusterState =
          newClusterStateBuilder().state(clusterState).routingTable(routingTable).build();

      assertThat(routingTable.shardsWithState(STARTED).size(), equalTo(expectedCounts[step][0]));
      assertThat(
          routingTable.shardsWithState(INITIALIZING).size(), equalTo(expectedCounts[step][1]));
      assertThat(
          routingTable.shardsWithState(UNASSIGNED).size(), equalTo(expectedCounts[step][2]));
    }
  }
  /**
   * Checks that adding an index-level {@code INDEX_TOTAL_SHARDS_PER_NODE} limit of 3 to index
   * "test", whose five shards are all expected on node1, causes shards to relocate, and that both
   * nodes finish with five started shards each.
   */
  @Test
  public void indexLevelShardsLimitRemain() {
    AllocationService allocation =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put("cluster.routing.allocation.node_initial_primaries_recoveries", 10)
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .put("cluster.routing.allocation.balance.index", 0.0f)
                .put("cluster.routing.allocation.balance.replica", 1.0f)
                .put("cluster.routing.allocation.balance.primary", 0.0f)
                .build());

    logger.info("Building initial routing table");

    // Index "test": five primaries, no replicas.
    MetaData meta =
        newMetaDataBuilder()
            .put(
                newIndexMetaDataBuilder("test")
                    .settings(
                        ImmutableSettings.settingsBuilder()
                            .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 5)
                            .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)))
            .build();
    RoutingTable table = routingTable().addAsNew(meta.index("test")).build();
    ClusterState cluster = newClusterStateBuilder().metaData(meta).routingTable(table).build();

    logger.info("Adding one node and reroute");
    cluster =
        newClusterStateBuilder()
            .state(cluster)
            .nodes(newNodesBuilder().put(newNode("node1")))
            .build();
    table = allocation.reroute(cluster).routingTable();
    cluster = newClusterStateBuilder().state(cluster).routingTable(table).build();

    logger.info("Start the primary shards");
    table =
        allocation
            .applyStartedShards(cluster, cluster.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    cluster = newClusterStateBuilder().state(cluster).routingTable(table).build();

    assertThat(cluster.readOnlyRoutingNodes().numberOfShardsOfType(STARTED), equalTo(5));

    logger.info("add another index with 5 shards");
    // Index "test1": same shape as "test", added on top of the existing metadata and routing.
    meta =
        newMetaDataBuilder()
            .metaData(meta)
            .put(
                newIndexMetaDataBuilder("test1")
                    .settings(
                        ImmutableSettings.settingsBuilder()
                            .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 5)
                            .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)))
            .build();
    table = routingTable().routingTable(table).addAsNew(meta.index("test1")).build();
    cluster = newClusterStateBuilder().state(cluster).metaData(meta).routingTable(table).build();

    logger.info("Add another one node and reroute");
    cluster =
        newClusterStateBuilder()
            .state(cluster)
            .nodes(newNodesBuilder().putAll(cluster.nodes()).put(newNode("node2")))
            .build();
    table = allocation.reroute(cluster).routingTable();
    cluster = newClusterStateBuilder().state(cluster).routingTable(table).build();

    table =
        allocation
            .applyStartedShards(cluster, cluster.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    cluster = newClusterStateBuilder().state(cluster).routingTable(table).build();

    // All ten shards are started; "test" lives entirely on node1 and "test1" on node2.
    RoutingNodes beforeLimit = cluster.readOnlyRoutingNodes();
    assertThat(beforeLimit.numberOfShardsOfType(STARTED), equalTo(10));
    for (MutableShardRouting onNode1 : beforeLimit.node("node1")) {
      assertThat(onNode1.index(), equalTo("test"));
    }
    for (MutableShardRouting onNode2 : beforeLimit.node("node2")) {
      assertThat(onNode2.index(), equalTo("test1"));
    }

    logger.info(
        "update "
            + ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE
            + " for test, see that things move");
    // Re-register "test" with a limit of three shards per node.
    meta =
        newMetaDataBuilder()
            .metaData(meta)
            .put(
                newIndexMetaDataBuilder("test")
                    .settings(
                        ImmutableSettings.settingsBuilder()
                            .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 5)
                            .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
                            .put(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE, 3)))
            .build();
    cluster = newClusterStateBuilder().state(cluster).metaData(meta).build();

    logger.info("reroute after setting");
    table = allocation.reroute(cluster).routingTable();
    cluster = newClusterStateBuilder().state(cluster).routingTable(table).build();

    RoutingNodes afterLimit = cluster.readOnlyRoutingNodes();
    assertThat(afterLimit.node("node1").numberOfShardsWithState(STARTED), equalTo(3));
    assertThat(afterLimit.node("node1").numberOfShardsWithState(RELOCATING), equalTo(2));
    assertThat(afterLimit.node("node2").numberOfShardsWithState(RELOCATING), equalTo(2));
    assertThat(afterLimit.node("node2").numberOfShardsWithState(STARTED), equalTo(3));

    // Moving two shards to node2 upsets the balance, so the balancer moves two shards from node2
    // back to node1 right away; both moves show up in the same reroute because INITIALIZING
    // shards are taken into account during rebalance.
    table =
        allocation
            .applyStartedShards(cluster, cluster.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    cluster = newClusterStateBuilder().state(cluster).routingTable(table).build();

    // Unlike EvenShardCountAllocator, the balancer is not solely based on the average, so the
    // cluster is already settled at this point.
    RoutingNodes settled = cluster.readOnlyRoutingNodes();
    for (String nodeId : new String[] {"node1", "node2"}) {
      assertThat(settled.node(nodeId).numberOfShardsWithState(STARTED), equalTo(5));
    }
  }
  /**
   * Allocates a 3-shard / 1-replica index "test" on three nodes and drives it to fully started,
   * then adds an identically shaped index "test1" and repeats. Shard states are checked after
   * every reroute / start step, and per-node started-shard counts at the end of each phase.
   */
  public void testBalanceAllNodesStartedAddIndex() {
    AllocationService allocation =
        createAllocationService(
            Settings.builder()
                .put("cluster.routing.allocation.node_concurrent_recoveries", 10)
                .put("cluster.routing.allocation.node_initial_primaries_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider
                        .CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(),
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build());
    // Node ids used by the per-node assertions that close each phase.
    String[] nodeIds = {"node1", "node2", "node3"};

    logger.info("Building initial routing table");

    MetaData singleIndex =
        MetaData.builder()
            .put(
                IndexMetaData.builder("test")
                    .settings(settings(Version.CURRENT))
                    .numberOfShards(3)
                    .numberOfReplicas(1))
            .build();
    RoutingTable unassignedTable =
        RoutingTable.builder().addAsNew(singleIndex.index("test")).build();
    ClusterState state =
        ClusterState.builder(
                org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(
                    Settings.EMPTY))
            .metaData(singleIndex)
            .routingTable(unassignedTable)
            .build();

    // No nodes yet: both copies of every shard are unassigned and have no node id.
    RoutingTable table = state.routingTable();
    int shardCount = table.index("test").shards().size();
    assertThat(shardCount, equalTo(3));
    for (int shard = 0; shard < shardCount; shard++) {
      assertThat(table.index("test").shard(shard).size(), equalTo(2));
      assertThat(table.index("test").shard(shard).shards().size(), equalTo(2));
      assertThat(table.index("test").shard(shard).shards().get(0).state(), equalTo(UNASSIGNED));
      assertThat(table.index("test").shard(shard).shards().get(1).state(), equalTo(UNASSIGNED));
      assertThat(table.index("test").shard(shard).shards().get(0).currentNodeId(), nullValue());
      assertThat(table.index("test").shard(shard).shards().get(1).currentNodeId(), nullValue());
    }

    logger.info("Adding three node and performing rerouting");
    state =
        ClusterState.builder(state)
            .nodes(
                DiscoveryNodes.builder()
                    .add(newNode("node1"))
                    .add(newNode("node2"))
                    .add(newNode("node3")))
            .build();

    ClusterState next = allocation.reroute(state, "reroute");
    assertThat(next, not(equalTo(state)));
    state = next;

    // Primaries are initializing; replicas stay unassigned until their primary has started.
    table = state.routingTable();
    shardCount = table.index("test").shards().size();
    assertThat(shardCount, equalTo(3));
    for (int shard = 0; shard < shardCount; shard++) {
      assertThat(table.index("test").shard(shard).size(), equalTo(2));
      assertThat(table.index("test").shard(shard).shards().size(), equalTo(2));
      assertThat(table.index("test").shard(shard).primaryShard().state(), equalTo(INITIALIZING));
      assertThat(table.index("test").shard(shard).replicaShards().size(), equalTo(1));
      assertThat(
          table.index("test").shard(shard).replicaShards().get(0).state(), equalTo(UNASSIGNED));
      assertThat(
          table.index("test").shard(shard).replicaShards().get(0).currentNodeId(), nullValue());
    }

    logger.info("Another round of rebalancing");
    state = ClusterState.builder(state).nodes(DiscoveryNodes.builder(state.nodes())).build();
    next = allocation.reroute(state, "reroute");
    assertThat(next, equalTo(state));

    RoutingNodes nodes = state.getRoutingNodes();
    next = allocation.applyStartedShards(state, nodes.shardsWithState(INITIALIZING));
    assertThat(next, not(equalTo(state)));
    state = next;

    table = state.routingTable();
    shardCount = table.index("test").shards().size();
    assertThat(shardCount, equalTo(3));
    for (int shard = 0; shard < shardCount; shard++) {
      assertThat(table.index("test").shard(shard).size(), equalTo(2));
      assertThat(table.index("test").shard(shard).shards().size(), equalTo(2));
      assertThat(table.index("test").shard(shard).primaryShard().state(), equalTo(STARTED));
      assertThat(table.index("test").shard(shard).replicaShards().size(), equalTo(1));
      // Replicas are initializing as well; per the original author's note, recovering them from
      // *started* primaries is ensured in the IndicesClusterStateService.
      assertThat(
          table.index("test").shard(shard).replicaShards().get(0).state(), equalTo(INITIALIZING));
    }

    logger.info("Reroute, nothing should change");
    next = allocation.reroute(state, "reroute");
    assertThat(next, equalTo(state));

    logger.info("Start the more shards");
    nodes = state.getRoutingNodes();
    next = allocation.applyStartedShards(state, nodes.shardsWithState(INITIALIZING));
    assertThat(next, not(equalTo(state)));
    state = next;
    nodes = state.getRoutingNodes();

    table = state.routingTable();
    shardCount = table.index("test").shards().size();
    assertThat(shardCount, equalTo(3));
    for (int shard = 0; shard < shardCount; shard++) {
      assertThat(table.index("test").shard(shard).size(), equalTo(2));
      assertThat(table.index("test").shard(shard).shards().size(), equalTo(2));
      assertThat(table.index("test").shard(shard).primaryShard().state(), equalTo(STARTED));
      assertThat(table.index("test").shard(shard).replicaShards().size(), equalTo(1));
    }
    // Six started copies of "test" spread evenly: two per node.
    for (String nodeId : nodeIds) {
      assertThat(nodes.node(nodeId).numberOfShardsWithState(STARTED), equalTo(2));
    }
    for (String nodeId : nodeIds) {
      assertThat(nodes.node(nodeId).shardsWithState("test", STARTED).size(), equalTo(2));
    }

    logger.info("Add new index 3 shards 1 replica");

    MetaData twoIndices =
        MetaData.builder(state.metaData())
            .put(
                IndexMetaData.builder("test1")
                    .settings(
                        settings(Version.CURRENT)
                            .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 3)
                            .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)))
            .build();
    RoutingTable withSecondIndex =
        RoutingTable.builder(state.routingTable()).addAsNew(twoIndices.index("test1")).build();
    state = ClusterState.builder(state).metaData(twoIndices).routingTable(withSecondIndex).build();

    assertThat(state.routingTable().index("test1").shards().size(), equalTo(3));

    next = allocation.reroute(state, "reroute");
    assertThat(next, not(equalTo(state)));
    state = next;

    // Same progression as for "test": primaries initialize first, replicas remain unassigned.
    table = state.routingTable();
    shardCount = table.index("test1").shards().size();
    assertThat(shardCount, equalTo(3));
    for (int shard = 0; shard < shardCount; shard++) {
      assertThat(table.index("test1").shard(shard).size(), equalTo(2));
      assertThat(table.index("test1").shard(shard).shards().size(), equalTo(2));
      assertThat(table.index("test1").shard(shard).primaryShard().state(), equalTo(INITIALIZING));
      assertThat(table.index("test1").shard(shard).replicaShards().size(), equalTo(1));
      assertThat(
          table.index("test1").shard(shard).replicaShards().get(0).state(), equalTo(UNASSIGNED));
      assertThat(
          table.index("test1").shard(shard).replicaShards().get(0).currentNodeId(), nullValue());
    }

    logger.info("Another round of rebalancing");
    state = ClusterState.builder(state).nodes(DiscoveryNodes.builder(state.nodes())).build();
    next = allocation.reroute(state, "reroute");
    assertThat(next, equalTo(state));

    nodes = state.getRoutingNodes();
    next = allocation.applyStartedShards(state, nodes.shardsWithState(INITIALIZING));
    assertThat(next, not(equalTo(state)));
    state = next;

    table = state.routingTable();
    shardCount = table.index("test1").shards().size();
    assertThat(shardCount, equalTo(3));
    for (int shard = 0; shard < shardCount; shard++) {
      assertThat(table.index("test1").shard(shard).size(), equalTo(2));
      assertThat(table.index("test1").shard(shard).shards().size(), equalTo(2));
      assertThat(table.index("test1").shard(shard).primaryShard().state(), equalTo(STARTED));
      assertThat(table.index("test1").shard(shard).replicaShards().size(), equalTo(1));
      // Replicas are initializing as well; per the original author's note, recovering them from
      // *started* primaries is ensured in the IndicesClusterStateService.
      assertThat(
          table.index("test1").shard(shard).replicaShards().get(0).state(), equalTo(INITIALIZING));
    }

    logger.info("Reroute, nothing should change");
    next = allocation.reroute(state, "reroute");
    assertThat(next, equalTo(state));

    logger.info("Start the more shards");
    nodes = state.getRoutingNodes();
    next = allocation.applyStartedShards(state, nodes.shardsWithState(INITIALIZING));
    assertThat(next, not(equalTo(state)));
    state = next;
    nodes = state.getRoutingNodes();

    table = state.routingTable();
    shardCount = table.index("test1").shards().size();
    assertThat(shardCount, equalTo(3));
    for (int shard = 0; shard < shardCount; shard++) {
      assertThat(table.index("test1").shard(shard).size(), equalTo(2));
      assertThat(table.index("test1").shard(shard).shards().size(), equalTo(2));
      assertThat(table.index("test1").shard(shard).primaryShard().state(), equalTo(STARTED));
      assertThat(table.index("test1").shard(shard).replicaShards().size(), equalTo(1));
    }
    // Twelve started copies in total: four per node, two of which belong to "test1".
    for (String nodeId : nodeIds) {
      assertThat(nodes.node(nodeId).numberOfShardsWithState(STARTED), equalTo(4));
    }
    for (String nodeId : nodeIds) {
      assertThat(nodes.node(nodeId).shardsWithState("test1", STARTED).size(), equalTo(2));
    }
  }
  /**
   * Drives {@code DiskThresholdDecider} configured with absolute byte watermarks (initially low
   * "30b" / high "9b", later raised to "40b"/"30b" and then "50b"/"40b") against a fake {@code
   * ClusterInfoService} that reports fixed disk usages for node1..node5.
   *
   * <p>A 1-shard / 2-replica index is allocated while nodes are added one at a time; after each
   * reroute or start step the test asserts how many shards are in each state and how many shards
   * each node holds. By the end, node1 and node2 hold nothing and node3, node4 and node5 hold one
   * shard each.
   *
   * <p>NOTE(review): presumably an absolute watermark is the minimum number of free bytes a node
   * must have — confirm against DiskThresholdDecider.
   */
  @Test
  @TestLogging("cluster.routing.allocation.decider:TRACE")
  public void diskThresholdWithAbsoluteSizesTest() {
    // Initial watermarks, expressed as absolute byte sizes rather than percentages.
    Settings diskSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "30b")
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "9b")
            .build();

    // Fake per-node disk usage; assumes DiskUsage(nodeId, totalBytes, freeBytes), which matches
    // the "% used" figures in the trailing comments — TODO confirm.
    Map<String, DiskUsage> usages = new HashMap<>();
    usages.put("node1", new DiskUsage("node1", 100, 10)); // 90% used
    usages.put("node2", new DiskUsage("node2", 100, 10)); // 90% used
    usages.put("node3", new DiskUsage("node3", 100, 60)); // 40% used
    usages.put("node4", new DiskUsage("node4", 100, 80)); // 20% used
    usages.put("node5", new DiskUsage("node5", 100, 85)); // 15% used

    Map<String, Long> shardSizes = new HashMap<>();
    shardSizes.put("[test][0][p]", 10L); // 10 bytes
    shardSizes.put("[test][0][r]", 10L);
    // Immutable snapshot of the maps as they are right now; later changes to `usages` are only
    // visible through a new ClusterInfo.
    final ClusterInfo clusterInfo =
        new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes));

    AllocationDeciders deciders =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    // Stub service that always hands back the first snapshot.
    ClusterInfoService cis =
        new ClusterInfoService() {
          @Override
          public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo;
          }
        };

    AllocationService strategy =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build(),
            deciders,
            new ShardsAllocators(),
            cis);

    // One primary plus two replicas: three shard copies in total.
    MetaData metaData =
        MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(2))
            .build();

    RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build();

    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    logger.info("--> adding node1 and node2 node");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
            .build();

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // The primary should initialize even though both nodes are over the limit
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    // Work out which of the two nodes received the primary; the rest of the test refers to them
    // by role because the choice is not fixed.
    String nodeWithPrimary, nodeWithoutPrimary;
    if (clusterState.getRoutingNodes().node("node1").size() == 1) {
      nodeWithPrimary = "node1";
      nodeWithoutPrimary = "node2";
    } else {
      nodeWithPrimary = "node2";
      nodeWithoutPrimary = "node1";
    }
    logger.info("--> nodeWithPrimary: {}", nodeWithPrimary);
    logger.info("--> nodeWithoutPrimary: {}", nodeWithoutPrimary);

    // Make node without the primary now habitable to replicas
    usages.put(nodeWithoutPrimary, new DiskUsage(nodeWithoutPrimary, 100, 35)); // 65% used
    // Take a second snapshot that includes the updated usage and serve it from a new stub.
    final ClusterInfo clusterInfo2 =
        new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes));
    cis =
        new ClusterInfoService() {
          @Override
          public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo2;
          }
        };
    // Rebuild the service so it uses the new stub (same settings and deciders as before).
    strategy =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build(),
            deciders,
            new ShardsAllocators(),
            cis);

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // Now the replica should be able to initialize
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(2));

    logger.info("--> start the shards (primaries)");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that we're able to start the primary and replica, since they were both initializing
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2));
    // node1 and node2 each hold exactly one shard: the primary on nodeWithPrimary (placed there
    // even though its disk usage is too high) and a replica on nodeWithoutPrimary
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));

    // NOTE(review): the assertion below is disabled in the original and is left that way.
    // Assert that one replica is still unassigned
    // assertThat(clusterState.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(),
    // equalTo(1));

    logger.info("--> adding node3");

    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that the remaining replica is initializing now that node3 is available with enough
    // space
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2));
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(),
        equalTo(1));

    logger.info("--> start the shards (replicas)");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that all replicas could be started
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(3));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> changing decider settings");

    // Raise the low watermark from "30b" to "40b"
    // Raise the high watermark from "9b" to "30b"
    // Per the original author, the node whose usage was improved (nodeWithoutPrimary) should now
    // get no new shards allocated to it, but its existing shard can remain
    diskSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "40b")
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "30b")
            .build();

    deciders =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    // Rebuild the service around the new deciders; `cis` still serves the second snapshot.
    strategy =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build(),
            deciders,
            new ShardsAllocators(),
            cis);

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // Shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(3));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> changing settings again");

    // Raise the low watermark from "40b" to "50b"
    // Raise the high watermark from "30b" to "40b"
    // Per the original author, nodeWithoutPrimary should now get no new shards allocated to it,
    // and its existing shard cannot remain
    diskSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "50b")
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "40b")
            .build();

    deciders =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    strategy =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build(),
            deciders,
            new ShardsAllocators(),
            cis);

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(3));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
    // No shard has been moved off of node1 or node2 yet because there's nowhere for it to go
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> adding node4");

    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node4")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Two shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2));
    // One shard is relocating (the assertions after the next start step show that it leaves
    // nodeWithPrimary)
    assertThat(clusterState.routingNodes().shardsWithState(RELOCATING).size(), equalTo(1));
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> apply INITIALIZING shards");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // primary shard already has been relocated away
    assertThat(clusterState.getRoutingNodes().node(nodeWithPrimary).size(), equalTo(0));
    // node with increased space still has its shard
    assertThat(clusterState.getRoutingNodes().node(nodeWithoutPrimary).size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node4").size(), equalTo(1));

    logger.info("--> adding node5");

    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node5")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Shards remain started on node3 and node4
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2));
    // One shard is relocating off of nodeWithoutPrimary now
    assertThat(clusterState.routingNodes().shardsWithState(RELOCATING).size(), equalTo(1));
    // Initializing on node5
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> apply INITIALIZING shards");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logger.info("--> final cluster state:");
    logShardStates(clusterState);
    // Neither node1 nor node2 holds a shard any more: one of them (nodeWithPrimary) was emptied
    // when node4 joined, the other (nodeWithoutPrimary) now that node5 is available
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node4").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node5").size(), equalTo(1));
  }
  /**
   * Checks which reported shard routings {@code applyStartedShards} accepts as a match for the
   * routing table's own entries.
   *
   * <p>The cluster state holds one index ("test") with three shards on two nodes: shard 0
   * INITIALIZING on node1, shard 1 STARTED on node2 and shard 2 RELOCATING from node1 to node2.
   * Each step reports a single hand-built routing against that same, unchanged state and asserts
   * via {@code result.changed()} whether it was applied:
   *
   * <ul>
   *   <li>a copy of the initializing shard is applied, regardless of its trailing int argument
   *       (a random value is used; presumably the version - TODO confirm);
   *   <li>copies with a flipped primary flag or a different current/relocating node are not;
   *   <li>reporting the already started shard again is not;
   *   <li>the relocation target (current and relocating node swapped) is applied and leaves the
   *       shard STARTED on node2 with no relocating node;
   *   <li>variants of the relocation target with a wrong flag or node, or with no relocating node
   *       at all, are not.
   * </ul>
   */
  @Test
  public void tesStartedShardsMatching() {
    AllocationService allocation = createAllocationService();

    logger.info("--> building initial cluster state");
    final IndexMetaData indexMetaData =
        IndexMetaData.builder("test").numberOfShards(3).numberOfReplicas(0).build();
    ClusterState.Builder stateBuilder =
        ClusterState.builder(ClusterName.DEFAULT)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
            .metaData(MetaData.builder().put(indexMetaData, false));

    // The primary flag is irrelevant to the matching rules under test, so it is randomized.
    final ImmutableShardRouting initShard =
        new ImmutableShardRouting(
            "test", 0, "node1", randomBoolean(), ShardRoutingState.INITIALIZING, 1);
    final ImmutableShardRouting startedShard =
        new ImmutableShardRouting(
            "test", 1, "node2", randomBoolean(), ShardRoutingState.STARTED, 1);
    final ImmutableShardRouting relocatingShard =
        new ImmutableShardRouting(
            "test", 2, "node1", "node2", randomBoolean(), ShardRoutingState.RELOCATING, 1);
    stateBuilder.routingTable(
        RoutingTable.builder()
            .add(
                IndexRoutingTable.builder("test")
                    .addIndexShard(
                        new IndexShardRoutingTable.Builder(initShard.shardId(), true)
                            .addShard(initShard)
                            .build())
                    .addIndexShard(
                        new IndexShardRoutingTable.Builder(startedShard.shardId(), true)
                            .addShard(startedShard)
                            .build())
                    .addIndexShard(
                        new IndexShardRoutingTable.Builder(relocatingShard.shardId(), true)
                            .addShard(relocatingShard)
                            .build())));

    ClusterState state = stateBuilder.build();

    logger.info("--> test starting of shard");

    RoutingAllocation.Result result =
        startSingleShard(
            allocation,
            state,
            new ImmutableShardRouting(
                initShard.index(),
                initShard.id(),
                initShard.currentNodeId(),
                initShard.relocatingNodeId(),
                initShard.primary(),
                ShardRoutingState.INITIALIZING,
                randomInt()));
    assertTrue(withRoutingTable("failed to start " + initShard, result), result.changed());
    assertTrue(
        withRoutingTable(initShard + " isn't started ", result),
        result
                .routingTable()
                .index("test")
                .shard(initShard.id())
                .countWithState(ShardRoutingState.STARTED)
            == 1);

    logger.info("--> testing shard variants that shouldn't match the started shard");

    result =
        startSingleShard(
            allocation,
            state,
            new ImmutableShardRouting(
                initShard.index(),
                initShard.id(),
                initShard.currentNodeId(),
                initShard.relocatingNodeId(),
                !initShard.primary(),
                ShardRoutingState.INITIALIZING,
                1));
    assertFalse(
        withRoutingTable("wrong primary flag shouldn't start shard " + initShard, result),
        result.changed());

    result =
        startSingleShard(
            allocation,
            state,
            new ImmutableShardRouting(
                initShard.index(),
                initShard.id(),
                "some_node",
                initShard.currentNodeId(),
                initShard.primary(),
                ShardRoutingState.INITIALIZING,
                1));
    assertFalse(
        withRoutingTable("relocating shard from node shouldn't start shard " + initShard, result),
        result.changed());

    result =
        startSingleShard(
            allocation,
            state,
            new ImmutableShardRouting(
                initShard.index(),
                initShard.id(),
                initShard.currentNodeId(),
                "some_node",
                initShard.primary(),
                ShardRoutingState.INITIALIZING,
                1));
    assertFalse(
        withRoutingTable("relocating shard to node shouldn't start shard " + initShard, result),
        result.changed());

    logger.info("--> testing double starting");

    result =
        startSingleShard(
            allocation,
            state,
            new ImmutableShardRouting(
                startedShard.index(),
                startedShard.id(),
                startedShard.currentNodeId(),
                startedShard.relocatingNodeId(),
                startedShard.primary(),
                ShardRoutingState.INITIALIZING,
                1));
    assertFalse(
        withRoutingTable("duplicate starting of the same shard should be ignored ", result),
        result.changed());

    logger.info("--> testing starting of relocating shards");
    // The relocation target is reported from the target node's point of view: its current node
    // is the source's relocating node and vice versa.
    result =
        startSingleShard(
            allocation,
            state,
            new ImmutableShardRouting(
                relocatingShard.index(),
                relocatingShard.id(),
                relocatingShard.relocatingNodeId(),
                relocatingShard.currentNodeId(),
                relocatingShard.primary(),
                ShardRoutingState.INITIALIZING,
                randomInt()));
    assertTrue(withRoutingTable("failed to start " + relocatingShard, result), result.changed());
    ShardRouting shardRouting =
        result.routingTable().index("test").shard(relocatingShard.id()).getShards().get(0);
    assertThat(shardRouting.state(), equalTo(ShardRoutingState.STARTED));
    assertThat(shardRouting.currentNodeId(), equalTo("node2"));
    assertThat(shardRouting.relocatingNodeId(), nullValue());

    logger.info("--> testing shard variants that shouldn't match the relocating shard");

    result =
        startSingleShard(
            allocation,
            state,
            new ImmutableShardRouting(
                relocatingShard.index(),
                relocatingShard.id(),
                relocatingShard.relocatingNodeId(),
                relocatingShard.currentNodeId(),
                !relocatingShard.primary(),
                ShardRoutingState.INITIALIZING,
                1));
    assertFalse(
        withRoutingTable("wrong primary flag shouldn't start shard " + relocatingShard, result),
        result.changed());

    result =
        startSingleShard(
            allocation,
            state,
            new ImmutableShardRouting(
                relocatingShard.index(),
                relocatingShard.id(),
                "some_node",
                relocatingShard.currentNodeId(),
                relocatingShard.primary(),
                ShardRoutingState.INITIALIZING,
                1));
    assertFalse(
        withRoutingTable(
            "relocating shard to a different node shouldn't start shard " + relocatingShard,
            result),
        result.changed());

    result =
        startSingleShard(
            allocation,
            state,
            new ImmutableShardRouting(
                relocatingShard.index(),
                relocatingShard.id(),
                relocatingShard.relocatingNodeId(),
                "some_node",
                relocatingShard.primary(),
                ShardRoutingState.INITIALIZING,
                1));
    assertFalse(
        withRoutingTable(
            "relocating shard from a different node shouldn't start shard " + relocatingShard,
            result),
        result.changed());

    // Same target node, but built with the constructor that takes no relocating node.
    result =
        startSingleShard(
            allocation,
            state,
            new ImmutableShardRouting(
                relocatingShard.index(),
                relocatingShard.id(),
                relocatingShard.relocatingNodeId(),
                relocatingShard.primary(),
                ShardRoutingState.INITIALIZING,
                1));
    assertFalse(
        withRoutingTable("non-relocating shard shouldn't start shard " + relocatingShard, result),
        result.changed());
  }

  /**
   * Reports exactly one shard routing as started against {@code state}.
   *
   * <p>The trailing {@code false} is passed through unchanged from the original call sites; its
   * meaning is not visible here (NOTE(review): presumably disables the follow-up reroute -
   * confirm against {@code AllocationService}).
   */
  private static RoutingAllocation.Result startSingleShard(
      AllocationService allocation, ClusterState state, ImmutableShardRouting reported) {
    return allocation.applyStartedShards(state, Arrays.asList(reported), false);
  }

  /** Appends the pretty-printed routing table of {@code result} to an assertion message. */
  private static String withRoutingTable(String message, RoutingAllocation.Result result) {
    return message + "\ncurrent routing table:" + result.routingTable().prettyPrint();
  }
  /**
   * Moves the only shard of a single-shard, zero-replica index between two nodes with a
   * {@link MoveAllocationCommand}.
   *
   * <p>After the command the shard must be RELOCATING on the node it came from and INITIALIZING
   * on the other one; once the initializing copy is applied as started, the original node must
   * be empty and the destination must hold the shard as STARTED.
   */
  @Test
  public void moveShardCommand() {
    AllocationService service =
        new AllocationService(
            settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build());

    logger.info("creating an index with 1 shard, no replica");
    MetaData indexMeta =
        newMetaDataBuilder()
            .put(newIndexMetaDataBuilder("test").numberOfShards(1).numberOfReplicas(0))
            .build();
    RoutingTable initialRouting = routingTable().addAsNew(indexMeta.index("test")).build();
    ClusterState state =
        newClusterStateBuilder().metaData(indexMeta).routingTable(initialRouting).build();

    logger.info("adding two nodes and performing rerouting");
    state =
        newClusterStateBuilder()
            .state(state)
            .nodes(newNodesBuilder().put(newNode("node1")).put(newNode("node2")))
            .build();
    RoutingAllocationResult outcome = service.reroute(state);
    state = newClusterStateBuilder().state(state).routingTable(outcome.routingTable()).build();

    logger.info("start primary shard");
    outcome =
        service.applyStartedShards(state, state.routingNodes().shardsWithState(INITIALIZING));
    state = newClusterStateBuilder().state(state).routingTable(outcome.routingTable()).build();

    logger.info("move the shard");
    // Whichever node got the primary is the source; the other of the two is the destination.
    String sourceNode = state.routingTable().index("test").shard(0).primaryShard().currentNodeId();
    String targetNode = "node1".equals(sourceNode) ? "node2" : "node1";
    ShardId movedShard = new ShardId("test", 0);
    outcome =
        service.reroute(
            state,
            new AllocationCommands(new MoveAllocationCommand(movedShard, sourceNode, targetNode)));
    assertThat(outcome.changed(), equalTo(true));
    state = newClusterStateBuilder().state(state).routingTable(outcome.routingTable()).build();
    assertThat(
        state.routingNodes().node(sourceNode).shards().get(0).state(),
        equalTo(ShardRoutingState.RELOCATING));
    assertThat(
        state.routingNodes().node(targetNode).shards().get(0).state(),
        equalTo(ShardRoutingState.INITIALIZING));

    logger.info("finish moving the shard");
    outcome =
        service.applyStartedShards(state, state.routingNodes().shardsWithState(INITIALIZING));
    state = newClusterStateBuilder().state(state).routingTable(outcome.routingTable()).build();

    assertThat(state.routingNodes().node(sourceNode).shards().isEmpty(), equalTo(true));
    assertThat(
        state.routingNodes().node(targetNode).shards().get(0).state(),
        equalTo(ShardRoutingState.STARTED));
  }
  /**
   * Drives a one-primary/one-replica index through a sequence of allocate and cancel commands on
   * a three-node cluster and checks per-node shard counts after every step.
   *
   * <p>Both {@code disable_new_allocation} and {@code disable_allocation} are switched on, and
   * the first plain reroute is asserted to leave nothing INITIALIZING, so every placement below
   * comes from an explicit command.
   *
   * <p>Covered expectations:
   *
   * <ul>
   *   <li>cancelling the primary with the third command argument {@code false} is rejected with
   *       {@link ElasticSearchIllegalArgumentException}, whether the primary is initializing,
   *       started, or started with an initializing replica;
   *   <li>cancelling the replica on node2 succeeds both while it is initializing and once it is
   *       started, leaving node2 and node3 empty;
   *   <li>cancelling the primary with the third argument {@code true} (logged as
   *       "allow_primary") succeeds and the started copy on node2 becomes the primary.
   * </ul>
   *
   * <p>The "must be rejected" checks throw {@link AssertionError} explicitly rather than using
   * {@code assert false}, so they still fail the test when the JVM runs without {@code -ea}.
   */
  @Test
  public void cancelCommand() {
    AllocationService allocation =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.disable_new_allocation", true)
                .put("cluster.routing.allocation.disable_allocation", true)
                .build());

    logger.info("--> building initial routing table");
    MetaData metaData =
        newMetaDataBuilder()
            .put(newIndexMetaDataBuilder("test").numberOfShards(1).numberOfReplicas(1))
            .build();
    RoutingTable routingTable = routingTable().addAsNew(metaData.index("test")).build();
    ClusterState clusterState =
        newClusterStateBuilder().metaData(metaData).routingTable(routingTable).build();

    logger.info("--> adding 3 nodes");
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .nodes(
                newNodesBuilder().put(newNode("node1")).put(newNode("node2")).put(newNode("node3")))
            .build();
    RoutingAllocationResult rerouteResult = allocation.reroute(clusterState);
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(0));

    logger.info("--> allocating with primary flag set to true");
    rerouteResult =
        allocation.reroute(
            clusterState,
            new AllocationCommands(
                new AllocateAllocationCommand(new ShardId("test", 0), "node1", true)));
    assertThat(rerouteResult.changed(), equalTo(true));
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node1").shardsWithState(INITIALIZING).size(), equalTo(1));
    assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(0));

    logger.info("--> cancel primary allocation, make sure it fails...");
    try {
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new CancelAllocationCommand(new ShardId("test", 0), "node1", false)));
      // AssertionError is not caught below, so reaching this line fails the test even when JVM
      // assertions are disabled.
      throw new AssertionError(
          "cancelling the initializing primary without allow_primary should have been rejected");
    } catch (ElasticSearchIllegalArgumentException expected) {
      // expected: the command must be rejected
    }

    logger.info("--> start the primary shard");
    rerouteResult =
        allocation.applyStartedShards(
            clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING));
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(0));

    logger.info("--> cancel primary allocation, make sure it fails...");
    try {
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new CancelAllocationCommand(new ShardId("test", 0), "node1", false)));
      throw new AssertionError(
          "cancelling the started primary without allow_primary should have been rejected");
    } catch (ElasticSearchIllegalArgumentException expected) {
      // expected: the command must be rejected
    }

    logger.info("--> allocate the replica shard on on the second node");
    rerouteResult =
        allocation.reroute(
            clusterState,
            new AllocationCommands(
                new AllocateAllocationCommand(new ShardId("test", 0), "node2", false)));
    assertThat(rerouteResult.changed(), equalTo(true));
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node2").shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> cancel the relocation allocation");
    rerouteResult =
        allocation.reroute(
            clusterState,
            new AllocationCommands(
                new CancelAllocationCommand(new ShardId("test", 0), "node2", false)));
    assertThat(rerouteResult.changed(), equalTo(true));
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(0));
    assertThat(clusterState.routingNodes().node("node3").shards().size(), equalTo(0));

    logger.info("--> allocate the replica shard on on the second node");
    rerouteResult =
        allocation.reroute(
            clusterState,
            new AllocationCommands(
                new AllocateAllocationCommand(new ShardId("test", 0), "node2", false)));
    assertThat(rerouteResult.changed(), equalTo(true));
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node2").shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> cancel the primary being replicated, make sure it fails");
    try {
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new CancelAllocationCommand(new ShardId("test", 0), "node1", false)));
      throw new AssertionError(
          "cancelling the primary while its replica is initializing should have been rejected");
    } catch (ElasticSearchIllegalArgumentException expected) {
      // expected: the command must be rejected
    }

    logger.info("--> start the replica shard");
    rerouteResult =
        allocation.applyStartedShards(
            clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING));
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1));

    logger.info("--> cancel allocation of the replica shard");
    rerouteResult =
        allocation.reroute(
            clusterState,
            new AllocationCommands(
                new CancelAllocationCommand(new ShardId("test", 0), "node2", false)));
    assertThat(rerouteResult.changed(), equalTo(true));
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(0));
    assertThat(clusterState.routingNodes().node("node3").shards().size(), equalTo(0));

    logger.info("--> allocate the replica shard on on the second node");
    rerouteResult =
        allocation.reroute(
            clusterState,
            new AllocationCommands(
                new AllocateAllocationCommand(new ShardId("test", 0), "node2", false)));
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(rerouteResult.changed(), equalTo(true));
    assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node2").shardsWithState(INITIALIZING).size(), equalTo(1));
    logger.info("--> start the replica shard");
    rerouteResult =
        allocation.applyStartedShards(
            clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING));
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1));

    logger.info("--> cancel the primary allocation (with allow_primary set to true)");
    rerouteResult =
        allocation.reroute(
            clusterState,
            new AllocationCommands(
                new CancelAllocationCommand(new ShardId("test", 0), "node1", true)));
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(rerouteResult.changed(), equalTo(true));
    // With the old primary gone, the started copy on node2 must have been promoted.
    assertThat(
        clusterState.routingNodes().node("node2").shardsWithState(STARTED).get(0).primary(),
        equalTo(true));
    assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(0));
    assertThat(clusterState.routingNodes().node("node3").shards().size(), equalTo(0));
  }
  /**
   * Allocates the primary and the replica of a one-shard, one-replica index by explicit
   * {@link AllocateAllocationCommand}s on a three-node cluster.
   *
   * <p>Both {@code disable_new_allocation} and {@code disable_allocation} are switched on, and
   * the first plain reroute is asserted to leave nothing INITIALIZING, so every placement below
   * comes from an explicit command.
   *
   * <p>Commands expected to be rejected with {@link ElasticSearchIllegalArgumentException}:
   *
   * <ul>
   *   <li>allocating with the primary flag {@code false} while nothing is allocated yet;
   *   <li>allocating the replica onto node1, which already holds the started primary;
   *   <li>allocating onto node3 once both copies are started and nothing is left unassigned.
   * </ul>
   *
   * <p>The "must be rejected" checks throw {@link AssertionError} explicitly rather than using
   * {@code assert false}, so they still fail the test when the JVM runs without {@code -ea}.
   */
  @Test
  public void allocateCommand() {
    AllocationService allocation =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.disable_new_allocation", true)
                .put("cluster.routing.allocation.disable_allocation", true)
                .build());

    logger.info("--> building initial routing table");
    MetaData metaData =
        newMetaDataBuilder()
            .put(newIndexMetaDataBuilder("test").numberOfShards(1).numberOfReplicas(1))
            .build();
    RoutingTable routingTable = routingTable().addAsNew(metaData.index("test")).build();
    ClusterState clusterState =
        newClusterStateBuilder().metaData(metaData).routingTable(routingTable).build();

    logger.info("--> adding 3 nodes on same rack and do rerouting");
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .nodes(
                newNodesBuilder().put(newNode("node1")).put(newNode("node2")).put(newNode("node3")))
            .build();
    RoutingAllocationResult rerouteResult = allocation.reroute(clusterState);
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(0));

    logger.info("--> allocating with primary flag set to false, should fail");
    try {
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new AllocateAllocationCommand(new ShardId("test", 0), "node1", false)));
      // AssertionError is not caught below, so reaching this line fails the test even when JVM
      // assertions are disabled.
      throw new AssertionError(
          "allocating with primary flag set to false should have been rejected");
    } catch (ElasticSearchIllegalArgumentException expected) {
      // expected: the command must be rejected
    }

    logger.info("--> allocating with primary flag set to true");
    rerouteResult =
        allocation.reroute(
            clusterState,
            new AllocationCommands(
                new AllocateAllocationCommand(new ShardId("test", 0), "node1", true)));
    assertThat(rerouteResult.changed(), equalTo(true));
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node1").shardsWithState(INITIALIZING).size(), equalTo(1));
    assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(0));

    logger.info("--> start the primary shard");
    rerouteResult =
        allocation.applyStartedShards(
            clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING));
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(0));

    logger.info("--> allocate the replica shard on the primary shard node, should fail");
    try {
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new AllocateAllocationCommand(new ShardId("test", 0), "node1", false)));
      throw new AssertionError(
          "allocating the replica on the primary shard node should have been rejected");
    } catch (ElasticSearchIllegalArgumentException expected) {
      // expected: the command must be rejected
    }

    logger.info("--> allocate the replica shard on on the second node");
    rerouteResult =
        allocation.reroute(
            clusterState,
            new AllocationCommands(
                new AllocateAllocationCommand(new ShardId("test", 0), "node2", false)));
    assertThat(rerouteResult.changed(), equalTo(true));
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node2").shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> start the replica shard");
    rerouteResult =
        allocation.applyStartedShards(
            clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING));
    clusterState =
        newClusterStateBuilder()
            .state(clusterState)
            .routingTable(rerouteResult.routingTable())
            .build();
    assertThat(clusterState.routingNodes().node("node1").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.routingNodes().node("node2").shards().size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1));

    logger.info("--> verify that we fail when there are no unassigned shards");
    try {
      allocation.reroute(
          clusterState,
          new AllocationCommands(
              new AllocateAllocationCommand(new ShardId("test", 0), "node3", false)));
      throw new AssertionError(
          "allocating when there are no unassigned shards should have been rejected");
    } catch (ElasticSearchIllegalArgumentException expected) {
      // expected: the command must be rejected
    }
  }
  /**
   * Fails the single shard of index "idx" until the configured maximum number of allocation
   * retries is reached, then checks that a manual reroute grants exactly one more attempt.
   *
   * <p>Sequence, with {@code retries} read from
   * {@code MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY}:
   *
   * <ol>
   *   <li>for the first {@code retries - 1} failures the shard is expected to be INITIALIZING
   *       again afterwards, with the failure count and message recorded in its unassigned info;
   *   <li>the next failure must leave it UNASSIGNED with a failure count of {@code retries};
   *   <li>a reroute with an empty command list and trailing flags {@code false, true} must put it
   *       back to INITIALIZING without changing the failure count;
   *   <li>one more failure must leave it UNASSIGNED with a count of {@code retries + 1}.
   * </ol>
   *
   * <p>All {@code assertEquals} calls pass the expected value first so that failure messages
   * report expected and actual the right way round.
   */
  public void testSingleRetryOnIgnore() {
    ClusterState clusterState = createInitialClusterState();
    RoutingTable routingTable = clusterState.routingTable();
    final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY);
    // now fail it N-1 times
    for (int i = 0; i < retries - 1; i++) {
      List<FailedRerouteAllocation.FailedShard> failedShards =
          Collections.singletonList(
              new FailedRerouteAllocation.FailedShard(
                  routingTable.index("idx").shard(0).shards().get(0),
                  "boom" + i,
                  new UnsupportedOperationException()));
      RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards);
      assertTrue(result.changed());
      routingTable = result.routingTable();
      clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
      assertEquals(1, routingTable.index("idx").shards().size());
      assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
      assertEquals(
          i + 1,
          routingTable
              .index("idx")
              .shard(0)
              .shards()
              .get(0)
              .unassignedInfo()
              .getNumFailedAllocations());
      assertEquals(
          "boom" + i,
          routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage());
    }
    // the next failure reaches the retry limit, so the shard must stay unassigned
    List<FailedRerouteAllocation.FailedShard> failedShards =
        Collections.singletonList(
            new FailedRerouteAllocation.FailedShard(
                routingTable.index("idx").shard(0).shards().get(0),
                "boom",
                new UnsupportedOperationException()));
    RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards);
    assertTrue(result.changed());
    routingTable = result.routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(
        retries,
        routingTable
            .index("idx")
            .shard(0)
            .shards()
            .get(0)
            .unassignedInfo()
            .getNumFailedAllocations());
    assertEquals(UNASSIGNED, routingTable.index("idx").shard(0).shards().get(0).state());
    assertEquals(
        "boom", routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage());

    // NOTE(review): the two trailing booleans are passed positionally; presumably the last one
    // requests a retry of failed shards - confirm against AllocationService.reroute.
    result =
        strategy.reroute(
            clusterState,
            new AllocationCommands(),
            false,
            true); // manual reroute should retry once
    assertTrue(result.changed());
    routingTable = result.routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    assertEquals(1, routingTable.index("idx").shards().size());
    // the retry re-initializes the shard but keeps the recorded failure count and message
    assertEquals(
        retries,
        routingTable
            .index("idx")
            .shard(0)
            .shards()
            .get(0)
            .unassignedInfo()
            .getNumFailedAllocations());
    assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
    assertEquals(
        "boom", routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage());

    // one more failure must leave the shard unassigned again, i.e. no further automatic retry
    failedShards =
        Collections.singletonList(
            new FailedRerouteAllocation.FailedShard(
                routingTable.index("idx").shard(0).shards().get(0),
                "boom",
                new UnsupportedOperationException()));
    result = strategy.applyFailedShards(clusterState, failedShards);
    assertTrue(result.changed());
    routingTable = result.routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(
        retries + 1,
        routingTable
            .index("idx")
            .shard(0)
            .shards()
            .get(0)
            .unassignedInfo()
            .getNumFailedAllocations());
    assertEquals(UNASSIGNED, routingTable.index("idx").shard(0).shards().get(0).state());
    assertEquals(
        "boom", routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage());
  }
  /**
   * Walks the cluster-reroute update task through four phases: a dry run, repeated shard failures
   * up to the max-retry setting, a plain reroute that must leave the shard unassigned, and a
   * reroute with {@code retry_failed} set that must bring the shard back to INITIALIZING.
   */
  public void testClusterStateUpdateTask() {
    AllocationService allocationService =
        new AllocationService(
            Settings.builder().build(),
            new AllocationDeciders(
                Settings.EMPTY,
                Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))),
            NoopGatewayAllocator.INSTANCE,
            new BalancedShardsAllocator(Settings.EMPTY),
            EmptyClusterInfoService.INSTANCE);
    ClusterState clusterState = createInitialClusterState(allocationService);
    ClusterRerouteRequest req = new ClusterRerouteRequest();
    req.dryRun(true);
    // Holds whatever response the task hands to its listener so it can be inspected below.
    AtomicReference<ClusterRerouteResponse> responseRef = new AtomicReference<>();
    ActionListener<ClusterRerouteResponse> responseActionListener =
        new ActionListener<ClusterRerouteResponse>() {
          @Override
          public void onResponse(ClusterRerouteResponse clusterRerouteResponse) {
            responseRef.set(clusterRerouteResponse);
          }

          @Override
          public void onFailure(Exception e) {
            // Nothing is recorded on failure; responseRef then stays unset and the
            // responseRef.get() dereferences below would fail instead.
          }
        };
    TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask task =
        new TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask(
            logger, allocationService, req, responseActionListener);

    // Dry run: the task hands back the input state untouched, while the response carries a
    // different state object.
    ClusterState execute = task.execute(clusterState);
    assertSame(clusterState, execute); // dry-run
    task.onAllNodesAcked(null);
    assertNotSame(execute, responseRef.get().getState());

    req.dryRun(false); // now we allocate

    final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY);
    // Fail the shard `retries` times; only the last failure leaves it UNASSIGNED.
    for (int i = 0; i < retries; i++) {
      ClusterState newState = task.execute(clusterState);
      assertNotSame(clusterState, newState); // dry-run=false
      clusterState = newState;
      RoutingTable routingTable = clusterState.routingTable();
      assertEquals(1, routingTable.index("idx").shards().size());
      assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
      assertEquals(
          i,
          routingTable
              .index("idx")
              .shard(0)
              .shards()
              .get(0)
              .unassignedInfo()
              .getNumFailedAllocations());
      List<FailedRerouteAllocation.FailedShard> failedShards =
          Collections.singletonList(
              new FailedRerouteAllocation.FailedShard(
                  routingTable.index("idx").shard(0).shards().get(0),
                  "boom" + i,
                  new UnsupportedOperationException()));
      RoutingAllocation.Result result =
          allocationService.applyFailedShards(clusterState, failedShards);
      assertTrue(result.changed());
      clusterState = ClusterState.builder(clusterState).routingTable(result.routingTable()).build();
      routingTable = clusterState.routingTable();
      assertEquals(1, routingTable.index("idx").shards().size());
      if (i == retries - 1) {
        assertEquals(UNASSIGNED, routingTable.index("idx").shard(0).shards().get(0).state());
      } else {
        assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
      }
      assertEquals(
          i + 1,
          routingTable
              .index("idx")
              .shard(0)
              .shards()
              .get(0)
              .unassignedInfo()
              .getNumFailedAllocations());
    }

    // without retry_failed we won't allocate that shard
    ClusterState newState = task.execute(clusterState);
    assertNotSame(clusterState, newState); // dry-run=false
    task.onAllNodesAcked(null);
    assertSame(newState, responseRef.get().getState());
    // Inspect the state the task just produced; reading the input state here would not verify
    // anything about this reroute.
    RoutingTable routingTable = newState.routingTable();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(UNASSIGNED, routingTable.index("idx").shard(0).shards().get(0).state());
    assertEquals(
        retries,
        routingTable
            .index("idx")
            .shard(0)
            .shards()
            .get(0)
            .unassignedInfo()
            .getNumFailedAllocations());

    req.setRetryFailed(true); // now we manually retry and get the shard back into initializing
    newState = task.execute(clusterState);
    assertNotSame(clusterState, newState); // dry-run=false
    clusterState = newState;
    routingTable = clusterState.routingTable();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
    // The failure counter is carried over unchanged by the manual retry.
    assertEquals(
        retries,
        routingTable
            .index("idx")
            .shard(0)
            .shards()
            .get(0)
            .unassignedInfo()
            .getNumFailedAllocations());
  }
  /**
   * Grows the replica count of a single-shard index from 1 to 2 and shrinks it back to 1, checking
   * the routing table after every step.
   */
  @Test
  public void testUpdateNumberOfReplicas() {
    AllocationService strategy =
        createAllocationService(
            settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build());

    logger.info("Building initial routing table");

    MetaData metaData =
        MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(1))
            .build();

    RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build();

    ClusterState clusterState =
        ClusterState.builder().metaData(metaData).routingTable(routingTable).build();

    // One shard group with two copies, both still unassigned and without a node.
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(2));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(2));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
    assertThat(routingTable.index("test").shard(0).shards().get(1).state(), equalTo(UNASSIGNED));
    assertThat(routingTable.index("test").shard(0).shards().get(0).currentNodeId(), nullValue());
    assertThat(routingTable.index("test").shard(0).shards().get(1).currentNodeId(), nullValue());

    logger.info("Adding two nodes and performing rerouting");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
            .build();

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logger.info("Start all the primary shards");
    RoutingNodes routingNodes = clusterState.routingNodes();
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logger.info("Start all the replica shards");
    routingNodes = clusterState.routingNodes();
    // Snapshot taken right before the step whose result is compared against it below.
    RoutingTable prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    // Remember where the two copies landed; later steps must not move them.
    final String nodeHoldingPrimary =
        routingTable.index("test").shard(0).primaryShard().currentNodeId();
    final String nodeHoldingReplica =
        routingTable.index("test").shard(0).replicaShards().get(0).currentNodeId();
    assertThat(nodeHoldingPrimary, not(equalTo(nodeHoldingReplica)));
    assertThat(prevRoutingTable != routingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(2));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(2));
    assertThat(routingTable.index("test").shard(0).primaryShard().state(), equalTo(STARTED));
    assertThat(
        routingTable.index("test").shard(0).primaryShard().currentNodeId(),
        equalTo(nodeHoldingPrimary));
    assertThat(routingTable.index("test").shard(0).replicaShards().size(), equalTo(1));
    assertThat(
        routingTable.index("test").shard(0).replicaShards().get(0).state(), equalTo(STARTED));
    assertThat(
        routingTable.index("test").shard(0).replicaShards().get(0).currentNodeId(),
        equalTo(nodeHoldingReplica));

    logger.info("add another replica");
    prevRoutingTable = routingTable;
    // Routing table and metadata are updated side by side to the new replica count.
    routingTable = RoutingTable.builder(routingTable).updateNumberOfReplicas(2).build();
    metaData = MetaData.builder(clusterState.metaData()).updateNumberOfReplicas(2).build();
    clusterState =
        ClusterState.builder(clusterState).routingTable(routingTable).metaData(metaData).build();

    assertThat(clusterState.metaData().index("test").numberOfReplicas(), equalTo(2));

    // The extra replica exists but is unassigned: there are only two nodes so far.
    assertThat(prevRoutingTable != routingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(3));
    assertThat(routingTable.index("test").shard(0).primaryShard().state(), equalTo(STARTED));
    assertThat(
        routingTable.index("test").shard(0).primaryShard().currentNodeId(),
        equalTo(nodeHoldingPrimary));
    assertThat(routingTable.index("test").shard(0).replicaShards().size(), equalTo(2));
    assertThat(
        routingTable.index("test").shard(0).replicaShards().get(0).state(), equalTo(STARTED));
    assertThat(
        routingTable.index("test").shard(0).replicaShards().get(0).currentNodeId(),
        equalTo(nodeHoldingReplica));
    assertThat(
        routingTable.index("test").shard(0).replicaShards().get(1).state(), equalTo(UNASSIGNED));

    logger.info("Add another node and start the added replica");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3")))
            .build();
    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // The new replica is now initializing on node3; the existing copies are untouched.
    assertThat(prevRoutingTable != routingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(3));
    assertThat(routingTable.index("test").shard(0).primaryShard().state(), equalTo(STARTED));
    assertThat(
        routingTable.index("test").shard(0).primaryShard().currentNodeId(),
        equalTo(nodeHoldingPrimary));
    assertThat(routingTable.index("test").shard(0).replicaShards().size(), equalTo(2));
    assertThat(
        routingTable.index("test").shard(0).replicaShardsWithState(STARTED).size(), equalTo(1));
    assertThat(
        routingTable.index("test").shard(0).replicaShardsWithState(STARTED).get(0).currentNodeId(),
        equalTo(nodeHoldingReplica));
    assertThat(
        routingTable.index("test").shard(0).replicaShardsWithState(INITIALIZING).size(),
        equalTo(1));
    assertThat(
        routingTable
            .index("test")
            .shard(0)
            .replicaShardsWithState(INITIALIZING)
            .get(0)
            .currentNodeId(),
        equalTo("node3"));

    routingNodes = clusterState.routingNodes();
    prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // Both replicas are started; their order in the list is not fixed, hence anyOf.
    assertThat(prevRoutingTable != routingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(3));
    assertThat(routingTable.index("test").shard(0).primaryShard().state(), equalTo(STARTED));
    assertThat(
        routingTable.index("test").shard(0).primaryShard().currentNodeId(),
        equalTo(nodeHoldingPrimary));
    assertThat(routingTable.index("test").shard(0).replicaShards().size(), equalTo(2));
    assertThat(
        routingTable.index("test").shard(0).replicaShardsWithState(STARTED).size(), equalTo(2));
    assertThat(
        routingTable.index("test").shard(0).replicaShardsWithState(STARTED).get(0).currentNodeId(),
        anyOf(equalTo(nodeHoldingReplica), equalTo("node3")));
    assertThat(
        routingTable.index("test").shard(0).replicaShardsWithState(STARTED).get(1).currentNodeId(),
        anyOf(equalTo(nodeHoldingReplica), equalTo("node3")));

    logger.info("now remove a replica");
    prevRoutingTable = routingTable;
    routingTable = RoutingTable.builder(routingTable).updateNumberOfReplicas(1).build();
    metaData = MetaData.builder(clusterState.metaData()).updateNumberOfReplicas(1).build();
    clusterState =
        ClusterState.builder(clusterState).routingTable(routingTable).metaData(metaData).build();

    assertThat(clusterState.metaData().index("test").numberOfReplicas(), equalTo(1));

    // Either replica may have been dropped, so the survivor can sit on either node.
    assertThat(prevRoutingTable != routingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(2));
    assertThat(routingTable.index("test").shard(0).primaryShard().state(), equalTo(STARTED));
    assertThat(
        routingTable.index("test").shard(0).primaryShard().currentNodeId(),
        equalTo(nodeHoldingPrimary));
    assertThat(routingTable.index("test").shard(0).replicaShards().size(), equalTo(1));
    assertThat(
        routingTable.index("test").shard(0).replicaShards().get(0).state(), equalTo(STARTED));
    assertThat(
        routingTable.index("test").shard(0).replicaShards().get(0).currentNodeId(),
        anyOf(equalTo(nodeHoldingReplica), equalTo("node3")));

    logger.info("do a reroute, should remain the same");
    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // A no-op reroute must hand back the very same routing table instance.
    assertThat(prevRoutingTable != routingTable, equalTo(false));
  }
  /**
   * Checks disk-threshold-aware allocation against a fixed, fake {@link ClusterInfo}: four nodes
   * with 90%, 65%, 40% and 20% disk used, and watermarks that are tightened in two steps while
   * nodes are added.
   */
  @Test
  public void diskThresholdTest() {
    Settings diskSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.8)
            .build();

    Map<String, DiskUsage> usages = new HashMap<>();
    usages.put("node1", new DiskUsage("node1", 100, 10)); // 90% used
    usages.put("node2", new DiskUsage("node2", 100, 35)); // 65% used
    usages.put("node3", new DiskUsage("node3", 100, 60)); // 40% used
    usages.put("node4", new DiskUsage("node4", 100, 80)); // 20% used

    Map<String, Long> shardSizes = new HashMap<>();
    shardSizes.put("[test][0][p]", 10L); // 10 bytes
    shardSizes.put("[test][0][r]", 10L);
    final ClusterInfo clusterInfo =
        new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes));

    // Always serves the canned usage and shard-size numbers built above.
    ClusterInfoService cis =
        new ClusterInfoService() {
          @Override
          public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo;
          }
        };

    AllocationService strategy = newDiskThresholdStrategy(diskSettings, cis);

    MetaData metaData =
        MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(1))
            .build();

    RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build();

    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    logger.info("--> adding two nodes");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // Primary shard should be initializing, replica should not
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> start the shards (primaries)");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that we're able to start the primary
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1));
    // Assert that node1 didn't get any shards because its disk usage is too high
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));

    logger.info("--> start the shards (replicas)");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that the replica couldn't be started since node1 doesn't have enough space
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1));

    logger.info("--> adding node3");

    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that the replica is initialized now that node3 is available with enough space
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(1));
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(),
        equalTo(1));

    logger.info("--> start the shards (replicas)");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that both copies are started now: one on node2, one on node3, none on node1
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> changing decider settings");

    // Set the low threshold to 60 instead of 70
    // Set the high threshold to 70 instead of 80
    // node2 now should not have new shards allocated to it, but shards can remain
    // NOTE(review): the low watermark is given as "60%" while the others are ratios; presumably
    // this exercises both accepted formats - confirm before normalizing.
    diskSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, "60%")
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.7)
            .build();

    strategy = newDiskThresholdStrategy(diskSettings, cis);

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // Shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> changing settings again");

    // Set the low threshold to 50 instead of 60
    // Set the high threshold to 60 instead of 70
    // node2 now should not have new shards allocated to it, and shards cannot remain
    diskSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.5)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.6)
            .build();

    strategy = newDiskThresholdStrategy(diskSettings, cis);

    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Shards remain started
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(2));
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
    // Shard hasn't been moved off of node2 yet because there's nowhere for it to go
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));

    logger.info("--> adding node4");

    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node4")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Only one shard is still plainly started; the other is on the move, with one copy
    // initializing (presumably the relocation target on node4).
    assertThat(clusterState.routingNodes().shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> apply INITIALIZING shards");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0));
    // Node4 is available now, so the shard is moved off of node2
    assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().node("node4").size(), equalTo(1));
  }

  /**
   * Builds the allocation service used by {@link #diskThresholdTest()}: a same-shard decider plus
   * a disk-threshold decider configured from {@code diskSettings}, with rebalancing always
   * allowed and cluster info served by {@code infoService}.
   */
  private AllocationService newDiskThresholdStrategy(
      Settings diskSettings, ClusterInfoService infoService) {
    AllocationDeciders deciders =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));
    return new AllocationService(
        settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put(
                ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build(),
        deciders,
        new ShardsAllocators(),
        infoService);
  }
  /**
   * Allocates ten single-shard, zero-replica indices on three nodes, then adds two more nodes and
   * checks that the shards end up spread two per node across all five.
   */
  @Test
  public void testMultiIndexUnevenNodes() {
    AllocationService strategy =
        createAllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build());

    final int numberOfIndices = 10;
    logger.info("Building initial routing table with " + numberOfIndices + " indices");

    MetaData.Builder metaDataBuilder = MetaData.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      metaDataBuilder.put(
          IndexMetaData.builder("test" + i)
              .settings(settings(Version.CURRENT))
              .numberOfShards(1)
              .numberOfReplicas(0));
    }
    MetaData metaData = metaDataBuilder.build();

    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      routingTableBuilder.addAsNew(metaData.index("test" + i));
    }
    RoutingTable routingTable = routingTableBuilder.build();

    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    assertThat(routingTable.indicesRouting().size(), equalTo(numberOfIndices));

    logger.info("Starting 3 nodes and rerouting");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(
                DiscoveryNodes.builder()
                    .put(newNode("node1"))
                    .put(newNode("node2"))
                    .put(newNode("node3")))
            .build();
    RoutingTable prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(), equalTo(INITIALIZING));
    }
    RoutingNodes routingNodes = clusterState.routingNodes();
    assertThat(numberOfShardsOfType(routingNodes, INITIALIZING), equalTo(numberOfIndices));
    // Ten shards over three nodes: every node must hold three or four of them.
    assertThat(
        routingNodes.node("node1").numberOfShardsWithState(INITIALIZING),
        anyOf(equalTo(3), equalTo(4)));
    assertThat(
        routingNodes.node("node2").numberOfShardsWithState(INITIALIZING),
        anyOf(equalTo(3), equalTo(4)));
    assertThat(
        routingNodes.node("node3").numberOfShardsWithState(INITIALIZING),
        anyOf(equalTo(3), equalTo(4)));

    logger.info("Start two more nodes, things should remain the same");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(
                DiscoveryNodes.builder(clusterState.nodes())
                    .put(newNode("node4"))
                    .put(newNode("node5")))
            .build();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();

    // While every shard is still initializing, the reroute returns the same table instance.
    assertThat(prevRoutingTable == routingTable, equalTo(true));

    routingNodes = clusterState.routingNodes();
    prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(),
          anyOf(equalTo(RELOCATING), equalTo(STARTED)));
    }
    routingNodes = clusterState.routingNodes();
    assertThat(
        "4 source shard routing are relocating",
        numberOfShardsOfType(routingNodes, RELOCATING),
        equalTo(4));
    assertThat(
        "4 target shard routing are initializing",
        numberOfShardsOfType(routingNodes, INITIALIZING),
        equalTo(4));

    logger.info("Now, mark the relocated as started");
    prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(),
          anyOf(equalTo(RELOCATING), equalTo(STARTED)));
    }
    routingNodes = clusterState.routingNodes();
    assertThat(numberOfShardsOfType(routingNodes, STARTED), equalTo(numberOfIndices));
    // Ten shards over five nodes: exactly two started shards on each.
    for (RoutingNode routingNode : routingNodes) {
      assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(2));
    }
  }
  /**
   * Allocates 50 single-shard, zero-replica indices onto nodes that join in two equally sized
   * batches and checks how the shards are spread.
   *
   * <p>After the first batch joins, every primary must be INITIALIZING on one of the first-batch
   * nodes, two shards per node, and no index may show up twice. Adding the second batch while the
   * shards are still initializing must leave the routing table instance untouched. Once the
   * shards are reported as started, half of them are expected to be STARTED and the other half
   * RELOCATING, all still located on first-batch nodes.
   */
  @Test
  public void testMultiIndexEvenDistribution() {
    AllocationService strategy =
        createAllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build());

    final int numberOfIndices = 50;
    // Nodes join in two batches of this size; the first batch gets the ids node0..node24.
    final int nodesPerBatch = numberOfIndices / 2;
    logger.info("Building initial routing table with " + numberOfIndices + " indices");

    MetaData.Builder metaDataBuilder = MetaData.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      metaDataBuilder.put(
          IndexMetaData.builder("test" + i)
              .settings(settings(Version.CURRENT))
              .numberOfShards(1)
              .numberOfReplicas(0));
    }
    MetaData metaData = metaDataBuilder.build();

    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      routingTableBuilder.addAsNew(metaData.index("test" + i));
    }
    RoutingTable routingTable = routingTableBuilder.build();
    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    // Without any node in the cluster every shard has to be unassigned.
    assertThat(routingTable.indicesRouting().size(), equalTo(numberOfIndices));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      ShardRouting shard = routingTable.index("test" + i).shard(0).shards().get(0);
      assertThat(shard.state(), equalTo(UNASSIGNED));
      assertThat(shard.currentNodeId(), nullValue());
    }

    logger.info("Adding " + nodesPerBatch + " nodes");
    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder();
    for (int i = 0; i < nodesPerBatch; i++) {
      nodesBuilder.put(newNode("node" + i));
    }
    RoutingTable prevRoutingTable = routingTable;
    clusterState = ClusterState.builder(clusterState).nodes(nodesBuilder).build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      ShardRouting shard = routingTable.index("test" + i).shard(0).shards().get(0);
      assertThat(shard.unassigned(), equalTo(false));
      assertThat(shard.state(), equalTo(INITIALIZING));
      assertThat(shard.primary(), equalTo(true));
      // every shard has to sit on one of the first-batch nodes
      int nodeIndex = Integer.parseInt(shard.currentNodeId().substring("node".length()));
      assertThat(nodeIndex, lessThan(nodesPerBatch));
    }
    RoutingNodes routingNodes = clusterState.routingNodes();
    Set<String> encounteredIndices = newHashSet();
    for (RoutingNode routingNode : routingNodes) {
      assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(0));
      assertThat(routingNode.size(), equalTo(2));
      // only first-batch nodes exist at this point, each holding two initializing shards
      int nodeIndex = Integer.parseInt(routingNode.nodeId().substring("node".length()));
      assertThat(nodeIndex, lessThan(nodesPerBatch));
      // every index has a single shard, so no index name may be seen twice across the nodes
      for (ShardRouting shardRoutingEntry : routingNode) {
        assertThat(encounteredIndices, not(hasItem(shardRoutingEntry.index())));
        encounteredIndices.add(shardRoutingEntry.index());
      }
    }

    logger.info("Adding additional " + nodesPerBatch + " nodes, nothing should change");
    nodesBuilder = DiscoveryNodes.builder(clusterState.nodes());
    for (int i = nodesPerBatch; i < numberOfIndices; i++) {
      nodesBuilder.put(newNode("node" + i));
    }
    prevRoutingTable = routingTable;
    clusterState = ClusterState.builder(clusterState).nodes(nodesBuilder).build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(false));

    logger.info("Marking the shard as started");
    prevRoutingTable = routingTable;
    // NOTE: routingNodes is the snapshot taken before the second batch joined. The routing table
    // was just asserted to be unchanged, so its INITIALIZING shards are presumably still current.
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    int numberOfRelocatingShards = 0;
    int numberOfStartedShards = 0;
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      ShardRouting shard = routingTable.index("test" + i).shard(0).shards().get(0);
      assertThat(shard.unassigned(), equalTo(false));
      assertThat(shard.state(), anyOf(equalTo(STARTED), equalTo(RELOCATING)));
      if (shard.state() == STARTED) {
        numberOfStartedShards++;
      } else if (shard.state() == RELOCATING) {
        numberOfRelocatingShards++;
      }
      assertThat(shard.primary(), equalTo(true));
      // started or relocating, the shard's current node is still one of the first-batch nodes
      int nodeIndex = Integer.parseInt(shard.currentNodeId().substring("node".length()));
      assertThat(nodeIndex, lessThan(nodesPerBatch));
    }
    assertThat(numberOfRelocatingShards, equalTo(nodesPerBatch));
    assertThat(numberOfStartedShards, equalTo(nodesPerBatch));
  }
  /**
   * Exercises the disk threshold decider with a known shard size.
   *
   * <p>The low watermark is 0.7 and the high watermark 71%. node1 is reported at 69% used and
   * node2 at 99% used, and the single primary of index "test" is reported as 10 bytes. The test
   * expects that the shard ends up neither INITIALIZING nor STARTED on any node.
   */
  @Test
  public void diskThresholdWithShardSizes() {
    Settings watermarkSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, "71%")
            .build();

    // Canned disk usage handed out by the fake ClusterInfoService below.
    Map<String, DiskUsage> diskUsageByNode = new HashMap<>();
    diskUsageByNode.put("node1", new DiskUsage("node1", 100, 31)); // 69% used
    diskUsageByNode.put("node2", new DiskUsage("node2", 100, 1)); // 99% used

    Map<String, Long> sizeByShard = new HashMap<>();
    sizeByShard.put("[test][0][p]", 10L); // 10 bytes

    final ClusterInfo clusterInfo =
        new ClusterInfo(ImmutableMap.copyOf(diskUsageByNode), ImmutableMap.copyOf(sizeByShard));

    AllocationDeciders allocationDeciders =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(watermarkSettings))));

    // Stub service that always answers with the canned cluster info.
    ClusterInfoService fakeInfoService =
        new ClusterInfoService() {
          @Override
          public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo;
          }
        };

    Settings allocationSettings =
        settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put(
                ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build();
    AllocationService allocation =
        new AllocationService(
            allocationSettings, allocationDeciders, new ShardsAllocators(), fakeInfoService);

    MetaData indexMetaData =
        MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(0))
            .build();

    RoutingTable currentTable =
        RoutingTable.builder().addAsNew(indexMetaData.index("test")).build();

    ClusterState state =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(indexMetaData)
            .routingTable(currentTable)
            .build();

    logger.info("--> adding node1");
    // node2 is added as well because DiskThresholdDecider automatically ignores single-node
    // clusters
    state =
        ClusterState.builder(state)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
            .build();
    currentTable = allocation.reroute(state).routingTable();
    state = ClusterState.builder(state).routingTable(currentTable).build();

    logger.info("--> start the shards (primaries)");
    currentTable =
        allocation
            .applyStartedShards(state, state.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    state = ClusterState.builder(state).routingTable(currentTable).build();
    logShardStates(state);

    // Shard can't be allocated to node1 (or node2) because it would cause too much usage
    int initializingCount = state.routingNodes().shardsWithState(INITIALIZING).size();
    assertThat(initializingCount, equalTo(0));
    // No shards are started, no nodes have enough disk for allocation
    int startedCount = state.routingNodes().shardsWithState(STARTED).size();
    assertThat(startedCount, equalTo(0));
  }
  /**
   * Walks a single-shard, zero-replica index through its allocation life cycle.
   *
   * <p>Steps: the shard starts out unassigned, is allocated to node1 once that node joins, is
   * started there, stays put when node2 joins, is re-initialized on node2 after node1 leaves,
   * is left alone when node3 joins, and is finally started on node2. After every step the test
   * checks whether the routing table instance changed, along with the shard's state and node.
   */
  @Test
  public void testSingleIndexStartedShard() {
    AllocationService strategy =
        createAllocationService(
            settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build());

    logger.info("Building initial routing table");

    MetaData metaData =
        MetaData.builder()
            .put(
                IndexMetaData.builder("test")
                    .settings(settings(Version.CURRENT))
                    .numberOfShards(1)
                    .numberOfReplicas(0))
            .build();

    RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build();

    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    // No nodes yet: the only shard must be unassigned.
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
    assertThat(routingTable.index("test").shard(0).shards().get(0).currentNodeId(), nullValue());

    logger.info("Adding one node and performing rerouting");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")))
            .build();
    RoutingTable prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // The shard moves from UNASSIGNED to INITIALIZING, so a new routing table must be returned.
    assertThat(routingTable != prevRoutingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(INITIALIZING));
    assertThat(
        routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

    logger.info("Rerouting again, nothing should change");
    prevRoutingTable = routingTable;
    clusterState = ClusterState.builder(clusterState).build();
    routingTable = strategy.reroute(clusterState).routingTable();
    assertThat(routingTable == prevRoutingTable, equalTo(true));
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logger.info("Marking the shard as started");
    RoutingNodes routingNodes = clusterState.routingNodes();
    prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, routingNodes.node("node1").shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(routingTable != prevRoutingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(STARTED));
    assertThat(
        routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

    logger.info("Starting another node and making sure nothing changed");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node2")))
            .build();
    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(routingTable == prevRoutingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(STARTED));
    assertThat(
        routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node1"));

    logger.info("Killing node1 where the shard is, checking the shard is relocated");

    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).remove("node1"))
            .build();
    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // With node1 gone the shard has to be initialized from scratch on the remaining node.
    assertThat(routingTable != prevRoutingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(INITIALIZING));
    assertThat(
        routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node2"));

    logger.info(
        "Start another node, make sure that things remain the same (shard is in node2 and initializing)");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3")))
            .build();
    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    assertThat(routingTable == prevRoutingTable, equalTo(true));

    logger.info("Start the shard on node 2");
    routingNodes = clusterState.routingNodes();
    prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, routingNodes.node("node2").shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(routingTable != prevRoutingTable, equalTo(true));
    assertThat(routingTable.index("test").shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().size(), equalTo(1));
    assertThat(routingTable.index("test").shard(0).shards().get(0).state(), equalTo(STARTED));
    assertThat(
        routingTable.index("test").shard(0).shards().get(0).currentNodeId(), equalTo("node2"));
  }
  /**
   * Exercises the disk threshold decider for a node whose disk usage is not reported.
   *
   * <p>Usage is supplied for node2 and node3 only, while the cluster consists of node1 and node3.
   * The test expects the single primary of index "test" to be initialized and then started, and
   * node1 to hold exactly one shard afterwards.
   */
  @Test
  public void unknownDiskUsageTest() {
    Settings watermarkSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.85)
            .build();

    // Canned disk usage; note that there is deliberately no entry for node1.
    Map<String, DiskUsage> diskUsageByNode = new HashMap<>();
    diskUsageByNode.put("node2", new DiskUsage("node2", 100, 50)); // 50% used
    diskUsageByNode.put("node3", new DiskUsage("node3", 100, 0)); // 100% used

    Map<String, Long> sizeByShard = new HashMap<>();
    sizeByShard.put("[test][0][p]", 10L); // 10 bytes
    sizeByShard.put("[test][0][r]", 10L); // 10 bytes

    final ClusterInfo clusterInfo =
        new ClusterInfo(ImmutableMap.copyOf(diskUsageByNode), ImmutableMap.copyOf(sizeByShard));

    AllocationDeciders allocationDeciders =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(watermarkSettings))));

    // Stub service that always answers with the canned cluster info.
    ClusterInfoService fakeInfoService =
        new ClusterInfoService() {
          @Override
          public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo;
          }
        };

    Settings allocationSettings =
        settingsBuilder()
            .put("cluster.routing.allocation.concurrent_recoveries", 10)
            .put(
                ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                "always")
            .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
            .build();
    AllocationService allocation =
        new AllocationService(
            allocationSettings, allocationDeciders, new ShardsAllocators(), fakeInfoService);

    MetaData indexMetaData =
        MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(0))
            .build();

    RoutingTable currentTable =
        RoutingTable.builder().addAsNew(indexMetaData.index("test")).build();

    ClusterState state =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(indexMetaData)
            .routingTable(currentTable)
            .build();

    logger.info("--> adding node1");
    // node3 is added as well because DiskThresholdDecider automatically ignores single-node
    // clusters
    state =
        ClusterState.builder(state)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node3")))
            .build();
    currentTable = allocation.reroute(state).routingTable();
    state = ClusterState.builder(state).routingTable(currentTable).build();

    // Shard can be allocated to node1, even though it only has 25% free,
    // because it's a primary that's never been allocated before
    int initializingCount = state.routingNodes().shardsWithState(INITIALIZING).size();
    assertThat(initializingCount, equalTo(1));

    logger.info("--> start the shards (primaries)");
    currentTable =
        allocation
            .applyStartedShards(state, state.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    state = ClusterState.builder(state).routingTable(currentTable).build();
    logShardStates(state);

    // A single shard is started on node1, even though it normally would not
    // be allowed, because it's a primary that hasn't been allocated, and node1
    // is still below the high watermark (unlike node3)
    int startedCount = state.routingNodes().shardsWithState(STARTED).size();
    assertThat(startedCount, equalTo(1));
    int shardsOnNode1 = state.getRoutingNodes().node("node1").size();
    assertThat(shardsOnNode1, equalTo(1));
  }
  /**
   * Checks the index-level "total shards per node" limit during allocation.
   *
   * <p>Index "test" has 4 shards with 1 replica each and a per-node limit of 2 shards. With two
   * nodes the test expects 2 shards INITIALIZING per node after the first reroute, and after
   * starting them 2 STARTED shards per node with the remaining 4 shard copies left unassigned.
   */
  @Test
  public void indexLevelShardsLimitAllocate() {
    AllocationService allocation =
        new AllocationService(
            settingsBuilder().put("cluster.routing.allocation.concurrent_recoveries", 10).build());

    logger.info("Building initial routing table");

    MetaData indexMetaData =
        newMetaDataBuilder()
            .put(
                newIndexMetaDataBuilder("test")
                    .settings(
                        ImmutableSettings.settingsBuilder()
                            .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 4)
                            .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)
                            .put(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE, 2)))
            .build();

    RoutingTable currentTable = routingTable().addAsNew(indexMetaData.index("test")).build();

    ClusterState state =
        newClusterStateBuilder().metaData(indexMetaData).routingTable(currentTable).build();

    logger.info("Adding two nodes and performing rerouting");
    state =
        newClusterStateBuilder()
            .state(state)
            .nodes(newNodesBuilder().put(newNode("node1")).put(newNode("node2")))
            .build();
    currentTable = allocation.reroute(state).routingTable();
    state = newClusterStateBuilder().state(state).routingTable(currentTable).build();

    // The per-node limit caps each node at two initializing shards.
    for (String nodeId : new String[] {"node1", "node2"}) {
      int initializing =
          state
              .readOnlyRoutingNodes()
              .node(nodeId)
              .numberOfShardsWithState(ShardRoutingState.INITIALIZING);
      assertThat(initializing, equalTo(2));
    }

    logger.info("Start the primary shards");
    RoutingNodes nodesSnapshot = state.routingNodes();
    currentTable =
        allocation
            .applyStartedShards(state, nodesSnapshot.shardsWithState(INITIALIZING))
            .routingTable();
    state = newClusterStateBuilder().state(state).routingTable(currentTable).build();

    // Both nodes now hold two started shards and nothing new is initializing.
    for (String nodeId : new String[] {"node1", "node2"}) {
      int started =
          state
              .readOnlyRoutingNodes()
              .node(nodeId)
              .numberOfShardsWithState(ShardRoutingState.STARTED);
      assertThat(started, equalTo(2));
      int initializing =
          state
              .readOnlyRoutingNodes()
              .node(nodeId)
              .numberOfShardsWithState(ShardRoutingState.INITIALIZING);
      assertThat(initializing, equalTo(0));
    }
    int unassignedCount = state.readOnlyRoutingNodes().unassigned().size();
    assertThat(unassignedCount, equalTo(4));

    logger.info("Do another reroute, make sure its still not allocated");
    // NOTE(review): this step only re-applies started shards and asserts nothing afterwards.
    nodesSnapshot = state.routingNodes();
    currentTable =
        allocation
            .applyStartedShards(state, nodesSnapshot.shardsWithState(INITIALIZING))
            .routingTable();
    state = newClusterStateBuilder().state(state).routingTable(currentTable).build();
  }
  /**
   * Checks that in-flight relocations are counted against a node's free disk space when the
   * "include relocations" setting of the disk threshold decider is enabled.
   *
   * <p>Every node is reported at 60% used. After the 14-byte shard of index "test" has been told
   * to move from node2 to node3, a second move of the "test2" shard to node3 is expected to be
   * rejected with an {@code ElasticsearchIllegalArgumentException} whose message reports 26.0%
   * free disk against the required 30.0%.
   */
  @Test
  public void testShardRelocationsTakenIntoAccount() {
    Settings diskSettings =
        settingsBuilder()
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_INCLUDE_RELOCATIONS, true)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK, 0.7)
            .put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK, 0.8)
            .build();

    Map<String, DiskUsage> usages = new HashMap<>();
    usages.put("node1", new DiskUsage("node1", 100, 40)); // 60% used
    usages.put("node2", new DiskUsage("node2", 100, 40)); // 60% used
    // Keyed by "node3": the previous "node2" key overwrote node2's entry and left node3 without
    // any reported usage.
    usages.put("node3", new DiskUsage("node3", 100, 40)); // 60% used

    Map<String, Long> shardSizes = new HashMap<>();
    shardSizes.put("[test][0][p]", 14L); // 14 bytes
    shardSizes.put("[test][0][r]", 14L);
    shardSizes.put("[test2][0][p]", 1L); // 1 bytes
    shardSizes.put("[test2][0][r]", 1L);
    final ClusterInfo clusterInfo =
        new ClusterInfo(ImmutableMap.copyOf(usages), ImmutableMap.copyOf(shardSizes));

    AllocationDeciders deciders =
        new AllocationDeciders(
            ImmutableSettings.EMPTY,
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(ImmutableSettings.EMPTY),
                    new DiskThresholdDecider(diskSettings))));

    // Stub service that always answers with the canned cluster info built above.
    ClusterInfoService cis =
        new ClusterInfoService() {
          @Override
          public ClusterInfo getClusterInfo() {
            logger.info("--> calling fake getClusterInfo");
            return clusterInfo;
          }
        };

    AllocationService strategy =
        new AllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build(),
            deciders,
            new ShardsAllocators(),
            cis);

    MetaData metaData =
        MetaData.builder()
            .put(IndexMetaData.builder("test").numberOfShards(1).numberOfReplicas(1))
            .put(IndexMetaData.builder("test2").numberOfShards(1).numberOfReplicas(1))
            .build();

    RoutingTable routingTable =
        RoutingTable.builder()
            .addAsNew(metaData.index("test"))
            .addAsNew(metaData.index("test2"))
            .build();

    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    logger.info("--> adding two nodes");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2")))
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // shards should be initializing
    assertThat(clusterState.routingNodes().shardsWithState(INITIALIZING).size(), equalTo(4));

    logger.info("--> start the shards");
    routingTable =
        strategy
            .applyStartedShards(
                clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logShardStates(clusterState);
    // Assert that we're able to start the primary and replicas
    assertThat(
        clusterState.routingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(4));

    logger.info("--> adding node3");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).put(newNode("node3")))
            .build();

    // First move: the "test" shard copy on node2 is sent to node3.
    AllocationCommand relocate1 =
        new MoveAllocationCommand(new ShardId("test", 0), "node2", "node3");
    AllocationCommands cmds = new AllocationCommands(relocate1);

    routingTable = strategy.reroute(clusterState, cmds).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    logShardStates(clusterState);

    // Second move: the "test2" shard copy on node2 is asked to follow to node3.
    AllocationCommand relocate2 =
        new MoveAllocationCommand(new ShardId("test2", 0), "node2", "node3");
    cmds = new AllocationCommands(relocate2);

    try {
      // The shard for the "test" index is already being relocated to
      // node3, which will put it over the low watermark when it
      // completes, with shard relocations taken into account this should
      // throw an exception about not being able to complete
      strategy.reroute(clusterState, cmds).routingTable();
      fail("should not have been able to reroute the shard");
    } catch (ElasticsearchIllegalArgumentException e) {
      assertThat(
          "can't allocated because there isn't enough room: " + e.getMessage(),
          e.getMessage().contains("less than required [30.0%] free disk on node, free: [26.0%]"),
          equalTo(true));
    }
  }