Ejemplo n.º 1
0
  /**
   * Builds a cluster state with a single index {@code "idx"} (one shard, no replicas) and two
   * nodes, performs one reroute through {@code service}, and verifies that the shard moved from
   * UNASSIGNED to INITIALIZING.
   *
   * @param service allocation service used for the reroute
   * @return the cluster state holding the routing table produced by the reroute
   */
  private ClusterState createInitialClusterState(AllocationService service) {
    MetaData.Builder metaBuilder = MetaData.builder();
    metaBuilder.put(
        IndexMetaData.builder("idx")
            .settings(settings(Version.CURRENT))
            .numberOfShards(1)
            .numberOfReplicas(0));
    MetaData metaData = metaBuilder.build();
    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    routingTableBuilder.addAsNew(metaData.index("idx"));

    RoutingTable routingTable = routingTableBuilder.build();
    ClusterState clusterState =
        ClusterState.builder(
                org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(
                    Settings.EMPTY))
            .metaData(metaData)
            .routingTable(routingTable)
            .build();
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")))
            .build();
    // Keep the pre-reroute table so both the "before" and "after" states can be checked.
    RoutingTable prevRoutingTable = routingTable;
    routingTable = service.reroute(clusterState, "reroute").routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // JUnit's assertEquals takes (expected, actual); keeping that order makes a failure message
    // report the right value as "expected".
    assertEquals(1, prevRoutingTable.index("idx").shards().size());
    assertEquals(UNASSIGNED, prevRoutingTable.index("idx").shard(0).shards().get(0).state());

    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
    return clusterState;
  }
  /**
   * Benchmark entry point. Builds a cluster state with many indices spread over tagged nodes,
   * then alternates between starting the initializing shards and rerouting until no shard is
   * unassigned, logging the time taken per run and in total.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    final int runs = 1;
    final int indexCount = 5 * 365; // five years
    final int shardsPerIndex = 6;
    final int replicasPerShard = 2;
    final int nodeCount = 30;
    final int tagCount = 2;
    AllocationService allocator =
        ElasticsearchAllocationTestCase.createAllocationService(
            ImmutableSettings.EMPTY, new Random(1));

    // Metadata for indices "test_1" .. "test_<indexCount>".
    MetaData.Builder metaBuilder = MetaData.builder();
    for (int idx = 1; idx <= indexCount; idx++) {
      IndexMetaData.Builder indexBuilder =
          IndexMetaData.builder("test_" + idx)
              .numberOfShards(shardsPerIndex)
              .numberOfReplicas(replicasPerShard);
      metaBuilder.put(indexBuilder);
    }
    MetaData metaData = metaBuilder.build();

    // Every index starts out as brand new in the routing table.
    RoutingTable.Builder routingBuilder = RoutingTable.builder();
    for (int idx = 1; idx <= indexCount; idx++) {
      routingBuilder.addAsNew(metaData.index("test_" + idx));
    }
    RoutingTable routingTable = routingBuilder.build();

    // Nodes are tagged round-robin with "tag_0" .. "tag_<tagCount - 1>".
    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder();
    for (int nodeNo = 1; nodeNo <= nodeCount; nodeNo++) {
      String tagValue = "tag_" + (nodeNo % tagCount);
      nodesBuilder.put(newNode("node" + nodeNo, ImmutableMap.of("tag", tagValue)));
    }
    ClusterState initialClusterState =
        ClusterState.builder()
            .metaData(metaData)
            .routingTable(routingTable)
            .nodes(nodesBuilder)
            .build();

    long benchmarkStart = System.currentTimeMillis();
    for (int run = 0; run < runs; run++) {
      logger.info("[{}] starting... ", run);
      long runStart = System.currentTimeMillis();
      ClusterState current = initialClusterState;
      while (current.readOnlyRoutingNodes().hasUnassignedShards()) {
        logger.info(
            "[{}] remaining unassigned {}", run, current.readOnlyRoutingNodes().unassigned().size());
        // Step 1: mark everything that is initializing as started.
        RoutingAllocation.Result started =
            allocator.applyStartedShards(
                current, current.readOnlyRoutingNodes().shardsWithState(INITIALIZING));
        current = ClusterState.builder(current).routingResult(started).build();
        // Step 2: reroute so further unassigned shards can be picked up.
        RoutingAllocation.Result rerouted = allocator.reroute(current);
        current = ClusterState.builder(current).routingResult(rerouted).build();
      }
      long runMillis = System.currentTimeMillis() - runStart;
      logger.info("[{}] took {}", run, TimeValue.timeValueMillis(runMillis));
    }
    long took = System.currentTimeMillis() - benchmarkStart;
    logger.info(
        "total took {}, AVG {}",
        TimeValue.timeValueMillis(took),
        TimeValue.timeValueMillis(took / runs));
  }
  /**
   * Creates a cluster state with several shards, one replica per shard, and all shard copies
   * STARTED.
   *
   * <p>Three nodes are registered; {@code newNode(0)} is the local node and {@code newNode(1)} is
   * the master. Every primary is placed on {@code newNode(0)} and every replica on {@code
   * newNode(1)}.
   *
   * @param index name of the index
   * @param numberOfShards number of primary shards of the index
   * @return the assembled cluster state
   */
  public static ClusterState stateWithAssignedPrimariesAndOneReplica(
      String index, int numberOfShards) {

    int numberOfNodes = 2; // we need a non-local master to test shard failures
    DiscoveryNodes.Builder discoBuilder = DiscoveryNodes.builder();
    for (int i = 0; i < numberOfNodes + 1; i++) {
      final DiscoveryNode node = newNode(i);
      discoBuilder = discoBuilder.put(node);
    }
    discoBuilder.localNodeId(newNode(0).getId());
    discoBuilder.masterNodeId(
        newNode(1).getId()); // we need a non-local master to test shard failures
    IndexMetaData indexMetaData =
        IndexMetaData.builder(index)
            .settings(
                Settings.builder()
                    .put(SETTING_VERSION_CREATED, Version.CURRENT)
                    .put(SETTING_NUMBER_OF_SHARDS, numberOfShards)
                    .put(SETTING_NUMBER_OF_REPLICAS, 1)
                    .put(SETTING_CREATION_DATE, System.currentTimeMillis()))
            .build();
    ClusterState.Builder state = ClusterState.builder(new ClusterName("test"));
    state.nodes(discoBuilder);
    state.metaData(MetaData.builder().put(indexMetaData, false).generateClusterUuidIfNeeded());
    IndexRoutingTable.Builder indexRoutingTableBuilder =
        IndexRoutingTable.builder(indexMetaData.getIndex());
    for (int i = 0; i < numberOfShards; i++) {
      // The routing entries are assembled by hand below; no throwaway RoutingTable.Builder is
      // needed per shard.
      final ShardId shardId = new ShardId(index, "_na_", i);
      IndexShardRoutingTable.Builder indexShardRoutingBuilder =
          new IndexShardRoutingTable.Builder(shardId);
      // Primary copy on node 0.
      indexShardRoutingBuilder.addShard(
          TestShardRouting.newShardRouting(
              index, i, newNode(0).getId(), null, null, true, ShardRoutingState.STARTED, null));
      // Replica copy on node 1.
      indexShardRoutingBuilder.addShard(
          TestShardRouting.newShardRouting(
              index, i, newNode(1).getId(), null, null, false, ShardRoutingState.STARTED, null));
      indexRoutingTableBuilder.addIndexShard(indexShardRoutingBuilder.build());
    }
    state.routingTable(RoutingTable.builder().add(indexRoutingTableBuilder.build()).build());
    return state.build();
  }
  /**
   * Allocates ten single-shard indices on three nodes, then adds two more nodes and checks that
   * shards relocate until each of the five nodes holds two started shards.
   */
  @Test
  public void testMultiIndexUnevenNodes() {
    AllocationService strategy =
        createAllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build());

    final int numberOfIndices = 10;
    logger.info("Building initial routing table with " + numberOfIndices + " indices");

    MetaData.Builder metaDataBuilder = MetaData.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      metaDataBuilder.put(
          IndexMetaData.builder("test" + i)
              .settings(settings(Version.CURRENT))
              .numberOfShards(1)
              .numberOfReplicas(0));
    }
    MetaData metaData = metaDataBuilder.build();

    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      routingTableBuilder.addAsNew(metaData.index("test" + i));
    }
    RoutingTable routingTable = routingTableBuilder.build();

    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    assertThat(routingTable.indicesRouting().size(), equalTo(numberOfIndices));

    logger.info("Starting 3 nodes and rerouting");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(
                DiscoveryNodes.builder()
                    .put(newNode("node1"))
                    .put(newNode("node2"))
                    .put(newNode("node3")))
            .build();
    RoutingTable prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(), equalTo(INITIALIZING));
    }
    RoutingNodes routingNodes = clusterState.routingNodes();
    assertThat(numberOfShardsOfType(routingNodes, INITIALIZING), equalTo(numberOfIndices));
    // Ten shards over three nodes: every node must hold either 3 or 4 of them. Each of the three
    // nodes is checked once (node3 was previously skipped in favour of a repeated node2 check).
    assertThat(
        routingNodes.node("node1").numberOfShardsWithState(INITIALIZING),
        anyOf(equalTo(3), equalTo(4)));
    assertThat(
        routingNodes.node("node2").numberOfShardsWithState(INITIALIZING),
        anyOf(equalTo(3), equalTo(4)));
    assertThat(
        routingNodes.node("node3").numberOfShardsWithState(INITIALIZING),
        anyOf(equalTo(3), equalTo(4)));

    logger.info("Start two more nodes, things should remain the same");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(
                DiscoveryNodes.builder(clusterState.nodes())
                    .put(newNode("node4"))
                    .put(newNode("node5")))
            .build();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();

    // Reference equality: the reroute is expected to hand back the very same table instance.
    assertThat(prevRoutingTable == routingTable, equalTo(true));

    routingNodes = clusterState.routingNodes();
    prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(),
          anyOf(equalTo(RELOCATING), equalTo(STARTED)));
    }
    routingNodes = clusterState.routingNodes();
    assertThat(
        "4 source shard routing are relocating",
        numberOfShardsOfType(routingNodes, RELOCATING),
        equalTo(4));
    assertThat(
        "4 target shard routing are initializing",
        numberOfShardsOfType(routingNodes, INITIALIZING),
        equalTo(4));

    logger.info("Now, mark the relocated as started");
    prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(),
          anyOf(equalTo(RELOCATING), equalTo(STARTED)));
    }
    routingNodes = clusterState.routingNodes();
    assertThat(numberOfShardsOfType(routingNodes, STARTED), equalTo(numberOfIndices));
    // Ten shards over five nodes: an even spread is two started shards per node.
    for (RoutingNode routingNode : routingNodes) {
      assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(2));
    }
  }
  /**
   * Allocates fifty single-shard indices on twenty-five nodes (two shards per node), then adds
   * twenty-five more nodes and checks that, once the shards are started, half of them begin
   * relocating while all of them are still reported on the first twenty-five nodes.
   */
  @Test
  public void testMultiIndexEvenDistribution() {
    AllocationService strategy =
        createAllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build());

    final int numberOfIndices = 50;
    logger.info("Building initial routing table with " + numberOfIndices + " indices");

    MetaData.Builder metaDataBuilder = MetaData.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      metaDataBuilder.put(
          IndexMetaData.builder("test" + i)
              .settings(settings(Version.CURRENT))
              .numberOfShards(1)
              .numberOfReplicas(0));
    }
    MetaData metaData = metaDataBuilder.build();

    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      routingTableBuilder.addAsNew(metaData.index("test" + i));
    }
    RoutingTable routingTable = routingTableBuilder.build();
    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    assertThat(routingTable.indicesRouting().size(), equalTo(numberOfIndices));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(), equalTo(UNASSIGNED));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).currentNodeId(), nullValue());
    }

    logger.info("Adding " + (numberOfIndices / 2) + " nodes");
    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder();
    for (int i = 0; i < (numberOfIndices / 2); i++) {
      nodesBuilder.put(newNode("node" + i));
    }
    RoutingTable prevRoutingTable = routingTable;
    clusterState = ClusterState.builder(clusterState).nodes(nodesBuilder).build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).unassigned(), equalTo(false));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(), equalTo(INITIALIZING));
      assertThat(routingTable.index("test" + i).shard(0).shards().get(0).primary(), equalTo(true));
      // make sure we still have 2 shards initializing per node on the first 25 nodes
      String nodeId = routingTable.index("test" + i).shard(0).shards().get(0).currentNodeId();
      int nodeIndex = Integer.parseInt(nodeId.substring("node".length()));
      assertThat(nodeIndex, lessThan(25));
    }
    RoutingNodes routingNodes = clusterState.routingNodes();
    Set<String> encounteredIndices = newHashSet();
    for (RoutingNode routingNode : routingNodes) {
      assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(0));
      assertThat(routingNode.size(), equalTo(2));
      // make sure we still have 2 shards initializing per node on the only 25 nodes
      int nodeIndex = Integer.parseInt(routingNode.nodeId().substring("node".length()));
      assertThat(nodeIndex, lessThan(25));
      // check that we don't have a shard associated with a node with the same index name (we have a
      // single shard)
      for (ShardRouting shardRoutingEntry : routingNode) {
        assertThat(encounteredIndices, not(hasItem(shardRoutingEntry.index())));
        encounteredIndices.add(shardRoutingEntry.index());
      }
    }

    logger.info("Adding additional " + (numberOfIndices / 2) + " nodes, nothing should change");
    nodesBuilder = DiscoveryNodes.builder(clusterState.nodes());
    for (int i = (numberOfIndices / 2); i < numberOfIndices; i++) {
      nodesBuilder.put(newNode("node" + i));
    }
    prevRoutingTable = routingTable;
    clusterState = ClusterState.builder(clusterState).nodes(nodesBuilder).build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // The reroute is expected to return the same table instance (no change).
    assertThat(prevRoutingTable != routingTable, equalTo(false));

    logger.info("Marking the shard as started");
    prevRoutingTable = routingTable;
    // NOTE(review): routingNodes was captured before the second batch of nodes was added; the
    // assertion just above establishes that the routing table has not changed since then.
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    int numberOfRelocatingShards = 0;
    int numberOfStartedShards = 0;
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).unassigned(), equalTo(false));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(),
          anyOf(equalTo(STARTED), equalTo(RELOCATING)));
      if (routingTable.index("test" + i).shard(0).shards().get(0).state() == STARTED) {
        numberOfStartedShards++;
      } else if (routingTable.index("test" + i).shard(0).shards().get(0).state() == RELOCATING) {
        numberOfRelocatingShards++;
      }
      assertThat(routingTable.index("test" + i).shard(0).shards().get(0).primary(), equalTo(true));
      // make sure we still have 2 shards either relocating or started on the first 25 nodes (still)
      String nodeId = routingTable.index("test" + i).shard(0).shards().get(0).currentNodeId();
      int nodeIndex = Integer.parseInt(nodeId.substring("node".length()));
      assertThat(nodeIndex, lessThan(25));
    }
    assertThat(numberOfRelocatingShards, equalTo(25));
    assertThat(numberOfStartedShards, equalTo(25));
  }
  /**
   * Creates cluster state with an index that has one shard and #(replicaStates) replicas.
   *
   * <p>One node more than strictly required is registered; {@code newNode(0)} is the local node
   * and {@code newNode(1)} is the master. Each assigned shard copy (and each relocation target)
   * takes a distinct node out of the pool of unassigned nodes.
   *
   * @param index name of the index
   * @param activePrimaryLocal if active primary should coincide with the local node in the cluster
   *     state
   * @param primaryState state of primary
   * @param replicaStates states of the replicas. length of this array determines also the number of
   *     replicas
   * @return the assembled cluster state
   */
  public static ClusterState state(
      String index,
      boolean activePrimaryLocal,
      ShardRoutingState primaryState,
      ShardRoutingState... replicaStates) {
    final int numberOfReplicas = replicaStates.length;

    // One node per shard copy, plus one extra node for every relocating copy's target.
    int numberOfNodes = numberOfReplicas + 1;
    if (primaryState == ShardRoutingState.RELOCATING) {
      numberOfNodes++;
    }
    for (ShardRoutingState state : replicaStates) {
      if (state == ShardRoutingState.RELOCATING) {
        numberOfNodes++;
      }
    }
    numberOfNodes = Math.max(2, numberOfNodes); // we need a non-local master to test shard failures
    final ShardId shardId = new ShardId(index, "_na_", 0);
    DiscoveryNodes.Builder discoBuilder = DiscoveryNodes.builder();
    Set<String> unassignedNodes = new HashSet<>();
    for (int i = 0; i < numberOfNodes + 1; i++) {
      final DiscoveryNode node = newNode(i);
      discoBuilder = discoBuilder.put(node);
      unassignedNodes.add(node.getId());
    }
    discoBuilder.localNodeId(newNode(0).getId());
    discoBuilder.masterNodeId(
        newNode(1).getId()); // we need a non-local master to test shard failures
    final int primaryTerm = 1 + randomInt(200);
    IndexMetaData indexMetaData =
        IndexMetaData.builder(index)
            .settings(
                Settings.builder()
                    .put(SETTING_VERSION_CREATED, Version.CURRENT)
                    .put(SETTING_NUMBER_OF_SHARDS, 1)
                    .put(SETTING_NUMBER_OF_REPLICAS, numberOfReplicas)
                    .put(SETTING_CREATION_DATE, System.currentTimeMillis()))
            .primaryTerm(0, primaryTerm)
            .build();

    IndexShardRoutingTable.Builder indexShardRoutingBuilder =
        new IndexShardRoutingTable.Builder(shardId);

    String primaryNode = null;
    String relocatingNode = null;
    UnassignedInfo unassignedInfo = null;
    if (primaryState != ShardRoutingState.UNASSIGNED) {
      if (activePrimaryLocal) {
        primaryNode = newNode(0).getId();
        unassignedNodes.remove(primaryNode);
      } else {
        // Pick the primary among all nodes except the local one ...
        Set<String> unassignedNodesExcludingLocal = new HashSet<>(unassignedNodes);
        unassignedNodesExcludingLocal.remove(newNode(0).getId());
        primaryNode = selectAndRemove(unassignedNodesExcludingLocal);
        // ... and take it out of the real pool too, otherwise a replica (or a relocation target)
        // could later be placed on the node that already holds the primary.
        unassignedNodes.remove(primaryNode);
      }
      if (primaryState == ShardRoutingState.RELOCATING) {
        relocatingNode = selectAndRemove(unassignedNodes);
      }
    } else {
      unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null);
    }
    indexShardRoutingBuilder.addShard(
        TestShardRouting.newShardRouting(
            index, 0, primaryNode, relocatingNode, null, true, primaryState, unassignedInfo));

    for (ShardRoutingState replicaState : replicaStates) {
      String replicaNode = null;
      relocatingNode = null;
      unassignedInfo = null;
      if (replicaState != ShardRoutingState.UNASSIGNED) {
        assert primaryNode != null : "a replica is assigned but the primary isn't";
        replicaNode = selectAndRemove(unassignedNodes);
        if (replicaState == ShardRoutingState.RELOCATING) {
          relocatingNode = selectAndRemove(unassignedNodes);
        }
      } else {
        unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null);
      }
      indexShardRoutingBuilder.addShard(
          TestShardRouting.newShardRouting(
              index,
              shardId.id(),
              replicaNode,
              relocatingNode,
              null,
              false,
              replicaState,
              unassignedInfo));
    }

    ClusterState.Builder state = ClusterState.builder(new ClusterName("test"));
    state.nodes(discoBuilder);
    state.metaData(MetaData.builder().put(indexMetaData, false).generateClusterUuidIfNeeded());
    state.routingTable(
        RoutingTable.builder()
            .add(
                IndexRoutingTable.builder(indexMetaData.getIndex())
                    .addIndexShard(indexShardRoutingBuilder.build()))
            .build());
    return state.build();
  }
  /**
   * Places the shards of a 5-shard / 1-replica index on four nodes with a hand-written allocator
   * (layout pictured below), then switches to the regular allocation service and asserts that
   * every shard is STARTED after the shards are started and again after a further reroute.
   */
  @Test
  public void testNoRebalanceOnPrimaryOverload() {
    Settings.Builder settings = settingsBuilder();

    /*
     *  // this allocator tries to rebuild this scenario where a rebalance is
     *  // triggered solely by the primary overload on node [1] where a shard
     *  // is rebalanced to node 0
        routing_nodes:
        -----node_id[0][V]
        --------[test][0], node[0], [R], s[STARTED]
        --------[test][4], node[0], [R], s[STARTED]
        -----node_id[1][V]
        --------[test][0], node[1], [P], s[STARTED]
        --------[test][1], node[1], [P], s[STARTED]
        --------[test][3], node[1], [R], s[STARTED]
        -----node_id[2][V]
        --------[test][1], node[2], [R], s[STARTED]
        --------[test][2], node[2], [R], s[STARTED]
        --------[test][4], node[2], [P], s[STARTED]
        -----node_id[3][V]
        --------[test][2], node[3], [P], s[STARTED]
        --------[test][3], node[3], [P], s[STARTED]
        ---- unassigned
     */
    ShardsAllocator fixedLayoutAllocator =
        new ShardsAllocator() {
          // Target node per shard copy; the array index is the shard id (0..4).
          private final String[] primaryTargets = {"node1", "node1", "node3", "node3", "node2"};
          private final String[] replicaTargets = {"node0", "node2", "node2", "node1", "node0"};

          @Override
          public boolean rebalance(RoutingAllocation allocation) {
            return false;
          }

          @Override
          public boolean move(
              ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
            return false;
          }

          @Override
          public void applyStartedShards(StartedRerouteAllocation allocation) {}

          @Override
          public void applyFailedShards(FailedRerouteAllocation allocation) {}

          @Override
          public boolean allocateUnassigned(RoutingAllocation allocation) {
            RoutingNodes.UnassignedShards pending = allocation.routingNodes().unassigned();
            final boolean hadUnassigned = !pending.isEmpty();
            for (ShardRouting shard : pending) {
              final int shardNumber = shard.id();
              if (shardNumber < 0 || shardNumber >= primaryTargets.length) {
                continue; // shard ids outside the table are left untouched
              }
              final String target =
                  shard.primary() ? primaryTargets[shardNumber] : replicaTargets[shardNumber];
              allocation.routingNodes().initialize(shard, target);
            }
            pending.clear();
            return hadUnassigned;
          }
        };

    AllocationService strategy =
        new AllocationService(
            settings.build(),
            randomAllocationDeciders(
                settings.build(),
                new NodeSettingsService(Settings.Builder.EMPTY_SETTINGS),
                getRandom()),
            new ShardsAllocators(
                settings.build(), NoopGatewayAllocator.INSTANCE, fixedLayoutAllocator),
            EmptyClusterInfoService.INSTANCE);

    // One index "test" with 5 shards and 1 replica each.
    IndexMetaData.Builder indexMeta =
        IndexMetaData.builder("test")
            .settings(settings(Version.CURRENT))
            .numberOfShards(5)
            .numberOfReplicas(1);
    MetaData metaData = MetaData.builder().put(indexMeta).build();

    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    for (ObjectCursor<IndexMetaData> indexCursor : metaData.indices().values()) {
      routingTableBuilder.addAsNew(indexCursor.value);
    }
    RoutingTable routingTable = routingTableBuilder.build();

    // Four nodes: node0 .. node3.
    DiscoveryNodes.Builder nodes = DiscoveryNodes.builder();
    for (int nodeNo = 0; nodeNo < 4; nodeNo++) {
      nodes.put(newNode("node" + nodeNo));
    }

    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .nodes(nodes)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    RoutingNodes routingNodes = clusterState.getRoutingNodes();

    // After the first reroute everything placed by the custom allocator is initializing.
    for (RoutingNode node : routingNodes) {
      for (ShardRouting shard : node) {
        assertThat(shard.state(), Matchers.equalTo(ShardRoutingState.INITIALIZING));
      }
    }
    strategy = createAllocationService(settings.build());

    logger.info("use the new allocator and check if it moves shards");
    routingNodes = clusterState.getRoutingNodes();
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    routingNodes = clusterState.getRoutingNodes();
    for (RoutingNode node : routingNodes) {
      for (ShardRouting shard : node) {
        assertThat(shard.state(), Matchers.equalTo(ShardRoutingState.STARTED));
      }
    }

    logger.info("start the replica shards");
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    routingNodes = clusterState.getRoutingNodes();

    for (RoutingNode node : routingNodes) {
      for (ShardRouting shard : node) {
        assertThat(shard.state(), Matchers.equalTo(ShardRoutingState.STARTED));
      }
    }

    logger.info("rebalancing");
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    routingNodes = clusterState.getRoutingNodes();

    // A rebalance would show up as RELOCATING/INITIALIZING entries; none are expected.
    for (RoutingNode node : routingNodes) {
      for (ShardRouting shard : node) {
        assertThat(shard.state(), Matchers.equalTo(ShardRoutingState.STARTED));
      }
    }
  }
  /**
   * Creates {@code numberOfIndices} indices ("test0", "test1", ...) on {@code numberOfNodes}
   * nodes, reroutes once, and then keeps marking initializing shards as started until the routing
   * table stops changing.
   *
   * @param strategy allocation service used for the reroute and for starting shards
   * @return the cluster state reached once applying started shards no longer yields a new table
   */
  private ClusterState initCluster(AllocationService strategy) {
    // Index metadata.
    MetaData.Builder metaBuilder = MetaData.builder();
    for (int indexNo = 0; indexNo < numberOfIndices; indexNo++) {
      metaBuilder =
          metaBuilder.put(
              IndexMetaData.builder("test" + indexNo)
                  .settings(settings(Version.CURRENT))
                  .numberOfShards(numberOfShards)
                  .numberOfReplicas(numberOfReplicas));
    }
    MetaData metaData = metaBuilder.build();

    // Routing table with every index added as new.
    RoutingTable.Builder tableBuilder = RoutingTable.builder();
    for (ObjectCursor<IndexMetaData> indexCursor : metaData.indices().values()) {
      tableBuilder.addAsNew(indexCursor.value);
    }
    RoutingTable routingTable = tableBuilder.build();

    logger.info("start " + numberOfNodes + " nodes");
    DiscoveryNodes.Builder nodes = DiscoveryNodes.builder();
    for (int nodeNo = 0; nodeNo < numberOfNodes; nodeNo++) {
      nodes.put(newNode("node" + nodeNo));
    }
    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .nodes(nodes)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logger.info("restart all the primary shards, replicas will start initializing");
    RoutingNodes routingNodes = clusterState.getRoutingNodes();
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logger.info("start the replica shards");
    routingNodes = clusterState.getRoutingNodes();
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    routingNodes = clusterState.getRoutingNodes();

    logger.info("complete rebalancing");
    // Keep starting shards until the call hands back the same table instance it was given.
    RoutingTable previous;
    do {
      previous = routingTable;
      routingTable =
          strategy
              .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
              .routingTable();
      clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
      routingNodes = clusterState.getRoutingNodes();
    } while (routingTable != previous);

    return clusterState;
  }