@Test
  public void testRoutingTableSerialization() throws Exception {
    // Fixture: one index named "test" with 10 shards and 1 replica per shard.
    IndexMetaData.Builder testIndex =
        IndexMetaData.builder("test").numberOfShards(10).numberOfReplicas(1);
    MetaData metaData = MetaData.builder().put(testIndex).build();
    RoutingTable initialRouting = RoutingTable.builder().addAsNew(metaData.index("test")).build();

    // Three nodes for the allocation service to place shards on.
    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder();
    for (String nodeId : new String[] {"node1", "node2", "node3"}) {
      nodesBuilder.put(newNode(nodeId));
    }
    DiscoveryNodes nodes = nodesBuilder.build();

    ClusterState initialState =
        ClusterState.builder().nodes(nodes).metaData(metaData).routingTable(initialRouting).build();

    // The routing table produced by a reroute is what gets round-tripped.
    RoutingTable expected = createAllocationService().reroute(initialState).routingTable();

    // Write the table to an in-memory stream and read it straight back.
    BytesStreamOutput written = new BytesStreamOutput();
    RoutingTable.Builder.writeTo(expected, written);
    byte[] wireBytes = written.bytes().toBytes();
    RoutingTable actual = RoutingTable.Builder.readFrom(new BytesStreamInput(wireBytes, false));

    // Both tables must render to the same text.
    assertThat(actual.prettyPrint(), equalTo(expected.prettyPrint()));
  }
  /**
   * Builds a cluster state holding the index {@code idx} (one shard, no replicas) and the nodes
   * {@code node1} and {@code node2}, then runs a single reroute through {@code service}.
   *
   * <p>Along the way it asserts that the only shard is {@code UNASSIGNED} before the reroute and
   * {@code INITIALIZING} after it.
   *
   * @param service allocation service used for the reroute
   * @return the cluster state carrying the routing table produced by the reroute
   */
  private ClusterState createInitialClusterState(AllocationService service) {
    MetaData.Builder metaBuilder = MetaData.builder();
    metaBuilder.put(
        IndexMetaData.builder("idx")
            .settings(settings(Version.CURRENT))
            .numberOfShards(1)
            .numberOfReplicas(0));
    MetaData metaData = metaBuilder.build();
    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    routingTableBuilder.addAsNew(metaData.index("idx"));

    RoutingTable routingTable = routingTableBuilder.build();
    ClusterState clusterState =
        ClusterState.builder(
                org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(
                    Settings.EMPTY))
            .metaData(metaData)
            .routingTable(routingTable)
            .build();
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")))
            .build();
    RoutingTable prevRoutingTable = routingTable;
    routingTable = service.reroute(clusterState, "reroute").routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // assertEquals takes (expected, actual); this order keeps failure messages truthful.
    assertEquals(1, prevRoutingTable.index("idx").shards().size());
    assertEquals(UNASSIGNED, prevRoutingTable.index("idx").shard(0).shards().get(0).state());

    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
    return clusterState;
  }
  /**
   * Stand-alone allocation benchmark.
   *
   * <p>Creates {@code 5 * 365} indices (6 shards, 2 replicas each) and 30 tagged nodes, then
   * alternates between starting all initializing shards and rerouting until no shard is left
   * unassigned, logging how long each run and the whole benchmark took.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    final int runs = 1;
    final int indexCount = 5 * 365; // five years
    final int shardsPerIndex = 6;
    final int replicasPerShard = 2;
    final int nodeCount = 30;
    final int tagCount = 2;
    AllocationService strategy =
        ElasticsearchAllocationTestCase.createAllocationService(
            ImmutableSettings.EMPTY, new Random(1));

    // Index metadata: test_1 .. test_<indexCount>.
    MetaData.Builder metaDataBuilder = MetaData.builder();
    for (int i = 1; i <= indexCount; i++) {
      IndexMetaData.Builder index =
          IndexMetaData.builder("test_" + i)
              .numberOfShards(shardsPerIndex)
              .numberOfReplicas(replicasPerShard);
      metaDataBuilder.put(index);
    }
    MetaData metaData = metaDataBuilder.build();

    // Every index starts out as new in the routing table.
    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    for (int i = 1; i <= indexCount; i++) {
      routingTableBuilder.addAsNew(metaData.index("test_" + i));
    }
    RoutingTable routingTable = routingTableBuilder.build();

    // Nodes node1 .. node<nodeCount>, each carrying a "tag" attribute cycling over tagCount values.
    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder();
    for (int i = 1; i <= nodeCount; i++) {
      String tag = "tag_" + (i % tagCount);
      nodesBuilder.put(newNode("node" + i, ImmutableMap.of("tag", tag)));
    }
    ClusterState initialClusterState =
        ClusterState.builder()
            .metaData(metaData)
            .routingTable(routingTable)
            .nodes(nodesBuilder)
            .build();

    final long benchmarkStart = System.currentTimeMillis();
    for (int run = 0; run < runs; run++) {
      logger.info("[{}] starting... ", run);
      final long runStart = System.currentTimeMillis();
      ClusterState current = initialClusterState;
      while (current.readOnlyRoutingNodes().hasUnassignedShards()) {
        logger.info(
            "[{}] remaining unassigned {}",
            run,
            current.readOnlyRoutingNodes().unassigned().size());
        // Mark everything that is initializing as started ...
        RoutingAllocation.Result started =
            strategy.applyStartedShards(
                current, current.readOnlyRoutingNodes().shardsWithState(INITIALIZING));
        current = ClusterState.builder(current).routingResult(started).build();
        // ... then reroute so the next batch of shards gets picked up.
        RoutingAllocation.Result rerouted = strategy.reroute(current);
        current = ClusterState.builder(current).routingResult(rerouted).build();
      }
      long runMillis = System.currentTimeMillis() - runStart;
      logger.info("[{}] took {}", run, TimeValue.timeValueMillis(runMillis));
    }
    final long totalMillis = System.currentTimeMillis() - benchmarkStart;
    logger.info(
        "total took {}, AVG {}",
        TimeValue.timeValueMillis(totalMillis),
        TimeValue.timeValueMillis(totalMillis / runs));
  }
  /**
   * Answers a cluster state request with a copy of the state obtained from {@code clusterService},
   * containing only the parts the request asked for (nodes, routing table, blocks, metadata).
   *
   * <p>When the request names indices, the routing table and metadata in the response are limited
   * to those indices; otherwise they are copied whole.
   *
   * @param request which parts of the state, and optionally which indices, to return
   * @param state not read by this method; the state is taken from {@code clusterService} instead
   * @param listener receives the assembled response
   */
  @Override
  protected void masterOperation(
      final ClusterStateRequest request,
      final ClusterState state,
      ActionListener<ClusterStateResponse> listener)
      throws ElasticsearchException {
    final ClusterState currentState = clusterService.state();
    logger.trace("Serving cluster state request using version {}", currentState.version());

    final ClusterState.Builder response = ClusterState.builder(currentState.getClusterName());
    response.version(currentState.version());

    if (request.nodes()) {
      response.nodes(currentState.nodes());
    }

    if (request.routingTable()) {
      if (request.indices().length == 0) {
        // No index filter: hand back the routing table as is.
        response.routingTable(currentState.routingTable());
      } else {
        // Keep only the requested indices that actually have routing information.
        RoutingTable.Builder filteredRouting = RoutingTable.builder();
        for (String indexName : request.indices()) {
          if (currentState.routingTable().getIndicesRouting().containsKey(indexName)) {
            filteredRouting.add(currentState.routingTable().getIndicesRouting().get(indexName));
          }
        }
        response.routingTable(filteredRouting);
      }
    }

    if (request.blocks()) {
      response.blocks(currentState.blocks());
    }

    if (request.metaData()) {
      final MetaData.Builder filteredMetaData;
      if (request.indices().length == 0) {
        // No index filter: start from a full copy of the current metadata.
        filteredMetaData = MetaData.builder(currentState.metaData());
      } else {
        // Start empty and add only the indices the request resolves to.
        filteredMetaData = MetaData.builder();
        String[] concreteIndices =
            currentState
                .metaData()
                .concreteIndices(IndicesOptions.lenientExpandOpen(), request.indices());
        for (String indexName : concreteIndices) {
          IndexMetaData indexMetaData = currentState.metaData().index(indexName);
          if (indexMetaData == null) {
            continue;
          }
          filteredMetaData.put(indexMetaData, false);
        }
      }
      response.metaData(filteredMetaData);
    }

    listener.onResponse(new ClusterStateResponse(clusterName, response.build()));
  }
 /**
  * Serializes {@code state} to {@code out}.
  *
  * <p>The fields are written in this fixed order: version (as a long), metadata, routing table,
  * discovery nodes, cluster blocks, allocation explanation. That order is the stream layout, so a
  * reader has to consume the fields in the same sequence.
  *
  * @param state cluster state to serialize
  * @param out stream the state is written to
  * @throws IOException propagated from the underlying write calls
  */
 public static void writeTo(ClusterState state, StreamOutput out) throws IOException {
   out.writeLong(state.version());
   MetaData.Builder.writeTo(state.metaData(), out);
   RoutingTable.Builder.writeTo(state.routingTable(), out);
   DiscoveryNodes.Builder.writeTo(state.nodes(), out);
   ClusterBlocks.Builder.writeClusterBlocks(state.blocks(), out);
   state.allocationExplanation().writeTo(out);
 }
 /**
  * Removes the given index from the metadata builder and the routing table builder, and drops
  * its index-level blocks from the blocks builder.
  *
  * @param blocks blocks builder to remove the index blocks from
  * @param metaData metadata builder to remove the index from
  * @param routingTable routing table builder to remove the index from
  * @param index the index to remove; only its name is used
  */
 private void removeIndex(
     ClusterBlocks.Builder blocks,
     MetaData.Builder metaData,
     RoutingTable.Builder routingTable,
     IndexMetaData index) {
   final String indexName = index.getIndex();
   metaData.remove(indexName);
   routingTable.remove(indexName);
   blocks.removeIndexBlocks(indexName);
 }
  /**
   * Creates a cluster state for one index with {@code numberOfShards} shards, each having a
   * primary and one replica, all in state {@code STARTED}.
   *
   * <p>The state contains three nodes. Node 0 is the local node and holds every primary; node 1 is
   * the master and holds every replica.
   *
   * @param index name of the index
   * @param numberOfShards number of shards of the index
   * @return the assembled cluster state
   */
  public static ClusterState stateWithAssignedPrimariesAndOneReplica(
      String index, int numberOfShards) {

    int numberOfNodes = 2; // we need a non-local master to test shard failures
    DiscoveryNodes.Builder discoBuilder = DiscoveryNodes.builder();
    for (int i = 0; i < numberOfNodes + 1; i++) {
      final DiscoveryNode node = newNode(i);
      discoBuilder = discoBuilder.put(node);
    }
    discoBuilder.localNodeId(newNode(0).getId());
    discoBuilder.masterNodeId(
        newNode(1).getId()); // we need a non-local master to test shard failures
    IndexMetaData indexMetaData =
        IndexMetaData.builder(index)
            .settings(
                Settings.builder()
                    .put(SETTING_VERSION_CREATED, Version.CURRENT)
                    .put(SETTING_NUMBER_OF_SHARDS, numberOfShards)
                    .put(SETTING_NUMBER_OF_REPLICAS, 1)
                    .put(SETTING_CREATION_DATE, System.currentTimeMillis()))
            .build();
    ClusterState.Builder state = ClusterState.builder(new ClusterName("test"));
    state.nodes(discoBuilder);
    state.metaData(MetaData.builder().put(indexMetaData, false).generateClusterUuidIfNeeded());
    IndexRoutingTable.Builder indexRoutingTableBuilder =
        IndexRoutingTable.builder(indexMetaData.getIndex());
    // The routing is assembled by hand, one shard group at a time; no throwaway
    // RoutingTable.Builder is created per shard.
    for (int i = 0; i < numberOfShards; i++) {
      final ShardId shardId = new ShardId(index, "_na_", i);
      IndexShardRoutingTable.Builder indexShardRoutingBuilder =
          new IndexShardRoutingTable.Builder(shardId);
      // Primary on node 0 (the local node).
      indexShardRoutingBuilder.addShard(
          TestShardRouting.newShardRouting(
              index, i, newNode(0).getId(), null, null, true, ShardRoutingState.STARTED, null));
      // Replica on node 1 (the master).
      indexShardRoutingBuilder.addShard(
          TestShardRouting.newShardRouting(
              index, i, newNode(1).getId(), null, null, false, ShardRoutingState.STARTED, null));
      indexRoutingTableBuilder.addIndexShard(indexShardRoutingBuilder.build());
    }
    state.routingTable(RoutingTable.builder().add(indexRoutingTableBuilder.build()).build());
    return state.build();
  }
 /**
  * Reads a cluster state from {@code in}.
  *
  * <p>The fields are consumed in this fixed order: version (a long), metadata, routing table,
  * discovery nodes, cluster blocks, allocation explanation.
  *
  * @param in stream to read from
  * @param globalSettings handed to {@code MetaData.Builder.readFrom}; may be {@code null}
  * @param localNode handed to {@code DiscoveryNodes.Builder.readFrom}; may be {@code null}
  * @return the cluster state built from the values read
  * @throws IOException propagated from the underlying read calls
  */
 public static ClusterState readFrom(
     StreamInput in, @Nullable Settings globalSettings, @Nullable DiscoveryNode localNode)
     throws IOException {
   Builder builder = new Builder();
   // Each read advances the stream, so the statement order below must not change.
   builder.version = in.readLong();
   builder.metaData = MetaData.Builder.readFrom(in, globalSettings);
   builder.routingTable = RoutingTable.Builder.readFrom(in);
   builder.nodes = DiscoveryNodes.Builder.readFrom(in, localNode);
   builder.blocks = ClusterBlocks.Builder.readClusterBlocks(in);
   builder.allocationExplanation = AllocationExplanation.readAllocationExplanation(in);
   return builder.build();
 }
 /**
  * Adds an index of a tribe to the builders.
  *
  * <p>The index metadata is stored with its settings extended by {@code TRIBE_NAME = tribeName},
  * its routing is copied from {@code tribeState}, and a metadata, read or write block is added for
  * each of the configured block patterns the index name matches.
  *
  * @param tribeState tribe cluster state the index routing is taken from
  * @param blocks blocks builder that receives any matching index blocks
  * @param metaData metadata builder that receives the tagged index metadata
  * @param routingTable routing table builder that receives the index routing
  * @param tribeIndex metadata of the index to add
  */
 private void addNewIndex(
     ClusterState tribeState,
     ClusterBlocks.Builder blocks,
     MetaData.Builder metaData,
     RoutingTable.Builder routingTable,
     IndexMetaData tribeIndex) {
   // Tag the index settings with the owning tribe before storing the metadata.
   Settings.Builder taggedSettings = Settings.builder();
   taggedSettings.put(tribeIndex.getSettings());
   taggedSettings.put(TRIBE_NAME, tribeName);
   Settings tribeSettings = taggedSettings.build();
   metaData.put(IndexMetaData.builder(tribeIndex).settings(tribeSettings));

   final String indexName = tribeIndex.getIndex();
   routingTable.add(tribeState.routingTable().index(indexName));

   // Apply whichever configured block patterns the index name matches.
   if (Regex.simpleMatch(blockIndicesMetadata, indexName)) {
     blocks.addIndexBlock(indexName, IndexMetaData.INDEX_METADATA_BLOCK);
   }
   if (Regex.simpleMatch(blockIndicesRead, indexName)) {
     blocks.addIndexBlock(indexName, IndexMetaData.INDEX_READ_BLOCK);
   }
   if (Regex.simpleMatch(blockIndicesWrite, indexName)) {
     blocks.addIndexBlock(indexName, IndexMetaData.INDEX_WRITE_BLOCK);
   }
 }
  /**
   * Allocates ten single-shard indices without replicas on three nodes, then adds two more nodes
   * and verifies that, once the resulting relocations are started, every node holds two started
   * shards.
   */
  @Test
  public void testMultiIndexUnevenNodes() {
    AllocationService strategy =
        createAllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build());

    final int numberOfIndices = 10;
    logger.info("Building initial routing table with " + numberOfIndices + " indices");

    MetaData.Builder metaDataBuilder = MetaData.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      metaDataBuilder.put(
          IndexMetaData.builder("test" + i)
              .settings(settings(Version.CURRENT))
              .numberOfShards(1)
              .numberOfReplicas(0));
    }
    MetaData metaData = metaDataBuilder.build();

    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      routingTableBuilder.addAsNew(metaData.index("test" + i));
    }
    RoutingTable routingTable = routingTableBuilder.build();

    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    assertThat(routingTable.indicesRouting().size(), equalTo(numberOfIndices));

    logger.info("Starting 3 nodes and rerouting");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(
                DiscoveryNodes.builder()
                    .put(newNode("node1"))
                    .put(newNode("node2"))
                    .put(newNode("node3")))
            .build();
    RoutingTable prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(), equalTo(INITIALIZING));
    }
    RoutingNodes routingNodes = clusterState.routingNodes();
    assertThat(numberOfShardsOfType(routingNodes, INITIALIZING), equalTo(numberOfIndices));
    // Ten shards over three nodes: every node, node3 included, must hold 3 or 4 of them.
    assertThat(
        routingNodes.node("node1").numberOfShardsWithState(INITIALIZING),
        anyOf(equalTo(3), equalTo(4)));
    assertThat(
        routingNodes.node("node2").numberOfShardsWithState(INITIALIZING),
        anyOf(equalTo(3), equalTo(4)));
    assertThat(
        routingNodes.node("node3").numberOfShardsWithState(INITIALIZING),
        anyOf(equalTo(3), equalTo(4)));

    logger.info("Start two more nodes, things should remain the same");
    clusterState =
        ClusterState.builder(clusterState)
            .nodes(
                DiscoveryNodes.builder(clusterState.nodes())
                    .put(newNode("node4"))
                    .put(newNode("node5")))
            .build();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    prevRoutingTable = routingTable;
    routingTable = strategy.reroute(clusterState).routingTable();

    // Nothing may move while the shards are still initializing: same routing table instance.
    assertThat(prevRoutingTable == routingTable, equalTo(true));

    routingNodes = clusterState.routingNodes();
    prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(),
          anyOf(equalTo(RELOCATING), equalTo(STARTED)));
    }
    routingNodes = clusterState.routingNodes();
    assertThat(
        "4 source shard routing are relocating",
        numberOfShardsOfType(routingNodes, RELOCATING),
        equalTo(4));
    assertThat(
        "4 target shard routing are initializing",
        numberOfShardsOfType(routingNodes, INITIALIZING),
        equalTo(4));

    logger.info("Now, mark the relocated as started");
    prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      assertThat(
          routingTable.index("test" + i).shard(0).shards().get(0).state(),
          anyOf(equalTo(RELOCATING), equalTo(STARTED)));
    }
    routingNodes = clusterState.routingNodes();
    assertThat(numberOfShardsOfType(routingNodes, STARTED), equalTo(numberOfIndices));
    // Ten shards over five nodes: two started shards each.
    for (RoutingNode routingNode : routingNodes) {
      assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(2));
    }
  }
  /**
   * Allocates 50 single-shard indices without replicas on 25 nodes, then adds 25 more nodes and
   * verifies that nothing moves while the shards are initializing, and that half of the shards
   * start relocating once they are marked as started.
   */
  @Test
  public void testMultiIndexEvenDistribution() {
    AllocationService strategy =
        createAllocationService(
            settingsBuilder()
                .put("cluster.routing.allocation.concurrent_recoveries", 10)
                .put(
                    ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE,
                    "always")
                .put("cluster.routing.allocation.cluster_concurrent_rebalance", -1)
                .build());

    final int numberOfIndices = 50;
    logger.info("Building initial routing table with " + numberOfIndices + " indices");

    MetaData.Builder metaDataBuilder = MetaData.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      metaDataBuilder.put(
          IndexMetaData.builder("test" + i)
              .settings(settings(Version.CURRENT))
              .numberOfShards(1)
              .numberOfReplicas(0));
    }
    MetaData metaData = metaDataBuilder.build();

    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    for (int i = 0; i < numberOfIndices; i++) {
      routingTableBuilder.addAsNew(metaData.index("test" + i));
    }
    RoutingTable routingTable = routingTableBuilder.build();
    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    assertThat(routingTable.indicesRouting().size(), equalTo(numberOfIndices));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      ShardRouting onlyShard = routingTable.index("test" + i).shard(0).shards().get(0);
      assertThat(onlyShard.state(), equalTo(UNASSIGNED));
      assertThat(onlyShard.currentNodeId(), nullValue());
    }

    logger.info("Adding " + (numberOfIndices / 2) + " nodes");
    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder();
    for (int i = 0; i < (numberOfIndices / 2); i++) {
      nodesBuilder.put(newNode("node" + i));
    }
    RoutingTable prevRoutingTable = routingTable;
    clusterState = ClusterState.builder(clusterState).nodes(nodesBuilder).build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      ShardRouting onlyShard = routingTable.index("test" + i).shard(0).shards().get(0);
      assertThat(onlyShard.unassigned(), equalTo(false));
      assertThat(onlyShard.state(), equalTo(INITIALIZING));
      assertThat(onlyShard.primary(), equalTo(true));
      // make sure we still have 2 shards initializing per node on the first 25 nodes
      String nodeId = onlyShard.currentNodeId();
      int nodeIndex = Integer.parseInt(nodeId.substring("node".length()));
      assertThat(nodeIndex, lessThan(25));
    }
    RoutingNodes routingNodes = clusterState.routingNodes();
    Set<String> encounteredIndices = newHashSet();
    for (RoutingNode routingNode : routingNodes) {
      assertThat(routingNode.numberOfShardsWithState(STARTED), equalTo(0));
      assertThat(routingNode.size(), equalTo(2));
      // make sure we still have 2 shards initializing per node on the only 25 nodes
      int nodeIndex = Integer.parseInt(routingNode.nodeId().substring("node".length()));
      assertThat(nodeIndex, lessThan(25));
      // check that we don't have a shard associated with a node with the same index name (we have a
      // single shard)
      for (ShardRouting shardRoutingEntry : routingNode) {
        assertThat(encounteredIndices, not(hasItem(shardRoutingEntry.index())));
        encounteredIndices.add(shardRoutingEntry.index());
      }
    }

    logger.info("Adding additional " + (numberOfIndices / 2) + " nodes, nothing should change");
    nodesBuilder = DiscoveryNodes.builder(clusterState.nodes());
    for (int i = (numberOfIndices / 2); i < numberOfIndices; i++) {
      nodesBuilder.put(newNode("node" + i));
    }
    prevRoutingTable = routingTable;
    clusterState = ClusterState.builder(clusterState).nodes(nodesBuilder).build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // Same routing table instance: the reroute changed nothing.
    assertThat(prevRoutingTable != routingTable, equalTo(false));

    logger.info("Marking the shard as started");
    prevRoutingTable = routingTable;
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    assertThat(prevRoutingTable != routingTable, equalTo(true));
    int numberOfRelocatingShards = 0;
    int numberOfStartedShards = 0;
    for (int i = 0; i < numberOfIndices; i++) {
      assertThat(routingTable.index("test" + i).shards().size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).size(), equalTo(1));
      assertThat(routingTable.index("test" + i).shard(0).shards().size(), equalTo(1));
      ShardRouting onlyShard = routingTable.index("test" + i).shard(0).shards().get(0);
      assertThat(onlyShard.unassigned(), equalTo(false));
      assertThat(onlyShard.state(), anyOf(equalTo(STARTED), equalTo(RELOCATING)));
      if (onlyShard.state() == STARTED) {
        numberOfStartedShards++;
      } else if (onlyShard.state() == RELOCATING) {
        numberOfRelocatingShards++;
      }
      assertThat(onlyShard.primary(), equalTo(true));
      // make sure we still have 2 shards either relocating or started on the first 25 nodes (still)
      String nodeId = onlyShard.currentNodeId();
      int nodeIndex = Integer.parseInt(nodeId.substring("node".length()));
      assertThat(nodeIndex, lessThan(25));
    }
    assertThat(numberOfRelocatingShards, equalTo(25));
    assertThat(numberOfStartedShards, equalTo(25));
  }
  /**
   * Creates a cluster state with an index that has one shard and {@code replicaStates.length}
   * replicas.
   *
   * <p>Node 0 is the local node and node 1 is the master. Every assigned shard copy, and every
   * relocation target, is placed on a node of its own.
   *
   * @param index name of the index
   * @param activePrimaryLocal if active primary should coincide with the local node in the cluster
   *     state
   * @param primaryState state of primary
   * @param replicaStates states of the replicas. length of this array determines also the number of
   *     replicas
   * @return the assembled cluster state
   */
  public static ClusterState state(
      String index,
      boolean activePrimaryLocal,
      ShardRoutingState primaryState,
      ShardRoutingState... replicaStates) {
    final int numberOfReplicas = replicaStates.length;

    // One node per shard copy, plus one extra node for every relocation target.
    int numberOfNodes = numberOfReplicas + 1;
    if (primaryState == ShardRoutingState.RELOCATING) {
      numberOfNodes++;
    }
    for (ShardRoutingState state : replicaStates) {
      if (state == ShardRoutingState.RELOCATING) {
        numberOfNodes++;
      }
    }
    numberOfNodes = Math.max(2, numberOfNodes); // we need a non-local master to test shard failures
    final ShardId shardId = new ShardId(index, "_na_", 0);
    DiscoveryNodes.Builder discoBuilder = DiscoveryNodes.builder();
    Set<String> unassignedNodes = new HashSet<>();
    for (int i = 0; i < numberOfNodes + 1; i++) {
      final DiscoveryNode node = newNode(i);
      discoBuilder = discoBuilder.put(node);
      unassignedNodes.add(node.getId());
    }
    discoBuilder.localNodeId(newNode(0).getId());
    discoBuilder.masterNodeId(
        newNode(1).getId()); // we need a non-local master to test shard failures
    final int primaryTerm = 1 + randomInt(200);
    IndexMetaData indexMetaData =
        IndexMetaData.builder(index)
            .settings(
                Settings.builder()
                    .put(SETTING_VERSION_CREATED, Version.CURRENT)
                    .put(SETTING_NUMBER_OF_SHARDS, 1)
                    .put(SETTING_NUMBER_OF_REPLICAS, numberOfReplicas)
                    .put(SETTING_CREATION_DATE, System.currentTimeMillis()))
            .primaryTerm(0, primaryTerm)
            .build();

    IndexShardRoutingTable.Builder indexShardRoutingBuilder =
        new IndexShardRoutingTable.Builder(shardId);

    String primaryNode = null;
    String relocatingNode = null;
    UnassignedInfo unassignedInfo = null;
    if (primaryState != ShardRoutingState.UNASSIGNED) {
      if (activePrimaryLocal) {
        primaryNode = newNode(0).getId();
        unassignedNodes.remove(primaryNode);
      } else {
        // Pick the primary among all nodes except the local one ...
        Set<String> unassignedNodesExcludingLocal = new HashSet<>(unassignedNodes);
        unassignedNodesExcludingLocal.remove(newNode(0).getId());
        primaryNode = selectAndRemove(unassignedNodesExcludingLocal);
        // ... and take it out of the real pool too, otherwise a replica or a relocation
        // target could be placed on the primary's node.
        unassignedNodes.remove(primaryNode);
      }
      if (primaryState == ShardRoutingState.RELOCATING) {
        relocatingNode = selectAndRemove(unassignedNodes);
      }
    } else {
      unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null);
    }
    indexShardRoutingBuilder.addShard(
        TestShardRouting.newShardRouting(
            index, 0, primaryNode, relocatingNode, null, true, primaryState, unassignedInfo));

    for (ShardRoutingState replicaState : replicaStates) {
      String replicaNode = null;
      relocatingNode = null;
      unassignedInfo = null;
      if (replicaState != ShardRoutingState.UNASSIGNED) {
        assert primaryNode != null : "a replica is assigned but the primary isn't";
        replicaNode = selectAndRemove(unassignedNodes);
        if (replicaState == ShardRoutingState.RELOCATING) {
          relocatingNode = selectAndRemove(unassignedNodes);
        }
      } else {
        unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null);
      }
      indexShardRoutingBuilder.addShard(
          TestShardRouting.newShardRouting(
              index,
              shardId.id(),
              replicaNode,
              relocatingNode,
              null,
              false,
              replicaState,
              unassignedInfo));
    }

    ClusterState.Builder state = ClusterState.builder(new ClusterName("test"));
    state.nodes(discoBuilder);
    state.metaData(MetaData.builder().put(indexMetaData, false).generateClusterUuidIfNeeded());
    state.routingTable(
        RoutingTable.builder()
            .add(
                IndexRoutingTable.builder(indexMetaData.getIndex())
                    .addIndexShard(indexShardRoutingBuilder.build()))
            .build());
    return state.build();
  }
Exemple #13
0
    private ClusterState applyUpdate(ClusterState currentState, ClusterChangedEvent task) {
      boolean clusterStateChanged = false;
      ClusterState tribeState = task.state();
      DiscoveryNodes.Builder nodes = DiscoveryNodes.builder(currentState.nodes());
      // -- merge nodes
      // go over existing nodes, and see if they need to be removed
      for (DiscoveryNode discoNode : currentState.nodes()) {
        String markedTribeName = discoNode.attributes().get(TRIBE_NAME);
        if (markedTribeName != null && markedTribeName.equals(tribeName)) {
          if (tribeState.nodes().get(discoNode.id()) == null) {
            clusterStateChanged = true;
            logger.info("[{}] removing node [{}]", tribeName, discoNode);
            nodes.remove(discoNode.id());
          }
        }
      }
      // go over tribe nodes, and see if they need to be added
      for (DiscoveryNode tribe : tribeState.nodes()) {
        if (currentState.nodes().get(tribe.id()) == null) {
          // a new node, add it, but also add the tribe name to the attributes
          Map<String, String> tribeAttr = new HashMap<>();
          for (ObjectObjectCursor<String, String> attr : tribe.attributes()) {
            tribeAttr.put(attr.key, attr.value);
          }
          tribeAttr.put(TRIBE_NAME, tribeName);
          DiscoveryNode discoNode =
              new DiscoveryNode(
                  tribe.name(),
                  tribe.id(),
                  tribe.getHostName(),
                  tribe.getHostAddress(),
                  tribe.address(),
                  unmodifiableMap(tribeAttr),
                  tribe.version());
          clusterStateChanged = true;
          logger.info("[{}] adding node [{}]", tribeName, discoNode);
          nodes.put(discoNode);
        }
      }

      // -- merge metadata
      ClusterBlocks.Builder blocks = ClusterBlocks.builder().blocks(currentState.blocks());
      MetaData.Builder metaData = MetaData.builder(currentState.metaData());
      RoutingTable.Builder routingTable = RoutingTable.builder(currentState.routingTable());
      // go over existing indices, and see if they need to be removed
      for (IndexMetaData index : currentState.metaData()) {
        String markedTribeName = index.getSettings().get(TRIBE_NAME);
        if (markedTribeName != null && markedTribeName.equals(tribeName)) {
          IndexMetaData tribeIndex = tribeState.metaData().index(index.getIndex());
          clusterStateChanged = true;
          if (tribeIndex == null || tribeIndex.getState() == IndexMetaData.State.CLOSE) {
            logger.info("[{}] removing index [{}]", tribeName, index.getIndex());
            removeIndex(blocks, metaData, routingTable, index);
          } else {
            // always make sure to update the metadata and routing table, in case
            // there are changes in them (new mapping, shards moving from initializing to started)
            routingTable.add(tribeState.routingTable().index(index.getIndex()));
            Settings tribeSettings =
                Settings.builder().put(tribeIndex.getSettings()).put(TRIBE_NAME, tribeName).build();
            metaData.put(IndexMetaData.builder(tribeIndex).settings(tribeSettings));
          }
        }
      }
      // go over the tribe's indices, and see if they need to be added
      for (IndexMetaData tribeIndex : tribeState.metaData()) {
        // if there is no routing table yet, do nothing with it...
        IndexRoutingTable table = tribeState.routingTable().index(tribeIndex.getIndex());
        if (table == null) {
          continue;
        }
        final IndexMetaData indexMetaData = currentState.metaData().index(tribeIndex.getIndex());
        if (indexMetaData == null) {
          if (!droppedIndices.contains(tribeIndex.getIndex())) {
            // a new index, add it, and add the tribe name as a setting
            clusterStateChanged = true;
            logger.info("[{}] adding index [{}]", tribeName, tribeIndex.getIndex());
            addNewIndex(tribeState, blocks, metaData, routingTable, tribeIndex);
          }
        } else {
          String existingFromTribe = indexMetaData.getSettings().get(TRIBE_NAME);
          if (!tribeName.equals(existingFromTribe)) {
            // we have a potential conflict on index names, decide what to do...
            if (ON_CONFLICT_ANY.equals(onConflict)) {
              // we chose any tribe, carry on
            } else if (ON_CONFLICT_DROP.equals(onConflict)) {
              // drop the indices, there is a conflict
              clusterStateChanged = true;
              logger.info(
                  "[{}] dropping index [{}] due to conflict with [{}]",
                  tribeName,
                  tribeIndex.getIndex(),
                  existingFromTribe);
              removeIndex(blocks, metaData, routingTable, tribeIndex);
              droppedIndices.add(tribeIndex.getIndex());
            } else if (onConflict.startsWith(ON_CONFLICT_PREFER)) {
              // on conflict, prefer a tribe...
              String preferredTribeName = onConflict.substring(ON_CONFLICT_PREFER.length());
              if (tribeName.equals(preferredTribeName)) {
                // the new one is the preferred one, replace...
                clusterStateChanged = true;
                logger.info(
                    "[{}] adding index [{}], preferred over [{}]",
                    tribeName,
                    tribeIndex.getIndex(),
                    existingFromTribe);
                removeIndex(blocks, metaData, routingTable, tribeIndex);
                addNewIndex(tribeState, blocks, metaData, routingTable, tribeIndex);
              } // else: either the existing one is the preferred one, or we haven't seen one, carry
                // on
            }
          }
        }
      }

      if (!clusterStateChanged) {
        return currentState;
      } else {
        return ClusterState.builder(currentState)
            .incrementVersion()
            .blocks(blocks)
            .nodes(nodes)
            .metaData(metaData)
            .routingTable(routingTable.build())
            .build();
      }
    }
 /**
  * Convenience overload that accepts a routing table builder instead of a built table.
  *
  * @param routingTable the builder to build; its result is handed to the overload of this
  *     method that takes the built table
  * @return the value returned by that overload
  */
 public Builder routingTable(RoutingTable.Builder routingTable) {
   RoutingTable built = routingTable.build();
   return routingTable(built);
 }
  /**
   * Places the shards of a 5-shard / 1-replica index on four nodes with a hand-written allocator,
   * starts them with a regular allocation service, and then checks that a subsequent reroute
   * leaves every assigned shard in the STARTED state.
   */
  @Test
  public void testNoRebalanceOnPrimaryOverload() {
    Settings.Builder config = settingsBuilder();

    /*
     * This allocator never rebalances or moves anything on its own; it only pins each
     * unassigned shard to a fixed node. Per the scenario it was written for, the resulting
     * layout is one where a rebalance would be triggered solely by the primary overload on
     * node [1], moving a shard to node 0:
     *
     *   routing_nodes:
     *   -----node_id[0][V]
     *   --------[test][0], node[0], [R], s[STARTED]
     *   --------[test][4], node[0], [R], s[STARTED]
     *   -----node_id[1][V]
     *   --------[test][0], node[1], [P], s[STARTED]
     *   --------[test][1], node[1], [P], s[STARTED]
     *   --------[test][3], node[1], [R], s[STARTED]
     *   -----node_id[2][V]
     *   --------[test][1], node[2], [R], s[STARTED]
     *   --------[test][2], node[2], [R], s[STARTED]
     *   --------[test][4], node[2], [P], s[STARTED]
     *   -----node_id[3][V]
     *   --------[test][2], node[3], [P], s[STARTED]
     *   --------[test][3], node[3], [P], s[STARTED]
     *   ---- unassigned
     */
    ShardsAllocator pinnedAllocator =
        new ShardsAllocator() {

          @Override
          public void applyStartedShards(StartedRerouteAllocation allocation) {}

          @Override
          public void applyFailedShards(FailedRerouteAllocation allocation) {}

          @Override
          public boolean rebalance(RoutingAllocation allocation) {
            return false;
          }

          @Override
          public boolean move(
              ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
            return false;
          }

          @Override
          public boolean allocateUnassigned(RoutingAllocation allocation) {
            // target node per shard id (array index), for primaries and replicas respectively
            final String[] primaryTargets = {"node1", "node1", "node3", "node3", "node2"};
            final String[] replicaTargets = {"node0", "node2", "node2", "node1", "node0"};

            RoutingNodes.UnassignedShards pending = allocation.routingNodes().unassigned();
            boolean hadPending = !pending.isEmpty();
            for (ShardRouting shard : pending) {
              int shardId = shard.id();
              if (shardId < 0 || shardId >= primaryTargets.length) {
                // shard ids without a pinned target are left untouched
                continue;
              }
              String target = shard.primary() ? primaryTargets[shardId] : replicaTargets[shardId];
              allocation.routingNodes().initialize(shard, target);
            }
            pending.clear();
            return hadPending;
          }
        };

    AllocationService strategy =
        new AllocationService(
            config.build(),
            randomAllocationDeciders(
                config.build(),
                new NodeSettingsService(Settings.Builder.EMPTY_SETTINGS),
                getRandom()),
            new ShardsAllocators(config.build(), NoopGatewayAllocator.INSTANCE, pinnedAllocator),
            EmptyClusterInfoService.INSTANCE);

    // one index "test" with 5 shards and 1 replica each
    MetaData metaData =
        MetaData.builder()
            .put(
                IndexMetaData.builder("test")
                    .settings(settings(Version.CURRENT))
                    .numberOfShards(5)
                    .numberOfReplicas(1))
            .build();

    RoutingTable.Builder tableBuilder = RoutingTable.builder();
    for (ObjectCursor<IndexMetaData> indexCursor : metaData.indices().values()) {
      tableBuilder.addAsNew(indexCursor.value);
    }
    RoutingTable routingTable = tableBuilder.build();

    // four nodes: node0 .. node3
    DiscoveryNodes.Builder nodes = DiscoveryNodes.builder();
    for (int nodeIndex = 0; nodeIndex < 4; nodeIndex++) {
      nodes.put(newNode("node" + nodeIndex));
    }

    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .nodes(nodes)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();

    // the pinned allocator initializes every shard on its fixed node
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    RoutingNodes routingNodes = clusterState.getRoutingNodes();
    assertEveryAssignedShardIs(routingNodes, ShardRoutingState.INITIALIZING);

    // from here on a regularly created allocation service is used
    strategy = createAllocationService(config.build());

    logger.info("use the new allocator and check if it moves shards");
    routingNodes = clusterState.getRoutingNodes();
    List<ShardRouting> initializing = routingNodes.shardsWithState(INITIALIZING);
    routingTable = strategy.applyStartedShards(clusterState, initializing).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    routingNodes = clusterState.getRoutingNodes();
    assertEveryAssignedShardIs(routingNodes, ShardRoutingState.STARTED);

    logger.info("start the replica shards");
    initializing = routingNodes.shardsWithState(INITIALIZING);
    routingTable = strategy.applyStartedShards(clusterState, initializing).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    routingNodes = clusterState.getRoutingNodes();
    assertEveryAssignedShardIs(routingNodes, ShardRoutingState.STARTED);

    logger.info("rebalancing");
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    routingNodes = clusterState.getRoutingNodes();
    assertEveryAssignedShardIs(routingNodes, ShardRoutingState.STARTED);
  }

  /** Asserts that every shard assigned to a node in {@code routingNodes} is in {@code expected}. */
  private void assertEveryAssignedShardIs(RoutingNodes routingNodes, ShardRoutingState expected) {
    for (RoutingNode node : routingNodes) {
      for (ShardRouting shard : node) {
        assertThat(shard.state(), Matchers.equalTo(expected));
      }
    }
  }
  /**
   * Builds a cluster state with {@code numberOfIndices} indices named {@code test0},
   * {@code test1}, ... (each with {@code numberOfShards} shards and {@code numberOfReplicas}
   * replicas) and {@code numberOfNodes} nodes named {@code node0}, {@code node1}, ..., then
   * drives the given allocation service until starting the initializing shards no longer
   * produces a different routing table.
   *
   * @param strategy the allocation service used for the reroute and for every
   *     {@code applyStartedShards} round
   * @return the cluster state carrying the routing table of the last round
   */
  private ClusterState initCluster(AllocationService strategy) {
    MetaData.Builder metaDataBuilder = MetaData.builder();
    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();

    for (int i = 0; i < numberOfIndices; i++) {
      IndexMetaData.Builder index =
          IndexMetaData.builder("test" + i)
              .settings(settings(Version.CURRENT))
              .numberOfShards(numberOfShards)
              .numberOfReplicas(numberOfReplicas);
      metaDataBuilder = metaDataBuilder.put(index);
    }

    MetaData metaData = metaDataBuilder.build();

    // every index starts out as a brand-new entry in the routing table
    for (ObjectCursor<IndexMetaData> cursor : metaData.indices().values()) {
      routingTableBuilder.addAsNew(cursor.value);
    }

    RoutingTable routingTable = routingTableBuilder.build();

    logger.info("start " + numberOfNodes + " nodes");
    DiscoveryNodes.Builder nodes = DiscoveryNodes.builder();
    for (int i = 0; i < numberOfNodes; i++) {
      nodes.put(newNode("node" + i));
    }
    ClusterState clusterState =
        ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT)
            .nodes(nodes)
            .metaData(metaData)
            .routingTable(routingTable)
            .build();
    routingTable = strategy.reroute(clusterState).routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logger.info("restart all the primary shards, replicas will start initializing");
    RoutingNodes routingNodes = clusterState.getRoutingNodes();
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    logger.info("start the replica shards");
    routingNodes = clusterState.getRoutingNodes();
    routingTable =
        strategy
            .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    routingNodes = clusterState.getRoutingNodes();

    logger.info("complete rebalancing");
    RoutingTable prev = routingTable;
    while (true) {
      routingTable =
          strategy
              .applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
              .routingTable();
      clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
      routingNodes = clusterState.getRoutingNodes();
      // Identity comparison is deliberate: the loop ends when the very same RoutingTable
      // instance comes back (presumably how the service signals "nothing changed" - confirm).
      if (routingTable == prev) {
        break;
      }
      prev = routingTable;
    }

    return clusterState;
  }