/**
   * Checks that a manual reroute gives a shard which has used up its allocation retries exactly
   * one more attempt.
   *
   * <p>The only shard of index {@code idx} is failed {@code retries} times, where {@code retries}
   * is the value of {@link MaxRetryAllocationDecider#SETTING_ALLOCATION_MAX_RETRY} for empty
   * settings. After the last of these failures the shard is expected to stay {@code UNASSIGNED}.
   * A reroute with an empty command list then has to bring it back to {@code INITIALIZING} without
   * touching the failure counter, and one further failure has to leave it {@code UNASSIGNED} again
   * with the counter at {@code retries + 1}.
   */
  public void testSingleRetryOnIgnore() {
    ClusterState clusterState = createInitialClusterState();
    RoutingTable routingTable = clusterState.routingTable();
    final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY);
    // fail the shard N-1 times; after each failure it is expected to be INITIALIZING again
    for (int i = 0; i < retries - 1; i++) {
      List<FailedRerouteAllocation.FailedShard> failedShards =
          Collections.singletonList(
              new FailedRerouteAllocation.FailedShard(
                  routingTable.index("idx").shard(0).shards().get(0),
                  "boom" + i,
                  new UnsupportedOperationException()));
      RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards);
      assertTrue(result.changed());
      routingTable = result.routingTable();
      clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
      assertEquals(1, routingTable.index("idx").shards().size());
      assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
      assertEquals(
          i + 1,
          routingTable
              .index("idx")
              .shard(0)
              .shards()
              .get(0)
              .unassignedInfo()
              .getNumFailedAllocations());
      assertEquals(
          "boom" + i,
          routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage());
    }

    // the N-th failure must leave the shard stuck in UNASSIGNED
    List<FailedRerouteAllocation.FailedShard> failedShards =
        Collections.singletonList(
            new FailedRerouteAllocation.FailedShard(
                routingTable.index("idx").shard(0).shards().get(0),
                "boom",
                new UnsupportedOperationException()));
    RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards);
    assertTrue(result.changed());
    routingTable = result.routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(
        retries,
        routingTable
            .index("idx")
            .shard(0)
            .shards()
            .get(0)
            .unassignedInfo()
            .getNumFailedAllocations());
    assertEquals(UNASSIGNED, routingTable.index("idx").shard(0).shards().get(0).state());
    assertEquals(
        "boom", routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage());

    // manual reroute should retry once
    // NOTE(review): the trailing `true` is presumably the retry-failed flag - confirm against
    // the reroute signature
    result = strategy.reroute(clusterState, new AllocationCommands(), false, true);
    assertTrue(result.changed());
    routingTable = result.routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    assertEquals(1, routingTable.index("idx").shards().size());
    // the shard is initializing again, but the failure history is kept as it was
    assertEquals(
        retries,
        routingTable
            .index("idx")
            .shard(0)
            .shards()
            .get(0)
            .unassignedInfo()
            .getNumFailedAllocations());
    assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
    assertEquals(
        "boom", routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage());

    // one more failure must leave the shard UNASSIGNED again, i.e. no further automatic retry
    failedShards =
        Collections.singletonList(
            new FailedRerouteAllocation.FailedShard(
                routingTable.index("idx").shard(0).shards().get(0),
                "boom",
                new UnsupportedOperationException()));
    result = strategy.applyFailedShards(clusterState, failedShards);
    assertTrue(result.changed());
    routingTable = result.routingTable();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(
        retries + 1,
        routingTable
            .index("idx")
            .shard(0)
            .shards()
            .get(0)
            .unassignedInfo()
            .getNumFailedAllocations());
    assertEquals(UNASSIGNED, routingTable.index("idx").shard(0).shards().get(0).state());
    assertEquals(
        "boom", routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage());
  }
  /**
   * Drives {@code ClusterRerouteResponseAckedClusterStateUpdateTask} directly and checks how it
   * interacts with the max-retry decider.
   *
   * <p>The steps are:
   *
   * <ol>
   *   <li>with {@code dryRun(true)} the task must return the very same cluster state instance,
   *       while the state carried by the response is a different instance;
   *   <li>with {@code dryRun(false)} the shard of index {@code idx} is allocated and failed
   *       {@code retries} times, ending up {@code UNASSIGNED} after the last failure;
   *   <li>executing the task again without {@code retryFailed} must leave the shard
   *       {@code UNASSIGNED} in the resulting state;
   *   <li>executing it with {@code setRetryFailed(true)} must bring the shard back to
   *       {@code INITIALIZING} while the failure counter stays at {@code retries}.
   * </ol>
   */
  public void testClusterStateUpdateTask() {
    AllocationService allocationService =
        new AllocationService(
            Settings.builder().build(),
            new AllocationDeciders(
                Settings.EMPTY,
                Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))),
            NoopGatewayAllocator.INSTANCE,
            new BalancedShardsAllocator(Settings.EMPTY),
            EmptyClusterInfoService.INSTANCE);
    ClusterState clusterState = createInitialClusterState(allocationService);
    ClusterRerouteRequest req = new ClusterRerouteRequest();
    req.dryRun(true);
    AtomicReference<ClusterRerouteResponse> responseRef = new AtomicReference<>();
    ActionListener<ClusterRerouteResponse> responseActionListener =
        new ActionListener<ClusterRerouteResponse>() {
          @Override
          public void onResponse(ClusterRerouteResponse clusterRerouteResponse) {
            responseRef.set(clusterRerouteResponse);
          }

          @Override
          public void onFailure(Exception e) {
            // no failure is expected here; report it with its cause instead of silently leaving
            // responseRef unset or stale
            throw new AssertionError("unexpected failure reported to the reroute listener", e);
          }
        };
    TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask task =
        new TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask(
            logger, allocationService, req, responseActionListener);
    ClusterState execute = task.execute(clusterState);
    assertSame(clusterState, execute); // dry-run
    task.onAllNodesAcked(null);
    assertNotSame(execute, responseRef.get().getState());

    req.dryRun(false); // now we allocate

    final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY);
    // now allocate and fail it N times; only the N-th failure leaves the shard unassigned
    for (int i = 0; i < retries; i++) {
      ClusterState newState = task.execute(clusterState);
      assertNotSame(clusterState, newState); // dry-run=false
      clusterState = newState;
      RoutingTable routingTable = clusterState.routingTable();
      assertEquals(1, routingTable.index("idx").shards().size());
      assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
      assertEquals(
          i,
          routingTable
              .index("idx")
              .shard(0)
              .shards()
              .get(0)
              .unassignedInfo()
              .getNumFailedAllocations());
      List<FailedRerouteAllocation.FailedShard> failedShards =
          Collections.singletonList(
              new FailedRerouteAllocation.FailedShard(
                  routingTable.index("idx").shard(0).shards().get(0),
                  "boom" + i,
                  new UnsupportedOperationException()));
      RoutingAllocation.Result result =
          allocationService.applyFailedShards(clusterState, failedShards);
      assertTrue(result.changed());
      clusterState = ClusterState.builder(clusterState).routingTable(result.routingTable()).build();
      routingTable = clusterState.routingTable();
      assertEquals(1, routingTable.index("idx").shards().size());
      if (i == retries - 1) {
        assertEquals(UNASSIGNED, routingTable.index("idx").shard(0).shards().get(0).state());
      } else {
        assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
      }
      assertEquals(
          i + 1,
          routingTable
              .index("idx")
              .shard(0)
              .shards()
              .get(0)
              .unassignedInfo()
              .getNumFailedAllocations());
    }

    // without retry_failed we won't allocate that shard
    ClusterState newState = task.execute(clusterState);
    assertNotSame(clusterState, newState); // dry-run=false
    task.onAllNodesAcked(null);
    assertSame(newState, responseRef.get().getState());
    // inspect the state produced by the reroute; the pre-reroute state was already verified at
    // the end of the loop above, so checking it again would prove nothing
    RoutingTable routingTable = newState.routingTable();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(UNASSIGNED, routingTable.index("idx").shard(0).shards().get(0).state());
    assertEquals(
        retries,
        routingTable
            .index("idx")
            .shard(0)
            .shards()
            .get(0)
            .unassignedInfo()
            .getNumFailedAllocations());

    req.setRetryFailed(true); // now we manually retry and get the shard back into initializing
    newState = task.execute(clusterState);
    assertNotSame(clusterState, newState); // dry-run=false
    clusterState = newState;
    routingTable = clusterState.routingTable();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
    assertEquals(
        retries,
        routingTable
            .index("idx")
            .shard(0)
            .shards()
            .get(0)
            .unassignedInfo()
            .getNumFailedAllocations());
  }
  /**
   * Walks the only shard of index {@code idx} through a full failure/recovery cycle and checks,
   * at every stage, the shard state, the failure bookkeeping in its unassigned info and the
   * decision returned by {@code MaxRetryAllocationDecider#canForceAllocatePrimary}.
   *
   * <p>The stages are:
   *
   * <ol>
   *   <li>{@code retries - 1} failures: shard is {@code INITIALIZING} each time, decision
   *       {@code YES};
   *   <li>the {@code retries}-th failure: shard is {@code UNASSIGNED}, decision {@code NO};
   *   <li>{@code index.allocation.max_retries} is raised to {@code retries + 1} and a reroute is
   *       run: shard is {@code INITIALIZING} with the failure information kept, decision
   *       {@code YES};
   *   <li>the shard is started: it has no unassigned info any more;
   *   <li>a fresh failure: the counter is at 1, decision {@code YES}.
   * </ol>
   */
  public void testFailedAllocation() {
    ClusterState clusterState = createInitialClusterState();
    RoutingTable routingTable = clusterState.routingTable();
    final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY);
    // one decider instance serves every check below; the cluster state under test is handed in
    // through a fresh RoutingAllocation on each call
    final MaxRetryAllocationDecider decider = new MaxRetryAllocationDecider(Settings.EMPTY);
    // now fail it N-1 times
    for (int i = 0; i < retries - 1; i++) {
      List<FailedRerouteAllocation.FailedShard> failedShards =
          Collections.singletonList(
              new FailedRerouteAllocation.FailedShard(
                  routingTable.index("idx").shard(0).shards().get(0),
                  "boom" + i,
                  new UnsupportedOperationException()));
      RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards);
      assertTrue(result.changed());
      routingTable = result.routingTable();
      clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
      assertEquals(1, routingTable.index("idx").shards().size());
      ShardRouting unassignedPrimary = routingTable.index("idx").shard(0).shards().get(0);
      assertEquals(INITIALIZING, unassignedPrimary.state());
      assertEquals(i + 1, unassignedPrimary.unassignedInfo().getNumFailedAllocations());
      assertEquals("boom" + i, unassignedPrimary.unassignedInfo().getMessage());
      // MaxRetryAllocationDecider#canForceAllocatePrimary should return YES decisions because
      // canAllocate returns YES here
      assertEquals(
          Decision.YES,
          decider.canForceAllocatePrimary(
              unassignedPrimary,
              null,
              new RoutingAllocation(null, null, clusterState, null, 0, false)));
    }
    // the N-th failure must leave the shard stuck in UNASSIGNED
    {
      List<FailedRerouteAllocation.FailedShard> failedShards =
          Collections.singletonList(
              new FailedRerouteAllocation.FailedShard(
                  routingTable.index("idx").shard(0).shards().get(0),
                  "boom",
                  new UnsupportedOperationException()));
      RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards);
      assertTrue(result.changed());
      routingTable = result.routingTable();
      clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
      assertEquals(1, routingTable.index("idx").shards().size());
      ShardRouting unassignedPrimary = routingTable.index("idx").shard(0).shards().get(0);
      assertEquals(retries, unassignedPrimary.unassignedInfo().getNumFailedAllocations());
      assertEquals(UNASSIGNED, unassignedPrimary.state());
      assertEquals("boom", unassignedPrimary.unassignedInfo().getMessage());
      // MaxRetryAllocationDecider#canForceAllocatePrimary should return a NO decision because
      // canAllocate returns NO here
      assertEquals(
          Decision.NO,
          decider.canForceAllocatePrimary(
              unassignedPrimary,
              null,
              new RoutingAllocation(null, null, clusterState, null, 0, false)));
    }

    // change the settings and ensure we can do another round of allocation for that index.
    clusterState =
        ClusterState.builder(clusterState)
            .routingTable(routingTable)
            .metaData(
                MetaData.builder(clusterState.metaData())
                    .put(
                        IndexMetaData.builder(clusterState.metaData().index("idx"))
                            .settings(
                                Settings.builder()
                                    .put(clusterState.metaData().index("idx").getSettings())
                                    .put("index.allocation.max_retries", retries + 1)
                                    .build())
                            .build(),
                        true)
                    .build())
            .build();
    RoutingAllocation.Result result = strategy.reroute(clusterState, "settings changed", false);
    assertTrue(result.changed());
    routingTable = result.routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    // good we are initializing and we are maintaining failure information
    assertEquals(1, routingTable.index("idx").shards().size());
    ShardRouting unassignedPrimary = routingTable.index("idx").shard(0).shards().get(0);
    assertEquals(retries, unassignedPrimary.unassignedInfo().getNumFailedAllocations());
    assertEquals(INITIALIZING, unassignedPrimary.state());
    assertEquals("boom", unassignedPrimary.unassignedInfo().getMessage());
    // bumped up the max retry count, so canForceAllocatePrimary should return a YES decision
    assertEquals(
        Decision.YES,
        decider.canForceAllocatePrimary(
            unassignedPrimary,
            null,
            new RoutingAllocation(null, null, clusterState, null, 0, false)));

    // now we start the shard
    routingTable =
        strategy
            .applyStartedShards(clusterState, Collections.singletonList(unassignedPrimary))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();

    // all counters have been reset to 0 ie. no unassigned info
    assertEquals(1, routingTable.index("idx").shards().size());
    assertNull(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo());
    assertEquals(STARTED, routingTable.index("idx").shard(0).shards().get(0).state());

    // now fail again and see if it has a new counter
    List<FailedRerouteAllocation.FailedShard> failedShards =
        Collections.singletonList(
            new FailedRerouteAllocation.FailedShard(
                routingTable.index("idx").shard(0).shards().get(0),
                "ZOOOMG",
                new UnsupportedOperationException()));
    result = strategy.applyFailedShards(clusterState, failedShards);
    assertTrue(result.changed());
    routingTable = result.routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    assertEquals(1, routingTable.index("idx").shards().size());
    unassignedPrimary = routingTable.index("idx").shard(0).shards().get(0);
    assertEquals(1, unassignedPrimary.unassignedInfo().getNumFailedAllocations());
    assertEquals(INITIALIZING, unassignedPrimary.state());
    assertEquals("ZOOOMG", unassignedPrimary.unassignedInfo().getMessage());
    // Counter reset, so MaxRetryAllocationDecider#canForceAllocatePrimary should return a YES
    // decision
    assertEquals(
        Decision.YES,
        decider.canForceAllocatePrimary(
            unassignedPrimary,
            null,
            new RoutingAllocation(null, null, clusterState, null, 0, false)));
  }