public void testSingleRetryOnIgnore() { ClusterState clusterState = createInitialClusterState(); RoutingTable routingTable = clusterState.routingTable(); final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); // now fail it N-1 times for (int i = 0; i < retries - 1; i++) { List<FailedRerouteAllocation.FailedShard> failedShards = Collections.singletonList( new FailedRerouteAllocation.FailedShard( routingTable.index("idx").shard(0).shards().get(0), "boom" + i, new UnsupportedOperationException())); RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); assertTrue(result.changed()); routingTable = result.routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertEquals(routingTable.index("idx").shards().size(), 1); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); assertEquals( routingTable .index("idx") .shard(0) .shards() .get(0) .unassignedInfo() .getNumFailedAllocations(), i + 1); assertEquals( routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom" + i); } // now we go and check that we are actually stick to unassigned on the next failure List<FailedRerouteAllocation.FailedShard> failedShards = Collections.singletonList( new FailedRerouteAllocation.FailedShard( routingTable.index("idx").shard(0).shards().get(0), "boom", new UnsupportedOperationException())); RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); assertTrue(result.changed()); routingTable = result.routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertEquals(routingTable.index("idx").shards().size(), 1); assertEquals( routingTable .index("idx") .shard(0) .shards() .get(0) .unassignedInfo() .getNumFailedAllocations(), retries); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); 
assertEquals( routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); result = strategy.reroute( clusterState, new AllocationCommands(), false, true); // manual reroute should retry once assertTrue(result.changed()); routingTable = result.routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertEquals(routingTable.index("idx").shards().size(), 1); assertEquals( routingTable .index("idx") .shard(0) .shards() .get(0) .unassignedInfo() .getNumFailedAllocations(), retries); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); assertEquals( routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); // now we go and check that we are actually stick to unassigned on the next failure ie. no retry failedShards = Collections.singletonList( new FailedRerouteAllocation.FailedShard( routingTable.index("idx").shard(0).shards().get(0), "boom", new UnsupportedOperationException())); result = strategy.applyFailedShards(clusterState, failedShards); assertTrue(result.changed()); routingTable = result.routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertEquals(routingTable.index("idx").shards().size(), 1); assertEquals( routingTable .index("idx") .shard(0) .shards() .get(0) .unassignedInfo() .getNumFailedAllocations(), retries + 1); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); assertEquals( routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); }
/**
 * Drives a {@code ClusterRerouteResponseAckedClusterStateUpdateTask} directly against an
 * {@link AllocationService} that only uses the {@link MaxRetryAllocationDecider}.
 *
 * <p>Checks that a dry-run returns the input state instance, that the shard of index
 * {@code idx} ends up {@code UNASSIGNED} once it has failed as many times as the default of
 * {@link MaxRetryAllocationDecider#SETTING_ALLOCATION_MAX_RETRY}, that a reroute without
 * {@code retryFailed} leaves it unassigned, and that a reroute with {@code retryFailed} set
 * puts it back into {@code INITIALIZING} without resetting the failure counter.
 */
public void testClusterStateUpdateTask() {
    AllocationService allocationService = new AllocationService(
        Settings.builder().build(),
        new AllocationDeciders(Settings.EMPTY, Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))),
        NoopGatewayAllocator.INSTANCE,
        new BalancedShardsAllocator(Settings.EMPTY),
        EmptyClusterInfoService.INSTANCE);
    ClusterState clusterState = createInitialClusterState(allocationService);
    ClusterRerouteRequest req = new ClusterRerouteRequest();
    req.dryRun(true);
    AtomicReference<ClusterRerouteResponse> responseRef = new AtomicReference<>();
    ActionListener<ClusterRerouteResponse> responseActionListener = new ActionListener<ClusterRerouteResponse>() {
        @Override
        public void onResponse(ClusterRerouteResponse clusterRerouteResponse) {
            responseRef.set(clusterRerouteResponse);
        }

        @Override
        public void onFailure(Exception e) {
            // no failure is expected in this test; surface it instead of silently dropping it
            throw new AssertionError("unexpected reroute failure", e);
        }
    };
    TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask task =
        new TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask(
            logger, allocationService, req, responseActionListener);

    ClusterState execute = task.execute(clusterState);
    assertSame(clusterState, execute); // dry-run
    task.onAllNodesAcked(null);
    assertNotSame(execute, responseRef.get().getState());

    req.dryRun(false); // now we allocate
    final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY);
    // now fail it N times; only the last failure must leave the shard unassigned
    for (int i = 0; i < retries; i++) {
        ClusterState newState = task.execute(clusterState);
        assertNotSame(clusterState, newState); // dry-run=false
        clusterState = newState;
        RoutingTable routingTable = clusterState.routingTable();
        assertEquals(1, routingTable.index("idx").shards().size());
        assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
        assertEquals(i, routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations());

        List<FailedRerouteAllocation.FailedShard> failedShards = Collections.singletonList(
            new FailedRerouteAllocation.FailedShard(
                routingTable.index("idx").shard(0).shards().get(0), "boom" + i, new UnsupportedOperationException()));
        RoutingAllocation.Result result = allocationService.applyFailedShards(clusterState, failedShards);
        assertTrue(result.changed());
        clusterState = ClusterState.builder(clusterState).routingTable(result.routingTable()).build();
        routingTable = clusterState.routingTable();
        assertEquals(1, routingTable.index("idx").shards().size());
        if (i == retries - 1) {
            assertEquals(UNASSIGNED, routingTable.index("idx").shard(0).shards().get(0).state());
        } else {
            assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
        }
        assertEquals(i + 1, routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations());
    }

    // without retry_failed we won't allocate that shard
    ClusterState newState = task.execute(clusterState);
    assertNotSame(clusterState, newState); // dry-run=false
    task.onAllNodesAcked(null);
    assertSame(newState, responseRef.get().getState());
    // inspect the state produced by the reroute, not the state that was fed into it
    RoutingTable routingTable = newState.routingTable();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(UNASSIGNED, routingTable.index("idx").shard(0).shards().get(0).state());
    assertEquals(retries, routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations());

    req.setRetryFailed(true); // now we manually retry and get the shard back into initializing
    newState = task.execute(clusterState);
    assertNotSame(clusterState, newState); // dry-run=false
    clusterState = newState;
    routingTable = clusterState.routingTable();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
    assertEquals(retries, routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations());
}
public void testFailedAllocation() { ClusterState clusterState = createInitialClusterState(); RoutingTable routingTable = clusterState.routingTable(); final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); // now fail it N-1 times for (int i = 0; i < retries - 1; i++) { List<FailedRerouteAllocation.FailedShard> failedShards = Collections.singletonList( new FailedRerouteAllocation.FailedShard( routingTable.index("idx").shard(0).shards().get(0), "boom" + i, new UnsupportedOperationException())); RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); assertTrue(result.changed()); routingTable = result.routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertEquals(routingTable.index("idx").shards().size(), 1); ShardRouting unassignedPrimary = routingTable.index("idx").shard(0).shards().get(0); assertEquals(unassignedPrimary.state(), INITIALIZING); assertEquals(unassignedPrimary.unassignedInfo().getNumFailedAllocations(), i + 1); assertEquals(unassignedPrimary.unassignedInfo().getMessage(), "boom" + i); // MaxRetryAllocationDecider#canForceAllocatePrimary should return YES decisions because // canAllocate returns YES here assertEquals( Decision.YES, new MaxRetryAllocationDecider(Settings.EMPTY) .canForceAllocatePrimary( unassignedPrimary, null, new RoutingAllocation(null, null, clusterState, null, 0, false))); } // now we go and check that we are actually stick to unassigned on the next failure { List<FailedRerouteAllocation.FailedShard> failedShards = Collections.singletonList( new FailedRerouteAllocation.FailedShard( routingTable.index("idx").shard(0).shards().get(0), "boom", new UnsupportedOperationException())); RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); assertTrue(result.changed()); routingTable = result.routingTable(); clusterState = 
ClusterState.builder(clusterState).routingTable(routingTable).build(); assertEquals(routingTable.index("idx").shards().size(), 1); ShardRouting unassignedPrimary = routingTable.index("idx").shard(0).shards().get(0); assertEquals(unassignedPrimary.unassignedInfo().getNumFailedAllocations(), retries); assertEquals(unassignedPrimary.state(), UNASSIGNED); assertEquals(unassignedPrimary.unassignedInfo().getMessage(), "boom"); // MaxRetryAllocationDecider#canForceAllocatePrimary should return a NO decision because // canAllocate returns NO here assertEquals( Decision.NO, new MaxRetryAllocationDecider(Settings.EMPTY) .canForceAllocatePrimary( unassignedPrimary, null, new RoutingAllocation(null, null, clusterState, null, 0, false))); } // change the settings and ensure we can do another round of allocation for that index. clusterState = ClusterState.builder(clusterState) .routingTable(routingTable) .metaData( MetaData.builder(clusterState.metaData()) .put( IndexMetaData.builder(clusterState.metaData().index("idx")) .settings( Settings.builder() .put(clusterState.metaData().index("idx").getSettings()) .put("index.allocation.max_retries", retries + 1) .build()) .build(), true) .build()) .build(); RoutingAllocation.Result result = strategy.reroute(clusterState, "settings changed", false); assertTrue(result.changed()); routingTable = result.routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); // good we are initializing and we are maintaining failure information assertEquals(routingTable.index("idx").shards().size(), 1); ShardRouting unassignedPrimary = routingTable.index("idx").shard(0).shards().get(0); assertEquals(unassignedPrimary.unassignedInfo().getNumFailedAllocations(), retries); assertEquals(unassignedPrimary.state(), INITIALIZING); assertEquals(unassignedPrimary.unassignedInfo().getMessage(), "boom"); // bumped up the max retry count, so canForceAllocatePrimary should return a YES decision assertEquals( Decision.YES, 
new MaxRetryAllocationDecider(Settings.EMPTY) .canForceAllocatePrimary( routingTable.index("idx").shard(0).shards().get(0), null, new RoutingAllocation(null, null, clusterState, null, 0, false))); // now we start the shard routingTable = strategy .applyStartedShards( clusterState, Collections.singletonList(routingTable.index("idx").shard(0).shards().get(0))) .routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); // all counters have been reset to 0 ie. no unassigned info assertEquals(routingTable.index("idx").shards().size(), 1); assertNull(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo()); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), STARTED); // now fail again and see if it has a new counter List<FailedRerouteAllocation.FailedShard> failedShards = Collections.singletonList( new FailedRerouteAllocation.FailedShard( routingTable.index("idx").shard(0).shards().get(0), "ZOOOMG", new UnsupportedOperationException())); result = strategy.applyFailedShards(clusterState, failedShards); assertTrue(result.changed()); routingTable = result.routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertEquals(routingTable.index("idx").shards().size(), 1); unassignedPrimary = routingTable.index("idx").shard(0).shards().get(0); assertEquals(unassignedPrimary.unassignedInfo().getNumFailedAllocations(), 1); assertEquals(unassignedPrimary.state(), INITIALIZING); assertEquals(unassignedPrimary.unassignedInfo().getMessage(), "ZOOOMG"); // Counter reset, so MaxRetryAllocationDecider#canForceAllocatePrimary should return a YES // decision assertEquals( Decision.YES, new MaxRetryAllocationDecider(Settings.EMPTY) .canForceAllocatePrimary( unassignedPrimary, null, new RoutingAllocation(null, null, clusterState, null, 0, false))); }