/**
 * Verifies that a cluster settings update response echoes each submitted setting back in the
 * section it was submitted under: transient settings must appear only in the response's transient
 * section and persistent settings only in its persistent section. Exercises three combinations:
 * split across both sections, transient-only, and persistent-only.
 */
public void testClusterSettingsUpdateResponse() {
  String key1 = IndexStoreConfig.INDICES_STORE_THROTTLE_MAX_BYTES_PER_SEC_SETTING.getKey();
  int value1 = 10;
  String key2 = EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey();
  String value2 = EnableAllocationDecider.Allocation.NONE.name();

  // Round 1: key1 transient, key2 persistent — each must be echoed only in its own section.
  ClusterUpdateSettingsResponse response1 =
      submitAckedSettingsUpdate(
          Settings.builder().put(key1, value1, ByteSizeUnit.BYTES).build(),
          Settings.builder().put(key2, value2).build());
  assertThat(response1.getTransientSettings().get(key1), notNullValue());
  assertThat(response1.getTransientSettings().get(key2), nullValue());
  assertThat(response1.getPersistentSettings().get(key1), nullValue());
  assertThat(response1.getPersistentSettings().get(key2), notNullValue());

  // Round 2: both keys transient, no persistent settings.
  ClusterUpdateSettingsResponse response2 =
      submitAckedSettingsUpdate(
          Settings.builder().put(key1, value1, ByteSizeUnit.BYTES).put(key2, value2).build(),
          Settings.EMPTY);
  assertThat(response2.getTransientSettings().get(key1), notNullValue());
  assertThat(response2.getTransientSettings().get(key2), notNullValue());
  assertThat(response2.getPersistentSettings().get(key1), nullValue());
  assertThat(response2.getPersistentSettings().get(key2), nullValue());

  // Round 3: no transient settings, both keys persistent.
  ClusterUpdateSettingsResponse response3 =
      submitAckedSettingsUpdate(
          Settings.EMPTY,
          Settings.builder().put(key1, value1, ByteSizeUnit.BYTES).put(key2, value2).build());
  assertThat(response3.getTransientSettings().get(key1), nullValue());
  assertThat(response3.getTransientSettings().get(key2), nullValue());
  assertThat(response3.getPersistentSettings().get(key1), notNullValue());
  assertThat(response3.getPersistentSettings().get(key2), notNullValue());
}

/**
 * Submits a cluster settings update with the given transient and persistent settings and asserts
 * that the update was acknowledged.
 *
 * @param transientSettings transient settings to apply (may be {@link Settings#EMPTY})
 * @param persistentSettings persistent settings to apply (may be {@link Settings#EMPTY})
 * @return the acknowledged update response
 */
private ClusterUpdateSettingsResponse submitAckedSettingsUpdate(
    Settings transientSettings, Settings persistentSettings) {
  ClusterUpdateSettingsResponse response =
      client()
          .admin()
          .cluster()
          .prepareUpdateSettings()
          .setTransientSettings(transientSettings)
          .setPersistentSettings(persistentSettings)
          .execute()
          .actionGet();
  assertAcked(response);
  return response;
}
/**
 * Scenario test: with one dedicated master and four data nodes, a 3-shard/1-replica index is kept
 * off node4 via an allocation exclude filter; two of the first three data nodes are then stopped
 * (index goes red), the index is allowed onto node4, and finally two replacement nodes are
 * started. The test passes when the lost shard recovers and the index returns to green — i.e. a
 * shard copy being active elsewhere must not cause another node's copy to be deleted (per the
 * method name — TODO confirm the exact shard-active semantics against the store-deletion code).
 */
public void testShardActiveElsewhereDoesNotDeleteAnother() throws Exception {
  // Start one dedicated master (no data) and four data-only nodes concurrently.
  InternalTestCluster.Async<String> masterFuture =
      internalCluster()
          .startNodeAsync(
              Settings.builder()
                  .put(
                      Node.NODE_MASTER_SETTING.getKey(),
                      true,
                      Node.NODE_DATA_SETTING.getKey(),
                      false)
                  .build());
  InternalTestCluster.Async<List<String>> nodesFutures =
      internalCluster()
          .startNodesAsync(
              4,
              Settings.builder()
                  .put(
                      Node.NODE_MASTER_SETTING.getKey(),
                      false,
                      Node.NODE_DATA_SETTING.getKey(),
                      true)
                  .build());
  final String masterNode = masterFuture.get();
  final String node1 = nodesFutures.get().get(0);
  final String node2 = nodesFutures.get().get(1);
  final String node3 = nodesFutures.get().get(2);
  // we will use this later on, handy to start now to make sure it has a different data folder
  // that node 1,2 &3
  final String node4 = nodesFutures.get().get(3);
  // Create the index on nodes 1-3 only: node4 is kept empty via the exclude filter for now.
  assertAcked(
      prepareCreate("test")
          .setSettings(
              Settings.builder()
                  .put(indexSettings())
                  .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 3)
                  .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)
                  .put(
                      IndexMetaData.INDEX_ROUTING_EXCLUDE_GROUP_SETTING.getKey() + "_name",
                      node4)));
  assertFalse(
      client()
          .admin()
          .cluster()
          .prepareHealth()
          .setWaitForNoRelocatingShards(true)
          .setWaitForGreenStatus()
          .setWaitForNodes("5")
          .get()
          .isTimedOut());
  // disable allocation to control the situation more easily
  assertAcked(
      client()
          .admin()
          .cluster()
          .prepareUpdateSettings()
          .setTransientSettings(
              Settings.builder()
                  .put(
                      EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(),
                      "none")));
  logger.debug("--> shutting down two random nodes");
  // Stopping two of the three index-holding data nodes; with 3 shards + 1 replica at least one
  // shard loses all its copies, so the index must report RED below.
  internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node1, node2, node3));
  internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node1, node2, node3));
  logger.debug("--> verifying index is red");
  ClusterHealthResponse health =
      client().admin().cluster().prepareHealth().setWaitForNodes("3").get();
  if (health.getStatus() != ClusterHealthStatus.RED) {
    logClusterState();
    fail("cluster didn't become red, despite of shutting 2 of 3 nodes");
  }
  logger.debug("--> allowing index to be assigned to node [{}]", node4);
  // Lift the exclude filter (reset to "NONE", which presumably matches no real node name —
  // verify against the filter-allocation decider) and re-enable allocation so the surviving
  // shards can move to node4.
  assertAcked(
      client()
          .admin()
          .indices()
          .prepareUpdateSettings("test")
          .setSettings(
              Settings.builder()
                  .put(
                      IndexMetaData.INDEX_ROUTING_EXCLUDE_GROUP_SETTING.getKey() + "_name",
                      "NONE")));
  assertAcked(
      client()
          .admin()
          .cluster()
          .prepareUpdateSettings()
          .setTransientSettings(
              Settings.builder()
                  .put(
                      EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(),
                      "all")));
  logger.debug("--> waiting for shards to recover on [{}]", node4);
  // we have to do this in two steps as we now do async shard fetching before assigning, so the
  // change to the
  // allocation filtering may not have immediate effect
  // TODO: we should add an easier to do this. It's too much of a song and dance..
  Index index = resolveIndex("test");
  assertBusy(
      new Runnable() {
        @Override
        public void run() {
          assertTrue(internalCluster().getInstance(IndicesService.class, node4).hasIndex(index));
        }
      });
  // wait for 4 active shards - we should have lost one shard
  assertFalse(
      client().admin().cluster().prepareHealth().setWaitForActiveShards(4).get().isTimedOut());
  // disable allocation again to control concurrency a bit and allow shard active to kick in
  // before allocation
  assertAcked(
      client()
          .admin()
          .cluster()
          .prepareUpdateSettings()
          .setTransientSettings(
              Settings.builder()
                  .put(
                      EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(),
                      "none")));
  logger.debug("--> starting the two old nodes back");
  // Two fresh data-only nodes replace the ones stopped above.
  internalCluster()
      .startNodesAsync(
          2,
          Settings.builder()
              .put(
                  Node.NODE_MASTER_SETTING.getKey(),
                  false,
                  Node.NODE_DATA_SETTING.getKey(),
                  true)
              .build());
  assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("5").get().isTimedOut());
  // Re-enable allocation so the lost shard can finally be recovered.
  assertAcked(
      client()
          .admin()
          .cluster()
          .prepareUpdateSettings()
          .setTransientSettings(
              Settings.builder()
                  .put(
                      EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(),
                      "all")));
  logger.debug("--> waiting for the lost shard to be recovered");
  ensureGreen("test");
}
/**
 * This test verifies that if we corrupt a replica, we can still get to green, even though listing
 * its store fails. Note, we need to make sure that replicas are allocated on all data nodes, so
 * that replica won't be sneaky and allocated on a node that doesn't have a corrupted replica.
 */
public void testReplicaCorruption() throws Exception {
  int numDocs = scaledRandomIntBetween(100, 1000);
  internalCluster().ensureAtLeastNumDataNodes(2);
  // numDataNodes - 1 replicas → every data node holds a copy of the shard. Merges and
  // translog-based flushes are disabled so the on-disk files targeted for corruption stay
  // stable, and checkindex-on-close is off because shards are corrupted on purpose.
  assertAcked(
      prepareCreate("test")
          .setSettings(
              Settings.builder()
                  .put(
                      PrimaryShardAllocator.INDEX_RECOVERY_INITIAL_SHARDS_SETTING.getKey(),
                      "one")
                  .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, cluster().numDataNodes() - 1)
                  .put(MergePolicyConfig.INDEX_MERGE_ENABLED, false)
                  .put(
                      MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(),
                      false) // no checkindex - we corrupt shards on purpose
                  .put(
                      IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(),
                      new ByteSizeValue(
                          1,
                          ByteSizeUnit
                              .PB)) // no translog based flush - it might change the .liv /
                                    // segments.N files
              ));
  ensureGreen();
  IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
  for (int i = 0; i < builders.length; i++) {
    builders[i] = client().prepareIndex("test", "type").setSource("field", "value");
  }
  indexRandom(true, builders);
  ensureGreen();
  assertAllSuccessful(
      client()
          .admin()
          .indices()
          .prepareFlush()
          .setForce(true)
          .setWaitIfOngoing(true)
          .execute()
          .actionGet());
  // we have to flush at least once here since we don't corrupt the translog
  SearchResponse countResponse = client().prepareSearch().setSize(0).get();
  assertHitCount(countResponse, numDocs);
  // disable allocations of replicas post restart (the restart will change replicas to primaries,
  // so we have
  // to capture replicas post restart)
  assertAcked(
      client()
          .admin()
          .cluster()
          .prepareUpdateSettings()
          .setPersistentSettings(
              Settings.builder()
                  .put(
                      EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(),
                      "primaries")));
  internalCluster().fullRestart();
  ensureYellow();
  final Index index = resolveIndex("test");
  final IndicesShardStoresResponse stores =
      client().admin().indices().prepareShardStores(index.getName()).get();
  // Corrupt every store copy whose allocation status is UNUSED (the would-be replicas).
  // Files.newOutputStream truncates by default, so each targeted file is replaced with a single
  // zero byte.
  for (IntObjectCursor<List<IndicesShardStoresResponse.StoreStatus>> shards :
      stores.getStoreStatuses().get(index.getName())) {
    for (IndicesShardStoresResponse.StoreStatus store : shards.value) {
      final ShardId shardId = new ShardId(index, shards.key);
      if (store
          .getAllocationStatus()
          .equals(IndicesShardStoresResponse.StoreStatus.AllocationStatus.UNUSED)) {
        for (Path path : findFilesToCorruptOnNode(store.getNode().getName(), shardId)) {
          try (OutputStream os = Files.newOutputStream(path)) {
            os.write(0);
          }
          logger.info("corrupting file {} on node {}", path, store.getNode().getName());
        }
      }
    }
  }
  // enable allocation
  assertAcked(
      client()
          .admin()
          .cluster()
          .prepareUpdateSettings()
          .setPersistentSettings(
              Settings.builder()
                  .putNull(
                      EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING
                          .getKey())));
  // Despite the corrupted replica copies, the cluster must still reach green.
  ensureGreen();
}
/**
 * Verifies that cancelled/failed replica recoveries do not leave temporary recovery files behind:
 * after repeatedly disrupting recoveries from the primary and then unassigning the replicas, no
 * file whose name starts with "recovery." may remain in any node's shard directories.
 */
public void testCancellationCleansTempFiles() throws Exception {
  final String indexName = "test";
  // p_node hosts the single primary shard (it is the only node when the index is created).
  final String p_node = internalCluster().startNode();
  prepareCreate(
          indexName,
          Settings.builder()
              .put(
                  IndexMetaData.SETTING_NUMBER_OF_SHARDS,
                  1,
                  IndexMetaData.SETTING_NUMBER_OF_REPLICAS,
                  0))
      .get();
  internalCluster().startNode();
  internalCluster().startNode();
  List<IndexRequestBuilder> requests = new ArrayList<>();
  int numDocs = scaledRandomIntBetween(25, 250);
  for (int i = 0; i < numDocs; i++) {
    requests.add(client().prepareIndex(indexName, "type").setSource("{}"));
  }
  indexRandom(true, requests);
  assertFalse(
      client()
          .admin()
          .cluster()
          .prepareHealth()
          .setWaitForNodes("3")
          .setWaitForGreenStatus()
          .get()
          .isTimedOut());
  flush();
  int allowedFailures = randomIntBetween(3, 10);
  logger.info("--> blocking recoveries from primary (allowed failures: [{}])", allowedFailures);
  CountDownLatch corruptionCount = new CountDownLatch(allowedFailures);
  ClusterService clusterService = internalCluster().getInstance(ClusterService.class, p_node);
  MockTransportService mockTransportService =
      (MockTransportService) internalCluster().getInstance(TransportService.class, p_node);
  // Wrap the primary's transport toward every other node with RecoveryCorruption (defined
  // elsewhere in this file), which presumably disrupts recovery traffic and counts down the
  // latch on each induced failure — verify against RecoveryCorruption's implementation.
  for (DiscoveryNode node : clusterService.state().nodes()) {
    if (!node.equals(clusterService.localNode())) {
      mockTransportService.addDelegate(
          internalCluster().getInstance(TransportService.class, node.getName()),
          new RecoveryCorruption(mockTransportService.original(), corruptionCount));
    }
  }
  // Raising the replica count triggers recoveries from the primary; wait until the allowed
  // number of disrupted recoveries has occurred.
  client()
      .admin()
      .indices()
      .prepareUpdateSettings(indexName)
      .setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1))
      .get();
  corruptionCount.await();
  logger.info("--> stopping replica assignment");
  assertAcked(
      client()
          .admin()
          .cluster()
          .prepareUpdateSettings()
          .setTransientSettings(
              Settings.builder()
                  .put(
                      EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(),
                      "none")));
  logger.info("--> wait for all replica shards to be removed, on all nodes");
  // Each non-primary node's local cluster state must show exactly one UNASSIGNED shard (the
  // replica that can no longer be allocated).
  assertBusy(
      () -> {
        for (String node : internalCluster().getNodeNames()) {
          if (node.equals(p_node)) {
            continue;
          }
          ClusterState state =
              client(node).admin().cluster().prepareState().setLocal(true).get().getState();
          assertThat(
              node + " indicates assigned replicas",
              state
                  .getRoutingTable()
                  .index(indexName)
                  .shardsWithState(ShardRoutingState.UNASSIGNED)
                  .size(),
              equalTo(1));
        }
      });
  logger.info("--> verifying no temporary recoveries are left");
  // Walk every existing shard directory on every node and assert no leftover temp file whose
  // name starts with "recovery." is present.
  for (String node : internalCluster().getNodeNames()) {
    NodeEnvironment nodeEnvironment = internalCluster().getInstance(NodeEnvironment.class, node);
    for (final Path shardLoc :
        nodeEnvironment.availableShardPaths(new ShardId(indexName, "_na_", 0))) {
      if (Files.exists(shardLoc)) {
        assertBusy(
            () -> {
              try {
                Files.walkFileTree(
                    shardLoc,
                    new SimpleFileVisitor<Path>() {
                      @Override
                      public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
                          throws IOException {
                        assertThat(
                            "found a temporary recovery file: " + file,
                            file.getFileName().toString(),
                            not(startsWith("recovery.")));
                        return FileVisitResult.CONTINUE;
                      }
                    });
              } catch (IOException e) {
                throw new AssertionError(
                    "failed to walk file tree starting at [" + shardLoc + "]", e);
              }
            });
      }
    }
  }
}