/** * Tests that we can actually recover from a corruption on the primary given that we have replica * shards around. */ public void testCorruptFileAndRecover() throws ExecutionException, InterruptedException, IOException { int numDocs = scaledRandomIntBetween(100, 1000); // have enough space for 3 copies internalCluster().ensureAtLeastNumDataNodes(3); if (cluster().numDataNodes() == 3) { logger.info("--> cluster has [3] data nodes, corrupted primary will be overwritten"); } assertThat(cluster().numDataNodes(), greaterThanOrEqualTo(3)); assertAcked( prepareCreate("test") .setSettings( Settings.builder() .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, "1") .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "1") .put(MergePolicyConfig.INDEX_MERGE_ENABLED, false) .put( MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(), false) // no checkindex - we corrupt shards on purpose .put( IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue( 1, ByteSizeUnit .PB)) // no translog based flush - it might change the .liv / // segments.N files )); ensureGreen(); disableAllocation("test"); IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs]; for (int i = 0; i < builders.length; i++) { builders[i] = client().prepareIndex("test", "type").setSource("field", "value"); } indexRandom(true, builders); ensureGreen(); assertAllSuccessful( client() .admin() .indices() .prepareFlush() .setForce(true) .setWaitIfOngoing(true) .execute() .actionGet()); // we have to flush at least once here since we don't corrupt the translog SearchResponse countResponse = client().prepareSearch().setSize(0).get(); assertHitCount(countResponse, numDocs); final int numShards = numShards("test"); ShardRouting corruptedShardRouting = corruptRandomPrimaryFile(); logger.info("--> {} corrupted", corruptedShardRouting); enableAllocation("test"); /* * we corrupted the primary shard - now lets make sure we never recover from it successfully */ Settings build = Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "2").build(); client().admin().indices().prepareUpdateSettings("test").setSettings(build).get(); ClusterHealthResponse health = client() .admin() .cluster() .health( Requests.clusterHealthRequest("test") .waitForGreenStatus() .timeout( "5m") // sometimes due to cluster rebalacing and random settings default // timeout is just not enough. .waitForRelocatingShards(0)) .actionGet(); if (health.isTimedOut()) { logger.info( "cluster state:\n{}\n{}", client().admin().cluster().prepareState().get().getState().prettyPrint(), client().admin().cluster().preparePendingClusterTasks().get().prettyPrint()); assertThat("timed out waiting for green state", health.isTimedOut(), equalTo(false)); } assertThat(health.getStatus(), equalTo(ClusterHealthStatus.GREEN)); final int numIterations = scaledRandomIntBetween(5, 20); for (int i = 0; i < numIterations; i++) { SearchResponse response = client().prepareSearch().setSize(numDocs).get(); assertHitCount(response, numDocs); } /* * now hook into the IndicesService and register a close listener to * run the checkindex. if the corruption is still there we will catch it. */ final CountDownLatch latch = new CountDownLatch(numShards * 3); // primary + 2 replicas final CopyOnWriteArrayList<Exception> exception = new CopyOnWriteArrayList<>(); final IndexEventListener listener = new IndexEventListener() { @Override public void afterIndexShardClosed( ShardId sid, @Nullable IndexShard indexShard, Settings indexSettings) { if (indexShard != null) { Store store = indexShard.store(); store.incRef(); try { if (!Lucene.indexExists(store.directory()) && indexShard.state() == IndexShardState.STARTED) { return; } try (CheckIndex checkIndex = new CheckIndex(store.directory())) { BytesStreamOutput os = new BytesStreamOutput(); PrintStream out = new PrintStream(os, false, StandardCharsets.UTF_8.name()); checkIndex.setInfoStream(out); out.flush(); CheckIndex.Status status = checkIndex.checkIndex(); if (!status.clean) { logger.warn("check index [failure]\n{}", os.bytes().utf8ToString()); throw new IOException("index check failure"); } } } catch (Exception e) { exception.add(e); } finally { store.decRef(); latch.countDown(); } } } }; for (MockIndexEventListener.TestEventListener eventListener : internalCluster().getDataNodeInstances(MockIndexEventListener.TestEventListener.class)) { eventListener.setNewDelegate(listener); } try { client().admin().indices().prepareDelete("test").get(); latch.await(); assertThat(exception, empty()); } finally { for (MockIndexEventListener.TestEventListener eventListener : internalCluster().getDataNodeInstances(MockIndexEventListener.TestEventListener.class)) { eventListener.setNewDelegate(null); } } }
public void testRelocationWhileRefreshing() throws Exception { int numberOfRelocations = scaledRandomIntBetween(1, rarely() ? 10 : 4); int numberOfReplicas = randomBoolean() ? 0 : 1; int numberOfNodes = numberOfReplicas == 0 ? 2 : 3; logger.info( "testRelocationWhileIndexingRandom(numRelocations={}, numberOfReplicas={}, numberOfNodes={})", numberOfRelocations, numberOfReplicas, numberOfNodes); String[] nodes = new String[numberOfNodes]; logger.info("--> starting [node_0] ..."); nodes[0] = internalCluster().startNode(); logger.info("--> creating test index ..."); prepareCreate( "test", Settings.builder() .put("index.number_of_shards", 1) .put("index.number_of_replicas", numberOfReplicas) .put("index.refresh_interval", -1) // we want to control refreshes c ) .get(); for (int i = 1; i < numberOfNodes; i++) { logger.info("--> starting [node_{}] ...", i); nodes[i] = internalCluster().startNode(); if (i != numberOfNodes - 1) { ClusterHealthResponse healthResponse = client() .admin() .cluster() .prepareHealth() .setWaitForEvents(Priority.LANGUID) .setWaitForNodes(Integer.toString(i + 1)) .setWaitForGreenStatus() .execute() .actionGet(); assertThat(healthResponse.isTimedOut(), equalTo(false)); } } final Semaphore postRecoveryShards = new Semaphore(0); final IndexEventListener listener = new IndexEventListener() { @Override public void indexShardStateChanged( IndexShard indexShard, @Nullable IndexShardState previousState, IndexShardState currentState, @Nullable String reason) { if (currentState == IndexShardState.POST_RECOVERY) { postRecoveryShards.release(); } } }; for (MockIndexEventListener.TestEventListener eventListener : internalCluster().getInstances(MockIndexEventListener.TestEventListener.class)) { eventListener.setNewDelegate(listener); } logger.info("--> starting relocations..."); int nodeShiftBased = numberOfReplicas; // if we have replicas shift those for (int i = 0; i < numberOfRelocations; i++) { int fromNode = (i % 2); int toNode = fromNode == 0 ? 1 : 0; fromNode += nodeShiftBased; toNode += nodeShiftBased; List<IndexRequestBuilder> builders1 = new ArrayList<>(); for (int numDocs = randomIntBetween(10, 30); numDocs > 0; numDocs--) { builders1.add(client().prepareIndex("test", "type").setSource("{}")); } List<IndexRequestBuilder> builders2 = new ArrayList<>(); for (int numDocs = randomIntBetween(10, 30); numDocs > 0; numDocs--) { builders2.add(client().prepareIndex("test", "type").setSource("{}")); } logger.info("--> START relocate the shard from {} to {}", nodes[fromNode], nodes[toNode]); client() .admin() .cluster() .prepareReroute() .add(new MoveAllocationCommand("test", 0, nodes[fromNode], nodes[toNode])) .get(); logger.debug("--> index [{}] documents", builders1.size()); indexRandom(false, true, builders1); // wait for shard to reach post recovery postRecoveryShards.acquire(1); logger.debug("--> index [{}] documents", builders2.size()); indexRandom(true, true, builders2); // verify cluster was finished. assertFalse( client() .admin() .cluster() .prepareHealth() .setWaitForNoRelocatingShards(true) .setWaitForEvents(Priority.LANGUID) .setTimeout("30s") .get() .isTimedOut()); logger.info("--> DONE relocate the shard from {} to {}", fromNode, toNode); logger.debug("--> verifying all searches return the same number of docs"); long expectedCount = -1; for (Client client : clients()) { SearchResponse response = client.prepareSearch("test").setPreference("_local").setSize(0).get(); assertNoFailures(response); if (expectedCount < 0) { expectedCount = response.getHits().totalHits(); } else { assertEquals(expectedCount, response.getHits().totalHits()); } } } }