/**
   * Tests that we can actually recover from a corruption on the primary given that we have replica
   * shards around.
   */
  public void testCorruptFileAndRecover()
      throws ExecutionException, InterruptedException, IOException {
    int numDocs = scaledRandomIntBetween(100, 1000);
    // ensure enough data nodes for 3 shard copies
    internalCluster().ensureAtLeastNumDataNodes(3);
    if (cluster().numDataNodes() == 3) {
      logger.info("--> cluster has [3] data nodes, corrupted primary will be overwritten");
    }

    assertThat(cluster().numDataNodes(), greaterThanOrEqualTo(3));

    assertAcked(
        prepareCreate("test")
            .setSettings(
                Settings.builder()
                    .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, "1")
                    .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "1")
                    .put(MergePolicyConfig.INDEX_MERGE_ENABLED, false)
                    // no CheckIndex on close - we corrupt shards on purpose
                    .put(MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(), false)
                    // no translog-based flush - it might change the .liv / segments.N files
                    .put(
                        IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(),
                        new ByteSizeValue(1, ByteSizeUnit.PB))));
    ensureGreen();
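    // keep the shard copies where they are while we index and corrupt a file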
    disableAllocation("test");
    IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
    for (int i = 0; i < builders.length; i++) {
      builders[i] = client().prepareIndex("test", "type").setSource("field", "value");
    }
    indexRandom(true, builders);
    ensureGreen();
    // we have to flush at least once here since we don't corrupt the translog
    assertAllSuccessful(
        client()
            .admin()
            .indices()
            .prepareFlush()
            .setForce(true)
            .setWaitIfOngoing(true)
            .execute()
            .actionGet());
    SearchResponse countResponse = client().prepareSearch().setSize(0).get();
    assertHitCount(countResponse, numDocs);

    final int numShards = numShards("test");
    ShardRouting corruptedShardRouting = corruptRandomPrimaryFile();
    logger.info("--> {} corrupted", corruptedShardRouting);
    enableAllocation("test");
    /*
     * we corrupted the primary shard - bump the replica count so that recovery has to run;
     * the corrupted copy must never serve as a successful recovery source, and the cluster
     * must still reach green by recovering from an intact copy
     */
    Settings build = Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "2").build();
    client().admin().indices().prepareUpdateSettings("test").setSettings(build).get();
    ClusterHealthResponse health =
        client()
            .admin()
            .cluster()
            .health(
                Requests.clusterHealthRequest("test")
                    .waitForGreenStatus()
                    // sometimes, due to cluster rebalancing and random settings, the default
                    // timeout is just not enough
                    .timeout("5m")
                    .waitForRelocatingShards(0))
            .actionGet();
    if (health.isTimedOut()) {
      logger.info(
          "cluster state:\n{}\n{}",
          client().admin().cluster().prepareState().get().getState().prettyPrint(),
          client().admin().cluster().preparePendingClusterTasks().get().prettyPrint());
      assertThat("timed out waiting for green state", health.isTimedOut(), equalTo(false));
    }
    assertThat(health.getStatus(), equalTo(ClusterHealthStatus.GREEN));
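    // the cluster is green again, so every search must see all docs no matter which copy serves it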
    final int numIterations = scaledRandomIntBetween(5, 20);
    for (int i = 0; i < numIterations; i++) {
      SearchResponse response = client().prepareSearch().setSize(numDocs).get();
      assertHitCount(response, numDocs);
    }

    /*
     * now hook into the IndicesService and register a close listener to run CheckIndex.
     * If the corruption is still there, we will catch it.
     */
    final CountDownLatch latch = new CountDownLatch(numShards * 3); // primary + 2 replicas
    final CopyOnWriteArrayList<Exception> exception = new CopyOnWriteArrayList<>();
    final IndexEventListener listener =
        new IndexEventListener() {
          @Override
          public void afterIndexShardClosed(
              ShardId sid, @Nullable IndexShard indexShard, Settings indexSettings) {
            if (indexShard != null) {
              Store store = indexShard.store();
              store.incRef();
              try {
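                // a shard that is STARTED but never wrote a Lucene index has nothing to check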
                if (!Lucene.indexExists(store.directory())
                    && indexShard.state() == IndexShardState.STARTED) {
                  return;
                }
                try (CheckIndex checkIndex = new CheckIndex(store.directory())) {
                  BytesStreamOutput os = new BytesStreamOutput();
                  PrintStream out = new PrintStream(os, false, StandardCharsets.UTF_8.name());
                  checkIndex.setInfoStream(out);
                  out.flush();
                  CheckIndex.Status status = checkIndex.checkIndex();
                  if (!status.clean) {
                    logger.warn("check index [failure]\n{}", os.bytes().utf8ToString());
                    throw new IOException("index check failure");
                  }
                }
              } catch (Exception e) {
                exception.add(e);
              } finally {
                store.decRef();
                latch.countDown();
              }
            }
          }
        };

    for (MockIndexEventListener.TestEventListener eventListener :
        internalCluster().getDataNodeInstances(MockIndexEventListener.TestEventListener.class)) {
      eventListener.setNewDelegate(listener);
    }
    try {
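      // deleting the index closes every shard copy, firing afterIndexShardClosed on each one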
      client().admin().indices().prepareDelete("test").get();
      latch.await();
      assertThat(exception, empty());
    } finally {
      for (MockIndexEventListener.TestEventListener eventListener :
          internalCluster().getDataNodeInstances(MockIndexEventListener.TestEventListener.class)) {
        eventListener.setNewDelegate(null);
      }
    }
  }

  public void testRelocationWhileRefreshing() throws Exception {
    int numberOfRelocations = scaledRandomIntBetween(1, rarely() ? 10 : 4);
    int numberOfReplicas = randomBoolean() ? 0 : 1;
    int numberOfNodes = numberOfReplicas == 0 ? 2 : 3;

    logger.info(
        "testRelocationWhileIndexingRandom(numRelocations={}, numberOfReplicas={}, numberOfNodes={})",
        numberOfRelocations,
        numberOfReplicas,
        numberOfNodes);

    String[] nodes = new String[numberOfNodes];
    logger.info("--> starting [node_0] ...");
    nodes[0] = internalCluster().startNode();

    logger.info("--> creating test index ...");
    prepareCreate(
            "test",
            Settings.builder()
                .put("index.number_of_shards", 1)
                .put("index.number_of_replicas", numberOfReplicas)
                .put("index.refresh_interval", -1) // we want to control refreshes c
            )
        .get();

    for (int i = 1; i < numberOfNodes; i++) {
      logger.info("--> starting [node_{}] ...", i);
      nodes[i] = internalCluster().startNode();
      if (i != numberOfNodes - 1) {
        ClusterHealthResponse healthResponse =
            client()
                .admin()
                .cluster()
                .prepareHealth()
                .setWaitForEvents(Priority.LANGUID)
                .setWaitForNodes(Integer.toString(i + 1))
                .setWaitForGreenStatus()
                .execute()
                .actionGet();
        assertThat(healthResponse.isTimedOut(), equalTo(false));
      }
    }

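    // released once for every shard that reaches POST_RECOVERY; the relocation loop below
    // blocks on this to know the target shard has caught up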
    final Semaphore postRecoveryShards = new Semaphore(0);
    final IndexEventListener listener =
        new IndexEventListener() {
          @Override
          public void indexShardStateChanged(
              IndexShard indexShard,
              @Nullable IndexShardState previousState,
              IndexShardState currentState,
              @Nullable String reason) {
            if (currentState == IndexShardState.POST_RECOVERY) {
              postRecoveryShards.release();
            }
          }
        };
    for (MockIndexEventListener.TestEventListener eventListener :
        internalCluster().getInstances(MockIndexEventListener.TestEventListener.class)) {
      eventListener.setNewDelegate(listener);
    }

    logger.info("--> starting relocations...");
    int nodeShiftBased = numberOfReplicas; // if we have replicas, relocate between the last two nodes
    for (int i = 0; i < numberOfRelocations; i++) {
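      // alternate the relocation direction so the shard ping-pongs between the same two nodes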
      int fromNode = (i % 2);
      int toNode = fromNode == 0 ? 1 : 0;
      fromNode += nodeShiftBased;
      toNode += nodeShiftBased;

      List<IndexRequestBuilder> builders1 = new ArrayList<>();
      for (int numDocs = randomIntBetween(10, 30); numDocs > 0; numDocs--) {
        builders1.add(client().prepareIndex("test", "type").setSource("{}"));
      }

      List<IndexRequestBuilder> builders2 = new ArrayList<>();
      for (int numDocs = randomIntBetween(10, 30); numDocs > 0; numDocs--) {
        builders2.add(client().prepareIndex("test", "type").setSource("{}"));
      }

      logger.info("--> START relocate the shard from {} to {}", nodes[fromNode], nodes[toNode]);

      client()
          .admin()
          .cluster()
          .prepareReroute()
          .add(new MoveAllocationCommand("test", 0, nodes[fromNode], nodes[toNode]))
          .get();
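      // the reroute call only starts the relocation; the indexing below runs concurrently with it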

      logger.debug("--> index [{}] documents", builders1.size());
      indexRandom(false, true, builders1);
      // wait for the relocation target shard to reach POST_RECOVERY
      postRecoveryShards.acquire(1);

      logger.debug("--> index [{}] documents", builders2.size());
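      // the first argument forces a refresh so this batch is immediately searchable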
      indexRandom(true, true, builders2);

      // verify that the relocation has finished and the cluster has settled
      assertFalse(
          client()
              .admin()
              .cluster()
              .prepareHealth()
              .setWaitForNoRelocatingShards(true)
              .setWaitForEvents(Priority.LANGUID)
              .setTimeout("30s")
              .get()
              .isTimedOut());
      logger.info("--> DONE relocate the shard from {} to {}", fromNode, toNode);

      logger.debug("--> verifying all searches return the same number of docs");
      long expectedCount = -1;
      for (Client client : clients()) {
        SearchResponse response =
            client.prepareSearch("test").setPreference("_local").setSize(0).get();
        assertNoFailures(response);
        if (expectedCount < 0) {
          expectedCount = response.getHits().totalHits();
        } else {
          assertEquals(expectedCount, response.getHits().totalHits());
        }
      }
    }
  }