/**
 * Aggregates JVM information over all nodes: a per-version node count taken from the node infos,
 * plus summed thread counts, summed heap used/max bytes and the largest uptime (in milliseconds)
 * taken from the node stats. Nodes whose stats carry no JVM section are skipped.
 */
    private JvmStats(List<NodeInfo> nodeInfos, List<NodeStats> nodeStatsList) {
      this.versions = new ObjectIntHashMap<>();
      for (NodeInfo info : nodeInfos) {
        versions.addTo(new JvmVersion(info.getJvm()), 1);
      }

      long threadTotal = 0;
      long longestUptime = 0;
      long heapUsedTotal = 0;
      long heapMaxTotal = 0;
      for (NodeStats stats : nodeStatsList) {
        org.elasticsearch.monitor.jvm.JvmStats jvm = stats.getJvm();
        if (jvm != null) {
          // thread and memory sections are optional; uptime is read unconditionally
          if (jvm.getThreads() != null) {
            threadTotal += jvm.getThreads().getCount();
          }
          longestUptime = Math.max(longestUptime, jvm.getUptime().millis());
          if (jvm.getMem() != null) {
            heapUsedTotal += jvm.getMem().getHeapUsed().getBytes();
            heapMaxTotal += jvm.getMem().getHeapMax().getBytes();
          }
        }
      }

      this.threads = threadTotal;
      this.maxUptime = longestUptime;
      this.heapUsed = heapUsedTotal;
      this.heapMax = heapMaxTotal;
    }
    /**
     * Aggregates operating-system information over all nodes: summed available/allocated
     * processor counts and a per-OS-name node count from the node infos, plus summed total and
     * free memory from the node stats.
     */
    private OsStats(List<NodeInfo> nodeInfos, List<NodeStats> nodeStatsList) {
      this.names = new ObjectIntHashMap<>();
      int availableTotal = 0;
      int allocatedTotal = 0;
      for (NodeInfo info : nodeInfos) {
        availableTotal += info.getOs().getAvailableProcessors();
        allocatedTotal += info.getOs().getAllocatedProcessors();

        // nodes that do not report an OS name are not counted under any name
        if (info.getOs().getName() != null) {
          names.addTo(info.getOs().getName(), 1);
        }
      }
      this.availableProcessors = availableTotal;
      this.allocatedProcessors = allocatedTotal;

      long totalBytes = 0;
      long freeBytes = 0;
      for (NodeStats stats : nodeStatsList) {
        if (stats.getOs() == null) {
          continue;
        }
        // only strictly positive readings contribute to the sums
        totalBytes += Math.max(0L, stats.getOs().getMem().getTotal().getBytes());
        freeBytes += Math.max(0L, stats.getOs().getMem().getFree().getBytes());
      }
      this.mem = new org.elasticsearch.monitor.os.OsStats.Mem(totalBytes, freeBytes);
    }
 /**
  * Aggregates process statistics over all nodes that report them: the number of such nodes, the
  * summed CPU percentage, and the sum, minimum and maximum of open file descriptors.
  */
 private ProcessStats(List<NodeStats> nodeStatsList) {
   int nodesWithStats = 0;
   int cpuPercentSum = 0;
   long fdSum = 0;
   long fdMin = Long.MAX_VALUE;
   long fdMax = Long.MIN_VALUE;
   for (NodeStats stats : nodeStatsList) {
     if (stats.getProcess() != null) {
       nodesWithStats++;
       if (stats.getProcess().getCpu() != null) {
         cpuPercentSum += stats.getProcess().getCpu().getPercent();
       }
       final long openFds = stats.getProcess().getOpenFileDescriptors();
       // fd can be -1 if not supported on platform; such readings add nothing to the sum ...
       fdSum += Math.max(openFds, 0L);
       // ... but still take part in min/max, so an unsupported node stays visible in the result
       fdMin = Math.min(fdMin, openFds);
       fdMax = Math.max(fdMax, openFds);
     }
   }
   this.count = nodesWithStats;
   this.cpuPercent = cpuPercentSum;
   this.totalOpenFileDescriptors = fdSum;
   this.minOpenFileDescriptors = fdMin;
   this.maxOpenFileDescriptors = fdMax;
 }
 /** Asserts that every node reports zero open search contexts in its indices search stats. */
 private void assertSearchContextsClosed() {
   final NodesStatsResponse response =
       client().admin().cluster().prepareNodesStats().setIndices(true).get();
   final NodeStats[] perNode = response.getNodes();
   for (int i = 0; i < perNode.length; i++) {
     assertThat(perNode[i].getIndices().getSearch().getOpenContexts(), equalTo(0L));
   }
 }
  /**
   * Registers a custom breaker on every node's breaker service, pushes an estimate through the
   * last registered breaker and checks that the node stats report at least one trip for it.
   */
  public void testCustomCircuitBreakerRegistration() throws Exception {
    final String breakerName = "customBreaker";
    final BreakerSettings customSettings = new BreakerSettings(breakerName, 8, 1.03);

    // register everywhere; the breaker of the last visited service is the one exercised below
    CircuitBreaker lastRegistered = null;
    for (CircuitBreakerService service :
        internalCluster().getInstances(CircuitBreakerService.class)) {
      service.registerBreaker(customSettings);
      lastRegistered = service.getBreaker(customSettings.getName());
    }

    if (lastRegistered != null) {
      try {
        lastRegistered.addEstimateBytesAndMaybeBreak(16, "test");
      } catch (CircuitBreakingException ignored) {
        // ignore, we forced a circuit break
      }
    }

    final NodesStatsResponse response =
        client().admin().cluster().prepareNodesStats().clear().setBreaker(true).get();
    int tripped = 0;
    for (NodeStats nodeStats : response.getNodes()) {
      tripped += nodeStats.getBreaker().getStats(breakerName).getTrippedCount();
    }
    assertThat(tripped, greaterThanOrEqualTo(1));
  }
  /**
   * Loads field data by sorting on a text field, clears it, lowers the field data breaker limit
   * to 100 bytes and then checks that the same search fails with a "Data too large" error and
   * that the field data breaker reports at least one trip. Skipped when noop breakers are used.
   */
  public void testRamAccountingTermsEnum() throws Exception {
    if (noopBreakerUsed()) {
      logger.info("--> noop breakers used, skipping test");
      return;
    }
    final Client client = client();

    // the mapping enables field data (with a frequency filter) on the "test" text field
    final String indexSource =
        "{\"mappings\": {\"type\": {\"properties\": {\"test\": "
            + "{\"type\": \"text\",\"fielddata\": true,\"fielddata_frequency_filter\": {\"max\": 10000}}}}}}";
    assertAcked(prepareCreate("ramtest").setSource(indexSource));
    ensureGreen("ramtest");

    // every document carries a distinct term, so there is field data worth loading
    final int numDocs = scaledRandomIntBetween(300, 1000);
    final List<IndexRequestBuilder> indexRequests = new ArrayList<>();
    for (long docId = 0; docId < numDocs; docId++) {
      indexRequests.add(
          client
              .prepareIndex("ramtest", "type", Long.toString(docId))
              .setSource("test", "value" + docId));
    }
    indexRandom(true, false, true, indexRequests);

    // first sorted search loads field data for "test" ...
    client.prepareSearch("ramtest").setQuery(matchAllQuery()).addSort("test", SortOrder.DESC).get();

    // ... which is then dropped again, bringing the loaded field data back to 0
    clearFieldData();

    // shrink the field data breaker so that the next load cannot fit
    final Settings.Builder breakerSettings = Settings.builder();
    breakerSettings.put(
        HierarchyCircuitBreakerService.FIELDDATA_CIRCUIT_BREAKER_LIMIT_SETTING.getKey(), "100b");
    breakerSettings.put(
        HierarchyCircuitBreakerService.FIELDDATA_CIRCUIT_BREAKER_OVERHEAD_SETTING.getKey(), 1.05);
    assertAcked(
        client
            .admin()
            .cluster()
            .prepareUpdateSettings()
            .setTransientSettings(breakerSettings.build()));

    // the same sorted search must now trip the breaker
    assertFailures(
        client.prepareSearch("ramtest").setQuery(matchAllQuery()).addSort("test", SortOrder.DESC),
        RestStatus.INTERNAL_SERVER_ERROR,
        containsString("Data too large, data for [test] would be larger than limit of [100/100b]"));

    final NodesStatsResponse response =
        client.admin().cluster().prepareNodesStats().setBreaker(true).get();
    int tripped = 0;
    for (NodeStats nodeStats : response.getNodes()) {
      tripped += nodeStats.getBreaker().getStats(CircuitBreaker.FIELDDATA).getTrippedCount();
    }
    assertThat(tripped, greaterThanOrEqualTo(1));
  }
 /**
  * Sums the total number of opened HTTP connections reported by every node in the cluster.
  *
  * <p>The sum is kept in a {@code long}, matching the method's return type. An {@code int}
  * accumulator combined with {@code +=} would silently narrow each addition and could truncate
  * the result.
  *
  * @return the cluster-wide sum of each node's total-opened HTTP connection count
  */
 private long getTotalHttpConnections() {
   NodesStatsResponse nodeStats =
       client().admin().cluster().prepareNodesStats().setHttp(true).get();
   long totalOpenConnections = 0;
   for (NodeStats stats : nodeStats.getNodes()) {
     totalOpenConnections += stats.getHttp().getTotalOpen();
   }
   return totalOpenConnections;
 }
 /**
  * Reports whether any node runs the request, in-flight-requests or field data breaker with the
  * noop breaker's limit.
  */
 private boolean noopBreakerUsed() {
   final NodesStatsResponse response =
       client().admin().cluster().prepareNodesStats().setBreaker(true).get();
   for (NodeStats node : response.getNodes()) {
     // short-circuit evaluation keeps the request -> in-flight -> field data check order
     final boolean hasNoopLimit =
         node.getBreaker().getStats(CircuitBreaker.REQUEST).getLimit() == NoopCircuitBreaker.LIMIT
             || node.getBreaker().getStats(CircuitBreaker.IN_FLIGHT_REQUESTS).getLimit()
                 == NoopCircuitBreaker.LIMIT
             || node.getBreaker().getStats(CircuitBreaker.FIELDDATA).getLimit()
                 == NoopCircuitBreaker.LIMIT;
     if (hasNoopLimit) {
       return true;
     }
   }
   return false;
 }
Esempio n. 9
0
 /**
  * Folds one node's process statistics into the running aggregates (node count, CPU percentage
  * sum, and open file descriptor sum/min/max). Does nothing when the node reports no process
  * statistics.
  */
 public void addNodeStats(NodeStats nodeStats) {
   if (nodeStats.getProcess() != null) {
     count++;
     // with no sigar, the cpu section may not be available
     if (nodeStats.getProcess().cpu() != null) {
       cpuPercent += nodeStats.getProcess().cpu().getPercent();
     }
     final long openFds = nodeStats.getProcess().openFileDescriptors();
     // fd can be -1 if not supported on platform; such readings add nothing to the sum ...
     totalOpenFileDescriptors += Math.max(openFds, 0L);
     // ... but still take part in min/max, so an unsupported node stays visible in the result
     minOpenFileDescriptors = Math.min(minOpenFileDescriptors, openFds);
     maxOpenFileDescriptors = Math.max(maxOpenFileDescriptors, openFds);
   }
 }
Esempio n. 10
0
 /**
  * Folds one node into the running JVM aggregates: its JVM version is always counted, while
  * thread count, uptime and heap figures are only added when the node reports JVM statistics.
  */
 public void addNodeInfoStats(NodeInfo nodeInfo, NodeStats nodeStats) {
   versions.addTo(new JvmVersion(nodeInfo.getJvm()), 1);

   org.elasticsearch.monitor.jvm.JvmStats jvm = nodeStats.getJvm();
   if (jvm != null) {
     // thread and memory sections are optional; uptime is read unconditionally
     if (jvm.threads() != null) {
       threads += jvm.threads().count();
     }
     maxUptime = Math.max(maxUptime, jvm.uptime().millis());
     if (jvm.mem() != null) {
       heapUsed += jvm.mem().getHeapUsed().bytes();
       heapMax += jvm.mem().getHeapMax().bytes();
     }
   }
 }
Esempio n. 11
0
  /**
   * Collects thread pool statistics for the "index", "get", "search" and "bulk" pools from the
   * first node of the nodes-stats response and publishes them through the reporter's bean.
   *
   * <p>Nothing is published when Elasticsearch is not running, or when the first node or its
   * thread pool stats are missing. Any exception raised while loading is logged as a warning, and
   * the bean (possibly only partially filled) is still published.
   */
  @Override
  public void execute() throws Exception {

    // If Elasticsearch is started then only start the monitoring
    if (!ElasticsearchProcessMonitor.isElasticsearchRunning()) {
      logger.info("Elasticsearch is not yet started, check back again later");
      return;
    }

    ThreadPoolStatsBean tpStatsBean = new ThreadPoolStatsBean();
    try {
      NodesStatsResponse response = ESTransportClient.getNodesStatsResponse(config);
      NodeStats firstNode = response.getNodes().length > 0 ? response.getAt(0) : null;
      if (firstNode == null) {
        logger.info("NodeStats is null,hence returning (No ThreadPoolStats).");
        return;
      }
      ThreadPoolStats poolStats = firstNode.getThreadPool();
      if (poolStats == null) {
        logger.info("ThreadPoolStats is null,hence returning (No ThreadPoolStats).");
        return;
      }
      for (ThreadPoolStats.Stats stat : poolStats) {
        final String poolName = stat.getName();
        if (poolName.equals("index")) {
          tpStatsBean.indexThreads = stat.getThreads();
          tpStatsBean.indexQueue = stat.getQueue();
          tpStatsBean.indexActive = stat.getActive();
          tpStatsBean.indexRejected = stat.getRejected();
          tpStatsBean.indexLargest = stat.getLargest();
          tpStatsBean.indexCompleted = stat.getCompleted();
        } else if (poolName.equals("get")) {
          tpStatsBean.getThreads = stat.getThreads();
          tpStatsBean.getQueue = stat.getQueue();
          tpStatsBean.getActive = stat.getActive();
          tpStatsBean.getRejected = stat.getRejected();
          tpStatsBean.getLargest = stat.getLargest();
          tpStatsBean.getCompleted = stat.getCompleted();
        } else if (poolName.equals("search")) {
          tpStatsBean.searchThreads = stat.getThreads();
          tpStatsBean.searchQueue = stat.getQueue();
          tpStatsBean.searchActive = stat.getActive();
          tpStatsBean.searchRejected = stat.getRejected();
          tpStatsBean.searchLargest = stat.getLargest();
          tpStatsBean.searchCompleted = stat.getCompleted();
        } else if (poolName.equals("bulk")) {
          tpStatsBean.bulkThreads = stat.getThreads();
          tpStatsBean.bulkQueue = stat.getQueue();
          tpStatsBean.bulkActive = stat.getActive();
          tpStatsBean.bulkRejected = stat.getRejected();
          tpStatsBean.bulkLargest = stat.getLargest();
          tpStatsBean.bulkCompleted = stat.getCompleted();
        }
      }
    } catch (Exception e) {
      logger.warn("failed to load Thread Pool stats data", e);
    }
    tpStatsReporter.threadPoolBean.set(tpStatsBean);
  }
  /**
   * Lowers the in-flight-requests breaker limit to 8kb and sends a bulk request from one data
   * node to an index whose single shard lives on another data node. The request must either fail
   * with a {@code CircuitBreakingException} or come back with every item failed by one, each
   * carrying the configured byte limit. Skipped when noop breakers are used.
   */
  public void testLimitsRequestSize() throws Exception {
    final ByteSizeValue inFlightRequestsLimit = new ByteSizeValue(8, ByteSizeUnit.KB);
    if (noopBreakerUsed()) {
      logger.info("--> noop breakers used, skipping test");
      return;
    }

    internalCluster().ensureAtLeastNumDataNodes(2);

    final NodesStatsResponse allNodeStats = client().admin().cluster().prepareNodesStats().get();
    final List<NodeStats> dataNodes = new ArrayList<>();
    for (NodeStats candidate : allNodeStats.getNodes()) {
      if (candidate.getNode().isDataNode()) {
        dataNodes.add(candidate);
      }
    }

    assertThat(dataNodes.size(), greaterThanOrEqualTo(2));
    Collections.shuffle(dataNodes, random());

    // the bulk request travels from the source node to the target node, which is the only node
    // allowed to hold the index's sole shard
    final NodeStats shardHolder = dataNodes.get(0);
    final NodeStats requestSender = dataNodes.get(1);

    assertAcked(
        prepareCreate("index")
            .setSettings(
                Settings.builder()
                    .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
                    .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
                    .put("index.routing.allocation.include._name", shardHolder.getNode().getName())
                    .put(
                        EnableAllocationDecider.INDEX_ROUTING_REBALANCE_ENABLE_SETTING.getKey(),
                        EnableAllocationDecider.Rebalance.NONE)));

    final Client senderClient = client(requestSender.getNode().getName());

    // the limit in bytes serves as a (very) rough indication of how many requests we should send
    // to hit the limit
    final int requestCount = inFlightRequestsLimit.bytesAsInt();
    final BulkRequest bulk = new BulkRequest();
    for (int docId = 0; docId < requestCount; docId++) {
      IndexRequest item = new IndexRequest("index", "type", Integer.toString(docId));
      item.source("field", "value", "num", docId);
      bulk.add(item);
    }

    final Settings.Builder limitSettings = Settings.builder();
    limitSettings.put(
        HierarchyCircuitBreakerService.IN_FLIGHT_REQUESTS_CIRCUIT_BREAKER_LIMIT_SETTING.getKey(),
        inFlightRequestsLimit);

    assertAcked(
        client()
            .admin()
            .cluster()
            .prepareUpdateSettings()
            .setTransientSettings(limitSettings.build()));

    // depending on the client, the breaker surfaces either as a thrown exception or as failures
    // inside the bulk response
    try {
      final BulkResponse response = senderClient.bulk(bulk).actionGet();
      if (response.hasFailures()) {
        // each item must have failed with CircuitBreakingException
        for (BulkItemResponse item : response) {
          Throwable cause = ExceptionsHelper.unwrapCause(item.getFailure().getCause());
          assertThat(cause, instanceOf(CircuitBreakingException.class));
          assertEquals(
              ((CircuitBreakingException) cause).getByteLimit(), inFlightRequestsLimit.bytes());
        }
      } else {
        fail("Should have thrown CircuitBreakingException");
      }
    } catch (CircuitBreakingException ex) {
      assertEquals(ex.getByteLimit(), inFlightRequestsLimit.bytes());
    }
  }
  /**
   * Builds the response table with one row per node of the cluster state.
   *
   * <p>Each row is filled, in a fixed cell order, from the node itself plus the node's entries in
   * the nodes-info and nodes-stats responses (looked up by node id). When the info or stats entry
   * (or one of its sub-sections) is missing, the affected cells are added as {@code null}. The
   * order of the {@code addCell} calls must stay in step with the header produced by
   * {@code getTableWithHeader(req)}, which is not visible here.
   *
   * @param req the REST request; its boolean {@code full_id} parameter (default {@code false})
   *     selects between the full node id and its first four characters
   * @param state cluster state response providing the nodes and the master node id
   * @param nodesInfo per-node info, looked up by node id
   * @param nodesStats per-node statistics, looked up by node id
   * @return the populated table
   */
  private Table buildTable(
      RestRequest req,
      ClusterStateResponse state,
      NodesInfoResponse nodesInfo,
      NodesStatsResponse nodesStats) {
    boolean fullId = req.paramAsBoolean("full_id", false);

    DiscoveryNodes nodes = state.getState().nodes();
    String masterId = nodes.masterNodeId();
    Table table = getTableWithHeader(req);

    for (DiscoveryNode node : nodes) {
      // either lookup may yield null; every use below is guarded accordingly
      NodeInfo info = nodesInfo.getNodesMap().get(node.id());
      NodeStats stats = nodesStats.getNodesMap().get(node.id());

      JvmInfo jvmInfo = info == null ? null : info.getJvm();
      JvmStats jvmStats = stats == null ? null : stats.getJvm();
      FsInfo fsInfo = stats == null ? null : stats.getFs();
      OsStats osStats = stats == null ? null : stats.getOs();
      ProcessStats processStats = stats == null ? null : stats.getProcess();
      NodeIndicesStats indicesStats = stats == null ? null : stats.getIndices();

      table.startRow();

      // node identity and address
      table.addCell(fullId ? node.id() : Strings.substring(node.getId(), 0, 4));
      table.addCell(info == null ? null : info.getProcess().getId());
      table.addCell(node.getHostName());
      table.addCell(node.getHostAddress());
      // a port is only available for socket-based transport addresses; "-" otherwise
      if (node.address() instanceof InetSocketTransportAddress) {
        table.addCell(((InetSocketTransportAddress) node.address()).address().getPort());
      } else {
        table.addCell("-");
      }

      // version/build, disk, heap, OS memory and file descriptor figures
      table.addCell(node.getVersion().number());
      table.addCell(info == null ? null : info.getBuild().shortHash());
      table.addCell(jvmInfo == null ? null : jvmInfo.version());
      table.addCell(fsInfo == null ? null : fsInfo.getTotal().getAvailable());
      table.addCell(jvmStats == null ? null : jvmStats.getMem().getHeapUsed());
      table.addCell(jvmStats == null ? null : jvmStats.getMem().getHeapUsedPercent());
      table.addCell(jvmInfo == null ? null : jvmInfo.getMem().getHeapMax());
      table.addCell(
          osStats == null ? null : osStats.getMem() == null ? null : osStats.getMem().getUsed());
      table.addCell(
          osStats == null
              ? null
              : osStats.getMem() == null ? null : osStats.getMem().getUsedPercent());
      table.addCell(
          osStats == null ? null : osStats.getMem() == null ? null : osStats.getMem().getTotal());
      table.addCell(processStats == null ? null : processStats.getOpenFileDescriptors());
      table.addCell(
          processStats == null
              ? null
              : calculatePercentage(
                  processStats.getOpenFileDescriptors(), processStats.getMaxFileDescriptors()));
      table.addCell(processStats == null ? null : processStats.getMaxFileDescriptors());

      // load average (two decimals), uptime, node role, master marker and node name
      table.addCell(
          osStats == null ? null : String.format(Locale.ROOT, "%.2f", osStats.getLoadAverage()));
      table.addCell(jvmStats == null ? null : jvmStats.getUptime());
      // role: "c" when clientNode(), else "d" when dataNode(), else "-"
      table.addCell(node.clientNode() ? "c" : node.dataNode() ? "d" : "-");
      // master: "x" when no master id is known, "*" for the master itself, "m" when
      // masterNode() is true, "-" otherwise
      table.addCell(
          masterId == null
              ? "x"
              : masterId.equals(node.id()) ? "*" : node.masterNode() ? "m" : "-");
      table.addCell(node.name());

      // completion stats
      // NOTE(review): this and the field data lookup call stats.getIndices() again instead of
      // reusing indicesStats, which was read from the same getter above
      CompletionStats completionStats =
          indicesStats == null ? null : stats.getIndices().getCompletion();
      table.addCell(completionStats == null ? null : completionStats.getSize());

      // field data stats
      FieldDataStats fdStats = indicesStats == null ? null : stats.getIndices().getFieldData();
      table.addCell(fdStats == null ? null : fdStats.getMemorySize());
      table.addCell(fdStats == null ? null : fdStats.getEvictions());

      // query cache stats
      QueryCacheStats fcStats = indicesStats == null ? null : indicesStats.getQueryCache();
      table.addCell(fcStats == null ? null : fcStats.getMemorySize());
      table.addCell(fcStats == null ? null : fcStats.getEvictions());

      // request cache stats
      RequestCacheStats qcStats = indicesStats == null ? null : indicesStats.getRequestCache();
      table.addCell(qcStats == null ? null : qcStats.getMemorySize());
      table.addCell(qcStats == null ? null : qcStats.getEvictions());
      table.addCell(qcStats == null ? null : qcStats.getHitCount());
      table.addCell(qcStats == null ? null : qcStats.getMissCount());

      // flush stats
      FlushStats flushStats = indicesStats == null ? null : indicesStats.getFlush();
      table.addCell(flushStats == null ? null : flushStats.getTotal());
      table.addCell(flushStats == null ? null : flushStats.getTotalTime());

      // get stats
      GetStats getStats = indicesStats == null ? null : indicesStats.getGet();
      table.addCell(getStats == null ? null : getStats.current());
      table.addCell(getStats == null ? null : getStats.getTime());
      table.addCell(getStats == null ? null : getStats.getCount());
      table.addCell(getStats == null ? null : getStats.getExistsTime());
      table.addCell(getStats == null ? null : getStats.getExistsCount());
      table.addCell(getStats == null ? null : getStats.getMissingTime());
      table.addCell(getStats == null ? null : getStats.getMissingCount());

      // indexing stats (totals): delete figures first, then index figures
      IndexingStats indexingStats = indicesStats == null ? null : indicesStats.getIndexing();
      table.addCell(indexingStats == null ? null : indexingStats.getTotal().getDeleteCurrent());
      table.addCell(indexingStats == null ? null : indexingStats.getTotal().getDeleteTime());
      table.addCell(indexingStats == null ? null : indexingStats.getTotal().getDeleteCount());
      table.addCell(indexingStats == null ? null : indexingStats.getTotal().getIndexCurrent());
      table.addCell(indexingStats == null ? null : indexingStats.getTotal().getIndexTime());
      table.addCell(indexingStats == null ? null : indexingStats.getTotal().getIndexCount());
      table.addCell(indexingStats == null ? null : indexingStats.getTotal().getIndexFailedCount());

      // merge stats
      MergeStats mergeStats = indicesStats == null ? null : indicesStats.getMerge();
      table.addCell(mergeStats == null ? null : mergeStats.getCurrent());
      table.addCell(mergeStats == null ? null : mergeStats.getCurrentNumDocs());
      table.addCell(mergeStats == null ? null : mergeStats.getCurrentSize());
      table.addCell(mergeStats == null ? null : mergeStats.getTotal());
      table.addCell(mergeStats == null ? null : mergeStats.getTotalNumDocs());
      table.addCell(mergeStats == null ? null : mergeStats.getTotalSize());
      table.addCell(mergeStats == null ? null : mergeStats.getTotalTime());

      // percolate stats
      PercolateStats percolateStats = indicesStats == null ? null : indicesStats.getPercolate();
      table.addCell(percolateStats == null ? null : percolateStats.getCurrent());
      table.addCell(percolateStats == null ? null : percolateStats.getMemorySize());
      table.addCell(percolateStats == null ? null : percolateStats.getNumQueries());
      table.addCell(percolateStats == null ? null : percolateStats.getTime());
      table.addCell(percolateStats == null ? null : percolateStats.getCount());

      // refresh stats
      RefreshStats refreshStats = indicesStats == null ? null : indicesStats.getRefresh();
      table.addCell(refreshStats == null ? null : refreshStats.getTotal());
      table.addCell(refreshStats == null ? null : refreshStats.getTotalTime());

      // script stats (read from the node stats directly, not from the indices stats)
      ScriptStats scriptStats = stats == null ? null : stats.getScriptStats();
      table.addCell(scriptStats == null ? null : scriptStats.getCompilations());
      table.addCell(scriptStats == null ? null : scriptStats.getCacheEvictions());

      // search stats (totals): fetch, open contexts, query, scroll
      SearchStats searchStats = indicesStats == null ? null : indicesStats.getSearch();
      table.addCell(searchStats == null ? null : searchStats.getTotal().getFetchCurrent());
      table.addCell(searchStats == null ? null : searchStats.getTotal().getFetchTime());
      table.addCell(searchStats == null ? null : searchStats.getTotal().getFetchCount());
      table.addCell(searchStats == null ? null : searchStats.getOpenContexts());
      table.addCell(searchStats == null ? null : searchStats.getTotal().getQueryCurrent());
      table.addCell(searchStats == null ? null : searchStats.getTotal().getQueryTime());
      table.addCell(searchStats == null ? null : searchStats.getTotal().getQueryCount());
      table.addCell(searchStats == null ? null : searchStats.getTotal().getScrollCurrent());
      table.addCell(searchStats == null ? null : searchStats.getTotal().getScrollTime());
      table.addCell(searchStats == null ? null : searchStats.getTotal().getScrollCount());

      // segments stats
      SegmentsStats segmentsStats = indicesStats == null ? null : indicesStats.getSegments();
      table.addCell(segmentsStats == null ? null : segmentsStats.getCount());
      table.addCell(segmentsStats == null ? null : segmentsStats.getMemory());
      table.addCell(segmentsStats == null ? null : segmentsStats.getIndexWriterMemory());
      table.addCell(segmentsStats == null ? null : segmentsStats.getIndexWriterMaxMemory());
      table.addCell(segmentsStats == null ? null : segmentsStats.getVersionMapMemory());
      table.addCell(segmentsStats == null ? null : segmentsStats.getBitsetMemory());

      // suggest stats
      SuggestStats suggestStats = indicesStats == null ? null : indicesStats.getSuggest();
      table.addCell(suggestStats == null ? null : suggestStats.getCurrent());
      table.addCell(suggestStats == null ? null : suggestStats.getTime());
      table.addCell(suggestStats == null ? null : suggestStats.getCount());

      table.endRow();
    }

    return table;
  }
  /**
   * Builds the response table with one row per (node, thread pool) pair.
   *
   * <p>Only thread pools whose name matches one of the request's {@code thread_pool_patterns}
   * (default {@code "*"}) are listed. Within a node, rows are ordered by thread pool name. Nodes
   * without a stats entry contribute no rows; cells that depend on a missing info entry are
   * {@code null}.
   *
   * @param req the REST request carrying the optional {@code thread_pool_patterns} parameter
   * @param state cluster state response providing the nodes to list
   * @param nodesInfo per-node info, looked up by node id
   * @param nodesStats per-node statistics, looked up by node id
   * @return the populated table
   */
  private Table buildTable(
      RestRequest req,
      ClusterStateResponse state,
      NodesInfoResponse nodesInfo,
      NodesStatsResponse nodesStats) {
    final String[] patterns = req.paramAsStringArray("thread_pool_patterns", new String[] {"*"});
    final DiscoveryNodes nodes = state.getState().nodes();
    final Table table = getTableWithHeader(req);

    // names of all thread pools seen on any node that match the requested patterns
    final Set<String> included = new HashSet<>();
    for (final NodeStats nodeStats : nodesStats.getNodes()) {
      for (final ThreadPoolStats.Stats seen : nodeStats.getThreadPool()) {
        final String seenName = seen.getName();
        if (Regex.simpleMatch(patterns, seenName)) {
          included.add(seenName);
        }
      }
    }

    for (final DiscoveryNode node : nodes) {
      final NodeInfo info = nodesInfo.getNodesMap().get(node.getId());
      final NodeStats stats = nodesStats.getNodesMap().get(node.getId());

      // sorted by name so that a node's rows come out in thread pool name order; both maps stay
      // empty when the node has no stats entry
      final Map<String, ThreadPoolStats.Stats> statsByPool = new TreeMap<>();
      final Map<String, ThreadPool.Info> infoByPool = new HashMap<>();
      if (stats != null) {
        for (ThreadPoolStats.Stats poolStat : stats.getThreadPool()) {
          statsByPool.put(poolStat.getName(), poolStat);
        }
        if (info != null) {
          for (ThreadPool.Info poolInfoEntry : info.getThreadPool()) {
            infoByPool.put(poolInfoEntry.getName(), poolInfoEntry);
          }
        }
      }

      for (Map.Entry<String, ThreadPoolStats.Stats> entry : statsByPool.entrySet()) {
        final String poolName = entry.getKey();
        if (included.contains(poolName)) {
          final ThreadPoolStats.Stats poolStats = entry.getValue();
          final ThreadPool.Info poolInfo = infoByPool.get(poolName);

          // optional configuration values; left null when the info is absent or not set
          final Long maxQueueSize =
              poolInfo != null && poolInfo.getQueueSize() != null
                  ? poolInfo.getQueueSize().singles()
                  : null;
          final String keepAlive =
              poolInfo != null && poolInfo.getKeepAlive() != null
                  ? poolInfo.getKeepAlive().toString()
                  : null;
          final Integer minThreads =
              poolInfo != null && poolInfo.getMin() >= 0 ? poolInfo.getMin() : null;
          final Integer maxThreads =
              poolInfo != null && poolInfo.getMax() >= 0 ? poolInfo.getMax() : null;

          table.startRow();

          // node identity and address
          table.addCell(node.getName());
          table.addCell(node.getId());
          table.addCell(node.getEphemeralId());
          table.addCell(info == null ? null : info.getProcess().getId());
          table.addCell(node.getHostName());
          table.addCell(node.getHostAddress());
          table.addCell(node.getAddress().address().getPort());

          // thread pool figures
          table.addCell(poolName);
          table.addCell(poolInfo == null ? null : poolInfo.getThreadPoolType().getType());
          table.addCell(poolStats == null ? null : poolStats.getActive());
          table.addCell(poolStats == null ? null : poolStats.getThreads());
          table.addCell(poolStats == null ? null : poolStats.getQueue());
          table.addCell(maxQueueSize);
          table.addCell(poolStats == null ? null : poolStats.getRejected());
          table.addCell(poolStats == null ? null : poolStats.getLargest());
          table.addCell(poolStats == null ? null : poolStats.getCompleted());
          table.addCell(minThreads);
          table.addCell(maxThreads);
          table.addCell(keepAlive);

          table.endRow();
        }
      }
    }

    return table;
  }
Esempio n. 15
0
  public void testRerouteRecovery() throws Exception {
    logger.info("--> start node A");
    final String nodeA = internalCluster().startNode();

    logger.info("--> create index on node: {}", nodeA);
    ByteSizeValue shardSize =
        createAndPopulateIndex(INDEX_NAME, 1, SHARD_COUNT, REPLICA_COUNT)
            .getShards()[0]
            .getStats()
            .getStore()
            .size();

    logger.info("--> start node B");
    final String nodeB = internalCluster().startNode();

    ensureGreen();

    logger.info("--> slowing down recoveries");
    slowDownRecovery(shardSize);

    logger.info("--> move shard from: {} to: {}", nodeA, nodeB);
    client()
        .admin()
        .cluster()
        .prepareReroute()
        .add(new MoveAllocationCommand(INDEX_NAME, 0, nodeA, nodeB))
        .execute()
        .actionGet()
        .getState();

    logger.info("--> waiting for recovery to start both on source and target");
    final Index index = resolveIndex(INDEX_NAME);
    assertBusy(
        new Runnable() {
          @Override
          public void run() {

            IndicesService indicesService =
                internalCluster().getInstance(IndicesService.class, nodeA);
            assertThat(
                indicesService
                    .indexServiceSafe(index)
                    .getShard(0)
                    .recoveryStats()
                    .currentAsSource(),
                equalTo(1));
            indicesService = internalCluster().getInstance(IndicesService.class, nodeB);
            assertThat(
                indicesService
                    .indexServiceSafe(index)
                    .getShard(0)
                    .recoveryStats()
                    .currentAsTarget(),
                equalTo(1));
          }
        });

    logger.info("--> request recoveries");
    RecoveryResponse response =
        client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();

    List<RecoveryState> recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);
    List<RecoveryState> nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
    assertThat(nodeARecoveryStates.size(), equalTo(1));
    List<RecoveryState> nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
    assertThat(nodeBRecoveryStates.size(), equalTo(1));

    assertRecoveryState(
        nodeARecoveryStates.get(0),
        0,
        StoreRecoverySource.EMPTY_STORE_INSTANCE,
        true,
        Stage.DONE,
        null,
        nodeA);
    validateIndexRecoveryState(nodeARecoveryStates.get(0).getIndex());

    assertOnGoingRecoveryState(
        nodeBRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, nodeA, nodeB);
    validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());

    logger.info("--> request node recovery stats");
    NodesStatsResponse statsResponse =
        client()
            .admin()
            .cluster()
            .prepareNodesStats()
            .clear()
            .setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.Recovery))
            .get();
    long nodeAThrottling = Long.MAX_VALUE;
    long nodeBThrottling = Long.MAX_VALUE;
    for (NodeStats nodeStats : statsResponse.getNodes()) {
      final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats();
      if (nodeStats.getNode().getName().equals(nodeA)) {
        assertThat(
            "node A should have ongoing recovery as source",
            recoveryStats.currentAsSource(),
            equalTo(1));
        assertThat(
            "node A should not have ongoing recovery as target",
            recoveryStats.currentAsTarget(),
            equalTo(0));
        nodeAThrottling = recoveryStats.throttleTime().millis();
      }
      if (nodeStats.getNode().getName().equals(nodeB)) {
        assertThat(
            "node B should not have ongoing recovery as source",
            recoveryStats.currentAsSource(),
            equalTo(0));
        assertThat(
            "node B should have ongoing recovery as target",
            recoveryStats.currentAsTarget(),
            equalTo(1));
        nodeBThrottling = recoveryStats.throttleTime().millis();
      }
    }

    logger.info("--> checking throttling increases");
    final long finalNodeAThrottling = nodeAThrottling;
    final long finalNodeBThrottling = nodeBThrottling;
    assertBusy(
        new Runnable() {
          @Override
          public void run() {
            NodesStatsResponse statsResponse =
                client()
                    .admin()
                    .cluster()
                    .prepareNodesStats()
                    .clear()
                    .setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.Recovery))
                    .get();
            assertThat(statsResponse.getNodes(), hasSize(2));
            for (NodeStats nodeStats : statsResponse.getNodes()) {
              final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats();
              if (nodeStats.getNode().getName().equals(nodeA)) {
                assertThat(
                    "node A throttling should increase",
                    recoveryStats.throttleTime().millis(),
                    greaterThan(finalNodeAThrottling));
              }
              if (nodeStats.getNode().getName().equals(nodeB)) {
                assertThat(
                    "node B throttling should increase",
                    recoveryStats.throttleTime().millis(),
                    greaterThan(finalNodeBThrottling));
              }
            }
          }
        });

    logger.info("--> speeding up recoveries");
    restoreRecoverySpeed();

    // wait for it to be finished
    ensureGreen();

    response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();

    recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);
    assertThat(recoveryStates.size(), equalTo(1));

    assertRecoveryState(
        recoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, Stage.DONE, nodeA, nodeB);
    validateIndexRecoveryState(recoveryStates.get(0).getIndex());

    statsResponse =
        client()
            .admin()
            .cluster()
            .prepareNodesStats()
            .clear()
            .setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.Recovery))
            .get();
    assertThat(statsResponse.getNodes(), hasSize(2));
    for (NodeStats nodeStats : statsResponse.getNodes()) {
      final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats();
      assertThat(recoveryStats.currentAsSource(), equalTo(0));
      assertThat(recoveryStats.currentAsTarget(), equalTo(0));
      if (nodeStats.getNode().getName().equals(nodeA)) {
        assertThat(
            "node A throttling should be >0",
            recoveryStats.throttleTime().millis(),
            greaterThan(0L));
      }
      if (nodeStats.getNode().getName().equals(nodeB)) {
        assertThat(
            "node B throttling should be >0 ",
            recoveryStats.throttleTime().millis(),
            greaterThan(0L));
      }
    }

    logger.info("--> bump replica count");
    client()
        .admin()
        .indices()
        .prepareUpdateSettings(INDEX_NAME)
        .setSettings(Settings.builder().put("number_of_replicas", 1))
        .execute()
        .actionGet();
    ensureGreen();

    statsResponse =
        client()
            .admin()
            .cluster()
            .prepareNodesStats()
            .clear()
            .setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.Recovery))
            .get();
    assertThat(statsResponse.getNodes(), hasSize(2));
    for (NodeStats nodeStats : statsResponse.getNodes()) {
      final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats();
      assertThat(recoveryStats.currentAsSource(), equalTo(0));
      assertThat(recoveryStats.currentAsTarget(), equalTo(0));
      if (nodeStats.getNode().getName().equals(nodeA)) {
        assertThat(
            "node A throttling should be >0",
            recoveryStats.throttleTime().millis(),
            greaterThan(0L));
      }
      if (nodeStats.getNode().getName().equals(nodeB)) {
        assertThat(
            "node B throttling should be >0 ",
            recoveryStats.throttleTime().millis(),
            greaterThan(0L));
      }
    }

    logger.info("--> start node C");
    String nodeC = internalCluster().startNode();
    assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("3").get().isTimedOut());

    logger.info("--> slowing down recoveries");
    slowDownRecovery(shardSize);

    logger.info("--> move replica shard from: {} to: {}", nodeA, nodeC);
    client()
        .admin()
        .cluster()
        .prepareReroute()
        .add(new MoveAllocationCommand(INDEX_NAME, 0, nodeA, nodeC))
        .execute()
        .actionGet()
        .getState();

    response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
    recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);

    nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
    assertThat(nodeARecoveryStates.size(), equalTo(1));
    nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
    assertThat(nodeBRecoveryStates.size(), equalTo(1));
    List<RecoveryState> nodeCRecoveryStates = findRecoveriesForTargetNode(nodeC, recoveryStates);
    assertThat(nodeCRecoveryStates.size(), equalTo(1));

    assertRecoveryState(
        nodeARecoveryStates.get(0),
        0,
        PeerRecoverySource.INSTANCE,
        false,
        Stage.DONE,
        nodeB,
        nodeA);
    validateIndexRecoveryState(nodeARecoveryStates.get(0).getIndex());

    assertRecoveryState(
        nodeBRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, Stage.DONE, nodeA, nodeB);
    validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());

    // relocations of replicas are marked as REPLICA and the source node is the node holding the
    // primary (B)
    assertOnGoingRecoveryState(
        nodeCRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, false, nodeB, nodeC);
    validateIndexRecoveryState(nodeCRecoveryStates.get(0).getIndex());

    if (randomBoolean()) {
      // shutdown node with relocation source of replica shard and check if recovery continues
      internalCluster().stopRandomNode(InternalTestCluster.nameFilter(nodeA));
      ensureStableCluster(2);

      response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
      recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);

      nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
      assertThat(nodeARecoveryStates.size(), equalTo(0));
      nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
      assertThat(nodeBRecoveryStates.size(), equalTo(1));
      nodeCRecoveryStates = findRecoveriesForTargetNode(nodeC, recoveryStates);
      assertThat(nodeCRecoveryStates.size(), equalTo(1));

      assertRecoveryState(
          nodeBRecoveryStates.get(0),
          0,
          PeerRecoverySource.INSTANCE,
          true,
          Stage.DONE,
          nodeA,
          nodeB);
      validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());

      assertOnGoingRecoveryState(
          nodeCRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, false, nodeB, nodeC);
      validateIndexRecoveryState(nodeCRecoveryStates.get(0).getIndex());
    }

    logger.info("--> speeding up recoveries");
    restoreRecoverySpeed();
    ensureGreen();

    response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
    recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);

    nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
    assertThat(nodeARecoveryStates.size(), equalTo(0));
    nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
    assertThat(nodeBRecoveryStates.size(), equalTo(1));
    nodeCRecoveryStates = findRecoveriesForTargetNode(nodeC, recoveryStates);
    assertThat(nodeCRecoveryStates.size(), equalTo(1));

    assertRecoveryState(
        nodeBRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, Stage.DONE, nodeA, nodeB);
    validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());

    // relocations of replicas are marked as REPLICA and the source node is the node holding the
    // primary (B)
    assertRecoveryState(
        nodeCRecoveryStates.get(0),
        0,
        PeerRecoverySource.INSTANCE,
        false,
        Stage.DONE,
        nodeB,
        nodeC);
    validateIndexRecoveryState(nodeCRecoveryStates.get(0).getIndex());
  }
  /**
   * Tests corruption that happens on the network layer and that the primary does not get affected
   * by corruption that happens on the way to the replica. The file on disk stays uncorrupted.
   *
   * <p>A transport delegate is installed on every data node for traffic going to the "unlucky"
   * node: each recovery file chunk is either truncated by one byte (when truncation was randomly
   * chosen and the chunk is longer than one byte) or has a single byte flipped. The test then
   * raises the replica count, waits for green, and asserts that no shard copy is started or
   * relocating on the unlucky node and that searches still return every document.
   *
   * @throws ExecutionException if indexing the random documents fails
   * @throws InterruptedException if the test thread is interrupted while waiting
   */
  public void testCorruptionOnNetworkLayer() throws ExecutionException, InterruptedException {
    int numDocs = scaledRandomIntBetween(100, 1000);
    internalCluster().ensureAtLeastNumDataNodes(2);
    if (cluster().numDataNodes() < 3) {
      // a third data node is needed so the replica has somewhere healthy to go
      internalCluster()
          .startNode(
              Settings.builder()
                  .put(Node.NODE_DATA_SETTING.getKey(), true)
                  .put(Node.NODE_MASTER_SETTING.getKey(), false));
    }
    NodesStatsResponse nodeStats = client().admin().cluster().prepareNodesStats().get();
    List<NodeStats> dataNodeStats = new ArrayList<>();
    for (NodeStats stat : nodeStats.getNodes()) {
      if (stat.getNode().isDataNode()) {
        dataNodeStats.add(stat);
      }
    }

    assertThat(dataNodeStats.size(), greaterThanOrEqualTo(2));
    Collections.shuffle(dataNodeStats, random());
    NodeStats primariesNode = dataNodeStats.get(0);
    NodeStats unluckyNode = dataNodeStats.get(1);

    assertAcked(
        prepareCreate("test")
            .setSettings(
                Settings.builder()
                    .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "0")
                    .put(
                        IndexMetaData.SETTING_NUMBER_OF_SHARDS,
                        between(1, 4)) // don't go crazy here, it must recover fast
                    // This does corrupt files on the replica, so we can't check:
                    .put(MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(), false)
                    .put(
                        "index.routing.allocation.include._name", primariesNode.getNode().getName())
                    .put(
                        EnableAllocationDecider.INDEX_ROUTING_REBALANCE_ENABLE_SETTING.getKey(),
                        EnableAllocationDecider.Rebalance.NONE)));
    ensureGreen();
    IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
    for (int i = 0; i < builders.length; i++) {
      builders[i] = client().prepareIndex("test", "type").setSource("field", "value");
    }
    indexRandom(true, builders);
    ensureGreen();
    // we have to flush at least once here since we don't corrupt the translog
    assertAllSuccessful(
        client()
            .admin()
            .indices()
            .prepareFlush()
            .setForce(true)
            .setWaitIfOngoing(true)
            .execute()
            .actionGet());
    SearchResponse countResponse = client().prepareSearch().setSize(0).get();
    assertHitCount(countResponse, numDocs);
    final boolean truncate = randomBoolean();
    for (NodeStats dataNode : dataNodeStats) {
      MockTransportService mockTransportService =
          ((MockTransportService)
              internalCluster().getInstance(TransportService.class, dataNode.getNode().getName()));
      mockTransportService.addDelegate(
          internalCluster().getInstance(TransportService.class, unluckyNode.getNode().getName()),
          new MockTransportService.DelegateTransport(mockTransportService.original()) {

            @Override
            public void sendRequest(
                DiscoveryNode node,
                long requestId,
                String action,
                TransportRequest request,
                TransportRequestOptions options)
                throws IOException, TransportException {
              if (action.equals(RecoveryTargetService.Actions.FILE_CHUNK)) {
                RecoveryFileChunkRequest req = (RecoveryFileChunkRequest) request;
                if (truncate && req.length() > 1) {
                  // replace the request with one whose content is one byte short
                  BytesRef bytesRef = req.content().toBytesRef();
                  BytesArray array =
                      new BytesArray(bytesRef.bytes, bytesRef.offset, (int) req.length() - 1);
                  request =
                      new RecoveryFileChunkRequest(
                          req.recoveryId(),
                          req.shardId(),
                          req.metadata(),
                          req.position(),
                          array,
                          req.lastChunk(),
                          req.totalTranslogOps(),
                          req.sourceThrottleTimeInNanos());
                } else {
                  // the in-place flip below only reaches the wire if toBytesRef() exposes the
                  // request's own backing array rather than a copy
                  assert req.content().toBytesRef().bytes == req.content().toBytesRef().bytes
                      : "no internal reference!!";
                  final BytesRef bytesRef = req.content().toBytesRef();
                  final byte[] array = bytesRef.bytes;
                  // honor the offset so the flipped byte always lies inside the chunk's content,
                  // matching how the truncate branch addresses the same array
                  int i = bytesRef.offset + randomIntBetween(0, req.content().length() - 1);
                  array[i] = (byte) ~array[i]; // flip one byte in the content
                }
              }
              super.sendRequest(node, requestId, action, request, options);
            }
          });
    }

    Settings build =
        Settings.builder()
            .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "1")
            .put("index.routing.allocation.include._name", "*")
            .build();
    client().admin().indices().prepareUpdateSettings("test").setSettings(build).get();
    client().admin().cluster().prepareReroute().get();
    ClusterHealthResponse actionGet =
        client()
            .admin()
            .cluster()
            .health(Requests.clusterHealthRequest("test").waitForGreenStatus())
            .actionGet();
    if (actionGet.isTimedOut()) {
      // dump the cluster state and pending tasks before failing, to make the timeout debuggable
      logger.info(
          "ensureGreen timed out, cluster state:\n{}\n{}",
          client().admin().cluster().prepareState().get().getState().prettyPrint(),
          client().admin().cluster().preparePendingClusterTasks().get().prettyPrint());
      assertThat("timed out waiting for green state", actionGet.isTimedOut(), equalTo(false));
    }
    // we are green, so the primaries did not get corrupted.
    // ensure that no shard is actually allocated on the unlucky node
    ClusterStateResponse clusterStateResponse = client().admin().cluster().prepareState().get();
    for (IndexShardRoutingTable table :
        clusterStateResponse.getState().getRoutingTable().index("test")) {
      for (ShardRouting routing : table) {
        if (unluckyNode.getNode().getId().equals(routing.currentNodeId())) {
          assertThat(routing.state(), not(equalTo(ShardRoutingState.STARTED)));
          assertThat(routing.state(), not(equalTo(ShardRoutingState.RELOCATING)));
        }
      }
    }
    // repeated searches must keep returning every document
    final int numIterations = scaledRandomIntBetween(5, 20);
    for (int i = 0; i < numIterations; i++) {
      SearchResponse response = client().prepareSearch().setSize(numDocs).get();
      assertHitCount(response, numDocs);
    }
  }
  /**
   * This test triggers a corrupt index exception during finalization: if an empty commit point is
   * transferred during recovery we don't know the version of the segments_N file because it has
   * no segments we can take it from. This simulates recoveries from old indices or even without
   * checksums and makes sure if we fail during finalization we also check if the primary is ok.
   * Without the relevant checks this test fails with a RED cluster.
   *
   * <p>While {@code corrupt} is set, every recovery file chunk sent to the "unlucky" node is
   * replaced by a copy with one byte flipped. Once the first chunk has been corrupted the
   * corruption is switched off, and the cluster is expected to become green.
   *
   * @throws ExecutionException declared for the test harness; not thrown directly by this body
   * @throws InterruptedException if interrupted while waiting for the first corrupted chunk
   * @throws IOException declared for the test harness; not thrown directly by this body
   */
  public void testCorruptionOnNetworkLayerFinalizingRecovery()
      throws ExecutionException, InterruptedException, IOException {
    internalCluster().ensureAtLeastNumDataNodes(2);
    NodesStatsResponse nodeStats = client().admin().cluster().prepareNodesStats().get();
    List<NodeStats> dataNodeStats = new ArrayList<>();
    for (NodeStats stat : nodeStats.getNodes()) {
      if (stat.getNode().isDataNode()) {
        dataNodeStats.add(stat);
      }
    }

    assertThat(dataNodeStats.size(), greaterThanOrEqualTo(2));
    Collections.shuffle(dataNodeStats, random());
    NodeStats primariesNode = dataNodeStats.get(0);
    NodeStats unluckyNode = dataNodeStats.get(1);
    assertAcked(
        prepareCreate("test")
            .setSettings(
                Settings.builder()
                    .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "0")
                    .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
                    .put(
                        "index.routing.allocation.include._name", primariesNode.getNode().getName())
                    .put(
                        EnableAllocationDecider.INDEX_ROUTING_REBALANCE_ENABLE_SETTING.getKey(),
                        EnableAllocationDecider.Rebalance.NONE)
                    .put("index.allocation.max_retries", Integer.MAX_VALUE) // keep on retrying
                ));
    ensureGreen(); // allocated with empty commit
    final AtomicBoolean corrupt = new AtomicBoolean(true);
    final CountDownLatch hasCorrupted = new CountDownLatch(1);
    for (NodeStats dataNode : dataNodeStats) {
      MockTransportService mockTransportService =
          ((MockTransportService)
              internalCluster().getInstance(TransportService.class, dataNode.getNode().getName()));
      mockTransportService.addDelegate(
          internalCluster().getInstance(TransportService.class, unluckyNode.getNode().getName()),
          new MockTransportService.DelegateTransport(mockTransportService.original()) {

            @Override
            public void sendRequest(
                DiscoveryNode node,
                long requestId,
                String action,
                TransportRequest request,
                TransportRequestOptions options)
                throws IOException, TransportException {
              if (corrupt.get() && action.equals(RecoveryTargetService.Actions.FILE_CHUNK)) {
                RecoveryFileChunkRequest req = (RecoveryFileChunkRequest) request;
                // Flip the byte in a private copy and send that copy instead of the original
                // request: mutating a copy without re-wrapping it would leave the outgoing
                // chunk intact and the test would never exercise the corruption path.
                final BytesRef corrupted = BytesRef.deepCopyOf(req.content().toBytesRef());
                int i = corrupted.offset + randomIntBetween(0, req.content().length() - 1);
                corrupted.bytes[i] = (byte) ~corrupted.bytes[i]; // flip one byte in the content
                request =
                    new RecoveryFileChunkRequest(
                        req.recoveryId(),
                        req.shardId(),
                        req.metadata(),
                        req.position(),
                        new BytesArray(corrupted.bytes, corrupted.offset, corrupted.length),
                        req.lastChunk(),
                        req.totalTranslogOps(),
                        req.sourceThrottleTimeInNanos());
                hasCorrupted.countDown();
              }
              super.sendRequest(node, requestId, action, request, options);
            }
          });
    }

    // only the primary's node and the unlucky node are eligible, so the new replica has to be
    // recovered onto the unlucky node
    Settings build =
        Settings.builder()
            .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "1")
            .put(
                "index.routing.allocation.include._name",
                primariesNode.getNode().getName() + "," + unluckyNode.getNode().getName())
            .build();
    client().admin().indices().prepareUpdateSettings("test").setSettings(build).get();
    client().admin().cluster().prepareReroute().get();
    // wait until at least one chunk has been corrupted, then let later recovery attempts through
    hasCorrupted.await();
    corrupt.set(false);
    ensureGreen();
  }