private void testLoad(boolean fullRecovery) { logger.info("Running with fullRecover [{}]", fullRecovery); startNode("server1"); logger.info("Running Cluster Health (waiting for node to startup properly)"); ClusterHealthResponse clusterHealth = client("server1") .admin() .cluster() .health(clusterHealthRequest().waitForGreenStatus()) .actionGet(); logger.info("Done Cluster Health, status " + clusterHealth.getStatus()); assertThat(clusterHealth.isTimedOut(), equalTo(false)); assertThat(clusterHealth.getStatus(), equalTo(ClusterHealthStatus.GREEN)); // get the environment, so we can clear the work dir when needed Environment environment = ((InternalNode) node("server1")).injector().getInstance(Environment.class); logger.info("--> creating test index ..."); client("server1").admin().indices().prepareCreate("test").execute().actionGet(); logger.info("Running Cluster Health (wait for the shards to startup)"); clusterHealth = client("server1") .admin() .cluster() .health(clusterHealthRequest().waitForYellowStatus().waitForActiveShards(1)) .actionGet(); logger.info("Done Cluster Health, status " + clusterHealth.getStatus()); assertThat(clusterHealth.isTimedOut(), equalTo(false)); assertThat(clusterHealth.getStatus(), equalTo(ClusterHealthStatus.YELLOW)); logger.info("--> refreshing and checking count"); client("server1").admin().indices().prepareRefresh().execute().actionGet(); assertThat( client("server1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().getCount(), equalTo(0l)); logger.info("--> indexing 1234 docs"); for (long i = 0; i < 1234; i++) { client("server1") .prepareIndex("test", "type1", Long.toString(i)) .setCreate( true) // make sure we use create, so if we recover wrongly, we will get increments... .setSource(MapBuilder.<String, Object>newMapBuilder().put("test", "value" + i).map()) .execute() .actionGet(); // snapshot every 100 so we get some actions going on in the gateway if ((i % 11) == 0) { client("server1").admin().indices().prepareGatewaySnapshot().execute().actionGet(); } // flush every once is a while, so we get different data if ((i % 55) == 0) { client("server1").admin().indices().prepareFlush().execute().actionGet(); } } logger.info("--> refreshing and checking count"); client("server1").admin().indices().prepareRefresh().execute().actionGet(); assertThat( client("server1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().getCount(), equalTo(1234l)); logger.info("--> closing the server"); closeNode("server1"); if (fullRecovery) { logger.info("Clearing cluster data dir, so there will be a full recovery from the gateway"); FileSystemUtils.deleteRecursively(environment.dataWithClusterFiles()); logger.info( "Starting the server, should recover from the gateway (both index and translog) without reusing work dir"); } startNode("server1"); logger.info("--> running Cluster Health (wait for the shards to startup)"); clusterHealth = client("server1") .admin() .cluster() .health(clusterHealthRequest().waitForYellowStatus().waitForActiveShards(1)) .actionGet(); logger.info("--> done Cluster Health, status " + clusterHealth.getStatus()); assertThat(clusterHealth.isTimedOut(), equalTo(false)); assertThat(clusterHealth.getStatus(), equalTo(ClusterHealthStatus.YELLOW)); logger.info("--> checking count"); assertThat( client("server1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().getCount(), equalTo(1234l)); logger.info("--> checking reuse / recovery status"); IndicesStatusResponse statusResponse = client("server1").admin().indices().prepareStatus().setRecovery(true).execute().actionGet(); for (IndexShardStatus indexShardStatus : statusResponse.getIndex("test")) { for (ShardStatus shardStatus : indexShardStatus) { if (shardStatus.getShardRouting().primary()) { if (fullRecovery || !isPersistentStorage()) { assertThat( shardStatus.getGatewayRecoveryStatus().getReusedIndexSize().bytes(), equalTo(0l)); } else { assertThat( shardStatus.getGatewayRecoveryStatus().getReusedIndexSize().bytes(), greaterThan( shardStatus.getGatewayRecoveryStatus().getIndexSize().bytes() - 8196 /* segments file and others */)); } } } } }
public void run() throws Exception { Node[] nodes = new Node[numberOfNodes]; for (int i = 0; i < nodes.length; i++) { nodes[i] = NodeBuilder.nodeBuilder().settings(settings).node(); } client = NodeBuilder.nodeBuilder().settings(settings).client(true).node(); client .client() .admin() .indices() .prepareCreate("test") .setSettings( settingsBuilder() .put("index.number_of_shards", numberOfShards) .put("index.number_of_replicas", numberOfReplicas)) .execute() .actionGet(); logger.info("********** [START] INDEXING INITIAL DOCS"); for (long i = 0; i < initialNumberOfDocs; i++) { indexDoc(); } logger.info("********** [DONE ] INDEXING INITIAL DOCS"); Indexer[] indexerThreads = new Indexer[indexers]; for (int i = 0; i < indexerThreads.length; i++) { indexerThreads[i] = new Indexer(); } for (int i = 0; i < indexerThreads.length; i++) { indexerThreads[i].start(); } long testStart = System.currentTimeMillis(); // start doing the rolling restart int nodeIndex = 0; while (true) { File[] nodeData = ((InternalNode) nodes[nodeIndex]) .injector() .getInstance(NodeEnvironment.class) .nodeDataLocations(); nodes[nodeIndex].close(); if (clearNodeData) { FileSystemUtils.deleteRecursively(nodeData); } try { ClusterHealthResponse clusterHealth = client .client() .admin() .cluster() .prepareHealth() .setWaitForGreenStatus() .setWaitForNodes(Integer.toString(numberOfNodes + 0 /* client node*/)) .setWaitForRelocatingShards(0) .setTimeout("10m") .execute() .actionGet(); if (clusterHealth.timedOut()) { logger.warn("timed out waiting for green status...."); } } catch (Exception e) { logger.warn("failed to execute cluster health...."); } nodes[nodeIndex] = NodeBuilder.nodeBuilder().settings(settings).node(); Thread.sleep(1000); try { ClusterHealthResponse clusterHealth = client .client() .admin() .cluster() .prepareHealth() .setWaitForGreenStatus() .setWaitForNodes(Integer.toString(numberOfNodes + 1 /* client node*/)) .setWaitForRelocatingShards(0) .setTimeout("10m") .execute() .actionGet(); if (clusterHealth.timedOut()) { logger.warn("timed out waiting for green status...."); } } catch (Exception e) { logger.warn("failed to execute cluster health...."); } if (++nodeIndex == nodes.length) { nodeIndex = 0; } if ((System.currentTimeMillis() - testStart) > period.millis()) { logger.info("test finished"); break; } } for (int i = 0; i < indexerThreads.length; i++) { indexerThreads[i].close = true; } Thread.sleep(indexerThrottle.millis() + 10000); for (int i = 0; i < indexerThreads.length; i++) { if (!indexerThreads[i].closed) { logger.warn("thread not closed!"); } } client.client().admin().indices().prepareRefresh().execute().actionGet(); // check the status IndicesStatusResponse status = client.client().admin().indices().prepareStatus("test").execute().actionGet(); for (IndexShardStatus shardStatus : status.index("test")) { ShardStatus shard = shardStatus.shards()[0]; logger.info("shard [{}], docs [{}]", shard.shardId(), shard.getDocs().numDocs()); for (ShardStatus shardStatu : shardStatus) { if (shard.docs().numDocs() != shardStatu.docs().numDocs()) { logger.warn( "shard doc number does not match!, got {} and {}", shard.docs().numDocs(), shardStatu.docs().numDocs()); } } } // check the count for (int i = 0; i < (nodes.length * 5); i++) { CountResponse count = client.client().prepareCount().setQuery(matchAllQuery()).execute().actionGet(); logger.info( "indexed [{}], count [{}], [{}]", count.count(), indexCounter.get(), count.count() == indexCounter.get() ? "OK" : "FAIL"); if (count.count() != indexCounter.get()) { logger.warn("count does not match!"); } } // scan all the docs, verify all have the same version based on the number of replicas SearchResponse searchResponse = client .client() .prepareSearch() .setSearchType(SearchType.SCAN) .setQuery(matchAllQuery()) .setSize(50) .setScroll(TimeValue.timeValueMinutes(2)) .execute() .actionGet(); logger.info("Verifying versions for {} hits...", searchResponse.hits().totalHits()); while (true) { searchResponse = client .client() .prepareSearchScroll(searchResponse.scrollId()) .setScroll(TimeValue.timeValueMinutes(2)) .execute() .actionGet(); if (searchResponse.failedShards() > 0) { logger.warn("Search Failures " + Arrays.toString(searchResponse.shardFailures())); } for (SearchHit hit : searchResponse.hits()) { long version = -1; for (int i = 0; i < (numberOfReplicas + 1); i++) { GetResponse getResponse = client.client().prepareGet(hit.index(), hit.type(), hit.id()).execute().actionGet(); if (version == -1) { version = getResponse.version(); } else { if (version != getResponse.version()) { logger.warn( "Doc {} has different version numbers {} and {}", hit.id(), version, getResponse.version()); } } } } if (searchResponse.hits().hits().length == 0) { break; } } logger.info("Done verifying versions"); client.close(); for (Node node : nodes) { node.close(); } }