Пример #1
0
  /**
   * Starts one node, populates {@code INDEX_NAME}, performs a full cluster restart and then checks
   * what the recovery API reports for the index.
   *
   * <p>Exactly one recovery state is expected for the index; it is checked against shard id 0,
   * {@code StoreRecoverySource.EXISTING_STORE_INSTANCE}, {@code Stage.DONE}, no source node and the
   * started node as target.
   */
  public void testGatewayRecovery() throws Exception {
    logger.info("--> start nodes");
    final String onlyNode = internalCluster().startNode();

    createAndPopulateIndex(INDEX_NAME, 1, SHARD_COUNT, REPLICA_COUNT);

    logger.info("--> restarting cluster");
    internalCluster().fullRestart();
    ensureGreen();

    logger.info("--> request recoveries");
    final RecoveryResponse recoveries =
        client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();

    // Top-level sanity checks on the response before looking at the single state in detail.
    assertThat(recoveries.shardRecoveryStates().size(), equalTo(SHARD_COUNT));
    assertThat(recoveries.shardRecoveryStates().get(INDEX_NAME).size(), equalTo(1));

    final List<RecoveryState> statesForIndex = recoveries.shardRecoveryStates().get(INDEX_NAME);
    assertThat(statesForIndex.size(), equalTo(1));

    final RecoveryState shardState = statesForIndex.get(0);
    assertRecoveryState(
        shardState,
        0,
        StoreRecoverySource.EXISTING_STORE_INSTANCE,
        true,
        Stage.DONE,
        null,
        onlyNode);

    validateIndexRecoveryState(shardState.getIndex());
  }
Пример #2
0
 /**
  * Runs a series of sanity checks against an already-loaded index.
  *
  * <p>Checks, in order: the index exists under the given name, its creation version matches
  * {@code indexCreated}, exactly one finished primary recovery from an existing store replayed a
  * non-zero number of translog operations, every segment carries either the creation Lucene
  * version or the current one (with at least one of each), and a search returns at least one hit.
  *
  * @param indexName name of the index to inspect
  * @param indexCreated version the index is expected to have been created with
  */
 void assertIndexSanity(String indexName, Version indexCreated) {
   GetIndexResponse indexResponse =
       client().admin().indices().prepareGetIndex().addIndices(indexName).get();
   assertEquals(1, indexResponse.indices().length);
   assertEquals(indexName, indexResponse.indices()[0]);

   Version storedVersion = Version.indexCreated(indexResponse.getSettings().get(indexName));
   assertEquals(indexCreated, storedVersion);

   ensureYellow(indexName);

   // Detailed recovery info for all recoveries, not only the currently active ones.
   RecoveryResponse recoveries =
       client()
           .admin()
           .indices()
           .prepareRecoveries(indexName)
           .setDetailed(true)
           .setActiveOnly(false)
           .get();

   boolean sawTranslogRecovery = false;
   for (List<RecoveryState> shardStates : recoveries.shardRecoveryStates().values()) {
     for (RecoveryState shardState : shardStates) {
       // Only finished primary recoveries from an existing store are of interest here.
       if (shardState.getStage() != RecoveryState.Stage.DONE) {
         continue;
       }
       if (!shardState.getPrimary()) {
         continue;
       }
       if (shardState.getRecoverySource().getType() != RecoverySource.Type.EXISTING_STORE) {
         continue;
       }
       assertFalse("more than one primary recoverd?", sawTranslogRecovery);
       assertNotEquals(0, shardState.getTranslog().recoveredOperations());
       sawTranslogRecovery = true;
     }
   }
   assertTrue("expected translog but nothing was recovered", sawTranslogRecovery);

   IndicesSegmentResponse segmentsResponse =
       client().admin().indices().prepareSegments(indexName).get();
   IndexSegments indexSegments = segmentsResponse.getIndices().get(indexName);

   int currentCount = 0;
   int bwcCount = 0;
   for (IndexShardSegments perShard : indexSegments) {
     for (ShardSegments perCopy : perShard) {
       for (Segment segment : perCopy) {
         boolean writtenByCreator = indexCreated.luceneVersion.equals(segment.version);
         boolean writtenByCurrent = Version.CURRENT.luceneVersion.equals(segment.version);
         if (!writtenByCreator && !writtenByCurrent) {
           fail("unexpected version " + segment.version);
         }
         // A segment may count for both buckets when the creation version is the current one.
         if (writtenByCreator) {
           bwcCount++;
         }
         if (writtenByCurrent) {
           currentCount++;
         }
       }
     }
   }
   assertNotEquals(
       "expected at least 1 current segment after translog recovery", 0, currentCount);
   assertNotEquals("expected at least 1 old segment", 0, bwcCount);

   SearchResponse searchResponse = client().prepareSearch(indexName).get();
   assertThat(searchResponse.getHits().getTotalHits(), greaterThanOrEqualTo(1L));
 }
Пример #3
0
  /**
   * Checks the recovery API after a replica has been added on a second node.
   *
   * <p>The index is created while only node A is running; node B is started afterwards and
   * {@code number_of_replicas} is raised to 1. The test then expects one recovery state targeting
   * each node: an empty-store recovery on A and a peer recovery from A on B, both in
   * {@code Stage.DONE}.
   */
  public void testReplicaRecovery() throws Exception {
    logger.info("--> start node A");
    final String firstNode = internalCluster().startNode();

    logger.info("--> create index on node: {}", firstNode);
    createAndPopulateIndex(INDEX_NAME, 1, SHARD_COUNT, REPLICA_COUNT);

    logger.info("--> start node B");
    final String secondNode = internalCluster().startNode();
    ensureGreen();

    // Raising the replica count forces a shard recovery from node A to node B.
    logger.info("--> bump replica count");
    client()
        .admin()
        .indices()
        .prepareUpdateSettings(INDEX_NAME)
        .setSettings(Settings.builder().put("number_of_replicas", 1))
        .execute()
        .actionGet();
    ensureGreen();

    logger.info("--> request recoveries");
    final RecoveryResponse recoveries =
        client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();

    // Two shard copies are expected by now: one primary and one replica.
    final List<RecoveryState> allStates = recoveries.shardRecoveryStates().get(INDEX_NAME);
    assertThat(allStates.size(), equalTo(2));

    final List<RecoveryState> targetingFirst = findRecoveriesForTargetNode(firstNode, allStates);
    assertThat(targetingFirst.size(), equalTo(1));
    final List<RecoveryState> targetingSecond =
        findRecoveriesForTargetNode(secondNode, allStates);
    assertThat(targetingSecond.size(), equalTo(1));

    // Node A: recovered from an empty store, no source node.
    final RecoveryState firstNodeState = targetingFirst.get(0);
    assertRecoveryState(
        firstNodeState,
        0,
        StoreRecoverySource.EMPTY_STORE_INSTANCE,
        true,
        Stage.DONE,
        null,
        firstNode);
    validateIndexRecoveryState(firstNodeState.getIndex());

    // Node B: peer recovery with node A as the source.
    final RecoveryState secondNodeState = targetingSecond.get(0);
    assertRecoveryState(
        secondNodeState,
        0,
        PeerRecoverySource.INSTANCE,
        false,
        Stage.DONE,
        firstNode,
        secondNode);
    validateIndexRecoveryState(secondNodeState.getIndex());
  }
Пример #4
0
  /**
   * After a full cluster restart has completed (cluster green), asks the recovery API for
   * active-only recoveries of {@code INDEX_NAME} and expects an empty list.
   */
  public void testGatewayRecoveryTestActiveOnly() throws Exception {
    logger.info("--> start nodes");
    internalCluster().startNode();

    createAndPopulateIndex(INDEX_NAME, 1, SHARD_COUNT, REPLICA_COUNT);

    logger.info("--> restarting cluster");
    internalCluster().fullRestart();
    ensureGreen();

    logger.info("--> request recoveries");
    final RecoveryResponse activeOnlyResponse =
        client()
            .admin()
            .indices()
            .prepareRecoveries(INDEX_NAME)
            .setActiveOnly(true)
            .execute()
            .actionGet();

    // With the active-only filter set, no recovery states are expected to come back.
    final List<RecoveryState> activeStates =
        activeOnlyResponse.shardRecoveryStates().get(INDEX_NAME);
    assertThat(activeStates.size(), equalTo(0));
  }
Пример #5
0
  /**
   * Snapshots {@code INDEX_NAME} into an "fs" repository, closes the index, restores the snapshot
   * and checks that the recovery API reports every restored shard as a finished recovery whose
   * source is that snapshot.
   *
   * <p>The per-shard checks run inside a loop over the recovery response, so the test first
   * asserts that the response actually contains the restored index; otherwise an empty response
   * would let the test pass without verifying anything.
   */
  public void testSnapshotRecovery() throws Exception {
    logger.info("--> start node A");
    String nodeA = internalCluster().startNode();

    logger.info("--> create repository");
    assertAcked(
        client()
            .admin()
            .cluster()
            .preparePutRepository(REPO_NAME)
            .setType("fs")
            .setSettings(
                Settings.builder().put("location", randomRepoPath()).put("compress", false))
            .get());

    ensureGreen();

    logger.info("--> create index on node: {}", nodeA);
    createAndPopulateIndex(INDEX_NAME, 1, SHARD_COUNT, REPLICA_COUNT);

    logger.info("--> snapshot");
    CreateSnapshotResponse createSnapshotResponse =
        client()
            .admin()
            .cluster()
            .prepareCreateSnapshot(REPO_NAME, SNAP_NAME)
            .setWaitForCompletion(true)
            .setIndices(INDEX_NAME)
            .get();
    // Every shard must have been snapshotted successfully, and there must be at least one.
    assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0));
    assertThat(
        createSnapshotResponse.getSnapshotInfo().successfulShards(),
        equalTo(createSnapshotResponse.getSnapshotInfo().totalShards()));

    assertThat(
        client()
            .admin()
            .cluster()
            .prepareGetSnapshots(REPO_NAME)
            .setSnapshots(SNAP_NAME)
            .get()
            .getSnapshots()
            .get(0)
            .state(),
        equalTo(SnapshotState.SUCCESS));

    // The index is closed before the restore is requested.
    client().admin().indices().prepareClose(INDEX_NAME).execute().actionGet();

    logger.info("--> restore");
    RestoreSnapshotResponse restoreSnapshotResponse =
        client()
            .admin()
            .cluster()
            .prepareRestoreSnapshot(REPO_NAME, SNAP_NAME)
            .setWaitForCompletion(true)
            .execute()
            .actionGet();
    int totalShards = restoreSnapshotResponse.getRestoreInfo().totalShards();
    assertThat(totalShards, greaterThan(0));

    ensureGreen();

    logger.info("--> request recoveries");
    RecoveryResponse response =
        client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();

    // Guard against a vacuous pass: all remaining assertions live inside the loop below.
    assertThat(
        "recovery response should contain the restored index",
        response.shardRecoveryStates().containsKey(INDEX_NAME),
        equalTo(true));

    // The expected recovery source is the same for every shard, so build it once.
    SnapshotRecoverySource expectedSource =
        new SnapshotRecoverySource(
            new Snapshot(REPO_NAME, createSnapshotResponse.getSnapshotInfo().snapshotId()),
            Version.CURRENT,
            INDEX_NAME);

    for (Map.Entry<String, List<RecoveryState>> indexRecoveryStates :
        response.shardRecoveryStates().entrySet()) {

      assertThat(indexRecoveryStates.getKey(), equalTo(INDEX_NAME));
      List<RecoveryState> recoveryStates = indexRecoveryStates.getValue();
      assertThat(recoveryStates.size(), equalTo(totalShards));

      for (RecoveryState recoveryState : recoveryStates) {
        assertRecoveryState(recoveryState, 0, expectedSource, true, Stage.DONE, null, nodeA);
        validateIndexRecoveryState(recoveryState.getIndex());
      }
    }
  }
Пример #6
0
  /**
   * Exercises the recovery API and the node-level recovery stats while shard 0 of
   * {@code INDEX_NAME} is moved between nodes with reroute commands.
   *
   * <p>Phases, in order:
   *
   * <ol>
   *   <li>Start node A, create the index, start node B, slow recoveries down and move shard 0 from
   *       A to B. While the move is in flight, check the recovery states, the per-node
   *       source/target counters and that throttle time keeps increasing on both nodes.
   *   <li>Restore the recovery speed, wait for green and check the finished relocation and the
   *       final stats.
   *   <li>Set {@code number_of_replicas} to 1, start node C, slow recoveries down again and move
   *       the copy on A to C. Check the states on A, B and C; with a random coin flip, stop node A
   *       during the move and check B and C again.
   *   <li>Restore the recovery speed, wait for green and check the final states on B and C.
   * </ol>
   */
  public void testRerouteRecovery() throws Exception {
    logger.info("--> start node A");
    final String nodeA = internalCluster().startNode();

    logger.info("--> create index on node: {}", nodeA);
    // Remember the store size of the first shard; it is passed to slowDownRecovery(...) below.
    ByteSizeValue shardSize =
        createAndPopulateIndex(INDEX_NAME, 1, SHARD_COUNT, REPLICA_COUNT)
            .getShards()[0]
            .getStats()
            .getStore()
            .size();

    logger.info("--> start node B");
    final String nodeB = internalCluster().startNode();

    ensureGreen();

    logger.info("--> slowing down recoveries");
    slowDownRecovery(shardSize);

    logger.info("--> move shard from: {} to: {}", nodeA, nodeB);
    // The cluster state returned by getState() is discarded.
    client()
        .admin()
        .cluster()
        .prepareReroute()
        .add(new MoveAllocationCommand(INDEX_NAME, 0, nodeA, nodeB))
        .execute()
        .actionGet()
        .getState();

    logger.info("--> waiting for recovery to start both on source and target");
    final Index index = resolveIndex(INDEX_NAME);
    // NOTE(review): assertBusy presumably re-runs the Runnable until its assertions pass or a
    // timeout is hit — confirm against the helper.
    assertBusy(
        new Runnable() {
          @Override
          public void run() {

            // Node A must report shard 0 as the source of exactly one running recovery ...
            IndicesService indicesService =
                internalCluster().getInstance(IndicesService.class, nodeA);
            assertThat(
                indicesService
                    .indexServiceSafe(index)
                    .getShard(0)
                    .recoveryStats()
                    .currentAsSource(),
                equalTo(1));
            // ... and node B must report it as the target of exactly one running recovery.
            indicesService = internalCluster().getInstance(IndicesService.class, nodeB);
            assertThat(
                indicesService
                    .indexServiceSafe(index)
                    .getShard(0)
                    .recoveryStats()
                    .currentAsTarget(),
                equalTo(1));
          }
        });

    logger.info("--> request recoveries");
    RecoveryResponse response =
        client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();

    // One recovery state is expected per target node: the original one on A and the move onto B.
    List<RecoveryState> recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);
    List<RecoveryState> nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
    assertThat(nodeARecoveryStates.size(), equalTo(1));
    List<RecoveryState> nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
    assertThat(nodeBRecoveryStates.size(), equalTo(1));

    // Node A still holds the finished empty-store recovery from index creation.
    assertRecoveryState(
        nodeARecoveryStates.get(0),
        0,
        StoreRecoverySource.EMPTY_STORE_INSTANCE,
        true,
        Stage.DONE,
        null,
        nodeA);
    validateIndexRecoveryState(nodeARecoveryStates.get(0).getIndex());

    // Node B is the target of the still-running peer recovery from node A.
    assertOnGoingRecoveryState(
        nodeBRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, nodeA, nodeB);
    validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());

    logger.info("--> request node recovery stats");
    NodesStatsResponse statsResponse =
        client()
            .admin()
            .cluster()
            .prepareNodesStats()
            .clear()
            .setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.Recovery))
            .get();
    // Start from Long.MAX_VALUE: if a node does not show up in the stats below, the later
    // greaterThan(...) check against this baseline cannot succeed.
    long nodeAThrottling = Long.MAX_VALUE;
    long nodeBThrottling = Long.MAX_VALUE;
    for (NodeStats nodeStats : statsResponse.getNodes()) {
      final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats();
      if (nodeStats.getNode().getName().equals(nodeA)) {
        assertThat(
            "node A should have ongoing recovery as source",
            recoveryStats.currentAsSource(),
            equalTo(1));
        assertThat(
            "node A should not have ongoing recovery as target",
            recoveryStats.currentAsTarget(),
            equalTo(0));
        nodeAThrottling = recoveryStats.throttleTime().millis();
      }
      if (nodeStats.getNode().getName().equals(nodeB)) {
        assertThat(
            "node B should not have ongoing recovery as source",
            recoveryStats.currentAsSource(),
            equalTo(0));
        assertThat(
            "node B should have ongoing recovery as target",
            recoveryStats.currentAsTarget(),
            equalTo(1));
        nodeBThrottling = recoveryStats.throttleTime().millis();
      }
    }

    logger.info("--> checking throttling increases");
    // Final copies of the baselines so the anonymous Runnable below can capture them.
    final long finalNodeAThrottling = nodeAThrottling;
    final long finalNodeBThrottling = nodeBThrottling;
    assertBusy(
        new Runnable() {
          @Override
          public void run() {
            // Fetch fresh stats on every run and compare against the baselines taken above.
            NodesStatsResponse statsResponse =
                client()
                    .admin()
                    .cluster()
                    .prepareNodesStats()
                    .clear()
                    .setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.Recovery))
                    .get();
            assertThat(statsResponse.getNodes(), hasSize(2));
            for (NodeStats nodeStats : statsResponse.getNodes()) {
              final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats();
              if (nodeStats.getNode().getName().equals(nodeA)) {
                assertThat(
                    "node A throttling should increase",
                    recoveryStats.throttleTime().millis(),
                    greaterThan(finalNodeAThrottling));
              }
              if (nodeStats.getNode().getName().equals(nodeB)) {
                assertThat(
                    "node B throttling should increase",
                    recoveryStats.throttleTime().millis(),
                    greaterThan(finalNodeBThrottling));
              }
            }
          }
        });

    logger.info("--> speeding up recoveries");
    restoreRecoverySpeed();

    // wait for it to be finished
    ensureGreen();

    response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();

    // Only a single recovery state is expected now: the finished move onto node B.
    recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);
    assertThat(recoveryStates.size(), equalTo(1));

    assertRecoveryState(
        recoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, Stage.DONE, nodeA, nodeB);
    validateIndexRecoveryState(recoveryStates.get(0).getIndex());

    // With the move finished, no node may report a running recovery, while the accumulated
    // throttle time on A and B must be non-zero.
    statsResponse =
        client()
            .admin()
            .cluster()
            .prepareNodesStats()
            .clear()
            .setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.Recovery))
            .get();
    assertThat(statsResponse.getNodes(), hasSize(2));
    for (NodeStats nodeStats : statsResponse.getNodes()) {
      final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats();
      assertThat(recoveryStats.currentAsSource(), equalTo(0));
      assertThat(recoveryStats.currentAsTarget(), equalTo(0));
      if (nodeStats.getNode().getName().equals(nodeA)) {
        assertThat(
            "node A throttling should be >0",
            recoveryStats.throttleTime().millis(),
            greaterThan(0L));
      }
      if (nodeStats.getNode().getName().equals(nodeB)) {
        assertThat(
            "node B throttling should be >0 ",
            recoveryStats.throttleTime().millis(),
            greaterThan(0L));
      }
    }

    logger.info("--> bump replica count");
    client()
        .admin()
        .indices()
        .prepareUpdateSettings(INDEX_NAME)
        .setSettings(Settings.builder().put("number_of_replicas", 1))
        .execute()
        .actionGet();
    ensureGreen();

    // Same stats checks again after the replica count was raised and the cluster is green.
    statsResponse =
        client()
            .admin()
            .cluster()
            .prepareNodesStats()
            .clear()
            .setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.Recovery))
            .get();
    assertThat(statsResponse.getNodes(), hasSize(2));
    for (NodeStats nodeStats : statsResponse.getNodes()) {
      final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats();
      assertThat(recoveryStats.currentAsSource(), equalTo(0));
      assertThat(recoveryStats.currentAsTarget(), equalTo(0));
      if (nodeStats.getNode().getName().equals(nodeA)) {
        assertThat(
            "node A throttling should be >0",
            recoveryStats.throttleTime().millis(),
            greaterThan(0L));
      }
      if (nodeStats.getNode().getName().equals(nodeB)) {
        assertThat(
            "node B throttling should be >0 ",
            recoveryStats.throttleTime().millis(),
            greaterThan(0L));
      }
    }

    logger.info("--> start node C");
    String nodeC = internalCluster().startNode();
    // Wait until the cluster health request reports three nodes without timing out.
    assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("3").get().isTimedOut());

    logger.info("--> slowing down recoveries");
    slowDownRecovery(shardSize);

    logger.info("--> move replica shard from: {} to: {}", nodeA, nodeC);
    // As above, the cluster state returned by getState() is discarded.
    client()
        .admin()
        .cluster()
        .prepareReroute()
        .add(new MoveAllocationCommand(INDEX_NAME, 0, nodeA, nodeC))
        .execute()
        .actionGet()
        .getState();

    response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
    recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);

    // Exactly one recovery state is expected per target node A, B and C.
    nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
    assertThat(nodeARecoveryStates.size(), equalTo(1));
    nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
    assertThat(nodeBRecoveryStates.size(), equalTo(1));
    List<RecoveryState> nodeCRecoveryStates = findRecoveriesForTargetNode(nodeC, recoveryStates);
    assertThat(nodeCRecoveryStates.size(), equalTo(1));

    // Node A: finished peer recovery with node B as the source.
    assertRecoveryState(
        nodeARecoveryStates.get(0),
        0,
        PeerRecoverySource.INSTANCE,
        false,
        Stage.DONE,
        nodeB,
        nodeA);
    validateIndexRecoveryState(nodeARecoveryStates.get(0).getIndex());

    // Node B: the finished move from node A checked earlier in this test.
    assertRecoveryState(
        nodeBRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, Stage.DONE, nodeA, nodeB);
    validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());

    // relocations of replicas are marked as REPLICA and the source node is the node holding the
    // primary (B)
    assertOnGoingRecoveryState(
        nodeCRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, false, nodeB, nodeC);
    validateIndexRecoveryState(nodeCRecoveryStates.get(0).getIndex());

    if (randomBoolean()) {
      // shutdown node with relocation source of replica shard and check if recovery continues
      internalCluster().stopRandomNode(InternalTestCluster.nameFilter(nodeA));
      ensureStableCluster(2);

      response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
      recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);

      // Node A is gone, so nothing may target it any more; B and C keep one state each.
      nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
      assertThat(nodeARecoveryStates.size(), equalTo(0));
      nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
      assertThat(nodeBRecoveryStates.size(), equalTo(1));
      nodeCRecoveryStates = findRecoveriesForTargetNode(nodeC, recoveryStates);
      assertThat(nodeCRecoveryStates.size(), equalTo(1));

      assertRecoveryState(
          nodeBRecoveryStates.get(0),
          0,
          PeerRecoverySource.INSTANCE,
          true,
          Stage.DONE,
          nodeA,
          nodeB);
      validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());

      // The recovery onto node C is still expected to be in progress at this point.
      assertOnGoingRecoveryState(
          nodeCRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, false, nodeB, nodeC);
      validateIndexRecoveryState(nodeCRecoveryStates.get(0).getIndex());
    }

    logger.info("--> speeding up recoveries");
    restoreRecoverySpeed();
    ensureGreen();

    response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
    recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);

    // Final layout: no recovery state targets node A; B and C each have one finished recovery.
    nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
    assertThat(nodeARecoveryStates.size(), equalTo(0));
    nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
    assertThat(nodeBRecoveryStates.size(), equalTo(1));
    nodeCRecoveryStates = findRecoveriesForTargetNode(nodeC, recoveryStates);
    assertThat(nodeCRecoveryStates.size(), equalTo(1));

    assertRecoveryState(
        nodeBRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, Stage.DONE, nodeA, nodeB);
    validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());

    // relocations of replicas are marked as REPLICA and the source node is the node holding the
    // primary (B)
    assertRecoveryState(
        nodeCRecoveryStates.get(0),
        0,
        PeerRecoverySource.INSTANCE,
        false,
        Stage.DONE,
        nodeB,
        nodeC);
    validateIndexRecoveryState(nodeCRecoveryStates.get(0).getIndex());
  }