/**
   * Cancels every recovery handler currently registered for {@code shard}.
   *
   * <p>Each handler is cancelled in turn; an exception thrown by one handler does not prevent the
   * remaining handlers from being cancelled. The shard's "current as source" recovery counter is
   * decremented once per handler, whether or not its cancellation threw. All collected exceptions
   * are finally handed to {@code ExceptionsHelper.maybeThrowRuntimeAndSuppress}.
   *
   * @param shard the shard whose ongoing recoveries should be cancelled
   * @param reason the reason passed on to each handler's {@code cancel} call
   */
  synchronized void cancel(IndexShard shard, String reason) {
    final ShardRecoveryContext context = ongoingRecoveries.get(shard);
    if (context == null) {
      // No recoveries are tracked for this shard, so there is nothing to cancel.
      return;
    }

    final List<Exception> cancelErrors = new ArrayList<>();
    for (RecoverySourceHandler handler : context.recoveryHandlers) {
      try {
        handler.cancel(reason);
      } catch (Exception e) {
        // Remember the failure but keep going so every handler gets cancelled.
        cancelErrors.add(e);
      } finally {
        shard.recoveryStats().decCurrentAsSource();
      }
    }
    ExceptionsHelper.maybeThrowRuntimeAndSuppress(cancelErrors);
  }
 public void recoverReplica(
     IndexShard replica,
     BiFunction<IndexShard, DiscoveryNode, RecoveryTarget> targetSupplier,
     boolean markAsRecovering)
     throws IOException {
   final DiscoveryNode pNode = getPrimaryNode();
   final DiscoveryNode rNode = getDiscoveryNode(replica.routingEntry().currentNodeId());
   if (markAsRecovering) {
     replica.markAsRecovering(
         "remote",
         new RecoveryState(replica.shardId(), false, RecoveryState.Type.REPLICA, pNode, rNode));
   } else {
     assertEquals(replica.state(), IndexShardState.RECOVERING);
   }
   replica.prepareForIndexRecovery();
   RecoveryTarget recoveryTarget = targetSupplier.apply(replica, pNode);
   StartRecoveryRequest request =
       new StartRecoveryRequest(
           replica.shardId(),
           pNode,
           rNode,
           getMetadataSnapshotOrEmpty(replica),
           RecoveryState.Type.REPLICA,
           0);
   RecoverySourceHandler recovery =
       new RecoverySourceHandler(
           primary,
           recoveryTarget,
           request,
           () -> 0L,
           e -> () -> {},
           (int) ByteSizeUnit.MB.toKB(1),
           logger);
   recovery.recoverToTarget();
   recoveryTarget.markAsDone();
   replica.updateRoutingEntry(ShardRoutingHelper.moveToStarted(replica.routingEntry()));
 }
  private RecoveryResponse recover(final StartRecoveryRequest request) throws IOException {
    final IndexService indexService = indicesService.indexServiceSafe(request.shardId().getIndex());
    final IndexShard shard = indexService.getShard(request.shardId().id());

    // starting recovery from that our (the source) shard state is marking the shard to be in
    // recovery mode as well, otherwise
    // the index operations will not be routed to it properly
    RoutingNode node = clusterService.state().getRoutingNodes().node(request.targetNode().getId());
    if (node == null) {
      logger.debug(
          "delaying recovery of {} as source node {} is unknown",
          request.shardId(),
          request.targetNode());
      throw new DelayRecoveryException(
          "source node does not have the node [" + request.targetNode() + "] in its state yet..");
    }

    ShardRouting routingEntry = shard.routingEntry();
    if (request.isPrimaryRelocation()
        && (routingEntry.relocating() == false
            || routingEntry.relocatingNodeId().equals(request.targetNode().getId()) == false)) {
      logger.debug(
          "delaying recovery of {} as source shard is not marked yet as relocating to {}",
          request.shardId(),
          request.targetNode());
      throw new DelayRecoveryException(
          "source shard is not marked yet as relocating to [" + request.targetNode() + "]");
    }

    ShardRouting targetShardRouting = node.getByShardId(request.shardId());
    if (targetShardRouting == null) {
      logger.debug(
          "delaying recovery of {} as it is not listed as assigned to target node {}",
          request.shardId(),
          request.targetNode());
      throw new DelayRecoveryException(
          "source node does not have the shard listed in its state as allocated on the node");
    }
    if (!targetShardRouting.initializing()) {
      logger.debug(
          "delaying recovery of {} as it is not listed as initializing on the target node {}. known shards state is [{}]",
          request.shardId(),
          request.targetNode(),
          targetShardRouting.state());
      throw new DelayRecoveryException(
          "source node has the state of the target shard to be ["
              + targetShardRouting.state()
              + "], expecting to be [initializing]");
    }

    RecoverySourceHandler handler =
        ongoingRecoveries.addNewRecovery(request, targetShardRouting.allocationId().getId(), shard);
    logger.trace(
        "[{}][{}] starting recovery to {}",
        request.shardId().getIndex().getName(),
        request.shardId().id(),
        request.targetNode());
    try {
      return handler.recoverToTarget();
    } finally {
      ongoingRecoveries.remove(shard, handler);
    }
  }
  // Example #4
  // 0
  /**
   * Serves a recovery request on the source node.
   *
   * <p>The cluster state known to this node must list the target node and must show the requested
   * shard on that node in the initializing state; otherwise a {@link DelayRecoveryException} is
   * thrown. When the checks pass, a handler is created (a {@link SharedFSRecoverySourceHandler}
   * if {@code IndexMetaData.isOnSharedFilesystem} reports true for the shard's index settings, a
   * plain {@link RecoverySourceHandler} otherwise), registered in {@code ongoingRecoveries}, run,
   * and removed again once it finishes or fails.
   *
   * @param request the recovery request received from the target
   * @return the response produced by the handler's {@code recoverToTarget()}
   */
  private RecoveryResponse recover(final StartRecoveryRequest request) {
    final IndexService indexService =
        indicesService.indexServiceSafe(request.shardId().index().name());
    final IndexShard shard = indexService.getShard(request.shardId().id());

    // Recovery may only start once this (source) node's cluster state also shows the shard as
    // recovering on the target; otherwise index operations would not be routed to it properly.
    final RoutingNode targetRoutingNode =
        clusterService.state().getRoutingNodes().node(request.targetNode().id());
    if (targetRoutingNode == null) {
      logger.debug(
          "delaying recovery of {} as source node {} is unknown",
          request.shardId(),
          request.targetNode());
      throw new DelayRecoveryException(
          "source node does not have the node [" + request.targetNode() + "] in its state yet..");
    }

    // Find the first routing entry on the target node that belongs to the requested shard.
    ShardRouting targetShardRouting = null;
    for (ShardRouting candidate : targetRoutingNode) {
      if (candidate.shardId().equals(request.shardId())) {
        targetShardRouting = candidate;
        break;
      }
    }
    if (targetShardRouting == null) {
      logger.debug(
          "delaying recovery of {} as it is not listed as assigned to target node {}",
          request.shardId(),
          request.targetNode());
      throw new DelayRecoveryException(
          "source node does not have the shard listed in its state as allocated on the node");
    }
    if (targetShardRouting.initializing() == false) {
      logger.debug(
          "delaying recovery of {} as it is not listed as initializing on the target node {}. known shards state is [{}]",
          request.shardId(),
          request.targetNode(),
          targetShardRouting.state());
      throw new DelayRecoveryException(
          "source node has the state of the target shard to be ["
              + targetShardRouting.state()
              + "], expecting to be [initializing]");
    }

    logger.trace(
        "[{}][{}] starting recovery to {}, mark_as_relocated {}",
        request.shardId().index().name(),
        request.shardId().id(),
        request.targetNode(),
        request.markAsRelocated());
    final RecoverySourceHandler recoveryHandler =
        IndexMetaData.isOnSharedFilesystem(shard.indexSettings())
            ? new SharedFSRecoverySourceHandler(
                shard, request, recoverySettings, transportService, logger)
            : new RecoverySourceHandler(
                shard, request, recoverySettings, transportService, logger);
    ongoingRecoveries.add(shard, recoveryHandler);
    try {
      return recoveryHandler.recoverToTarget();
    } finally {
      // Always deregister the handler, whether recovery succeeded or threw.
      ongoingRecoveries.remove(shard, recoveryHandler);
    }
  }
 /**
  * Indexes a random number of documents into a source store, sends all of its files to a target
  * store through {@code RecoverySourceHandler.sendFiles}, and verifies that the target's metadata
  * is identical to the source's and that the target can be opened with the expected document
  * count.
  */
 public void testSendFiles() throws Throwable {
   Settings settings =
       Settings.builder()
           .put("indices.recovery.concurrent_streams", 1)
           .put("indices.recovery.concurrent_small_file_streams", 1)
           .build();
   final RecoverySettings recoverySettings = new RecoverySettings(settings, service);
   StartRecoveryRequest request =
       new StartRecoveryRequest(
           shardId,
           new DiscoveryNode("b", DummyTransportAddress.INSTANCE, Version.CURRENT),
           new DiscoveryNode("b", DummyTransportAddress.INSTANCE, Version.CURRENT),
           null,
           RecoveryState.Type.STORE,
           randomLong());
   Store store = newStore(createTempDir());
   RecoverySourceHandler handler =
       new RecoverySourceHandler(null, request, recoverySettings, null, logger);
   Directory dir = store.directory();
   RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
   int numDocs = randomIntBetween(10, 100);
   for (int i = 0; i < numDocs; i++) {
     Document document = new Document();
     document.add(new StringField("id", Integer.toString(i), Field.Store.YES));
     document.add(
         newField("field", randomUnicodeOfCodepointLengthBetween(1, 10), TextField.TYPE_STORED));
     writer.addDocument(document);
   }
   writer.commit();
   Store.MetadataSnapshot metadata = store.getMetadata();
   List<StoreFileMetaData> metas = new ArrayList<>();
   for (StoreFileMetaData md : metadata) {
     metas.add(md);
   }
   Store targetStore = newStore(createTempDir());
   handler.sendFiles(
       store,
       metas.toArray(new StoreFileMetaData[0]),
       (md) -> {
         try {
           return new IndexOutputOutputStream(
               targetStore.createVerifyingOutput(md.name(), md, IOContext.DEFAULT)) {
             @Override
             public void close() throws IOException {
               super.close();
               // The file was written into the target store, so that is the directory that has
               // to be synced (sync otherwise MDW will mess with it).
               targetStore.directory().sync(Collections.singleton(md.name()));
             }
           };
         } catch (IOException e) {
           throw new RuntimeException(e);
         }
       });
   Store.MetadataSnapshot targetStoreMetadata = targetStore.getMetadata();
   Store.RecoveryDiff recoveryDiff = targetStoreMetadata.recoveryDiff(metadata);
   // Every file must have arrived unchanged: nothing different, nothing missing.
   assertEquals(metas.size(), recoveryDiff.identical.size());
   assertEquals(0, recoveryDiff.different.size());
   assertEquals(0, recoveryDiff.missing.size());
   IndexReader reader = DirectoryReader.open(targetStore.directory());
   assertEquals(numDocs, reader.maxDoc());
   IOUtils.close(reader, writer, store, targetStore);
 }
  public void testHandleExceptinoOnSendSendFiles() throws Throwable {
    Settings settings =
        Settings.builder()
            .put("indices.recovery.concurrent_streams", 1)
            .put("indices.recovery.concurrent_small_file_streams", 1)
            .build();
    final RecoverySettings recoverySettings = new RecoverySettings(settings, service);
    StartRecoveryRequest request =
        new StartRecoveryRequest(
            shardId,
            new DiscoveryNode("b", DummyTransportAddress.INSTANCE, Version.CURRENT),
            new DiscoveryNode("b", DummyTransportAddress.INSTANCE, Version.CURRENT),
            null,
            RecoveryState.Type.STORE,
            randomLong());
    Path tempDir = createTempDir();
    Store store = newStore(tempDir, false);
    AtomicBoolean failedEngine = new AtomicBoolean(false);
    RecoverySourceHandler handler =
        new RecoverySourceHandler(null, request, recoverySettings, null, logger) {
          @Override
          protected void failEngine(IOException cause) {
            assertFalse(failedEngine.get());
            failedEngine.set(true);
          }
        };
    Directory dir = store.directory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
    int numDocs = randomIntBetween(10, 100);
    for (int i = 0; i < numDocs; i++) {
      Document document = new Document();
      document.add(new StringField("id", Integer.toString(i), Field.Store.YES));
      document.add(
          newField("field", randomUnicodeOfCodepointLengthBetween(1, 10), TextField.TYPE_STORED));
      writer.addDocument(document);
    }
    writer.commit();
    writer.close();

    Store.MetadataSnapshot metadata = store.getMetadata();
    List<StoreFileMetaData> metas = new ArrayList<>();
    for (StoreFileMetaData md : metadata) {
      metas.add(md);
    }
    final boolean throwCorruptedIndexException = randomBoolean();
    Store targetStore = newStore(createTempDir(), false);
    try {
      handler.sendFiles(
          store,
          metas.toArray(new StoreFileMetaData[0]),
          (md) -> {
            if (throwCorruptedIndexException) {
              throw new RuntimeException(new CorruptIndexException("foo", "bar"));
            } else {
              throw new RuntimeException("boom");
            }
          });
      fail("exception index");
    } catch (RuntimeException ex) {
      assertNull(ExceptionsHelper.unwrapCorruption(ex));
      if (throwCorruptedIndexException) {
        assertEquals(
            ex.getMessage(), "[File corruption occurred on recovery but checksums are ok]");
      } else {
        assertEquals(ex.getMessage(), "boom");
      }
    } catch (CorruptIndexException ex) {
      fail("not expected here");
    }
    assertFalse(failedEngine.get());
    IOUtils.close(store, targetStore);
  }
  public void testHandleCorruptedIndexOnSendSendFiles() throws Throwable {
    Settings settings =
        Settings.builder()
            .put("indices.recovery.concurrent_streams", 1)
            .put("indices.recovery.concurrent_small_file_streams", 1)
            .build();
    final RecoverySettings recoverySettings = new RecoverySettings(settings, service);
    StartRecoveryRequest request =
        new StartRecoveryRequest(
            shardId,
            new DiscoveryNode("b", DummyTransportAddress.INSTANCE, Version.CURRENT),
            new DiscoveryNode("b", DummyTransportAddress.INSTANCE, Version.CURRENT),
            null,
            RecoveryState.Type.STORE,
            randomLong());
    Path tempDir = createTempDir();
    Store store = newStore(tempDir, false);
    AtomicBoolean failedEngine = new AtomicBoolean(false);
    RecoverySourceHandler handler =
        new RecoverySourceHandler(null, request, recoverySettings, null, logger) {
          @Override
          protected void failEngine(IOException cause) {
            assertFalse(failedEngine.get());
            failedEngine.set(true);
          }
        };
    Directory dir = store.directory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
    int numDocs = randomIntBetween(10, 100);
    for (int i = 0; i < numDocs; i++) {
      Document document = new Document();
      document.add(new StringField("id", Integer.toString(i), Field.Store.YES));
      document.add(
          newField("field", randomUnicodeOfCodepointLengthBetween(1, 10), TextField.TYPE_STORED));
      writer.addDocument(document);
    }
    writer.commit();
    writer.close();

    Store.MetadataSnapshot metadata = store.getMetadata();
    List<StoreFileMetaData> metas = new ArrayList<>();
    for (StoreFileMetaData md : metadata) {
      metas.add(md);
    }

    CorruptionUtils.corruptFile(
        getRandom(),
        FileSystemUtils.files(
            tempDir,
            (p) ->
                (p.getFileName().toString().equals("write.lock")
                        || p.getFileName().toString().startsWith("extra"))
                    == false));
    Store targetStore = newStore(createTempDir(), false);
    try {
      handler.sendFiles(
          store,
          metas.toArray(new StoreFileMetaData[0]),
          (md) -> {
            try {
              return new IndexOutputOutputStream(
                  targetStore.createVerifyingOutput(md.name(), md, IOContext.DEFAULT)) {
                @Override
                public void close() throws IOException {
                  super.close();
                  store
                      .directory()
                      .sync(
                          Collections.singleton(md.name())); // sync otherwise MDW will mess with it
                }
              };
            } catch (IOException e) {
              throw new RuntimeException(e);
            }
          });
      fail("corrupted index");
    } catch (IOException ex) {
      assertNotNull(ExceptionsHelper.unwrapCorruption(ex));
    }
    assertTrue(failedEngine.get());
    IOUtils.close(store, targetStore);
  }