@Override
 public void messageReceived(RecoveryCleanFilesRequest request, TransportChannel channel)
     throws Exception {
   try (RecoveriesCollection.StatusRef statusRef =
       onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
     final RecoveryStatus recoveryStatus = statusRef.status();
     recoveryStatus.state().getTranslog().totalOperations(request.totalTranslogOps());
      // first, we move the files that were created with the recovery id suffix to
      // their actual names; it's ok if we end up with a corrupted index here, since we have
      // replicas to recover from in case of a full cluster shutdown just as this code executes...
      recoveryStatus
          .indexShard()
          .deleteShardState(); // we have to delete it first since, even if the rename fails,
                               // the shard might already be invalid
     recoveryStatus.renameAllTempFiles();
     final Store store = recoveryStatus.store();
     // now write checksums
     recoveryStatus.legacyChecksums().write(store);
     Store.MetadataSnapshot sourceMetaData = request.sourceMetaSnapshot();
     try {
       store.cleanupAndVerify("recovery CleanFilesRequestHandler", sourceMetaData);
     } catch (CorruptIndexException
         | IndexFormatTooNewException
         | IndexFormatTooOldException ex) {
        // this is a fatal exception at this stage.
        // it means we transferred files from the remote that have not been checksummed and are
        // broken. We have to clean up this shard entirely, remove all files, and bubble the
        // failure up to the source shard, since the index might be broken there as well. The
        // source can handle this and check its content on disk if possible.
       try {
         try {
           store.removeCorruptionMarker();
         } finally {
           Lucene.cleanLuceneIndex(store.directory()); // clean up and delete all files
         }
       } catch (Throwable e) {
         logger.debug("Failed to clean lucene index", e);
         ex.addSuppressed(e);
       }
       RecoveryFailedException rfe =
           new RecoveryFailedException(
               recoveryStatus.state(), "failed to clean after recovery", ex);
       recoveryStatus.fail(rfe, true);
       throw rfe;
     } catch (Exception ex) {
       RecoveryFailedException rfe =
           new RecoveryFailedException(
               recoveryStatus.state(), "failed to clean after recovery", ex);
       recoveryStatus.fail(rfe, true);
       throw rfe;
     }
     channel.sendResponse(TransportResponse.Empty.INSTANCE);
   }
 }
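The interesting part is the corruption handling: when cleanupAndVerify detects broken files, the handler wipes the shard directory entirely so the next recovery attempt starts from scratch. A minimal sketch of that wipe step, assuming plain Lucene Directory APIs (an illustration only, not the actual Lucene.cleanLuceneIndex implementation, which is also careful about things like the write lock):

  // Illustration: delete every file so a fresh recovery can repopulate the directory.
  static void wipeShardDirectory(org.apache.lucene.store.Directory dir) throws java.io.IOException {
    for (String file : dir.listAll()) {
      dir.deleteFile(file); // removes segments, temp recovery files, and checksum files alike
    }
  }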
Example #2
  @Test
  public void testCleanupFromSnapshot() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    // this time, use a random codec...
    IndexWriterConfig indexWriterConfig =
        newIndexWriterConfig(random(), new MockAnalyzer(random())).setCodec(actualDefaultCodec());
    // we keep all commits, which allows us to clean based on multiple snapshots
    indexWriterConfig.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    IndexWriter writer = new IndexWriter(store.directory(), indexWriterConfig);
    int docs = 1 + random().nextInt(100);
    int numCommits = 0;
    for (int i = 0; i < docs; i++) {
      if (i > 0 && randomIntBetween(0, 10) == 0) {
        writer.commit();
        numCommits++;
      }
      Document doc = new Document();
      doc.add(
          new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }
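    // if the loop never committed, commit now and add one more (still uncommitted) document so
    // the final commit below differs from the first snapshot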
    if (numCommits < 1) {
      writer.commit();
      Document doc = new Document();
      doc.add(
          new TextField(
              "id", "" + docs++, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }

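    // file metadata of the latest commit so far - the "first" snapshot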
    Store.MetadataSnapshot firstMeta = store.getMetadata();

    if (random().nextBoolean()) {
      for (int i = 0; i < docs; i++) {
        if (random().nextBoolean()) {
          Document doc = new Document();
          doc.add(
              new TextField(
                  "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          doc.add(
              new TextField(
                  "body",
                  TestUtil.randomRealisticUnicodeString(random()),
                  random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          writer.updateDocument(new Term("id", "" + i), doc);
        }
      }
    }
    writer.commit();
    writer.close();

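    // metadata of the final commit; with NoDeletionPolicy the files behind the first snapshot
    // are still on disk alongside it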
    Store.MetadataSnapshot secondMeta = store.getMetadata();

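    // record a legacy checksum entry for every file (hashing only the segments_N files)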
    Store.LegacyChecksums checksums = new Store.LegacyChecksums();
    Map<String, StoreFileMetaData> legacyMeta = new HashMap<>();
    for (String file : store.directory().listAll()) {
      if (file.equals("write.lock") || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)) {
        continue;
      }
      BytesRef hash = new BytesRef();
      if (file.startsWith("segments")) {
        hash = Store.MetadataSnapshot.hashFile(store.directory(), file);
      }
      StoreFileMetaData storeFileMetaData =
          new StoreFileMetaData(
              file, store.directory().fileLength(file), file + "checksum", null, hash);
      legacyMeta.put(file, storeFileMetaData);
      checksums.add(storeFileMetaData);
    }
    checksums.write(
        store); // write one checksum file here - we expect it to survive all the cleanups

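    // clean against one of the two snapshots and verify: every survivor belongs to that snapshot
    // (or is a checksum file), and at least one survivor is absent from the other snapshot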
    if (randomBoolean()) {
      store.cleanupAndVerify("test", firstMeta);
      String[] strings = store.directory().listAll();
      int numChecksums = 0;
      int numNotFound = 0;
      for (String file : strings) {
        assertTrue(firstMeta.contains(file) || Store.isChecksum(file));
        if (Store.isChecksum(file)) {
          numChecksums++;
        } else if (secondMeta.contains(file) == false) {
          numNotFound++;
        }
      }
      assertTrue(
          "at least one file must not be in here since we have two commits", numNotFound > 0);
      assertEquals(
          "we wrote one checksum but it's gone now - checksums are supposed to be kept",
          1,
          numChecksums);
    } else {
      store.cleanupAndVerify("test", secondMeta);
      String[] strings = store.directory().listAll();
      int numChecksums = 0;
      int numNotFound = 0;
      for (String file : strings) {
        assertTrue(secondMeta.contains(file) || Store.isChecksum(file));
        if (Store.isChecksum(file)) {
          numChecksums++;
        } else if (firstMeta.contains(file) == false) {
          numNotFound++;
        }
      }
      assertTrue(
          "at least one file must not be in here since we have two commits", numNotFound > 0);
      assertEquals(
          "we wrote one checksum but it's gone now - checksums are supposed to be kept",
          1,
          numChecksums);
    }

    store.deleteContent();
    IOUtils.close(store);
  }
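The two branches above are mirror images of each other, so the shared invariant could be expressed once in a helper. A hedged sketch (the helper name is hypothetical; it only reuses the Store and MetadataSnapshot calls already shown in the test):

  // Hypothetical helper: after cleaning against `kept`, every survivor must belong to `kept`
  // or be a checksum file, at least one survivor must be absent from `other` (the two commits
  // differ), and exactly one checksum file must remain.
  private void assertCleanedAgainst(
      Store store, Store.MetadataSnapshot kept, Store.MetadataSnapshot other) throws IOException {
    store.cleanupAndVerify("test", kept);
    int numChecksums = 0;
    int numNotFound = 0;
    for (String file : store.directory().listAll()) {
      assertTrue(kept.contains(file) || Store.isChecksum(file));
      if (Store.isChecksum(file)) {
        numChecksums++;
      } else if (other.contains(file) == false) {
        numNotFound++;
      }
    }
    assertTrue("at least one file must not be in here since we have two commits", numNotFound > 0);
    assertEquals("checksums are supposed to be kept", 1, numChecksums);
  }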