@Override public void messageReceived(RecoveryCleanFilesRequest request, TransportChannel channel) throws Exception { try (RecoveriesCollection.StatusRef statusRef = onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) { final RecoveryStatus recoveryStatus = statusRef.status(); recoveryStatus.state().getTranslog().totalOperations(request.totalTranslogOps()); // first, we go and move files that were created with the recovery id suffix to // the actual names, its ok if we have a corrupted index here, since we have replicas // to recover from in case of a full cluster shutdown just when this code executes... recoveryStatus .indexShard() .deleteShardState(); // we have to delete it first since even if we fail to rename the // shard might be invalid recoveryStatus.renameAllTempFiles(); final Store store = recoveryStatus.store(); // now write checksums recoveryStatus.legacyChecksums().write(store); Store.MetadataSnapshot sourceMetaData = request.sourceMetaSnapshot(); try { store.cleanupAndVerify("recovery CleanFilesRequestHandler", sourceMetaData); } catch (CorruptIndexException | IndexFormatTooNewException | IndexFormatTooOldException ex) { // this is a fatal exception at this stage. // this means we transferred files from the remote that have not be checksummed and they // are // broken. We have to clean up this shard entirely, remove all files and bubble it up to // the // source shard since this index might be broken there as well? The Source can handle this // and checks // its content on disk if possible. try { try { store.removeCorruptionMarker(); } finally { Lucene.cleanLuceneIndex(store.directory()); // clean up and delete all files } } catch (Throwable e) { logger.debug("Failed to clean lucene index", e); ex.addSuppressed(e); } RecoveryFailedException rfe = new RecoveryFailedException( recoveryStatus.state(), "failed to clean after recovery", ex); recoveryStatus.fail(rfe, true); throw rfe; } catch (Exception ex) { RecoveryFailedException rfe = new RecoveryFailedException( recoveryStatus.state(), "failed to clean after recovery", ex); recoveryStatus.fail(rfe, true); throw rfe; } channel.sendResponse(TransportResponse.Empty.INSTANCE); } }
@Test public void testCleanupFromSnapshot() throws IOException { final ShardId shardId = new ShardId(new Index("index"), 1); DirectoryService directoryService = new LuceneManagedDirectoryService(random()); Store store = new Store( shardId, ImmutableSettings.EMPTY, directoryService, randomDistributor(directoryService), new DummyShardLock(shardId)); // this time random codec.... IndexWriterConfig indexWriterConfig = newIndexWriterConfig(random(), new MockAnalyzer(random())).setCodec(actualDefaultCodec()); // we keep all commits and that allows us clean based on multiple snapshots indexWriterConfig.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE); IndexWriter writer = new IndexWriter(store.directory(), indexWriterConfig); int docs = 1 + random().nextInt(100); int numCommits = 0; for (int i = 0; i < docs; i++) { if (i > 0 && randomIntBetween(0, 10) == 0) { writer.commit(); numCommits++; } Document doc = new Document(); doc.add( new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add( new TextField( "body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add( new SortedDocValuesField( "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random())))); writer.addDocument(doc); } if (numCommits < 1) { writer.commit(); Document doc = new Document(); doc.add( new TextField( "id", "" + docs++, random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add( new TextField( "body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add( new SortedDocValuesField( "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random())))); writer.addDocument(doc); } Store.MetadataSnapshot firstMeta = store.getMetadata(); if (random().nextBoolean()) { for (int i = 0; i < docs; i++) { if (random().nextBoolean()) { Document doc = new Document(); doc.add( new TextField( "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); doc.add( new TextField( "body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.updateDocument(new Term("id", "" + i), doc); } } } writer.commit(); writer.close(); Store.MetadataSnapshot secondMeta = store.getMetadata(); Store.LegacyChecksums checksums = new Store.LegacyChecksums(); Map<String, StoreFileMetaData> legacyMeta = new HashMap<>(); for (String file : store.directory().listAll()) { if (file.equals("write.lock") || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)) { continue; } BytesRef hash = new BytesRef(); if (file.startsWith("segments")) { hash = Store.MetadataSnapshot.hashFile(store.directory(), file); } StoreFileMetaData storeFileMetaData = new StoreFileMetaData( file, store.directory().fileLength(file), file + "checksum", null, hash); legacyMeta.put(file, storeFileMetaData); checksums.add(storeFileMetaData); } checksums.write( store); // write one checksum file here - we expect it to survive all the cleanups if (randomBoolean()) { store.cleanupAndVerify("test", firstMeta); String[] strings = store.directory().listAll(); int numChecksums = 0; int numNotFound = 0; for (String file : strings) { assertTrue(firstMeta.contains(file) || Store.isChecksum(file)); if (Store.isChecksum(file)) { numChecksums++; } else if (secondMeta.contains(file) == false) { numNotFound++; } } assertTrue( "at least one file must not be in here since we have two commits?", numNotFound > 0); assertEquals( "we wrote one checksum but it's gone now? - checksums are supposed to be kept", numChecksums, 1); } else { store.cleanupAndVerify("test", secondMeta); String[] strings = store.directory().listAll(); int numChecksums = 0; int numNotFound = 0; for (String file : strings) { assertTrue(secondMeta.contains(file) || Store.isChecksum(file)); if (Store.isChecksum(file)) { numChecksums++; } else if (firstMeta.contains(file) == false) { numNotFound++; } } assertTrue( "at least one file must not be in here since we have two commits?", numNotFound > 0); assertEquals( "we wrote one checksum but it's gone now? - checksums are supposed to be kept", numChecksums, 1); } store.deleteContent(); IOUtils.close(store); }