private void recoverIndex(CommitPoint commitPoint, ImmutableMap<String, BlobMetaData> blobs) throws Exception { int numberOfFiles = 0; long totalSize = 0; int numberOfReusedFiles = 0; long reusedTotalSize = 0; List<CommitPoint.FileInfo> filesToRecover = Lists.newArrayList(); for (CommitPoint.FileInfo fileInfo : commitPoint.indexFiles()) { String fileName = fileInfo.physicalName(); StoreFileMetaData md = null; try { md = store.metaData(fileName); } catch (Exception e) { // no file } // we don't compute checksum for segments, so always recover them if (!fileName.startsWith("segments") && md != null && fileInfo.isSame(md)) { numberOfFiles++; totalSize += md.length(); numberOfReusedFiles++; reusedTotalSize += md.length(); if (logger.isTraceEnabled()) { logger.trace( "not_recovering [{}], exists in local store and is same", fileInfo.physicalName()); } } else { if (logger.isTraceEnabled()) { if (md == null) { logger.trace( "recovering [{}], does not exists in local store", fileInfo.physicalName()); } else { logger.trace( "recovering [{}], exists in local store but is different", fileInfo.physicalName()); } } numberOfFiles++; totalSize += fileInfo.length(); filesToRecover.add(fileInfo); } } recoveryStatus.index().files(numberOfFiles, totalSize, numberOfReusedFiles, reusedTotalSize); if (filesToRecover.isEmpty()) { logger.trace("no files to recover, all exists within the local store"); } if (logger.isTraceEnabled()) { logger.trace( "recovering_files [{}] with total_size [{}], reusing_files [{}] with reused_size [{}]", numberOfFiles, new ByteSizeValue(totalSize), numberOfReusedFiles, new ByteSizeValue(reusedTotalSize)); } final CountDownLatch latch = new CountDownLatch(filesToRecover.size()); final CopyOnWriteArrayList<Throwable> failures = new CopyOnWriteArrayList<Throwable>(); for (final CommitPoint.FileInfo fileToRecover : filesToRecover) { recoverFile(fileToRecover, blobs, latch, failures); } try { latch.await(); } catch (InterruptedException e) { throw new IndexShardGatewayRecoveryException( shardId, "Interrupted while recovering index", e); } if (!failures.isEmpty()) { throw new IndexShardGatewayRecoveryException( shardId, "Failed to recover index", failures.get(0)); } // read the gateway data persisted long version = -1; try { if (IndexReader.indexExists(store.directory())) { version = IndexReader.getCurrentVersion(store.directory()); } } catch (IOException e) { throw new IndexShardGatewayRecoveryException( shardId(), "Failed to fetch index version after copying it over", e); } recoveryStatus.index().updateVersion(version); /// now, go over and clean files that are in the store, but were not in the gateway try { for (String storeFile : store.directory().listAll()) { if (!commitPoint.containPhysicalIndexFile(storeFile)) { try { store.directory().deleteFile(storeFile); } catch (Exception e) { // ignore } } } } catch (Exception e) { // ignore } }
@Override public void recover(RecoveryStatus recoveryStatus) throws IndexShardGatewayRecoveryException { this.recoveryStatus = recoveryStatus; final ImmutableMap<String, BlobMetaData> blobs; try { blobs = blobContainer.listBlobs(); } catch (IOException e) { throw new IndexShardGatewayRecoveryException(shardId, "Failed to list content of gateway", e); } List<CommitPoint> commitPointsList = Lists.newArrayList(); boolean atLeastOneCommitPointExists = false; for (String name : blobs.keySet()) { if (name.startsWith("commit-")) { atLeastOneCommitPointExists = true; try { commitPointsList.add(CommitPoints.fromXContent(blobContainer.readBlobFully(name))); } catch (Exception e) { logger.warn("failed to read commit point [{}]", e, name); } } } if (atLeastOneCommitPointExists && commitPointsList.isEmpty()) { // no commit point managed to load, bail so we won't corrupt the index, will require manual // intervention throw new IndexShardGatewayRecoveryException( shardId, "Commit points exists but none could be loaded", null); } CommitPoints commitPoints = new CommitPoints(commitPointsList); if (commitPoints.commits().isEmpty()) { // no commit points, clean the store just so we won't recover wrong files try { indexShard.store().deleteContent(); } catch (IOException e) { logger.warn("failed to clean store before starting shard", e); } recoveryStatus.index().startTime(System.currentTimeMillis()); recoveryStatus.index().time(System.currentTimeMillis() - recoveryStatus.index().startTime()); recoveryStatus.translog().startTime(System.currentTimeMillis()); recoveryStatus .translog() .time(System.currentTimeMillis() - recoveryStatus.index().startTime()); return; } for (CommitPoint commitPoint : commitPoints) { if (!commitPointExistsInBlobs(commitPoint, blobs)) { logger.warn( "listed commit_point [{}]/[{}], but not all files exists, ignoring", commitPoint.name(), commitPoint.version()); continue; } try { recoveryStatus.index().startTime(System.currentTimeMillis()); recoveryStatus.updateStage(RecoveryStatus.Stage.INDEX); recoverIndex(commitPoint, blobs); recoveryStatus .index() .time(System.currentTimeMillis() - recoveryStatus.index().startTime()); recoveryStatus.translog().startTime(System.currentTimeMillis()); recoveryStatus.updateStage(RecoveryStatus.Stage.TRANSLOG); recoverTranslog(commitPoint, blobs); recoveryStatus .translog() .time(System.currentTimeMillis() - recoveryStatus.index().startTime()); return; } catch (Exception e) { throw new IndexShardGatewayRecoveryException( shardId, "failed to recover commit_point [" + commitPoint.name() + "]/[" + commitPoint.version() + "]", e); } } throw new IndexShardGatewayRecoveryException( shardId, "No commit point data is available in gateway", null); }