@Override public void recover(RecoveryStatus recoveryStatus) throws IndexShardGatewayRecoveryException { this.recoveryStatus = recoveryStatus; final ImmutableMap<String, BlobMetaData> blobs; try { blobs = blobContainer.listBlobs(); } catch (IOException e) { throw new IndexShardGatewayRecoveryException(shardId, "Failed to list content of gateway", e); } List<CommitPoint> commitPointsList = Lists.newArrayList(); boolean atLeastOneCommitPointExists = false; for (String name : blobs.keySet()) { if (name.startsWith("commit-")) { atLeastOneCommitPointExists = true; try { commitPointsList.add(CommitPoints.fromXContent(blobContainer.readBlobFully(name))); } catch (Exception e) { logger.warn("failed to read commit point [{}]", e, name); } } } if (atLeastOneCommitPointExists && commitPointsList.isEmpty()) { // no commit point managed to load, bail so we won't corrupt the index, will require manual // intervention throw new IndexShardGatewayRecoveryException( shardId, "Commit points exists but none could be loaded", null); } CommitPoints commitPoints = new CommitPoints(commitPointsList); if (commitPoints.commits().isEmpty()) { // no commit points, clean the store just so we won't recover wrong files try { indexShard.store().deleteContent(); } catch (IOException e) { logger.warn("failed to clean store before starting shard", e); } recoveryStatus.index().startTime(System.currentTimeMillis()); recoveryStatus.index().time(System.currentTimeMillis() - recoveryStatus.index().startTime()); recoveryStatus.translog().startTime(System.currentTimeMillis()); recoveryStatus .translog() .time(System.currentTimeMillis() - recoveryStatus.index().startTime()); return; } for (CommitPoint commitPoint : commitPoints) { if (!commitPointExistsInBlobs(commitPoint, blobs)) { logger.warn( "listed commit_point [{}]/[{}], but not all files exists, ignoring", commitPoint.name(), commitPoint.version()); continue; } try { recoveryStatus.index().startTime(System.currentTimeMillis()); recoveryStatus.updateStage(RecoveryStatus.Stage.INDEX); recoverIndex(commitPoint, blobs); recoveryStatus .index() .time(System.currentTimeMillis() - recoveryStatus.index().startTime()); recoveryStatus.translog().startTime(System.currentTimeMillis()); recoveryStatus.updateStage(RecoveryStatus.Stage.TRANSLOG); recoverTranslog(commitPoint, blobs); recoveryStatus .translog() .time(System.currentTimeMillis() - recoveryStatus.index().startTime()); return; } catch (Exception e) { throw new IndexShardGatewayRecoveryException( shardId, "failed to recover commit_point [" + commitPoint.name() + "]/[" + commitPoint.version() + "]", e); } } throw new IndexShardGatewayRecoveryException( shardId, "No commit point data is available in gateway", null); }
private void doSnapshot(final Snapshot snapshot) throws IndexShardGatewaySnapshotFailedException { ImmutableMap<String, BlobMetaData> blobs; try { blobs = blobContainer.listBlobs(); } catch (IOException e) { throw new IndexShardGatewaySnapshotFailedException(shardId, "failed to list blobs", e); } long generation = findLatestFileNameGeneration(blobs); CommitPoints commitPoints = buildCommitPoints(blobs); currentSnapshotStatus.index().startTime(System.currentTimeMillis()); currentSnapshotStatus.updateStage(SnapshotStatus.Stage.INDEX); final SnapshotIndexCommit snapshotIndexCommit = snapshot.indexCommit(); final Translog.Snapshot translogSnapshot = snapshot.translogSnapshot(); final CountDownLatch indexLatch = new CountDownLatch(snapshotIndexCommit.getFiles().length); final CopyOnWriteArrayList<Throwable> failures = new CopyOnWriteArrayList<Throwable>(); final List<CommitPoint.FileInfo> indexCommitPointFiles = Lists.newArrayList(); int indexNumberOfFiles = 0; long indexTotalFilesSize = 0; for (final String fileName : snapshotIndexCommit.getFiles()) { StoreFileMetaData md; try { md = store.metaData(fileName); } catch (IOException e) { throw new IndexShardGatewaySnapshotFailedException( shardId, "Failed to get store file metadata", e); } boolean snapshotRequired = false; if (snapshot.indexChanged() && fileName.equals(snapshotIndexCommit.getSegmentsFileName())) { snapshotRequired = true; // we want to always snapshot the segment file if the index changed } CommitPoint.FileInfo fileInfo = commitPoints.findPhysicalIndexFile(fileName); if (fileInfo == null || !fileInfo.isSame(md) || !commitPointFileExistsInBlobs(fileInfo, blobs)) { // commit point file does not exists in any commit point, or has different length, or does // not fully exists in the listed blobs snapshotRequired = true; } if (snapshotRequired) { indexNumberOfFiles++; indexTotalFilesSize += md.length(); // create a new FileInfo try { CommitPoint.FileInfo snapshotFileInfo = new CommitPoint.FileInfo( fileNameFromGeneration(++generation), fileName, md.length(), md.checksum()); indexCommitPointFiles.add(snapshotFileInfo); snapshotFile(snapshotIndexCommit.getDirectory(), snapshotFileInfo, indexLatch, failures); } catch (IOException e) { failures.add(e); indexLatch.countDown(); } } else { indexCommitPointFiles.add(fileInfo); indexLatch.countDown(); } } currentSnapshotStatus.index().files(indexNumberOfFiles, indexTotalFilesSize); try { indexLatch.await(); } catch (InterruptedException e) { failures.add(e); } if (!failures.isEmpty()) { throw new IndexShardGatewaySnapshotFailedException( shardId(), "Failed to perform snapshot (index files)", failures.get(failures.size() - 1)); } currentSnapshotStatus .index() .time(System.currentTimeMillis() - currentSnapshotStatus.index().startTime()); currentSnapshotStatus.updateStage(SnapshotStatus.Stage.TRANSLOG); currentSnapshotStatus.translog().startTime(System.currentTimeMillis()); // Note, we assume the snapshot is always started from "base 0". We need to seek forward if we // want to lastTranslogPosition if we want the delta List<CommitPoint.FileInfo> translogCommitPointFiles = Lists.newArrayList(); int expectedNumberOfOperations = 0; boolean snapshotRequired = false; if (snapshot.newTranslogCreated()) { if (translogSnapshot.lengthInBytes() > 0) { snapshotRequired = true; expectedNumberOfOperations = translogSnapshot.estimatedTotalOperations(); } } else { // if we have a commit point, check that we have all the files listed in it in the blob store if (!commitPoints.commits().isEmpty()) { CommitPoint commitPoint = commitPoints.commits().get(0); boolean allTranslogFilesExists = true; for (CommitPoint.FileInfo fileInfo : commitPoint.translogFiles()) { if (!commitPointFileExistsInBlobs(fileInfo, blobs)) { allTranslogFilesExists = false; break; } } // if everything exists, we can seek forward in case there are new operations, otherwise, we // copy over all again... if (allTranslogFilesExists) { translogCommitPointFiles.addAll(commitPoint.translogFiles()); if (snapshot.sameTranslogNewOperations()) { translogSnapshot.seekForward(snapshot.lastTranslogLength()); if (translogSnapshot.lengthInBytes() > 0) { snapshotRequired = true; expectedNumberOfOperations = translogSnapshot.estimatedTotalOperations() - snapshot.lastTotalTranslogOperations(); } } // else (no operations, nothing to snapshot) } else { // a full translog snapshot is required if (translogSnapshot.lengthInBytes() > 0) { expectedNumberOfOperations = translogSnapshot.estimatedTotalOperations(); snapshotRequired = true; } } } else { // no commit point, snapshot all the translog if (translogSnapshot.lengthInBytes() > 0) { expectedNumberOfOperations = translogSnapshot.estimatedTotalOperations(); snapshotRequired = true; } } } currentSnapshotStatus.translog().expectedNumberOfOperations(expectedNumberOfOperations); if (snapshotRequired) { CommitPoint.FileInfo addedTranslogFileInfo = new CommitPoint.FileInfo( fileNameFromGeneration(++generation), "translog-" + translogSnapshot.translogId(), translogSnapshot.lengthInBytes(), null /* no need for checksum in translog */); translogCommitPointFiles.add(addedTranslogFileInfo); try { snapshotTranslog(translogSnapshot, addedTranslogFileInfo); } catch (Exception e) { throw new IndexShardGatewaySnapshotFailedException( shardId, "Failed to snapshot translog", e); } } currentSnapshotStatus .translog() .time(System.currentTimeMillis() - currentSnapshotStatus.translog().startTime()); // now create and write the commit point currentSnapshotStatus.updateStage(SnapshotStatus.Stage.FINALIZE); long version = 0; if (!commitPoints.commits().isEmpty()) { version = commitPoints.commits().iterator().next().version() + 1; } String commitPointName = "commit-" + Long.toString(version, Character.MAX_RADIX); CommitPoint commitPoint = new CommitPoint( version, commitPointName, CommitPoint.Type.GENERATED, indexCommitPointFiles, translogCommitPointFiles); try { byte[] commitPointData = CommitPoints.toXContent(commitPoint); blobContainer.writeBlob( commitPointName, new FastByteArrayInputStream(commitPointData), commitPointData.length); } catch (Exception e) { throw new IndexShardGatewaySnapshotFailedException( shardId, "Failed to write commit point", e); } // delete all files that are not referenced by any commit point // build a new CommitPoint, that includes this one and all the saved ones List<CommitPoint> newCommitPointsList = Lists.newArrayList(); newCommitPointsList.add(commitPoint); for (CommitPoint point : commitPoints) { if (point.type() == CommitPoint.Type.SAVED) { newCommitPointsList.add(point); } } CommitPoints newCommitPoints = new CommitPoints(newCommitPointsList); // first, go over and delete all the commit points for (String blobName : blobs.keySet()) { if (!blobName.startsWith("commit-")) { continue; } long checkedVersion = Long.parseLong(blobName.substring("commit-".length()), Character.MAX_RADIX); if (!newCommitPoints.hasVersion(checkedVersion)) { try { blobContainer.deleteBlob(blobName); } catch (IOException e) { // ignore } } } // now go over all the blobs, and if they don't exists in a commit point, delete them for (String blobName : blobs.keySet()) { String name = blobName; if (!name.startsWith("__")) { continue; } if (blobName.contains(".part")) { name = blobName.substring(0, blobName.indexOf(".part")); } if (newCommitPoints.findNameFile(name) == null) { try { blobContainer.deleteBlob(blobName); } catch (IOException e) { // ignore, will delete it laters } } } }