@Override
public void messageReceived(RecoveryCleanFilesRequest request, TransportChannel channel) throws Exception {
    try (RecoveriesCollection.StatusRef statusRef =
            onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
        final RecoveryStatus recoveryStatus = statusRef.status();
        recoveryStatus.state().getTranslog().totalOperations(request.totalTranslogOps());
        // First, move the files that were created with the recovery id suffix to their actual names.
        // It's OK if we end up with a corrupted index here, since we have replicas to recover from
        // in case of a full cluster shutdown just when this code executes...
        recoveryStatus.indexShard().deleteShardState(); // we have to delete it first, since even if we
                                                        // fail to rename, the shard might be invalid
        recoveryStatus.renameAllTempFiles();
        final Store store = recoveryStatus.store();
        // now write the checksums
        recoveryStatus.legacyChecksums().write(store);
        Store.MetadataSnapshot sourceMetaData = request.sourceMetaSnapshot();
        try {
            store.cleanupAndVerify("recovery CleanFilesRequestHandler", sourceMetaData);
        } catch (CorruptIndexException | IndexFormatTooNewException | IndexFormatTooOldException ex) {
            // This is a fatal exception at this stage: it means we transferred files from the remote
            // that have not been checksummed and are broken. We have to clean up this shard entirely,
            // remove all files, and bubble the failure up to the source shard, since the index might
            // be broken there as well. The source can handle this and check its content on disk if possible.
            try {
                try {
                    store.removeCorruptionMarker();
                } finally {
                    Lucene.cleanLuceneIndex(store.directory()); // clean up and delete all files
                }
            } catch (Throwable e) {
                logger.debug("Failed to clean lucene index", e);
                ex.addSuppressed(e);
            }
            RecoveryFailedException rfe =
                new RecoveryFailedException(recoveryStatus.state(), "failed to clean after recovery", ex);
            recoveryStatus.fail(rfe, true);
            throw rfe;
        } catch (Exception ex) {
            RecoveryFailedException rfe =
                new RecoveryFailedException(recoveryStatus.state(), "failed to clean after recovery", ex);
            recoveryStatus.fail(rfe, true);
            throw rfe;
        }
        channel.sendResponse(TransportResponse.Empty.INSTANCE);
    }
}
@Override
public void messageReceived(RecoveryFinalizeRecoveryRequest request, TransportChannel channel) throws Exception {
    try (RecoveriesCollection.StatusRef statusRef =
            onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
        final RecoveryStatus recoveryStatus = statusRef.status();
        recoveryStatus.indexShard().finalizeRecovery();
    }
    channel.sendResponse(TransportResponse.Empty.INSTANCE);
}
@Override
public void messageReceived(final RecoveryFileChunkRequest request, TransportChannel channel) throws Exception {
    try (RecoveriesCollection.StatusRef statusRef =
            onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
        final RecoveryStatus recoveryStatus = statusRef.status();
        final Store store = recoveryStatus.store();
        recoveryStatus.state().getTranslog().totalOperations(request.totalTranslogOps());
        final RecoveryState.Index indexState = recoveryStatus.state().getIndex();
        if (request.sourceThrottleTimeInNanos() != RecoveryState.Index.UNKNOWN) {
            indexState.addSourceThrottling(request.sourceThrottleTimeInNanos());
        }
        IndexOutput indexOutput;
        if (request.position() == 0) {
            indexOutput = recoveryStatus.openAndPutIndexOutput(request.name(), request.metadata(), store);
        } else {
            indexOutput = recoveryStatus.getOpenIndexOutput(request.name());
        }
        BytesReference content = request.content();
        if (!content.hasArray()) {
            content = content.toBytesArray();
        }
        RateLimiter rl = recoverySettings.rateLimiter();
        if (rl != null) {
            long bytes = bytesSinceLastPause.addAndGet(content.length());
            if (bytes > rl.getMinPauseCheckBytes()) {
                // time to pause
                bytesSinceLastPause.addAndGet(-bytes);
                long throttleTimeInNanos = rl.pause(bytes);
                indexState.addTargetThrottling(throttleTimeInNanos);
                recoveryStatus.indexShard().recoveryStats().addThrottleTime(throttleTimeInNanos);
            }
        }
        indexOutput.writeBytes(content.array(), content.arrayOffset(), content.length());
        indexState.addRecoveredBytesToFile(request.name(), content.length());
        if (indexOutput.getFilePointer() >= request.length() || request.lastChunk()) {
            try {
                Store.verify(indexOutput);
            } finally {
                // we are done
                indexOutput.close();
            }
            // write the checksum
            recoveryStatus.legacyChecksums().add(request.metadata());
            final String temporaryFileName = recoveryStatus.getTempNameForFile(request.name());
            assert Arrays.asList(store.directory().listAll()).contains(temporaryFileName);
            store.directory().sync(Collections.singleton(temporaryFileName));
            IndexOutput remove = recoveryStatus.removeOpenIndexOutputs(request.name());
            assert remove == null || remove == indexOutput; // remove may be null if the recovery already finished
        }
    }
    channel.sendResponse(TransportResponse.Empty.INSTANCE);
}
@Override
public void messageReceived(RecoveryPrepareForTranslogOperationsRequest request, TransportChannel channel)
        throws Exception {
    try (RecoveriesCollection.StatusRef statusRef =
            onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
        final RecoveryStatus recoveryStatus = statusRef.status();
        recoveryStatus.state().getTranslog().totalOperations(request.totalTranslogOps());
        // the file-based phase is done; skip local translog recovery so the shard is ready to
        // apply the translog operations replayed from the source
        recoveryStatus.indexShard().skipTranslogRecovery();
    }
    channel.sendResponse(TransportResponse.Empty.INSTANCE);
}
@Override
public void doRun() {
    RecoveriesCollection.StatusRef statusRef = onGoingRecoveries.getStatus(recoveryId);
    if (statusRef == null) {
        logger.trace("not running recovery with id [{}] - can't find it (probably finished)", recoveryId);
        return;
    }
    try {
        doRecovery(statusRef.status());
    } finally {
        statusRef.close();
    }
}
@Override
public void onFailure(Throwable t) {
    try (RecoveriesCollection.StatusRef statusRef = onGoingRecoveries.getStatus(recoveryId)) {
        if (statusRef != null) {
            logger.error("unexpected error during recovery [{}], failing shard", t, recoveryId);
            onGoingRecoveries.failRecovery(
                recoveryId,
                new RecoveryFailedException(statusRef.status().state(), "unexpected error", t),
                true // be safe
            );
        } else {
            logger.debug("unexpected error during recovery, but recovery id [{}] is finished", t, recoveryId);
        }
    }
}
@Override
public void messageReceived(RecoveryFilesInfoRequest request, TransportChannel channel) throws Exception {
    try (RecoveriesCollection.StatusRef statusRef =
            onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
        final RecoveryStatus recoveryStatus = statusRef.status();
        final RecoveryState.Index index = recoveryStatus.state().getIndex();
        for (int i = 0; i < request.phase1ExistingFileNames.size(); i++) {
            index.addFileDetail(
                request.phase1ExistingFileNames.get(i), request.phase1ExistingFileSizes.get(i), true);
        }
        for (int i = 0; i < request.phase1FileNames.size(); i++) {
            index.addFileDetail(request.phase1FileNames.get(i), request.phase1FileSizes.get(i), false);
        }
        recoveryStatus.state().getTranslog().totalOperations(request.totalTranslogOps);
        recoveryStatus.state().getTranslog().totalOperationsOnStart(request.totalTranslogOps);
        // recoveryBytesCount / recoveryFileCount will be set as we go...
        channel.sendResponse(TransportResponse.Empty.INSTANCE);
    }
}
@Override
public void messageReceived(final RecoveryTranslogOperationsRequest request, final TransportChannel channel)
        throws Exception {
    try (RecoveriesCollection.StatusRef statusRef =
            onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
        final ClusterStateObserver observer = new ClusterStateObserver(clusterService, null, logger);
        final RecoveryStatus recoveryStatus = statusRef.status();
        final RecoveryState.Translog translog = recoveryStatus.state().getTranslog();
        translog.totalOperations(request.totalTranslogOps());
        assert recoveryStatus.indexShard().recoveryState() == recoveryStatus.state();
        try {
            recoveryStatus.indexShard().performBatchRecovery(request.operations());
            channel.sendResponse(TransportResponse.Empty.INSTANCE);
        } catch (TranslogRecoveryPerformer.BatchOperationException exception) {
            MapperException mapperException =
                (MapperException) ExceptionsHelper.unwrap(exception, MapperException.class);
            if (mapperException == null) {
                throw exception;
            }
            // In very rare cases a translog replay from the primary is processed before a mapping update
            // on this node, which causes local mapping changes. We want to wait until these mappings are
            // processed before retrying.
            logger.trace(
                "delaying recovery due to missing mapping changes (rolling back stats for [{}] ops)",
                exception,
                exception.completedOperations());
            translog.decrementRecoveredOperations(exception.completedOperations());
            // We do not need to use a timeout here since the entire recovery mechanism has an
            // inactivity protection (it will be canceled).
            observer.waitForNextChange(
                new ClusterStateObserver.Listener() {
                    @Override
                    public void onNewClusterState(ClusterState state) {
                        try {
                            messageReceived(request, channel);
                        } catch (Exception e) {
                            onFailure(e);
                        }
                    }

                    protected void onFailure(Exception e) {
                        try {
                            channel.sendResponse(e);
                        } catch (IOException e1) {
                            logger.warn("failed to send error back to recovery source", e1);
                        }
                    }

                    @Override
                    public void onClusterServiceClose() {
                        onFailure(new ElasticsearchException(
                            "cluster service was closed while waiting for mapping updates"));
                    }

                    @Override
                    public void onTimeout(TimeValue timeout) {
                        // note that we do not use a timeout (see comment above)
                        onFailure(new ElasticsearchTimeoutException(
                            "timed out waiting for mapping updates (timeout [" + timeout + "])"));
                    }
                });
        }
    }
}