예제 #1
0
 /**
  * Handles the "clean files" request of an ongoing recovery.
  *
  * <p>Updates the translog operation total from the request, deletes the persisted shard state,
  * renames the temporary recovery files, writes the legacy checksums and then cleans up and
  * verifies the store against the metadata snapshot sent by the source. Any failure of that last
  * step fails the recovery and is rethrown as a {@code RecoveryFailedException}; for corruption
  * or index-format failures the corruption marker is removed and the Lucene index is wiped first.
  *
  * @param request the clean-files request (recovery id, shard id, translog total, source snapshot)
  * @param channel channel on which the empty acknowledgement is sent on success
  * @throws Exception if the recovery cannot be looked up or the cleanup/verification fails
  */
 @Override
 public void messageReceived(RecoveryCleanFilesRequest request, TransportChannel channel)
     throws Exception {
   try (RecoveriesCollection.StatusRef ref =
       onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
     final RecoveryStatus status = ref.status();
     status.state().getTranslog().totalOperations(request.totalTranslogOps());
     // The shard state is deleted before the rename on purpose: even if the rename fails the
     // shard might already be invalid. A corrupted index at this point is acceptable because
     // replicas exist to recover from should a full cluster shutdown happen right now.
     status.indexShard().deleteShardState();
     // Move the files that were written with the recovery id suffix to their actual names.
     status.renameAllTempFiles();
     final Store targetStore = status.store();
     // Checksums are written only after the files carry their final names.
     status.legacyChecksums().write(targetStore);
     final Store.MetadataSnapshot sourceSnapshot = request.sourceMetaSnapshot();
     try {
       targetStore.cleanupAndVerify("recovery CleanFilesRequestHandler", sourceSnapshot);
     } catch (CorruptIndexException
         | IndexFormatTooNewException
         | IndexFormatTooOldException corruption) {
       // Fatal at this stage: files transferred from the remote were not checksummed and are
       // broken. Wipe this shard completely and bubble the failure up to the source shard, since
       // the index might be broken there as well; the source can handle this and checks its
       // on-disk content if possible.
       try {
         try {
           targetStore.removeCorruptionMarker();
         } finally {
           // Delete all index files even if removing the marker failed.
           Lucene.cleanLuceneIndex(targetStore.directory());
         }
       } catch (Throwable cleanupFailure) {
         logger.debug("Failed to clean lucene index", cleanupFailure);
         corruption.addSuppressed(cleanupFailure);
       }
       final RecoveryFailedException failure =
           new RecoveryFailedException(
               status.state(), "failed to clean after recovery", corruption);
       status.fail(failure, true);
       throw failure;
     } catch (Exception unexpected) {
       final RecoveryFailedException failure =
           new RecoveryFailedException(
               status.state(), "failed to clean after recovery", unexpected);
       status.fail(failure, true);
       throw failure;
     }
     channel.sendResponse(TransportResponse.Empty.INSTANCE);
   }
 }
예제 #2
0
 /**
  * Handles the "finalize recovery" request: looks up the ongoing recovery, asks its index shard
  * to finalize the recovery and, once the status reference is released, acknowledges with an
  * empty response.
  *
  * @param request the finalize request carrying the recovery id and shard id
  * @param channel channel on which the empty acknowledgement is sent
  * @throws Exception if the recovery cannot be looked up, finalization fails, or the response
  *     cannot be sent
  */
 @Override
 public void messageReceived(RecoveryFinalizeRecoveryRequest request, TransportChannel channel)
     throws Exception {
   try (RecoveriesCollection.StatusRef ref =
       onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
     ref.status().indexShard().finalizeRecovery();
   }
   // Respond only after the status reference has been closed.
   channel.sendResponse(TransportResponse.Empty.INSTANCE);
 }
예제 #3
0
 /**
  * Handles one chunk of a file sent by the recovery source.
  *
  * <p>The chunk is appended to the file's index output (opened when the chunk position is 0,
  * looked up otherwise), after an optional rate-limiter pause. When the output has reached the
  * requested length or the request flags the last chunk, the output is verified and closed, the
  * file metadata is added to the legacy checksums, the temporary file is synced and the output is
  * removed from the set of open outputs. An empty response is sent once the status reference has
  * been released.
  *
  * @param request the file chunk request (file name, metadata, position, content, throttle time)
  * @param channel channel on which the empty acknowledgement is sent
  * @throws Exception if the recovery cannot be looked up or writing/verifying the file fails
  */
 @Override
 public void messageReceived(final RecoveryFileChunkRequest request, TransportChannel channel)
     throws Exception {
   try (RecoveriesCollection.StatusRef ref =
       onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
     final RecoveryStatus status = ref.status();
     final Store targetStore = status.store();
     status.state().getTranslog().totalOperations(request.totalTranslogOps());
     final RecoveryState.Index indexStats = status.state().getIndex();
     // Record source-side throttling only when the request actually reports a value.
     if (request.sourceThrottleTimeInNanos() != RecoveryState.Index.UNKNOWN) {
       indexStats.addSourceThrottling(request.sourceThrottleTimeInNanos());
     }
     // Position 0 means this is the first chunk of the file, so the output is opened here;
     // later chunks reuse the output that is already open.
     final IndexOutput output =
         request.position() == 0
             ? status.openAndPutIndexOutput(request.name(), request.metadata(), targetStore)
             : status.getOpenIndexOutput(request.name());
     // writeBytes below needs a backing array, so convert when the content has none.
     BytesReference chunk = request.content();
     if (!chunk.hasArray()) {
       chunk = chunk.toBytesArray();
     }
     final RateLimiter limiter = recoverySettings.rateLimiter();
     if (limiter != null) {
       final long pendingBytes = bytesSinceLastPause.addAndGet(chunk.length());
       if (pendingBytes > limiter.getMinPauseCheckBytes()) {
         // Time to pause: take the counted bytes back out of the shared counter first.
         bytesSinceLastPause.addAndGet(-pendingBytes);
         final long pausedNanos = limiter.pause(pendingBytes);
         indexStats.addTargetThrottling(pausedNanos);
         status.indexShard().recoveryStats().addThrottleTime(pausedNanos);
       }
     }
     output.writeBytes(chunk.array(), chunk.arrayOffset(), chunk.length());
     indexStats.addRecoveredBytesToFile(request.name(), chunk.length());
     final boolean fileComplete =
         output.getFilePointer() >= request.length() || request.lastChunk();
     if (fileComplete) {
       try {
         Store.verify(output);
       } finally {
         // The file is done; close the output whether or not verification succeeded.
         output.close();
       }
       // Write the checksum.
       status.legacyChecksums().add(request.metadata());
       final String tempName = status.getTempNameForFile(request.name());
       assert Arrays.asList(targetStore.directory().listAll()).contains(tempName);
       targetStore.directory().sync(Collections.singleton(tempName));
       final IndexOutput removed = status.removeOpenIndexOutputs(request.name());
       assert removed == null || removed == output; // may be null if we already got finished
     }
   }
   channel.sendResponse(TransportResponse.Empty.INSTANCE);
 }
예제 #4
0
 /**
  * Handles the "prepare for translog operations" request: updates the translog operation total
  * from the request, tells the index shard to skip translog recovery and, once the status
  * reference is released, acknowledges with an empty response.
  *
  * @param request the prepare request carrying recovery id, shard id and translog total
  * @param channel channel on which the empty acknowledgement is sent
  * @throws Exception if the recovery cannot be looked up, the shard call fails, or the response
  *     cannot be sent
  */
 @Override
 public void messageReceived(
     RecoveryPrepareForTranslogOperationsRequest request, TransportChannel channel)
     throws Exception {
   try (RecoveriesCollection.StatusRef ref =
       onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
     final RecoveryStatus status = ref.status();
     status.state().getTranslog().totalOperations(request.totalTranslogOps());
     status.indexShard().skipTranslogRecovery();
   }
   // Respond only after the status reference has been closed.
   channel.sendResponse(TransportResponse.Empty.INSTANCE);
 }
예제 #5
0
 /**
  * Runs the recovery registered under {@code recoveryId}, if it can still be found.
  *
  * <p>When no status is registered for the id the method only logs at trace level and returns.
  * Otherwise the status reference is held for the duration of {@code doRecovery} and released
  * afterwards. The reference is managed with try-with-resources so that a failure while closing
  * it is attached as a suppressed exception instead of replacing a failure thrown by
  * {@code doRecovery}.
  */
 @Override
 public void doRun() {
   // try-with-resources skips close() for a null resource, so the lookup can live in the header.
   try (RecoveriesCollection.StatusRef statusRef = onGoingRecoveries.getStatus(recoveryId)) {
     if (statusRef == null) {
       logger.trace(
           "not running recovery with id [{}] - can't find it (probably finished)", recoveryId);
       return;
     }
     doRecovery(statusRef.status());
   }
 }
예제 #6
0
 /**
  * Called when the recovery task fails unexpectedly.
  *
  * <p>If the recovery is still registered, the error is logged and the recovery is failed with a
  * {@code RecoveryFailedException} wrapping {@code t}. If it is no longer registered, the error
  * is only logged at debug level.
  *
  * @param t the unexpected failure
  */
 @Override
 public void onFailure(Throwable t) {
   try (RecoveriesCollection.StatusRef ref = onGoingRecoveries.getStatus(recoveryId)) {
     if (ref == null) {
       // Nothing left to fail: the recovery is no longer registered.
       logger.debug(
           "unexpected error during recovery, but recovery id [{}] is finished", t, recoveryId);
       return;
     }
     logger.error("unexpected error during recovery [{}], failing shard", t, recoveryId);
     final RecoveryFailedException failure =
         new RecoveryFailedException(ref.status().state(), "unexpected error", t);
     onGoingRecoveries.failRecovery(recoveryId, failure, true /* be safe */);
   }
 }
예제 #7
0
 /**
  * Handles the "files info" request sent at the start of phase 1.
  *
  * <p>Registers every file named in the request with the recovery's index state (existing files
  * flagged {@code true}, the remaining phase 1 files flagged {@code false}), sets both the
  * translog operation total and the total-on-start from the request, and acknowledges with an
  * empty response.
  *
  * @param request the files-info request with file names, sizes and the translog total
  * @param channel channel on which the empty acknowledgement is sent
  * @throws Exception if the recovery cannot be looked up or the response cannot be sent
  */
 @Override
 public void messageReceived(RecoveryFilesInfoRequest request, TransportChannel channel)
     throws Exception {
   try (RecoveriesCollection.StatusRef ref =
       onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
     final RecoveryStatus status = ref.status();
     final RecoveryState.Index indexStats = status.state().getIndex();
     // Files listed as already existing.
     for (int pos = 0; pos < request.phase1ExistingFileNames.size(); pos++) {
       indexStats.addFileDetail(
           request.phase1ExistingFileNames.get(pos),
           request.phase1ExistingFileSizes.get(pos),
           true);
     }
     // Remaining phase 1 files.
     for (int pos = 0; pos < request.phase1FileNames.size(); pos++) {
       indexStats.addFileDetail(
           request.phase1FileNames.get(pos), request.phase1FileSizes.get(pos), false);
     }
     final RecoveryState.Translog translogStats = status.state().getTranslog();
     translogStats.totalOperations(request.totalTranslogOps);
     translogStats.totalOperationsOnStart(request.totalTranslogOps);
     // recoveryBytesCount / recoveryFileCount will be set as we go...
     channel.sendResponse(TransportResponse.Empty.INSTANCE);
   }
 }
예제 #8
0
    /**
     * Handles a batch of translog operations sent by the recovery source.
     *
     * <p>Updates the translog operation total from the request, applies the batch through
     * {@code performBatchRecovery} and acknowledges with an empty response. If the batch fails
     * with a {@code BatchOperationException} that wraps a {@code MapperException}, the operations
     * reported as completed are subtracted from the recovered-operations count and the same
     * request is handled again on the next cluster state change. A batch failure that does not
     * wrap a {@code MapperException} is rethrown.
     *
     * @param request the translog operations request (recovery id, shard id, operations, total)
     * @param channel channel used for the acknowledgement, or for the failure of a retried request
     * @throws Exception if the recovery cannot be looked up or the batch fails for a reason other
     *     than a mapper exception
     */
    @Override
    public void messageReceived(
        final RecoveryTranslogOperationsRequest request, final TransportChannel channel)
        throws Exception {
      try (RecoveriesCollection.StatusRef statusRef =
          onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
        // Created before the batch is applied; it is only used in the mapper-exception path below.
        final ClusterStateObserver observer =
            new ClusterStateObserver(clusterService, null, logger);
        final RecoveryStatus recoveryStatus = statusRef.status();
        final RecoveryState.Translog translog = recoveryStatus.state().getTranslog();
        translog.totalOperations(request.totalTranslogOps());
        assert recoveryStatus.indexShard().recoveryState() == recoveryStatus.state();
        try {
          recoveryStatus.indexShard().performBatchRecovery(request.operations());
          channel.sendResponse(TransportResponse.Empty.INSTANCE);
        } catch (TranslogRecoveryPerformer.BatchOperationException exception) {
          MapperException mapperException =
              (MapperException) ExceptionsHelper.unwrap(exception, MapperException.class);
          if (mapperException == null) {
            // Not a mapping problem: let the failure propagate to the caller.
            throw exception;
          }
          // In very rare cases a translog replay from the primary is processed before a mapping
          // update on this node, which causes local mapping changes. We want to wait until these
          // mappings are processed.
          logger.trace(
              "delaying recovery due to missing mapping changes (rolling back stats for [{}] ops)",
              exception,
              exception.completedOperations());
          // Undo the stats for the operations this batch completed; the request is handled again
          // from the listener below.
          translog.decrementRecoveredOperations(exception.completedOperations());
          // We do not need a timeout here since the entire recovery mechanism has an inactivity
          // protection (it will be canceled).
          observer.waitForNextChange(
              new ClusterStateObserver.Listener() {
                // Re-run the whole handler for the same request; a failure of the retry is sent
                // back over the channel instead of being thrown.
                @Override
                public void onNewClusterState(ClusterState state) {
                  try {
                    messageReceived(request, channel);
                  } catch (Exception e) {
                    onFailure(e);
                  }
                }

                // Sends the failure over the channel; if sending itself fails it is only logged.
                protected void onFailure(Exception e) {
                  try {
                    channel.sendResponse(e);
                  } catch (IOException e1) {
                    logger.warn("failed to send error back to recovery source", e1);
                  }
                }

                @Override
                public void onClusterServiceClose() {
                  onFailure(
                      new ElasticsearchException(
                          "cluster service was closed while waiting for mapping updates"));
                }

                @Override
                public void onTimeout(TimeValue timeout) {
                  // Note that we do not use a timeout (see the comment above waitForNextChange).
                  onFailure(
                      new ElasticsearchTimeoutException(
                          "timed out waiting for mapping updates (timeout [" + timeout + "])"));
                }
              });
        }
      }
    }