Пример #1
0
 /**
  * Handles a join request by passing the joining node to the listener and replying either with
  * the cluster state the listener returned or with a bare acknowledgement.
  *
  * @param request the join request; {@code withClusterState} selects the reply type
  * @param channel channel the reply is written to
  * @throws Exception if the listener or the channel fails
  */
 @Override
 public void messageReceived(JoinRequest request, TransportChannel channel) throws Exception {
   final ClusterState stateFromListener = listener.onJoin(request.node);
   if (!request.withClusterState) {
     // the sender did not ask for the cluster state, so acknowledge only
     channel.sendResponse(VoidStreamable.INSTANCE);
     return;
   }
   channel.sendResponse(new JoinResponse(stateFromListener));
 }
    /**
     * Invokes {@code onShardOperation} once per shard routing carried by the node request and
     * replies with one {@code NodeResponse} that separates the collected results from the
     * collected {@code BroadcastShardOperationFailedException}s.
     *
     * @param request node-level request listing the shards to operate on
     * @param channel channel the aggregated response is written to
     * @throws Exception if a shard operation or the channel fails
     */
    @Override
    public void messageReceived(final NodeRequest request, TransportChannel channel)
        throws Exception {
      final List<ShardRouting> shardRoutings = request.getShards();
      final int totalShards = shardRoutings.size();
      logger.trace("[{}] executing operation on [{}] shards", actionName, totalShards);

      // one slot per shard, handed to onShardOperation together with the slot position
      final Object[] outcomes = new Object[totalShards];
      int slot = 0;
      for (final ShardRouting routing : shardRoutings) {
        onShardOperation(request, outcomes, slot, routing);
        slot++;
      }

      // split the slots: failures go to one list, everything else is treated as a result
      final List<ShardOperationResult> successes = new ArrayList<>();
      final List<BroadcastShardOperationFailedException> failures = new ArrayList<>();
      for (final Object outcome : outcomes) {
        if (outcome instanceof BroadcastShardOperationFailedException) {
          failures.add((BroadcastShardOperationFailedException) outcome);
        } else {
          successes.add((ShardOperationResult) outcome);
        }
      }

      channel.sendResponse(
          new NodeResponse(request.getNodeId(), totalShards, successes, failures));
    }
 /**
  * Runs {@code shardOperation} for the wrapped request and shard id and sends the outcome back.
  *
  * @param request envelope holding the actual request and the target shard id
  * @param channel channel the response is written to
  * @throws Exception if the shard operation or the channel fails
  */
 @Override
 public void messageReceived(
     final ShardSingleOperationRequest request, final TransportChannel channel)
     throws Exception {
   channel.sendResponse(shardOperation(request.request(), request.shardId()));
 }
    /**
     * Receives one file chunk of an ongoing recovery: records source-side throttling, applies
     * the local rate limiter if one is configured, hands the chunk to the recovery target and
     * then acknowledges with an empty response.
     *
     * @param request the file chunk together with its metadata and position
     * @param channel channel the acknowledgement is written to
     * @throws Exception if the recovery lookup, the write or the channel fails
     */
    @Override
    public void messageReceived(final RecoveryFileChunkRequest request, TransportChannel channel)
        throws Exception {
      try (RecoveriesCollection.RecoveryRef recoveryRef =
          onGoingRecoveries.getRecoverySafe(request.recoveryId(), request.shardId())) {
        final RecoveryTarget target = recoveryRef.status();
        final RecoveryState.Index indexStats = target.state().getIndex();
        if (request.sourceThrottleTimeInNanos() != RecoveryState.Index.UNKNOWN) {
          indexStats.addSourceThrottling(request.sourceThrottleTimeInNanos());
        }

        final RateLimiter limiter = recoverySettings.rateLimiter();
        if (limiter != null) {
          final long pendingBytes = bytesSinceLastPause.addAndGet(request.content().length());
          if (pendingBytes > limiter.getMinPauseCheckBytes()) {
            // enough bytes accumulated: take them off the counter and pause for them
            bytesSinceLastPause.addAndGet(-pendingBytes);
            final long pausedNanos = limiter.pause(pendingBytes);
            indexStats.addTargetThrottling(pausedNanos);
            target.indexShard().recoveryStats().addThrottleTime(pausedNanos);
          }
        }

        target.writeFileChunk(
            request.metadata(),
            request.position(),
            request.content(),
            request.lastChunk(),
            request.totalTranslogOps());
      }
      // the recovery reference is closed before the acknowledgement is sent
      channel.sendResponse(TransportResponse.Empty.INSTANCE);
    }
 /**
  * Performs the replica operation while holding the releasable obtained from
  * {@code getIndexShardOperationsCounter}, then acknowledges with an empty response.
  *
  * @throws Exception if the replica operation or the channel fails
  */
 @Override
 protected void doRun() throws Exception {
   try (Releasable operationGuard = getIndexShardOperationsCounter(request.internalShardId)) {
     shardOperationOnReplica(request.internalShardId, request);
   }
   // the guard has been closed by the time the acknowledgement goes out
   channel.sendResponse(TransportResponse.Empty.INSTANCE);
 }
Пример #6
0
 /**
  * Handles the clean-files step of a recovery: records the translog operation total, deletes
  * the shard state, renames the temporary recovery files, writes the legacy checksums and then
  * verifies the store against the source metadata snapshot.
  *
  * <p>If the verification throws, the recovery is failed and the problem is rethrown as a
  * {@code RecoveryFailedException}; for corruption / index-format errors the store is
  * additionally wiped first. On success an empty response is sent while the status reference
  * is still held.
  *
  * @param request carries the recovery id, shard id, translog operation total and the source
  *     metadata snapshot
  * @param channel channel the acknowledgement is written to
  * @throws Exception propagated from any step; verification failures surface as
  *     {@code RecoveryFailedException}
  */
 @Override
 public void messageReceived(RecoveryCleanFilesRequest request, TransportChannel channel)
     throws Exception {
   try (RecoveriesCollection.StatusRef statusRef =
       onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
     final RecoveryStatus recoveryStatus = statusRef.status();
     recoveryStatus.state().getTranslog().totalOperations(request.totalTranslogOps());
     // First, move the files that were created with the recovery id suffix to their actual
     // names. It is OK if we end up with a corrupted index here, since we have replicas to
     // recover from in case of a full cluster shutdown just when this code executes.
     recoveryStatus
         .indexShard()
         .deleteShardState(); // must be deleted first: even if the rename fails, the shard
                              // might be invalid
     recoveryStatus.renameAllTempFiles();
     final Store store = recoveryStatus.store();
     // now write checksums
     recoveryStatus.legacyChecksums().write(store);
     Store.MetadataSnapshot sourceMetaData = request.sourceMetaSnapshot();
     try {
       store.cleanupAndVerify("recovery CleanFilesRequestHandler", sourceMetaData);
     } catch (CorruptIndexException
         | IndexFormatTooNewException
         | IndexFormatTooOldException ex) {
       // This is a fatal exception at this stage. It means we transferred files from the
       // remote that have not been checksummed and they are broken. We have to clean up this
       // shard entirely, remove all files and bubble the failure up to the source shard, since
       // the index might be broken there as well. The source can handle this and checks its
       // content on disk if possible.
       try {
         try {
           store.removeCorruptionMarker();
         } finally {
           Lucene.cleanLuceneIndex(store.directory()); // clean up and delete all files
         }
       } catch (Throwable e) {
         // cleanup is best effort: keep the original failure and attach the cleanup problem
         logger.debug("Failed to clean lucene index", e);
         ex.addSuppressed(e);
       }
       RecoveryFailedException rfe =
           new RecoveryFailedException(
               recoveryStatus.state(), "failed to clean after recovery", ex);
       recoveryStatus.fail(rfe, true);
       throw rfe;
     } catch (Exception ex) {
       // any other failure: fail the recovery without wiping the store
       RecoveryFailedException rfe =
           new RecoveryFailedException(
               recoveryStatus.state(), "failed to clean after recovery", ex);
       recoveryStatus.fail(rfe, true);
       throw rfe;
     }
     channel.sendResponse(TransportResponse.Empty.INSTANCE);
   }
 }
 /**
  * Calls {@code finalizeRecovery()} on the ongoing recovery identified by the request and then
  * acknowledges with an empty response.
  *
  * @param request identifies the recovery by recovery id and shard id
  * @param channel channel the acknowledgement is written to
  * @throws Exception if the lookup, the finalization or the channel fails
  */
 @Override
 public void messageReceived(RecoveryFinalizeRecoveryRequest request, TransportChannel channel)
     throws Exception {
   try (RecoveriesCollection.RecoveryRef ongoing =
       onGoingRecoveries.getRecoverySafe(request.recoveryId(), request.shardId())) {
     ongoing.status().finalizeRecovery();
   }
   // the recovery reference is closed before the acknowledgement is sent
   channel.sendResponse(TransportResponse.Empty.INSTANCE);
 }
 /**
  * Passes the requested cluster state version to {@code ensureClusterStateVersion} on the
  * ongoing recovery and then acknowledges with an empty response.
  *
  * @param request identifies the recovery and carries the cluster state version
  * @param channel channel the acknowledgement is written to
  * @throws Exception if the lookup, the version check or the channel fails
  */
 @Override
 public void messageReceived(
     RecoveryWaitForClusterStateRequest request, TransportChannel channel) throws Exception {
   try (RecoveriesCollection.RecoveryRef ongoing =
       onGoingRecoveries.getRecoverySafe(request.recoveryId(), request.shardId())) {
     ongoing.status().ensureClusterStateVersion(request.clusterStateVersion());
   }
   // the recovery reference is closed before the acknowledgement is sent
   channel.sendResponse(TransportResponse.Empty.INSTANCE);
 }
 /**
  * Delegates the clean-files step to the ongoing recovery and acknowledges with an empty
  * response while the recovery reference is still open.
  *
  * @param request carries the translog operation total and the source metadata snapshot
  * @param channel channel the acknowledgement is written to
  * @throws Exception if the lookup, the cleanup or the channel fails
  */
 @Override
 public void messageReceived(RecoveryCleanFilesRequest request, TransportChannel channel)
     throws Exception {
   try (RecoveriesCollection.RecoveryRef ongoing =
       onGoingRecoveries.getRecoverySafe(request.recoveryId(), request.shardId())) {
     ongoing.status().cleanFiles(request.totalTranslogOps(), request.sourceMetaSnapshot());
     // note: acknowledged inside the try block, i.e. before the reference is closed
     channel.sendResponse(TransportResponse.Empty.INSTANCE);
   }
 }
 /**
  * Executes {@code shardOperation} for the request on its internal shard id and sends the
  * outcome back; the request is trace-logged first when trace logging is enabled.
  *
  * @param request the shard-level request
  * @param channel channel the response is written to
  * @throws Exception if the shard operation or the channel fails
  */
 @Override
 public void messageReceived(final Request request, final TransportChannel channel)
     throws Exception {
   if (logger.isTraceEnabled()) {
     logger.trace("executing [{}] on shard [{}]", request, request.internalShardId);
   }
   channel.sendResponse(shardOperation(request, request.internalShardId));
 }
Пример #11
0
 /**
  * Receives one file chunk of an ongoing recovery and appends it to the matching temporary
  * index output. Once the file is complete the output is verified, closed, recorded in the
  * legacy checksums and synced to disk. An empty response is sent after the status reference
  * has been closed.
  *
  * @param request the chunk, with file name, metadata, position and length information
  * @param channel channel the acknowledgement is written to
  * @throws Exception if the recovery lookup, the write, the verification or the channel fails
  */
 @Override
 public void messageReceived(final RecoveryFileChunkRequest request, TransportChannel channel)
     throws Exception {
   try (RecoveriesCollection.StatusRef statusRef =
       onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
     final RecoveryStatus recoveryStatus = statusRef.status();
     final Store store = recoveryStatus.store();
     recoveryStatus.state().getTranslog().totalOperations(request.totalTranslogOps());
     final RecoveryState.Index indexState = recoveryStatus.state().getIndex();
     // only record source throttling when the source actually reported a value
     if (request.sourceThrottleTimeInNanos() != RecoveryState.Index.UNKNOWN) {
       indexState.addSourceThrottling(request.sourceThrottleTimeInNanos());
     }
     // the chunk at position 0 opens a new output; later chunks reuse the already open one
     IndexOutput indexOutput;
     if (request.position() == 0) {
       indexOutput =
           recoveryStatus.openAndPutIndexOutput(request.name(), request.metadata(), store);
     } else {
       indexOutput = recoveryStatus.getOpenIndexOutput(request.name());
     }
     // writeBytes below needs array access, so convert content that has no backing array
     BytesReference content = request.content();
     if (!content.hasArray()) {
       content = content.toBytesArray();
     }
     RateLimiter rl = recoverySettings.rateLimiter();
     if (rl != null) {
       long bytes = bytesSinceLastPause.addAndGet(content.length());
       if (bytes > rl.getMinPauseCheckBytes()) {
         // Time to pause: take the accumulated bytes off the counter and pause for them
         bytesSinceLastPause.addAndGet(-bytes);
         long throttleTimeInNanos = rl.pause(bytes);
         indexState.addTargetThrottling(throttleTimeInNanos);
         recoveryStatus.indexShard().recoveryStats().addThrottleTime(throttleTimeInNanos);
       }
     }
     indexOutput.writeBytes(content.array(), content.arrayOffset(), content.length());
     indexState.addRecoveredBytesToFile(request.name(), content.length());
     // the file is complete once the expected length is reached or the source says so
     if (indexOutput.getFilePointer() >= request.length() || request.lastChunk()) {
       try {
         Store.verify(indexOutput);
       } finally {
         // we are done; close the output even if verification failed
         indexOutput.close();
       }
       // write the checksum
       recoveryStatus.legacyChecksums().add(request.metadata());
       final String temporaryFileName = recoveryStatus.getTempNameForFile(request.name());
       assert Arrays.asList(store.directory().listAll()).contains(temporaryFileName);
       store.directory().sync(Collections.singleton(temporaryFileName));
       IndexOutput remove = recoveryStatus.removeOpenIndexOutputs(request.name());
       assert remove == null || remove == indexOutput; // remove may be null if we got finished
     }
   }
   channel.sendResponse(TransportResponse.Empty.INSTANCE);
 }
 /**
  * Verifies the repository named in the request with the supplied verification token and
  * acknowledges with an empty response. A verification failure is logged at warn level and
  * rethrown unchanged.
  *
  * @param request names the repository and carries the verification token
  * @param channel channel the acknowledgement is written to
  * @throws Exception the verification failure, or a channel failure
  */
 @Override
 public void messageReceived(VerifyNodeRepositoryRequest request, TransportChannel channel)
     throws Exception {
   try {
     doVerify(request.repository, request.verificationToken);
   } catch (Exception verificationFailure) {
     logger.warn("[{}] failed to verify repository", verificationFailure, request.repository);
     throw verificationFailure;
   }
   // only reached when verification succeeded
   channel.sendResponse(TransportResponse.Empty.INSTANCE);
 }
Пример #13
0
 /**
  * Records the translog operation total on the recovery state and calls
  * {@code skipTranslogRecovery()} on the index shard, then acknowledges with an empty response.
  *
  * @param request identifies the recovery and carries the translog operation total
  * @param channel channel the acknowledgement is written to
  * @throws Exception if the lookup, the preparation or the channel fails
  */
 @Override
 public void messageReceived(
     RecoveryPrepareForTranslogOperationsRequest request, TransportChannel channel)
     throws Exception {
   try (RecoveriesCollection.StatusRef ongoing =
       onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
     final RecoveryStatus status = ongoing.status();
     status.state().getTranslog().totalOperations(request.totalTranslogOps());
     status.indexShard().skipTranslogRecovery();
   }
   // the status reference is closed before the acknowledgement is sent
   channel.sendResponse(TransportResponse.Empty.INSTANCE);
 }
 /**
  * Sends the given failure back over the channel. If sending itself fails with an
  * {@code IOException}, both the send failure and the original failure are logged at warn
  * level and nothing is rethrown.
  *
  * @param t the failure to report to the caller
  */
 protected void responseWithFailure(Throwable t) {
   try {
     channel.sendResponse(t);
   } catch (IOException sendFailure) {
     final String sendFailureMessage =
         "failed to send error message back to client for action ["
             + transportReplicaAction
             + "]";
     logger.warn(sendFailureMessage, sendFailure);
     // log the original failure too, since it could not be delivered
     logger.warn("actual Exception", t);
   }
 }
 /**
  * Delegates to {@code prepareForTranslogOperations} on the ongoing recovery, passing the
  * translog operation total and the max unsafe auto-id timestamp from the request, then
  * acknowledges with an empty response.
  *
  * @param request identifies the recovery and carries the preparation parameters
  * @param channel channel the acknowledgement is written to
  * @throws Exception if the lookup, the preparation or the channel fails
  */
 @Override
 public void messageReceived(
     RecoveryPrepareForTranslogOperationsRequest request, TransportChannel channel)
     throws Exception {
   try (RecoveriesCollection.RecoveryRef ongoing =
       onGoingRecoveries.getRecoverySafe(request.recoveryId(), request.shardId())) {
     ongoing
         .status()
         .prepareForTranslogOperations(
             request.totalTranslogOps(), request.getMaxUnsafeAutoIdTimestamp());
   }
   // the recovery reference is closed before the acknowledgement is sent
   channel.sendResponse(TransportResponse.Empty.INSTANCE);
 }
Пример #16
0
 /**
  * Answers a master ping. The ping is rejected with an illegal-state exception when it is
  * addressed to a master id other than the local node id; otherwise the reply tells the sender
  * whether its node id exists in the current node set.
  *
  * @param request carries the master id being pinged and the id of the pinging node
  * @param channel channel the reply is written to
  * @throws Exception if the ping is addressed to another node or the channel fails
  */
 @Override
 public void messageReceived(MasterPingRequest request, TransportChannel channel)
     throws Exception {
   final DiscoveryNodes currentNodes = nodesProvider.nodes();
   if (request.masterNodeId.equals(currentNodes.localNodeId())) {
     // send a response, and note whether the pinging node is known to us or not
     channel.sendResponse(
         new MasterPingResponseResponse(currentNodes.nodeExists(request.nodeId)));
     return;
   }
   // We are not the master the sender thinks we are. This can happen if the master got
   // "kill -9" and then another node started using the same port.
   throw new ElasticSearchIllegalStateException(
       "Got ping as master with id [" + request.masterNodeId + "], but not master and no id");
 }
Пример #17
0
 /**
  * Answers a node ping with a {@code PingResponse} when the ping is addressed to the local
  * node id, and rejects it with an illegal-state exception otherwise.
  *
  * @param request carries the id of the node the sender believes it is pinging
  * @param channel channel the reply is written to
  * @throws Exception if the ping is addressed to another node or the channel fails
  */
 @Override
 public void messageReceived(PingRequest request, TransportChannel channel) throws Exception {
   if (latestNodes.localNodeId().equals(request.nodeId)) {
     channel.sendResponse(new PingResponse());
     return;
   }
   // We are not the node that was supposed to be pinged. This can happen when a kill -9 is
   // sent and another node is started using the same port.
   throw new ElasticSearchIllegalStateException(
       "Got pinged as node ["
           + request.nodeId
           + "], but I am node ["
           + latestNodes.localNodeId()
           + "]");
 }
 /**
  * Forwards the phase-1 file lists and the translog operation total to
  * {@code receiveFileInfo} on the ongoing recovery and acknowledges with an empty response
  * while the recovery reference is still open.
  *
  * @param request carries the phase-1 file names and sizes (new and existing) and the translog
  *     operation total
  * @param channel channel the acknowledgement is written to
  * @throws Exception if the lookup, the delegation or the channel fails
  */
 @Override
 public void messageReceived(RecoveryFilesInfoRequest request, TransportChannel channel)
     throws Exception {
   try (RecoveriesCollection.RecoveryRef ongoing =
       onGoingRecoveries.getRecoverySafe(request.recoveryId(), request.shardId())) {
     ongoing
         .status()
         .receiveFileInfo(
             request.phase1FileNames,
             request.phase1FileSizes,
             request.phase1ExistingFileNames,
             request.phase1ExistingFileSizes,
             request.totalTranslogOps);
     // note: acknowledged inside the try block, i.e. before the reference is closed
     channel.sendResponse(TransportResponse.Empty.INSTANCE);
   }
 }
 /**
  * Answers a master ping after three checks, each with its own exception type: the ping must
  * be addressed to the local node id, the local node must be master, and the pinging node must
  * exist in the current node set.
  *
  * @param request carries the master id being pinged and the id of the pinging node
  * @param channel channel the reply is written to
  * @throws Exception one of the three rejection exceptions, or a channel failure
  */
 @Override
 public void messageReceived(MasterPingRequest request, TransportChannel channel)
     throws Exception {
   final DiscoveryNodes currentNodes = nodesProvider.nodes();
   if (!request.masterNodeId.equals(currentNodes.localNodeId())) {
     // We are not the master the sender thinks we are. This can happen if the master got
     // "kill -9" and then another node started using the same port.
     throw new NotMasterException();
   } else if (!currentNodes.localNodeMaster()) {
     // if we are no longer master, fail...
     throw new NoLongerMasterException();
   } else if (!currentNodes.nodeExists(request.nodeId)) {
     throw new NodeDoesNotExistOnMasterException();
   }
   // send a response, and note whether the pinging node is known to us or not
   channel.sendResponse(
       new MasterPingResponseResponse(currentNodes.nodeExists(request.nodeId)));
 }
Пример #20
0
 /**
  * Registers the phase-1 file details on the recovery's index state (existing files flagged
  * {@code true}, files to be transferred flagged {@code false}), records the translog
  * operation totals and acknowledges with an empty response while the status reference is
  * still open.
  *
  * @param request carries the phase-1 file names and sizes and the translog operation total
  * @param channel channel the acknowledgement is written to
  * @throws Exception if the lookup, the bookkeeping or the channel fails
  */
 @Override
 public void messageReceived(RecoveryFilesInfoRequest request, TransportChannel channel)
     throws Exception {
   try (RecoveriesCollection.StatusRef ongoing =
       onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
     final RecoveryStatus status = ongoing.status();
     final RecoveryState.Index fileStats = status.state().getIndex();

     int existingPos = 0;
     while (existingPos < request.phase1ExistingFileNames.size()) {
       fileStats.addFileDetail(
           request.phase1ExistingFileNames.get(existingPos),
           request.phase1ExistingFileSizes.get(existingPos),
           true);
       existingPos++;
     }

     int pendingPos = 0;
     while (pendingPos < request.phase1FileNames.size()) {
       fileStats.addFileDetail(
           request.phase1FileNames.get(pendingPos),
           request.phase1FileSizes.get(pendingPos),
           false);
       pendingPos++;
     }

     status.state().getTranslog().totalOperations(request.totalTranslogOps);
     status.state().getTranslog().totalOperationsOnStart(request.totalTranslogOps);
     // recoveryBytesCount / recoveryFileCount will be set as we go...
     channel.sendResponse(TransportResponse.Empty.INSTANCE);
   }
 }
Пример #21
0
 /**
  * Passes the shard routing and reason from the request to {@code innerShardStarted} and then
  * acknowledges with {@code VoidStreamable.INSTANCE}.
  *
  * @param request carries the shard routing and the reason
  * @param channel channel the acknowledgement is written to
  * @throws Exception if the handling or the channel fails
  */
 @Override
 public void messageReceived(ShardRoutingEntry request, TransportChannel channel)
     throws Exception {
   innerShardStarted(request.shardRouting, request.reason);
   channel.sendResponse(VoidStreamable.INSTANCE);
 }
 /**
  * Runs {@code shardOperationOnReplica} for the request and then acknowledges with
  * {@code VoidStreamable.INSTANCE}; the acknowledgement is only sent if the operation returns
  * normally.
  *
  * @param request the replica operation request
  * @param channel channel the acknowledgement is written to
  * @throws Exception if the replica operation or the channel fails
  */
 @Override
 public void messageReceived(
     final ReplicaOperationRequest request, final TransportChannel channel) throws Exception {
   shardOperationOnReplica(request);
   channel.sendResponse(VoidStreamable.INSTANCE);
 }
    /**
     * Indexes a batch of translog operations on the recovery target and acknowledges with an
     * empty response.
     *
     * <p>If the batch fails with a {@code BatchOperationException} that wraps a
     * {@code MapperException}, the completed operations are rolled back in the translog stats
     * and the same request is re-processed on the next cluster state change. Any other batch
     * failure is rethrown.
     *
     * @param request the translog operations and their total count
     * @param channel channel used for the acknowledgement, or for the failure on a retry
     * @throws IOException if indexing or sending the response fails
     */
    @Override
    public void messageReceived(
        final RecoveryTranslogOperationsRequest request, final TransportChannel channel)
        throws IOException {
      try (RecoveriesCollection.RecoveryRef recoveryRef =
          onGoingRecoveries.getRecoverySafe(request.recoveryId(), request.shardId())) {
        // created before the operations are indexed; used below to wait for the next change
        final ClusterStateObserver observer =
            new ClusterStateObserver(clusterService, null, logger, threadPool.getThreadContext());
        final RecoveryTarget recoveryTarget = recoveryRef.status();
        try {
          recoveryTarget.indexTranslogOperations(request.operations(), request.totalTranslogOps());
          channel.sendResponse(TransportResponse.Empty.INSTANCE);
        } catch (TranslogRecoveryPerformer.BatchOperationException exception) {
          MapperException mapperException =
              (MapperException) ExceptionsHelper.unwrap(exception, MapperException.class);
          if (mapperException == null) {
            // not a mapping problem: nothing to wait for, propagate as is
            throw exception;
          }
          // In very rare cases a translog replay from the primary is processed before a mapping
          // update on this node, which causes local mapping changes since the mapping (cluster
          // state) might not have arrived on this node yet. We want to wait until these
          // mappings are processed, but also need to do some maintenance and roll back the
          // number of processed (completed) operations in this batch to ensure accounting is
          // correct.
          logger.trace(
              (Supplier<?>)
                  () ->
                      new ParameterizedMessage(
                          "delaying recovery due to missing mapping changes (rolling back stats for [{}] ops)",
                          exception.completedOperations()),
              exception);
          final RecoveryState.Translog translog = recoveryTarget.state().getTranslog();
          translog.decrementRecoveredOperations(
              exception.completedOperations()); // do the maintenance and roll back completed ops
          // We do not need to use a timeout here since the entire recovery mechanism has an
          // inactivity protection (it will be canceled).
          observer.waitForNextChange(
              new ClusterStateObserver.Listener() {
                @Override
                public void onNewClusterState(ClusterState state) {
                  // a new cluster state arrived: process the same request again
                  try {
                    messageReceived(request, channel);
                  } catch (Exception e) {
                    onFailure(e);
                  }
                }

                // reports the failure to the sender; a failure to send is only logged
                protected void onFailure(Exception e) {
                  try {
                    channel.sendResponse(e);
                  } catch (IOException e1) {
                    logger.warn("failed to send error back to recovery source", e1);
                  }
                }

                @Override
                public void onClusterServiceClose() {
                  onFailure(
                      new ElasticsearchException(
                          "cluster service was closed while waiting for mapping updates"));
                }

                @Override
                public void onTimeout(TimeValue timeout) {
                  // note that we do not use a timeout (see comment above)
                  onFailure(
                      new ElasticsearchTimeoutException(
                          "timed out waiting for mapping updates (timeout [" + timeout + "])"));
                }
              });
        }
      }
    }
 /**
  * Replies to a liveness request with the cluster name and local node reported by the cluster
  * service.
  *
  * @param request the liveness request (its content is not read)
  * @param channel channel the reply is written to
  * @throws Exception if the channel fails
  */
 @Override
 public void messageReceived(LivenessRequest request, TransportChannel channel) throws Exception {
   final LivenessResponse liveness =
       new LivenessResponse(clusterService.getClusterName(), clusterService.localNode());
   channel.sendResponse(liveness);
 }
 /**
  * Runs {@code shardOperation} for the request and sends whatever it returns back over the
  * channel.
  *
  * @param request the shard-level request
  * @param channel channel the response is written to
  * @throws Exception if the shard operation or the channel fails
  */
 @Override
 public void messageReceived(final ShardRequest request, final TransportChannel channel)
     throws Exception {
   channel.sendResponse(shardOperation(request));
 }
 /**
  * Starts a recovery for the request via {@code recover} and sends the resulting
  * {@code RecoveryResponse} back over the channel.
  *
  * @param request the start-recovery request
  * @param channel channel the response is written to
  * @throws Exception if the recovery or the channel fails
  */
 @Override
 public void messageReceived(final StartRecoveryRequest request, final TransportChannel channel)
     throws Exception {
   final RecoveryResponse recoveryOutcome = recover(request);
   channel.sendResponse(recoveryOutcome);
 }
Пример #27
0
    /**
     * Applies a batch of translog operations to the recovering shard and acknowledges with an
     * empty response.
     *
     * <p>If the batch fails with a {@code BatchOperationException} that wraps a
     * {@code MapperException}, the completed operations are rolled back in the translog stats
     * and the same request is re-processed on the next cluster state change. Any other batch
     * failure is rethrown.
     *
     * @param request the translog operations and their total count
     * @param channel channel used for the acknowledgement, or for the failure on a retry
     * @throws Exception if the recovery lookup, the batch or the channel fails
     */
    @Override
    public void messageReceived(
        final RecoveryTranslogOperationsRequest request, final TransportChannel channel)
        throws Exception {
      try (RecoveriesCollection.StatusRef statusRef =
          onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
        // created before the batch runs; used below to wait for the next change
        final ClusterStateObserver observer =
            new ClusterStateObserver(clusterService, null, logger);
        final RecoveryStatus recoveryStatus = statusRef.status();
        final RecoveryState.Translog translog = recoveryStatus.state().getTranslog();
        translog.totalOperations(request.totalTranslogOps());
        assert recoveryStatus.indexShard().recoveryState() == recoveryStatus.state();
        try {
          recoveryStatus.indexShard().performBatchRecovery(request.operations());
          channel.sendResponse(TransportResponse.Empty.INSTANCE);
        } catch (TranslogRecoveryPerformer.BatchOperationException exception) {
          MapperException mapperException =
              (MapperException) ExceptionsHelper.unwrap(exception, MapperException.class);
          if (mapperException == null) {
            // not a mapping problem: nothing to wait for, propagate as is
            throw exception;
          }
          // In very rare cases a translog replay from the primary is processed before a mapping
          // update on this node, which causes local mapping changes. We want to wait until
          // these mappings are processed.
          logger.trace(
              "delaying recovery due to missing mapping changes (rolling back stats for [{}] ops)",
              exception,
              exception.completedOperations());
          // roll back the operations this batch already counted, so the stats stay correct
          translog.decrementRecoveredOperations(exception.completedOperations());
          // We do not need to use a timeout here since the entire recovery mechanism has an
          // inactivity protection (it will be canceled).
          observer.waitForNextChange(
              new ClusterStateObserver.Listener() {
                @Override
                public void onNewClusterState(ClusterState state) {
                  // a new cluster state arrived: process the same request again
                  try {
                    messageReceived(request, channel);
                  } catch (Exception e) {
                    onFailure(e);
                  }
                }

                // reports the failure to the sender; a failure to send is only logged
                protected void onFailure(Exception e) {
                  try {
                    channel.sendResponse(e);
                  } catch (IOException e1) {
                    logger.warn("failed to send error back to recovery source", e1);
                  }
                }

                @Override
                public void onClusterServiceClose() {
                  onFailure(
                      new ElasticsearchException(
                          "cluster service was closed while waiting for mapping updates"));
                }

                @Override
                public void onTimeout(TimeValue timeout) {
                  // note that we do not use a timeout (see comment above)
                  onFailure(
                      new ElasticsearchTimeoutException(
                          "timed out waiting for mapping updates (timeout [" + timeout + "])"));
                }
              });
        }
      }
    }
Пример #28
0
 /**
  * Passes the leaving node to the listener's {@code onLeave} and then acknowledges with
  * {@code VoidStreamable.INSTANCE}.
  *
  * @param request carries the node that is leaving
  * @param channel channel the acknowledgement is written to
  * @throws Exception if the listener or the channel fails
  */
 @Override
 public void messageReceived(LeaveRequest request, TransportChannel channel) throws Exception {
   listener.onLeave(request.node);
   channel.sendResponse(VoidStreamable.INSTANCE);
 }
 /**
  * Passes the index and node id from the message to {@code innerNodeIndexStoreDeleted} and then
  * acknowledges with an empty response.
  *
  * @param message carries the index and the id of the reporting node
  * @param channel channel the acknowledgement is written to
  * @throws Exception if the handling or the channel fails
  */
 @Override
 public void messageReceived(NodeIndexStoreDeletedMessage message, TransportChannel channel)
     throws Exception {
   innerNodeIndexStoreDeleted(message.index, message.nodeId);
   channel.sendResponse(TransportResponse.Empty.INSTANCE);
 }