コード例 #1
0
  /**
   * Runs phase 3 of the recovery process.
   *
   * <p>This phase works on another translog snapshot, this time one that was acquired under a
   * write lock. Its operations are shipped to the target node and replayed there, after which the
   * target is asked to finalize the recovery.
   *
   * <p>Taking the translog snapshot is the job of {@code InternalEngine#recover}; once phase 3 is
   * done the snapshots of all three phases are released.
   *
   * @param snapshot translog snapshot whose operations are sent to the target node
   * @throws IndexShardClosedException if the shard is already closed when the phase starts
   */
  @Override
  public void phase3(Translog.Snapshot snapshot) throws ElasticsearchException {
    if (shard.state() == IndexShardState.CLOSED) {
      throw new IndexShardClosedException(request.shardId());
    }
    cancellableThreads.checkForCancel();
    StopWatch phaseTimer = new StopWatch().start();
    logger.trace(
        "[{}][{}] recovery [phase3] to {}: sending transaction log operations",
        indexName,
        shardId,
        request.targetNode());

    // Ship the translog operations to the target node and remember how many were sent.
    final int sentOperations = sendSnapshot(snapshot);

    cancellableThreads.execute(
        new Interruptable() {
          @Override
          public void run() throws InterruptedException {
            // FINALIZE tells the target node to clear unreferenced translog files, refresh the
            // engine now that new segments are available, and enable garbage collection of
            // tombstone files. The target shard is moved to the POST_RECOVERY phase as part of
            // handling this request.
            final RecoveryFinalizeRecoveryRequest finalizeRequest =
                new RecoveryFinalizeRecoveryRequest(request.recoveryId(), request.shardId());
            transportService
                .submitRequest(
                    request.targetNode(),
                    RecoveryTarget.Actions.FINALIZE,
                    finalizeRequest,
                    TransportRequestOptions.options()
                        .withTimeout(recoverySettings.internalActionLongTimeout()),
                    EmptyTransportResponseHandler.INSTANCE_SAME)
                .txGet();
          }
        });

    markRelocatedIfRequested();

    phaseTimer.stop();
    logger.trace(
        "[{}][{}] recovery [phase3] to {}: took [{}]",
        indexName,
        shardId,
        request.targetNode(),
        phaseTimer.totalTime());
    response.phase3Operations = sentOperations;
    response.phase3Time = phaseTimer.totalTime().millis();
  }

  /** Marks the local shard as relocated to the target node when the request asks for it. */
  private void markRelocatedIfRequested() {
    if (!request.markAsRelocated()) {
      return;
    }
    // TODO what happens if the recovery process fails afterwards, we need to mark this back to
    // started
    try {
      shard.relocated("to " + request.targetNode());
    } catch (IllegalIndexShardStateException ignored) {
      // Safe to ignore: when the other node moved to phase3 it also sends shard started, which
      // might cause the index shard we work against to be closed by the time we get to the
      // relocated method.
    }
  }
コード例 #2
0
  /**
   * Finalizes the recovery on the target and, for a primary relocation, hands the shard over to
   * the relocation target.
   *
   * @throws IndexShardClosedException if the shard is already closed when finalization starts
   */
  public void finalizeRecovery() {
    if (shard.state() == IndexShardState.CLOSED) {
      throw new IndexShardClosedException(request.shardId());
    }
    cancellableThreads.checkForCancel();
    StopWatch finalizeTimer = new StopWatch().start();
    logger.trace("[{}][{}] finalizing recovery to {}", indexName, shardId, request.targetNode());
    cancellableThreads.execute(recoveryTarget::finalizeRecovery);

    if (request.isPrimaryRelocation()) {
      performPrimaryRelocationHandOff();
      // If the recovery process fails after setting the shard state to RELOCATED, both relocation
      // source and target are failed (see IndexShard#updateRoutingEntry).
    }

    finalizeTimer.stop();
    logger.trace(
        "[{}][{}] finalizing recovery to {}: took [{}]",
        indexName,
        shardId,
        request.targetNode(),
        finalizeTimer.totalTime());
  }

  /**
   * Hands the primary over to the relocation target while new recoveries are delayed.
   *
   * <p>The cluster state on the primary relocation target must contain every replica shard that
   * has recovered, or is still recovering, from the current primary; otherwise replication
   * actions will not be sent to those replicas. To guarantee that, new recoveries are blocked
   * first and only then is the version of the latest cluster state read, so no new recovery can
   * complete based on a cluster state newer than that one.
   */
  private void performPrimaryRelocationHandOff() {
    try (Releasable recoveryDelay =
        delayNewRecoveries.apply(
            "primary relocation hand-off in progress or completed for " + shardId)) {
      final long requiredStateVersion = currentClusterStateVersionSupplier.get();
      logger.trace(
          "[{}][{}] waiting on {} to have cluster state with version [{}]",
          indexName,
          shardId,
          request.targetNode(),
          requiredStateVersion);
      cancellableThreads.execute(
          () -> recoveryTarget.ensureClusterStateVersion(requiredStateVersion));

      logger.trace(
          "[{}][{}] performing relocation hand-off to {}",
          indexName,
          shardId,
          request.targetNode());
      cancellableThreads.execute(() -> shard.relocated("to " + request.targetNode()));
    }
  }