コード例 #1
0
 /**
  * Schedules the snapshot task for this shard unless scheduling is unnecessary or already in
  * place.
  *
  * <p>Nothing is scheduled when the gateway reports that it needs no snapshot or no snapshot
  * scheduling, when the shard's routing entry is not a started primary, when a schedule future is
  * already held, or when the configured interval is {@code -1} milliseconds.
  */
 private synchronized void scheduleSnapshotIfNeeded() {
   final boolean gatewayWantsScheduling =
       shardGateway.requiresSnapshot() && shardGateway.requiresSnapshotScheduling();
   if (!gatewayWantsScheduling) {
     return;
   }

   // snapshotting only happens on the primary, and only once the cluster assumes it has started
   final boolean startedPrimary =
       indexShard.routingEntry().primary() && indexShard.routingEntry().started();
   if (!startedPrimary) {
     return;
   }

   // a held future means scheduling is already in place; an interval of -1 schedules nothing
   if (snapshotScheduleFuture != null || snapshotInterval.millis() == -1) {
     return;
   }

   if (logger.isDebugEnabled()) {
     logger.debug("scheduling snapshot every [{}]", snapshotInterval);
   }
   snapshotScheduleFuture =
       threadPool.schedule(snapshotInterval, ThreadPool.Names.SNAPSHOT, snapshotRunnable);
 }
コード例 #2
0
  /**
   * Builds the {@link ShardStatus} for the shard addressed by {@code request}.
   *
   * <p>The routing entry, shard state and (best effort) store size are always filled in. Translog,
   * document, merge, refresh and flush figures are added only while the shard is {@code STARTED}.
   * Peer and gateway recovery details are added when {@code request.recovery} is set, and gateway
   * snapshot details when {@code request.snapshot} is set.
   *
   * @param request identifies the index and shard and selects the optional sections
   * @return the populated shard status
   * @throws ElasticSearchException declared by the overridden operation
   */
  @Override
  protected ShardStatus shardOperation(IndexShardStatusRequest request)
      throws ElasticSearchException {
    InternalIndexService indexService =
        (InternalIndexService) indicesService.indexServiceSafe(request.index());
    InternalIndexShard indexShard = (InternalIndexShard) indexService.shardSafe(request.shardId());
    ShardStatus shardStatus = new ShardStatus(indexShard.routingEntry());
    shardStatus.state = indexShard.state();
    try {
      shardStatus.storeSize = indexShard.store().estimateSize();
    } catch (IOException ignored) {
      // best effort: when the size cannot be estimated the field is simply left unset
    }
    if (indexShard.state() == IndexShardState.STARTED) {
      shardStatus.translogId = indexShard.translog().currentId();
      shardStatus.translogOperations = indexShard.translog().estimatedNumberOfOperations();
      Engine.Searcher searcher = indexShard.searcher();
      try {
        shardStatus.docs = new DocsStatus();
        shardStatus.docs.numDocs = searcher.reader().numDocs();
        shardStatus.docs.maxDoc = searcher.reader().maxDoc();
        shardStatus.docs.deletedDocs = searcher.reader().numDeletedDocs();
      } finally {
        // the searcher is released even if reading the counters fails
        searcher.release();
      }

      shardStatus.mergeStats = indexShard.mergeScheduler().stats();
      shardStatus.refreshStats = indexShard.refreshStats();
      shardStatus.flushStats = indexShard.flushStats();
    }

    if (request.recovery) {
      // check on going recovery (from peer or gateway); the shard is asked first, then the
      // peer recovery target
      RecoveryStatus peerRecoveryStatus = indexShard.peerRecoveryStatus();
      if (peerRecoveryStatus == null) {
        peerRecoveryStatus = peerRecoveryTarget.peerRecoveryStatus(indexShard.shardId());
      }
      if (peerRecoveryStatus != null) {
        shardStatus.peerRecoveryStatus = toPeerRecoveryStatus(peerRecoveryStatus);
      }

      IndexShardGatewayService gatewayService =
          indexService.shardInjector(request.shardId()).getInstance(IndexShardGatewayService.class);
      org.elasticsearch.index.gateway.RecoveryStatus gatewayRecoveryStatus =
          gatewayService.recoveryStatus();
      if (gatewayRecoveryStatus != null) {
        shardStatus.gatewayRecoveryStatus = toGatewayRecoveryStatus(gatewayRecoveryStatus);
      }
    }

    if (request.snapshot) {
      IndexShardGatewayService gatewayService =
          indexService.shardInjector(request.shardId()).getInstance(IndexShardGatewayService.class);
      SnapshotStatus snapshotStatus = gatewayService.snapshotStatus();
      if (snapshotStatus != null) {
        shardStatus.gatewaySnapshotStatus = toGatewaySnapshotStatus(snapshotStatus);
      }
    }

    return shardStatus;
  }

  /** Converts a peer recovery status into its reported form; unlisted stages map to INIT. */
  private PeerRecoveryStatus toPeerRecoveryStatus(RecoveryStatus recoveryStatus) {
    PeerRecoveryStatus.Stage stage;
    switch (recoveryStatus.stage()) {
      case INIT:
        stage = PeerRecoveryStatus.Stage.INIT;
        break;
      case INDEX:
        stage = PeerRecoveryStatus.Stage.INDEX;
        break;
      case TRANSLOG:
        stage = PeerRecoveryStatus.Stage.TRANSLOG;
        break;
      case FINALIZE:
        stage = PeerRecoveryStatus.Stage.FINALIZE;
        break;
      case DONE:
        stage = PeerRecoveryStatus.Stage.DONE;
        break;
      default:
        stage = PeerRecoveryStatus.Stage.INIT;
    }
    return new PeerRecoveryStatus(
        stage,
        recoveryStatus.startTime(),
        recoveryStatus.time(),
        recoveryStatus.phase1TotalSize(),
        recoveryStatus.phase1ExistingTotalSize(),
        recoveryStatus.currentFilesSize(),
        recoveryStatus.currentTranslogOperations());
  }

  /** Converts a gateway recovery status into its reported form; unlisted stages map to INIT. */
  private GatewayRecoveryStatus toGatewayRecoveryStatus(
      org.elasticsearch.index.gateway.RecoveryStatus recoveryStatus) {
    GatewayRecoveryStatus.Stage stage;
    switch (recoveryStatus.stage()) {
      case INIT:
        stage = GatewayRecoveryStatus.Stage.INIT;
        break;
      case INDEX:
        stage = GatewayRecoveryStatus.Stage.INDEX;
        break;
      case TRANSLOG:
        stage = GatewayRecoveryStatus.Stage.TRANSLOG;
        break;
      case DONE:
        stage = GatewayRecoveryStatus.Stage.DONE;
        break;
      default:
        stage = GatewayRecoveryStatus.Stage.INIT;
    }
    return new GatewayRecoveryStatus(
        stage,
        recoveryStatus.startTime(),
        recoveryStatus.time(),
        recoveryStatus.index().totalSize(),
        recoveryStatus.index().reusedTotalSize(),
        recoveryStatus.index().currentFilesSize(),
        recoveryStatus.translog().currentTranslogOperations());
  }

  /** Converts a gateway snapshot status into its reported form; unlisted stages map to NONE. */
  private GatewaySnapshotStatus toGatewaySnapshotStatus(SnapshotStatus snapshotStatus) {
    GatewaySnapshotStatus.Stage stage;
    switch (snapshotStatus.stage()) {
      case DONE:
        stage = GatewaySnapshotStatus.Stage.DONE;
        break;
      case FAILURE:
        stage = GatewaySnapshotStatus.Stage.FAILURE;
        break;
      case TRANSLOG:
        stage = GatewaySnapshotStatus.Stage.TRANSLOG;
        break;
      case FINALIZE:
        stage = GatewaySnapshotStatus.Stage.FINALIZE;
        break;
      case INDEX:
        stage = GatewaySnapshotStatus.Stage.INDEX;
        break;
      default:
        stage = GatewaySnapshotStatus.Stage.NONE;
        break;
    }
    return new GatewaySnapshotStatus(
        stage,
        snapshotStatus.startTime(),
        snapshotStatus.time(),
        snapshotStatus.index().totalSize(),
        snapshotStatus.translog().expectedNumberOfOperations());
  }
コード例 #3
0
  /**
   * Takes a snapshot of this shard and hands it to the gateway.
   *
   * <p>The call is a no-op when the shard is not a primary, is relocating, or is in the {@code
   * CREATED} or {@code RECOVERING} state. It also returns quietly (after a warning) when the
   * snapshot lock cannot be obtained. The gateway is only invoked when the index commit
   * generation or translog id differs from the last snapshot, or the translog has grown.
   *
   * @param reason free-text reason, used only in log output
   * @throws IndexShardGatewaySnapshotFailedException if the snapshot fails for a reason other than
   *     the shard not being ready for snapshotting
   */
  public synchronized void snapshot(final String reason)
      throws IndexShardGatewaySnapshotFailedException {
    // only a primary may snapshot, and not while it relocates (avoids conflicting snapshots)
    if (!indexShard.routingEntry().primary() || indexShard.routingEntry().relocating()) {
      return;
    }
    // a shard that was just created or is still recovering is skipped
    if (indexShard.state() == IndexShardState.CREATED
        || indexShard.state() == IndexShardState.RECOVERING) {
      return;
    }

    if (snapshotLock == null) {
      try {
        snapshotLock = shardGateway.obtainSnapshotLock();
      } catch (Exception lockFailure) {
        logger.warn("failed to obtain snapshot lock, ignoring snapshot", lockFailure);
        return;
      }
    }

    try {
      final Engine.SnapshotHandler<SnapshotStatus> gatewayHandler =
          new Engine.SnapshotHandler<SnapshotStatus>() {
            @Override
            public SnapshotStatus snapshot(SnapshotIndexCommit commit, Translog.Snapshot translog)
                throws EngineException {
              // same commit, same translog and no translog growth: nothing new to snapshot
              if (lastIndexVersion == commit.getGeneration()
                  && lastTranslogId == translog.translogId()
                  && lastTranslogLength >= translog.length()) {
                return null;
              }

              logger.debug("snapshot ({}) to {} ...", reason, shardGateway);
              final IndexShardGateway.Snapshot gatewaySnapshot =
                  new IndexShardGateway.Snapshot(
                      commit,
                      translog,
                      lastIndexVersion,
                      lastTranslogId,
                      lastTranslogLength,
                      lastTotalTranslogOperations);
              final SnapshotStatus result = shardGateway.snapshot(gatewaySnapshot);

              // remember what was just snapshotted so the next call can detect changes
              lastIndexVersion = commit.getGeneration();
              lastTranslogId = translog.translogId();
              lastTranslogLength = translog.length();
              lastTotalTranslogOperations = translog.estimatedTotalOperations();
              return result;
            }
          };

      final SnapshotStatus outcome = indexShard.snapshot(gatewayHandler);
      if (outcome != null && logger.isDebugEnabled()) {
        final StringBuilder report = new StringBuilder();
        report.append("snapshot (").append(reason);
        report.append(") completed to ").append(shardGateway);
        report.append(", took [").append(TimeValue.timeValueMillis(outcome.time()));
        report.append("]\n");

        report.append("    index    : version [").append(lastIndexVersion);
        report.append("], number_of_files [").append(outcome.index().numberOfFiles());
        report.append("] with total_size [").append(new ByteSizeValue(outcome.index().totalSize()));
        report.append("], took [").append(TimeValue.timeValueMillis(outcome.index().time()));
        report.append("]\n");

        report.append("    translog : id      [").append(lastTranslogId);
        report
            .append("], number_of_operations [")
            .append(outcome.translog().expectedNumberOfOperations());
        report.append("], took [").append(TimeValue.timeValueMillis(outcome.translog().time()));
        report.append("]");
        logger.debug(report.toString());
      }
    } catch (SnapshotFailedEngineException engineFailure) {
      // an IllegalStateException cause is tolerated: the snapshot has not started yet
      if (!(engineFailure.getCause() instanceof IllegalStateException)) {
        throw new IndexShardGatewaySnapshotFailedException(
            shardId, "Failed to snapshot", engineFailure);
      }
    } catch (IllegalIndexShardStateException notReady) {
      // ignore, that's fine, snapshot has not started yet
    } catch (IndexShardGatewaySnapshotFailedException alreadyWrapped) {
      throw alreadyWrapped;
    } catch (Exception unexpected) {
      throw new IndexShardGatewaySnapshotFailedException(shardId, "Failed to snapshot", unexpected);
    }
  }
コード例 #4
0
  /**
   * Recovers the state of the shard from the gateway, or restores it from a snapshot when the
   * shard's routing entry carries a restore source.
   *
   * <p>The precondition checks and the move to the recovering state run on the calling thread. The
   * recovery itself is submitted to the generic thread pool, and its outcome is reported through
   * {@code listener}.
   *
   * @param indexShouldExists passed through to the gateway's {@code recover} call
   * @param listener notified when recovery completes, fails, or is ignored
   * @throws IndexShardGatewayRecoveryException declared; the failures handled in this method are
   *     reported through {@code listener} instead
   * @throws IgnoreGatewayRecoveryException declared; ignored recoveries handled in this method
   *     are reported through {@code listener} instead
   */
  public void recover(final boolean indexShouldExists, final RecoveryListener listener)
      throws IndexShardGatewayRecoveryException, IgnoreGatewayRecoveryException {
    if (indexShard.state() == IndexShardState.CLOSED) {
      // got closed on us, just ignore this recovery
      listener.onIgnoreRecovery("shard closed");
      return;
    }
    if (!indexShard.routingEntry().primary()) {
      listener.onRecoveryFailed(
          new IndexShardGatewayRecoveryException(
              shardId, "Trying to recover when the shard is in backup state", null));
      return;
    }
    try {
      if (indexShard.routingEntry().restoreSource() != null) {
        indexShard.recovering("from snapshot");
      } else {
        indexShard.recovering("from gateway");
      }
    } catch (IllegalIndexShardStateException e) {
      // that's fine, since we might be called concurrently, just ignore this, we are already
      // recovering
      listener.onIgnoreRecovery("already in recovering process, " + e.getMessage());
      return;
    }

    threadPool
        .generic()
        .execute(
            new Runnable() {
              @Override
              public void run() {
                recoveryStatus = new RecoveryStatus();
                recoveryStatus.updateStage(RecoveryStatus.Stage.INIT);

                try {
                  if (indexShard.routingEntry().restoreSource() != null) {
                    logger.debug(
                        "restoring from {} ...", indexShard.routingEntry().restoreSource());
                    snapshotService.restore(recoveryStatus);
                  } else {
                    logger.debug("starting recovery from {} ...", shardGateway);
                    shardGateway.recover(indexShouldExists, recoveryStatus);
                  }

                  // reset the snapshot bookkeeping to what was just recovered
                  lastIndexVersion = recoveryStatus.index().version();
                  lastTranslogId = -1;
                  lastTranslogLength = 0;
                  lastTotalTranslogOperations =
                      recoveryStatus.translog().currentTranslogOperations();

                  // start the shard if the gateway has not started it already. Note that if the
                  // gateway moved shard to POST_RECOVERY, it may have been started as well if:
                  // 1) master sent a new cluster state indicating shard is initializing
                  // 2) IndicesClusterStateService#applyInitializingShard will send a shard started
                  //    event
                  // 3) Master will mark shard as started and this will be processed locally.
                  IndexShardState shardState = indexShard.state();
                  if (shardState != IndexShardState.POST_RECOVERY
                      && shardState != IndexShardState.STARTED) {
                    indexShard.postRecovery("post recovery from gateway");
                  }
                  // refresh the shard
                  indexShard.refresh(new Engine.Refresh("post_gateway").force(true));

                  recoveryStatus.time(System.currentTimeMillis() - recoveryStatus.startTime());
                  recoveryStatus.updateStage(RecoveryStatus.Stage.DONE);

                  // Check the more verbose level first: a logger with TRACE enabled also has
                  // DEBUG enabled, so testing DEBUG first would make the detailed report
                  // unreachable.
                  if (logger.isTraceEnabled()) {
                    StringBuilder sb = new StringBuilder();
                    sb.append("recovery completed from ")
                        .append(shardGateway)
                        .append(", took [")
                        .append(timeValueMillis(recoveryStatus.time()))
                        .append("]\n");
                    sb.append("    index    : files           [")
                        .append(recoveryStatus.index().numberOfFiles())
                        .append("] with total_size [")
                        .append(new ByteSizeValue(recoveryStatus.index().totalSize()))
                        .append("], took[")
                        .append(TimeValue.timeValueMillis(recoveryStatus.index().time()))
                        .append("]\n");
                    sb.append("             : recovered_files [")
                        .append(recoveryStatus.index().numberOfRecoveredFiles())
                        .append("] with total_size [")
                        .append(new ByteSizeValue(recoveryStatus.index().recoveredTotalSize()))
                        .append("]\n");
                    sb.append("             : reusing_files   [")
                        .append(recoveryStatus.index().numberOfReusedFiles())
                        .append("] with total_size [")
                        .append(new ByteSizeValue(recoveryStatus.index().reusedTotalSize()))
                        .append("]\n");
                    sb.append("    start    : took [")
                        .append(TimeValue.timeValueMillis(recoveryStatus.start().time()))
                        .append("], check_index [")
                        .append(timeValueMillis(recoveryStatus.start().checkIndexTime()))
                        .append("]\n");
                    sb.append("    translog : number_of_operations [")
                        .append(recoveryStatus.translog().currentTranslogOperations())
                        .append("], took [")
                        .append(TimeValue.timeValueMillis(recoveryStatus.translog().time()))
                        .append("]");
                    logger.trace(sb.toString());
                  } else if (logger.isDebugEnabled()) {
                    logger.debug(
                        "recovery completed from [{}], took [{}]",
                        shardGateway,
                        timeValueMillis(recoveryStatus.time()));
                  }
                  listener.onRecoveryDone();
                  // NOTE(review): this runs inside the try after onRecoveryDone(); if it threw,
                  // the generic catch below would also call onRecoveryFailed - confirm that is
                  // acceptable.
                  scheduleSnapshotIfNeeded();
                } catch (IndexShardGatewayRecoveryException e) {
                  if (indexShard.state() == IndexShardState.CLOSED) {
                    // got closed on us, just ignore this recovery
                    listener.onIgnoreRecovery("shard closed");
                    return;
                  }
                  if ((e.getCause() instanceof IndexShardClosedException)
                      || (e.getCause() instanceof IndexShardNotStartedException)) {
                    // got closed on us, just ignore this recovery
                    listener.onIgnoreRecovery("shard closed");
                    return;
                  }
                  listener.onRecoveryFailed(e);
                } catch (IndexShardClosedException e) {
                  listener.onIgnoreRecovery("shard closed");
                } catch (IndexShardNotStartedException e) {
                  listener.onIgnoreRecovery("shard closed");
                } catch (Exception e) {
                  if (indexShard.state() == IndexShardState.CLOSED) {
                    // got closed on us, just ignore this recovery
                    listener.onIgnoreRecovery("shard closed");
                    return;
                  }
                  // anything else is wrapped so the listener always receives a recovery exception
                  listener.onRecoveryFailed(
                      new IndexShardGatewayRecoveryException(shardId, "failed recovery", e));
                }
              }
            });
  }