public synchronized void startAll() throws IOException {
   final DiscoveryNode pNode = getDiscoveryNode(primary.routingEntry().currentNodeId());
   primary.markAsRecovering(
       "store",
       new RecoveryState(primary.shardId(), true, RecoveryState.Type.STORE, pNode, pNode));
   primary.recoverFromStore();
   primary.updateRoutingEntry(ShardRoutingHelper.moveToStarted(primary.routingEntry()));
   for (IndexShard replicaShard : replicas) {
     recoverReplica(
         replicaShard,
         (replica, sourceNode) ->
             new RecoveryTarget(replica, sourceNode, recoveryListener, version -> {}));
   }
 }
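  /**
   * Two-argument variant required by the call in {@link #startAll()}; assumed to delegate with
   * {@code markAsRecovering = true}, since startAll() never marks the replica itself.
   */
  public void recoverReplica(
      IndexShard replica, BiFunction<IndexShard, DiscoveryNode, RecoveryTarget> targetSupplier)
      throws IOException {
    recoverReplica(replica, targetSupplier, true);
  }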
 public void recoverReplica(
     IndexShard replica,
     BiFunction<IndexShard, DiscoveryNode, RecoveryTarget> targetSupplier,
     boolean markAsRecovering)
     throws IOException {
   final DiscoveryNode pNode = getPrimaryNode();
   final DiscoveryNode rNode = getDiscoveryNode(replica.routingEntry().currentNodeId());
   if (markAsRecovering) {
     replica.markAsRecovering(
         "remote",
         new RecoveryState(replica.shardId(), false, RecoveryState.Type.REPLICA, pNode, rNode));
   } else {
      assertEquals(IndexShardState.RECOVERING, replica.state());
   }
   replica.prepareForIndexRecovery();
   RecoveryTarget recoveryTarget = targetSupplier.apply(replica, pNode);
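    // the metadata snapshot describes the files the replica already has, letting the source
    // handler compute the delta and ship only missing or changed segments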
   StartRecoveryRequest request =
       new StartRecoveryRequest(
           replica.shardId(),
           pNode,
           rNode,
           getMetadataSnapshotOrEmpty(replica),
           RecoveryState.Type.REPLICA,
           0);
   RecoverySourceHandler recovery =
       new RecoverySourceHandler(
           primary,
           recoveryTarget,
           request,
           () -> 0L,
           e -> () -> {},
           (int) ByteSizeUnit.MB.toKB(1),
           logger);
   recovery.recoverToTarget();
   recoveryTarget.markAsDone();
   replica.updateRoutingEntry(ShardRoutingHelper.moveToStarted(replica.routingEntry()));
 }
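   /**
    * Handles a shard the master has marked as INITIALIZING on this node: resends a shard-started
    * message if the shard is in fact already started or in POST_RECOVERY, creates the shard if it
    * does not exist yet, and then starts either peer recovery (replica or relocation) or recovery
    * from the local store.
    */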
  private void applyInitializingShard(
      final ClusterState state,
      final IndexMetaData indexMetaData,
      final ShardRouting shardRouting) {
    final IndexService indexService = indicesService.indexService(shardRouting.index());
    if (indexService == null) {
      // got deleted on us, ignore
      return;
    }
    final RoutingTable routingTable = state.routingTable();
    final DiscoveryNodes nodes = state.getNodes();
    final int shardId = shardRouting.id();

    if (indexService.hasShard(shardId)) {
      IndexShard indexShard = indexService.shardSafe(shardId);
      if (indexShard.state() == IndexShardState.STARTED
          || indexShard.state() == IndexShardState.POST_RECOVERY) {
         // the master thinks we are initializing, but we are already started or in POST_RECOVERY,
         // waiting for the master to confirm a shard started message (either master failover, or
         // a cluster event before we managed to tell the master we started); mark us as started
        if (logger.isTraceEnabled()) {
          logger.trace(
              "{} master marked shard as initializing, but shard has state [{}], resending shard started to {}",
              indexShard.shardId(),
              indexShard.state(),
              nodes.masterNode());
        }
        if (nodes.masterNode() != null) {
          shardStateAction.shardStarted(
              shardRouting,
              indexMetaData.getIndexUUID(),
              "master "
                  + nodes.masterNode()
                  + " marked shard as initializing, but shard state is ["
                  + indexShard.state()
                  + "], mark shard as started",
              nodes.masterNode());
        }
        return;
      } else {
        if (indexShard.ignoreRecoveryAttempt()) {
          logger.trace(
              "ignoring recovery instruction for an existing shard {} (shard state: [{}])",
              indexShard.shardId(),
              indexShard.state());
          return;
        }
      }
    }

     // if we're in peer recovery, resolve the source node up front so that if the lookup fails
     // we never create the index shard
    DiscoveryNode sourceNode = null;
    if (isPeerRecovery(shardRouting)) {
      sourceNode = findSourceNodeForPeerRecovery(routingTable, nodes, shardRouting);
      if (sourceNode == null) {
        logger.trace(
            "ignoring initializing shard {} - no source node can be found.",
            shardRouting.shardId());
        return;
      }
    }

    // if there is no shard, create it
    if (!indexService.hasShard(shardId)) {
      if (failedShards.containsKey(shardRouting.shardId())) {
        if (nodes.masterNode() != null) {
          shardStateAction.resendShardFailed(
              shardRouting,
              indexMetaData.getIndexUUID(),
              "master "
                  + nodes.masterNode()
                  + " marked shard as initializing, but shard is marked as failed, resend shard failure",
              nodes.masterNode());
        }
        return;
      }
      try {
        if (logger.isDebugEnabled()) {
          logger.debug("[{}][{}] creating shard", shardRouting.index(), shardId);
        }
        IndexShard indexShard = indexService.createShard(shardId, shardRouting.primary());
        indexShard.updateRoutingEntry(
            shardRouting, state.blocks().disableStatePersistence() == false);
        indexShard.addFailedEngineListener(failedEngineHandler);
      } catch (IndexShardAlreadyExistsException e) {
        // ignore this, the method call can happen several times
      } catch (Throwable e) {
        failAndRemoveShard(shardRouting, indexService, true, "failed to create shard", e);
        return;
      }
    }
    final IndexShard indexShard = indexService.shardSafe(shardId);

    if (indexShard.ignoreRecoveryAttempt()) {
       // we are already recovering (we can get to this state since the cluster event can happen
       // several times while we recover)
      logger.trace(
          "ignoring recovery instruction for shard {} (shard state: [{}])",
          indexShard.shardId(),
          indexShard.state());
      return;
    }

    if (isPeerRecovery(shardRouting)) {
      try {

        assert sourceNode != null : "peer recovery started but sourceNode is null";

         // we don't mark this one as relocated at the end.
         // For primaries: requests are in any case routed to both nodes while the shard is
         //    relocating; that way we handle the edge case where it is marked as relocated and
         //    we might need to roll it back...
         // For replicas: we are recovering a backup from a primary
        RecoveryState.Type type =
            shardRouting.primary() ? RecoveryState.Type.RELOCATION : RecoveryState.Type.REPLICA;
        recoveryTarget.startRecovery(
            indexShard,
            type,
            sourceNode,
            new PeerRecoveryListener(shardRouting, indexService, indexMetaData));
      } catch (Throwable e) {
        indexShard.failShard("corrupted preexisting index", e);
        handleRecoveryFailure(indexService, shardRouting, true, e);
      }
    } else {
      final IndexShardRoutingTable indexShardRouting =
          routingTable.index(shardRouting.index()).shard(shardRouting.id());
      indexService
          .shard(shardId)
          .recoverFromStore(
              indexShardRouting,
              new StoreRecoveryService.RecoveryListener() {
                @Override
                public void onRecoveryDone() {
                  shardStateAction.shardStarted(
                      shardRouting, indexMetaData.getIndexUUID(), "after recovery from store");
                }

                @Override
                public void onIgnoreRecovery(String reason) {}

                @Override
                public void onRecoveryFailed(IndexShardRecoveryException e) {
                  handleRecoveryFailure(indexService, shardRouting, true, e);
                }
              });
    }
  }
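   /**
    * Reconciles the shards on this node with the routing table of the new cluster state: resends
    * shard-failed or shard-started messages where the master's view is stale, removes shards
    * whose allocation or recovery source node has changed, updates routing entries, and finally
    * calls applyInitializingShard for every shard that is still initializing.
    */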
  private void applyNewOrUpdatedShards(final ClusterChangedEvent event) {
    if (!indicesService.changesAllowed()) {
      return;
    }

    RoutingTable routingTable = event.state().routingTable();
    RoutingNodes.RoutingNodeIterator routingNode =
        event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId());

    if (routingNode == null) {
      failedShards.clear();
      return;
    }
    DiscoveryNodes nodes = event.state().nodes();

    for (final ShardRouting shardRouting : routingNode) {
      final IndexService indexService = indicesService.indexService(shardRouting.index());
      if (indexService == null) {
        // got deleted on us, ignore
        continue;
      }
      final IndexMetaData indexMetaData = event.state().metaData().index(shardRouting.index());
      if (indexMetaData == null) {
        // the index got deleted from the metadata; we will clean it up later in the
        // apply-deleted method call
        continue;
      }

      final int shardId = shardRouting.id();

      if (!indexService.hasShard(shardId) && shardRouting.started()) {
        if (failedShards.containsKey(shardRouting.shardId())) {
          if (nodes.masterNode() != null) {
            shardStateAction.resendShardFailed(
                shardRouting,
                indexMetaData.getIndexUUID(),
                "master "
                    + nodes.masterNode()
                    + " marked shard as started, but shard has previous failed. resending shard failure.",
                nodes.masterNode());
          }
        } else {
          // the master thinks we are started, but we don't have this shard at all; mark it as
          // failed
          sendFailShard(
              shardRouting,
              indexMetaData.getIndexUUID(),
              "master ["
                  + nodes.masterNode()
                  + "] marked shard as started, but shard has not been created, mark shard as failed",
              null);
        }
        continue;
      }

      IndexShard indexShard = indexService.shard(shardId);
      if (indexShard != null) {
        ShardRouting currentRoutingEntry = indexShard.routingEntry();
        // if the current and global routing entries are both initializing but still not equal,
        // it's a different "shard" being allocated. For example: a shard that recovers from one
        // node and now needs to recover to another node, or a replica allocated and then
        // allocating a primary because the primary failed on another node
        boolean shardHasBeenRemoved = false;
        if (currentRoutingEntry.initializing()
            && shardRouting.initializing()
            && !currentRoutingEntry.equals(shardRouting)) {
          logger.debug(
              "[{}][{}] removing shard (different instance of it allocated on this node, current [{}], global [{}])",
              shardRouting.index(),
              shardRouting.id(),
              currentRoutingEntry,
              shardRouting);
          // closing the shard will also cancel any ongoing recovery.
          indexService.removeShard(
              shardRouting.id(),
              "removing shard (different instance of it allocated on this node)");
          shardHasBeenRemoved = true;
        } else if (isPeerRecovery(shardRouting)) {
          final DiscoveryNode sourceNode =
              findSourceNodeForPeerRecovery(routingTable, nodes, shardRouting);
          // check whether there is an existing recovery running and, if its source node is not
          // the same, cancel the recovery so it can restart from the new source node
          final Predicate<RecoveryStatus> shouldCancel =
              new Predicate<RecoveryStatus>() {
                @Override
                public boolean apply(@Nullable RecoveryStatus status) {
                  return status.sourceNode().equals(sourceNode) == false;
                }
              };
          if (recoveryTarget.cancelRecoveriesForShard(
              indexShard.shardId(), "recovery source node changed", shouldCancel)) {
            logger.debug(
                "[{}][{}] removing shard (recovery source changed), current [{}], global [{}])",
                shardRouting.index(),
                shardRouting.id(),
                currentRoutingEntry,
                shardRouting);
            // closing the shard will also cancel any ongoing recovery.
            indexService.removeShard(
                shardRouting.id(), "removing shard (recovery source node changed)");
            shardHasBeenRemoved = true;
          }
        }
        if (shardHasBeenRemoved == false
            && (shardRouting.equals(indexShard.routingEntry()) == false
                || shardRouting.version() > indexShard.routingEntry().version())) {
          if (shardRouting.primary()
              && indexShard.routingEntry().primary() == false
              && shardRouting.initializing()
              && indexShard.allowsPrimaryPromotion() == false) {
            logger.debug("{} reinitialize shard on primary promotion", indexShard.shardId());
            indexService.removeShard(shardId, "promoted to primary");
          } else {
            // if we happen to remove the shardRouting by id above we don't need to jump in here!
            indexShard.updateRoutingEntry(
                shardRouting, event.state().blocks().disableStatePersistence() == false);
          }
        }
      }

      if (shardRouting.initializing()) {
        applyInitializingShard(event.state(), indexMetaData, shardRouting);
      }
    }
  }
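  /**
   * Creates an IndexShard for the given routing entry: acquires the node-level shard lock,
   * resolves an existing shard path or selects a new one (based on the expected shard size and
   * the current per-path shard counts), builds the Store, and instantiates either a
   * ShadowIndexShard or a regular IndexShard depending on the index settings.
   */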
  public synchronized IndexShard createShard(ShardRouting routing) throws IOException {
    final boolean primary = routing.primary();
    /*
     * TODO: we execute this in parallel, but it's a synchronized method. We might be able to
     * serialize the execution via the cluster state in the future; for now we just keep it synced.
     */
    if (closed.get()) {
      throw new IllegalStateException("Can't create shard " + routing.shardId() + ", closed");
    }
    final Settings indexSettings = this.indexSettings.getSettings();
    final ShardId shardId = routing.shardId();
    boolean success = false;
    Store store = null;
    IndexShard indexShard = null;
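    // take the node-level lock for this shard (5s timeout) so nothing else can create or delete
    // the shard directory while we initialize it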
    final ShardLock lock = nodeEnv.shardLock(shardId, TimeUnit.SECONDS.toMillis(5));
    try {
      eventListener.beforeIndexShardCreated(shardId, indexSettings);
      ShardPath path;
      try {
        path = ShardPath.loadShardPath(logger, nodeEnv, shardId, this.indexSettings);
      } catch (IllegalStateException ex) {
        logger.warn("{} failed to load shard path, trying to remove leftover", shardId);
        try {
          ShardPath.deleteLeftoverShardDirectory(logger, nodeEnv, lock, this.indexSettings);
          path = ShardPath.loadShardPath(logger, nodeEnv, shardId, this.indexSettings);
        } catch (Throwable t) {
          t.addSuppressed(ex);
          throw t;
        }
      }

      if (path == null) {
        // TODO: we should instead hold a "bytes reserved" estimate of how large we anticipate
        // this shard will be; e.g. for a shard that's being relocated/replicated we know how
        // large it will become once it's done copying.
        // Count up how many shards are currently on each data path:
        Map<Path, Integer> dataPathToShardCount = new HashMap<>();
        for (IndexShard shard : this) {
          Path dataPath = shard.shardPath().getRootStatePath();
          Integer curCount = dataPathToShardCount.get(dataPath);
          if (curCount == null) {
            curCount = 0;
          }
          dataPathToShardCount.put(dataPath, curCount + 1);
        }
        path =
            ShardPath.selectNewPathForShard(
                nodeEnv,
                shardId,
                this.indexSettings,
                routing.getExpectedShardSize() == ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE
                    ? getAvgShardSizeInBytes()
                    : routing.getExpectedShardSize(),
                dataPathToShardCount);
        logger.debug("{} creating using a new path [{}]", shardId, path);
      } else {
        logger.debug("{} creating using an existing path [{}]", shardId, path);
      }

      if (shards.containsKey(shardId.id())) {
        throw new IndexShardAlreadyExistsException(shardId + " already exists");
      }

      logger.debug("creating shard_id {}", shardId);
      // if we are on a shared FS we only own the shard (i.e. we can safely delete it) if we are
      // the primary.
      final boolean canDeleteShardContent =
          IndexMetaData.isOnSharedFilesystem(indexSettings) == false
              || (primary && IndexMetaData.isOnSharedFilesystem(indexSettings));
      store =
          new Store(
              shardId,
              this.indexSettings,
              indexStore.newDirectoryService(path),
              lock,
              new StoreCloseListener(
                  shardId,
                  canDeleteShardContent,
                  () -> nodeServicesProvider.getIndicesQueryCache().onClose(shardId)));
      if (useShadowEngine(primary, indexSettings)) {
        indexShard =
            new ShadowIndexShard(
                shardId,
                this.indexSettings,
                path,
                store,
                indexCache,
                mapperService,
                similarityService,
                indexFieldData,
                engineFactory,
                eventListener,
                searcherWrapper,
                nodeServicesProvider); // no indexing listeners - shadow engines don't index
      } else {
        indexShard =
            new IndexShard(
                shardId,
                this.indexSettings,
                path,
                store,
                indexCache,
                mapperService,
                similarityService,
                indexFieldData,
                engineFactory,
                eventListener,
                searcherWrapper,
                nodeServicesProvider,
                listeners);
      }
      eventListener.indexShardStateChanged(indexShard, null, indexShard.state(), "shard created");
      eventListener.afterIndexShardCreated(indexShard);
      indexShard.updateRoutingEntry(routing, true);
      shards = newMapBuilder(shards).put(shardId.id(), indexShard).immutableMap();
      success = true;
      return indexShard;
    } finally {
      if (success == false) {
        IOUtils.closeWhileHandlingException(lock);
        closeShard("initialization failed", shardId, indexShard, store, eventListener);
      }
    }
  }