private void sendFailShard(
     ShardRouting shardRouting, String indexUUID, String message, @Nullable Throwable failure) {
   try {
     logger.warn(
         "[{}] marking and sending shard failed due to [{}]",
         failure,
         shardRouting.shardId(),
         message);
     failedShards.put(shardRouting.shardId(), new FailedShard(shardRouting.version()));
     shardStateAction.shardFailed(
         shardRouting,
         indexUUID,
         "shard failure ["
             + message
             + "]"
             + (failure == null ? "" : "[" + detailedMessage(failure) + "]"));
   } catch (Throwable e1) {
     logger.warn(
         "[{}][{}] failed to mark shard as failed (because of [{}])",
         e1,
         shardRouting.getIndex(),
         shardRouting.getId(),
         message);
   }
 }
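
  /**
   * Removes stale entries from the local failed shards map: an entry is kept only while the same
   * shard (same routing version) is still assigned to this node in the current routing table and
   * the entry is younger than one hour, which acts as a safety timeout against dangling entries.
   */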
 private void cleanFailedShards(final ClusterChangedEvent event) {
   RoutingTable routingTable = event.state().routingTable();
   RoutingNodes.RoutingNodeIterator routingNode =
       event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId());
   if (routingNode == null) {
     failedShards.clear();
     return;
   }
   DiscoveryNodes nodes = event.state().nodes();
   long now = System.currentTimeMillis();
   String localNodeId = nodes.localNodeId();
   Iterator<Map.Entry<ShardId, FailedShard>> iterator = failedShards.entrySet().iterator();
   shards:
   while (iterator.hasNext()) {
     Map.Entry<ShardId, FailedShard> entry = iterator.next();
     FailedShard failedShard = entry.getValue();
     IndexRoutingTable indexRoutingTable = routingTable.index(entry.getKey().getIndex());
     if (indexRoutingTable != null) {
       IndexShardRoutingTable shardRoutingTable = indexRoutingTable.shard(entry.getKey().id());
       if (shardRoutingTable != null) {
         for (ShardRouting shardRouting : shardRoutingTable.assignedShards()) {
           if (localNodeId.equals(shardRouting.currentNodeId())) {
              // we have a timeout here just to make sure we don't have dangling failed shards for
              // some reason; it's just another safety layer
             if (shardRouting.version() == failedShard.version
                 && ((now - failedShard.timestamp) < TimeValue.timeValueMinutes(60).millis())) {
               // It's the same failed shard - keep it if it hasn't timed out
               continue shards;
             } else {
               // Different version or expired, remove it
               break;
             }
           }
         }
       }
     }
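      // the failed shard is no longer assigned to this node with the same version (or it timed
      // out), so drop the entry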
     iterator.remove();
   }
 }
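
  /**
   * Removes local shards that are no longer assigned to this node: for every index service we
   * currently hold (and whose index still exists in the cluster metadata), any local shard id that
   * does not appear in this node's routing entries is removed.
   */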
 private void applyDeletedShards(final ClusterChangedEvent event) {
   RoutingNodes.RoutingNodeIterator routingNode =
       event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId());
   if (routingNode == null) {
     return;
   }
   IntHashSet newShardIds = new IntHashSet();
   for (IndexService indexService : indicesService) {
     String index = indexService.index().name();
     IndexMetaData indexMetaData = event.state().metaData().index(index);
     if (indexMetaData == null) {
       continue;
     }
      // now, go over and delete the shards that need to be deleted
     newShardIds.clear();
     for (ShardRouting shard : routingNode) {
       if (shard.index().equals(index)) {
         newShardIds.add(shard.id());
       }
     }
     for (Integer existingShardId : indexService.shardIds()) {
       if (!newShardIds.contains(existingShardId)) {
         if (indexMetaData.state() == IndexMetaData.State.CLOSE) {
           if (logger.isDebugEnabled()) {
             logger.debug("[{}][{}] removing shard (index is closed)", index, existingShardId);
           }
           indexService.removeShard(existingShardId, "removing shard (index is closed)");
         } else {
           // we can just remove the shard, without cleaning it locally, since we will clean it
           // when all shards are allocated in the IndicesStore
           if (logger.isDebugEnabled()) {
             logger.debug("[{}][{}] removing shard (not allocated)", index, existingShardId);
           }
           indexService.removeShard(existingShardId, "removing shard (not allocated)");
         }
       }
     }
   }
 }
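
  /**
   * Removes the given shard from its index service (if it still exists locally) and, when
   * {@code sendShardFailure} is true, reports the failure via {@link #sendFailShard}.
   */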
 private void failAndRemoveShard(
     ShardRouting shardRouting,
     IndexService indexService,
     boolean sendShardFailure,
     String message,
     @Nullable Throwable failure) {
   if (indexService.hasShard(shardRouting.getId())) {
     try {
       indexService.removeShard(shardRouting.getId(), message);
     } catch (ShardNotFoundException e) {
       // the node got closed on us, ignore it
     } catch (Throwable e1) {
       logger.warn(
           "[{}][{}] failed to remove shard after failure ([{}])",
           e1,
           shardRouting.getIndex(),
           shardRouting.getId(),
           message);
     }
   }
   if (sendShardFailure) {
     sendFailShard(shardRouting, indexService.indexUUID(), message, failure);
   }
 }
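
  /**
   * Creates an index service for every index that has shards assigned to this node but does not
   * exist locally yet; a creation failure is reported through {@link #sendFailShard}.
   */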
 private void applyNewIndices(final ClusterChangedEvent event) {
   // we only create indices for shards that are allocated
   RoutingNodes.RoutingNodeIterator routingNode =
       event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId());
   if (routingNode == null) {
     return;
   }
   for (ShardRouting shard : routingNode) {
     if (!indicesService.hasIndex(shard.index())) {
       final IndexMetaData indexMetaData = event.state().metaData().index(shard.index());
       if (logger.isDebugEnabled()) {
         logger.debug("[{}] creating index", indexMetaData.index());
       }
       try {
         indicesService.createIndex(
             indexMetaData.index(),
             indexMetaData.settings(),
             event.state().nodes().localNode().id());
       } catch (Throwable e) {
         sendFailShard(shard, indexMetaData.getIndexUUID(), "failed to create index", e);
       }
     }
   }
 }
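
  /**
   * Returns true if the shard has to recover from another node: either a replica recovering from
   * its primary, or a relocating shard recovering from the node it is relocating from. Otherwise
   * the shard recovers from its local store.
   */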
 private boolean isPeerRecovery(ShardRouting shardRouting) {
   return !shardRouting.primary() || shardRouting.relocatingNodeId() != null;
 }

   /**
    * Finds the routing source node for peer recovery, returning null if it is not found. Note that
    * this method expects the shard routing to *require* peer recovery; use {@link
    * #isPeerRecovery(org.elasticsearch.cluster.routing.ShardRouting)} to check whether it is needed.
    */
  private DiscoveryNode findSourceNodeForPeerRecovery(
      RoutingTable routingTable, DiscoveryNodes nodes, ShardRouting shardRouting) {
    DiscoveryNode sourceNode = null;
    if (!shardRouting.primary()) {
      IndexShardRoutingTable shardRoutingTable =
          routingTable.index(shardRouting.index()).shard(shardRouting.id());
      for (ShardRouting entry : shardRoutingTable) {
        if (entry.primary() && entry.active()) {
           // only recover from a started primary; if we can't find one, we will do it next round
          sourceNode = nodes.get(entry.currentNodeId());
          if (sourceNode == null) {
            logger.trace(
                "can't find replica source node because primary shard {} is assigned to an unknown node.",
                entry);
            return null;
          }
          break;
        }
      }

      if (sourceNode == null) {
        logger.trace(
            "can't find replica source node for {} because a primary shard can not be found.",
            shardRouting.shardId());
      }
    } else if (shardRouting.relocatingNodeId() != null) {
      sourceNode = nodes.get(shardRouting.relocatingNodeId());
      if (sourceNode == null) {
        logger.trace(
            "can't find relocation source node for shard {} because it is assigned to an unknown node [{}].",
            shardRouting.shardId(),
            shardRouting.relocatingNodeId());
      }
    } else {
      throw new IllegalStateException(
          "trying to find source node for peer recovery when routing state means no peer recovery: "
              + shardRouting);
    }
    return sourceNode;
  }
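
   /**
    * Handles a shard that the master marked as initializing: resends a shard-started message if
    * the shard is in fact already started locally, creates the local shard if it does not exist
    * yet, and then kicks off either a peer recovery (from the primary or the relocation source
    * node) or a recovery from the local store.
    */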
  private void applyInitializingShard(
      final ClusterState state,
      final IndexMetaData indexMetaData,
      final ShardRouting shardRouting) {
    final IndexService indexService = indicesService.indexService(shardRouting.index());
    if (indexService == null) {
      // got deleted on us, ignore
      return;
    }
    final RoutingTable routingTable = state.routingTable();
    final DiscoveryNodes nodes = state.getNodes();
    final int shardId = shardRouting.id();

    if (indexService.hasShard(shardId)) {
      IndexShard indexShard = indexService.shardSafe(shardId);
      if (indexShard.state() == IndexShardState.STARTED
          || indexShard.state() == IndexShardState.POST_RECOVERY) {
         // the master thinks we are initializing, but we are already started or in POST_RECOVERY,
         // waiting for the master to confirm a shard-started message (either master failover, or a
         // cluster event before we managed to tell the master we started); mark us as started
        if (logger.isTraceEnabled()) {
          logger.trace(
              "{} master marked shard as initializing, but shard has state [{}], resending shard started to {}",
              indexShard.shardId(),
              indexShard.state(),
              nodes.masterNode());
        }
        if (nodes.masterNode() != null) {
          shardStateAction.shardStarted(
              shardRouting,
              indexMetaData.getIndexUUID(),
              "master "
                  + nodes.masterNode()
                  + " marked shard as initializing, but shard state is ["
                  + indexShard.state()
                  + "], mark shard as started",
              nodes.masterNode());
        }
        return;
      } else {
        if (indexShard.ignoreRecoveryAttempt()) {
          logger.trace(
              "ignoring recovery instruction for an existing shard {} (shard state: [{}])",
              indexShard.shardId(),
              indexShard.state());
          return;
        }
      }
    }

     // if we're in peer recovery, try to find the source node now so that, if that fails, we do
     // not create the index shard at all
    DiscoveryNode sourceNode = null;
    if (isPeerRecovery(shardRouting)) {
      sourceNode = findSourceNodeForPeerRecovery(routingTable, nodes, shardRouting);
      if (sourceNode == null) {
        logger.trace(
            "ignoring initializing shard {} - no source node can be found.",
            shardRouting.shardId());
        return;
      }
    }

    // if there is no shard, create it
    if (!indexService.hasShard(shardId)) {
      if (failedShards.containsKey(shardRouting.shardId())) {
        if (nodes.masterNode() != null) {
          shardStateAction.resendShardFailed(
              shardRouting,
              indexMetaData.getIndexUUID(),
              "master "
                  + nodes.masterNode()
                  + " marked shard as initializing, but shard is marked as failed, resend shard failure",
              nodes.masterNode());
        }
        return;
      }
      try {
        if (logger.isDebugEnabled()) {
          logger.debug("[{}][{}] creating shard", shardRouting.index(), shardId);
        }
        IndexShard indexShard = indexService.createShard(shardId, shardRouting.primary());
        indexShard.updateRoutingEntry(
            shardRouting, state.blocks().disableStatePersistence() == false);
        indexShard.addFailedEngineListener(failedEngineHandler);
      } catch (IndexShardAlreadyExistsException e) {
        // ignore this, the method call can happen several times
      } catch (Throwable e) {
        failAndRemoveShard(shardRouting, indexService, true, "failed to create shard", e);
        return;
      }
    }
    final IndexShard indexShard = indexService.shardSafe(shardId);

    if (indexShard.ignoreRecoveryAttempt()) {
       // we are already recovering (we can get to this state since the cluster event can happen
       // several times while we recover)
      logger.trace(
          "ignoring recovery instruction for shard {} (shard state: [{}])",
          indexShard.shardId(),
          indexShard.state());
      return;
    }

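     // the shard exists locally and has not started recovering yet; kick off either a peer
     // recovery from the source node or a recovery from the local store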
    if (isPeerRecovery(shardRouting)) {
      try {

        assert sourceNode != null : "peer recovery started but sourceNode is null";

         // we don't mark this one as relocated at the end.
         // For primaries: requests are in any case routed to both nodes while it is relocating,
         //    and that way we handle the edge case where it is marked as relocated and we might
         //    need to roll it back...
         // For replicas: we are recovering a backup from a primary
        RecoveryState.Type type =
            shardRouting.primary() ? RecoveryState.Type.RELOCATION : RecoveryState.Type.REPLICA;
        recoveryTarget.startRecovery(
            indexShard,
            type,
            sourceNode,
            new PeerRecoveryListener(shardRouting, indexService, indexMetaData));
      } catch (Throwable e) {
        indexShard.failShard("corrupted preexisting index", e);
        handleRecoveryFailure(indexService, shardRouting, true, e);
      }
    } else {
      final IndexShardRoutingTable indexShardRouting =
          routingTable.index(shardRouting.index()).shard(shardRouting.id());
      indexService
          .shard(shardId)
          .recoverFromStore(
              indexShardRouting,
              new StoreRecoveryService.RecoveryListener() {
                @Override
                public void onRecoveryDone() {
                  shardStateAction.shardStarted(
                      shardRouting, indexMetaData.getIndexUUID(), "after recovery from store");
                }

                @Override
                public void onIgnoreRecovery(String reason) {}

                @Override
                public void onRecoveryFailed(IndexShardRecoveryException e) {
                  handleRecoveryFailure(indexService, shardRouting, true, e);
                }
              });
    }
  }
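
   /**
    * Reconciles the shards routed to this node with the locally allocated ones: reports failures
    * for shards the master believes are started but which do not exist locally, removes shards
    * whose allocation or recovery source changed, updates routing entries, and finally hands
    * initializing shards to {@link #applyInitializingShard}.
    */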
  private void applyNewOrUpdatedShards(final ClusterChangedEvent event) {
    if (!indicesService.changesAllowed()) {
      return;
    }

    RoutingTable routingTable = event.state().routingTable();
    RoutingNodes.RoutingNodeIterator routingNode =
        event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId());

    if (routingNode == null) {
      failedShards.clear();
      return;
    }
    DiscoveryNodes nodes = event.state().nodes();

    for (final ShardRouting shardRouting : routingNode) {
      final IndexService indexService = indicesService.indexService(shardRouting.index());
      if (indexService == null) {
        // got deleted on us, ignore
        continue;
      }
      final IndexMetaData indexMetaData = event.state().metaData().index(shardRouting.index());
      if (indexMetaData == null) {
         // the index got deleted from the metadata; we will clean it up later in the
         // apply-deleted handling
        continue;
      }

      final int shardId = shardRouting.id();

      if (!indexService.hasShard(shardId) && shardRouting.started()) {
        if (failedShards.containsKey(shardRouting.shardId())) {
          if (nodes.masterNode() != null) {
            shardStateAction.resendShardFailed(
                shardRouting,
                indexMetaData.getIndexUUID(),
                "master "
                    + nodes.masterNode()
                     + " marked shard as started, but the shard has previously failed, resending shard failure.",
                nodes.masterNode());
          }
        } else {
          // the master thinks we are started, but we don't have this shard at all, mark it as
          // failed
          sendFailShard(
              shardRouting,
              indexMetaData.getIndexUUID(),
              "master ["
                  + nodes.masterNode()
                  + "] marked shard as started, but shard has not been created, mark shard as failed",
              null);
        }
        continue;
      }

      IndexShard indexShard = indexService.shard(shardId);
      if (indexShard != null) {
        ShardRouting currentRoutingEntry = indexShard.routingEntry();
         // if the current and global routing are initializing, but are still not the same, it's a
         // different "shard" being allocated; for example: a shard that recovers from one node and
         // now needs to recover to another node, or a replica allocated and then allocating a
         // primary because the primary failed on another node
        boolean shardHasBeenRemoved = false;
        if (currentRoutingEntry.initializing()
            && shardRouting.initializing()
            && !currentRoutingEntry.equals(shardRouting)) {
          logger.debug(
              "[{}][{}] removing shard (different instance of it allocated on this node, current [{}], global [{}])",
              shardRouting.index(),
              shardRouting.id(),
              currentRoutingEntry,
              shardRouting);
          // closing the shard will also cancel any ongoing recovery.
          indexService.removeShard(
              shardRouting.id(),
              "removing shard (different instance of it allocated on this node)");
          shardHasBeenRemoved = true;
        } else if (isPeerRecovery(shardRouting)) {
          final DiscoveryNode sourceNode =
              findSourceNodeForPeerRecovery(routingTable, nodes, shardRouting);
           // check if there is an ongoing recovery and, if so, whether the source node is not the
           // same; in that case cancel the recovery so it restarts from the new source
          final Predicate<RecoveryStatus> shouldCancel =
              new Predicate<RecoveryStatus>() {
                @Override
                public boolean apply(@Nullable RecoveryStatus status) {
                  return status.sourceNode().equals(sourceNode) == false;
                }
              };
          if (recoveryTarget.cancelRecoveriesForShard(
              indexShard.shardId(), "recovery source node changed", shouldCancel)) {
            logger.debug(
                "[{}][{}] removing shard (recovery source changed), current [{}], global [{}])",
                shardRouting.index(),
                shardRouting.id(),
                currentRoutingEntry,
                shardRouting);
            // closing the shard will also cancel any ongoing recovery.
            indexService.removeShard(
                shardRouting.id(), "removing shard (recovery source node changed)");
            shardHasBeenRemoved = true;
          }
        }
        if (shardHasBeenRemoved == false
            && (shardRouting.equals(indexShard.routingEntry()) == false
                || shardRouting.version() > indexShard.routingEntry().version())) {
          if (shardRouting.primary()
              && indexShard.routingEntry().primary() == false
              && shardRouting.initializing()
              && indexShard.allowsPrimaryPromotion() == false) {
            logger.debug("{} reinitialize shard on primary promotion", indexShard.shardId());
            indexService.removeShard(shardId, "promoted to primary");
          } else {
            // if we happen to remove the shardRouting by id above we don't need to jump in here!
            indexShard.updateRoutingEntry(
                shardRouting, event.state().blocks().disableStatePersistence() == false);
          }
        }
      }

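       // finally, if the master marked the shard as initializing, handle its creation and recovery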
      if (shardRouting.initializing()) {
        applyInitializingShard(event.state(), indexMetaData, shardRouting);
      }
    }
  }