private boolean applyStartedShards( RoutingNodes routingNodes, Iterable<? extends ShardRouting> startedShardEntries) { boolean dirty = false; // apply shards might be called several times with the same shard, ignore it for (ShardRouting startedShard : startedShardEntries) { assert startedShard.state() == INITIALIZING; // retrieve the relocating node id before calling startedShard(). String relocatingNodeId = null; RoutingNodes.RoutingNodeIterator currentRoutingNode = routingNodes.routingNodeIter(startedShard.currentNodeId()); if (currentRoutingNode != null) { for (MutableShardRouting shard : currentRoutingNode) { if (shard.shardId().equals(startedShard.shardId())) { relocatingNodeId = shard.relocatingNodeId(); if (!shard.started()) { dirty = true; routingNodes.started(shard); } break; } } } // startedShard is the current state of the shard (post relocation for example) // this means that after relocation, the state will be started and the currentNodeId will be // the node we relocated to if (relocatingNodeId == null) { continue; } RoutingNodes.RoutingNodeIterator sourceRoutingNode = routingNodes.routingNodeIter(relocatingNodeId); if (sourceRoutingNode != null) { while (sourceRoutingNode.hasNext()) { MutableShardRouting shard = sourceRoutingNode.next(); if (shard.shardId().equals(startedShard.shardId())) { if (shard.relocating()) { dirty = true; sourceRoutingNode.remove(); break; } } } } } return dirty; }
@Override public RerouteExplanation execute(RoutingAllocation allocation, boolean explain) { DiscoveryNode discoNode = allocation.nodes().resolveNode(node); boolean found = false; for (RoutingNodes.RoutingNodeIterator it = allocation.routingNodes().routingNodeIter(discoNode.id()); it.hasNext(); ) { ShardRouting shardRouting = it.next(); if (!shardRouting.shardId().equals(shardId)) { continue; } found = true; if (shardRouting.relocatingNodeId() != null) { if (shardRouting.initializing()) { // the shard is initializing and recovering from another node, simply cancel the recovery it.remove(); // and cancel the relocating state from the shard its being relocated from RoutingNode relocatingFromNode = allocation.routingNodes().node(shardRouting.relocatingNodeId()); if (relocatingFromNode != null) { for (ShardRouting fromShardRouting : relocatingFromNode) { if (fromShardRouting.isSameShard(shardRouting) && fromShardRouting.state() == RELOCATING) { allocation.routingNodes().cancelRelocation(fromShardRouting); break; } } } } else if (shardRouting.relocating()) { // the shard is relocating to another node, cancel the recovery on the other node, and // deallocate this one if (!allowPrimary && shardRouting.primary()) { // can't cancel a primary shard being initialized if (explain) { return new RerouteExplanation( this, allocation.decision( Decision.NO, "cancel_allocation_command", "can't cancel " + shardId + " on node " + discoNode + ", shard is primary and initializing its state")); } throw new IllegalArgumentException( "[cancel_allocation] can't cancel " + shardId + " on node " + discoNode + ", shard is primary and initializing its state"); } it.moveToUnassigned(new UnassignedInfo(UnassignedInfo.Reason.REROUTE_CANCELLED, null)); // now, go and find the shard that is initializing on the target node, and cancel it as // well... RoutingNodes.RoutingNodeIterator initializingNode = allocation.routingNodes().routingNodeIter(shardRouting.relocatingNodeId()); if (initializingNode != null) { while (initializingNode.hasNext()) { ShardRouting initializingShardRouting = initializingNode.next(); if (initializingShardRouting.isRelocationTargetOf(shardRouting)) { initializingNode.remove(); } } } } } else { // the shard is not relocating, its either started, or initializing, just cancel it and move // on... if (!allowPrimary && shardRouting.primary()) { // can't cancel a primary shard being initialized if (explain) { return new RerouteExplanation( this, allocation.decision( Decision.NO, "cancel_allocation_command", "can't cancel " + shardId + " on node " + discoNode + ", shard is primary and started")); } throw new IllegalArgumentException( "[cancel_allocation] can't cancel " + shardId + " on node " + discoNode + ", shard is primary and started"); } it.moveToUnassigned(new UnassignedInfo(UnassignedInfo.Reason.REROUTE_CANCELLED, null)); } } if (!found) { if (explain) { return new RerouteExplanation( this, allocation.decision( Decision.NO, "cancel_allocation_command", "can't cancel " + shardId + ", failed to find it on node " + discoNode)); } throw new IllegalArgumentException( "[cancel_allocation] can't cancel " + shardId + ", failed to find it on node " + discoNode); } return new RerouteExplanation( this, allocation.decision( Decision.YES, "cancel_allocation_command", "shard " + shardId + " on node " + discoNode + " can be cancelled")); }
/** * Process existing recoveries of replicas and see if we need to cancel them if we find a better * match. Today, a better match is one that has full sync id match compared to not having one in * the previous recovery. */ public boolean processExistingRecoveries(RoutingAllocation allocation) { boolean changed = false; MetaData metaData = allocation.metaData(); for (RoutingNodes.RoutingNodesIterator nodes = allocation.routingNodes().nodes(); nodes.hasNext(); ) { nodes.next(); for (RoutingNodes.RoutingNodeIterator it = nodes.nodeShards(); it.hasNext(); ) { ShardRouting shard = it.next(); if (shard.primary() == true) { continue; } if (shard.initializing() == false) { continue; } if (shard.relocatingNodeId() != null) { continue; } // if we are allocating a replica because of index creation, no need to go and find a copy, // there isn't one... IndexMetaData indexMetaData = metaData.index(shard.getIndexName()); if (shard.allocatedPostIndexCreate(indexMetaData) == false) { continue; } AsyncShardFetch.FetchResult<TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData> shardStores = fetchData(shard, allocation); if (shardStores.hasData() == false) { logger.trace("{}: fetching new stores for initializing shard", shard); continue; // still fetching } ShardRouting primaryShard = allocation.routingNodes().activePrimary(shard); assert primaryShard != null : "the replica shard can be allocated on at least one node, so there must be an active primary"; TransportNodesListShardStoreMetaData.StoreFilesMetaData primaryStore = findStore(primaryShard, allocation, shardStores); if (primaryStore == null || primaryStore.allocated() == false) { // if we can't find the primary data, it is probably because the primary shard is // corrupted (and listing failed) // just let the recovery find it out, no need to do anything about it for the initializing // shard logger.trace( "{}: no primary shard store found or allocated, letting actual allocation figure it out", shard); continue; } MatchingNodes matchingNodes = findMatchingNodes(shard, allocation, primaryStore, shardStores); if (matchingNodes.getNodeWithHighestMatch() != null) { DiscoveryNode currentNode = allocation.nodes().get(shard.currentNodeId()); DiscoveryNode nodeWithHighestMatch = matchingNodes.getNodeWithHighestMatch(); if (currentNode.equals(nodeWithHighestMatch) == false && matchingNodes.isNodeMatchBySyncID(currentNode) == false && matchingNodes.isNodeMatchBySyncID(nodeWithHighestMatch) == true) { // we found a better match that has a full sync id match, the existing allocation is not // fully synced // so we found a better one, cancel this one it.moveToUnassigned( new UnassignedInfo( UnassignedInfo.Reason.REALLOCATED_REPLICA, "existing allocation of replica to [" + currentNode + "] cancelled, sync id match found on node [" + nodeWithHighestMatch + "]", null, allocation.getCurrentNanoTime(), System.currentTimeMillis())); changed = true; } } } } return changed; }
@Override public void execute(RoutingAllocation allocation) throws ElasticSearchException { DiscoveryNode discoNode = allocation.nodes().resolveNode(node); boolean found = false; for (RoutingNodes.RoutingNodeIterator it = allocation.routingNodes().routingNodeIter(discoNode.id()); it.hasNext(); ) { MutableShardRouting shardRouting = it.next(); if (!shardRouting.shardId().equals(shardId)) { continue; } found = true; if (shardRouting.relocatingNodeId() != null) { if (shardRouting.initializing()) { // the shard is initializing and recovering from another node, simply cancel the recovery it.remove(); // and cancel the relocating state from the shard its being relocated from RoutingNode relocatingFromNode = allocation.routingNodes().node(shardRouting.relocatingNodeId()); if (relocatingFromNode != null) { for (MutableShardRouting fromShardRouting : relocatingFromNode) { if (fromShardRouting.shardId().equals(shardRouting.shardId()) && fromShardRouting.state() == RELOCATING) { allocation.routingNodes().cancelRelocation(fromShardRouting); break; } } } } else if (shardRouting.relocating()) { // the shard is relocating to another node, cancel the recovery on the other node, and // deallocate this one if (!allowPrimary && shardRouting.primary()) { // can't cancel a primary shard being initialized throw new ElasticSearchIllegalArgumentException( "[cancel_allocation] can't cancel " + shardId + " on node " + discoNode + ", shard is primary and initializing its state"); } it.moveToUnassigned(); // now, go and find the shard that is initializing on the target node, and cancel it as // well... RoutingNodes.RoutingNodeIterator initializingNode = allocation.routingNodes().routingNodeIter(shardRouting.relocatingNodeId()); if (initializingNode != null) { while (initializingNode.hasNext()) { MutableShardRouting initializingShardRouting = initializingNode.next(); if (initializingShardRouting.shardId().equals(shardRouting.shardId()) && initializingShardRouting.state() == INITIALIZING) { initializingNode.remove(); } } } } } else { // the shard is not relocating, its either started, or initializing, just cancel it and move // on... if (!allowPrimary && shardRouting.primary()) { // can't cancel a primary shard being initialized throw new ElasticSearchIllegalArgumentException( "[cancel_allocation] can't cancel " + shardId + " on node " + discoNode + ", shard is primary and started"); } it.remove(); allocation .routingNodes() .unassigned() .add( new MutableShardRouting( shardRouting.index(), shardRouting.id(), null, shardRouting.primary(), ShardRoutingState.UNASSIGNED, shardRouting.version() + 1)); } } if (!found) { throw new ElasticSearchIllegalArgumentException( "[cancel_allocation] can't cancel " + shardId + ", failed to find it on node " + discoNode); } }
/** * Applies the relevant logic to handle a failed shard. Returns <tt>true</tt> if changes happened * that require relocation. */ private boolean applyFailedShard( RoutingAllocation allocation, ShardRouting failedShard, boolean addToIgnoreList) { // create a copy of the failed shard, since we assume we can change possible references to it // without // changing the state of failed shard failedShard = new ImmutableShardRouting(failedShard); IndexRoutingTable indexRoutingTable = allocation.routingTable().index(failedShard.index()); if (indexRoutingTable == null) { return false; } RoutingNodes routingNodes = allocation.routingNodes(); boolean dirty = false; if (failedShard.relocatingNodeId() != null) { // the shard is relocating, either in initializing (recovery from another node) or relocating // (moving to another node) if (failedShard.state() == INITIALIZING) { // the shard is initializing and recovering from another node // first, we need to cancel the current node that is being initialized RoutingNodes.RoutingNodeIterator initializingNode = routingNodes.routingNodeIter(failedShard.currentNodeId()); if (initializingNode != null) { while (initializingNode.hasNext()) { MutableShardRouting shardRouting = initializingNode.next(); if (shardRouting.equals(failedShard)) { dirty = true; initializingNode.remove(); if (addToIgnoreList) { // make sure we ignore this shard on the relevant node allocation.addIgnoreShardForNode( failedShard.shardId(), failedShard.currentNodeId()); } break; } } } if (dirty) { // now, find the node that we are relocating *from*, and cancel its relocation RoutingNode relocatingFromNode = routingNodes.node(failedShard.relocatingNodeId()); if (relocatingFromNode != null) { for (MutableShardRouting shardRouting : relocatingFromNode) { if (shardRouting.shardId().equals(failedShard.shardId()) && shardRouting.relocating()) { dirty = true; routingNodes.cancelRelocation(shardRouting); break; } } } } else { logger.debug("failed shard {} not found in routingNodes, ignoring it", failedShard); } return dirty; } else if (failedShard.state() == RELOCATING) { // the shard is relocating, meaning its the source the shard is relocating from // first, we need to cancel the current relocation from the current node // now, find the node that we are recovering from, cancel the relocation, remove it from the // node // and add it to the unassigned shards list... RoutingNodes.RoutingNodeIterator relocatingFromNode = routingNodes.routingNodeIter(failedShard.currentNodeId()); if (relocatingFromNode != null) { while (relocatingFromNode.hasNext()) { MutableShardRouting shardRouting = relocatingFromNode.next(); if (shardRouting.equals(failedShard)) { dirty = true; relocatingFromNode.remove(); if (addToIgnoreList) { // make sure we ignore this shard on the relevant node allocation.addIgnoreShardForNode( failedShard.shardId(), failedShard.currentNodeId()); } routingNodes .unassigned() .add( new MutableShardRouting( failedShard.index(), failedShard.id(), null, failedShard.primary(), ShardRoutingState.UNASSIGNED, failedShard.version() + 1)); break; } } } if (dirty) { // next, we need to find the target initializing shard that is recovering from, and remove // it... RoutingNodes.RoutingNodeIterator initializingNode = routingNodes.routingNodeIter(failedShard.relocatingNodeId()); if (initializingNode != null) { while (initializingNode.hasNext()) { MutableShardRouting shardRouting = initializingNode.next(); if (shardRouting.shardId().equals(failedShard.shardId()) && shardRouting.state() == INITIALIZING) { dirty = true; initializingNode.remove(); } } } } else { logger.debug("failed shard {} not found in routingNodes, ignoring it", failedShard); } } else { throw new ElasticsearchIllegalStateException( "illegal state for a failed shard, relocating node id is set, but state does not match: " + failedShard); } } else { // the shard is not relocating, its either started, or initializing, just cancel it and move // on... RoutingNodes.RoutingNodeIterator node = routingNodes.routingNodeIter(failedShard.currentNodeId()); if (node != null) { while (node.hasNext()) { MutableShardRouting shardRouting = node.next(); if (shardRouting.equals(failedShard)) { dirty = true; if (addToIgnoreList) { // make sure we ignore this shard on the relevant node allocation.addIgnoreShardForNode(failedShard.shardId(), failedShard.currentNodeId()); } node.remove(); // move all the shards matching the failed shard to the end of the unassigned list // so we give a chance for other allocations and won't create poison failed allocations // that can keep other shards from being allocated (because of limits applied on how // many // shards we can start per node) List<MutableShardRouting> shardsToMove = Lists.newArrayList(); for (Iterator<MutableShardRouting> unassignedIt = routingNodes.unassigned().iterator(); unassignedIt.hasNext(); ) { MutableShardRouting unassignedShardRouting = unassignedIt.next(); if (unassignedShardRouting.shardId().equals(failedShard.shardId())) { unassignedIt.remove(); shardsToMove.add(unassignedShardRouting); } } if (!shardsToMove.isEmpty()) { routingNodes.unassigned().addAll(shardsToMove); } routingNodes .unassigned() .add( new MutableShardRouting( failedShard.index(), failedShard.id(), null, null, failedShard.restoreSource(), failedShard.primary(), ShardRoutingState.UNASSIGNED, failedShard.version() + 1)); break; } } } if (!dirty) { logger.debug("failed shard {} not found in routingNodes, ignoring it", failedShard); } } return dirty; }