/** * Finds the routing source node for peer recovery, return null if its not found. Note, this * method expects the shard routing to *require* peer recovery, use {@link * #isPeerRecovery(org.elasticsearch.cluster.routing.ShardRouting)} to check if its needed or not. */ private DiscoveryNode findSourceNodeForPeerRecovery( RoutingTable routingTable, DiscoveryNodes nodes, ShardRouting shardRouting) { DiscoveryNode sourceNode = null; if (!shardRouting.primary()) { IndexShardRoutingTable shardRoutingTable = routingTable.index(shardRouting.index()).shard(shardRouting.id()); for (ShardRouting entry : shardRoutingTable) { if (entry.primary() && entry.active()) { // only recover from started primary, if we can't find one, we will do it next round sourceNode = nodes.get(entry.currentNodeId()); if (sourceNode == null) { logger.trace( "can't find replica source node because primary shard {} is assigned to an unknown node.", entry); return null; } break; } } if (sourceNode == null) { logger.trace( "can't find replica source node for {} because a primary shard can not be found.", shardRouting.shardId()); } } else if (shardRouting.relocatingNodeId() != null) { sourceNode = nodes.get(shardRouting.relocatingNodeId()); if (sourceNode == null) { logger.trace( "can't find relocation source node for shard {} because it is assigned to an unknown node [{}].", shardRouting.shardId(), shardRouting.relocatingNodeId()); } } else { throw new IllegalStateException( "trying to find source node for peer recovery when routing state means no peer recovery: " + shardRouting); } return sourceNode; }
private void cleanFailedShards(final ClusterChangedEvent event) { RoutingTable routingTable = event.state().routingTable(); RoutingNodes.RoutingNodeIterator routingNode = event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId()); if (routingNode == null) { failedShards.clear(); return; } DiscoveryNodes nodes = event.state().nodes(); long now = System.currentTimeMillis(); String localNodeId = nodes.localNodeId(); Iterator<Map.Entry<ShardId, FailedShard>> iterator = failedShards.entrySet().iterator(); shards: while (iterator.hasNext()) { Map.Entry<ShardId, FailedShard> entry = iterator.next(); FailedShard failedShard = entry.getValue(); IndexRoutingTable indexRoutingTable = routingTable.index(entry.getKey().getIndex()); if (indexRoutingTable != null) { IndexShardRoutingTable shardRoutingTable = indexRoutingTable.shard(entry.getKey().id()); if (shardRoutingTable != null) { for (ShardRouting shardRouting : shardRoutingTable.assignedShards()) { if (localNodeId.equals(shardRouting.currentNodeId())) { // we have a timeout here just to make sure we don't have dangled failed shards for // some reason // its just another safely layer if (shardRouting.version() == failedShard.version && ((now - failedShard.timestamp) < TimeValue.timeValueMinutes(60).millis())) { // It's the same failed shard - keep it if it hasn't timed out continue shards; } else { // Different version or expired, remove it break; } } } } } iterator.remove(); } }