Example #1
  private boolean reroute(RoutingAllocation allocation) {
    boolean changed = false;
    // first, clear from the shards any node id they used to belong to that is now dead
    changed |= deassociateDeadNodes(allocation);

    // make sure any new (data) nodes are added to the routing nodes
    applyNewNodes(allocation);

    // elect primaries *before* allocating unassigned shards, so replicas of failed primaries
    // can be promoted to primary instead of waiting for the primaries to be allocated and
    // recovered (*from the gateway*)
    changed |= electPrimariesAndUnassignedDanglingReplicas(allocation);

    // now allocate all the unassigned to available nodes
    if (allocation.routingNodes().hasUnassigned()) {
      changed |= shardsAllocators.allocateUnassigned(allocation);
      // elect primaries again, in case allocating the unassigned shards made this necessary
      changed |= electPrimariesAndUnassignedDanglingReplicas(allocation);
    }

    // move shards that no longer can be allocated
    changed |= moveShards(allocation);

    // rebalance
    changed |= shardsAllocators.rebalance(allocation);
    assert RoutingNodes.assertShardStats(allocation.routingNodes());
    return changed;
  }
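
The method above accumulates its result with changed |= ... so that every phase runs even after an earlier phase has already reported a change; a short-circuiting changed = changed || ... would skip the later phases. A minimal standalone sketch of that pattern, with hypothetical phase methods standing in for the Elasticsearch ones:

  class RerouteSketch {
    boolean reroute() {
      boolean changed = false;
      changed |= removeDeadNodes();   // |= never short-circuits, so every phase executes
      changed |= electPrimaries();
      changed |= rebalance();
      return changed;
    }

    private boolean removeDeadNodes() { return false; }
    private boolean electPrimaries()  { return true; }
    private boolean rebalance()       { return false; }
  }
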
Example #2
  /**
   * Only handles a reroute, but *without* any reassignment of unassigned shards or rebalancing.
   * Removed nodes are still handled, but their shards are only moved to UNASSIGNED; they are not
   * reassigned.
   */
  public RoutingAllocation.Result rerouteWithNoReassign(ClusterState clusterState, boolean debug) {
    RoutingNodes routingNodes = clusterState.routingNodes();
    // shuffle the unassigned shards, just so we won't have things like poison failed shards
    routingNodes.unassigned().shuffle();
    RoutingAllocation allocation =
        new RoutingAllocation(
            allocationDeciders,
            routingNodes,
            clusterState.nodes(),
            clusterInfoService.getClusterInfo());
    allocation.debugDecision(debug);
    boolean changed = false;
    // first, clear from the shards any node id they used to belong to that is now dead
    changed |= deassociateDeadNodes(allocation);

    // make sure any new (data) nodes are added to the routing nodes
    applyNewNodes(allocation);

    // elect primaries *before* allocating unassigned shards, so replicas of failed primaries
    // can be promoted to primary instead of waiting for the primaries to be allocated and
    // recovered (*from the gateway*)
    changed |= electPrimariesAndUnassignedDanglingReplicas(allocation);

    if (!changed) {
      return new RoutingAllocation.Result(false, clusterState.routingTable());
    }
    return new RoutingAllocation.Result(
        true,
        new RoutingTable.Builder()
            .updateNodes(routingNodes)
            .build()
            .validateRaiseException(clusterState.metaData()));
  }
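
When nothing changed, the method above returns the caller's existing routing table instead of building a new one, so the caller can treat the result as a no-op. A minimal standalone sketch of that copy-on-change contract, using a hypothetical CopyOnChangeTable in place of RoutingTable (java.util imports assumed):

  final class CopyOnChangeTable {
    final List<String> rows;

    CopyOnChangeTable(List<String> rows) {
      this.rows = List.copyOf(rows);
    }

    CopyOnChangeTable withRow(String row) {
      if (rows.contains(row)) {
        return this;                          // nothing changed: hand back the same instance
      }
      List<String> copy = new ArrayList<>(rows);
      copy.add(row);
      return new CopyOnChangeTable(copy);     // changed: build a fresh instance
    }
  }

A caller can then detect a no-op by reference, e.g. updated == original, which mirrors the contract documented on the public reroute method in Example #8.
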
Example #3
 public RoutingAllocation.Result reroute(
     ClusterState clusterState, AllocationCommands commands, boolean explain)
     throws ElasticsearchException {
   RoutingNodes routingNodes = clusterState.routingNodes();
   // we don't shuffle the unassigned shards here, to try and get as close as possible to
   // a consistent result of the effect the commands have on the routing
   // this allows systems to dry run the commands, see the resulting cluster state, and act on it
   RoutingAllocation allocation =
       new RoutingAllocation(
           allocationDeciders,
           routingNodes,
           clusterState.nodes(),
           clusterInfoService.getClusterInfo());
   // don't short circuit deciders, we want a full explanation
   allocation.debugDecision(true);
   // we ignore disable allocation, because commands are explicit
   allocation.ignoreDisable(true);
   RoutingExplanations explanations = commands.execute(allocation, explain);
   // revert the ignoreDisable flag, since for the reroute itself we want the original setting
   // to take effect again
   allocation.ignoreDisable(false);
   // the assumption is that commands will move / act on shards (or fail through exceptions),
   // so there will always be shard "movements" and there is no need to check whether the
   // reroute changed anything
   reroute(allocation);
   return new RoutingAllocation.Result(
       true,
       new RoutingTable.Builder()
           .updateNodes(routingNodes)
           .build()
           .validateRaiseException(clusterState.metaData()),
       explanations);
 }
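
The command reroute above turns ignoreDisable on for the duration of the explicit commands and reverts it before the normal reroute runs. A small sketch of that flag scoping, under the assumption that the revert should also survive an exception thrown by the commands; IgnoreDisableScope is a hypothetical stand-in, not an Elasticsearch class:

  final class IgnoreDisableScope {
    private boolean ignoreDisable;

    void runWithIgnoreDisable(Runnable commands) {
      boolean previous = ignoreDisable;
      ignoreDisable = true;           // commands are explicit, so bypass the disable setting
      try {
        commands.run();
      } finally {
        ignoreDisable = previous;     // always restore the original setting afterwards
      }
    }
  }
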
Example #4
 /** Adds any new (data) nodes to the routing nodes. */
 private void applyNewNodes(RoutingAllocation allocation) {
   final RoutingNodes routingNodes = allocation.routingNodes();
   for (ObjectCursor<DiscoveryNode> cursor : allocation.nodes().dataNodes().values()) {
     DiscoveryNode node = cursor.value;
     if (!routingNodes.isKnown(node)) {
       routingNodes.addNode(node);
     }
   }
 }
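
A tiny standalone sketch of the "register only unknown nodes" step above, with a plain Set standing in for RoutingNodes.isKnown()/addNode() (the names below are hypothetical, java.util imports assumed):

  static void applyNewNodes(Set<String> knownNodeIds, Collection<String> dataNodeIds) {
    for (String nodeId : dataNodeIds) {
      if (!knownNodeIds.contains(nodeId)) {
        knownNodeIds.add(nodeId);     // only nodes not seen before are registered
      }
    }
  }
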
Example #5
  private boolean electPrimariesAndUnassignedDanglingReplicas(RoutingAllocation allocation) {
    boolean changed = false;
    RoutingNodes routingNodes = allocation.routingNodes();
    if (!routingNodes.hasUnassignedPrimaries()) {
      // bail out early if there are no unassigned primaries
      return changed;
    }

    // go over and remove dangling replicas that are initializing for primary shards
    List<ShardRouting> shardsToFail = Lists.newArrayList();
    for (MutableShardRouting shardEntry : routingNodes.unassigned()) {
      if (shardEntry.primary()) {
        for (MutableShardRouting routing : routingNodes.assignedShards(shardEntry)) {
          if (!routing.primary() && routing.initializing()) {
            shardsToFail.add(routing);
          }
        }
      }
    }
    for (ShardRouting shardToFail : shardsToFail) {
      changed |= applyFailedShard(allocation, shardToFail, false);
    }

    // now, go over and elect a new primary if possible; note that from this point on, once one
    // is elected, routingNodes.hasUnassignedPrimaries() may return false
    for (MutableShardRouting shardEntry : routingNodes.unassigned()) {
      if (shardEntry.primary()) {
        MutableShardRouting candidate = allocation.routingNodes().activeReplica(shardEntry);
        if (candidate != null) {
          routingNodes.swapPrimaryFlag(shardEntry, candidate);
          if (candidate.relocatingNodeId() != null) {
            changed = true;
            // it's also relocating, so make sure to flip the target routing to primary as well
            RoutingNode node = routingNodes.node(candidate.relocatingNodeId());
            if (node != null) {
              for (MutableShardRouting shardRouting : node) {
                if (shardRouting.shardId().equals(candidate.shardId()) && !shardRouting.primary()) {
                  routingNodes.swapPrimaryFlag(shardRouting);
                  break;
                }
              }
            }
          }
        }
      }
    }

    return changed;
  }
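
The first loop above only collects the dangling replicas into shardsToFail; the failures are applied in a separate pass, so nothing is mutated while the unassigned list is being iterated. A small sketch of that collect-then-apply pattern with a hypothetical Shard record in place of MutableShardRouting (java.util imports assumed):

  record Shard(int id, boolean primary, boolean initializing) {}

  static List<Shard> collectDanglingReplicas(List<Shard> unassignedPrimaries, List<Shard> assigned) {
    List<Shard> toFail = new ArrayList<>();
    for (Shard unassignedPrimary : unassignedPrimaries) {
      for (Shard candidate : assigned) {
        if (candidate.id() == unassignedPrimary.id()
            && !candidate.primary()
            && candidate.initializing()) {
          toFail.add(candidate);      // collect only; the caller fails these in a second pass
        }
      }
    }
    return toFail;
  }
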
Example #6
 private boolean deassociateDeadNodes(RoutingAllocation allocation) {
   boolean changed = false;
   for (RoutingNodes.RoutingNodesIterator it = allocation.routingNodes().nodes(); it.hasNext(); ) {
     RoutingNode node = it.next();
     if (allocation.nodes().dataNodes().containsKey(node.nodeId())) {
       // it's a live node, continue
       continue;
     }
     changed = true;
     // now, go over all the shards routing on the node, and fail them
     for (MutableShardRouting shardRouting : node.copyShards()) {
       applyFailedShard(allocation, shardRouting, false);
     }
     // it's a dead node; remove it. Note that it's important to remove it *after* applying the
     // failed shards, since applyFailedShard relies on the RoutingNode still existing in the
     // list of nodes
     it.remove();
   }
   return changed;
 }
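
The loop above drops dead nodes through the RoutingNodesIterator itself, and fails their shards before the removal because applyFailedShard needs the RoutingNode to still be present. A standalone sketch of the same iterator-removal ordering using plain java.util types (the names are hypothetical):

  static boolean removeDeadNodes(Map<String, List<String>> shardsByNode, Set<String> liveNodeIds) {
    boolean changed = false;
    for (Iterator<Map.Entry<String, List<String>>> it = shardsByNode.entrySet().iterator();
        it.hasNext(); ) {
      Map.Entry<String, List<String>> entry = it.next();
      if (liveNodeIds.contains(entry.getKey())) {
        continue;                     // live node, keep it
      }
      changed = true;
      entry.getValue().clear();       // per-node cleanup while the entry still exists
      it.remove();                    // then remove the node through the iterator
    }
    return changed;
  }
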
Example #7
  private boolean moveShards(RoutingAllocation allocation) {
    boolean changed = false;

    // create a copy of the shards, interleaving between nodes, and check whether each can remain
    List<MutableShardRouting> shards = new ArrayList<>();
    int index = 0;
    boolean found = true;
    final RoutingNodes routingNodes = allocation.routingNodes();
    while (found) {
      found = false;
      for (RoutingNode routingNode : routingNodes) {
        if (index >= routingNode.size()) {
          continue;
        }
        found = true;
        shards.add(routingNode.get(index));
      }
      index++;
    }
    for (int i = 0; i < shards.size(); i++) {
      MutableShardRouting shardRouting = shards.get(i);
      // we can only move started shards...
      if (!shardRouting.started()) {
        continue;
      }
      final RoutingNode routingNode = routingNodes.node(shardRouting.currentNodeId());
      Decision decision = allocation.deciders().canRemain(shardRouting, routingNode, allocation);
      if (decision.type() == Decision.Type.NO) {
        logger.debug(
            "[{}][{}] allocated on [{}], but can no longer be allocated on it, moving...",
            shardRouting.index(),
            shardRouting.id(),
            routingNode.node());
        boolean moved = shardsAllocators.move(shardRouting, routingNode, allocation);
        if (!moved) {
          logger.debug("[{}][{}] can't move", shardRouting.index(), shardRouting.id());
        } else {
          changed = true;
        }
      }
    }
    return changed;
  }
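
The while/for loop above flattens the per-node shard lists round-robin: element 0 of every node, then element 1, and so on, so no single node's shards are all examined (and possibly moved) before the other nodes get a turn. A generic standalone sketch of that interleaving (java.util imports assumed):

  static <T> List<T> interleave(List<List<T>> shardsPerNode) {
    List<T> result = new ArrayList<>();
    int index = 0;
    boolean found = true;
    while (found) {
      found = false;
      for (List<T> node : shardsPerNode) {
        if (index >= node.size()) {
          continue;                   // this node has no shard at this position
        }
        found = true;
        result.add(node.get(index));
      }
      index++;
    }
    return result;
  }

For example, interleave(List.of(List.of("a1", "a2", "a3"), List.of("b1"))) yields [a1, b1, a2, a3].
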
Example #8
 /**
  * Reroutes the routing table based on the live nodes.
  *
  * <p>If the same instance of the routing table is returned, then no change has been made.
  */
 public RoutingAllocation.Result reroute(ClusterState clusterState, boolean debug) {
   RoutingNodes routingNodes = clusterState.routingNodes();
   // shuffle the unassigned shards, just so we won't have things like poison failed shards
   routingNodes.unassigned().shuffle();
   RoutingAllocation allocation =
       new RoutingAllocation(
           allocationDeciders,
           routingNodes,
           clusterState.nodes(),
           clusterInfoService.getClusterInfo());
   allocation.debugDecision(debug);
   if (!reroute(allocation)) {
     return new RoutingAllocation.Result(false, clusterState.routingTable());
   }
   return new RoutingAllocation.Result(
       true,
       new RoutingTable.Builder()
           .updateNodes(routingNodes)
           .build()
           .validateRaiseException(clusterState.metaData()));
 }
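
The Javadoc contract above ("same instance means no change") lets callers decide by reference identity whether a new cluster state needs to be built at all. A minimal sketch of that contract with hypothetical toy types, not Elasticsearch classes:

  final class RerouteResult {
    final boolean changed;
    final Object routingTable;        // stands in for the (immutable) routing table

    RerouteResult(boolean changed, Object routingTable) {
      this.changed = changed;
      this.routingTable = routingTable;
    }
  }

  static boolean needsNewClusterState(Object currentRoutingTable, RerouteResult result) {
    // identity comparison: an unchanged reroute hands back the very same table instance
    return result.routingTable != currentRoutingTable;
  }
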
Example #9
  /**
   * Applies the relevant logic to handle a failed shard. Returns <tt>true</tt> if changes happened
   * that require relocation.
   */
  private boolean applyFailedShard(
      RoutingAllocation allocation, ShardRouting failedShard, boolean addToIgnoreList) {
    // create a copy of the failed shard, since we assume we can change possible references to it
    // without changing the state of the failed shard
    failedShard = new ImmutableShardRouting(failedShard);

    IndexRoutingTable indexRoutingTable = allocation.routingTable().index(failedShard.index());
    if (indexRoutingTable == null) {
      return false;
    }

    RoutingNodes routingNodes = allocation.routingNodes();
    boolean dirty = false;
    if (failedShard.relocatingNodeId() != null) {
      // the shard is relocating: either INITIALIZING (recovering from another node) or
      // RELOCATING (moving to another node)
      if (failedShard.state() == INITIALIZING) {
        // the shard is initializing and recovering from another node
        // first, we need to cancel the current node that is being initialized
        RoutingNodes.RoutingNodeIterator initializingNode =
            routingNodes.routingNodeIter(failedShard.currentNodeId());
        if (initializingNode != null) {
          while (initializingNode.hasNext()) {
            MutableShardRouting shardRouting = initializingNode.next();
            if (shardRouting.equals(failedShard)) {
              dirty = true;
              initializingNode.remove();
              if (addToIgnoreList) {
                // make sure we ignore this shard on the relevant node
                allocation.addIgnoreShardForNode(
                    failedShard.shardId(), failedShard.currentNodeId());
              }

              break;
            }
          }
        }
        if (dirty) {
          // now, find the node that we are relocating *from*, and cancel its relocation
          RoutingNode relocatingFromNode = routingNodes.node(failedShard.relocatingNodeId());
          if (relocatingFromNode != null) {
            for (MutableShardRouting shardRouting : relocatingFromNode) {
              if (shardRouting.shardId().equals(failedShard.shardId())
                  && shardRouting.relocating()) {
                dirty = true;
                routingNodes.cancelRelocation(shardRouting);
                break;
              }
            }
          }
        } else {
          logger.debug("failed shard {} not found in routingNodes, ignoring it", failedShard);
        }
        return dirty;
      } else if (failedShard.state() == RELOCATING) {
        // the shard is RELOCATING, meaning it is the source of the relocation
        // first, cancel the relocation on the current (source) node: remove the shard from the
        // node and add it back to the unassigned shards list...
        RoutingNodes.RoutingNodeIterator relocatingFromNode =
            routingNodes.routingNodeIter(failedShard.currentNodeId());
        if (relocatingFromNode != null) {
          while (relocatingFromNode.hasNext()) {
            MutableShardRouting shardRouting = relocatingFromNode.next();
            if (shardRouting.equals(failedShard)) {
              dirty = true;
              relocatingFromNode.remove();
              if (addToIgnoreList) {
                // make sure we ignore this shard on the relevant node
                allocation.addIgnoreShardForNode(
                    failedShard.shardId(), failedShard.currentNodeId());
              }

              routingNodes
                  .unassigned()
                  .add(
                      new MutableShardRouting(
                          failedShard.index(),
                          failedShard.id(),
                          null,
                          failedShard.primary(),
                          ShardRoutingState.UNASSIGNED,
                          failedShard.version() + 1));
              break;
            }
          }
        }
        if (dirty) {
          // next, we need to find the target shard that is initializing (recovering) from this
          // one, and remove it...
          RoutingNodes.RoutingNodeIterator initializingNode =
              routingNodes.routingNodeIter(failedShard.relocatingNodeId());
          if (initializingNode != null) {
            while (initializingNode.hasNext()) {
              MutableShardRouting shardRouting = initializingNode.next();
              if (shardRouting.shardId().equals(failedShard.shardId())
                  && shardRouting.state() == INITIALIZING) {
                dirty = true;
                initializingNode.remove();
              }
            }
          }
        } else {
          logger.debug("failed shard {} not found in routingNodes, ignoring it", failedShard);
        }
      } else {
        throw new ElasticsearchIllegalStateException(
            "illegal state for a failed shard, relocating node id is set, but state does not match: "
                + failedShard);
      }
    } else {
      // the shard is not relocating; it's either started or initializing, so just cancel it and
      // move on...
      RoutingNodes.RoutingNodeIterator node =
          routingNodes.routingNodeIter(failedShard.currentNodeId());
      if (node != null) {
        while (node.hasNext()) {
          MutableShardRouting shardRouting = node.next();
          if (shardRouting.equals(failedShard)) {
            dirty = true;
            if (addToIgnoreList) {
              // make sure we ignore this shard on the relevant node
              allocation.addIgnoreShardForNode(failedShard.shardId(), failedShard.currentNodeId());
            }
            node.remove();
            // move all the shards matching the failed shard to the end of the unassigned list,
            // so other shards get a chance to be allocated and we won't create poison failed
            // allocations that can keep other shards from being allocated (because of limits on
            // how many shards we can start per node)
            List<MutableShardRouting> shardsToMove = Lists.newArrayList();
            for (Iterator<MutableShardRouting> unassignedIt = routingNodes.unassigned().iterator();
                unassignedIt.hasNext(); ) {
              MutableShardRouting unassignedShardRouting = unassignedIt.next();
              if (unassignedShardRouting.shardId().equals(failedShard.shardId())) {
                unassignedIt.remove();
                shardsToMove.add(unassignedShardRouting);
              }
            }
            if (!shardsToMove.isEmpty()) {
              routingNodes.unassigned().addAll(shardsToMove);
            }

            routingNodes
                .unassigned()
                .add(
                    new MutableShardRouting(
                        failedShard.index(),
                        failedShard.id(),
                        null,
                        null,
                        failedShard.restoreSource(),
                        failedShard.primary(),
                        ShardRoutingState.UNASSIGNED,
                        failedShard.version() + 1));

            break;
          }
        }
      }
      if (!dirty) {
        logger.debug("failed shard {} not found in routingNodes, ignoring it", failedShard);
      }
    }
    return dirty;
  }
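
Before re-adding the failed shard, the block above pulls every unassigned entry with the same shard id out of the list and appends it again at the end, so a shard that keeps failing cycles to the back of the queue instead of starving other shards. A standalone sketch of that re-queue-to-the-tail step, with plain strings standing in for shard ids (java.util imports assumed):

  static void moveMatchingShardsToTail(List<String> unassigned, String failedShardId) {
    List<String> toMove = new ArrayList<>();
    for (Iterator<String> it = unassigned.iterator(); it.hasNext(); ) {
      String shardId = it.next();
      if (shardId.equals(failedShardId)) {
        it.remove();                  // pull the matching entry out of its current position
        toMove.add(shardId);
      }
    }
    unassigned.addAll(toMove);        // same entries, now behind everything else in the queue
  }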