private Map<DiscoveryNode, TransportNodesListShardStoreMetaData.StoreFilesMetaData>
      buildShardStores(DiscoveryNodes nodes, MutableShardRouting shard) {
    Map<DiscoveryNode, TransportNodesListShardStoreMetaData.StoreFilesMetaData> shardStores =
        cachedStores.get(shard.shardId());
    ObjectOpenHashSet<String> nodesIds;
    if (shardStores == null) {
      shardStores = Maps.newHashMap();
      cachedStores.put(shard.shardId(), shardStores);
      nodesIds = ObjectOpenHashSet.from(nodes.dataNodes().keys());
    } else {
      nodesIds = ObjectOpenHashSet.newInstance();
      // clean nodes that have failed
      for (Iterator<DiscoveryNode> it = shardStores.keySet().iterator(); it.hasNext(); ) {
        DiscoveryNode node = it.next();
        if (!nodes.nodeExists(node.id())) {
          it.remove();
        }
      }

      for (ObjectCursor<DiscoveryNode> cursor : nodes.dataNodes().values()) {
        DiscoveryNode node = cursor.value;
        if (!shardStores.containsKey(node)) {
          nodesIds.add(node.id());
        }
      }
    }

    if (!nodesIds.isEmpty()) {
      String[] nodesIdsArray = nodesIds.toArray(String.class);
      TransportNodesListShardStoreMetaData.NodesStoreFilesMetaData nodesStoreFilesMetaData =
          listShardStoreMetaData
              .list(shard.shardId(), false, nodesIdsArray, listTimeout)
              .actionGet();
      if (logger.isTraceEnabled()) {
        if (nodesStoreFilesMetaData.failures().length > 0) {
          StringBuilder sb =
              new StringBuilder(shard + ": failures when trying to list stores on nodes:");
          for (int i = 0; i < nodesStoreFilesMetaData.failures().length; i++) {
            Throwable cause = ExceptionsHelper.unwrapCause(nodesStoreFilesMetaData.failures()[i]);
            if (cause instanceof ConnectTransportException) {
              continue;
            }
            sb.append("\n    -> ")
                .append(nodesStoreFilesMetaData.failures()[i].getDetailedMessage());
          }
          logger.trace(sb.toString());
        }
      }

      for (TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData nodeStoreFilesMetaData :
          nodesStoreFilesMetaData) {
        if (nodeStoreFilesMetaData.storeFilesMetaData() != null) {
          shardStores.put(
              nodeStoreFilesMetaData.getNode(), nodeStoreFilesMetaData.storeFilesMetaData());
        }
      }
    }

    return shardStores;
  }
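The method above lazily builds a per-shard cache of store metadata: entries for nodes that left the cluster are evicted, and only nodes with no cached entry are queried over the transport layer. Below is a minimal, self-contained sketch of that caching pattern using plain JDK types; the class and method names (StoreCacheSketch, fetchFromNode) and the String-based shard/node keys are illustrative assumptions, not part of the original code, which keys the cache by ShardId and stores StoreFilesMetaData per DiscoveryNode.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// Sketch of the "evict departed nodes, fetch only missing nodes" caching pattern.
public class StoreCacheSketch {
  private final Map<String, Map<String, String>> cachedStores = new HashMap<>();

  Map<String, String> buildStores(String shardId, Set<String> liveDataNodes) {
    Map<String, String> stores = cachedStores.computeIfAbsent(shardId, k -> new HashMap<>());
    // drop cached entries for nodes that have left the cluster
    stores.keySet().retainAll(liveDataNodes);
    // query only the nodes we have no cached entry for
    Set<String> missing = new HashSet<>(liveDataNodes);
    missing.removeAll(stores.keySet());
    for (String node : missing) {
      stores.put(node, fetchFromNode(shardId, node));
    }
    return stores;
  }

  // stand-in for the listShardStoreMetaData transport call in the original code
  private String fetchFromNode(String shardId, String node) {
    return "store-files-for-" + shardId + "@" + node;
  }
}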
Example #2
  private boolean applyStartedShards(
      RoutingNodes routingNodes, Iterable<? extends ShardRouting> startedShardEntries) {
    boolean dirty = false;
    // applyStartedShards might be called several times with the same shard, so ignore duplicates
    for (ShardRouting startedShard : startedShardEntries) {
      assert startedShard.state() == INITIALIZING;

      // retrieve the relocating node id before routingNodes.started() is called, since starting clears it
      String relocatingNodeId = null;

      RoutingNodes.RoutingNodeIterator currentRoutingNode =
          routingNodes.routingNodeIter(startedShard.currentNodeId());
      if (currentRoutingNode != null) {
        for (MutableShardRouting shard : currentRoutingNode) {
          if (shard.shardId().equals(startedShard.shardId())) {
            relocatingNodeId = shard.relocatingNodeId();
            if (!shard.started()) {
              dirty = true;
              routingNodes.started(shard);
            }
            break;
          }
        }
      }

      // startedShard is the current state of the shard (post relocation for example)
      // this means that after relocation, the state will be started and the currentNodeId will be
      // the node we relocated to

      if (relocatingNodeId == null) {
        continue;
      }

      RoutingNodes.RoutingNodeIterator sourceRoutingNode =
          routingNodes.routingNodeIter(relocatingNodeId);
      if (sourceRoutingNode != null) {
        while (sourceRoutingNode.hasNext()) {
          MutableShardRouting shard = sourceRoutingNode.next();
          if (shard.shardId().equals(startedShard.shardId())) {
            if (shard.relocating()) {
              dirty = true;
              sourceRoutingNode.remove();
              break;
            }
          }
        }
      }
    }
    return dirty;
  }
Example #3
  private boolean electPrimariesAndUnassignedDanglingReplicas(RoutingAllocation allocation) {
    boolean changed = false;
    RoutingNodes routingNodes = allocation.routingNodes();
    if (!routingNodes.hasUnassignedPrimaries()) {
      // move out if we don't have unassigned primaries
      return changed;
    }

    // go over and remove dangling replicas that are initializing for primary shards
    List<ShardRouting> shardsToFail = Lists.newArrayList();
    for (MutableShardRouting shardEntry : routingNodes.unassigned()) {
      if (shardEntry.primary()) {
        for (MutableShardRouting routing : routingNodes.assignedShards(shardEntry)) {
          if (!routing.primary() && routing.initializing()) {
            shardsToFail.add(routing);
          }
        }
      }
    }
    for (ShardRouting shardToFail : shardsToFail) {
      changed |= applyFailedShard(allocation, shardToFail, false);
    }

    // now, go over and elect a new primary if possible; note that from this code block on, if one
    // is elected, routingNodes.hasUnassignedPrimaries() will potentially be false
    for (MutableShardRouting shardEntry : routingNodes.unassigned()) {
      if (shardEntry.primary()) {
        MutableShardRouting candidate = allocation.routingNodes().activeReplica(shardEntry);
        if (candidate != null) {
          routingNodes.swapPrimaryFlag(shardEntry, candidate);
          if (candidate.relocatingNodeId() != null) {
            changed = true;
            // its also relocating, make sure to move the other routing to primary
            RoutingNode node = routingNodes.node(candidate.relocatingNodeId());
            if (node != null) {
              for (MutableShardRouting shardRouting : node) {
                if (shardRouting.shardId().equals(candidate.shardId()) && !shardRouting.primary()) {
                  routingNodes.swapPrimaryFlag(shardRouting);
                  break;
                }
              }
            }
          }
        }
      }
    }

    return changed;
  }
  @Override
  public void execute(RoutingAllocation allocation) throws ElasticSearchException {
    DiscoveryNode discoNode = allocation.nodes().resolveNode(node);

    MutableShardRouting shardRouting = null;
    for (MutableShardRouting routing : allocation.routingNodes().unassigned()) {
      if (routing.shardId().equals(shardId)) {
        // prefer primaries first to allocate
        if (shardRouting == null || routing.primary()) {
          shardRouting = routing;
        }
      }
    }

    if (shardRouting == null) {
      throw new ElasticSearchIllegalArgumentException(
          "[allocate] failed to find " + shardId + " on the list of unassigned shards");
    }

    if (shardRouting.primary() && !allowPrimary) {
      throw new ElasticSearchIllegalArgumentException(
          "[allocate] trying to allocate a primary shard " + shardId + "], which is disabled");
    }

    RoutingNode routingNode = allocation.routingNodes().node(discoNode.id());
    allocation.addIgnoreDisable(shardRouting.shardId(), routingNode.nodeId());
    if (!allocation.deciders().canAllocate(shardRouting, routingNode, allocation).allowed()) {
      throw new ElasticSearchIllegalArgumentException(
          "[allocate] allocation of " + shardId + " on node " + discoNode + " is not allowed");
    }
    // go over and remove it from the unassigned
    for (Iterator<MutableShardRouting> it = allocation.routingNodes().unassigned().iterator();
        it.hasNext(); ) {
      if (it.next() != shardRouting) {
        continue;
      }
      it.remove();
      routingNode.add(shardRouting);
      break;
    }
  }
  @Override
  public void execute(RoutingAllocation allocation) throws ElasticSearchException {
    DiscoveryNode discoNode = allocation.nodes().resolveNode(node);
    boolean found = false;
    for (RoutingNodes.RoutingNodeIterator it =
            allocation.routingNodes().routingNodeIter(discoNode.id());
        it.hasNext(); ) {
      MutableShardRouting shardRouting = it.next();
      if (!shardRouting.shardId().equals(shardId)) {
        continue;
      }
      found = true;
      if (shardRouting.relocatingNodeId() != null) {
        if (shardRouting.initializing()) {
          // the shard is initializing and recovering from another node, simply cancel the recovery
          it.remove();
          // and cancel the relocating state on the shard it is being relocated from
          RoutingNode relocatingFromNode =
              allocation.routingNodes().node(shardRouting.relocatingNodeId());
          if (relocatingFromNode != null) {
            for (MutableShardRouting fromShardRouting : relocatingFromNode) {
              if (fromShardRouting.shardId().equals(shardRouting.shardId())
                  && fromShardRouting.state() == RELOCATING) {
                allocation.routingNodes().cancelRelocation(fromShardRouting);
                break;
              }
            }
          }
        } else if (shardRouting.relocating()) {

          // the shard is relocating to another node, cancel the recovery on the other node, and
          // deallocate this one
          if (!allowPrimary && shardRouting.primary()) {
            // can't cancel a primary shard being initialized
            throw new ElasticSearchIllegalArgumentException(
                "[cancel_allocation] can't cancel "
                    + shardId
                    + " on node "
                    + discoNode
                    + ", shard is primary and initializing its state");
          }
          it.moveToUnassigned();
          // now, go and find the shard that is initializing on the target node, and cancel it as
          // well...
          RoutingNodes.RoutingNodeIterator initializingNode =
              allocation.routingNodes().routingNodeIter(shardRouting.relocatingNodeId());
          if (initializingNode != null) {
            while (initializingNode.hasNext()) {
              MutableShardRouting initializingShardRouting = initializingNode.next();
              if (initializingShardRouting.shardId().equals(shardRouting.shardId())
                  && initializingShardRouting.state() == INITIALIZING) {
                initializingNode.remove();
              }
            }
          }
        }
      } else {
        // the shard is not relocating, it is either started or initializing, just cancel it and
        // move on...
        if (!allowPrimary && shardRouting.primary()) {
          // can't cancel a primary shard being initialized
          throw new ElasticSearchIllegalArgumentException(
              "[cancel_allocation] can't cancel "
                  + shardId
                  + " on node "
                  + discoNode
                  + ", shard is primary and started");
        }
        it.remove();
        allocation
            .routingNodes()
            .unassigned()
            .add(
                new MutableShardRouting(
                    shardRouting.index(),
                    shardRouting.id(),
                    null,
                    shardRouting.primary(),
                    ShardRoutingState.UNASSIGNED,
                    shardRouting.version() + 1));
      }
    }
    if (!found) {
      throw new ElasticSearchIllegalArgumentException(
          "[cancel_allocation] can't cancel "
              + shardId
              + ", failed to find it on node "
              + discoNode);
    }
  }
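The two execute(RoutingAllocation) methods above are the bodies of explicit "allocate" and "cancel" allocation commands. Purely as a usage sketch: with the pre-2.x Elasticsearch Java client, such commands are typically submitted through the cluster reroute API roughly as shown below. The exact builder and constructor signatures are assumptions based on that era's API and may differ between versions.

import org.elasticsearch.action.admin.cluster.reroute.ClusterRerouteResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.routing.allocation.command.AllocateAllocationCommand;
import org.elasticsearch.cluster.routing.allocation.command.CancelAllocationCommand;
import org.elasticsearch.index.shard.ShardId;

// Hedged usage sketch (signatures assumed for the 1.x-era API): explicitly allocate an
// unassigned shard copy on one node and cancel a copy on another via cluster reroute.
public class RerouteExample {
  static void reroute(Client client) {
    ClusterRerouteResponse response =
        client.admin().cluster().prepareReroute()
            .add(new AllocateAllocationCommand(new ShardId("my_index", 0), "node-1", false))
            .add(new CancelAllocationCommand(new ShardId("my_index", 0), "node-2", false))
            .execute().actionGet();
  }
}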
Example #6
  /**
   * Applies the relevant logic to handle a failed shard. Returns <tt>true</tt> if changes happened
   * that require relocation.
   */
  private boolean applyFailedShard(
      RoutingAllocation allocation, ShardRouting failedShard, boolean addToIgnoreList) {
    // create a copy of the failed shard, since we assume we can change possible references to it
    // without changing the state of the failed shard
    failedShard = new ImmutableShardRouting(failedShard);

    IndexRoutingTable indexRoutingTable = allocation.routingTable().index(failedShard.index());
    if (indexRoutingTable == null) {
      return false;
    }

    RoutingNodes routingNodes = allocation.routingNodes();
    boolean dirty = false;
    if (failedShard.relocatingNodeId() != null) {
      // the shard is relocating: either INITIALIZING (recovering from another node) or RELOCATING
      // (moving to another node)
      if (failedShard.state() == INITIALIZING) {
        // the shard is initializing and recovering from another node
        // first, we need to cancel the current node that is being initialized
        RoutingNodes.RoutingNodeIterator initializingNode =
            routingNodes.routingNodeIter(failedShard.currentNodeId());
        if (initializingNode != null) {
          while (initializingNode.hasNext()) {
            MutableShardRouting shardRouting = initializingNode.next();
            if (shardRouting.equals(failedShard)) {
              dirty = true;
              initializingNode.remove();
              if (addToIgnoreList) {
                // make sure we ignore this shard on the relevant node
                allocation.addIgnoreShardForNode(
                    failedShard.shardId(), failedShard.currentNodeId());
              }

              break;
            }
          }
        }
        if (dirty) {
          // now, find the node that we are relocating *from*, and cancel its relocation
          RoutingNode relocatingFromNode = routingNodes.node(failedShard.relocatingNodeId());
          if (relocatingFromNode != null) {
            for (MutableShardRouting shardRouting : relocatingFromNode) {
              if (shardRouting.shardId().equals(failedShard.shardId())
                  && shardRouting.relocating()) {
                dirty = true;
                routingNodes.cancelRelocation(shardRouting);
                break;
              }
            }
          }
        } else {
          logger.debug("failed shard {} not found in routingNodes, ignoring it", failedShard);
        }
        return dirty;
      } else if (failedShard.state() == RELOCATING) {
        // the shard is relocating, meaning it is the source copy that the shard is relocating from
        // first, we need to cancel the current relocation from the current node
        // now, find the node that we are recovering from, cancel the relocation, remove it from
        // the node, and add it to the unassigned shards list...
        RoutingNodes.RoutingNodeIterator relocatingFromNode =
            routingNodes.routingNodeIter(failedShard.currentNodeId());
        if (relocatingFromNode != null) {
          while (relocatingFromNode.hasNext()) {
            MutableShardRouting shardRouting = relocatingFromNode.next();
            if (shardRouting.equals(failedShard)) {
              dirty = true;
              relocatingFromNode.remove();
              if (addToIgnoreList) {
                // make sure we ignore this shard on the relevant node
                allocation.addIgnoreShardForNode(
                    failedShard.shardId(), failedShard.currentNodeId());
              }

              routingNodes
                  .unassigned()
                  .add(
                      new MutableShardRouting(
                          failedShard.index(),
                          failedShard.id(),
                          null,
                          failedShard.primary(),
                          ShardRoutingState.UNASSIGNED,
                          failedShard.version() + 1));
              break;
            }
          }
        }
        if (dirty) {
          // next, we need to find the target initializing shard that is recovering from this one,
          // and remove it...
          RoutingNodes.RoutingNodeIterator initializingNode =
              routingNodes.routingNodeIter(failedShard.relocatingNodeId());
          if (initializingNode != null) {
            while (initializingNode.hasNext()) {
              MutableShardRouting shardRouting = initializingNode.next();
              if (shardRouting.shardId().equals(failedShard.shardId())
                  && shardRouting.state() == INITIALIZING) {
                dirty = true;
                initializingNode.remove();
              }
            }
          }
        } else {
          logger.debug("failed shard {} not found in routingNodes, ignoring it", failedShard);
        }
      } else {
        throw new ElasticsearchIllegalStateException(
            "illegal state for a failed shard, relocating node id is set, but state does not match: "
                + failedShard);
      }
    } else {
      // the shard is not relocating, it is either started or initializing, just cancel it and
      // move on...
      RoutingNodes.RoutingNodeIterator node =
          routingNodes.routingNodeIter(failedShard.currentNodeId());
      if (node != null) {
        while (node.hasNext()) {
          MutableShardRouting shardRouting = node.next();
          if (shardRouting.equals(failedShard)) {
            dirty = true;
            if (addToIgnoreList) {
              // make sure we ignore this shard on the relevant node
              allocation.addIgnoreShardForNode(failedShard.shardId(), failedShard.currentNodeId());
            }
            node.remove();
            // move all the shards matching the failed shard to the end of the unassigned list
            // so we give a chance for other allocations and won't create poison failed allocations
            // that can keep other shards from being allocated (because of limits applied on how
            // many shards we can start per node)
            List<MutableShardRouting> shardsToMove = Lists.newArrayList();
            for (Iterator<MutableShardRouting> unassignedIt = routingNodes.unassigned().iterator();
                unassignedIt.hasNext(); ) {
              MutableShardRouting unassignedShardRouting = unassignedIt.next();
              if (unassignedShardRouting.shardId().equals(failedShard.shardId())) {
                unassignedIt.remove();
                shardsToMove.add(unassignedShardRouting);
              }
            }
            if (!shardsToMove.isEmpty()) {
              routingNodes.unassigned().addAll(shardsToMove);
            }

            routingNodes
                .unassigned()
                .add(
                    new MutableShardRouting(
                        failedShard.index(),
                        failedShard.id(),
                        null,
                        null,
                        failedShard.restoreSource(),
                        failedShard.primary(),
                        ShardRoutingState.UNASSIGNED,
                        failedShard.version() + 1));

            break;
          }
        }
      }
      if (!dirty) {
        logger.debug("failed shard {} not found in routingNodes, ignoring it", failedShard);
      }
    }
    return dirty;
  }
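The non-relocating branch above uses a small fairness trick: all unassigned copies matching the failed shard are pushed to the back of the unassigned list so that repeatedly failing shards do not starve other shards of allocation slots. A self-contained sketch of that "move matching entries to the end" pattern with plain Strings standing in for shard routings (names and keys are illustrative only):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

// Collect entries matching the failed shard while removing them, then re-append at the tail.
public class MoveToEndSketch {
  public static void main(String[] args) {
    List<String> unassigned = new LinkedList<>(Arrays.asList("a:0", "b:1", "a:0", "c:2"));
    String failedShardId = "a:0";

    List<String> toMove = new ArrayList<>();
    for (Iterator<String> it = unassigned.iterator(); it.hasNext(); ) {
      String shard = it.next();
      if (shard.equals(failedShardId)) {
        it.remove();
        toMove.add(shard);
      }
    }
    unassigned.addAll(toMove);
    System.out.println(unassigned); // [b:1, c:2, a:0, a:0]
  }
}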
  private ObjectLongOpenHashMap<DiscoveryNode> buildShardStates(
      final DiscoveryNodes nodes, MutableShardRouting shard) {
    ObjectLongOpenHashMap<DiscoveryNode> shardStates = cachedShardsState.get(shard.shardId());
    ObjectOpenHashSet<String> nodeIds;
    if (shardStates == null) {
      shardStates = new ObjectLongOpenHashMap<>();
      cachedShardsState.put(shard.shardId(), shardStates);
      nodeIds = ObjectOpenHashSet.from(nodes.dataNodes().keys());
    } else {
      // clean nodes that have failed
      shardStates
          .keys()
          .removeAll(
              new ObjectPredicate<DiscoveryNode>() {
                @Override
                public boolean apply(DiscoveryNode node) {
                  return !nodes.nodeExists(node.id());
                }
              });
      nodeIds = ObjectOpenHashSet.newInstance();
      // we have a cached state from before; see if the nodes changed, and if they have, fetch again
      for (ObjectCursor<DiscoveryNode> cursor : nodes.dataNodes().values()) {
        DiscoveryNode node = cursor.value;
        if (!shardStates.containsKey(node)) {
          nodeIds.add(node.id());
        }
      }
    }
    if (nodeIds.isEmpty()) {
      return shardStates;
    }

    String[] nodesIdsArray = nodeIds.toArray(String.class);
    TransportNodesListGatewayStartedShards.NodesGatewayStartedShards response =
        listGatewayStartedShards.list(shard.shardId(), nodesIdsArray, listTimeout).actionGet();
    if (logger.isDebugEnabled()) {
      if (response.failures().length > 0) {
        StringBuilder sb =
            new StringBuilder(shard + ": failures when trying to list shards on nodes:");
        for (int i = 0; i < response.failures().length; i++) {
          Throwable cause = ExceptionsHelper.unwrapCause(response.failures()[i]);
          if (cause instanceof ConnectTransportException) {
            continue;
          }
          sb.append("\n    -> ").append(response.failures()[i].getDetailedMessage());
        }
        logger.debug(sb.toString());
      }
    }

    for (TransportNodesListGatewayStartedShards.NodeGatewayStartedShards nodeShardState :
        response) {
      // a -1 version means the shard does not exist on that node, which is what the API returns
      // and what we expect here
      logger.trace(
          "[{}] on node [{}] has version [{}] of shard",
          shard,
          nodeShardState.getNode(),
          nodeShardState.version());
      shardStates.put(nodeShardState.getNode(), nodeShardState.version());
    }
    return shardStates;
  }
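buildShardStates returns an HPPC ObjectLongOpenHashMap of node to on-disk shard version. The allocator below walks it through the map's internal allocated/keys/values arrays; the cursor-based iteration shown in this small standalone sketch (with String keys purely for illustration) visits the same entries without relying on the internal layout.

import com.carrotsearch.hppc.ObjectLongOpenHashMap;
import com.carrotsearch.hppc.cursors.ObjectLongCursor;

// Standalone sketch: find the highest shard version across nodes, skipping nodes that
// report -1 (no copy of the shard). Mirrors the scan done in allocateUnassigned below.
public class HighestVersionScan {
  public static void main(String[] args) {
    ObjectLongOpenHashMap<String> versions = new ObjectLongOpenHashMap<>();
    versions.put("node-1", 12L);
    versions.put("node-2", -1L); // -1 means this node has no copy of the shard
    versions.put("node-3", 12L);

    long highestVersion = -1;
    for (ObjectLongCursor<String> cursor : versions) {
      if (cursor.value == -1) {
        continue;
      }
      highestVersion = Math.max(highestVersion, cursor.value);
    }
    System.out.println("highest version: " + highestVersion); // 12
  }
}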
  public boolean allocateUnassigned(RoutingAllocation allocation) {
    boolean changed = false;
    DiscoveryNodes nodes = allocation.nodes();
    RoutingNodes routingNodes = allocation.routingNodes();

    // First, handle primaries, they must find a place to be allocated on here
    Iterator<MutableShardRouting> unassignedIterator = routingNodes.unassigned().iterator();
    while (unassignedIterator.hasNext()) {
      MutableShardRouting shard = unassignedIterator.next();

      if (!shard.primary()) {
        continue;
      }

      // this is an API allocation, ignore since we know there is no data...
      if (!routingNodes
          .routingTable()
          .index(shard.index())
          .shard(shard.id())
          .primaryAllocatedPostApi()) {
        continue;
      }

      ObjectLongOpenHashMap<DiscoveryNode> nodesState = buildShardStates(nodes, shard);

      int numberOfAllocationsFound = 0;
      long highestVersion = -1;
      Set<DiscoveryNode> nodesWithHighestVersion = Sets.newHashSet();
      final boolean[] states = nodesState.allocated;
      final Object[] keys = nodesState.keys;
      final long[] values = nodesState.values;
      for (int i = 0; i < states.length; i++) {
        if (!states[i]) {
          continue;
        }

        DiscoveryNode node = (DiscoveryNode) keys[i];
        long version = values[i];
        // since we don't check in NO allocation, we need to double check here
        if (allocation.shouldIgnoreShardForNode(shard.shardId(), node.id())) {
          continue;
        }
        if (version != -1) {
          numberOfAllocationsFound++;
          if (highestVersion == -1) {
            nodesWithHighestVersion.add(node);
            highestVersion = version;
          } else {
            if (version > highestVersion) {
              nodesWithHighestVersion.clear();
              nodesWithHighestVersion.add(node);
              highestVersion = version;
            } else if (version == highestVersion) {
              nodesWithHighestVersion.add(node);
            }
          }
        }
      }

      // check if the count meets the configured minimum
      int requiredAllocation = 1;
      // if we restore from a repository one copy is more than enough
      if (shard.restoreSource() == null) {
        try {
          IndexMetaData indexMetaData = routingNodes.metaData().index(shard.index());
          String initialShards =
              indexMetaData
                  .settings()
                  .get(
                      INDEX_RECOVERY_INITIAL_SHARDS,
                      settings.get(INDEX_RECOVERY_INITIAL_SHARDS, this.initialShards));
          if ("quorum".equals(initialShards)) {
            if (indexMetaData.numberOfReplicas() > 1) {
              requiredAllocation = ((1 + indexMetaData.numberOfReplicas()) / 2) + 1;
            }
          } else if ("quorum-1".equals(initialShards) || "half".equals(initialShards)) {
            if (indexMetaData.numberOfReplicas() > 2) {
              requiredAllocation = ((1 + indexMetaData.numberOfReplicas()) / 2);
            }
          } else if ("one".equals(initialShards)) {
            requiredAllocation = 1;
          } else if ("full".equals(initialShards) || "all".equals(initialShards)) {
            requiredAllocation = indexMetaData.numberOfReplicas() + 1;
          } else if ("full-1".equals(initialShards) || "all-1".equals(initialShards)) {
            if (indexMetaData.numberOfReplicas() > 1) {
              requiredAllocation = indexMetaData.numberOfReplicas();
            }
          } else {
            requiredAllocation = Integer.parseInt(initialShards);
          }
        } catch (Exception e) {
          logger.warn(
              "[{}][{}] failed to derived initial_shards from value {}, ignore allocation for {}",
              shard.index(),
              shard.id(),
              initialShards,
              shard);
        }
      }

      // not enough found for this shard, continue...
      if (numberOfAllocationsFound < requiredAllocation) {
        // if we are restoring this shard we still can allocate
        if (shard.restoreSource() == null) {
          // we can't really allocate, so ignore it and continue
          unassignedIterator.remove();
          routingNodes.ignoredUnassigned().add(shard);
          if (logger.isDebugEnabled()) {
            logger.debug(
                "[{}][{}]: not allocating, number_of_allocated_shards_found [{}], required_number [{}]",
                shard.index(),
                shard.id(),
                numberOfAllocationsFound,
                requiredAllocation);
          }
        } else if (logger.isDebugEnabled()) {
          logger.debug(
              "[{}][{}]: missing local data, will restore from [{}]",
              shard.index(),
              shard.id(),
              shard.restoreSource());
        }
        continue;
      }

      Set<DiscoveryNode> throttledNodes = Sets.newHashSet();
      Set<DiscoveryNode> noNodes = Sets.newHashSet();
      for (DiscoveryNode discoNode : nodesWithHighestVersion) {
        RoutingNode node = routingNodes.node(discoNode.id());
        if (node == null) {
          continue;
        }

        Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
        if (decision.type() == Decision.Type.THROTTLE) {
          throttledNodes.add(discoNode);
        } else if (decision.type() == Decision.Type.NO) {
          noNodes.add(discoNode);
        } else {
          if (logger.isDebugEnabled()) {
            logger.debug(
                "[{}][{}]: allocating [{}] to [{}] on primary allocation",
                shard.index(),
                shard.id(),
                shard,
                discoNode);
          }
          // we found a match
          changed = true;
          // make sure we create one with the version from the recovered state
          allocation
              .routingNodes()
              .assign(new MutableShardRouting(shard, highestVersion), node.nodeId());
          unassignedIterator.remove();

          // found a node, so no throttling, no "no", and break out of the loop
          throttledNodes.clear();
          noNodes.clear();
          break;
        }
      }
      if (throttledNodes.isEmpty()) {
        // if we have a node that we "can't" allocate to, force allocation, since this is our master
        // data!
        if (!noNodes.isEmpty()) {
          DiscoveryNode discoNode = noNodes.iterator().next();
          RoutingNode node = routingNodes.node(discoNode.id());
          if (logger.isDebugEnabled()) {
            logger.debug(
                "[{}][{}]: forcing allocating [{}] to [{}] on primary allocation",
                shard.index(),
                shard.id(),
                shard,
                discoNode);
          }
          // we found a match
          changed = true;
          // make sure we create one with the version from the recovered state
          allocation
              .routingNodes()
              .assign(new MutableShardRouting(shard, highestVersion), node.nodeId());
          unassignedIterator.remove();
        }
      } else {
        if (logger.isDebugEnabled()) {
          logger.debug(
              "[{}][{}]: throttling allocation [{}] to [{}] on primary allocation",
              shard.index(),
              shard.id(),
              shard,
              throttledNodes);
        }
        // we are throttling this, but we have enough to allocate to this node, ignore it for now
        unassignedIterator.remove();
        routingNodes.ignoredUnassigned().add(shard);
      }
    }

    if (!routingNodes.hasUnassigned()) {
      return changed;
    }

    // Now, handle replicas, try to assign them to nodes that are similar to the one the primary was
    // allocated on
    unassignedIterator = routingNodes.unassigned().iterator();
    while (unassignedIterator.hasNext()) {
      MutableShardRouting shard = unassignedIterator.next();

      // pre-check if it can be allocated to any node that currently exists, so we won't list the
      // store for it for nothing
      boolean canBeAllocatedToAtLeastOneNode = false;
      for (ObjectCursor<DiscoveryNode> cursor : nodes.dataNodes().values()) {
        RoutingNode node = routingNodes.node(cursor.value.id());
        if (node == null) {
          continue;
        }
        // if we can't allocate it on a node, ignore it, for example, this handles
        // cases for only allocating a replica after a primary
        Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
        if (decision.type() == Decision.Type.YES) {
          canBeAllocatedToAtLeastOneNode = true;
          break;
        }
      }

      if (!canBeAllocatedToAtLeastOneNode) {
        continue;
      }

      Map<DiscoveryNode, TransportNodesListShardStoreMetaData.StoreFilesMetaData> shardStores =
          buildShardStores(nodes, shard);

      long lastSizeMatched = 0;
      DiscoveryNode lastDiscoNodeMatched = null;
      RoutingNode lastNodeMatched = null;

      for (Map.Entry<DiscoveryNode, TransportNodesListShardStoreMetaData.StoreFilesMetaData>
          nodeStoreEntry : shardStores.entrySet()) {
        DiscoveryNode discoNode = nodeStoreEntry.getKey();
        TransportNodesListShardStoreMetaData.StoreFilesMetaData storeFilesMetaData =
            nodeStoreEntry.getValue();
        logger.trace("{}: checking node [{}]", shard, discoNode);

        if (storeFilesMetaData == null) {
          // already allocated on that node...
          continue;
        }

        RoutingNode node = routingNodes.node(discoNode.id());
        if (node == null) {
          continue;
        }

        // check if we can allocate on that node...
        // we only check for NO, since if this node is THROTTLING and it has enough "same data"
        // then we will try and assign it next time
        Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
        if (decision.type() == Decision.Type.NO) {
          continue;
        }

        // if it is already allocated, we can't assign to it...
        if (storeFilesMetaData.allocated()) {
          continue;
        }

        if (!shard.primary()) {
          MutableShardRouting primaryShard = routingNodes.activePrimary(shard);
          if (primaryShard != null) {
            assert primaryShard.active();
            DiscoveryNode primaryNode = nodes.get(primaryShard.currentNodeId());
            if (primaryNode != null) {
              TransportNodesListShardStoreMetaData.StoreFilesMetaData primaryNodeStore =
                  shardStores.get(primaryNode);
              if (primaryNodeStore != null && primaryNodeStore.allocated()) {
                long sizeMatched = 0;

                for (StoreFileMetaData storeFileMetaData : storeFilesMetaData) {
                  if (primaryNodeStore.fileExists(storeFileMetaData.name())
                      && primaryNodeStore
                          .file(storeFileMetaData.name())
                          .isSame(storeFileMetaData)) {
                    sizeMatched += storeFileMetaData.length();
                  }
                }
                logger.trace(
                    "{}: node [{}] has [{}/{}] bytes of re-usable data",
                    shard,
                    discoNode.name(),
                    new ByteSizeValue(sizeMatched),
                    sizeMatched);
                if (sizeMatched > lastSizeMatched) {
                  lastSizeMatched = sizeMatched;
                  lastDiscoNodeMatched = discoNode;
                  lastNodeMatched = node;
                }
              }
            }
          }
        }
      }

      if (lastNodeMatched != null) {
        // we only check on THROTTLE since we already checked on NO before
        Decision decision = allocation.deciders().canAllocate(shard, lastNodeMatched, allocation);
        if (decision.type() == Decision.Type.THROTTLE) {
          if (logger.isDebugEnabled()) {
            logger.debug(
                "[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store with total_size [{}]",
                shard.index(),
                shard.id(),
                shard,
                lastDiscoNodeMatched,
                new ByteSizeValue(lastSizeMatched));
          }
          // we are throttling this, but we have enough to allocate to this node, ignore it for now
          unassignedIterator.remove();
          routingNodes.ignoredUnassigned().add(shard);
        } else {
          if (logger.isDebugEnabled()) {
            logger.debug(
                "[{}][{}]: allocating [{}] to [{}] in order to reuse its unallocated persistent store with total_size [{}]",
                shard.index(),
                shard.id(),
                shard,
                lastDiscoNodeMatched,
                new ByteSizeValue(lastSizeMatched));
          }
          // we found a match
          changed = true;
          allocation.routingNodes().assign(shard, lastNodeMatched.nodeId());
          unassignedIterator.remove();
        }
      }
    }
    return changed;
  }
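For reference, the initial_shards handling in allocateUnassigned maps a setting string and the replica count to the number of shard copies that must be found before the primary is allocated. Below is a self-contained restatement of that arithmetic; the standalone form and the method name requiredAllocations are illustrative, but the branch logic mirrors the code above. For example, with 2 replicas (3 copies in total) "quorum" requires 2 copies to be found.

// Self-contained restatement of the initial_shards -> required-allocations mapping used above.
public class InitialShardsMath {
  static int requiredAllocations(String initialShards, int numberOfReplicas) {
    int copies = numberOfReplicas + 1; // primary plus replicas
    switch (initialShards) {
      case "quorum":
        return numberOfReplicas > 1 ? (copies / 2) + 1 : 1;
      case "quorum-1":
      case "half":
        return numberOfReplicas > 2 ? copies / 2 : 1;
      case "one":
        return 1;
      case "full":
      case "all":
        return copies;
      case "full-1":
      case "all-1":
        return numberOfReplicas > 1 ? numberOfReplicas : 1;
      default:
        return Integer.parseInt(initialShards); // an explicit numeric value
    }
  }

  public static void main(String[] args) {
    System.out.println(requiredAllocations("quorum", 2)); // 2
    System.out.println(requiredAllocations("all", 2));    // 3
  }
}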