private void waitTillNodesActive() throws Exception {
    for (int i = 0; i < 60; i++) {
      Thread.sleep(3000);
      ZkStateReader zkStateReader = cloudClient.getZkStateReader();
      ClusterState clusterState = zkStateReader.getClusterState();
      DocCollection collection1 = clusterState.getCollection("collection1");
      Slice slice = collection1.getSlice("shard1");
      Collection<Replica> replicas = slice.getReplicas();
      boolean allActive = true;

      Collection<String> nodesDownNames =
          nodesDown.stream().map(n -> n.coreNodeName).collect(Collectors.toList());

      Collection<Replica> replicasToCheck =
          replicas
              .stream()
              .filter(r -> !nodesDownNames.contains(r.getName()))
              .collect(Collectors.toList());

      for (Replica replica : replicasToCheck) {
        if (!clusterState.liveNodesContain(replica.getNodeName())
            || replica.getState() != Replica.State.ACTIVE) {
          allActive = false;
          break;
        }
      }
      if (allActive) {
        return;
      }
    }
    printLayout();
    fail("timeout waiting to see all nodes active");
  }
 public void deleteAllCollections() throws Exception {
   try (ZkStateReader reader = new ZkStateReader(solrClient.getZkStateReader().getZkClient())) {
     reader.createClusterStateWatchersAndUpdate();
     for (String collection : reader.getClusterState().getCollectionStates().keySet()) {
       CollectionAdminRequest.deleteCollection(collection).process(solrClient);
     }
   }
 }
Exemplo n.º 3
0
  private SolrCore getCoreByCollection(String collectionName) {
    ZkStateReader zkStateReader = cores.getZkController().getZkStateReader();

    ClusterState clusterState = zkStateReader.getClusterState();
    DocCollection collection = clusterState.getCollectionOrNull(collectionName);
    if (collection == null) {
      return null;
    }
    Map<String, Slice> slices = collection.getActiveSlicesMap();
    if (slices == null) {
      return null;
    }
    Set<String> liveNodes = clusterState.getLiveNodes();
    // look for a core on this node
    Set<Map.Entry<String, Slice>> entries = slices.entrySet();
    SolrCore core = null;

    // Hitting the leaders is useful when it's an update request.
    // For queries it doesn't matter and hence we don't distinguish here.
    for (Map.Entry<String, Slice> entry : entries) {
      // first see if we have the leader
      Replica leaderProps = collection.getLeader(entry.getKey());
      if (leaderProps != null
          && liveNodes.contains(leaderProps.getNodeName())
          && leaderProps.getState() == Replica.State.ACTIVE) {
        core = checkProps(leaderProps);
        if (core != null) {
          return core;
        }
      }

      // check everyone then
      Map<String, Replica> shards = entry.getValue().getReplicasMap();
      Set<Map.Entry<String, Replica>> shardEntries = shards.entrySet();
      for (Map.Entry<String, Replica> shardEntry : shardEntries) {
        Replica zkProps = shardEntry.getValue();
        if (liveNodes.contains(zkProps.getNodeName())
            && zkProps.getState() == Replica.State.ACTIVE) {
          core = checkProps(zkProps);
          if (core != null) {
            return core;
          }
        }
      }
    }
    return null;
  }
Exemplo n.º 4
0
  protected final synchronized DocCollection getDocCollection() {
    if (docCollection == null) {
      ZkStateReader zkStateReader = getCloudSolrServer().getZkStateReader();
      docCollection = zkStateReader.getClusterState().getCollection(collection);

      // do basic checks once
      DocRouter docRouter = docCollection.getRouter();
      if (docRouter instanceof ImplicitDocRouter)
        throw new IllegalStateException(
            "Implicit document routing not supported by this Partitioner!");
      Collection<Slice> shards = getDocCollection().getSlices();
      if (shards == null || shards.size() == 0)
        throw new IllegalStateException(
            "Collection '" + collection + "' does not have any shards!");
    }
    return docCollection;
  }
Exemplo n.º 5
0
  protected void constructStreams() throws IOException {

    try {
      Object pushStream = ((Expressible) tupleStream).toExpression(streamFactory);

      ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader();
      ClusterState clusterState = zkStateReader.getClusterState();
      Set<String> liveNodes = clusterState.getLiveNodes();
      Collection<Slice> slices = clusterState.getActiveSlices(this.collection);
      List<Replica> shuffler = new ArrayList();
      for (Slice slice : slices) {
        Collection<Replica> replicas = slice.getReplicas();
        for (Replica replica : replicas) {
          if (replica.getState() == Replica.State.ACTIVE
              && liveNodes.contains(replica.getNodeName())) shuffler.add(replica);
        }
      }

      if (workers > shuffler.size()) {
        throw new IOException("Number of workers exceeds nodes in the worker collection");
      }

      Collections.shuffle(shuffler, new Random());

      for (int w = 0; w < workers; w++) {
        HashMap params = new HashMap();
        params.put("distrib", "false"); // We are the aggregator.
        params.put("numWorkers", workers);
        params.put("workerID", w);
        params.put("expr", pushStream);
        params.put("qt", "/stream");
        Replica rep = shuffler.get(w);
        ZkCoreNodeProps zkProps = new ZkCoreNodeProps(rep);
        String url = zkProps.getCoreUrl();
        SolrStream solrStream = new SolrStream(url, params);
        solrStreams.add(solrStream);
      }

      assert (solrStreams.size() == workers);

    } catch (Exception e) {
      throw new IOException(e);
    }
  }
    /**
     * Retrieve all requests recorded by this queue which were sent to given collection and shard
     *
     * @param zkStateReader the {@link org.apache.solr.common.cloud.ZkStateReader} from which
     *     cluster state is read
     * @param collectionName the given collection name for which requests have to be extracted
     * @param shardId the given shard name for which requests have to be extracted
     * @return a list of {@link
     *     org.apache.solr.handler.component.TrackingShardHandlerFactory.ShardRequestAndParams} or
     *     empty list if none are found
     */
    public List<ShardRequestAndParams> getShardRequests(
        ZkStateReader zkStateReader, String collectionName, String shardId) {
      DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName);
      assert collection != null;
      Slice slice = collection.getSlice(shardId);
      assert slice != null;

      for (Map.Entry<String, List<ShardRequestAndParams>> entry : requests.entrySet()) {
        // multiple shard addresses may be present separated by '|'
        List<String> list = StrUtils.splitSmart(entry.getKey(), '|');
        for (Map.Entry<String, Replica> replica : slice.getReplicasMap().entrySet()) {
          String coreUrl = new ZkCoreNodeProps(replica.getValue()).getCoreUrl();
          if (list.contains(coreUrl)) {
            return new ArrayList<>(entry.getValue());
          }
        }
      }
      return Collections.emptyList();
    }
Exemplo n.º 7
0
  protected List<String> buildShardList(CloudSolrClient cloudSolrServer) {
    ZkStateReader zkStateReader = cloudSolrServer.getZkStateReader();

    ClusterState clusterState = zkStateReader.getClusterState();

    String[] collections = null;
    if (clusterState.hasCollection(collection)) {
      collections = new String[] {collection};
    } else {
      // might be a collection alias?
      Aliases aliases = zkStateReader.getAliases();
      String aliasedCollections = aliases.getCollectionAlias(collection);
      if (aliasedCollections == null)
        throw new IllegalArgumentException("Collection " + collection + " not found!");
      collections = aliasedCollections.split(",");
    }

    Set<String> liveNodes = clusterState.getLiveNodes();
    Random random = new Random(5150);

    List<String> shards = new ArrayList<String>();
    for (String coll : collections) {
      for (Slice slice : clusterState.getSlices(coll)) {
        List<String> replicas = new ArrayList<String>();
        for (Replica r : slice.getReplicas()) {
          ZkCoreNodeProps replicaCoreProps = new ZkCoreNodeProps(r);
          if (liveNodes.contains(replicaCoreProps.getNodeName()))
            replicas.add(replicaCoreProps.getCoreUrl());
        }
        int numReplicas = replicas.size();
        if (numReplicas == 0)
          throw new IllegalStateException(
              "Shard " + slice.getName() + " does not have any active replicas!");

        String replicaUrl =
            (numReplicas == 1) ? replicas.get(0) : replicas.get(random.nextInt(replicas.size()));
        shards.add(replicaUrl);
      }
    }
    return shards;
  }
  /**
   * Walks the NamedList response after performing an update request looking for the replication
   * factor that was achieved in each shard involved in the request. For single doc updates, there
   * will be only one shard in the return value.
   */
  @SuppressWarnings("rawtypes")
  public Map<String, Integer> getShardReplicationFactor(String collection, NamedList resp) {
    connect();

    Map<String, Integer> results = new HashMap<String, Integer>();
    if (resp instanceof CloudSolrServer.RouteResponse) {
      NamedList routes = ((CloudSolrServer.RouteResponse) resp).getRouteResponses();
      ClusterState clusterState = zkStateReader.getClusterState();
      Map<String, String> leaders = new HashMap<String, String>();
      for (Slice slice : clusterState.getActiveSlices(collection)) {
        Replica leader = slice.getLeader();
        if (leader != null) {
          ZkCoreNodeProps zkProps = new ZkCoreNodeProps(leader);
          String leaderUrl = zkProps.getBaseUrl() + "/" + zkProps.getCoreName();
          leaders.put(leaderUrl, slice.getName());
          String altLeaderUrl = zkProps.getBaseUrl() + "/" + collection;
          leaders.put(altLeaderUrl, slice.getName());
        }
      }

      Iterator<Map.Entry<String, Object>> routeIter = routes.iterator();
      while (routeIter.hasNext()) {
        Map.Entry<String, Object> next = routeIter.next();
        String host = next.getKey();
        NamedList hostResp = (NamedList) next.getValue();
        Integer rf =
            (Integer) ((NamedList) hostResp.get("responseHeader")).get(UpdateRequest.REPFACT);
        if (rf != null) {
          String shard = leaders.get(host);
          if (shard == null) {
            if (host.endsWith("/")) shard = leaders.get(host.substring(0, host.length() - 1));
            if (shard == null) {
              shard = host;
            }
          }
          results.put(shard, rf);
        }
      }
    }
    return results;
  }
  @BeforeClass
  public static void setupCluster() throws Exception {
    final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf");

    String configName = "solrCloudCollectionConfig";
    int nodeCount = 5;
    configureCluster(nodeCount).addConfig(configName, configDir).configure();

    Map<String, String> collectionProperties = new HashMap<>();
    collectionProperties.put("config", "solrconfig-tlog.xml");
    collectionProperties.put("schema", "schema.xml");

    // create a collection holding data for the "to" side of the JOIN

    int shards = 2;
    int replicas = 2;
    CollectionAdminRequest.createCollection(toColl, configName, shards, replicas)
        .setProperties(collectionProperties)
        .process(cluster.getSolrClient());

    // get the set of nodes where replicas for the "to" collection exist
    Set<String> nodeSet = new HashSet<>();
    ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader();
    ClusterState cs = zkStateReader.getClusterState();
    for (Slice slice : cs.getCollection(toColl).getActiveSlices())
      for (Replica replica : slice.getReplicas()) nodeSet.add(replica.getNodeName());
    assertTrue(nodeSet.size() > 0);

    // deploy the "from" collection to all nodes where the "to" collection exists
    CollectionAdminRequest.createCollection(fromColl, configName, 1, 4)
        .setCreateNodeSet(StringUtils.join(nodeSet, ","))
        .setProperties(collectionProperties)
        .process(cluster.getSolrClient());

    toDocId = indexDoc(toColl, 1001, "a", null, "b");
    indexDoc(fromColl, 2001, "a", "c", null);

    Thread.sleep(1000); // so the commits fire
  }
  protected Set<String> commonMocks(int liveNodesCount) throws Exception {

    shardHandlerFactoryMock.getShardHandler();
    expectLastCall()
        .andAnswer(
            new IAnswer<ShardHandler>() {
              @Override
              public ShardHandler answer() throws Throwable {
                log.info("SHARDHANDLER");
                return shardHandlerMock;
              }
            })
        .anyTimes();
    workQueueMock.peekTopN(EasyMock.anyInt(), anyObject(Set.class), EasyMock.anyLong());
    expectLastCall()
        .andAnswer(
            new IAnswer<List>() {
              @Override
              public List answer() throws Throwable {
                Object result;
                int count = 0;
                while ((result = queue.peek()) == null) {
                  Thread.sleep(1000);
                  count++;
                  if (count > 1) return null;
                }

                return Arrays.asList(result);
              }
            })
        .anyTimes();

    workQueueMock.getTailId();
    expectLastCall()
        .andAnswer(
            new IAnswer<Object>() {
              @Override
              public Object answer() throws Throwable {
                Object result = null;
                Iterator iter = queue.iterator();
                while (iter.hasNext()) {
                  result = iter.next();
                }
                return result == null ? null : ((QueueEvent) result).getId();
              }
            })
        .anyTimes();

    workQueueMock.peek(true);
    expectLastCall()
        .andAnswer(
            new IAnswer<Object>() {
              @Override
              public Object answer() throws Throwable {
                Object result;
                while ((result = queue.peek()) == null) {
                  Thread.sleep(1000);
                }
                return result;
              }
            })
        .anyTimes();

    workQueueMock.remove(anyObject(QueueEvent.class));
    expectLastCall()
        .andAnswer(
            new IAnswer<Object>() {
              @Override
              public Object answer() throws Throwable {
                queue.remove((QueueEvent) getCurrentArguments()[0]);
                return null;
              }
            })
        .anyTimes();

    workQueueMock.poll();
    expectLastCall()
        .andAnswer(
            new IAnswer<Object>() {
              @Override
              public Object answer() throws Throwable {
                return queue.poll();
              }
            })
        .anyTimes();

    zkStateReaderMock.getClusterState();
    expectLastCall()
        .andAnswer(
            new IAnswer<Object>() {
              @Override
              public Object answer() throws Throwable {
                return clusterStateMock;
              }
            })
        .anyTimes();

    zkStateReaderMock.getZkClient();
    expectLastCall()
        .andAnswer(
            new IAnswer<Object>() {
              @Override
              public Object answer() throws Throwable {
                return solrZkClientMock;
              }
            })
        .anyTimes();

    zkStateReaderMock.updateClusterState(anyBoolean());

    clusterStateMock.getCollections();
    expectLastCall()
        .andAnswer(
            new IAnswer<Object>() {
              @Override
              public Object answer() throws Throwable {
                return collectionsSet;
              }
            })
        .anyTimes();
    final Set<String> liveNodes = new HashSet<>();
    for (int i = 0; i < liveNodesCount; i++) {
      final String address = "localhost:" + (8963 + i) + "_solr";
      liveNodes.add(address);

      zkStateReaderMock.getBaseUrlForNodeName(address);
      expectLastCall()
          .andAnswer(
              new IAnswer<Object>() {
                @Override
                public Object answer() throws Throwable {
                  // This works as long as this test does not use a
                  // webapp context with an underscore in it
                  return address.replaceAll("_", "/");
                }
              })
          .anyTimes();
    }
    zkStateReaderMock.getClusterProps();
    expectLastCall()
        .andAnswer(
            new IAnswer<Map>() {
              @Override
              public Map answer() throws Throwable {
                return new HashMap();
              }
            });

    solrZkClientMock.getZkClientTimeout();
    expectLastCall()
        .andAnswer(
            new IAnswer<Object>() {
              @Override
              public Object answer() throws Throwable {
                return 30000;
              }
            })
        .anyTimes();

    clusterStateMock.hasCollection(anyObject(String.class));
    expectLastCall()
        .andAnswer(
            new IAnswer<Boolean>() {
              @Override
              public Boolean answer() throws Throwable {
                String key = (String) getCurrentArguments()[0];
                return collectionsSet.contains(key);
              }
            })
        .anyTimes();

    clusterStateMock.getLiveNodes();
    expectLastCall()
        .andAnswer(
            new IAnswer<Object>() {
              @Override
              public Object answer() throws Throwable {
                return liveNodes;
              }
            })
        .anyTimes();
    solrZkClientMock.create(
        anyObject(String.class),
        anyObject(byte[].class),
        anyObject(CreateMode.class),
        anyBoolean());
    expectLastCall()
        .andAnswer(
            new IAnswer<String>() {
              @Override
              public String answer() throws Throwable {
                String key = (String) getCurrentArguments()[0];
                zkMap.put(key, null);
                handleCreateCollMessage((byte[]) getCurrentArguments()[1]);
                return key;
              }
            })
        .anyTimes();

    solrZkClientMock.makePath(anyObject(String.class), anyObject(byte[].class), anyBoolean());
    expectLastCall()
        .andAnswer(
            new IAnswer<String>() {
              @Override
              public String answer() throws Throwable {
                String key = (String) getCurrentArguments()[0];
                return key;
              }
            })
        .anyTimes();

    solrZkClientMock.makePath(
        anyObject(String.class),
        anyObject(byte[].class),
        anyObject(CreateMode.class),
        anyBoolean());
    expectLastCall()
        .andAnswer(
            new IAnswer<String>() {
              @Override
              public String answer() throws Throwable {
                String key = (String) getCurrentArguments()[0];
                return key;
              }
            })
        .anyTimes();

    solrZkClientMock.exists(anyObject(String.class), anyBoolean());
    expectLastCall()
        .andAnswer(
            new IAnswer<Boolean>() {
              @Override
              public Boolean answer() throws Throwable {
                String key = (String) getCurrentArguments()[0];
                return zkMap.containsKey(key);
              }
            })
        .anyTimes();

    zkMap.put("/configs/myconfig", null);

    return liveNodes;
  }
  @Override
  public void doTest() throws Exception {
    boolean testsSuccesful = false;
    try {
      handle.clear();
      handle.put("QTime", SKIPVAL);
      handle.put("timestamp", SKIPVAL);
      ZkStateReader zkStateReader = cloudClient.getZkStateReader();
      // make sure we have leaders for each shard
      for (int j = 1; j < sliceCount; j++) {
        zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "shard" + j, 10000);
      } // make sure we again have leaders for each shard

      waitForRecoveriesToFinish(false);

      // we cannot do delete by query
      // as it's not supported for recovery
      del("*:*");

      List<StopableThread> threads = new ArrayList<StopableThread>();
      int threadCount = 1;
      int i = 0;
      for (i = 0; i < threadCount; i++) {
        StopableIndexingThread indexThread = new StopableIndexingThread(Integer.toString(i), true);
        threads.add(indexThread);
        indexThread.start();
      }

      threadCount = 1;
      i = 0;
      for (i = 0; i < threadCount; i++) {
        StopableSearchThread searchThread = new StopableSearchThread();
        threads.add(searchThread);
        searchThread.start();
      }

      // TODO: we only do this sometimes so that we can sometimes compare against control,
      // it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
      boolean runFullThrottle = random().nextBoolean();
      if (runFullThrottle) {
        FullThrottleStopableIndexingThread ftIndexThread =
            new FullThrottleStopableIndexingThread(clients, "ft1", true);
        threads.add(ftIndexThread);
        ftIndexThread.start();
      }

      chaosMonkey.startTheMonkey(true, 10000);
      try {
        long runLength;
        if (RUN_LENGTH != -1) {
          runLength = RUN_LENGTH;
        } else {
          int[] runTimes =
              new int[] {5000, 6000, 10000, 15000, 25000, 30000, 30000, 45000, 90000, 120000};
          runLength = runTimes[random().nextInt(runTimes.length - 1)];
        }

        Thread.sleep(runLength);
      } finally {
        chaosMonkey.stopTheMonkey();
      }

      for (StopableThread indexThread : threads) {
        indexThread.safeStop();
      }

      // start any downed jetties to be sure we still will end up with a leader per shard...

      // wait for stop...
      for (StopableThread indexThread : threads) {
        indexThread.join();
      }

      // try and wait for any replications and what not to finish...

      Thread.sleep(2000);

      // wait until there are no recoveries...
      waitForThingsToLevelOut(Integer.MAX_VALUE); // Math.round((runLength / 1000.0f / 3.0f)));

      // make sure we again have leaders for each shard
      for (int j = 1; j < sliceCount; j++) {
        zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "shard" + j, 30000);
      }

      commit();

      // TODO: assert we didnt kill everyone

      zkStateReader.updateClusterState(true);
      assertTrue(zkStateReader.getClusterState().getLiveNodes().size() > 0);

      // we expect full throttle fails, but cloud client should not easily fail
      for (StopableThread indexThread : threads) {
        if (indexThread instanceof StopableIndexingThread
            && !(indexThread instanceof FullThrottleStopableIndexingThread)) {
          assertFalse(
              "There were too many update fails - we expect it can happen, but shouldn't easily",
              ((StopableIndexingThread) indexThread).getFailCount() > 10);
        }
      }

      // full throttle thread can
      // have request fails
      checkShardConsistency(!runFullThrottle, true);

      long ctrlDocs = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();

      // ensure we have added more than 0 docs
      long cloudClientDocs = cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound();

      assertTrue("Found " + ctrlDocs + " control docs", cloudClientDocs > 0);

      if (VERBOSE)
        System.out.println(
            "control docs:"
                + controlClient.query(new SolrQuery("*:*")).getResults().getNumFound()
                + "\n\n");

      // try and make a collection to make sure the overseer has survived the expiration and session
      // loss

      // sometimes we restart zookeeper as well
      if (random().nextBoolean()) {
        zkServer.shutdown();
        zkServer = new ZkTestServer(zkServer.getZkDir(), zkServer.getPort());
        zkServer.run();
      }

      CloudSolrServer client = createCloudClient("collection1");
      try {
        createCollection(null, "testcollection", 1, 1, 1, client, null, "conf1");

      } finally {
        client.shutdown();
      }
      List<Integer> numShardsNumReplicas = new ArrayList<Integer>(2);
      numShardsNumReplicas.add(1);
      numShardsNumReplicas.add(1);
      checkForCollection("testcollection", numShardsNumReplicas, null);

      testsSuccesful = true;
    } finally {
      if (!testsSuccesful) {
        printLayout();
      }
    }
  }
  protected NamedList<Object> sendRequest(SolrRequest request)
      throws SolrServerException, IOException {
    connect();

    ClusterState clusterState = zkStateReader.getClusterState();

    boolean sendToLeaders = false;
    List<String> replicas = null;

    if (request instanceof IsUpdateRequest) {
      if (request instanceof UpdateRequest) {
        NamedList<Object> response = directUpdate((AbstractUpdateRequest) request, clusterState);
        if (response != null) {
          return response;
        }
      }
      sendToLeaders = true;
      replicas = new ArrayList<>();
    }

    SolrParams reqParams = request.getParams();
    if (reqParams == null) {
      reqParams = new ModifiableSolrParams();
    }
    List<String> theUrlList = new ArrayList<>();
    if (request.getPath().equals("/admin/collections")
        || request.getPath().equals("/admin/cores")) {
      Set<String> liveNodes = clusterState.getLiveNodes();
      for (String liveNode : liveNodes) {
        theUrlList.add(zkStateReader.getBaseUrlForNodeName(liveNode));
      }
    } else {
      String collection = reqParams.get(UpdateParams.COLLECTION, defaultCollection);

      if (collection == null) {
        throw new SolrServerException(
            "No collection param specified on request and no default collection has been set.");
      }

      Set<String> collectionsList = getCollectionList(clusterState, collection);
      if (collectionsList.size() == 0) {
        throw new SolrException(ErrorCode.BAD_REQUEST, "Could not find collection: " + collection);
      }

      String shardKeys = reqParams.get(ShardParams._ROUTE_);
      if (shardKeys == null) {
        shardKeys = reqParams.get(ShardParams.SHARD_KEYS); // deprecated
      }

      // TODO: not a big deal because of the caching, but we could avoid looking
      // at every shard
      // when getting leaders if we tweaked some things

      // Retrieve slices from the cloud state and, for each collection
      // specified,
      // add it to the Map of slices.
      Map<String, Slice> slices = new HashMap<>();
      for (String collectionName : collectionsList) {
        DocCollection col = getDocCollection(clusterState, collectionName);
        Collection<Slice> routeSlices = col.getRouter().getSearchSlices(shardKeys, reqParams, col);
        ClientUtils.addSlices(slices, collectionName, routeSlices, true);
      }
      Set<String> liveNodes = clusterState.getLiveNodes();

      List<String> leaderUrlList = null;
      List<String> urlList = null;
      List<String> replicasList = null;

      // build a map of unique nodes
      // TODO: allow filtering by group, role, etc
      Map<String, ZkNodeProps> nodes = new HashMap<>();
      List<String> urlList2 = new ArrayList<>();
      for (Slice slice : slices.values()) {
        for (ZkNodeProps nodeProps : slice.getReplicasMap().values()) {
          ZkCoreNodeProps coreNodeProps = new ZkCoreNodeProps(nodeProps);
          String node = coreNodeProps.getNodeName();
          if (!liveNodes.contains(coreNodeProps.getNodeName())
              || !coreNodeProps.getState().equals(ZkStateReader.ACTIVE)) continue;
          if (nodes.put(node, nodeProps) == null) {
            if (!sendToLeaders || (sendToLeaders && coreNodeProps.isLeader())) {
              String url;
              if (reqParams.get(UpdateParams.COLLECTION) == null) {
                url =
                    ZkCoreNodeProps.getCoreUrl(
                        nodeProps.getStr(ZkStateReader.BASE_URL_PROP), defaultCollection);
              } else {
                url = coreNodeProps.getCoreUrl();
              }
              urlList2.add(url);
            } else if (sendToLeaders) {
              String url;
              if (reqParams.get(UpdateParams.COLLECTION) == null) {
                url =
                    ZkCoreNodeProps.getCoreUrl(
                        nodeProps.getStr(ZkStateReader.BASE_URL_PROP), defaultCollection);
              } else {
                url = coreNodeProps.getCoreUrl();
              }
              replicas.add(url);
            }
          }
        }
      }

      if (sendToLeaders) {
        leaderUrlList = urlList2;
        replicasList = replicas;
      } else {
        urlList = urlList2;
      }

      if (sendToLeaders) {
        theUrlList = new ArrayList<>(leaderUrlList.size());
        theUrlList.addAll(leaderUrlList);
      } else {
        theUrlList = new ArrayList<>(urlList.size());
        theUrlList.addAll(urlList);
      }
      if (theUrlList.isEmpty()) {
        throw new SolrException(
            SolrException.ErrorCode.INVALID_STATE, "Not enough nodes to handle the request");
      }

      Collections.shuffle(theUrlList, rand);
      if (sendToLeaders) {
        ArrayList<String> theReplicas = new ArrayList<>(replicasList.size());
        theReplicas.addAll(replicasList);
        Collections.shuffle(theReplicas, rand);
        theUrlList.addAll(theReplicas);
      }
    }

    LBHttpSolrServer.Req req = new LBHttpSolrServer.Req(request, theUrlList);
    LBHttpSolrServer.Rsp rsp = lbServer.request(req);
    return rsp.getResponse();
  }
  /**
   * As this class doesn't watch external collections on the client side, there's a chance that the
   * request will fail due to cached stale state, which means the state must be refreshed from ZK
   * and retried.
   */
  protected NamedList<Object> requestWithRetryOnStaleState(
      SolrRequest request, int retryCount, String collection)
      throws SolrServerException, IOException {

    connect(); // important to call this before you start working with the ZkStateReader

    // build up a _stateVer_ param to pass to the server containing all of the
    // external collection state versions involved in this request, which allows
    // the server to notify us that our cached state for one or more of the external
    // collections is stale and needs to be refreshed ... this code has no impact on internal
    // collections
    String stateVerParam = null;
    List<DocCollection> requestedCollections = null;
    if (collection != null
        && !request
            .getPath()
            .startsWith("/admin")) { // don't do _stateVer_ checking for admin requests
      Set<String> requestedCollectionNames =
          getCollectionList(getZkStateReader().getClusterState(), collection);

      StringBuilder stateVerParamBuilder = null;
      for (String requestedCollection : requestedCollectionNames) {
        // track the version of state we're using on the client side using the _stateVer_ param
        DocCollection coll =
            getDocCollection(getZkStateReader().getClusterState(), requestedCollection);
        int collVer = coll.getZNodeVersion();
        if (coll.getStateFormat() > 1) {
          if (requestedCollections == null)
            requestedCollections = new ArrayList<>(requestedCollectionNames.size());
          requestedCollections.add(coll);

          if (stateVerParamBuilder == null) {
            stateVerParamBuilder = new StringBuilder();
          } else {
            stateVerParamBuilder.append(
                "|"); // hopefully pipe is not an allowed char in a collection name
          }

          stateVerParamBuilder.append(coll.getName()).append(":").append(collVer);
        }
      }

      if (stateVerParamBuilder != null) {
        stateVerParam = stateVerParamBuilder.toString();
      }
    }

    if (request.getParams() instanceof ModifiableSolrParams) {
      ModifiableSolrParams params = (ModifiableSolrParams) request.getParams();
      if (stateVerParam != null) {
        params.set(STATE_VERSION, stateVerParam);
      } else {
        params.remove(STATE_VERSION);
      }
    } // else: ??? how to set this ???

    NamedList<Object> resp = null;
    try {
      resp = sendRequest(request);
    } catch (Exception exc) {

      Throwable rootCause = SolrException.getRootCause(exc);
      // don't do retry support for admin requests or if the request doesn't have a collection
      // specified
      if (collection == null || request.getPath().startsWith("/admin")) {
        if (exc instanceof SolrServerException) {
          throw (SolrServerException) exc;
        } else if (exc instanceof IOException) {
          throw (IOException) exc;
        } else if (exc instanceof RuntimeException) {
          throw (RuntimeException) exc;
        } else {
          throw new SolrServerException(rootCause);
        }
      }

      int errorCode =
          (rootCause instanceof SolrException)
              ? ((SolrException) rootCause).code()
              : SolrException.ErrorCode.UNKNOWN.code;

      log.error(
          "Request to collection {} failed due to (" + errorCode + ") {}, retry? " + retryCount,
          collection,
          rootCause.toString());

      boolean wasCommError =
          (rootCause instanceof ConnectException
              || rootCause instanceof ConnectTimeoutException
              || rootCause instanceof NoHttpResponseException
              || rootCause instanceof SocketException);

      boolean stateWasStale = false;
      if (retryCount < MAX_STALE_RETRIES
          && requestedCollections != null
          && !requestedCollections.isEmpty()
          && SolrException.ErrorCode.getErrorCode(errorCode)
              == SolrException.ErrorCode.INVALID_STATE) {
        // cached state for one or more external collections was stale
        // re-issue request using updated state
        stateWasStale = true;

        // just re-read state for all of them, which is a little heavy handed but hopefully a rare
        // occurrence
        for (DocCollection ext : requestedCollections) {
          collectionStateCache.remove(ext.getName());
        }
      }

      // if we experienced a communication error, it's worth checking the state
      // with ZK just to make sure the node we're trying to hit is still part of the collection
      if (retryCount < MAX_STALE_RETRIES
          && !stateWasStale
          && requestedCollections != null
          && !requestedCollections.isEmpty()
          && wasCommError) {
        for (DocCollection ext : requestedCollections) {
          DocCollection latestStateFromZk =
              getDocCollection(zkStateReader.getClusterState(), ext.getName());
          if (latestStateFromZk.getZNodeVersion() != ext.getZNodeVersion()) {
            // looks like we couldn't reach the server because the state was stale == retry
            stateWasStale = true;
            // we just pulled state from ZK, so update the cache so that the retry uses it
            collectionStateCache.put(
                ext.getName(), new ExpiringCachedDocCollection(latestStateFromZk));
          }
        }
      }

      if (requestedCollections != null) {
        requestedCollections.clear(); // done with this
      }

      // if the state was stale, then we retry the request once with new state pulled from Zk
      if (stateWasStale) {
        log.warn(
            "Re-trying request to  collection(s) "
                + collection
                + " after stale state error from server.");
        resp = requestWithRetryOnStaleState(request, retryCount + 1, collection);
      } else {
        if (exc instanceof SolrServerException) {
          throw (SolrServerException) exc;
        } else if (exc instanceof IOException) {
          throw (IOException) exc;
        } else {
          throw new SolrServerException(rootCause);
        }
      }
    }

    return resp;
  }