/**
 * Polls the cluster state until every replica of {@code collection1}/{@code shard1} that is not
 * listed in {@code nodesDown} sits on a live node and reports {@code ACTIVE}.
 *
 * <p>Each poll is preceded by a 3 second sleep; after 60 unsuccessful polls the cluster layout is
 * printed and the test is failed.
 *
 * @throws Exception if sleeping is interrupted or reading the cluster state fails
 */
private void waitTillNodesActive() throws Exception {
  final int maxPolls = 60;
  for (int attempt = 0; attempt < maxPolls; attempt++) {
    // sleep first so the state has a chance to move before each look
    Thread.sleep(3000);

    ClusterState state = cloudClient.getZkStateReader().getClusterState();
    Collection<Replica> shardReplicas =
        state.getCollection("collection1").getSlice("shard1").getReplicas();

    // replicas on nodes we took down on purpose are excluded from the check
    Collection<String> downNames =
        nodesDown.stream().map(n -> n.coreNodeName).collect(Collectors.toList());

    boolean everyoneUp =
        shardReplicas.stream()
            .filter(r -> !downNames.contains(r.getName()))
            .allMatch(
                r ->
                    state.liveNodesContain(r.getNodeName())
                        && r.getState() == Replica.State.ACTIVE);
    if (everyoneUp) {
      return;
    }
  }

  printLayout();
  fail("timeout waiting to see all nodes active");
}
/**
 * Issues a delete-collection request for every collection found in the cluster state.
 *
 * <p>A separate {@link ZkStateReader} is opened on the ZK client of {@code solrClient}'s reader,
 * refreshed via {@code createClusterStateWatchersAndUpdate()}, and closed when done.
 *
 * @throws Exception if reading the cluster state or any delete request fails
 */
public void deleteAllCollections() throws Exception {
  try (ZkStateReader freshReader =
      new ZkStateReader(solrClient.getZkStateReader().getZkClient())) {
    freshReader.createClusterStateWatchersAndUpdate();
    for (String collectionName :
        freshReader.getClusterState().getCollectionStates().keySet()) {
      CollectionAdminRequest.deleteCollection(collectionName).process(solrClient);
    }
  }
}
private SolrCore getCoreByCollection(String collectionName) { ZkStateReader zkStateReader = cores.getZkController().getZkStateReader(); ClusterState clusterState = zkStateReader.getClusterState(); DocCollection collection = clusterState.getCollectionOrNull(collectionName); if (collection == null) { return null; } Map<String, Slice> slices = collection.getActiveSlicesMap(); if (slices == null) { return null; } Set<String> liveNodes = clusterState.getLiveNodes(); // look for a core on this node Set<Map.Entry<String, Slice>> entries = slices.entrySet(); SolrCore core = null; // Hitting the leaders is useful when it's an update request. // For queries it doesn't matter and hence we don't distinguish here. for (Map.Entry<String, Slice> entry : entries) { // first see if we have the leader Replica leaderProps = collection.getLeader(entry.getKey()); if (leaderProps != null && liveNodes.contains(leaderProps.getNodeName()) && leaderProps.getState() == Replica.State.ACTIVE) { core = checkProps(leaderProps); if (core != null) { return core; } } // check everyone then Map<String, Replica> shards = entry.getValue().getReplicasMap(); Set<Map.Entry<String, Replica>> shardEntries = shards.entrySet(); for (Map.Entry<String, Replica> shardEntry : shardEntries) { Replica zkProps = shardEntry.getValue(); if (liveNodes.contains(zkProps.getNodeName()) && zkProps.getState() == Replica.State.ACTIVE) { core = checkProps(zkProps); if (core != null) { return core; } } } } return null; }
protected final synchronized DocCollection getDocCollection() { if (docCollection == null) { ZkStateReader zkStateReader = getCloudSolrServer().getZkStateReader(); docCollection = zkStateReader.getClusterState().getCollection(collection); // do basic checks once DocRouter docRouter = docCollection.getRouter(); if (docRouter instanceof ImplicitDocRouter) throw new IllegalStateException( "Implicit document routing not supported by this Partitioner!"); Collection<Slice> shards = getDocCollection().getSlices(); if (shards == null || shards.size() == 0) throw new IllegalStateException( "Collection '" + collection + "' does not have any shards!"); } return docCollection; }
protected void constructStreams() throws IOException { try { Object pushStream = ((Expressible) tupleStream).toExpression(streamFactory); ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader(); ClusterState clusterState = zkStateReader.getClusterState(); Set<String> liveNodes = clusterState.getLiveNodes(); Collection<Slice> slices = clusterState.getActiveSlices(this.collection); List<Replica> shuffler = new ArrayList(); for (Slice slice : slices) { Collection<Replica> replicas = slice.getReplicas(); for (Replica replica : replicas) { if (replica.getState() == Replica.State.ACTIVE && liveNodes.contains(replica.getNodeName())) shuffler.add(replica); } } if (workers > shuffler.size()) { throw new IOException("Number of workers exceeds nodes in the worker collection"); } Collections.shuffle(shuffler, new Random()); for (int w = 0; w < workers; w++) { HashMap params = new HashMap(); params.put("distrib", "false"); // We are the aggregator. params.put("numWorkers", workers); params.put("workerID", w); params.put("expr", pushStream); params.put("qt", "/stream"); Replica rep = shuffler.get(w); ZkCoreNodeProps zkProps = new ZkCoreNodeProps(rep); String url = zkProps.getCoreUrl(); SolrStream solrStream = new SolrStream(url, params); solrStreams.add(solrStream); } assert (solrStreams.size() == workers); } catch (Exception e) { throw new IOException(e); } }
/** * Retrieve all requests recorded by this queue which were sent to given collection and shard * * @param zkStateReader the {@link org.apache.solr.common.cloud.ZkStateReader} from which * cluster state is read * @param collectionName the given collection name for which requests have to be extracted * @param shardId the given shard name for which requests have to be extracted * @return a list of {@link * org.apache.solr.handler.component.TrackingShardHandlerFactory.ShardRequestAndParams} or * empty list if none are found */ public List<ShardRequestAndParams> getShardRequests( ZkStateReader zkStateReader, String collectionName, String shardId) { DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName); assert collection != null; Slice slice = collection.getSlice(shardId); assert slice != null; for (Map.Entry<String, List<ShardRequestAndParams>> entry : requests.entrySet()) { // multiple shard addresses may be present separated by '|' List<String> list = StrUtils.splitSmart(entry.getKey(), '|'); for (Map.Entry<String, Replica> replica : slice.getReplicasMap().entrySet()) { String coreUrl = new ZkCoreNodeProps(replica.getValue()).getCoreUrl(); if (list.contains(coreUrl)) { return new ArrayList<>(entry.getValue()); } } } return Collections.emptyList(); }
protected List<String> buildShardList(CloudSolrClient cloudSolrServer) { ZkStateReader zkStateReader = cloudSolrServer.getZkStateReader(); ClusterState clusterState = zkStateReader.getClusterState(); String[] collections = null; if (clusterState.hasCollection(collection)) { collections = new String[] {collection}; } else { // might be a collection alias? Aliases aliases = zkStateReader.getAliases(); String aliasedCollections = aliases.getCollectionAlias(collection); if (aliasedCollections == null) throw new IllegalArgumentException("Collection " + collection + " not found!"); collections = aliasedCollections.split(","); } Set<String> liveNodes = clusterState.getLiveNodes(); Random random = new Random(5150); List<String> shards = new ArrayList<String>(); for (String coll : collections) { for (Slice slice : clusterState.getSlices(coll)) { List<String> replicas = new ArrayList<String>(); for (Replica r : slice.getReplicas()) { ZkCoreNodeProps replicaCoreProps = new ZkCoreNodeProps(r); if (liveNodes.contains(replicaCoreProps.getNodeName())) replicas.add(replicaCoreProps.getCoreUrl()); } int numReplicas = replicas.size(); if (numReplicas == 0) throw new IllegalStateException( "Shard " + slice.getName() + " does not have any active replicas!"); String replicaUrl = (numReplicas == 1) ? replicas.get(0) : replicas.get(random.nextInt(replicas.size())); shards.add(replicaUrl); } } return shards; }
/**
 * Walks the NamedList response after performing an update request looking for the replication
 * factor that was achieved in each shard involved in the request. For single doc updates, there
 * will be only one shard in the return value.
 *
 * <p>Route keys are mapped back to shard names through the leader URL of each active slice
 * (both {@code baseUrl/coreName} and {@code baseUrl/collection}); a trailing '/' on the route
 * key is tolerated. When no shard matches, the route key itself is used as the map key. Routes
 * whose response is missing or carries no {@code responseHeader} are skipped.
 *
 * @param collection the collection the update was sent to
 * @param resp the update response; only {@code CloudSolrServer.RouteResponse} instances carry
 *     per-route information
 * @return achieved replication factor keyed by shard name (or route key); empty if {@code resp}
 *     is not a route response or holds no route responses
 */
@SuppressWarnings("rawtypes")
public Map<String, Integer> getShardReplicationFactor(String collection, NamedList resp) {
  connect();

  Map<String, Integer> results = new HashMap<String, Integer>();
  if (!(resp instanceof CloudSolrServer.RouteResponse)) {
    return results;
  }

  NamedList routes = ((CloudSolrServer.RouteResponse) resp).getRouteResponses();
  if (routes == null) {
    // nothing was routed, so there is nothing to report
    return results;
  }

  // leader URL (in both spellings) -> shard name
  ClusterState clusterState = zkStateReader.getClusterState();
  Map<String, String> leaders = new HashMap<String, String>();
  for (Slice slice : clusterState.getActiveSlices(collection)) {
    Replica leader = slice.getLeader();
    if (leader != null) {
      ZkCoreNodeProps zkProps = new ZkCoreNodeProps(leader);
      String leaderUrl = zkProps.getBaseUrl() + "/" + zkProps.getCoreName();
      leaders.put(leaderUrl, slice.getName());
      String altLeaderUrl = zkProps.getBaseUrl() + "/" + collection;
      leaders.put(altLeaderUrl, slice.getName());
    }
  }

  Iterator<Map.Entry<String, Object>> routeIter = routes.iterator();
  while (routeIter.hasNext()) {
    Map.Entry<String, Object> next = routeIter.next();
    String host = next.getKey();
    NamedList hostResp = (NamedList) next.getValue();

    // a route without a response header cannot report a replication factor
    Object header = (hostResp == null) ? null : hostResp.get("responseHeader");
    if (!(header instanceof NamedList)) {
      continue;
    }

    Integer rf = (Integer) ((NamedList) header).get(UpdateRequest.REPFACT);
    if (rf != null) {
      String shard = leaders.get(host);
      if (shard == null) {
        if (host.endsWith("/")) {
          shard = leaders.get(host.substring(0, host.length() - 1));
        }
        if (shard == null) {
          shard = host;
        }
      }
      results.put(shard, rf);
    }
  }
  return results;
}
@BeforeClass public static void setupCluster() throws Exception { final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf"); String configName = "solrCloudCollectionConfig"; int nodeCount = 5; configureCluster(nodeCount).addConfig(configName, configDir).configure(); Map<String, String> collectionProperties = new HashMap<>(); collectionProperties.put("config", "solrconfig-tlog.xml"); collectionProperties.put("schema", "schema.xml"); // create a collection holding data for the "to" side of the JOIN int shards = 2; int replicas = 2; CollectionAdminRequest.createCollection(toColl, configName, shards, replicas) .setProperties(collectionProperties) .process(cluster.getSolrClient()); // get the set of nodes where replicas for the "to" collection exist Set<String> nodeSet = new HashSet<>(); ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader(); ClusterState cs = zkStateReader.getClusterState(); for (Slice slice : cs.getCollection(toColl).getActiveSlices()) for (Replica replica : slice.getReplicas()) nodeSet.add(replica.getNodeName()); assertTrue(nodeSet.size() > 0); // deploy the "from" collection to all nodes where the "to" collection exists CollectionAdminRequest.createCollection(fromColl, configName, 1, 4) .setCreateNodeSet(StringUtils.join(nodeSet, ",")) .setProperties(collectionProperties) .process(cluster.getSolrClient()); toDocId = indexDoc(toColl, 1001, "a", null, "b"); indexDoc(fromColl, 2001, "a", "c", null); Thread.sleep(1000); // so the commits fire }
/**
 * Records the EasyMock expectations shared by the tests on the shard handler factory, work
 * queue, ZK state reader, cluster state and ZK client mocks.
 *
 * <p>Most expectations are registered with {@code anyTimes()} and answer from in-memory test
 * fixtures ({@code queue}, {@code zkMap}, {@code collectionsSet}). The order of the calls below
 * is the recording order of the mocks.
 *
 * @param liveNodesCount number of fake live nodes to register; node names are
 *     {@code localhost:<8963 + i>_solr}
 * @return the set of live node names that the cluster state mock will report
 * @throws Exception declared because the mocked methods being recorded may declare exceptions
 */
protected Set<String> commonMocks(int liveNodesCount) throws Exception {
  // shard handler factory always hands out the shared shard handler mock
  shardHandlerFactoryMock.getShardHandler();
  expectLastCall()
      .andAnswer(
          new IAnswer<ShardHandler>() {
            @Override
            public ShardHandler answer() throws Throwable {
              log.info("SHARDHANDLER");
              return shardHandlerMock;
            }
          })
      .anyTimes();

  // peekTopN: waits (in 1s steps) for the head of the in-memory queue; gives up with null once
  // the second wait has elapsed
  workQueueMock.peekTopN(EasyMock.anyInt(), anyObject(Set.class), EasyMock.anyLong());
  expectLastCall()
      .andAnswer(
          new IAnswer<List>() {
            @Override
            public List answer() throws Throwable {
              Object result;
              int count = 0;
              while ((result = queue.peek()) == null) {
                Thread.sleep(1000);
                count++;
                if (count > 1) return null;
              }
              return Arrays.asList(result);
            }
          })
      .anyTimes();

  // getTailId: id of the last element yielded by the queue's iterator, or null when empty
  workQueueMock.getTailId();
  expectLastCall()
      .andAnswer(
          new IAnswer<Object>() {
            @Override
            public Object answer() throws Throwable {
              Object result = null;
              Iterator iter = queue.iterator();
              while (iter.hasNext()) {
                result = iter.next();
              }
              return result == null ? null : ((QueueEvent) result).getId();
            }
          })
      .anyTimes();

  // peek(true): loops, sleeping 1s at a time, until the in-memory queue has a head element
  workQueueMock.peek(true);
  expectLastCall()
      .andAnswer(
          new IAnswer<Object>() {
            @Override
            public Object answer() throws Throwable {
              Object result;
              while ((result = queue.peek()) == null) {
                Thread.sleep(1000);
              }
              return result;
            }
          })
      .anyTimes();

  // remove: forwards to the in-memory queue
  workQueueMock.remove(anyObject(QueueEvent.class));
  expectLastCall()
      .andAnswer(
          new IAnswer<Object>() {
            @Override
            public Object answer() throws Throwable {
              queue.remove((QueueEvent) getCurrentArguments()[0]);
              return null;
            }
          })
      .anyTimes();

  // poll: forwards to the in-memory queue
  workQueueMock.poll();
  expectLastCall()
      .andAnswer(
          new IAnswer<Object>() {
            @Override
            public Object answer() throws Throwable {
              return queue.poll();
            }
          })
      .anyTimes();

  // state reader hands out the cluster state mock and the ZK client mock
  zkStateReaderMock.getClusterState();
  expectLastCall()
      .andAnswer(
          new IAnswer<Object>() {
            @Override
            public Object answer() throws Throwable {
              return clusterStateMock;
            }
          })
      .anyTimes();
  zkStateReaderMock.getZkClient();
  expectLastCall()
      .andAnswer(
          new IAnswer<Object>() {
            @Override
            public Object answer() throws Throwable {
              return solrZkClientMock;
            }
          })
      .anyTimes();

  // recorded without a following expectLastCall(), so no answer or repeat count is attached here
  zkStateReaderMock.updateClusterState(anyBoolean());

  clusterStateMock.getCollections();
  expectLastCall()
      .andAnswer(
          new IAnswer<Object>() {
            @Override
            public Object answer() throws Throwable {
              return collectionsSet;
            }
          })
      .anyTimes();

  // register the fake live nodes and a base-URL answer for each of them
  final Set<String> liveNodes = new HashSet<>();
  for (int i = 0; i < liveNodesCount; i++) {
    final String address = "localhost:" + (8963 + i) + "_solr";
    liveNodes.add(address);
    zkStateReaderMock.getBaseUrlForNodeName(address);
    expectLastCall()
        .andAnswer(
            new IAnswer<Object>() {
              @Override
              public Object answer() throws Throwable {
                // This works as long as this test does not use a
                // webapp context with an underscore in it
                return address.replaceAll("_", "/");
              }
            })
        .anyTimes();
  }

  // NOTE(review): unlike the neighbouring expectations this one has no anyTimes() - confirm
  // that a single expected call is intended
  zkStateReaderMock.getClusterProps();
  expectLastCall()
      .andAnswer(
          new IAnswer<Map>() {
            @Override
            public Map answer() throws Throwable {
              return new HashMap();
            }
          });

  solrZkClientMock.getZkClientTimeout();
  expectLastCall()
      .andAnswer(
          new IAnswer<Object>() {
            @Override
            public Object answer() throws Throwable {
              return 30000;
            }
          })
      .anyTimes();

  // hasCollection answers from the collectionsSet fixture
  clusterStateMock.hasCollection(anyObject(String.class));
  expectLastCall()
      .andAnswer(
          new IAnswer<Boolean>() {
            @Override
            public Boolean answer() throws Throwable {
              String key = (String) getCurrentArguments()[0];
              return collectionsSet.contains(key);
            }
          })
      .anyTimes();

  clusterStateMock.getLiveNodes();
  expectLastCall()
      .andAnswer(
          new IAnswer<Object>() {
            @Override
            public Object answer() throws Throwable {
              return liveNodes;
            }
          })
      .anyTimes();

  // create: remembers the path in zkMap and passes the payload to handleCreateCollMessage
  solrZkClientMock.create(
      anyObject(String.class),
      anyObject(byte[].class),
      anyObject(CreateMode.class),
      anyBoolean());
  expectLastCall()
      .andAnswer(
          new IAnswer<String>() {
            @Override
            public String answer() throws Throwable {
              String key = (String) getCurrentArguments()[0];
              zkMap.put(key, null);
              handleCreateCollMessage((byte[]) getCurrentArguments()[1]);
              return key;
            }
          })
      .anyTimes();

  // both makePath overloads simply echo the requested path
  solrZkClientMock.makePath(anyObject(String.class), anyObject(byte[].class), anyBoolean());
  expectLastCall()
      .andAnswer(
          new IAnswer<String>() {
            @Override
            public String answer() throws Throwable {
              String key = (String) getCurrentArguments()[0];
              return key;
            }
          })
      .anyTimes();
  solrZkClientMock.makePath(
      anyObject(String.class),
      anyObject(byte[].class),
      anyObject(CreateMode.class),
      anyBoolean());
  expectLastCall()
      .andAnswer(
          new IAnswer<String>() {
            @Override
            public String answer() throws Throwable {
              String key = (String) getCurrentArguments()[0];
              return key;
            }
          })
      .anyTimes();

  // exists answers from the paths recorded in zkMap
  solrZkClientMock.exists(anyObject(String.class), anyBoolean());
  expectLastCall()
      .andAnswer(
          new IAnswer<Boolean>() {
            @Override
            public Boolean answer() throws Throwable {
              String key = (String) getCurrentArguments()[0];
              return zkMap.containsKey(key);
            }
          })
      .anyTimes();

  // pre-seed the path of the config so that exists() reports it
  zkMap.put("/configs/myconfig", null);
  return liveNodes;
}
/**
 * Runs indexing and search threads while the chaos monkey is active, then verifies that the
 * cluster settles: leaders are found again, live nodes remain, the regular indexing threads did
 * not fail too often, shards are consistent, documents were added, and a new collection can
 * still be created (optionally after restarting the ZooKeeper test server).
 *
 * <p>If any step fails, the cluster layout is printed before the exception propagates.
 *
 * @throws Exception on any test failure or infrastructure error
 */
@Override
public void doTest() throws Exception {
  boolean testsSuccesful = false;
  try {
    handle.clear();
    handle.put("QTime", SKIPVAL);
    handle.put("timestamp", SKIPVAL);
    ZkStateReader zkStateReader = cloudClient.getZkStateReader();
    // make sure we have leaders for each shard
    // NOTE(review): the loop covers "shard1" .. "shard" + (sliceCount - 1); confirm whether the
    // last shard is skipped on purpose
    for (int j = 1; j < sliceCount; j++) {
      zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "shard" + j, 10000);
    } // make sure we again have leaders for each shard

    waitForRecoveriesToFinish(false);

    // we cannot do delete by query
    // as it's not supported for recovery
    del("*:*");

    List<StopableThread> threads = new ArrayList<StopableThread>();
    // one indexing thread ...
    int threadCount = 1;
    int i = 0;
    for (i = 0; i < threadCount; i++) {
      StopableIndexingThread indexThread = new StopableIndexingThread(Integer.toString(i), true);
      threads.add(indexThread);
      indexThread.start();
    }

    // ... and one search thread
    threadCount = 1;
    i = 0;
    for (i = 0; i < threadCount; i++) {
      StopableSearchThread searchThread = new StopableSearchThread();
      threads.add(searchThread);
      searchThread.start();
    }

    // TODO: we only do this sometimes so that we can sometimes compare against control,
    // it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
    boolean runFullThrottle = random().nextBoolean();
    if (runFullThrottle) {
      FullThrottleStopableIndexingThread ftIndexThread =
          new FullThrottleStopableIndexingThread(clients, "ft1", true);
      threads.add(ftIndexThread);
      ftIndexThread.start();
    }

    chaosMonkey.startTheMonkey(true, 10000);
    try {
      long runLength;
      if (RUN_LENGTH != -1) {
        runLength = RUN_LENGTH;
      } else {
        int[] runTimes =
            new int[] {5000, 6000, 10000, 15000, 25000, 30000, 30000, 45000, 90000, 120000};
        // NOTE(review): nextInt(runTimes.length - 1) never yields the last index, so 120000 is
        // never chosen; confirm whether that is intended
        runLength = runTimes[random().nextInt(runTimes.length - 1)];
      }

      Thread.sleep(runLength);
    } finally {
      // the monkey is stopped even if the sleep is interrupted
      chaosMonkey.stopTheMonkey();
    }

    for (StopableThread indexThread : threads) {
      indexThread.safeStop();
    }

    // start any downed jetties to be sure we still will end up with a leader per shard...

    // wait for stop...
    for (StopableThread indexThread : threads) {
      indexThread.join();
    }

    // try and wait for any replications and what not to finish...

    Thread.sleep(2000);

    // wait until there are no recoveries...
    waitForThingsToLevelOut(Integer.MAX_VALUE); // Math.round((runLength / 1000.0f / 3.0f)));

    // make sure we again have leaders for each shard
    for (int j = 1; j < sliceCount; j++) {
      zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "shard" + j, 30000);
    }

    commit();

    // TODO: assert we didnt kill everyone

    zkStateReader.updateClusterState(true);
    assertTrue(zkStateReader.getClusterState().getLiveNodes().size() > 0);

    // we expect full throttle fails, but cloud client should not easily fail
    for (StopableThread indexThread : threads) {
      if (indexThread instanceof StopableIndexingThread
          && !(indexThread instanceof FullThrottleStopableIndexingThread)) {
        assertFalse(
            "There were too many update fails - we expect it can happen, but shouldn't easily",
            ((StopableIndexingThread) indexThread).getFailCount() > 10);
      }
    }

    // full throttle thread can
    // have request fails
    checkShardConsistency(!runFullThrottle, true);

    long ctrlDocs = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();

    // ensure we have added more than 0 docs
    long cloudClientDocs = cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound();

    // NOTE(review): the message reports the control doc count while the condition checks the
    // cloud client's doc count
    assertTrue("Found " + ctrlDocs + " control docs", cloudClientDocs > 0);

    if (VERBOSE)
      System.out.println(
          "control docs:"
              + controlClient.query(new SolrQuery("*:*")).getResults().getNumFound()
              + "\n\n");

    // try and make a collection to make sure the overseer has survived the expiration and session
    // loss

    // sometimes we restart zookeeper as well
    if (random().nextBoolean()) {
      zkServer.shutdown();
      zkServer = new ZkTestServer(zkServer.getZkDir(), zkServer.getPort());
      zkServer.run();
    }

    CloudSolrServer client = createCloudClient("collection1");
    try {
      createCollection(null, "testcollection", 1, 1, 1, client, null, "conf1");
    } finally {
      client.shutdown();
    }
    // expected layout passed to checkForCollection: two entries, both 1
    List<Integer> numShardsNumReplicas = new ArrayList<Integer>(2);
    numShardsNumReplicas.add(1);
    numShardsNumReplicas.add(1);
    checkForCollection("testcollection", numShardsNumReplicas, null);

    testsSuccesful = true;
  } finally {
    // dump the cluster layout to help diagnose a failed run
    if (!testsSuccesful) {
      printLayout();
    }
  }
}
protected NamedList<Object> sendRequest(SolrRequest request) throws SolrServerException, IOException { connect(); ClusterState clusterState = zkStateReader.getClusterState(); boolean sendToLeaders = false; List<String> replicas = null; if (request instanceof IsUpdateRequest) { if (request instanceof UpdateRequest) { NamedList<Object> response = directUpdate((AbstractUpdateRequest) request, clusterState); if (response != null) { return response; } } sendToLeaders = true; replicas = new ArrayList<>(); } SolrParams reqParams = request.getParams(); if (reqParams == null) { reqParams = new ModifiableSolrParams(); } List<String> theUrlList = new ArrayList<>(); if (request.getPath().equals("/admin/collections") || request.getPath().equals("/admin/cores")) { Set<String> liveNodes = clusterState.getLiveNodes(); for (String liveNode : liveNodes) { theUrlList.add(zkStateReader.getBaseUrlForNodeName(liveNode)); } } else { String collection = reqParams.get(UpdateParams.COLLECTION, defaultCollection); if (collection == null) { throw new SolrServerException( "No collection param specified on request and no default collection has been set."); } Set<String> collectionsList = getCollectionList(clusterState, collection); if (collectionsList.size() == 0) { throw new SolrException(ErrorCode.BAD_REQUEST, "Could not find collection: " + collection); } String shardKeys = reqParams.get(ShardParams._ROUTE_); if (shardKeys == null) { shardKeys = reqParams.get(ShardParams.SHARD_KEYS); // deprecated } // TODO: not a big deal because of the caching, but we could avoid looking // at every shard // when getting leaders if we tweaked some things // Retrieve slices from the cloud state and, for each collection // specified, // add it to the Map of slices. 
Map<String, Slice> slices = new HashMap<>(); for (String collectionName : collectionsList) { DocCollection col = getDocCollection(clusterState, collectionName); Collection<Slice> routeSlices = col.getRouter().getSearchSlices(shardKeys, reqParams, col); ClientUtils.addSlices(slices, collectionName, routeSlices, true); } Set<String> liveNodes = clusterState.getLiveNodes(); List<String> leaderUrlList = null; List<String> urlList = null; List<String> replicasList = null; // build a map of unique nodes // TODO: allow filtering by group, role, etc Map<String, ZkNodeProps> nodes = new HashMap<>(); List<String> urlList2 = new ArrayList<>(); for (Slice slice : slices.values()) { for (ZkNodeProps nodeProps : slice.getReplicasMap().values()) { ZkCoreNodeProps coreNodeProps = new ZkCoreNodeProps(nodeProps); String node = coreNodeProps.getNodeName(); if (!liveNodes.contains(coreNodeProps.getNodeName()) || !coreNodeProps.getState().equals(ZkStateReader.ACTIVE)) continue; if (nodes.put(node, nodeProps) == null) { if (!sendToLeaders || (sendToLeaders && coreNodeProps.isLeader())) { String url; if (reqParams.get(UpdateParams.COLLECTION) == null) { url = ZkCoreNodeProps.getCoreUrl( nodeProps.getStr(ZkStateReader.BASE_URL_PROP), defaultCollection); } else { url = coreNodeProps.getCoreUrl(); } urlList2.add(url); } else if (sendToLeaders) { String url; if (reqParams.get(UpdateParams.COLLECTION) == null) { url = ZkCoreNodeProps.getCoreUrl( nodeProps.getStr(ZkStateReader.BASE_URL_PROP), defaultCollection); } else { url = coreNodeProps.getCoreUrl(); } replicas.add(url); } } } } if (sendToLeaders) { leaderUrlList = urlList2; replicasList = replicas; } else { urlList = urlList2; } if (sendToLeaders) { theUrlList = new ArrayList<>(leaderUrlList.size()); theUrlList.addAll(leaderUrlList); } else { theUrlList = new ArrayList<>(urlList.size()); theUrlList.addAll(urlList); } if (theUrlList.isEmpty()) { throw new SolrException( SolrException.ErrorCode.INVALID_STATE, "Not enough nodes to handle the 
request"); } Collections.shuffle(theUrlList, rand); if (sendToLeaders) { ArrayList<String> theReplicas = new ArrayList<>(replicasList.size()); theReplicas.addAll(replicasList); Collections.shuffle(theReplicas, rand); theUrlList.addAll(theReplicas); } } LBHttpSolrServer.Req req = new LBHttpSolrServer.Req(request, theUrlList); LBHttpSolrServer.Rsp rsp = lbServer.request(req); return rsp.getResponse(); }
/**
 * As this class doesn't watch external collections on the client side, there's a chance that the
 * request will fail due to cached stale state, which means the state must be refreshed from ZK
 * and retried.
 *
 * <p>For non-admin requests with a collection, the method attaches a {@code STATE_VERSION}
 * parameter of the form {@code name:version|name:version} covering every requested collection
 * whose state format is greater than 1. If sending fails, cached state is dropped or refreshed
 * when the server reported {@code INVALID_STATE} or when a communication error coincides with a
 * changed ZNode version, and the request is re-issued recursively with {@code retryCount + 1}.
 *
 * @param request the request to send; its params are only modified when they are
 *     {@code ModifiableSolrParams}
 * @param retryCount number of retries already performed; stale-state handling only applies
 *     while this is below {@code MAX_STALE_RETRIES}
 * @param collection the collection (or collection list/alias as accepted by
 *     {@code getCollectionList}) the request targets; {@code null} disables retry support
 * @return the response from {@code sendRequest}, possibly obtained on a retry
 * @throws SolrServerException the original exception if it was one, or a wrapper around the
 *     root cause for other exception types
 * @throws IOException the original exception if it was an {@code IOException}
 */
protected NamedList<Object> requestWithRetryOnStaleState(
    SolrRequest request, int retryCount, String collection)
    throws SolrServerException, IOException {

  connect(); // important to call this before you start working with the ZkStateReader

  // build up a _stateVer_ param to pass to the server containing all of the
  // external collection state versions involved in this request, which allows
  // the server to notify us that our cached state for one or more of the external
  // collections is stale and needs to be refreshed ... this code has no impact on internal
  // collections
  String stateVerParam = null;
  List<DocCollection> requestedCollections = null;
  if (collection != null
      && !request
          .getPath()
          .startsWith("/admin")) { // don't do _stateVer_ checking for admin requests
    Set<String> requestedCollectionNames =
        getCollectionList(getZkStateReader().getClusterState(), collection);

    StringBuilder stateVerParamBuilder = null;
    for (String requestedCollection : requestedCollectionNames) {
      // track the version of state we're using on the client side using the _stateVer_ param
      DocCollection coll =
          getDocCollection(getZkStateReader().getClusterState(), requestedCollection);
      int collVer = coll.getZNodeVersion();
      // only collections with state format > 1 are tracked and versioned
      if (coll.getStateFormat() > 1) {
        if (requestedCollections == null)
          requestedCollections = new ArrayList<>(requestedCollectionNames.size());
        requestedCollections.add(coll);

        if (stateVerParamBuilder == null) {
          stateVerParamBuilder = new StringBuilder();
        } else {
          stateVerParamBuilder.append(
              "|"); // hopefully pipe is not an allowed char in a collection name
        }

        stateVerParamBuilder.append(coll.getName()).append(":").append(collVer);
      }
    }

    if (stateVerParamBuilder != null) {
      stateVerParam = stateVerParamBuilder.toString();
    }
  }

  // set the version param, or clear one left on the params when there is nothing to report
  if (request.getParams() instanceof ModifiableSolrParams) {
    ModifiableSolrParams params = (ModifiableSolrParams) request.getParams();
    if (stateVerParam != null) {
      params.set(STATE_VERSION, stateVerParam);
    } else {
      params.remove(STATE_VERSION);
    }
  } // else: ??? how to set this ???

  NamedList<Object> resp = null;
  try {
    resp = sendRequest(request);
  } catch (Exception exc) {

    Throwable rootCause = SolrException.getRootCause(exc);
    // don't do retry support for admin requests or if the request doesn't have a collection
    // specified
    if (collection == null || request.getPath().startsWith("/admin")) {
      if (exc instanceof SolrServerException) {
        throw (SolrServerException) exc;
      } else if (exc instanceof IOException) {
        throw (IOException) exc;
      } else if (exc instanceof RuntimeException) {
        throw (RuntimeException) exc;
      } else {
        throw new SolrServerException(rootCause);
      }
    }

    // non-SolrException root causes are reported with the UNKNOWN error code
    int errorCode =
        (rootCause instanceof SolrException)
            ? ((SolrException) rootCause).code()
            : SolrException.ErrorCode.UNKNOWN.code;

    log.error(
        "Request to collection {} failed due to (" + errorCode + ") {}, retry? " + retryCount,
        collection,
        rootCause.toString());

    boolean wasCommError =
        (rootCause instanceof ConnectException
            || rootCause instanceof ConnectTimeoutException
            || rootCause instanceof NoHttpResponseException
            || rootCause instanceof SocketException);

    boolean stateWasStale = false;
    if (retryCount < MAX_STALE_RETRIES
        && requestedCollections != null
        && !requestedCollections.isEmpty()
        && SolrException.ErrorCode.getErrorCode(errorCode)
            == SolrException.ErrorCode.INVALID_STATE) {
      // cached state for one or more external collections was stale
      // re-issue request using updated state
      stateWasStale = true;

      // just re-read state for all of them, which is a little heavy handed but hopefully a rare
      // occurrence
      for (DocCollection ext : requestedCollections) {
        collectionStateCache.remove(ext.getName());
      }
    }

    // if we experienced a communication error, it's worth checking the state
    // with ZK just to make sure the node we're trying to hit is still part of the collection
    if (retryCount < MAX_STALE_RETRIES
        && !stateWasStale
        && requestedCollections != null
        && !requestedCollections.isEmpty()
        && wasCommError) {
      for (DocCollection ext : requestedCollections) {
        DocCollection latestStateFromZk =
            getDocCollection(zkStateReader.getClusterState(), ext.getName());
        if (latestStateFromZk.getZNodeVersion() != ext.getZNodeVersion()) {
          // looks like we couldn't reach the server because the state was stale == retry
          stateWasStale = true;
          // we just pulled state from ZK, so update the cache so that the retry uses it
          collectionStateCache.put(
              ext.getName(), new ExpiringCachedDocCollection(latestStateFromZk));
        }
      }
    }

    if (requestedCollections != null) {
      requestedCollections.clear(); // done with this
    }

    // if the state was stale, then we retry the request once with new state pulled from Zk
    if (stateWasStale) {
      log.warn(
          "Re-trying request to collection(s) "
              + collection
              + " after stale state error from server.");
      resp = requestWithRetryOnStaleState(request, retryCount + 1, collection);
    } else {
      // no retry: rethrow; unlike the admin branch above, a RuntimeException is wrapped here
      if (exc instanceof SolrServerException) {
        throw (SolrServerException) exc;
      } else if (exc instanceof IOException) {
        throw (IOException) exc;
      } else {
        throw new SolrServerException(rootCause);
      }
    }
  }

  return resp;
}