void checkBulkAction(boolean indexShouldBeAutoCreated, BulkRequestBuilder builder) {
  // bulk operations do not throw MasterNotDiscoveredException. The only way to test that
  // auto-create kicked in and failed is via the timeout, as bulk operations do not wait on blocks.
  TimeValue timeout;
  if (indexShouldBeAutoCreated) {
    // we expect the bulk to fail because it will try to go to the master. Use a small timeout
    // and detect that it has passed
    timeout = new TimeValue(200);
  } else {
    // the request should fail very quickly - use a large timeout and make sure it didn't pass...
    timeout = new TimeValue(5000);
  }
  builder.setTimeout(timeout);
  long now = System.currentTimeMillis();
  try {
    builder.get();
    fail("Expected ClusterBlockException");
  } catch (ClusterBlockException e) {
    if (indexShouldBeAutoCreated) {
      // timeout is 200
      assertThat(System.currentTimeMillis() - now, greaterThan(timeout.millis() - 50));
      assertThat(e.status(), equalTo(RestStatus.SERVICE_UNAVAILABLE));
    } else {
      // timeout is 5000
      assertThat(System.currentTimeMillis() - now, lessThan(timeout.millis() - 50));
    }
  }
}
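// Usage sketch for checkBulkAction above. The index names, documents, and wrapper method
// name are assumptions for illustration, not taken from the original test. With no master,
// a bulk whose missing target index will be auto-created goes to the master and should hit
// the short timeout, while one that cannot trigger auto-create should fail fast.
void exampleCheckBulkActionUsage() { // hypothetical helper name
  checkBulkAction(
      true,
      client()
          .prepareBulk()
          .add(client().prepareIndex("auto_created", "type1", "1").setSource("field", "1")));
  checkBulkAction(
      false,
      client()
          .prepareBulk()
          .add(client().prepareIndex("no_auto_create", "type1", "1").setSource("field", "1")));
}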
/**
 * Checks for any cluster state blocks. Returns true if the operation is OK to proceed. If false
 * is returned, no further action is needed; the method takes care of any continuation, either by
 * responding to the listener or by scheduling a retry.
 */
protected boolean checkBlocks() {
  ClusterBlockException blockException = checkGlobalBlock(observer.observedState());
  if (blockException != null) {
    if (blockException.retryable()) {
      logger.trace("cluster is blocked ({}), scheduling a retry", blockException.getMessage());
      retry(blockException);
    } else {
      finishAsFailed(blockException);
    }
    return false;
  }
  if (resolveIndex()) {
    internalRequest.concreteIndex(
        indexNameExpressionResolver.concreteSingleIndex(
            observer.observedState(), internalRequest.request()));
  } else {
    internalRequest.concreteIndex(internalRequest.request().index());
  }
  resolveRequest(observer.observedState(), internalRequest, listener);
  blockException = checkRequestBlock(observer.observedState(), internalRequest);
  if (blockException != null) {
    if (blockException.retryable()) {
      logger.trace("cluster is blocked ({}), scheduling a retry", blockException.getMessage());
      retry(blockException);
    } else {
      finishAsFailed(blockException);
    }
    return false;
  }
  return true;
}
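// A minimal sketch of how a start path might consume the checkBlocks() contract. The
// doStart()/innerStart() names here are assumptions for illustration, not the class's
// actual methods.
protected void doStart() {
  if (!checkBlocks()) {
    // checkBlocks() already failed the listener or scheduled a retry
    return;
  }
  // blocks are clear and internalRequest.concreteIndex(...) has been resolved
  innerStart();
}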
public void testClusterBlockException() throws IOException {
  ClusterBlockException ex =
      serialize(
          new ClusterBlockException(ImmutableSet.of(DiscoverySettings.NO_MASTER_BLOCK_WRITES)));
  assertEquals("blocked by: [SERVICE_UNAVAILABLE/2/no master];", ex.getMessage());
  assertTrue(ex.blocks().contains(DiscoverySettings.NO_MASTER_BLOCK_WRITES));
  assertEquals(1, ex.blocks().size());
}
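// The serialize(...) helper is not shown here. A plausible round-trip sketch, assuming
// the StreamOutput#writeThrowable / StreamInput#readThrowable transport serialization
// hooks (treat this as an assumption, not the exact test utility):
private <T extends Throwable> T serialize(T exception) throws IOException {
  BytesStreamOutput out = new BytesStreamOutput();
  out.writeThrowable(exception);
  StreamInput in = StreamInput.wrap(out.bytes());
  return in.readThrowable();
}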
protected boolean doStart() {
  nodes = observer.observedState().nodes();
  try {
    ClusterBlockException blockException = checkGlobalBlock(observer.observedState());
    if (blockException != null) {
      if (blockException.retryable()) {
        retry(blockException);
        return false;
      } else {
        throw blockException;
      }
    }
    internalRequest.concreteIndex(
        observer
            .observedState()
            .metaData()
            .concreteSingleIndex(
                internalRequest.request().index(), internalRequest.request().indicesOptions()));
    // check if we need to execute, and if not, return
    if (!resolveRequest(observer.observedState(), internalRequest, listener)) {
      return true;
    }
    blockException = checkRequestBlock(observer.observedState(), internalRequest);
    if (blockException != null) {
      if (blockException.retryable()) {
        retry(blockException);
        return false;
      } else {
        throw blockException;
      }
    }
    shardIt = shards(observer.observedState(), internalRequest);
  } catch (Throwable e) {
    listener.onFailure(e);
    return true;
  }
  // no shard iterator entries; we might be in the window between index gateway recovery
  // and shard routing initialization
  if (shardIt.size() == 0) {
    retry(null);
    return false;
  }
  // this transport action only makes sense with an iterator that returns a single shard
  // routing (like the primary)
  assert shardIt.size() == 1;
  ShardRouting shard = shardIt.nextOrNull();
  assert shard != null;
  if (!shard.active()) {
    retry(null);
    return false;
  }
  if (!operationStarted.compareAndSet(false, true)) {
    return true;
  }
  internalRequest.request().shardId = shardIt.shardId().id();
  if (shard.currentNodeId().equals(nodes.localNodeId())) {
    internalRequest.request().beforeLocalFork();
    try {
      threadPool
          .executor(executor)
          .execute(
              new Runnable() {
                @Override
                public void run() {
                  try {
                    shardOperation(internalRequest, listener);
                  } catch (Throwable e) {
                    if (retryOnFailure(e)) {
                      // we already marked the operation as started, so reset the flag
                      // before re-adding ourselves to the cluster state listener
                      operationStarted.set(false);
                      retry(null);
                    } else {
                      listener.onFailure(e);
                    }
                  }
                }
              });
    } catch (Throwable e) {
      if (retryOnFailure(e)) {
        retry(null);
      } else {
        listener.onFailure(e);
      }
    }
  } else {
    DiscoveryNode node = nodes.get(shard.currentNodeId());
    transportService.sendRequest(
        node,
        actionName,
        internalRequest.request(),
        transportOptions(),
        new BaseTransportResponseHandler<Response>() {
          @Override
          public Response newInstance() {
            return newResponse();
          }

          @Override
          public String executor() {
            return ThreadPool.Names.SAME;
          }

          @Override
          public void handleResponse(Response response) {
            listener.onResponse(response);
          }

          @Override
          public void handleException(TransportException exp) {
            // if we got disconnected from the node, or the node / shard is not in the
            // right state (being closed)
            if (exp.unwrapCause() instanceof ConnectTransportException
                || exp.unwrapCause() instanceof NodeClosedException
                || retryOnFailure(exp)) {
              // we already marked the operation as started, so reset the flag before
              // re-adding ourselves to the cluster state listener
              operationStarted.set(false);
              retry(null);
            } else {
              listener.onFailure(exp);
            }
          }
        });
  }
  return true;
}
protected void doStart() {
  final ClusterState clusterState = observer.observedState();
  final Predicate<ClusterState> masterChangePredicate =
      MasterNodeChangePredicate.build(clusterState);
  final DiscoveryNodes nodes = clusterState.nodes();
  if (nodes.isLocalNodeElectedMaster() || localExecute(request)) {
    // check for block, if blocked, retry, else, execute locally
    final ClusterBlockException blockException = checkBlock(request, clusterState);
    if (blockException != null) {
      if (!blockException.retryable()) {
        listener.onFailure(blockException);
      } else {
        logger.trace("can't execute due to a cluster block, retrying", blockException);
        retry(
            blockException,
            newState -> {
              ClusterBlockException newException = checkBlock(request, newState);
              return (newException == null || !newException.retryable());
            });
      }
    } else {
      ActionListener<Response> delegate =
          new ActionListener<Response>() {
            @Override
            public void onResponse(Response response) {
              listener.onResponse(response);
            }

            @Override
            public void onFailure(Exception t) {
              if (t instanceof Discovery.FailedToCommitClusterStateException
                  || (t instanceof NotMasterException)) {
                logger.debug(
                    (org.apache.logging.log4j.util.Supplier<?>)
                        () ->
                            new ParameterizedMessage(
                                "master could not publish cluster state or stepped down before publishing action [{}], scheduling a retry",
                                actionName),
                    t);
                retry(t, masterChangePredicate);
              } else {
                listener.onFailure(t);
              }
            }
          };
      taskManager.registerChildTask(task, nodes.getLocalNodeId());
      threadPool
          .executor(executor)
          .execute(
              new ActionRunnable(delegate) {
                @Override
                protected void doRun() throws Exception {
                  masterOperation(task, request, clusterState, delegate);
                }
              });
    }
  } else {
    if (nodes.getMasterNode() == null) {
      logger.debug("no known master node, scheduling a retry");
      retry(null, masterChangePredicate);
    } else {
      taskManager.registerChildTask(task, nodes.getMasterNode().getId());
      transportService.sendRequest(
          nodes.getMasterNode(),
          actionName,
          request,
          new ActionListenerResponseHandler<Response>(
              listener, TransportMasterNodeAction.this::newResponse) {
            @Override
            public void handleException(final TransportException exp) {
              Throwable cause = exp.unwrapCause();
              if (cause instanceof ConnectTransportException) {
                // we want to retry here a bit to see if a new master is elected
                logger.debug(
                    "connection exception while trying to forward request with action name [{}] to master node [{}], scheduling a retry. Error: [{}]",
                    actionName,
                    nodes.getMasterNode(),
                    exp.getDetailedMessage());
                retry(cause, masterChangePredicate);
              } else {
                listener.onFailure(exp);
              }
            }
          });
    }
  }
}
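// For context, a conceptual sketch of the master-change predicate built above: retry once
// a newer cluster state or a different elected master shows up relative to the state
// captured at start. This illustrates the idea and is not the exact
// MasterNodeChangePredicate source; the method name is made up.
static Predicate<ClusterState> exampleMasterChangePredicate(ClusterState currentState) {
  final long currentVersion = currentState.version();
  final String currentMasterId = currentState.nodes().getMasterNodeId();
  return newState ->
      newState.version() > currentVersion
          || Objects.equals(currentMasterId, newState.nodes().getMasterNodeId()) == false;
}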
protected void doStart() {
  nodes = observer.observedState().nodes();
  try {
    ClusterBlockException blockException = checkGlobalBlock(observer.observedState());
    if (blockException != null) {
      if (blockException.retryable()) {
        retry(blockException);
        return;
      } else {
        throw blockException;
      }
    }
    request.concreteIndex(
        indexNameExpressionResolver
            .concreteSingleIndex(observer.observedState(), request)
            .getName());
    resolveRequest(observer.observedState(), request);
    blockException = checkRequestBlock(observer.observedState(), request);
    if (blockException != null) {
      if (blockException.retryable()) {
        retry(blockException);
        return;
      } else {
        throw blockException;
      }
    }
    shardIt = shards(observer.observedState(), request);
  } catch (Throwable e) {
    listener.onFailure(e);
    return;
  }
  // no shard iterator entries; we might be in the window between index gateway recovery
  // and shard routing initialization
  if (shardIt.size() == 0) {
    retry(null);
    return;
  }
  // this transport action only makes sense with an iterator that returns a single shard
  // routing (like the primary)
  assert shardIt.size() == 1;
  ShardRouting shard = shardIt.nextOrNull();
  assert shard != null;
  if (!shard.active()) {
    retry(null);
    return;
  }
  request.shardId = shardIt.shardId();
  DiscoveryNode node = nodes.get(shard.currentNodeId());
  transportService.sendRequest(
      node,
      shardActionName,
      request,
      transportOptions(),
      new BaseTransportResponseHandler<Response>() {
        @Override
        public Response newInstance() {
          return newResponse();
        }

        @Override
        public String executor() {
          return ThreadPool.Names.SAME;
        }

        @Override
        public void handleResponse(Response response) {
          listener.onResponse(response);
        }

        @Override
        public void handleException(TransportException exp) {
          Throwable cause = exp.unwrapCause();
          // if we got disconnected from the node, or the node / shard is not in the right
          // state (being closed)
          if (cause instanceof ConnectTransportException
              || cause instanceof NodeClosedException
              || retryOnFailure(exp)) {
            retry(cause);
          } else {
            listener.onFailure(exp);
          }
        }
      });
}
private void innerExecute(
    final Request request, final ActionListener<Response> listener, final boolean retrying) {
  final ClusterState clusterState = clusterService.state();
  final DiscoveryNodes nodes = clusterState.nodes();
  if (nodes.localNodeMaster() || localExecute(request)) {
    // check for block, if blocked, retry, else, execute locally
    final ClusterBlockException blockException = checkBlock(request, clusterState);
    if (blockException != null) {
      if (!blockException.retryable()) {
        listener.onFailure(blockException);
        return;
      }
      clusterService.add(
          request.masterNodeTimeout(),
          new TimeoutClusterStateListener() {
            @Override
            public void postAdded() {
              ClusterBlockException blockException = checkBlock(request, clusterService.state());
              if (blockException == null || !blockException.retryable()) {
                clusterService.remove(this);
                innerExecute(request, listener, false);
              }
            }

            @Override
            public void onClose() {
              clusterService.remove(this);
              listener.onFailure(blockException);
            }

            @Override
            public void onTimeout(TimeValue timeout) {
              clusterService.remove(this);
              listener.onFailure(blockException);
            }

            @Override
            public void clusterChanged(ClusterChangedEvent event) {
              ClusterBlockException blockException = checkBlock(request, event.state());
              if (blockException == null || !blockException.retryable()) {
                clusterService.remove(this);
                innerExecute(request, listener, false);
              }
            }
          });
    } else {
      threadPool
          .executor(executor)
          .execute(
              new Runnable() {
                @Override
                public void run() {
                  try {
                    masterOperation(request, clusterState, listener);
                  } catch (Throwable e) {
                    listener.onFailure(e);
                  }
                }
              });
    }
  } else {
    if (nodes.masterNode() == null) {
      if (retrying) {
        listener.onFailure(new MasterNotDiscoveredException());
      } else {
        clusterService.add(
            request.masterNodeTimeout(),
            new TimeoutClusterStateListener() {
              @Override
              public void postAdded() {
                ClusterState clusterStateV2 = clusterService.state();
                if (clusterStateV2.nodes().masterNodeId() != null) {
                  // now we have a master, try and execute it...
                  clusterService.remove(this);
                  innerExecute(request, listener, true);
                }
              }

              @Override
              public void onClose() {
                clusterService.remove(this);
                listener.onFailure(new NodeClosedException(nodes.localNode()));
              }

              @Override
              public void onTimeout(TimeValue timeout) {
                clusterService.remove(this);
                listener.onFailure(
                    new MasterNotDiscoveredException("waited for [" + timeout + "]"));
              }

              @Override
              public void clusterChanged(ClusterChangedEvent event) {
                if (event.nodesDelta().masterNodeChanged()) {
                  clusterService.remove(this);
                  innerExecute(request, listener, true);
                }
              }
            });
      }
      return;
    }
    processBeforeDelegationToMaster(request, clusterState);
    transportService.sendRequest(
        nodes.masterNode(),
        transportAction,
        request,
        new BaseTransportResponseHandler<Response>() {
          @Override
          public Response newInstance() {
            return newResponse();
          }

          @Override
          public void handleResponse(Response response) {
            listener.onResponse(response);
          }

          @Override
          public String executor() {
            return ThreadPool.Names.SAME;
          }

          @Override
          public void handleException(final TransportException exp) {
            if (exp.unwrapCause() instanceof ConnectTransportException) {
              // we want to retry here a bit to see if a new master is elected
              clusterService.add(
                  request.masterNodeTimeout(),
                  new TimeoutClusterStateListener() {
                    @Override
                    public void postAdded() {
                      ClusterState clusterStateV2 = clusterService.state();
                      if (!clusterState
                          .nodes()
                          .masterNodeId()
                          .equals(clusterStateV2.nodes().masterNodeId())) {
                        // the master changed while we were adding the listener, try here
                        clusterService.remove(this);
                        innerExecute(request, listener, false);
                      }
                    }

                    @Override
                    public void onClose() {
                      clusterService.remove(this);
                      listener.onFailure(new NodeClosedException(nodes.localNode()));
                    }

                    @Override
                    public void onTimeout(TimeValue timeout) {
                      clusterService.remove(this);
                      listener.onFailure(new MasterNotDiscoveredException());
                    }

                    @Override
                    public void clusterChanged(ClusterChangedEvent event) {
                      if (event.nodesDelta().masterNodeChanged()) {
                        clusterService.remove(this);
                        innerExecute(request, listener, false);
                      }
                    }
                  });
            } else {
              listener.onFailure(exp);
            }
          }
        });
  }
}
/** Returns <tt>true</tt> if the action started being performed on the primary (or is done). */
public boolean start(final boolean fromClusterEvent) throws ElasticSearchException {
  final ClusterState clusterState = clusterService.state();
  nodes = clusterState.nodes();
  try {
    ClusterBlockException blockException = checkGlobalBlock(clusterState, request);
    if (blockException != null) {
      if (blockException.retryable()) {
        retry(fromClusterEvent, blockException);
        return false;
      } else {
        throw blockException;
      }
    }
    // check if we need to execute, and if not, return
    if (!resolveRequest(clusterState, request, listener)) {
      return true;
    }
    blockException = checkRequestBlock(clusterState, request);
    if (blockException != null) {
      if (blockException.retryable()) {
        retry(fromClusterEvent, blockException);
        return false;
      } else {
        throw blockException;
      }
    }
    shardIt = shards(clusterState, request);
  } catch (Exception e) {
    listener.onFailure(e);
    return true;
  }
  // no shard iterator entries; we might be in the window between index gateway recovery
  // and shard routing initialization
  if (shardIt.size() == 0) {
    retry(fromClusterEvent, null);
    return false;
  }
  boolean foundPrimary = false;
  ShardRouting shardX;
  while ((shardX = shardIt.nextOrNull()) != null) {
    final ShardRouting shard = shardX;
    // we only deal with the primary shard here...
    if (!shard.primary()) {
      continue;
    }
    if (!shard.active() || !nodes.nodeExists(shard.currentNodeId())) {
      retry(fromClusterEvent, null);
      return false;
    }
    // check here for write consistency (see the sizing sketch after this method)
    if (checkWriteConsistency) {
      WriteConsistencyLevel consistencyLevel = defaultWriteConsistencyLevel;
      if (request.consistencyLevel() != WriteConsistencyLevel.DEFAULT) {
        consistencyLevel = request.consistencyLevel();
      }
      int requiredNumber = 1;
      if (consistencyLevel == WriteConsistencyLevel.QUORUM && shardIt.size() > 2) {
        // a quorum only makes sense with more than 2 copies; otherwise it's 1 shard with
        // 1 replica, and the quorum is 1 (which is what requiredNumber is initialized to)
        requiredNumber = (shardIt.size() / 2) + 1;
      } else if (consistencyLevel == WriteConsistencyLevel.ALL) {
        requiredNumber = shardIt.size();
      }
      if (shardIt.sizeActive() < requiredNumber) {
        retry(fromClusterEvent, null);
        return false;
      }
    }
    if (!primaryOperationStarted.compareAndSet(false, true)) {
      return true;
    }
    foundPrimary = true;
    if (shard.currentNodeId().equals(nodes.localNodeId())) {
      if (request.operationThreaded()) {
        request.beforeLocalFork();
        threadPool
            .executor(executor)
            .execute(
                new Runnable() {
                  @Override
                  public void run() {
                    performOnPrimary(shard.id(), fromClusterEvent, shard, clusterState);
                  }
                });
      } else {
        performOnPrimary(shard.id(), fromClusterEvent, shard, clusterState);
      }
    } else {
      DiscoveryNode node = nodes.get(shard.currentNodeId());
      transportService.sendRequest(
          node,
          transportAction,
          request,
          transportOptions,
          new BaseTransportResponseHandler<Response>() {
            @Override
            public Response newInstance() {
              return newResponseInstance();
            }

            @Override
            public String executor() {
              return ThreadPool.Names.SAME;
            }

            @Override
            public void handleResponse(Response response) {
              listener.onResponse(response);
            }

            @Override
            public void handleException(TransportException exp) {
              // if we got disconnected from the node, or the node / shard is not in the
              // right state (being closed)
              if (exp.unwrapCause() instanceof ConnectTransportException
                  || exp.unwrapCause() instanceof NodeClosedException
                  || retryPrimaryException(exp)) {
                // we already marked the operation as started, so reset the flag and pass
                // fromClusterEvent=false to re-add ourselves to the cluster listener
                primaryOperationStarted.set(false);
                retry(false, null);
              } else {
                listener.onFailure(exp);
              }
            }
          });
    }
    break;
  }
  // we should never get here, but here we go
  if (!foundPrimary) {
    retry(fromClusterEvent, null);
    return false;
  }
  return true;
}
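// To make the quorum arithmetic in start(...) concrete, a standalone sketch; the enum and
// method name are made up, with Consistency standing in for the real WriteConsistencyLevel.
// shardCopies mirrors shardIt.size(): the primary plus its replicas.
enum Consistency { ONE, QUORUM, ALL }

static int requiredCopies(int shardCopies, Consistency level) {
  int required = 1;
  if (level == Consistency.QUORUM && shardCopies > 2) {
    // e.g. 1 primary + 2 replicas: (3 / 2) + 1 == 2, so writes proceed with one replica down
    required = (shardCopies / 2) + 1;
  } else if (level == Consistency.ALL) {
    required = shardCopies;
  }
  // with 1 primary + 1 replica, QUORUM deliberately stays at 1: requiring 2 copies
  // would block writes whenever the lone replica is unassigned
  return required;
}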
@Test
public void testNoMasterActions_writeMasterBlock() throws Exception {
  Settings settings =
      settingsBuilder()
          .put("discovery.type", "zen")
          .put("action.auto_create_index", false)
          .put("discovery.zen.minimum_master_nodes", 2)
          .put("discovery.zen.ping_timeout", "200ms")
          .put("discovery.initial_state_timeout", "500ms")
          .put(DiscoverySettings.NO_MASTER_BLOCK, "write")
          .build();

  internalCluster().startNode(settings);
  // start a second node, create an index, and then shut it down so we have no master block
  internalCluster().startNode(settings);
  prepareCreate("test1").setSettings(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).get();
  prepareCreate("test2")
      .setSettings(
          IndexMetaData.SETTING_NUMBER_OF_SHARDS, 2,
          IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
      .get();
  client().admin().cluster().prepareHealth("_all").setWaitForGreenStatus().get();
  client().prepareIndex("test1", "type1", "1").setSource("field", "value1").get();
  client().prepareIndex("test2", "type1", "1").setSource("field", "value1").get();
  refresh();
  ensureSearchable("test1", "test2");

  ClusterStateResponse clusterState = client().admin().cluster().prepareState().get();
  logger.info("Cluster state:\n" + clusterState.getState().prettyPrint());

  internalCluster().stopRandomDataNode();
  assertThat(
      awaitBusy(
          new Predicate<Object>() {
            @Override
            public boolean apply(Object o) {
              ClusterState state =
                  client().admin().cluster().prepareState().setLocal(true).get().getState();
              return state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID);
            }
          }),
      equalTo(true));

  GetResponse getResponse = client().prepareGet("test1", "type1", "1").get();
  assertExists(getResponse);

  CountResponse countResponse = client().prepareCount("test1").get();
  assertHitCount(countResponse, 1L);

  SearchResponse searchResponse = client().prepareSearch("test1").get();
  assertHitCount(searchResponse, 1L);

  countResponse = client().prepareCount("test2").get();
  assertThat(countResponse.getTotalShards(), equalTo(2));
  assertThat(countResponse.getSuccessfulShards(), equalTo(1));

  TimeValue timeout = TimeValue.timeValueMillis(200);
  long now = System.currentTimeMillis();
  try {
    client()
        .prepareUpdate("test1", "type1", "1")
        .setDoc("field", "value2")
        .setTimeout(timeout)
        .get();
    fail("Expected ClusterBlockException");
  } catch (ClusterBlockException e) {
    assertThat(System.currentTimeMillis() - now, greaterThan(timeout.millis() - 50));
    assertThat(e.status(), equalTo(RestStatus.SERVICE_UNAVAILABLE));
  }

  now = System.currentTimeMillis();
  try {
    client()
        .prepareIndex("test1", "type1", "1")
        .setSource(XContentFactory.jsonBuilder().startObject().endObject())
        .setTimeout(timeout)
        .get();
    fail("Expected ClusterBlockException");
  } catch (ClusterBlockException e) {
    assertThat(System.currentTimeMillis() - now, greaterThan(timeout.millis() - 50));
    assertThat(e.status(), equalTo(RestStatus.SERVICE_UNAVAILABLE));
  }

  internalCluster().startNode(settings);
  client().admin().cluster().prepareHealth().setWaitForGreenStatus().setWaitForNodes("2").get();
}