/**
 * Verifies that a {@link LocalNodeMasterListener} registered on the cluster service receives
 * onMaster()/offMaster() callbacks as the local node gains, loses, and regains the master role.
 */
public void testLocalNodeMasterListenerCallbacks() throws Exception {
  TimedClusterService clusterService = createTimedClusterService(false);

  // Records the most recent callback delivered to the listener.
  AtomicBoolean master = new AtomicBoolean();
  clusterService.add(
      new LocalNodeMasterListener() {
        @Override
        public void onMaster() {
          master.set(true);
        }

        @Override
        public void offMaster() {
          master.set(false);
        }

        @Override
        public String executorName() {
          return ThreadPool.Names.SAME;
        }
      });

  // Promote the local node to master: the listener must observe onMaster().
  ClusterState clusterState = clusterService.state();
  DiscoveryNodes discoveryNodes = clusterState.nodes();
  DiscoveryNodes.Builder builder =
      DiscoveryNodes.builder(discoveryNodes).masterNodeId(discoveryNodes.getLocalNodeId());
  clusterState =
      ClusterState.builder(clusterState)
          .blocks(ClusterBlocks.EMPTY_CLUSTER_BLOCK)
          .nodes(builder)
          .build();
  setState(clusterService, clusterState);
  assertThat(master.get(), is(true));

  // Demote the local node (no master, writes blocked): the listener must observe offMaster().
  discoveryNodes = clusterState.nodes();
  builder = DiscoveryNodes.builder(discoveryNodes).masterNodeId(null);
  clusterState =
      ClusterState.builder(clusterState)
          .blocks(
              ClusterBlocks.builder().addGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_WRITES))
          .nodes(builder)
          .build();
  setState(clusterService, clusterState);
  assertThat(master.get(), is(false));

  // Re-promote the local node: the listener must observe onMaster() again.
  builder = DiscoveryNodes.builder(discoveryNodes).masterNodeId(discoveryNodes.getLocalNodeId());
  clusterState =
      ClusterState.builder(clusterState)
          .blocks(ClusterBlocks.EMPTY_CLUSTER_BLOCK)
          .nodes(builder)
          .build();
  setState(clusterService, clusterState);
  assertThat(master.get(), is(true));

  clusterService.close();
}
TimedClusterService createTimedClusterService(boolean makeMaster) throws InterruptedException { TimedClusterService timedClusterService = new TimedClusterService( Settings.builder().put("cluster.name", "ClusterServiceTests").build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool); timedClusterService.setLocalNode( new DiscoveryNode( "node1", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT)); timedClusterService.setNodeConnectionsService( new NodeConnectionsService(Settings.EMPTY, null, null) { @Override public void connectToNodes(List<DiscoveryNode> addedNodes) { // skip } @Override public void disconnectFromNodes(List<DiscoveryNode> removedNodes) { // skip } }); timedClusterService.setClusterStatePublisher((event, ackListener) -> {}); timedClusterService.start(); ClusterState state = timedClusterService.state(); final DiscoveryNodes nodes = state.nodes(); final DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(nodes).masterNodeId(makeMaster ? nodes.getLocalNodeId() : null); state = ClusterState.builder(state) .blocks(ClusterBlocks.EMPTY_CLUSTER_BLOCK) .nodes(nodesBuilder) .build(); setState(timedClusterService, state); return timedClusterService; }
protected void doStart() {
  // Snapshot of the cluster state this attempt operates on; a retry re-runs doStart
  // against a fresh observed state.
  final ClusterState clusterState = observer.observedState();
  // Predicate used by retry() to wake up once a (different) master has been elected.
  final Predicate<ClusterState> masterChangePredicate =
      MasterNodeChangePredicate.build(clusterState);
  final DiscoveryNodes nodes = clusterState.nodes();
  if (nodes.isLocalNodeElectedMaster() || localExecute(request)) {
    // check for block, if blocked, retry, else, execute locally
    final ClusterBlockException blockException = checkBlock(request, clusterState);
    if (blockException != null) {
      if (!blockException.retryable()) {
        // Non-retryable block: fail the request immediately.
        listener.onFailure(blockException);
      } else {
        logger.trace("can't execute due to a cluster block, retrying", blockException);
        // Retry once the block clears (or becomes non-retryable, so the next attempt fails fast).
        retry(
            blockException,
            newState -> {
              ClusterBlockException newException = checkBlock(request, newState);
              return (newException == null || !newException.retryable());
            });
      }
    } else {
      // Wrap the caller's listener so that a master fail-over while the operation runs
      // triggers a retry instead of surfacing the transient failure to the caller.
      ActionListener<Response> delegate =
          new ActionListener<Response>() {
            @Override
            public void onResponse(Response response) {
              listener.onResponse(response);
            }

            @Override
            public void onFailure(Exception t) {
              if (t instanceof Discovery.FailedToCommitClusterStateException
                  || (t instanceof NotMasterException)) {
                logger.debug(
                    (org.apache.logging.log4j.util.Supplier<?>)
                        () ->
                            new ParameterizedMessage(
                                "master could not publish cluster state or stepped down before publishing action [{}], scheduling a retry",
                                actionName),
                    t);
                // Wait for a new master, then re-run the whole doStart sequence.
                retry(t, masterChangePredicate);
              } else {
                listener.onFailure(t);
              }
            }
          };
      taskManager.registerChildTask(task, nodes.getLocalNodeId());
      // Run the master operation on the configured executor, off the calling thread.
      threadPool
          .executor(executor)
          .execute(
              new ActionRunnable(delegate) {
                @Override
                protected void doRun() throws Exception {
                  masterOperation(task, request, clusterState, delegate);
                }
              });
    }
  } else {
    if (nodes.getMasterNode() == null) {
      // No master known yet; wait for an election before forwarding.
      logger.debug("no known master node, scheduling a retry");
      retry(null, masterChangePredicate);
    } else {
      // Local node is not the master: forward the request to the current master node.
      taskManager.registerChildTask(task, nodes.getMasterNode().getId());
      transportService.sendRequest(
          nodes.getMasterNode(),
          actionName,
          request,
          new ActionListenerResponseHandler<Response>(
              listener, TransportMasterNodeAction.this::newResponse) {
            @Override
            public void handleException(final TransportException exp) {
              Throwable cause = exp.unwrapCause();
              if (cause instanceof ConnectTransportException) {
                // we want to retry here a bit to see if a new master is elected
                logger.debug(
                    "connection exception while trying to forward request with action name [{}] to master node [{}], scheduling a retry. Error: [{}]",
                    actionName,
                    nodes.getMasterNode(),
                    exp.getDetailedMessage());
                retry(cause, masterChangePredicate);
              } else {
                listener.onFailure(exp);
              }
            }
          });
    }
  }
}
public void testDisconnectFromNewlyAddedNodesIfClusterStatePublishingFails() throws InterruptedException { TimedClusterService timedClusterService = new TimedClusterService( Settings.builder().put("cluster.name", "ClusterServiceTests").build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool); timedClusterService.setLocalNode( new DiscoveryNode( "node1", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT)); Set<DiscoveryNode> currentNodes = Collections.synchronizedSet(new HashSet<>()); currentNodes.add(timedClusterService.localNode()); timedClusterService.setNodeConnectionsService( new NodeConnectionsService(Settings.EMPTY, null, null) { @Override public void connectToNodes(List<DiscoveryNode> addedNodes) { currentNodes.addAll(addedNodes); } @Override public void disconnectFromNodes(List<DiscoveryNode> removedNodes) { currentNodes.removeAll(removedNodes); } }); AtomicBoolean failToCommit = new AtomicBoolean(); timedClusterService.setClusterStatePublisher( (event, ackListener) -> { if (failToCommit.get()) { throw new Discovery.FailedToCommitClusterStateException("just to test this"); } }); timedClusterService.start(); ClusterState state = timedClusterService.state(); final DiscoveryNodes nodes = state.nodes(); final DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(nodes).masterNodeId(nodes.getLocalNodeId()); state = ClusterState.builder(state) .blocks(ClusterBlocks.EMPTY_CLUSTER_BLOCK) .nodes(nodesBuilder) .build(); setState(timedClusterService, state); assertThat(currentNodes, equalTo(Sets.newHashSet(timedClusterService.state().getNodes()))); final CountDownLatch latch = new CountDownLatch(1); // try to add node when cluster state publishing fails failToCommit.set(true); timedClusterService.submitStateUpdateTask( "test", new ClusterStateUpdateTask() { @Override public ClusterState execute(ClusterState currentState) throws Exception { DiscoveryNode newNode = new DiscoveryNode( "node2", 
buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); return ClusterState.builder(currentState) .nodes(DiscoveryNodes.builder(currentState.nodes()).add(newNode)) .build(); } @Override public void clusterStateProcessed( String source, ClusterState oldState, ClusterState newState) { latch.countDown(); } @Override public void onFailure(String source, Exception e) { latch.countDown(); } }); latch.await(); assertThat(currentNodes, equalTo(Sets.newHashSet(timedClusterService.state().getNodes()))); timedClusterService.close(); }