private ClusterState handleJoinRequest(final DiscoveryNode node) { if (!master) { throw new ElasticsearchIllegalStateException( "Node [" + localNode + "] not master for join request from [" + node + "]"); } ClusterState state = clusterService.state(); if (!transportService.addressSupported(node.address().getClass())) { // TODO, what should we do now? Maybe inform that node that its crap? logger.warn("received a wrong address type from [{}], ignoring...", node); } else { // try and connect to the node, if it fails, we can raise an exception back to the client... transportService.connectToNode(node); state = clusterService.state(); // validate the join request, will throw a failure if it fails, which will get back to the // node calling the join request membership.sendValidateJoinRequestBlocking(node, state, pingTimeout); clusterService.submitStateUpdateTask( "zen-disco-receive(join from node[" + node + "])", Priority.URGENT, new ClusterStateUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { if (currentState.nodes().nodeExists(node.id())) { // the node already exists in the cluster logger.warn("received a join request for an existing node [{}]", node); // still send a new cluster state, so it will be re published and possibly update // the other node return ClusterState.builder(currentState).build(); } DiscoveryNodes.Builder builder = DiscoveryNodes.builder(currentState.nodes()); for (DiscoveryNode existingNode : currentState.nodes()) { if (node.address().equals(existingNode.address())) { builder.remove(existingNode.id()); logger.warn( "received join request from node [{}], but found existing node {} with same address, removing existing node", node, existingNode); } } latestDiscoNodes = builder.build(); // add the new node now (will update latestDiscoNodes on publish) return ClusterState.builder(currentState) .nodes(latestDiscoNodes.newNode(node)) .build(); } @Override public void onFailure(String source, Throwable t) { 
logger.error("unexpected failure during [{}]", t, source); } }); } return state; }
@Override protected void doStart() throws ElasticsearchException { Map<String, String> nodeAttributes = discoveryNodeService.buildAttributes(); // note, we rely on the fact that its a new id each time we start, see FD and "kill -9" handling final String nodeId = getNodeUUID(settings); localNode = new DiscoveryNode( settings.get("name"), nodeId, transportService.boundAddress().publishAddress(), nodeAttributes, version); latestDiscoNodes = new DiscoveryNodes.Builder().put(localNode).localNodeId(localNode.id()).build(); nodesFD.updateNodes(latestDiscoNodes); pingService.start(); // do the join on a different thread, the DiscoveryService waits for 30s anyhow till it is // discovered asyncJoinCluster(); }
private void innerJoinCluster() { boolean retry = true; while (retry) { if (lifecycle.stoppedOrClosed()) { return; } retry = false; DiscoveryNode masterNode = findMaster(); if (masterNode == null) { logger.trace("no masterNode returned"); retry = true; continue; } if (localNode.equals(masterNode)) { this.master = true; nodesFD.start(); // start the nodes FD clusterService.submitStateUpdateTask( "zen-disco-join (elected_as_master)", Priority.URGENT, new ProcessedClusterStateUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { DiscoveryNodes.Builder builder = new DiscoveryNodes.Builder() .localNodeId(localNode.id()) .masterNodeId(localNode.id()) // put our local node .put(localNode); // update the fact that we are the master... latestDiscoNodes = builder.build(); ClusterBlocks clusterBlocks = ClusterBlocks.builder() .blocks(currentState.blocks()) .removeGlobalBlock(NO_MASTER_BLOCK) .build(); return ClusterState.builder(currentState) .nodes(latestDiscoNodes) .blocks(clusterBlocks) .build(); } @Override public void onFailure(String source, Throwable t) { logger.error("unexpected failure during [{}]", t, source); } @Override public void clusterStateProcessed( String source, ClusterState oldState, ClusterState newState) { sendInitialStateEventIfNeeded(); } }); } else { this.master = false; try { // first, make sure we can connect to the master transportService.connectToNode(masterNode); } catch (Exception e) { logger.warn("failed to connect to master [{}], retrying...", e, masterNode); retry = true; continue; } // send join request try { membership.sendJoinRequestBlocking(masterNode, localNode, pingTimeout); } catch (Exception e) { if (e instanceof ElasticsearchException) { logger.info( "failed to send join request to master [{}], reason [{}]", masterNode, ((ElasticsearchException) e).getDetailedMessage()); } else { logger.info( "failed to send join request to master [{}], reason [{}]", masterNode, e.getMessage()); } if 
(logger.isTraceEnabled()) { logger.trace("detailed failed reason", e); } // failed to send the join request, retry retry = true; continue; } masterFD.start(masterNode, "initial_join"); // no need to submit the received cluster state, we will get it from the master when it // publishes // the fact that we joined } } }
/**
 * Creates the zen discovery component: stores its collaborators, reads the ping timeout,
 * leave-request and master-election filter settings, builds the fault detection, publish and
 * membership helpers, and registers the rejoin-cluster request handler on the transport service.
 */
@Inject
public ZenDiscovery(
        Settings settings,
        ClusterName clusterName,
        ThreadPool threadPool,
        TransportService transportService,
        ClusterService clusterService,
        NodeSettingsService nodeSettingsService,
        DiscoveryNodeService discoveryNodeService,
        ZenPingService pingService,
        Version version) {
    super(settings);

    // collaborators handed in by the injector
    this.version = version;
    this.clusterName = clusterName;
    this.threadPool = threadPool;
    this.transportService = transportService;
    this.clusterService = clusterService;
    this.discoveryNodeService = discoveryNodeService;
    this.pingService = pingService;

    // The full "discovery.zen.*" keys are also read directly so the settings still apply when
    // this class gets extended. Each inner lookup supplies the default for the lookup wrapping
    // it, with 3 seconds as the innermost default.
    pingTimeout =
            settings.getAsTime(
                    "discovery.zen.ping.timeout",
                    settings.getAsTime(
                            "discovery.zen.ping_timeout",
                            componentSettings.getAsTime(
                                    "ping_timeout",
                                    componentSettings.getAsTime(
                                            "initial_ping_timeout", timeValueSeconds(3)))));
    sendLeaveRequest = componentSettings.getAsBoolean("send_leave_request", true);

    masterElectionFilterClientNodes =
            settings.getAsBoolean("discovery.zen.master_election.filter_client", true);
    masterElectionFilterDataNodes =
            settings.getAsBoolean("discovery.zen.master_election.filter_data", false);

    logger.debug(
            "using ping.timeout [{}], master_election.filter_client [{}], master_election.filter_data [{}]",
            pingTimeout,
            masterElectionFilterClientNodes,
            masterElectionFilterDataNodes);

    electMaster = new ElectMasterService(settings);
    nodeSettingsService.addListener(new ApplySettings());

    // fault detection towards the master, and from the master towards the other nodes
    masterFD = new MasterFaultDetection(settings, threadPool, transportService, this);
    masterFD.addListener(new MasterNodeFailureListener());

    nodesFD = new NodesFaultDetection(settings, threadPool, transportService);
    nodesFD.addListener(new NodeFailureListener());

    publishClusterState =
            new PublishClusterStateAction(
                    settings, transportService, this, new NewClusterStateListener());
    pingService.setNodesProvider(this);
    membership = new MembershipAction(settings, transportService, this, new MembershipListener());

    transportService.registerHandler(
            RejoinClusterRequestHandler.ACTION, new RejoinClusterRequestHandler());
}