private ClusterState rejoin(ClusterState clusterState, String reason) { logger.warn(reason + ", current nodes: {}", clusterState.nodes()); nodesFD.stop(); masterFD.stop(reason); master = false; ClusterBlocks clusterBlocks = ClusterBlocks.builder() .blocks(clusterState.blocks()) .addGlobalBlock(NO_MASTER_BLOCK) .addGlobalBlock(GatewayService.STATE_NOT_RECOVERED_BLOCK) .build(); // clear the routing table, we have no master, so we need to recreate the routing when we reform // the cluster RoutingTable routingTable = RoutingTable.builder().build(); // we also clean the metadata, since we are going to recover it if we become master MetaData metaData = MetaData.builder().build(); // clean the nodes, we are now not connected to anybody, since we try and reform the cluster latestDiscoNodes = new DiscoveryNodes.Builder().put(localNode).localNodeId(localNode.id()).build(); asyncJoinCluster(); return ClusterState.builder(clusterState) .blocks(clusterBlocks) .nodes(latestDiscoNodes) .routingTable(routingTable) .metaData(metaData) .build(); }
/**
 * Closes, in this order: {@code masterFD}, {@code nodesFD}, {@code publishClusterState},
 * {@code membership} and {@code pingService}.
 *
 * @throws ElasticsearchException declared by the lifecycle contract
 */
@Override
protected void doClose() throws ElasticsearchException {
    // the two fault detectors go first
    masterFD.close();
    nodesFD.close();

    // then state publishing, membership handling and pinging
    publishClusterState.close();
    membership.close();
    pingService.close();
}
/**
 * Publishes {@code clusterState} through {@code publishClusterState}.
 *
 * <p>Before publishing, the node set of the state is recorded in {@code latestDiscoNodes}
 * and handed to {@code nodesFD}.
 *
 * @param clusterState the state to publish
 * @param ackListener listener passed through to the publish call
 * @throws ElasticsearchIllegalStateException if the local {@code master} flag is not set
 */
@Override
public void publish(ClusterState clusterState, AckListener ackListener) {
    if (!master) {
        throw new ElasticsearchIllegalStateException("Shouldn't publish state when not master");
    }

    final DiscoveryNodes publishedNodes = clusterState.nodes();
    latestDiscoNodes = publishedNodes;
    nodesFD.updateNodes(publishedNodes);

    publishClusterState.publish(clusterState, ackListener);
}
@Override protected void doStop() throws ElasticsearchException { pingService.stop(); masterFD.stop("zen disco stop"); nodesFD.stop(); initialStateSent.set(false); if (sendLeaveRequest) { if (!master && latestDiscoNodes.masterNode() != null) { try { membership.sendLeaveRequestBlocking( latestDiscoNodes.masterNode(), localNode, TimeValue.timeValueSeconds(1)); } catch (Exception e) { logger.debug( "failed to send leave request to master [{}]", e, latestDiscoNodes.masterNode()); } } else { DiscoveryNode[] possibleMasters = electMaster.nextPossibleMasters(latestDiscoNodes.nodes().values(), 5); for (DiscoveryNode possibleMaster : possibleMasters) { if (localNode.equals(possibleMaster)) { continue; } try { membership.sendLeaveRequest(latestDiscoNodes.masterNode(), possibleMaster); } catch (Exception e) { logger.debug( "failed to send leave request from master [{}] to possible master [{}]", e, latestDiscoNodes.masterNode(), possibleMaster); } } } } master = false; if (currentJoinThread != null) { try { currentJoinThread.interrupt(); } catch (Exception e) { // ignore } } }
@Override protected void doStart() throws ElasticsearchException { Map<String, String> nodeAttributes = discoveryNodeService.buildAttributes(); // note, we rely on the fact that its a new id each time we start, see FD and "kill -9" handling final String nodeId = getNodeUUID(settings); localNode = new DiscoveryNode( settings.get("name"), nodeId, transportService.boundAddress().publishAddress(), nodeAttributes, version); latestDiscoNodes = new DiscoveryNodes.Builder().put(localNode).localNodeId(localNode.id()).build(); nodesFD.updateNodes(latestDiscoNodes); pingService.start(); // do the join on a different thread, the DiscoveryService waits for 30s anyhow till it is // discovered asyncJoinCluster(); }
private void innerJoinCluster() { boolean retry = true; while (retry) { if (lifecycle.stoppedOrClosed()) { return; } retry = false; DiscoveryNode masterNode = findMaster(); if (masterNode == null) { logger.trace("no masterNode returned"); retry = true; continue; } if (localNode.equals(masterNode)) { this.master = true; nodesFD.start(); // start the nodes FD clusterService.submitStateUpdateTask( "zen-disco-join (elected_as_master)", Priority.URGENT, new ProcessedClusterStateUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { DiscoveryNodes.Builder builder = new DiscoveryNodes.Builder() .localNodeId(localNode.id()) .masterNodeId(localNode.id()) // put our local node .put(localNode); // update the fact that we are the master... latestDiscoNodes = builder.build(); ClusterBlocks clusterBlocks = ClusterBlocks.builder() .blocks(currentState.blocks()) .removeGlobalBlock(NO_MASTER_BLOCK) .build(); return ClusterState.builder(currentState) .nodes(latestDiscoNodes) .blocks(clusterBlocks) .build(); } @Override public void onFailure(String source, Throwable t) { logger.error("unexpected failure during [{}]", t, source); } @Override public void clusterStateProcessed( String source, ClusterState oldState, ClusterState newState) { sendInitialStateEventIfNeeded(); } }); } else { this.master = false; try { // first, make sure we can connect to the master transportService.connectToNode(masterNode); } catch (Exception e) { logger.warn("failed to connect to master [{}], retrying...", e, masterNode); retry = true; continue; } // send join request try { membership.sendJoinRequestBlocking(masterNode, localNode, pingTimeout); } catch (Exception e) { if (e instanceof ElasticsearchException) { logger.info( "failed to send join request to master [{}], reason [{}]", masterNode, ((ElasticsearchException) e).getDetailedMessage()); } else { logger.info( "failed to send join request to master [{}], reason [{}]", masterNode, e.getMessage()); } if 
(logger.isTraceEnabled()) { logger.trace("detailed failed reason", e); } // failed to send the join request, retry retry = true; continue; } masterFD.start(masterNode, "initial_join"); // no need to submit the received cluster state, we will get it from the master when it // publishes // the fact that we joined } } }