private ClusterState rejoin(ClusterState clusterState, String reason) { logger.warn(reason + ", current nodes: {}", clusterState.nodes()); nodesFD.stop(); masterFD.stop(reason); master = false; ClusterBlocks clusterBlocks = ClusterBlocks.builder() .blocks(clusterState.blocks()) .addGlobalBlock(NO_MASTER_BLOCK) .addGlobalBlock(GatewayService.STATE_NOT_RECOVERED_BLOCK) .build(); // clear the routing table, we have no master, so we need to recreate the routing when we reform // the cluster RoutingTable routingTable = RoutingTable.builder().build(); // we also clean the metadata, since we are going to recover it if we become master MetaData metaData = MetaData.builder().build(); // clean the nodes, we are now not connected to anybody, since we try and reform the cluster latestDiscoNodes = new DiscoveryNodes.Builder().put(localNode).localNodeId(localNode.id()).build(); asyncJoinCluster(); return ClusterState.builder(clusterState) .blocks(clusterBlocks) .nodes(latestDiscoNodes) .routingTable(routingTable) .metaData(metaData) .build(); }
/**
 * Publishes the given cluster state through {@code publishClusterState}.
 *
 * <p>Before publishing, the nodes of the state are recorded as the latest known nodes and handed to
 * the nodes fault detector.
 *
 * @param clusterState the state to publish
 * @param ackListener  listener passed along to the publish action
 * @throws ElasticsearchIllegalStateException if this node is not currently master
 */
@Override
public void publish(ClusterState clusterState, AckListener ackListener) {
    if (master == false) {
        throw new ElasticsearchIllegalStateException("Shouldn't publish state when not master");
    }

    // keep our own view of the nodes, and the nodes fault detector, aligned with what goes out
    latestDiscoNodes = clusterState.nodes();
    nodesFD.updateNodes(clusterState.nodes());

    publishClusterState.publish(clusterState, ackListener);
}
void handleNewClusterStateFromMaster( final ClusterState newState, final PublishClusterStateAction.NewClusterStateListener.NewStateProcessed newStateProcessed) { if (master) { clusterService.submitStateUpdateTask( "zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]", Priority.URGENT, new ProcessedClusterStateUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { if (newState.version() > currentState.version()) { logger.warn( "received cluster state from [{}] which is also master but with a newer cluster_state, rejoining to cluster...", newState.nodes().masterNode()); return rejoin( currentState, "zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]"); } else { logger.warn( "received cluster state from [{}] which is also master but with an older cluster_state, telling [{}] to rejoin the cluster", newState.nodes().masterNode(), newState.nodes().masterNode()); transportService.sendRequest( newState.nodes().masterNode(), RejoinClusterRequestHandler.ACTION, new RejoinClusterRequest(currentState.nodes().localNodeId()), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) { @Override public void handleException(TransportException exp) { logger.warn( "failed to send rejoin request to [{}]", exp, newState.nodes().masterNode()); } }); return currentState; } } @Override public void clusterStateProcessed( String source, ClusterState oldState, ClusterState newState) { newStateProcessed.onNewClusterStateProcessed(); } @Override public void onFailure(String source, Throwable t) { logger.error("unexpected failure during [{}]", t, source); newStateProcessed.onNewClusterStateFailed(t); } }); } else { if (newState.nodes().localNode() == null) { logger.warn( "received a cluster state from [{}] and not part of the cluster, should not happen", newState.nodes().masterNode()); newStateProcessed.onNewClusterStateFailed( new ElasticsearchIllegalStateException( 
"received state from a node that is not part of the cluster")); } else { if (currentJoinThread != null) { logger.debug( "got a new state from master node, though we are already trying to rejoin the cluster"); } clusterService.submitStateUpdateTask( "zen-disco-receive(from master [" + newState.nodes().masterNode() + "])", Priority.URGENT, new ProcessedClusterStateUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { // we don't need to do this, since we ping the master, and get notified when it has // moved from being a master // because it doesn't have enough master nodes... // if (!electMaster.hasEnoughMasterNodes(newState.nodes())) { // return disconnectFromCluster(newState, "not enough master nodes on new cluster // state received from [" + newState.nodes().masterNode() + "]"); // } latestDiscoNodes = newState.nodes(); // check to see that we monitor the correct master of the cluster if (masterFD.masterNode() == null || !masterFD.masterNode().equals(latestDiscoNodes.masterNode())) { masterFD.restart( latestDiscoNodes.masterNode(), "new cluster state received and we are monitoring the wrong master [" + masterFD.masterNode() + "]"); } ClusterState.Builder builder = ClusterState.builder(newState); // if the routing table did not change, use the original one if (newState.routingTable().version() == currentState.routingTable().version()) { builder.routingTable(currentState.routingTable()); } // same for metadata if (newState.metaData().version() == currentState.metaData().version()) { builder.metaData(currentState.metaData()); } else { // if its not the same version, only copy over new indices or ones that changed // the version MetaData.Builder metaDataBuilder = MetaData.builder(newState.metaData()).removeAllIndices(); for (IndexMetaData indexMetaData : newState.metaData()) { IndexMetaData currentIndexMetaData = currentState.metaData().index(indexMetaData.index()); if (currentIndexMetaData == null || currentIndexMetaData.version() != 
indexMetaData.version()) { metaDataBuilder.put(indexMetaData, false); } else { metaDataBuilder.put(currentIndexMetaData, false); } } builder.metaData(metaDataBuilder); } return builder.build(); } @Override public void onFailure(String source, Throwable t) { logger.error("unexpected failure during [{}]", t, source); newStateProcessed.onNewClusterStateFailed(t); } @Override public void clusterStateProcessed( String source, ClusterState oldState, ClusterState newState) { sendInitialStateEventIfNeeded(); newStateProcessed.onNewClusterStateProcessed(); } }); } } }
void handleNewClusterStateFromMaster( ClusterState newClusterState, final PublishClusterStateAction.NewClusterStateListener.NewStateProcessed newStateProcessed) { if (master) { final ClusterState newState = newClusterState; clusterService.submitStateUpdateTask( "zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]", Priority.URGENT, new ProcessedClusterStateUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { if (newState.version() > currentState.version()) { logger.warn( "received cluster state from [{}] which is also master but with a newer cluster_state, rejoining to cluster...", newState.nodes().masterNode()); return rejoin( currentState, "zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]"); } else { logger.warn( "received cluster state from [{}] which is also master but with an older cluster_state, telling [{}] to rejoin the cluster", newState.nodes().masterNode(), newState.nodes().masterNode()); transportService.sendRequest( newState.nodes().masterNode(), RejoinClusterRequestHandler.ACTION, new RejoinClusterRequest(currentState.nodes().localNodeId()), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) { @Override public void handleException(TransportException exp) { logger.warn( "failed to send rejoin request to [{}]", exp, newState.nodes().masterNode()); } }); return currentState; } } @Override public void clusterStateProcessed( String source, ClusterState oldState, ClusterState newState) { newStateProcessed.onNewClusterStateProcessed(); } @Override public void onFailure(String source, Throwable t) { logger.error("unexpected failure during [{}]", t, source); newStateProcessed.onNewClusterStateFailed(t); } }); } else { if (newClusterState.nodes().localNode() == null) { logger.warn( "received a cluster state from [{}] and not part of the cluster, should not happen", newClusterState.nodes().masterNode()); 
newStateProcessed.onNewClusterStateFailed( new ElasticsearchIllegalStateException( "received state from a node that is not part of the cluster")); } else { if (currentJoinThread != null) { logger.debug( "got a new state from master node, though we are already trying to rejoin the cluster"); } final ProcessClusterState processClusterState = new ProcessClusterState(newClusterState, newStateProcessed); processNewClusterStates.add(processClusterState); clusterService.submitStateUpdateTask( "zen-disco-receive(from master [" + newClusterState.nodes().masterNode() + "])", Priority.URGENT, new ProcessedClusterStateUpdateTask() { @Override public ClusterState execute(ClusterState currentState) { // we already processed it in a previous event if (processClusterState.processed) { return currentState; } // TODO: once improvement that we can do is change the message structure to include // version and masterNodeId // at the start, this will allow us to keep the "compressed bytes" around, and only // parse the first page // to figure out if we need to use it or not, and only once we picked the latest // one, parse the whole state // try and get the state with the highest version out of all the ones with the same // master node id ProcessClusterState stateToProcess = processNewClusterStates.poll(); if (stateToProcess == null) { return currentState; } stateToProcess.processed = true; while (true) { ProcessClusterState potentialState = processNewClusterStates.peek(); // nothing else in the queue, bail if (potentialState == null) { break; } // if its not from the same master, then bail if (!Objects.equal( stateToProcess.clusterState.nodes().masterNodeId(), potentialState.clusterState.nodes().masterNodeId())) { break; } // we are going to use it for sure, poll (remove) it potentialState = processNewClusterStates.poll(); potentialState.processed = true; if (potentialState.clusterState.version() > stateToProcess.clusterState.version()) { // we found a new one stateToProcess = 
potentialState; } } ClusterState updatedState = stateToProcess.clusterState; // if the new state has a smaller version, and it has the same master node, then no // need to process it if (updatedState.version() < currentState.version() && Objects.equal( updatedState.nodes().masterNodeId(), currentState.nodes().masterNodeId())) { return currentState; } // we don't need to do this, since we ping the master, and get notified when it has // moved from being a master // because it doesn't have enough master nodes... // if (!electMaster.hasEnoughMasterNodes(newState.nodes())) { // return disconnectFromCluster(newState, "not enough master nodes on new cluster // state received from [" + newState.nodes().masterNode() + "]"); // } latestDiscoNodes = updatedState.nodes(); // check to see that we monitor the correct master of the cluster if (masterFD.masterNode() == null || !masterFD.masterNode().equals(latestDiscoNodes.masterNode())) { masterFD.restart( latestDiscoNodes.masterNode(), "new cluster state received and we are monitoring the wrong master [" + masterFD.masterNode() + "]"); } ClusterState.Builder builder = ClusterState.builder(updatedState); // if the routing table did not change, use the original one if (updatedState.routingTable().version() == currentState.routingTable().version()) { builder.routingTable(currentState.routingTable()); } // same for metadata if (updatedState.metaData().version() == currentState.metaData().version()) { builder.metaData(currentState.metaData()); } else { // if its not the same version, only copy over new indices or ones that changed // the version MetaData.Builder metaDataBuilder = MetaData.builder(updatedState.metaData()).removeAllIndices(); for (IndexMetaData indexMetaData : updatedState.metaData()) { IndexMetaData currentIndexMetaData = currentState.metaData().index(indexMetaData.index()); if (currentIndexMetaData == null || currentIndexMetaData.version() != indexMetaData.version()) { metaDataBuilder.put(indexMetaData, false); } 
else { metaDataBuilder.put(currentIndexMetaData, false); } } builder.metaData(metaDataBuilder); } return builder.build(); } @Override public void onFailure(String source, Throwable t) { logger.error("unexpected failure during [{}]", t, source); newStateProcessed.onNewClusterStateFailed(t); } @Override public void clusterStateProcessed( String source, ClusterState oldState, ClusterState newState) { sendInitialStateEventIfNeeded(); newStateProcessed.onNewClusterStateProcessed(); } }); } } }