Пример #1
0
  private ClusterState rejoin(ClusterState clusterState, String reason) {
    logger.warn(reason + ", current nodes: {}", clusterState.nodes());
    nodesFD.stop();
    masterFD.stop(reason);
    master = false;

    ClusterBlocks clusterBlocks =
        ClusterBlocks.builder()
            .blocks(clusterState.blocks())
            .addGlobalBlock(NO_MASTER_BLOCK)
            .addGlobalBlock(GatewayService.STATE_NOT_RECOVERED_BLOCK)
            .build();

    // clear the routing table, we have no master, so we need to recreate the routing when we reform
    // the cluster
    RoutingTable routingTable = RoutingTable.builder().build();
    // we also clean the metadata, since we are going to recover it if we become master
    MetaData metaData = MetaData.builder().build();

    // clean the nodes, we are now not connected to anybody, since we try and reform the cluster
    latestDiscoNodes =
        new DiscoveryNodes.Builder().put(localNode).localNodeId(localNode.id()).build();

    asyncJoinCluster();

    return ClusterState.builder(clusterState)
        .blocks(clusterBlocks)
        .nodes(latestDiscoNodes)
        .routingTable(routingTable)
        .metaData(metaData)
        .build();
  }
Пример #2
0
 @Override
 protected void doClose() throws ElasticsearchException {
   masterFD.close();
   nodesFD.close();
   publishClusterState.close();
   membership.close();
   pingService.close();
 }
Пример #3
0
 @Override
 public void publish(ClusterState clusterState, AckListener ackListener) {
   if (!master) {
     throw new ElasticsearchIllegalStateException("Shouldn't publish state when not master");
   }
   latestDiscoNodes = clusterState.nodes();
   nodesFD.updateNodes(clusterState.nodes());
   publishClusterState.publish(clusterState, ackListener);
 }
Пример #4
0
 @Override
 protected void doStop() throws ElasticsearchException {
   pingService.stop();
   masterFD.stop("zen disco stop");
   nodesFD.stop();
   initialStateSent.set(false);
   if (sendLeaveRequest) {
     if (!master && latestDiscoNodes.masterNode() != null) {
       try {
         membership.sendLeaveRequestBlocking(
             latestDiscoNodes.masterNode(), localNode, TimeValue.timeValueSeconds(1));
       } catch (Exception e) {
         logger.debug(
             "failed to send leave request to master [{}]", e, latestDiscoNodes.masterNode());
       }
     } else {
       DiscoveryNode[] possibleMasters =
           electMaster.nextPossibleMasters(latestDiscoNodes.nodes().values(), 5);
       for (DiscoveryNode possibleMaster : possibleMasters) {
         if (localNode.equals(possibleMaster)) {
           continue;
         }
         try {
           membership.sendLeaveRequest(latestDiscoNodes.masterNode(), possibleMaster);
         } catch (Exception e) {
           logger.debug(
               "failed to send leave request from master [{}] to possible master [{}]",
               e,
               latestDiscoNodes.masterNode(),
               possibleMaster);
         }
       }
     }
   }
   master = false;
   if (currentJoinThread != null) {
     try {
       currentJoinThread.interrupt();
     } catch (Exception e) {
       // ignore
     }
   }
 }
Пример #5
0
  @Override
  protected void doStart() throws ElasticsearchException {
    Map<String, String> nodeAttributes = discoveryNodeService.buildAttributes();
    // note, we rely on the fact that its a new id each time we start, see FD and "kill -9" handling
    final String nodeId = getNodeUUID(settings);
    localNode =
        new DiscoveryNode(
            settings.get("name"),
            nodeId,
            transportService.boundAddress().publishAddress(),
            nodeAttributes,
            version);
    latestDiscoNodes =
        new DiscoveryNodes.Builder().put(localNode).localNodeId(localNode.id()).build();
    nodesFD.updateNodes(latestDiscoNodes);
    pingService.start();

    // do the join on a different thread, the DiscoveryService waits for 30s anyhow till it is
    // discovered
    asyncJoinCluster();
  }
Пример #6
0
  private void innerJoinCluster() {
    boolean retry = true;
    while (retry) {
      if (lifecycle.stoppedOrClosed()) {
        return;
      }
      retry = false;
      DiscoveryNode masterNode = findMaster();
      if (masterNode == null) {
        logger.trace("no masterNode returned");
        retry = true;
        continue;
      }
      if (localNode.equals(masterNode)) {
        this.master = true;
        nodesFD.start(); // start the nodes FD
        clusterService.submitStateUpdateTask(
            "zen-disco-join (elected_as_master)",
            Priority.URGENT,
            new ProcessedClusterStateUpdateTask() {
              @Override
              public ClusterState execute(ClusterState currentState) {
                DiscoveryNodes.Builder builder =
                    new DiscoveryNodes.Builder()
                        .localNodeId(localNode.id())
                        .masterNodeId(localNode.id())
                        // put our local node
                        .put(localNode);
                // update the fact that we are the master...
                latestDiscoNodes = builder.build();
                ClusterBlocks clusterBlocks =
                    ClusterBlocks.builder()
                        .blocks(currentState.blocks())
                        .removeGlobalBlock(NO_MASTER_BLOCK)
                        .build();
                return ClusterState.builder(currentState)
                    .nodes(latestDiscoNodes)
                    .blocks(clusterBlocks)
                    .build();
              }

              @Override
              public void onFailure(String source, Throwable t) {
                logger.error("unexpected failure during [{}]", t, source);
              }

              @Override
              public void clusterStateProcessed(
                  String source, ClusterState oldState, ClusterState newState) {
                sendInitialStateEventIfNeeded();
              }
            });
      } else {
        this.master = false;
        try {
          // first, make sure we can connect to the master
          transportService.connectToNode(masterNode);
        } catch (Exception e) {
          logger.warn("failed to connect to master [{}], retrying...", e, masterNode);
          retry = true;
          continue;
        }
        // send join request
        try {
          membership.sendJoinRequestBlocking(masterNode, localNode, pingTimeout);
        } catch (Exception e) {
          if (e instanceof ElasticsearchException) {
            logger.info(
                "failed to send join request to master [{}], reason [{}]",
                masterNode,
                ((ElasticsearchException) e).getDetailedMessage());
          } else {
            logger.info(
                "failed to send join request to master [{}], reason [{}]",
                masterNode,
                e.getMessage());
          }
          if (logger.isTraceEnabled()) {
            logger.trace("detailed failed reason", e);
          }
          // failed to send the join request, retry
          retry = true;
          continue;
        }
        masterFD.start(masterNode, "initial_join");
        // no need to submit the received cluster state, we will get it from the master when it
        // publishes
        // the fact that we joined
      }
    }
  }