Java Setting.positiveTimeSetting Examples

Programming Language: Java

Namespace/Package Name: org.elasticsearch.common.settings

Class/Type: Setting

Method/Function: positiveTimeSetting

Examples at hotexamples.com: 5

Java Setting.positiveTimeSetting - 5 examples found. These are the top rated real world Java examples of org.elasticsearch.common.settings.Setting.positiveTimeSetting extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

get(26)

boolSetting(13)

intSetting(10)

positiveTimeSetting(5)

timeSetting(5)

byteSizeSetting(4)

groupSetting(4)

exists(3)

listSetting(2)

longSetting(2)

getKey(1)

parseInt(1)

simpleString(1)

Example #1

Show file

File: GatewayService.java Project: elastic/elasticsearch

public class GatewayService extends AbstractLifecycleComponent implements ClusterStateListener {

  public static final Setting<Integer> EXPECTED_NODES_SETTING =
      Setting.intSetting("gateway.expected_nodes", -1, -1, Property.NodeScope);
  public static final Setting<Integer> EXPECTED_DATA_NODES_SETTING =
      Setting.intSetting("gateway.expected_data_nodes", -1, -1, Property.NodeScope);
  public static final Setting<Integer> EXPECTED_MASTER_NODES_SETTING =
      Setting.intSetting("gateway.expected_master_nodes", -1, -1, Property.NodeScope);
  public static final Setting<TimeValue> RECOVER_AFTER_TIME_SETTING =
      Setting.positiveTimeSetting(
          "gateway.recover_after_time", TimeValue.timeValueMillis(0), Property.NodeScope);
  public static final Setting<Integer> RECOVER_AFTER_NODES_SETTING =
      Setting.intSetting("gateway.recover_after_nodes", -1, -1, Property.NodeScope);
  public static final Setting<Integer> RECOVER_AFTER_DATA_NODES_SETTING =
      Setting.intSetting("gateway.recover_after_data_nodes", -1, -1, Property.NodeScope);
  public static final Setting<Integer> RECOVER_AFTER_MASTER_NODES_SETTING =
      Setting.intSetting("gateway.recover_after_master_nodes", 0, 0, Property.NodeScope);

  public static final ClusterBlock STATE_NOT_RECOVERED_BLOCK =
      new ClusterBlock(
          1,
          "state not recovered / initialized",
          true,
          true,
          RestStatus.SERVICE_UNAVAILABLE,
          ClusterBlockLevel.ALL);

  public static final TimeValue DEFAULT_RECOVER_AFTER_TIME_IF_EXPECTED_NODES_IS_SET =
      TimeValue.timeValueMinutes(5);

  private final Gateway gateway;

  private final ThreadPool threadPool;

  private final AllocationService allocationService;

  private final ClusterService clusterService;

  private final TimeValue recoverAfterTime;
  private final int recoverAfterNodes;
  private final int expectedNodes;
  private final int recoverAfterDataNodes;
  private final int expectedDataNodes;
  private final int recoverAfterMasterNodes;
  private final int expectedMasterNodes;

  private final AtomicBoolean recovered = new AtomicBoolean();
  private final AtomicBoolean scheduledRecovery = new AtomicBoolean();

  @Inject
  public GatewayService(
      Settings settings,
      AllocationService allocationService,
      ClusterService clusterService,
      ThreadPool threadPool,
      GatewayMetaState metaState,
      TransportNodesListGatewayMetaState listGatewayMetaState,
      Discovery discovery,
      IndicesService indicesService) {
    super(settings);
    this.gateway =
        new Gateway(
            settings, clusterService, metaState, listGatewayMetaState, discovery, indicesService);
    this.allocationService = allocationService;
    this.clusterService = clusterService;
    this.threadPool = threadPool;
    // allow to control a delay of when indices will get created
    this.expectedNodes = EXPECTED_NODES_SETTING.get(this.settings);
    this.expectedDataNodes = EXPECTED_DATA_NODES_SETTING.get(this.settings);
    this.expectedMasterNodes = EXPECTED_MASTER_NODES_SETTING.get(this.settings);

    if (RECOVER_AFTER_TIME_SETTING.exists(this.settings)) {
      recoverAfterTime = RECOVER_AFTER_TIME_SETTING.get(this.settings);
    } else if (expectedNodes >= 0 || expectedDataNodes >= 0 || expectedMasterNodes >= 0) {
      recoverAfterTime = DEFAULT_RECOVER_AFTER_TIME_IF_EXPECTED_NODES_IS_SET;
    } else {
      recoverAfterTime = null;
    }
    this.recoverAfterNodes = RECOVER_AFTER_NODES_SETTING.get(this.settings);
    this.recoverAfterDataNodes = RECOVER_AFTER_DATA_NODES_SETTING.get(this.settings);
    // default the recover after master nodes to the minimum master nodes in the discovery
    if (RECOVER_AFTER_MASTER_NODES_SETTING.exists(this.settings)) {
      recoverAfterMasterNodes = RECOVER_AFTER_MASTER_NODES_SETTING.get(this.settings);
    } else {
      // TODO: change me once the minimum_master_nodes is changed too
      recoverAfterMasterNodes = settings.getAsInt("discovery.zen.minimum_master_nodes", -1);
    }

    // Add the not recovered as initial state block, we don't allow anything until
    this.clusterService.addInitialStateBlock(STATE_NOT_RECOVERED_BLOCK);
  }

  @Override
  protected void doStart() {
    // use post applied so that the state will be visible to the background recovery thread we spawn
    // in performStateRecovery
    clusterService.addListener(this);
  }

  @Override
  protected void doStop() {
    clusterService.removeListener(this);
  }

  @Override
  protected void doClose() {}

  @Override
  public void clusterChanged(final ClusterChangedEvent event) {
    if (lifecycle.stoppedOrClosed()) {
      return;
    }

    final ClusterState state = event.state();

    if (state.nodes().isLocalNodeElectedMaster() == false) {
      // not our job to recover
      return;
    }
    if (state.blocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK) == false) {
      // already recovered
      return;
    }

    DiscoveryNodes nodes = state.nodes();
    if (state.nodes().getMasterNodeId() == null) {
      logger.debug("not recovering from gateway, no master elected yet");
    } else if (recoverAfterNodes != -1
        && (nodes.getMasterAndDataNodes().size()) < recoverAfterNodes) {
      logger.debug(
          "not recovering from gateway, nodes_size (data+master) [{}] < recover_after_nodes [{}]",
          nodes.getMasterAndDataNodes().size(),
          recoverAfterNodes);
    } else if (recoverAfterDataNodes != -1 && nodes.getDataNodes().size() < recoverAfterDataNodes) {
      logger.debug(
          "not recovering from gateway, nodes_size (data) [{}] < recover_after_data_nodes [{}]",
          nodes.getDataNodes().size(),
          recoverAfterDataNodes);
    } else if (recoverAfterMasterNodes != -1
        && nodes.getMasterNodes().size() < recoverAfterMasterNodes) {
      logger.debug(
          "not recovering from gateway, nodes_size (master) [{}] < recover_after_master_nodes [{}]",
          nodes.getMasterNodes().size(),
          recoverAfterMasterNodes);
    } else {
      boolean enforceRecoverAfterTime;
      String reason;
      if (expectedNodes == -1 && expectedMasterNodes == -1 && expectedDataNodes == -1) {
        // no expected is set, honor the setting if they are there
        enforceRecoverAfterTime = true;
        reason = "recover_after_time was set to [" + recoverAfterTime + "]";
      } else {
        // one of the expected is set, see if all of them meet the need, and ignore the timeout in
        // this case
        enforceRecoverAfterTime = false;
        reason = "";
        if (expectedNodes != -1
            && (nodes.getMasterAndDataNodes().size()
                < expectedNodes)) { // does not meet the expected...
          enforceRecoverAfterTime = true;
          reason =
              "expecting ["
                  + expectedNodes
                  + "] nodes, but only have ["
                  + nodes.getMasterAndDataNodes().size()
                  + "]";
        } else if (expectedDataNodes != -1
            && (nodes.getDataNodes().size() < expectedDataNodes)) { // does not meet the expected...
          enforceRecoverAfterTime = true;
          reason =
              "expecting ["
                  + expectedDataNodes
                  + "] data nodes, but only have ["
                  + nodes.getDataNodes().size()
                  + "]";
        } else if (expectedMasterNodes != -1
            && (nodes.getMasterNodes().size()
                < expectedMasterNodes)) { // does not meet the expected...
          enforceRecoverAfterTime = true;
          reason =
              "expecting ["
                  + expectedMasterNodes
                  + "] master nodes, but only have ["
                  + nodes.getMasterNodes().size()
                  + "]";
        }
      }
      performStateRecovery(enforceRecoverAfterTime, reason);
    }
  }

  private void performStateRecovery(boolean enforceRecoverAfterTime, String reason) {
    final Gateway.GatewayStateRecoveredListener recoveryListener = new GatewayRecoveryListener();

    if (enforceRecoverAfterTime && recoverAfterTime != null) {
      if (scheduledRecovery.compareAndSet(false, true)) {
        logger.info("delaying initial state recovery for [{}]. {}", recoverAfterTime, reason);
        threadPool.schedule(
            recoverAfterTime,
            ThreadPool.Names.GENERIC,
            () -> {
              if (recovered.compareAndSet(false, true)) {
                logger.info(
                    "recover_after_time [{}] elapsed. performing state recovery...",
                    recoverAfterTime);
                gateway.performStateRecovery(recoveryListener);
              }
            });
      }
    } else {
      if (recovered.compareAndSet(false, true)) {
        threadPool
            .generic()
            .execute(
                new AbstractRunnable() {
                  @Override
                  public void onFailure(Exception e) {
                    logger.warn("Recovery failed", e);
                    // we reset `recovered` in the listener don't reset it here otherwise there
                    // might be a race
                    // that resets it to false while a new recover is already running?
                    recoveryListener.onFailure("state recovery failed: " + e.getMessage());
                  }

                  @Override
                  protected void doRun() throws Exception {
                    gateway.performStateRecovery(recoveryListener);
                  }
                });
      }
    }
  }

  public Gateway getGateway() {
    return gateway;
  }

  class GatewayRecoveryListener implements Gateway.GatewayStateRecoveredListener {

    @Override
    public void onSuccess(final ClusterState recoveredState) {
      logger.trace("successful state recovery, importing cluster state...");
      clusterService.submitStateUpdateTask(
          "local-gateway-elected-state",
          new ClusterStateUpdateTask() {
            @Override
            public ClusterState execute(ClusterState currentState) {
              assert currentState.metaData().indices().isEmpty();

              // remove the block, since we recovered from gateway
              ClusterBlocks.Builder blocks =
                  ClusterBlocks.builder()
                      .blocks(currentState.blocks())
                      .blocks(recoveredState.blocks())
                      .removeGlobalBlock(STATE_NOT_RECOVERED_BLOCK);

              MetaData.Builder metaDataBuilder = MetaData.builder(recoveredState.metaData());
              // automatically generate a UID for the metadata if we need to
              metaDataBuilder.generateClusterUuidIfNeeded();

              if (MetaData.SETTING_READ_ONLY_SETTING.get(recoveredState.metaData().settings())
                  || MetaData.SETTING_READ_ONLY_SETTING.get(currentState.metaData().settings())) {
                blocks.addGlobalBlock(MetaData.CLUSTER_READ_ONLY_BLOCK);
              }

              for (IndexMetaData indexMetaData : recoveredState.metaData()) {
                metaDataBuilder.put(indexMetaData, false);
                blocks.addBlocks(indexMetaData);
              }

              // update the state to reflect the new metadata and routing
              ClusterState updatedState =
                  ClusterState.builder(currentState)
                      .blocks(blocks)
                      .metaData(metaDataBuilder)
                      .build();

              // initialize all index routing tables as empty
              RoutingTable.Builder routingTableBuilder =
                  RoutingTable.builder(updatedState.routingTable());
              for (ObjectCursor<IndexMetaData> cursor :
                  updatedState.metaData().indices().values()) {
                routingTableBuilder.addAsRecovery(cursor.value);
              }
              // start with 0 based versions for routing table
              routingTableBuilder.version(0);

              // now, reroute
              updatedState =
                  ClusterState.builder(updatedState)
                      .routingTable(routingTableBuilder.build())
                      .build();
              return allocationService.reroute(updatedState, "state recovered");
            }

            @Override
            public void onFailure(String source, Exception e) {
              logger.error(
                  (Supplier<?>)
                      () -> new ParameterizedMessage("unexpected failure during [{}]", source),
                  e);
              GatewayRecoveryListener.this.onFailure("failed to updated cluster state");
            }

            @Override
            public void clusterStateProcessed(
                String source, ClusterState oldState, ClusterState newState) {
              logger.info(
                  "recovered [{}] indices into cluster_state",
                  newState.metaData().indices().size());
            }
          });
    }

    @Override
    public void onFailure(String message) {
      recovered.set(false);
      scheduledRecovery.set(false);
      // don't remove the block here, we don't want to allow anything in such a case
      logger.info("metadata state not restored, reason: {}", message);
    }
  }

  // used for testing
  public TimeValue recoverAfterTime() {
    return recoverAfterTime;
  }
}

Example #2

Show file

File: IndicesTTLService.java Project: CedarLabs/elasticsearch

/** A node level service that delete expired docs on node primary shards. */
public class IndicesTTLService extends AbstractLifecycleComponent<IndicesTTLService> {

  public static final Setting<TimeValue> INDICES_TTL_INTERVAL_SETTING =
      Setting.positiveTimeSetting(
          "indices.ttl.interval",
          TimeValue.timeValueSeconds(60),
          Property.Dynamic,
          Property.NodeScope);

  private final ClusterService clusterService;
  private final IndicesService indicesService;
  private final TransportBulkAction bulkAction;

  private final int bulkSize;
  private PurgerThread purgerThread;

  @Inject
  public IndicesTTLService(
      Settings settings,
      ClusterService clusterService,
      IndicesService indicesService,
      ClusterSettings clusterSettings,
      TransportBulkAction bulkAction) {
    super(settings);
    this.clusterService = clusterService;
    this.indicesService = indicesService;
    TimeValue interval = INDICES_TTL_INTERVAL_SETTING.get(settings);
    this.bulkAction = bulkAction;
    this.bulkSize = this.settings.getAsInt("indices.ttl.bulk_size", 10000);
    this.purgerThread =
        new PurgerThread(EsExecutors.threadName(settings, "[ttl_expire]"), interval);
    clusterSettings.addSettingsUpdateConsumer(
        INDICES_TTL_INTERVAL_SETTING, this.purgerThread::resetInterval);
  }

  @Override
  protected void doStart() {
    this.purgerThread.start();
  }

  @Override
  protected void doStop() {
    try {
      this.purgerThread.shutdown();
    } catch (InterruptedException e) {
      // we intentionally do not want to restore the interruption flag, we're about to shutdown
      // anyway
    }
  }

  @Override
  protected void doClose() {}

  private class PurgerThread extends Thread {
    private final AtomicBoolean running = new AtomicBoolean(true);
    private final Notifier notifier;
    private final CountDownLatch shutdownLatch = new CountDownLatch(1);

    public PurgerThread(String name, TimeValue interval) {
      super(name);
      setDaemon(true);
      this.notifier = new Notifier(interval);
    }

    public void shutdown() throws InterruptedException {
      if (running.compareAndSet(true, false)) {
        notifier.doNotify();
        shutdownLatch.await();
      }
    }

    public void resetInterval(TimeValue interval) {
      notifier.setTimeout(interval);
    }

    @Override
    public void run() {
      try {
        while (running.get()) {
          try {
            List<IndexShard> shardsToPurge = getShardsToPurge();
            purgeShards(shardsToPurge);
          } catch (Throwable e) {
            if (running.get()) {
              logger.warn("failed to execute ttl purge", e);
            }
          }
          if (running.get()) {
            notifier.await();
          }
        }
      } finally {
        shutdownLatch.countDown();
      }
    }

    /**
     * Returns the shards to purge, i.e. the local started primary shards that have ttl enabled and
     * disable_purge to false
     */
    private List<IndexShard> getShardsToPurge() {
      List<IndexShard> shardsToPurge = new ArrayList<>();
      MetaData metaData = clusterService.state().metaData();
      for (IndexService indexService : indicesService) {
        // check the value of disable_purge for this index
        IndexMetaData indexMetaData = metaData.index(indexService.index());
        if (indexMetaData == null) {
          continue;
        }
        if (indexService.getIndexSettings().isTTLPurgeDisabled()) {
          continue;
        }

        // check if ttl is enabled for at least one type of this index
        boolean hasTTLEnabled = false;
        for (String type : indexService.mapperService().types()) {
          DocumentMapper documentType = indexService.mapperService().documentMapper(type);
          if (documentType.TTLFieldMapper().enabled()) {
            hasTTLEnabled = true;
            break;
          }
        }
        if (hasTTLEnabled) {
          for (IndexShard indexShard : indexService) {
            if (indexShard.state() == IndexShardState.STARTED
                && indexShard.routingEntry().primary()
                && indexShard.routingEntry().started()) {
              shardsToPurge.add(indexShard);
            }
          }
        }
      }
      return shardsToPurge;
    }

    public TimeValue getInterval() {
      return notifier.getTimeout();
    }
  }

  private void purgeShards(List<IndexShard> shardsToPurge) {
    for (IndexShard shardToPurge : shardsToPurge) {
      Query query =
          shardToPurge
              .mapperService()
              .fullName(TTLFieldMapper.NAME)
              .rangeQuery(null, System.currentTimeMillis(), false, true);
      Engine.Searcher searcher = shardToPurge.acquireSearcher("indices_ttl");
      try {
        logger.debug(
            "[{}][{}] purging shard",
            shardToPurge.routingEntry().index(),
            shardToPurge.routingEntry().id());
        ExpiredDocsCollector expiredDocsCollector = new ExpiredDocsCollector();
        searcher.searcher().search(query, expiredDocsCollector);
        List<DocToPurge> docsToPurge = expiredDocsCollector.getDocsToPurge();

        BulkRequest bulkRequest = new BulkRequest();
        for (DocToPurge docToPurge : docsToPurge) {

          bulkRequest.add(
              new DeleteRequest()
                  .index(shardToPurge.routingEntry().getIndexName())
                  .type(docToPurge.type)
                  .id(docToPurge.id)
                  .version(docToPurge.version)
                  .routing(docToPurge.routing));
          bulkRequest = processBulkIfNeeded(bulkRequest, false);
        }
        processBulkIfNeeded(bulkRequest, true);
      } catch (Exception e) {
        logger.warn("failed to purge", e);
      } finally {
        searcher.close();
      }
    }
  }

  private static class DocToPurge {
    public final String type;
    public final String id;
    public final long version;
    public final String routing;

    public DocToPurge(String type, String id, long version, String routing) {
      this.type = type;
      this.id = id;
      this.version = version;
      this.routing = routing;
    }
  }

  private class ExpiredDocsCollector extends SimpleCollector {
    private LeafReaderContext context;
    private List<DocToPurge> docsToPurge = new ArrayList<>();

    public ExpiredDocsCollector() {}

    @Override
    public void setScorer(Scorer scorer) {}

    @Override
    public boolean needsScores() {
      return false;
    }

    @Override
    public void collect(int doc) {
      try {
        FieldsVisitor fieldsVisitor = new FieldsVisitor(false);
        context.reader().document(doc, fieldsVisitor);
        Uid uid = fieldsVisitor.uid();
        final long version =
            Versions.loadVersion(context.reader(), new Term(UidFieldMapper.NAME, uid.toBytesRef()));
        docsToPurge.add(new DocToPurge(uid.type(), uid.id(), version, fieldsVisitor.routing()));
      } catch (Exception e) {
        logger.trace("failed to collect doc", e);
      }
    }

    @Override
    public void doSetNextReader(LeafReaderContext context) throws IOException {
      this.context = context;
    }

    public List<DocToPurge> getDocsToPurge() {
      return this.docsToPurge;
    }
  }

  private BulkRequest processBulkIfNeeded(BulkRequest bulkRequest, boolean force) {
    if ((force && bulkRequest.numberOfActions() > 0) || bulkRequest.numberOfActions() >= bulkSize) {
      try {
        bulkAction.executeBulk(
            bulkRequest,
            new ActionListener<BulkResponse>() {
              @Override
              public void onResponse(BulkResponse bulkResponse) {
                if (bulkResponse.hasFailures()) {
                  int failedItems = 0;
                  for (BulkItemResponse response : bulkResponse) {
                    if (response.isFailed()) failedItems++;
                  }
                  if (logger.isTraceEnabled()) {
                    logger.trace(
                        "bulk deletion failures for [{}]/[{}] items, failure message: [{}]",
                        failedItems,
                        bulkResponse.getItems().length,
                        bulkResponse.buildFailureMessage());
                  } else {
                    logger.error(
                        "bulk deletion failures for [{}]/[{}] items",
                        failedItems,
                        bulkResponse.getItems().length);
                  }
                } else {
                  logger.trace("bulk deletion took {}ms", bulkResponse.getTookInMillis());
                }
              }

              @Override
              public void onFailure(Throwable e) {
                if (logger.isTraceEnabled()) {
                  logger.trace("failed to execute bulk", e);
                } else {
                  logger.warn("failed to execute bulk: ", e);
                }
              }
            });
      } catch (Exception e) {
        logger.warn("failed to process bulk", e);
      }
      bulkRequest = new BulkRequest();
    }
    return bulkRequest;
  }

  private static final class Notifier {

    private final ReentrantLock lock = new ReentrantLock();
    private final Condition condition = lock.newCondition();
    private volatile TimeValue timeout;

    public Notifier(TimeValue timeout) {
      assert timeout != null;
      this.timeout = timeout;
    }

    public void await() {
      lock.lock();
      try {
        condition.await(timeout.millis(), TimeUnit.MILLISECONDS);
      } catch (InterruptedException e) {
        // we intentionally do not want to restore the interruption flag, we're about to shutdown
        // anyway
      } finally {
        lock.unlock();
      }
    }

    public void setTimeout(TimeValue timeout) {
      assert timeout != null;
      this.timeout = timeout;
      doNotify();
    }

    public TimeValue getTimeout() {
      return timeout;
    }

    public void doNotify() {
      lock.lock();
      try {
        condition.signalAll();
      } finally {
        lock.unlock();
      }
    }
  }
}

Example #3

Show file

File: ClusterService.java Project: jorgediaz-lr/elasticsearch

public class ClusterService extends AbstractLifecycleComponent {

  public static final Setting<TimeValue> CLUSTER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING =
      Setting.positiveTimeSetting(
          "cluster.service.slow_task_logging_threshold",
          TimeValue.timeValueSeconds(30),
          Property.Dynamic,
          Property.NodeScope);

  public static final String UPDATE_THREAD_NAME = "clusterService#updateTask";
  private final ThreadPool threadPool;
  private final ClusterName clusterName;

  private BiConsumer<ClusterChangedEvent, Discovery.AckListener> clusterStatePublisher;

  private final OperationRouting operationRouting;

  private final ClusterSettings clusterSettings;

  private TimeValue slowTaskLoggingThreshold;

  private volatile PrioritizedEsThreadPoolExecutor updateTasksExecutor;

  /** Those 3 state listeners are changing infrequently - CopyOnWriteArrayList is just fine */
  private final Collection<ClusterStateListener> priorityClusterStateListeners =
      new CopyOnWriteArrayList<>();

  private final Collection<ClusterStateListener> clusterStateListeners =
      new CopyOnWriteArrayList<>();
  private final Collection<ClusterStateListener> lastClusterStateListeners =
      new CopyOnWriteArrayList<>();
  private final Map<ClusterStateTaskExecutor, List<UpdateTask>> updateTasksPerExecutor =
      new HashMap<>();
  // TODO this is rather frequently changing I guess a Synced Set would be better here and a
  // dedicated remove API
  private final Collection<ClusterStateListener> postAppliedListeners =
      new CopyOnWriteArrayList<>();
  private final Iterable<ClusterStateListener> preAppliedListeners =
      Iterables.concat(
          priorityClusterStateListeners, clusterStateListeners, lastClusterStateListeners);

  private final LocalNodeMasterListeners localNodeMasterListeners;

  private final Queue<NotifyTimeout> onGoingTimeouts = ConcurrentCollections.newQueue();

  private volatile ClusterState clusterState;

  private final ClusterBlocks.Builder initialBlocks;

  private NodeConnectionsService nodeConnectionsService;

  public ClusterService(Settings settings, ClusterSettings clusterSettings, ThreadPool threadPool) {
    super(settings);
    this.operationRouting = new OperationRouting(settings, clusterSettings);
    this.threadPool = threadPool;
    this.clusterSettings = clusterSettings;
    this.clusterName = ClusterName.CLUSTER_NAME_SETTING.get(settings);
    // will be replaced on doStart.
    this.clusterState = ClusterState.builder(clusterName).build();

    this.clusterSettings.addSettingsUpdateConsumer(
        CLUSTER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING, this::setSlowTaskLoggingThreshold);

    this.slowTaskLoggingThreshold =
        CLUSTER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING.get(settings);

    localNodeMasterListeners = new LocalNodeMasterListeners(threadPool);

    initialBlocks = ClusterBlocks.builder();
  }

  private void setSlowTaskLoggingThreshold(TimeValue slowTaskLoggingThreshold) {
    this.slowTaskLoggingThreshold = slowTaskLoggingThreshold;
  }

  public synchronized void setClusterStatePublisher(
      BiConsumer<ClusterChangedEvent, Discovery.AckListener> publisher) {
    clusterStatePublisher = publisher;
  }

  public synchronized void setLocalNode(DiscoveryNode localNode) {
    assert clusterState.nodes().getLocalNodeId() == null : "local node is already set";
    DiscoveryNodes.Builder nodeBuilder =
        DiscoveryNodes.builder(clusterState.nodes()).add(localNode).localNodeId(localNode.getId());
    this.clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build();
  }

  public synchronized void setNodeConnectionsService(
      NodeConnectionsService nodeConnectionsService) {
    assert this.nodeConnectionsService == null : "nodeConnectionsService is already set";
    this.nodeConnectionsService = nodeConnectionsService;
  }

  /** Adds an initial block to be set on the first cluster state created. */
  public synchronized void addInitialStateBlock(ClusterBlock block) throws IllegalStateException {
    if (lifecycle.started()) {
      throw new IllegalStateException("can't set initial block when started");
    }
    initialBlocks.addGlobalBlock(block);
  }

  /** Remove an initial block to be set on the first cluster state created. */
  public synchronized void removeInitialStateBlock(ClusterBlock block)
      throws IllegalStateException {
    removeInitialStateBlock(block.id());
  }

  /** Remove an initial block to be set on the first cluster state created. */
  public synchronized void removeInitialStateBlock(int blockId) throws IllegalStateException {
    if (lifecycle.started()) {
      throw new IllegalStateException("can't set initial block when started");
    }
    initialBlocks.removeGlobalBlock(blockId);
  }

  @Override
  protected synchronized void doStart() {
    Objects.requireNonNull(
        clusterStatePublisher, "please set a cluster state publisher before starting");
    Objects.requireNonNull(
        clusterState.nodes().getLocalNode(), "please set the local node before starting");
    Objects.requireNonNull(
        nodeConnectionsService, "please set the node connection service before starting");
    add(localNodeMasterListeners);
    this.clusterState = ClusterState.builder(clusterState).blocks(initialBlocks).build();
    this.updateTasksExecutor =
        EsExecutors.newSinglePrioritizing(
            UPDATE_THREAD_NAME,
            daemonThreadFactory(settings, UPDATE_THREAD_NAME),
            threadPool.getThreadContext());
    this.clusterState = ClusterState.builder(clusterState).blocks(initialBlocks).build();
  }

  @Override
  protected synchronized void doStop() {
    for (NotifyTimeout onGoingTimeout : onGoingTimeouts) {
      onGoingTimeout.cancel();
      try {
        onGoingTimeout.cancel();
        onGoingTimeout.listener.onClose();
      } catch (Exception ex) {
        logger.debug("failed to notify listeners on shutdown", ex);
      }
    }
    ThreadPool.terminate(updateTasksExecutor, 10, TimeUnit.SECONDS);
    // close timeout listeners that did not have an ongoing timeout
    postAppliedListeners
        .stream()
        .filter(listener -> listener instanceof TimeoutClusterStateListener)
        .map(listener -> (TimeoutClusterStateListener) listener)
        .forEach(TimeoutClusterStateListener::onClose);
    remove(localNodeMasterListeners);
  }

  @Override
  protected synchronized void doClose() {}

  /** The local node. */
  public DiscoveryNode localNode() {
    DiscoveryNode localNode = clusterState.getNodes().getLocalNode();
    if (localNode == null) {
      throw new IllegalStateException("No local node found. Is the node started?");
    }
    return localNode;
  }

  public OperationRouting operationRouting() {
    return operationRouting;
  }

  /** The current state. */
  public ClusterState state() {
    return this.clusterState;
  }

  /** Adds a priority listener for updated cluster states. */
  public void addFirst(ClusterStateListener listener) {
    priorityClusterStateListeners.add(listener);
  }

  /** Adds last listener. */
  public void addLast(ClusterStateListener listener) {
    lastClusterStateListeners.add(listener);
  }

  /** Adds a listener for updated cluster states. */
  public void add(ClusterStateListener listener) {
    clusterStateListeners.add(listener);
  }

  /** Removes a listener for updated cluster states. */
  public void remove(ClusterStateListener listener) {
    clusterStateListeners.remove(listener);
    priorityClusterStateListeners.remove(listener);
    lastClusterStateListeners.remove(listener);
    postAppliedListeners.remove(listener);
    for (Iterator<NotifyTimeout> it = onGoingTimeouts.iterator(); it.hasNext(); ) {
      NotifyTimeout timeout = it.next();
      if (timeout.listener.equals(listener)) {
        timeout.cancel();
        it.remove();
      }
    }
  }

  /** Add a listener for on/off local node master events */
  public void add(LocalNodeMasterListener listener) {
    localNodeMasterListeners.add(listener);
  }

  /** Remove the given listener for on/off local master events */
  public void remove(LocalNodeMasterListener listener) {
    localNodeMasterListeners.remove(listener);
  }

  /**
   * Adds a cluster state listener that will timeout after the provided timeout, and is executed
   * after the clusterstate has been successfully applied ie. is in state {@link
   * org.elasticsearch.cluster.ClusterState.ClusterStateStatus#APPLIED} NOTE: a {@code null} timeout
   * means that the listener will never be removed automatically
   */
  public void add(@Nullable final TimeValue timeout, final TimeoutClusterStateListener listener) {
    if (lifecycle.stoppedOrClosed()) {
      listener.onClose();
      return;
    }
    // call the post added notification on the same event thread
    try {
      updateTasksExecutor.execute(
          new SourcePrioritizedRunnable(Priority.HIGH, "_add_listener_") {
            @Override
            public void run() {
              if (timeout != null) {
                NotifyTimeout notifyTimeout = new NotifyTimeout(listener, timeout);
                notifyTimeout.future =
                    threadPool.schedule(timeout, ThreadPool.Names.GENERIC, notifyTimeout);
                onGoingTimeouts.add(notifyTimeout);
              }
              postAppliedListeners.add(listener);
              listener.postAdded();
            }
          });
    } catch (EsRejectedExecutionException e) {
      if (lifecycle.stoppedOrClosed()) {
        listener.onClose();
      } else {
        throw e;
      }
    }
  }

  /**
   * Submits a cluster state update task; unlike {@link #submitStateUpdateTask(String, Object,
   * ClusterStateTaskConfig, ClusterStateTaskExecutor, ClusterStateTaskListener)}, submitted updates
   * will not be batched.
   *
   * @param source the source of the cluster state update task
   * @param updateTask the full context for the cluster state update task
   */
  public void submitStateUpdateTask(final String source, final ClusterStateUpdateTask updateTask) {
    submitStateUpdateTask(source, updateTask, updateTask, updateTask, updateTask);
  }

  /**
   * Submits a cluster state update task; submitted updates will be batched across the same instance
   * of executor. The exact batching semantics depend on the underlying implementation but a rough
   * guideline is that if the update task is submitted while there are pending update tasks for the
   * same executor, these update tasks will all be executed on the executor in a single batch
   *
   * @param source the source of the cluster state update task
   * @param task the state needed for the cluster state update task
   * @param config the cluster state update task configuration
   * @param executor the cluster state update task executor; tasks that share the same executor will
   *     be executed batches on this executor
   * @param listener callback after the cluster state update task completes
   * @param <T> the type of the cluster state update task state
   */
  public <T> void submitStateUpdateTask(
      final String source,
      final T task,
      final ClusterStateTaskConfig config,
      final ClusterStateTaskExecutor<T> executor,
      final ClusterStateTaskListener listener) {
    submitStateUpdateTasks(source, Collections.singletonMap(task, listener), config, executor);
  }

  /**
   * Submits a batch of cluster state update tasks; submitted updates are guaranteed to be processed
   * together, potentially with more tasks of the same executor.
   *
   * @param source the source of the cluster state update task
   * @param tasks a map of update tasks and their corresponding listeners
   * @param config the cluster state update task configuration
   * @param executor the cluster state update task executor; tasks that share the same executor will
   *     be executed batches on this executor
   * @param <T> the type of the cluster state update task state
   */
  public <T> void submitStateUpdateTasks(
      final String source,
      final Map<T, ClusterStateTaskListener> tasks,
      final ClusterStateTaskConfig config,
      final ClusterStateTaskExecutor<T> executor) {
    if (!lifecycle.started()) {
      return;
    }
    if (tasks.isEmpty()) {
      return;
    }
    try {
      // convert to an identity map to check for dups based on update tasks semantics of using
      // identity instead of equal
      final IdentityHashMap<T, ClusterStateTaskListener> tasksIdentity =
          new IdentityHashMap<>(tasks);
      final List<UpdateTask<T>> updateTasks =
          tasksIdentity
              .entrySet()
              .stream()
              .map(
                  entry ->
                      new UpdateTask<>(
                          source, entry.getKey(), config, executor, safe(entry.getValue(), logger)))
              .collect(Collectors.toList());

      synchronized (updateTasksPerExecutor) {
        List<UpdateTask> existingTasks =
            updateTasksPerExecutor.computeIfAbsent(executor, k -> new ArrayList<>());
        for (@SuppressWarnings("unchecked") UpdateTask<T> existing : existingTasks) {
          if (tasksIdentity.containsKey(existing.task)) {
            throw new IllegalStateException(
                "task ["
                    + executor.describeTasks(Collections.singletonList(existing.task))
                    + "] with source ["
                    + source
                    + "] is already queued");
          }
        }
        existingTasks.addAll(updateTasks);
      }

      final UpdateTask<T> firstTask = updateTasks.get(0);

      if (config.timeout() != null) {
        updateTasksExecutor.execute(
            firstTask,
            threadPool.scheduler(),
            config.timeout(),
            () ->
                threadPool
                    .generic()
                    .execute(
                        () -> {
                          for (UpdateTask<T> task : updateTasks) {
                            if (task.processed.getAndSet(true) == false) {
                              logger.debug(
                                  "cluster state update task [{}] timed out after [{}]",
                                  source,
                                  config.timeout());
                              task.listener.onFailure(
                                  source,
                                  new ProcessClusterEventTimeoutException(
                                      config.timeout(), source));
                            }
                          }
                        }));
      } else {
        updateTasksExecutor.execute(firstTask);
      }
    } catch (EsRejectedExecutionException e) {
      // ignore cases where we are shutting down..., there is really nothing interesting
      // to be done here...
      if (!lifecycle.stoppedOrClosed()) {
        throw e;
      }
    }
  }

  /** Returns the tasks that are pending. */
  public List<PendingClusterTask> pendingTasks() {
    PrioritizedEsThreadPoolExecutor.Pending[] pendings = updateTasksExecutor.getPending();
    List<PendingClusterTask> pendingClusterTasks = new ArrayList<>(pendings.length);
    for (PrioritizedEsThreadPoolExecutor.Pending pending : pendings) {
      final String source;
      final long timeInQueue;
      // we have to capture the task as it will be nulled after execution and we don't want to
      // change while we check things here.
      final Object task = pending.task;
      if (task == null) {
        continue;
      } else if (task instanceof SourcePrioritizedRunnable) {
        SourcePrioritizedRunnable runnable = (SourcePrioritizedRunnable) task;
        source = runnable.source();
        timeInQueue = runnable.getAgeInMillis();
      } else {
        assert false : "expected SourcePrioritizedRunnable got " + task.getClass();
        source = "unknown [" + task.getClass() + "]";
        timeInQueue = 0;
      }

      pendingClusterTasks.add(
          new PendingClusterTask(
              pending.insertionOrder,
              pending.priority,
              new Text(source),
              timeInQueue,
              pending.executing));
    }
    return pendingClusterTasks;
  }

  /** Returns the number of currently pending tasks. */
  public int numberOfPendingTasks() {
    return updateTasksExecutor.getNumberOfPendingTasks();
  }

  /**
   * Returns the maximum wait time for tasks in the queue
   *
   * @return A zero time value if the queue is empty, otherwise the time value oldest task waiting
   *     in the queue
   */
  public TimeValue getMaxTaskWaitTime() {
    return updateTasksExecutor.getMaxTaskWaitTime();
  }

  /** asserts that the current thread is the cluster state update thread */
  public static boolean assertClusterStateThread() {
    assert Thread.currentThread().getName().contains(ClusterService.UPDATE_THREAD_NAME)
        : "not called from the cluster state update thread";
    return true;
  }

  public ClusterName getClusterName() {
    return clusterName;
  }

  abstract static class SourcePrioritizedRunnable extends PrioritizedRunnable {
    protected final String source;

    public SourcePrioritizedRunnable(Priority priority, String source) {
      super(priority);
      this.source = source;
    }

    public String source() {
      return source;
    }
  }

  <T> void runTasksForExecutor(ClusterStateTaskExecutor<T> executor) {
    final ArrayList<UpdateTask<T>> toExecute = new ArrayList<>();
    final Map<String, ArrayList<T>> processTasksBySource = new HashMap<>();
    synchronized (updateTasksPerExecutor) {
      List<UpdateTask> pending = updateTasksPerExecutor.remove(executor);
      if (pending != null) {
        for (UpdateTask<T> task : pending) {
          if (task.processed.getAndSet(true) == false) {
            logger.trace("will process {}", task.toString(executor));
            toExecute.add(task);
            processTasksBySource
                .computeIfAbsent(task.source, s -> new ArrayList<>())
                .add(task.task);
          } else {
            logger.trace("skipping {}, already processed", task.toString(executor));
          }
        }
      }
    }
    if (toExecute.isEmpty()) {
      return;
    }
    final String tasksSummary =
        processTasksBySource
            .entrySet()
            .stream()
            .map(
                entry -> {
                  String tasks = executor.describeTasks(entry.getValue());
                  return tasks.isEmpty() ? entry.getKey() : entry.getKey() + "[" + tasks + "]";
                })
            .reduce((s1, s2) -> s1 + ", " + s2)
            .orElse("");

    if (!lifecycle.started()) {
      logger.debug("processing [{}]: ignoring, cluster_service not started", tasksSummary);
      return;
    }
    logger.debug("processing [{}]: execute", tasksSummary);
    ClusterState previousClusterState = clusterState;
    if (!previousClusterState.nodes().isLocalNodeElectedMaster() && executor.runOnlyOnMaster()) {
      logger.debug("failing [{}]: local node is no longer master", tasksSummary);
      toExecute.stream().forEach(task -> task.listener.onNoLongerMaster(task.source));
      return;
    }
    ClusterStateTaskExecutor.BatchResult<T> batchResult;
    long startTimeNS = currentTimeInNanos();
    try {
      List<T> inputs =
          toExecute.stream().map(tUpdateTask -> tUpdateTask.task).collect(Collectors.toList());
      batchResult = executor.execute(previousClusterState, inputs);
    } catch (Exception e) {
      TimeValue executionTime =
          TimeValue.timeValueMillis(
              Math.max(0, TimeValue.nsecToMSec(currentTimeInNanos() - startTimeNS)));
      if (logger.isTraceEnabled()) {
        logger.trace(
            (Supplier<?>)
                () ->
                    new ParameterizedMessage(
                        "failed to execute cluster state update in [{}], state:\nversion [{}], source [{}]\n{}{}{}",
                        executionTime,
                        previousClusterState.version(),
                        tasksSummary,
                        previousClusterState.nodes().prettyPrint(),
                        previousClusterState.routingTable().prettyPrint(),
                        previousClusterState.getRoutingNodes().prettyPrint()),
            e);
      }
      warnAboutSlowTaskIfNeeded(executionTime, tasksSummary);
      batchResult =
          ClusterStateTaskExecutor.BatchResult.<T>builder()
              .failures(toExecute.stream().map(updateTask -> updateTask.task)::iterator, e)
              .build(previousClusterState);
    }

    assert batchResult.executionResults != null;
    assert batchResult.executionResults.size() == toExecute.size()
        : String.format(
            Locale.ROOT,
            "expected [%d] task result%s but was [%d]",
            toExecute.size(),
            toExecute.size() == 1 ? "" : "s",
            batchResult.executionResults.size());
    boolean assertsEnabled = false;
    assert (assertsEnabled = true);
    if (assertsEnabled) {
      for (UpdateTask<T> updateTask : toExecute) {
        assert batchResult.executionResults.containsKey(updateTask.task)
            : "missing task result for " + updateTask.toString(executor);
      }
    }

    ClusterState newClusterState = batchResult.resultingState;
    final ArrayList<UpdateTask<T>> proccessedListeners = new ArrayList<>();
    // fail all tasks that have failed and extract those that are waiting for results
    for (UpdateTask<T> updateTask : toExecute) {
      assert batchResult.executionResults.containsKey(updateTask.task)
          : "missing " + updateTask.toString(executor);
      final ClusterStateTaskExecutor.TaskResult executionResult =
          batchResult.executionResults.get(updateTask.task);
      executionResult.handle(
          () -> proccessedListeners.add(updateTask),
          ex -> {
            logger.debug(
                (Supplier<?>)
                    () ->
                        new ParameterizedMessage(
                            "cluster state update task {} failed", updateTask.toString(executor)),
                ex);
            updateTask.listener.onFailure(updateTask.source, ex);
          });
    }

    if (previousClusterState == newClusterState) {
      for (UpdateTask<T> task : proccessedListeners) {
        if (task.listener instanceof AckedClusterStateTaskListener) {
          // no need to wait for ack if nothing changed, the update can be counted as acknowledged
          ((AckedClusterStateTaskListener) task.listener).onAllNodesAcked(null);
        }
        task.listener.clusterStateProcessed(task.source, previousClusterState, newClusterState);
      }
      TimeValue executionTime =
          TimeValue.timeValueMillis(
              Math.max(0, TimeValue.nsecToMSec(currentTimeInNanos() - startTimeNS)));
      logger.debug(
          "processing [{}]: took [{}] no change in cluster_state", tasksSummary, executionTime);
      warnAboutSlowTaskIfNeeded(executionTime, tasksSummary);
      return;
    }

    try {
      ArrayList<Discovery.AckListener> ackListeners = new ArrayList<>();
      if (newClusterState.nodes().isLocalNodeElectedMaster()) {
        // only the master controls the version numbers
        Builder builder = ClusterState.builder(newClusterState).incrementVersion();
        if (previousClusterState.routingTable() != newClusterState.routingTable()) {
          builder.routingTable(
              RoutingTable.builder(newClusterState.routingTable())
                  .version(newClusterState.routingTable().version() + 1)
                  .build());
        }
        if (previousClusterState.metaData() != newClusterState.metaData()) {
          builder.metaData(
              MetaData.builder(newClusterState.metaData())
                  .version(newClusterState.metaData().version() + 1));
        }
        newClusterState = builder.build();
        for (UpdateTask<T> task : proccessedListeners) {
          if (task.listener instanceof AckedClusterStateTaskListener) {
            final AckedClusterStateTaskListener ackedListener =
                (AckedClusterStateTaskListener) task.listener;
            if (ackedListener.ackTimeout() == null || ackedListener.ackTimeout().millis() == 0) {
              ackedListener.onAckTimeout();
            } else {
              try {
                ackListeners.add(
                    new AckCountDownListener(
                        ackedListener,
                        newClusterState.version(),
                        newClusterState.nodes(),
                        threadPool));
              } catch (EsRejectedExecutionException ex) {
                if (logger.isDebugEnabled()) {
                  logger.debug(
                      "Couldn't schedule timeout thread - node might be shutting down", ex);
                }
                // timeout straightaway, otherwise we could wait forever as the timeout thread has
                // not started
                ackedListener.onAckTimeout();
              }
            }
          }
        }
      }
      final Discovery.AckListener ackListener = new DelegetingAckListener(ackListeners);

      newClusterState.status(ClusterState.ClusterStateStatus.BEING_APPLIED);

      if (logger.isTraceEnabled()) {
        logger.trace(
            "cluster state updated, source [{}]\n{}", tasksSummary, newClusterState.prettyPrint());
      } else if (logger.isDebugEnabled()) {
        logger.debug(
            "cluster state updated, version [{}], source [{}]",
            newClusterState.version(),
            tasksSummary);
      }

      ClusterChangedEvent clusterChangedEvent =
          new ClusterChangedEvent(tasksSummary, newClusterState, previousClusterState);
      // new cluster state, notify all listeners
      final DiscoveryNodes.Delta nodesDelta = clusterChangedEvent.nodesDelta();
      if (nodesDelta.hasChanges() && logger.isInfoEnabled()) {
        String summary = nodesDelta.shortSummary();
        if (summary.length() > 0) {
          logger.info("{}, reason: {}", summary, tasksSummary);
        }
      }

      nodeConnectionsService.connectToAddedNodes(clusterChangedEvent);

      // if we are the master, publish the new state to all nodes
      // we publish here before we send a notification to all the listeners, since if it fails
      // we don't want to notify
      if (newClusterState.nodes().isLocalNodeElectedMaster()) {
        logger.debug("publishing cluster state version [{}]", newClusterState.version());
        try {
          clusterStatePublisher.accept(clusterChangedEvent, ackListener);
        } catch (Discovery.FailedToCommitClusterStateException t) {
          final long version = newClusterState.version();
          logger.warn(
              (Supplier<?>)
                  () ->
                      new ParameterizedMessage(
                          "failing [{}]: failed to commit cluster state version [{}]",
                          tasksSummary,
                          version),
              t);
          proccessedListeners.forEach(task -> task.listener.onFailure(task.source, t));
          return;
        }
      }

      // update the current cluster state
      clusterState = newClusterState;
      logger.debug("set local cluster state to version {}", newClusterState.version());
      try {
        // nothing to do until we actually recover from the gateway or any other block indicates we
        // need to disable persistency
        if (clusterChangedEvent.state().blocks().disableStatePersistence() == false
            && clusterChangedEvent.metaDataChanged()) {
          final Settings incomingSettings = clusterChangedEvent.state().metaData().settings();
          clusterSettings.applySettings(incomingSettings);
        }
      } catch (Exception ex) {
        logger.warn("failed to apply cluster settings", ex);
      }
      for (ClusterStateListener listener : preAppliedListeners) {
        try {
          listener.clusterChanged(clusterChangedEvent);
        } catch (Exception ex) {
          logger.warn("failed to notify ClusterStateListener", ex);
        }
      }

      nodeConnectionsService.disconnectFromRemovedNodes(clusterChangedEvent);

      newClusterState.status(ClusterState.ClusterStateStatus.APPLIED);

      for (ClusterStateListener listener : postAppliedListeners) {
        try {
          listener.clusterChanged(clusterChangedEvent);
        } catch (Exception ex) {
          logger.warn("failed to notify ClusterStateListener", ex);
        }
      }

      // manual ack only from the master at the end of the publish
      if (newClusterState.nodes().isLocalNodeElectedMaster()) {
        try {
          ackListener.onNodeAck(newClusterState.nodes().getLocalNode(), null);
        } catch (Exception e) {
          final DiscoveryNode localNode = newClusterState.nodes().getLocalNode();
          logger.debug(
              (Supplier<?>)
                  () ->
                      new ParameterizedMessage(
                          "error while processing ack for master node [{}]", localNode),
              e);
        }
      }

      for (UpdateTask<T> task : proccessedListeners) {
        task.listener.clusterStateProcessed(task.source, previousClusterState, newClusterState);
      }

      try {
        executor.clusterStatePublished(clusterChangedEvent);
      } catch (Exception e) {
        logger.error(
            (Supplier<?>)
                () ->
                    new ParameterizedMessage(
                        "exception thrown while notifying executor of new cluster state publication [{}]",
                        tasksSummary),
            e);
      }

      TimeValue executionTime =
          TimeValue.timeValueMillis(
              Math.max(0, TimeValue.nsecToMSec(currentTimeInNanos() - startTimeNS)));
      logger.debug(
          "processing [{}]: took [{}] done applying updated cluster_state (version: {}, uuid: {})",
          tasksSummary,
          executionTime,
          newClusterState.version(),
          newClusterState.stateUUID());
      warnAboutSlowTaskIfNeeded(executionTime, tasksSummary);
    } catch (Exception e) {
      TimeValue executionTime =
          TimeValue.timeValueMillis(
              Math.max(0, TimeValue.nsecToMSec(currentTimeInNanos() - startTimeNS)));
      final long version = newClusterState.version();
      final String stateUUID = newClusterState.stateUUID();
      final String prettyPrint = newClusterState.prettyPrint();
      logger.warn(
          (Supplier<?>)
              () ->
                  new ParameterizedMessage(
                      "failed to apply updated cluster state in [{}]:\nversion [{}], uuid [{}], source [{}]\n{}",
                      executionTime,
                      version,
                      stateUUID,
                      tasksSummary,
                      prettyPrint),
          e);
      // TODO: do we want to call updateTask.onFailure here?
    }
  }

  // this one is overridden in tests so we can control time
  protected long currentTimeInNanos() {
    return System.nanoTime();
  }

  private static SafeClusterStateTaskListener safe(
      ClusterStateTaskListener listener, Logger logger) {
    if (listener instanceof AckedClusterStateTaskListener) {
      return new SafeAckedClusterStateTaskListener(
          (AckedClusterStateTaskListener) listener, logger);
    } else {
      return new SafeClusterStateTaskListener(listener, logger);
    }
  }

  private static class SafeClusterStateTaskListener implements ClusterStateTaskListener {
    private final ClusterStateTaskListener listener;
    private final Logger logger;

    public SafeClusterStateTaskListener(ClusterStateTaskListener listener, Logger logger) {
      this.listener = listener;
      this.logger = logger;
    }

    @Override
    public void onFailure(String source, Exception e) {
      try {
        listener.onFailure(source, e);
      } catch (Exception inner) {
        inner.addSuppressed(e);
        logger.error(
            (Supplier<?>)
                () ->
                    new ParameterizedMessage(
                        "exception thrown by listener notifying of failure from [{}]", source),
            inner);
      }
    }

    @Override
    public void onNoLongerMaster(String source) {
      try {
        listener.onNoLongerMaster(source);
      } catch (Exception e) {
        logger.error(
            (Supplier<?>)
                () ->
                    new ParameterizedMessage(
                        "exception thrown by listener while notifying no longer master from [{}]",
                        source),
            e);
      }
    }

    @Override
    public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
      try {
        listener.clusterStateProcessed(source, oldState, newState);
      } catch (Exception e) {
        logger.error(
            (Supplier<?>)
                () ->
                    new ParameterizedMessage(
                        "exception thrown by listener while notifying of cluster state processed from [{}], old cluster state:\n"
                            + "{}\nnew cluster state:\n{}",
                        source,
                        oldState.prettyPrint(),
                        newState.prettyPrint()),
            e);
      }
    }
  }

  private static class SafeAckedClusterStateTaskListener extends SafeClusterStateTaskListener
      implements AckedClusterStateTaskListener {
    private final AckedClusterStateTaskListener listener;
    private final Logger logger;

    public SafeAckedClusterStateTaskListener(
        AckedClusterStateTaskListener listener, Logger logger) {
      super(listener, logger);
      this.listener = listener;
      this.logger = logger;
    }

    @Override
    public boolean mustAck(DiscoveryNode discoveryNode) {
      return listener.mustAck(discoveryNode);
    }

    @Override
    public void onAllNodesAcked(@Nullable Exception e) {
      try {
        listener.onAllNodesAcked(e);
      } catch (Exception inner) {
        inner.addSuppressed(e);
        logger.error("exception thrown by listener while notifying on all nodes acked", inner);
      }
    }

    @Override
    public void onAckTimeout() {
      try {
        listener.onAckTimeout();
      } catch (Exception e) {
        logger.error("exception thrown by listener while notifying on ack timeout", e);
      }
    }

    @Override
    public TimeValue ackTimeout() {
      return listener.ackTimeout();
    }
  }

  class UpdateTask<T> extends SourcePrioritizedRunnable {

    public final T task;
    public final ClusterStateTaskConfig config;
    public final ClusterStateTaskExecutor<T> executor;
    public final ClusterStateTaskListener listener;
    public final AtomicBoolean processed = new AtomicBoolean();

    UpdateTask(
        String source,
        T task,
        ClusterStateTaskConfig config,
        ClusterStateTaskExecutor<T> executor,
        ClusterStateTaskListener listener) {
      super(config.priority(), source);
      this.task = task;
      this.config = config;
      this.executor = executor;
      this.listener = listener;
    }

    @Override
    public void run() {
      // if this task is already processed, the executor shouldn't execute other tasks (that arrived
      // later),
      // to give other executors a chance to execute their tasks.
      if (processed.get() == false) {
        runTasksForExecutor(executor);
      }
    }

    public String toString(ClusterStateTaskExecutor<T> executor) {
      String taskDescription = executor.describeTasks(Collections.singletonList(task));
      if (taskDescription.isEmpty()) {
        return "[" + source + "]";
      } else {
        return "[" + source + "[" + taskDescription + "]]";
      }
    }
  }

  private void warnAboutSlowTaskIfNeeded(TimeValue executionTime, String source) {
    if (executionTime.getMillis() > slowTaskLoggingThreshold.getMillis()) {
      logger.warn(
          "cluster state update task [{}] took [{}] above the warn threshold of {}",
          source,
          executionTime,
          slowTaskLoggingThreshold);
    }
  }

  class NotifyTimeout implements Runnable {
    final TimeoutClusterStateListener listener;
    final TimeValue timeout;
    volatile ScheduledFuture future;

    NotifyTimeout(TimeoutClusterStateListener listener, TimeValue timeout) {
      this.listener = listener;
      this.timeout = timeout;
    }

    public void cancel() {
      FutureUtils.cancel(future);
    }

    @Override
    public void run() {
      if (future != null && future.isCancelled()) {
        return;
      }
      if (lifecycle.stoppedOrClosed()) {
        listener.onClose();
      } else {
        listener.onTimeout(this.timeout);
      }
      // note, we rely on the listener to remove itself in case of timeout if needed
    }
  }

  private static class LocalNodeMasterListeners implements ClusterStateListener {

    private final List<LocalNodeMasterListener> listeners = new CopyOnWriteArrayList<>();
    private final ThreadPool threadPool;
    private volatile boolean master = false;

    private LocalNodeMasterListeners(ThreadPool threadPool) {
      this.threadPool = threadPool;
    }

    @Override
    public void clusterChanged(ClusterChangedEvent event) {
      if (!master && event.localNodeMaster()) {
        master = true;
        for (LocalNodeMasterListener listener : listeners) {
          Executor executor = threadPool.executor(listener.executorName());
          executor.execute(new OnMasterRunnable(listener));
        }
        return;
      }

      if (master && !event.localNodeMaster()) {
        master = false;
        for (LocalNodeMasterListener listener : listeners) {
          Executor executor = threadPool.executor(listener.executorName());
          executor.execute(new OffMasterRunnable(listener));
        }
      }
    }

    private void add(LocalNodeMasterListener listener) {
      listeners.add(listener);
    }

    private void remove(LocalNodeMasterListener listener) {
      listeners.remove(listener);
    }

    private void clear() {
      listeners.clear();
    }
  }

  private static class OnMasterRunnable implements Runnable {

    private final LocalNodeMasterListener listener;

    private OnMasterRunnable(LocalNodeMasterListener listener) {
      this.listener = listener;
    }

    @Override
    public void run() {
      listener.onMaster();
    }
  }

  private static class OffMasterRunnable implements Runnable {

    private final LocalNodeMasterListener listener;

    private OffMasterRunnable(LocalNodeMasterListener listener) {
      this.listener = listener;
    }

    @Override
    public void run() {
      listener.offMaster();
    }
  }

  private static class DelegetingAckListener implements Discovery.AckListener {

    private final List<Discovery.AckListener> listeners;

    private DelegetingAckListener(List<Discovery.AckListener> listeners) {
      this.listeners = listeners;
    }

    @Override
    public void onNodeAck(DiscoveryNode node, @Nullable Exception e) {
      for (Discovery.AckListener listener : listeners) {
        listener.onNodeAck(node, e);
      }
    }

    @Override
    public void onTimeout() {
      throw new UnsupportedOperationException("no timeout delegation");
    }
  }

  private static class AckCountDownListener implements Discovery.AckListener {

    private static final Logger logger = Loggers.getLogger(AckCountDownListener.class);

    private final AckedClusterStateTaskListener ackedTaskListener;
    private final CountDown countDown;
    private final DiscoveryNodes nodes;
    private final long clusterStateVersion;
    private final Future<?> ackTimeoutCallback;
    private Exception lastFailure;

    AckCountDownListener(
        AckedClusterStateTaskListener ackedTaskListener,
        long clusterStateVersion,
        DiscoveryNodes nodes,
        ThreadPool threadPool) {
      this.ackedTaskListener = ackedTaskListener;
      this.clusterStateVersion = clusterStateVersion;
      this.nodes = nodes;
      int countDown = 0;
      for (DiscoveryNode node : nodes) {
        if (ackedTaskListener.mustAck(node)) {
          countDown++;
        }
      }
      // we always wait for at least 1 node (the master)
      countDown = Math.max(1, countDown);
      logger.trace(
          "expecting {} acknowledgements for cluster_state update (version: {})",
          countDown,
          clusterStateVersion);
      this.countDown = new CountDown(countDown);
      this.ackTimeoutCallback =
          threadPool.schedule(
              ackedTaskListener.ackTimeout(),
              ThreadPool.Names.GENERIC,
              new Runnable() {
                @Override
                public void run() {
                  onTimeout();
                }
              });
    }

    @Override
    public void onNodeAck(DiscoveryNode node, @Nullable Exception e) {
      if (!ackedTaskListener.mustAck(node)) {
        // we always wait for the master ack anyway
        if (!node.equals(nodes.getMasterNode())) {
          return;
        }
      }
      if (e == null) {
        logger.trace(
            "ack received from node [{}], cluster_state update (version: {})",
            node,
            clusterStateVersion);
      } else {
        this.lastFailure = e;
        logger.debug(
            (Supplier<?>)
                () ->
                    new ParameterizedMessage(
                        "ack received from node [{}], cluster_state update (version: {})",
                        node,
                        clusterStateVersion),
            e);
      }

      if (countDown.countDown()) {
        logger.trace(
            "all expected nodes acknowledged cluster_state update (version: {})",
            clusterStateVersion);
        FutureUtils.cancel(ackTimeoutCallback);
        ackedTaskListener.onAllNodesAcked(lastFailure);
      }
    }

    @Override
    public void onTimeout() {
      if (countDown.fastForward()) {
        logger.trace(
            "timeout waiting for acknowledgement for cluster_state update (version: {})",
            clusterStateVersion);
        ackedTaskListener.onAckTimeout();
      }
    }
  }

  public ClusterSettings getClusterSettings() {
    return clusterSettings;
  }

  public Settings getSettings() {
    return settings;
  }
}

Example #4

Show file

File: RecoverySettings.java Project: ricardocerq/elasticsearch

public class RecoverySettings extends AbstractComponent {

  public static final Setting<ByteSizeValue> INDICES_RECOVERY_MAX_BYTES_PER_SEC_SETTING =
      Setting.byteSizeSetting(
          "indices.recovery.max_bytes_per_sec",
          new ByteSizeValue(40, ByteSizeUnit.MB),
          Property.Dynamic,
          Property.NodeScope);

  /**
   * how long to wait before retrying after issues cause by cluster state syncing between nodes
   * i.e., local node is not yet known on remote node, remote shard not yet started etc.
   */
  public static final Setting<TimeValue> INDICES_RECOVERY_RETRY_DELAY_STATE_SYNC_SETTING =
      Setting.positiveTimeSetting(
          "indices.recovery.retry_delay_state_sync",
          TimeValue.timeValueMillis(500),
          Property.Dynamic,
          Property.NodeScope);

  /** how long to wait before retrying after network related issues */
  public static final Setting<TimeValue> INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING =
      Setting.positiveTimeSetting(
          "indices.recovery.retry_delay_network",
          TimeValue.timeValueSeconds(5),
          Property.Dynamic,
          Property.NodeScope);

  /** timeout value to use for requests made as part of the recovery process */
  public static final Setting<TimeValue> INDICES_RECOVERY_INTERNAL_ACTION_TIMEOUT_SETTING =
      Setting.positiveTimeSetting(
          "indices.recovery.internal_action_timeout",
          TimeValue.timeValueMinutes(15),
          Property.Dynamic,
          Property.NodeScope);

  /**
   * timeout value to use for requests made as part of the recovery process that are expected to
   * take long time. defaults to twice `indices.recovery.internal_action_timeout`.
   */
  public static final Setting<TimeValue> INDICES_RECOVERY_INTERNAL_LONG_ACTION_TIMEOUT_SETTING =
      Setting.timeSetting(
          "indices.recovery.internal_action_long_timeout",
          (s) ->
              TimeValue.timeValueMillis(
                  INDICES_RECOVERY_INTERNAL_ACTION_TIMEOUT_SETTING.get(s).millis() * 2),
          TimeValue.timeValueSeconds(0),
          Property.Dynamic,
          Property.NodeScope);

  /**
   * recoveries that don't show any activity for more then this interval will be failed. defaults to
   * `indices.recovery.internal_action_long_timeout`
   */
  public static final Setting<TimeValue> INDICES_RECOVERY_ACTIVITY_TIMEOUT_SETTING =
      Setting.timeSetting(
          "indices.recovery.recovery_activity_timeout",
          INDICES_RECOVERY_INTERNAL_LONG_ACTION_TIMEOUT_SETTING::get,
          TimeValue.timeValueSeconds(0),
          Property.Dynamic,
          Property.NodeScope);

  public static final ByteSizeValue DEFAULT_CHUNK_SIZE = new ByteSizeValue(512, ByteSizeUnit.KB);

  private volatile ByteSizeValue maxBytesPerSec;
  private volatile SimpleRateLimiter rateLimiter;
  private volatile TimeValue retryDelayStateSync;
  private volatile TimeValue retryDelayNetwork;
  private volatile TimeValue activityTimeout;
  private volatile TimeValue internalActionTimeout;
  private volatile TimeValue internalActionLongTimeout;

  private volatile ByteSizeValue chunkSize = DEFAULT_CHUNK_SIZE;

  @Inject
  public RecoverySettings(Settings settings, ClusterSettings clusterSettings) {
    super(settings);

    this.retryDelayStateSync = INDICES_RECOVERY_RETRY_DELAY_STATE_SYNC_SETTING.get(settings);
    // doesn't have to be fast as nodes are reconnected every 10s by default (see
    // InternalClusterService.ReconnectToNodes)
    // and we want to give the master time to remove a faulty node
    this.retryDelayNetwork = INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.get(settings);

    this.internalActionTimeout = INDICES_RECOVERY_INTERNAL_ACTION_TIMEOUT_SETTING.get(settings);
    this.internalActionLongTimeout =
        INDICES_RECOVERY_INTERNAL_LONG_ACTION_TIMEOUT_SETTING.get(settings);

    this.activityTimeout = INDICES_RECOVERY_ACTIVITY_TIMEOUT_SETTING.get(settings);
    this.maxBytesPerSec = INDICES_RECOVERY_MAX_BYTES_PER_SEC_SETTING.get(settings);
    if (maxBytesPerSec.getBytes() <= 0) {
      rateLimiter = null;
    } else {
      rateLimiter = new SimpleRateLimiter(maxBytesPerSec.getMbFrac());
    }

    logger.debug("using max_bytes_per_sec[{}]", maxBytesPerSec);

    clusterSettings.addSettingsUpdateConsumer(
        INDICES_RECOVERY_MAX_BYTES_PER_SEC_SETTING, this::setMaxBytesPerSec);
    clusterSettings.addSettingsUpdateConsumer(
        INDICES_RECOVERY_RETRY_DELAY_STATE_SYNC_SETTING, this::setRetryDelayStateSync);
    clusterSettings.addSettingsUpdateConsumer(
        INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING, this::setRetryDelayNetwork);
    clusterSettings.addSettingsUpdateConsumer(
        INDICES_RECOVERY_INTERNAL_ACTION_TIMEOUT_SETTING, this::setInternalActionTimeout);
    clusterSettings.addSettingsUpdateConsumer(
        INDICES_RECOVERY_INTERNAL_LONG_ACTION_TIMEOUT_SETTING, this::setInternalActionLongTimeout);
    clusterSettings.addSettingsUpdateConsumer(
        INDICES_RECOVERY_ACTIVITY_TIMEOUT_SETTING, this::setActivityTimeout);
  }

  public RateLimiter rateLimiter() {
    return rateLimiter;
  }

  public TimeValue retryDelayNetwork() {
    return retryDelayNetwork;
  }

  public TimeValue retryDelayStateSync() {
    return retryDelayStateSync;
  }

  public TimeValue activityTimeout() {
    return activityTimeout;
  }

  public TimeValue internalActionTimeout() {
    return internalActionTimeout;
  }

  public TimeValue internalActionLongTimeout() {
    return internalActionLongTimeout;
  }

  public ByteSizeValue getChunkSize() {
    return chunkSize;
  }

  void setChunkSize(ByteSizeValue chunkSize) { // only settable for tests
    if (chunkSize.bytesAsInt() <= 0) {
      throw new IllegalArgumentException("chunkSize must be > 0");
    }
    this.chunkSize = chunkSize;
  }

  public void setRetryDelayStateSync(TimeValue retryDelayStateSync) {
    this.retryDelayStateSync = retryDelayStateSync;
  }

  public void setRetryDelayNetwork(TimeValue retryDelayNetwork) {
    this.retryDelayNetwork = retryDelayNetwork;
  }

  public void setActivityTimeout(TimeValue activityTimeout) {
    this.activityTimeout = activityTimeout;
  }

  public void setInternalActionTimeout(TimeValue internalActionTimeout) {
    this.internalActionTimeout = internalActionTimeout;
  }

  public void setInternalActionLongTimeout(TimeValue internalActionLongTimeout) {
    this.internalActionLongTimeout = internalActionLongTimeout;
  }

  private void setMaxBytesPerSec(ByteSizeValue maxBytesPerSec) {
    this.maxBytesPerSec = maxBytesPerSec;
    if (maxBytesPerSec.getBytes() <= 0) {
      rateLimiter = null;
    } else if (rateLimiter != null) {
      rateLimiter.setMBPerSec(maxBytesPerSec.getMbFrac());
    } else {
      rateLimiter = new SimpleRateLimiter(maxBytesPerSec.getMbFrac());
    }
  }
}

Example #5

Show file

File: FaultDetection.java Project: ricardocerq/elasticsearch

/**
 * A base class for {@link org.elasticsearch.discovery.zen.fd.MasterFaultDetection} &amp; {@link
 * org.elasticsearch.discovery.zen.fd.NodesFaultDetection}, making sure both use the same setting.
 */
public abstract class FaultDetection extends AbstractComponent {

  public static final Setting<Boolean> CONNECT_ON_NETWORK_DISCONNECT_SETTING =
      Setting.boolSetting(
          "discovery.zen.fd.connect_on_network_disconnect", false, Property.NodeScope);
  public static final Setting<TimeValue> PING_INTERVAL_SETTING =
      Setting.positiveTimeSetting(
          "discovery.zen.fd.ping_interval", timeValueSeconds(1), Property.NodeScope);
  public static final Setting<TimeValue> PING_TIMEOUT_SETTING =
      Setting.timeSetting(
          "discovery.zen.fd.ping_timeout", timeValueSeconds(30), Property.NodeScope);
  public static final Setting<Integer> PING_RETRIES_SETTING =
      Setting.intSetting("discovery.zen.fd.ping_retries", 3, Property.NodeScope);
  public static final Setting<Boolean> REGISTER_CONNECTION_LISTENER_SETTING =
      Setting.boolSetting(
          "discovery.zen.fd.register_connection_listener", true, Property.NodeScope);

  protected final ThreadPool threadPool;
  protected final ClusterName clusterName;
  protected final TransportService transportService;

  // used mainly for testing, should always be true
  protected final boolean registerConnectionListener;
  protected final FDConnectionListener connectionListener;
  protected final boolean connectOnNetworkDisconnect;

  protected final TimeValue pingInterval;
  protected final TimeValue pingRetryTimeout;
  protected final int pingRetryCount;

  public FaultDetection(
      Settings settings,
      ThreadPool threadPool,
      TransportService transportService,
      ClusterName clusterName) {
    super(settings);
    this.threadPool = threadPool;
    this.transportService = transportService;
    this.clusterName = clusterName;

    this.connectOnNetworkDisconnect = CONNECT_ON_NETWORK_DISCONNECT_SETTING.get(settings);
    this.pingInterval = PING_INTERVAL_SETTING.get(settings);
    this.pingRetryTimeout = PING_TIMEOUT_SETTING.get(settings);
    this.pingRetryCount = PING_RETRIES_SETTING.get(settings);
    this.registerConnectionListener = REGISTER_CONNECTION_LISTENER_SETTING.get(settings);

    this.connectionListener = new FDConnectionListener();
    if (registerConnectionListener) {
      transportService.addConnectionListener(connectionListener);
    }
  }

  public void close() {
    transportService.removeConnectionListener(connectionListener);
  }

  /**
   * This method will be called when the {@link org.elasticsearch.transport.TransportService} raised
   * a node disconnected event
   */
  abstract void handleTransportDisconnect(DiscoveryNode node);

  private class FDConnectionListener implements TransportConnectionListener {
    @Override
    public void onNodeConnected(DiscoveryNode node) {}

    @Override
    public void onNodeDisconnected(DiscoveryNode node) {
      handleTransportDisconnect(node);
    }
  }
}